feat: Go backend, enhanced search, new widgets, Docker deploy
Major changes:
- Add Go backend (backend/) with microservices architecture
- Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler, proxy-manager, media-search, fastClassifier, language detection
- New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard, UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions
- Improved discover-svc with discover-db integration
- Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md)
- Library-svc: project_id schema migration
- Remove deprecated finance-svc and travel-svc
- Localization improvements across services

Made-with: Cursor
This commit is contained in:
177
backend/internal/search/searxng.go
Normal file
177
backend/internal/search/searxng.go
Normal file
@@ -0,0 +1,177 @@
|
||||
package search
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gooseek/backend/internal/types"
|
||||
"github.com/gooseek/backend/pkg/config"
|
||||
)
|
||||
|
||||
// SearXNGClient sends search queries to a SearXNG instance over HTTP and
// falls back to alternative instances when the primary one fails.
type SearXNGClient struct {
	// primaryURL is the base URL of the instance that is tried first.
	primaryURL string
	// fallbackURLs are base URLs tried, in order, after the primary one.
	fallbackURLs []string

	// client performs the HTTP requests.
	client *http.Client
	// timeout is the search timeout taken from the configuration.
	timeout time.Duration
}
|
||||
|
||||
func NewSearXNGClient(cfg *config.Config) *SearXNGClient {
|
||||
return &SearXNGClient{
|
||||
primaryURL: cfg.SearXNGURL,
|
||||
fallbackURLs: cfg.SearXNGFallbackURL,
|
||||
client: &http.Client{Timeout: cfg.SearchTimeout},
|
||||
timeout: cfg.SearchTimeout,
|
||||
}
|
||||
}
|
||||
|
||||
// SearchOptions holds the optional parameters of a search request. Zero-valued
// fields are omitted from the request.
type SearchOptions struct {
	// Engines is sent as the comma-separated "engines" parameter when non-empty.
	Engines []string
	// Categories is sent as the comma-separated "categories" parameter when non-empty.
	Categories []string
	// PageNo is sent as the "pageno" parameter when greater than zero.
	PageNo int
	// Language is sent as the "language" parameter when non-empty.
	Language string
}
|
||||
|
||||
func (c *SearXNGClient) Search(ctx context.Context, query string, opts *SearchOptions) (*types.SearchResponse, error) {
|
||||
candidates := c.buildCandidates()
|
||||
if len(candidates) == 0 {
|
||||
return nil, fmt.Errorf("no SearXNG URLs configured")
|
||||
}
|
||||
|
||||
var lastErr error
|
||||
for _, baseURL := range candidates {
|
||||
result, err := c.searchWithURL(ctx, baseURL, query, opts)
|
||||
if err == nil {
|
||||
return result, nil
|
||||
}
|
||||
lastErr = err
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("all SearXNG instances failed: %w", lastErr)
|
||||
}
|
||||
|
||||
func (c *SearXNGClient) buildCandidates() []string {
|
||||
candidates := make([]string, 0)
|
||||
|
||||
if c.primaryURL != "" {
|
||||
u := strings.TrimSuffix(c.primaryURL, "/")
|
||||
if !strings.HasPrefix(u, "http") {
|
||||
u = "http://" + u
|
||||
}
|
||||
candidates = append(candidates, u)
|
||||
}
|
||||
|
||||
for _, fb := range c.fallbackURLs {
|
||||
u := strings.TrimSpace(fb)
|
||||
if u == "" {
|
||||
continue
|
||||
}
|
||||
u = strings.TrimSuffix(u, "/")
|
||||
if !strings.HasPrefix(u, "http") {
|
||||
u = "https://" + u
|
||||
}
|
||||
if !contains(candidates, u) {
|
||||
candidates = append(candidates, u)
|
||||
}
|
||||
}
|
||||
|
||||
return candidates
|
||||
}
|
||||
|
||||
func (c *SearXNGClient) searchWithURL(ctx context.Context, baseURL, query string, opts *SearchOptions) (*types.SearchResponse, error) {
|
||||
params := url.Values{}
|
||||
params.Set("format", "json")
|
||||
params.Set("q", query)
|
||||
|
||||
if opts != nil {
|
||||
if len(opts.Engines) > 0 {
|
||||
params.Set("engines", strings.Join(opts.Engines, ","))
|
||||
}
|
||||
if len(opts.Categories) > 0 {
|
||||
params.Set("categories", strings.Join(opts.Categories, ","))
|
||||
}
|
||||
if opts.PageNo > 0 {
|
||||
params.Set("pageno", fmt.Sprintf("%d", opts.PageNo))
|
||||
}
|
||||
if opts.Language != "" {
|
||||
params.Set("language", opts.Language)
|
||||
}
|
||||
}
|
||||
|
||||
reqURL := fmt.Sprintf("%s/search?%s", baseURL, params.Encode())
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp, err := c.client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("SearXNG returned status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var result struct {
|
||||
Results []types.SearchResult `json:"results"`
|
||||
Suggestions []string `json:"suggestions"`
|
||||
}
|
||||
|
||||
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &types.SearchResponse{
|
||||
Results: result.Results,
|
||||
Suggestions: result.Suggestions,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// URL heuristics used to categorise search results. They are matched against
// the lower-cased result URL and are not anchored to the start of the URL.

// productPattern matches product pages on Ozon, Wildberries, AliExpress and
// Yandex Market.
var productPattern = regexp.MustCompile(`ozon\.ru/product|wildberries\.ru/catalog/\d|aliexpress\.(ru|com)/item|market\.yandex`)

// videoPattern matches video pages on Rutube, VK, YouTube and Dzen.
var videoPattern = regexp.MustCompile(`rutube\.ru/video|vk\.com/video|vk\.com/clip|youtube\.com/watch|youtu\.be|dzen\.ru/video`)

// vkProfilePattern matches URLs that end in a single path segment under vk.com.
var vkProfilePattern = regexp.MustCompile(`vk\.com/[a-zA-Z0-9_.]+$`)

// tgProfilePattern matches URLs that end in a single path segment under t.me.
var tgProfilePattern = regexp.MustCompile(`t\.me/[a-zA-Z0-9_]+$`)
|
||||
|
||||
func CategorizeResult(result *types.SearchResult) types.ContentCategory {
|
||||
urlLower := strings.ToLower(result.URL)
|
||||
|
||||
if productPattern.MatchString(urlLower) {
|
||||
return types.CategoryProduct
|
||||
}
|
||||
|
||||
if videoPattern.MatchString(urlLower) || result.IframeSrc != "" || result.Category == "videos" {
|
||||
return types.CategoryVideo
|
||||
}
|
||||
|
||||
if tgProfilePattern.MatchString(urlLower) {
|
||||
return types.CategoryProfile
|
||||
}
|
||||
if vkProfilePattern.MatchString(urlLower) && !videoPattern.MatchString(urlLower) {
|
||||
return types.CategoryProfile
|
||||
}
|
||||
|
||||
if result.ImgSrc != "" && result.Category == "images" {
|
||||
return types.CategoryImage
|
||||
}
|
||||
|
||||
return types.CategoryArticle
|
||||
}
|
||||
|
||||
// contains reports whether item is an element of slice. A nil or empty slice
// contains nothing.
func contains(slice []string, item string) bool {
	for i := 0; i < len(slice); i++ {
		if slice[i] == item {
			return true
		}
	}
	return false
}
|
||||
Reference in New Issue
Block a user