// Package search provides an HTTP client for SearXNG instances with failover
// across several base URLs, plus helpers that classify search results by URL.
package search

import (
	"context"
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
	"regexp"
	"strings"
	"time"

	"github.com/gooseek/backend/internal/types"
	"github.com/gooseek/backend/pkg/config"
)

// SearXNGClient queries a primary SearXNG instance and falls back to the
// configured alternatives when a request fails.
type SearXNGClient struct {
	primaryURL   string
	fallbackURLs []string
	client       *http.Client
	// timeout mirrors the configured search timeout; within this file it is
	// only assigned (the HTTP client carries the same value as its Timeout).
	timeout time.Duration
}

// NewSearXNGClient builds a client from cfg, using cfg.SearchTimeout as the
// per-request timeout of the underlying HTTP client.
func NewSearXNGClient(cfg *config.Config) *SearXNGClient {
	return &SearXNGClient{
		primaryURL:   cfg.SearXNGURL,
		fallbackURLs: cfg.SearXNGFallbackURL,
		client:       &http.Client{Timeout: cfg.SearchTimeout},
		timeout:      cfg.SearchTimeout,
	}
}

// SearchOptions holds the optional query parameters of a search request.
// Zero-valued fields are omitted from the request.
type SearchOptions struct {
	Engines    []string // sent comma-joined as "engines"
	Categories []string // sent comma-joined as "categories"
	PageNo     int      // sent as "pageno" when greater than zero
	Language   string   // sent as "language" when non-empty
}

// Search runs query against each configured instance in order and returns the
// first successful response. opts may be nil.
//
// It returns an error when no instance is configured, when the context is
// done, or when every instance fails; in the latter case the error wraps the
// failure of the last instance tried.
func (c *SearXNGClient) Search(ctx context.Context, query string, opts *SearchOptions) (*types.SearchResponse, error) {
	candidates := c.buildCandidates()
	if len(candidates) == 0 {
		return nil, fmt.Errorf("no SearXNG URLs configured")
	}

	var lastErr error
	for _, baseURL := range candidates {
		result, err := c.searchWithURL(ctx, baseURL, query, opts)
		if err == nil {
			return result, nil
		}
		// Once the caller's context is done every further attempt would fail
		// the same way, so stop instead of walking the remaining fallbacks.
		// The nil guard keeps a nil ctx on the plain error path.
		if ctx != nil {
			if ctxErr := ctx.Err(); ctxErr != nil {
				return nil, fmt.Errorf("SearXNG search aborted: %w", ctxErr)
			}
		}
		lastErr = fmt.Errorf("instance %s: %w", baseURL, err)
	}
	return nil, fmt.Errorf("all SearXNG instances failed: %w", lastErr)
}

// buildCandidates returns the normalized, de-duplicated base URLs to try, with
// the primary URL first. A primary URL without a scheme defaults to http, a
// fallback URL without a scheme defaults to https.
func (c *SearXNGClient) buildCandidates() []string {
	candidates := make([]string, 0, 1+len(c.fallbackURLs))
	if u := normalizeBaseURL(c.primaryURL, "http"); u != "" {
		candidates = append(candidates, u)
	}
	for _, fb := range c.fallbackURLs {
		u := normalizeBaseURL(fb, "https")
		if u == "" || contains(candidates, u) {
			continue
		}
		candidates = append(candidates, u)
	}
	return candidates
}

// normalizeBaseURL trims surrounding whitespace and one trailing slash from
// raw and prepends defaultScheme when raw carries no http(s) scheme. It
// returns "" for blank input.
func normalizeBaseURL(raw, defaultScheme string) string {
	u := strings.TrimSpace(raw)
	if u == "" {
		return ""
	}
	u = strings.TrimSuffix(u, "/")
	if !hasHTTPScheme(u) {
		u = defaultScheme + "://" + u
	}
	return u
}

// hasHTTPScheme reports whether u starts with "http://" or "https://",
// ignoring case. Checking the full "scheme://" prefix keeps bare hosts such as
// "httpbin.org" from being mistaken for complete URLs.
func hasHTTPScheme(u string) bool {
	lower := strings.ToLower(u)
	return strings.HasPrefix(lower, "http://") || strings.HasPrefix(lower, "https://")
}

// searchWithURL performs one JSON search request against baseURL and decodes
// the "results" and "suggestions" fields of the response. Any status other
// than 200 is reported as an error.
func (c *SearXNGClient) searchWithURL(ctx context.Context, baseURL, query string, opts *SearchOptions) (*types.SearchResponse, error) {
	params := url.Values{}
	params.Set("format", "json")
	params.Set("q", query)
	if opts != nil {
		if len(opts.Engines) > 0 {
			params.Set("engines", strings.Join(opts.Engines, ","))
		}
		if len(opts.Categories) > 0 {
			params.Set("categories", strings.Join(opts.Categories, ","))
		}
		if opts.PageNo > 0 {
			params.Set("pageno", fmt.Sprintf("%d", opts.PageNo))
		}
		if opts.Language != "" {
			params.Set("language", opts.Language)
		}
	}

	reqURL := fmt.Sprintf("%s/search?%s", baseURL, params.Encode())
	req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
	if err != nil {
		return nil, fmt.Errorf("building SearXNG request: %w", err)
	}

	resp, err := c.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("SearXNG returned status %d", resp.StatusCode)
	}

	var result struct {
		Results     []types.SearchResult `json:"results"`
		Suggestions []string             `json:"suggestions"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, fmt.Errorf("decoding SearXNG response: %w", err)
	}

	return &types.SearchResponse{
		Results:     result.Results,
		Suggestions: result.Suggestions,
	}, nil
}

// URL patterns used by CategorizeResult. They are matched against the
// lower-cased result URL.
var (
	productPattern = regexp.MustCompile(`ozon\.ru/product|wildberries\.ru/catalog/\d|aliexpress\.(ru|com)/item|market\.yandex`)
	videoPattern   = regexp.MustCompile(`rutube\.ru/video|vk\.com/video|vk\.com/clip|youtube\.com/watch|youtu\.be|dzen\.ru/video`)
	// The profile patterns require the host to begin at the start of the
	// string or right after "/" or ".", so unrelated hosts that merely end in
	// "vk.com" or "t.me" (e.g. "chat.me") are not treated as profiles.
	vkProfilePattern = regexp.MustCompile(`(?:^|[/.])vk\.com/[a-zA-Z0-9_.]+$`)
	tgProfilePattern = regexp.MustCompile(`(?:^|[/.])t\.me/[a-zA-Z0-9_]+$`)
)

// CategorizeResult classifies a search result. Checks run in priority order:
// product, video, profile, image; anything else, including a nil result, is
// an article.
func CategorizeResult(result *types.SearchResult) types.ContentCategory {
	if result == nil {
		return types.CategoryArticle
	}

	urlLower := strings.ToLower(result.URL)
	switch {
	case productPattern.MatchString(urlLower):
		return types.CategoryProduct
	case videoPattern.MatchString(urlLower) || result.IframeSrc != "" || result.Category == "videos":
		return types.CategoryVideo
	case tgProfilePattern.MatchString(urlLower):
		return types.CategoryProfile
	case vkProfilePattern.MatchString(urlLower):
		// vk.com video and clip URLs were already claimed by the video case.
		return types.CategoryProfile
	case result.ImgSrc != "" && result.Category == "images":
		return types.CategoryImage
	}
	return types.CategoryArticle
}

// contains reports whether item is present in slice.
func contains(slice []string, item string) bool {
	for _, s := range slice {
		if s == item {
			return true
		}
	}
	return false
}