Major changes: - Add Go backend (backend/) with microservices architecture - Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler, proxy-manager, media-search, fastClassifier, language detection - New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard, UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions - Improved discover-svc with discover-db integration - Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md) - Library-svc: project_id schema migration - Remove deprecated finance-svc and travel-svc - Localization improvements across services Made-with: Cursor
178 lines
4.1 KiB
Go
178 lines
4.1 KiB
Go
package search
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"net/http"
|
|
"net/url"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/gooseek/backend/internal/types"
|
|
"github.com/gooseek/backend/pkg/config"
|
|
)
|
|
|
|
// SearXNGClient sends search queries to SearXNG instances over HTTP.
// It knows one primary base URL and any number of fallback base URLs.
type SearXNGClient struct {
	// primaryURL is the base URL of the preferred instance; it is placed
	// first in the candidate list.
	primaryURL string
	// fallbackURLs are base URLs of additional instances, tried in order
	// after the primary one.
	fallbackURLs []string

	// client is the HTTP client used for every search request.
	client *http.Client
	// timeout is the search timeout this client was configured with.
	timeout time.Duration
}
|
|
|
|
func NewSearXNGClient(cfg *config.Config) *SearXNGClient {
|
|
return &SearXNGClient{
|
|
primaryURL: cfg.SearXNGURL,
|
|
fallbackURLs: cfg.SearXNGFallbackURL,
|
|
client: &http.Client{Timeout: cfg.SearchTimeout},
|
|
timeout: cfg.SearchTimeout,
|
|
}
|
|
}
|
|
|
|
// SearchOptions carries the optional parameters of a search request.
// Zero-valued fields are left out of the request.
type SearchOptions struct {
	// Engines is sent comma-joined as the "engines" parameter when non-empty.
	Engines []string
	// Categories is sent comma-joined as the "categories" parameter when
	// non-empty.
	Categories []string
	// PageNo is sent as the "pageno" parameter when greater than zero.
	PageNo int
	// Language is sent as the "language" parameter when non-empty.
	Language string
}
|
|
|
|
func (c *SearXNGClient) Search(ctx context.Context, query string, opts *SearchOptions) (*types.SearchResponse, error) {
|
|
candidates := c.buildCandidates()
|
|
if len(candidates) == 0 {
|
|
return nil, fmt.Errorf("no SearXNG URLs configured")
|
|
}
|
|
|
|
var lastErr error
|
|
for _, baseURL := range candidates {
|
|
result, err := c.searchWithURL(ctx, baseURL, query, opts)
|
|
if err == nil {
|
|
return result, nil
|
|
}
|
|
lastErr = err
|
|
}
|
|
|
|
return nil, fmt.Errorf("all SearXNG instances failed: %w", lastErr)
|
|
}
|
|
|
|
func (c *SearXNGClient) buildCandidates() []string {
|
|
candidates := make([]string, 0)
|
|
|
|
if c.primaryURL != "" {
|
|
u := strings.TrimSuffix(c.primaryURL, "/")
|
|
if !strings.HasPrefix(u, "http") {
|
|
u = "http://" + u
|
|
}
|
|
candidates = append(candidates, u)
|
|
}
|
|
|
|
for _, fb := range c.fallbackURLs {
|
|
u := strings.TrimSpace(fb)
|
|
if u == "" {
|
|
continue
|
|
}
|
|
u = strings.TrimSuffix(u, "/")
|
|
if !strings.HasPrefix(u, "http") {
|
|
u = "https://" + u
|
|
}
|
|
if !contains(candidates, u) {
|
|
candidates = append(candidates, u)
|
|
}
|
|
}
|
|
|
|
return candidates
|
|
}
|
|
|
|
func (c *SearXNGClient) searchWithURL(ctx context.Context, baseURL, query string, opts *SearchOptions) (*types.SearchResponse, error) {
|
|
params := url.Values{}
|
|
params.Set("format", "json")
|
|
params.Set("q", query)
|
|
|
|
if opts != nil {
|
|
if len(opts.Engines) > 0 {
|
|
params.Set("engines", strings.Join(opts.Engines, ","))
|
|
}
|
|
if len(opts.Categories) > 0 {
|
|
params.Set("categories", strings.Join(opts.Categories, ","))
|
|
}
|
|
if opts.PageNo > 0 {
|
|
params.Set("pageno", fmt.Sprintf("%d", opts.PageNo))
|
|
}
|
|
if opts.Language != "" {
|
|
params.Set("language", opts.Language)
|
|
}
|
|
}
|
|
|
|
reqURL := fmt.Sprintf("%s/search?%s", baseURL, params.Encode())
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
resp, err := c.client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("SearXNG returned status %d", resp.StatusCode)
|
|
}
|
|
|
|
var result struct {
|
|
Results []types.SearchResult `json:"results"`
|
|
Suggestions []string `json:"suggestions"`
|
|
}
|
|
|
|
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &types.SearchResponse{
|
|
Results: result.Results,
|
|
Suggestions: result.Suggestions,
|
|
}, nil
|
|
}
|
|
|
|
// URL patterns used to categorise search results. CategorizeResult applies
// them to the lower-cased result URL.
var (
	// productPattern matches product pages on the listed marketplaces.
	productPattern = regexp.MustCompile(`ozon\.ru/product|wildberries\.ru/catalog/\d|aliexpress\.(ru|com)/item|market\.yandex`)

	// videoPattern matches video pages on the listed video hosts.
	videoPattern = regexp.MustCompile(`rutube\.ru/video|vk\.com/video|vk\.com/clip|youtube\.com/watch|youtu\.be|dzen\.ru/video`)

	// vkProfilePattern matches URLs ending in a single vk.com path segment.
	vkProfilePattern = regexp.MustCompile(`vk\.com/[a-zA-Z0-9_.]+$`)

	// tgProfilePattern matches URLs ending in a single t.me path segment.
	tgProfilePattern = regexp.MustCompile(`t\.me/[a-zA-Z0-9_]+$`)
)
|
|
|
|
func CategorizeResult(result *types.SearchResult) types.ContentCategory {
|
|
urlLower := strings.ToLower(result.URL)
|
|
|
|
if productPattern.MatchString(urlLower) {
|
|
return types.CategoryProduct
|
|
}
|
|
|
|
if videoPattern.MatchString(urlLower) || result.IframeSrc != "" || result.Category == "videos" {
|
|
return types.CategoryVideo
|
|
}
|
|
|
|
if tgProfilePattern.MatchString(urlLower) {
|
|
return types.CategoryProfile
|
|
}
|
|
if vkProfilePattern.MatchString(urlLower) && !videoPattern.MatchString(urlLower) {
|
|
return types.CategoryProfile
|
|
}
|
|
|
|
if result.ImgSrc != "" && result.Category == "images" {
|
|
return types.CategoryImage
|
|
}
|
|
|
|
return types.CategoryArticle
|
|
}
|
|
|
|
// contains reports whether item is an element of slice.
func contains(slice []string, item string) bool {
	found := false
	for i := 0; i < len(slice) && !found; i++ {
		found = slice[i] == item
	}
	return found
}
|