Major changes:
- Add Go backend (backend/) with microservices architecture
- Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler, proxy-manager, media-search, fastClassifier, language detection
- New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard, UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions
- Improved discover-svc with discover-db integration
- Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md)
- Library-svc: project_id schema migration
- Remove deprecated finance-svc and travel-svc
- Localization improvements across services

Made-with: Cursor
216 lines
4.0 KiB
Go
216 lines
4.0 KiB
Go
package search
|
|
|
|
import (
|
|
"context"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/gooseek/backend/internal/types"
|
|
)
|
|
|
|
// MediaSearchOptions caps how many items SearchMedia collects per media kind.
// When a nil *MediaSearchOptions is passed, SearchMedia falls back to
// 8 images and 6 videos.
type MediaSearchOptions struct {
	// MaxImages is the upper bound on the number of images returned.
	MaxImages int
	// MaxVideos is the upper bound on the number of videos returned.
	MaxVideos int
}
|
|
|
|
type MediaSearchResult struct {
|
|
Images []types.ImageData `json:"images"`
|
|
Videos []types.VideoData `json:"videos"`
|
|
}
|
|
|
|
func (c *SearXNGClient) SearchMedia(ctx context.Context, query string, opts *MediaSearchOptions) (*MediaSearchResult, error) {
|
|
if opts == nil {
|
|
opts = &MediaSearchOptions{MaxImages: 8, MaxVideos: 6}
|
|
}
|
|
|
|
result := &MediaSearchResult{
|
|
Images: make([]types.ImageData, 0),
|
|
Videos: make([]types.VideoData, 0),
|
|
}
|
|
|
|
imageCh := make(chan []types.ImageData, 1)
|
|
videoCh := make(chan []types.VideoData, 1)
|
|
errCh := make(chan error, 2)
|
|
|
|
go func() {
|
|
images, err := c.searchImages(ctx, query, opts.MaxImages)
|
|
if err != nil {
|
|
errCh <- err
|
|
imageCh <- nil
|
|
return
|
|
}
|
|
errCh <- nil
|
|
imageCh <- images
|
|
}()
|
|
|
|
go func() {
|
|
videos, err := c.searchVideos(ctx, query, opts.MaxVideos)
|
|
if err != nil {
|
|
errCh <- err
|
|
videoCh <- nil
|
|
return
|
|
}
|
|
errCh <- nil
|
|
videoCh <- videos
|
|
}()
|
|
|
|
<-errCh
|
|
<-errCh
|
|
result.Images = <-imageCh
|
|
result.Videos = <-videoCh
|
|
|
|
if result.Images == nil {
|
|
result.Images = make([]types.ImageData, 0)
|
|
}
|
|
if result.Videos == nil {
|
|
result.Videos = make([]types.VideoData, 0)
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
func (c *SearXNGClient) searchImages(ctx context.Context, query string, max int) ([]types.ImageData, error) {
|
|
resp, err := c.Search(ctx, query, &SearchOptions{
|
|
Categories: []string{"images"},
|
|
PageNo: 1,
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
images := make([]types.ImageData, 0, max)
|
|
seen := make(map[string]bool)
|
|
|
|
for _, r := range resp.Results {
|
|
if len(images) >= max {
|
|
break
|
|
}
|
|
|
|
imgURL := r.ImgSrc
|
|
if imgURL == "" {
|
|
imgURL = r.ThumbnailSrc
|
|
}
|
|
if imgURL == "" {
|
|
imgURL = r.Thumbnail
|
|
}
|
|
if imgURL == "" {
|
|
continue
|
|
}
|
|
|
|
if seen[imgURL] {
|
|
continue
|
|
}
|
|
seen[imgURL] = true
|
|
|
|
images = append(images, types.ImageData{
|
|
URL: imgURL,
|
|
Title: r.Title,
|
|
Source: extractDomain(r.URL),
|
|
SourceURL: r.URL,
|
|
})
|
|
}
|
|
|
|
return images, nil
|
|
}
|
|
|
|
func (c *SearXNGClient) searchVideos(ctx context.Context, query string, max int) ([]types.VideoData, error) {
|
|
resp, err := c.Search(ctx, query, &SearchOptions{
|
|
Categories: []string{"videos"},
|
|
PageNo: 1,
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
videos := make([]types.VideoData, 0, max)
|
|
seen := make(map[string]bool)
|
|
|
|
for _, r := range resp.Results {
|
|
if len(videos) >= max {
|
|
break
|
|
}
|
|
|
|
if seen[r.URL] {
|
|
continue
|
|
}
|
|
seen[r.URL] = true
|
|
|
|
platform := detectVideoPlatform(r.URL)
|
|
|
|
video := types.VideoData{
|
|
Title: r.Title,
|
|
URL: r.URL,
|
|
Thumbnail: r.Thumbnail,
|
|
Duration: toInt(r.Duration),
|
|
Views: toInt(r.Views),
|
|
Author: r.Author,
|
|
Platform: platform,
|
|
EmbedURL: r.IframeSrc,
|
|
}
|
|
|
|
videos = append(videos, video)
|
|
}
|
|
|
|
return videos, nil
|
|
}
|
|
|
|
// Host patterns for the supported video platforms. Each one matches a
// lower-cased URL whose host is the listed domain or one of its subdomains.
// The match is anchored to the host, so a platform name that merely appears
// in a path, a query string or inside another domain name does not count.
var (
	youtubePattern = videoHostPattern("youtube.com", "youtu.be")
	rutubePattern  = videoHostPattern("rutube.ru")
	vkPattern      = videoHostPattern("vk.com")
	dzenPattern    = videoHostPattern("dzen.ru")
)

// videoHostPattern builds a regular expression that matches a lower-cased URL
// (with or without a scheme) whose host equals one of domains or is a
// subdomain of it.
func videoHostPattern(domains ...string) *regexp.Regexp {
	quoted := make([]string, len(domains))
	for i, d := range domains {
		quoted[i] = regexp.QuoteMeta(d)
	}
	return regexp.MustCompile(
		`^(?:[a-z][a-z0-9+.-]*://|//)?` + // optional scheme or protocol-relative prefix
			`(?:[^/?#@]*@)?` + // optional userinfo
			`(?:[a-z0-9-]+\.)*` + // any number of subdomain labels
			`(?:` + strings.Join(quoted, "|") + `)` + // the platform domain itself
			`\.?(?::[0-9]+)?` + // optional trailing dot and port
			`(?:[/?#]|$)`, // the host must end here
	)
}

// detectVideoPlatform classifies url by its host and returns one of
// "youtube", "rutube", "vk" or "dzen", or "other" when no known platform
// matches. Matching is case-insensitive and ignores surrounding whitespace.
func detectVideoPlatform(url string) string {
	normalized := strings.ToLower(strings.TrimSpace(url))

	switch {
	case youtubePattern.MatchString(normalized):
		return "youtube"
	case rutubePattern.MatchString(normalized):
		return "rutube"
	case vkPattern.MatchString(normalized):
		return "vk"
	case dzenPattern.MatchString(normalized):
		return "dzen"
	default:
		return "other"
	}
}
|
|
|
|
// extractDomain reduces rawURL to its host part for display: a leading
// "https://" or "http://" and a leading "www." are removed, and everything
// from the first "/", "?" or "#" onwards is dropped. Input without any of
// those parts is returned as is; no validation is performed.
func extractDomain(rawURL string) string {
	rawURL = strings.TrimPrefix(rawURL, "https://")
	rawURL = strings.TrimPrefix(rawURL, "http://")
	rawURL = strings.TrimPrefix(rawURL, "www.")

	// Cut at the first delimiter that ends the host. Query strings and
	// fragments count too, so "example.com?x=1" does not leak "?x=1".
	// idx > 0 leaves input that starts with a delimiter untouched.
	if idx := strings.IndexAny(rawURL, "/?#"); idx > 0 {
		rawURL = rawURL[:idx]
	}

	return rawURL
}
|
|
|
|
// toInt converts a loosely typed value to an int.
//
// Integers are returned as is, floats are truncated toward zero, and strings
// are parsed after trimming surrounding whitespace, accepting both integer
// ("15") and floating-point ("12.7") notation. Anything else, including nil,
// unparseable strings, NaN, infinities and floats outside the int range,
// yields 0.
func toInt(v interface{}) int {
	switch val := v.(type) {
	case int:
		return val
	case int32:
		return int(val)
	case int64:
		return int(val)
	case float32:
		return floatToInt(float64(val))
	case float64:
		return floatToInt(val)
	case string:
		s := strings.TrimSpace(val)
		if i, err := strconv.Atoi(s); err == nil {
			return i
		}
		// Fall back to float notation so values such as "12.0" are not lost.
		if f, err := strconv.ParseFloat(s, 64); err == nil {
			return floatToInt(f)
		}
		return 0
	default:
		// Covers nil as well as every unsupported type.
		return 0
	}
}

// floatToInt truncates f toward zero, returning 0 when f is NaN or too large
// in magnitude for int. Converting such values directly is
// implementation-dependent in Go.
func floatToInt(f float64) int {
	const maxInt = int(^uint(0) >> 1)
	// f != f is true only for NaN.
	if f != f || f >= float64(maxInt) || f <= -float64(maxInt) {
		return 0
	}
	return int(f)
}
|