feat: Go backend, enhanced search, new widgets, Docker deploy

Major changes:
- Add Go backend (backend/) with microservices architecture
- Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler,
  proxy-manager, media-search, fastClassifier, language detection
- New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard,
  UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions
- Improved discover-svc with discover-db integration
- Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md)
- Library-svc: project_id schema migration
- Remove deprecated finance-svc and travel-svc
- Localization improvements across services

Made-with: Cursor
This commit is contained in:
home
2026-02-27 04:15:32 +03:00
parent 328d968f3f
commit 06fe57c765
285 changed files with 53132 additions and 1871 deletions

View File

@@ -0,0 +1,215 @@
package search
import (
"context"
"regexp"
"strconv"
"strings"
"github.com/gooseek/backend/internal/types"
)
// MediaSearchOptions caps how many image and video results SearchMedia
// may return. A nil options value gets defaults (8 images, 6 videos).
type MediaSearchOptions struct {
	MaxImages int // upper bound on images collected per query
	MaxVideos int // upper bound on videos collected per query
}
// MediaSearchResult bundles the image and video legs of a media search.
// Both slices are always non-nil so they JSON-encode as [] rather than null.
type MediaSearchResult struct {
	Images []types.ImageData `json:"images"`
	Videos []types.VideoData `json:"videos"`
}
// SearchMedia runs the image and video searches concurrently and merges
// their results. It is best-effort: a failed leg contributes an empty
// slice instead of failing the call, so the returned error is always nil.
//
// The previous version shuttled errors through a dedicated channel only to
// receive and discard them; that dead plumbing is removed and the
// drop-on-error policy is now explicit.
func (c *SearXNGClient) SearchMedia(ctx context.Context, query string, opts *MediaSearchOptions) (*MediaSearchResult, error) {
	if opts == nil {
		opts = &MediaSearchOptions{MaxImages: 8, MaxVideos: 6}
	}

	// Buffered so each goroutine can send and exit without a waiting receiver.
	imageCh := make(chan []types.ImageData, 1)
	videoCh := make(chan []types.VideoData, 1)

	go func() {
		images, err := c.searchImages(ctx, query, opts.MaxImages)
		if err != nil {
			images = nil // best-effort: media is supplementary content
		}
		imageCh <- images
	}()
	go func() {
		videos, err := c.searchVideos(ctx, query, opts.MaxVideos)
		if err != nil {
			videos = nil // best-effort: media is supplementary content
		}
		videoCh <- videos
	}()

	result := &MediaSearchResult{
		Images: <-imageCh,
		Videos: <-videoCh,
	}
	// Normalize nils so the JSON output is [] rather than null.
	if result.Images == nil {
		result.Images = make([]types.ImageData, 0)
	}
	if result.Videos == nil {
		result.Videos = make([]types.VideoData, 0)
	}
	return result, nil
}
// searchImages queries the "images" category and maps up to max unique
// results into ImageData. Results without any usable image URL are skipped;
// duplicates (by chosen URL) are dropped.
func (c *SearXNGClient) searchImages(ctx context.Context, query string, max int) ([]types.ImageData, error) {
	resp, err := c.Search(ctx, query, &SearchOptions{
		Categories: []string{"images"},
		PageNo:     1,
	})
	if err != nil {
		return nil, err
	}

	seen := make(map[string]bool)
	images := make([]types.ImageData, 0, max)
	for _, r := range resp.Results {
		if len(images) >= max {
			break
		}
		// First non-empty source wins: full image, then the two thumbnail fields.
		var imgURL string
		for _, candidate := range []string{r.ImgSrc, r.ThumbnailSrc, r.Thumbnail} {
			if candidate != "" {
				imgURL = candidate
				break
			}
		}
		if imgURL == "" || seen[imgURL] {
			continue
		}
		seen[imgURL] = true
		images = append(images, types.ImageData{
			URL:       imgURL,
			Title:     r.Title,
			Source:    extractDomain(r.URL),
			SourceURL: r.URL,
		})
	}
	return images, nil
}
// searchVideos queries the "videos" category and maps up to max results,
// deduplicated by page URL, into VideoData.
func (c *SearXNGClient) searchVideos(ctx context.Context, query string, max int) ([]types.VideoData, error) {
	resp, err := c.Search(ctx, query, &SearchOptions{
		Categories: []string{"videos"},
		PageNo:     1,
	})
	if err != nil {
		return nil, err
	}

	seen := make(map[string]bool)
	videos := make([]types.VideoData, 0, max)
	for _, r := range resp.Results {
		if len(videos) >= max {
			break
		}
		if seen[r.URL] {
			continue
		}
		seen[r.URL] = true
		videos = append(videos, types.VideoData{
			Title:     r.Title,
			URL:       r.URL,
			Thumbnail: r.Thumbnail,
			Duration:  toInt(r.Duration),
			Views:     toInt(r.Views),
			Author:    r.Author,
			Platform:  detectVideoPlatform(r.URL),
			EmbedURL:  r.IframeSrc,
		})
	}
	return videos, nil
}
// Compiled once at package scope; matching is done against a lowercased URL.
var (
	youtubePattern = regexp.MustCompile(`youtube\.com|youtu\.be`)
	rutubePattern  = regexp.MustCompile(`rutube\.ru`)
	vkPattern      = regexp.MustCompile(`vk\.com`)
	dzenPattern    = regexp.MustCompile(`dzen\.ru`)
)

// detectVideoPlatform classifies a video URL into a known hosting platform
// identifier ("youtube", "rutube", "vk", "dzen") or "other".
func detectVideoPlatform(url string) string {
	lower := strings.ToLower(url)
	// Table-driven lookup; order matters only if a URL matched several
	// patterns, in which case the first (YouTube) wins, as before.
	checks := []struct {
		re   *regexp.Regexp
		name string
	}{
		{youtubePattern, "youtube"},
		{rutubePattern, "rutube"},
		{vkPattern, "vk"},
		{dzenPattern, "dzen"},
	}
	for _, c := range checks {
		if c.re.MatchString(lower) {
			return c.name
		}
	}
	return "other"
}
// extractDomain strips the scheme and a leading "www." from rawURL and
// returns everything up to the first path separator. Plain string surgery —
// no URL parsing — matching the lightweight needs of result attribution.
func extractDomain(rawURL string) string {
	host := rawURL
	for _, prefix := range []string{"https://", "http://", "www."} {
		host = strings.TrimPrefix(host, prefix)
	}
	// Only cut at "/" when it is not the very first character (original
	// behavior: a leading slash leaves the string untouched).
	if idx := strings.Index(host, "/"); idx > 0 {
		host = host[:idx]
	}
	return host
}
// toInt coerces a loosely-typed JSON value (int, int64, float64, or a
// numeric string) to int. Anything else — including nil and unparseable
// strings — yields 0. Floats are truncated toward zero.
func toInt(v interface{}) int {
	switch n := v.(type) {
	case nil:
		return 0
	case int:
		return n
	case int64:
		return int(n)
	case float64:
		return int(n)
	case string:
		parsed, err := strconv.Atoi(n)
		if err != nil {
			return 0
		}
		return parsed
	}
	return 0
}

View File

@@ -0,0 +1,163 @@
package search
import (
"math"
"sort"
"strings"
"unicode"
"github.com/gooseek/backend/internal/types"
)
// RankedItem pairs a chunk with its BM25 relevance score so chunks can be
// sorted without losing the score used to order them.
type RankedItem struct {
	Chunk types.Chunk
	Score float64 // BM25 score plus title-match bonus; higher is more relevant
}
// RerankBM25 scores chunks against the query using BM25 (k1=1.5, b=0.75),
// adds a +2.0 bonus for each query term that appears verbatim in the chunk
// title, and returns the topK highest-scoring chunks in descending order.
// If the query or chunk list yields no terms, the input is returned as-is.
//
// Fixes over the original: a zero average document length (all chunks
// tokenize to nothing) no longer produces 0/0 = NaN scores, and a negative
// topK no longer panics in make().
func RerankBM25(chunks []types.Chunk, query string, topK int) []types.Chunk {
	if len(chunks) == 0 {
		return chunks
	}
	queryTerms := tokenize(query)
	if len(queryTerms) == 0 {
		return chunks
	}

	// Document frequency: number of chunks containing each term at least once.
	// Title text participates, mirroring the per-document term counting below.
	df := make(map[string]int)
	for _, chunk := range chunks {
		seen := make(map[string]bool)
		for _, term := range tokenize(chunk.Content + " " + chunk.Metadata["title"]) {
			if !seen[term] {
				df[term]++
				seen[term] = true
			}
		}
	}

	// Average body length (content only, as in the original weighting).
	avgDocLen := 0.0
	for _, chunk := range chunks {
		avgDocLen += float64(len(tokenize(chunk.Content)))
	}
	avgDocLen /= float64(len(chunks))
	if avgDocLen == 0 {
		// Guard: every chunk tokenized empty; avoid 0/0 -> NaN in tfNorm.
		avgDocLen = 1
	}

	const (
		k1 = 1.5
		b  = 0.75
	)
	n := float64(len(chunks))

	ranked := make([]RankedItem, len(chunks))
	for i, chunk := range chunks {
		docTerms := tokenize(chunk.Content + " " + chunk.Metadata["title"])
		docLen := float64(len(docTerms))
		tf := make(map[string]int)
		for _, term := range docTerms {
			tf[term]++
		}
		score := 0.0
		for _, qterm := range queryTerms {
			termFreq, ok := tf[qterm]
			if !ok {
				continue
			}
			docFreq := float64(df[qterm])
			idf := math.Log((n - docFreq + 0.5) / (docFreq + 0.5))
			if idf < 0 {
				idf = 0 // very common terms contribute nothing, never negative
			}
			tfNorm := float64(termFreq) * (k1 + 1) /
				(float64(termFreq) + k1*(1-b+b*docLen/avgDocLen))
			score += idf * tfNorm
		}
		// Title bonus: flat +2.0 per query term found as a substring.
		if title, ok := chunk.Metadata["title"]; ok {
			titleLower := strings.ToLower(title)
			for _, qterm := range queryTerms {
				if strings.Contains(titleLower, qterm) {
					score += 2.0
				}
			}
		}
		ranked[i] = RankedItem{Chunk: chunk, Score: score}
	}

	sort.Slice(ranked, func(i, j int) bool {
		return ranked[i].Score > ranked[j].Score
	})

	// Clamp topK into [0, len(ranked)]: negative values would panic in make.
	if topK < 0 {
		topK = 0
	}
	if topK > len(ranked) {
		topK = len(ranked)
	}
	result := make([]types.Chunk, topK)
	for i := range result {
		result[i] = ranked[i].Chunk
	}
	return result
}
// tokenize lowercases text and splits it into runs of letters and digits,
// discarding tokens shorter than two bytes. Returns nil when nothing
// qualifies. Note the threshold is bytes, not runes, so a single two-byte
// rune (e.g. Cyrillic) survives — same as the original Builder.Len check.
func tokenize(text string) []string {
	isSeparator := func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsDigit(r)
	}
	var tokens []string
	for _, field := range strings.FieldsFunc(strings.ToLower(text), isSeparator) {
		if len(field) >= 2 {
			tokens = append(tokens, field)
		}
	}
	return tokens
}
// EstimateQueryComplexity maps a query to a heuristic [0, 1] score:
// token count scaled by 5, +0.2 for a question mark, +0.3 when Russian
// conjunctions ("и"/"или") suggest a compound question.
func EstimateQueryComplexity(query string) float64 {
	score := float64(len(tokenize(query))) / 5.0
	if strings.Contains(query, "?") {
		score += 0.2
	}
	if strings.Contains(query, " и ") || strings.Contains(query, " или ") {
		score += 0.3
	}
	return math.Min(score, 1.0)
}
// ComputeAdaptiveTopK picks how many reranked results to keep: a per-mode
// base (speed 10, balanced 20, quality 30, anything else 15) scaled up by
// as much as 50% for complex queries, capped at totalResults.
func ComputeAdaptiveTopK(totalResults int, complexity float64, mode string) int {
	var baseK int
	switch mode {
	case "speed":
		baseK = 10
	case "balanced":
		baseK = 20
	case "quality":
		baseK = 30
	default:
		baseK = 15
	}
	k := int(float64(baseK) * (1 + 0.5*complexity))
	if k > totalResults {
		k = totalResults
	}
	return k
}

View File

@@ -0,0 +1,177 @@
package search
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/url"
"regexp"
"strings"
"time"
"github.com/gooseek/backend/internal/types"
"github.com/gooseek/backend/pkg/config"
)
// SearXNGClient queries SearXNG metasearch instances over HTTP, trying the
// primary URL first and falling back to alternates on failure.
type SearXNGClient struct {
	primaryURL   string        // preferred instance; tried first
	fallbackURLs []string      // alternates tried in order when primary fails
	client       *http.Client  // shared client with request timeout applied
	timeout      time.Duration // configured per-search timeout
}
// NewSearXNGClient builds a SearXNG client from service configuration,
// wiring the configured search timeout into the underlying HTTP client.
func NewSearXNGClient(cfg *config.Config) *SearXNGClient {
	return &SearXNGClient{
		primaryURL:   cfg.SearXNGURL,
		fallbackURLs: cfg.SearXNGFallbackURL,
		client:       &http.Client{Timeout: cfg.SearchTimeout},
		timeout:      cfg.SearchTimeout,
	}
}
// SearchOptions narrows a SearXNG query. Zero-valued fields are omitted
// from the request, letting the instance apply its own defaults.
type SearchOptions struct {
	Engines    []string // specific engines to query (joined comma-separated)
	Categories []string // e.g. "images", "videos" (joined comma-separated)
	PageNo     int      // 1-based result page; sent only when > 0
	Language   string   // result language hint; sent only when non-empty
}
// Search queries SearXNG, failing over across the configured instances in
// order and returning the first successful response. The last failure is
// wrapped in the returned error when every instance fails.
//
// Fix: the failover loop now stops once the caller's context is done —
// previously a canceled/expired context still triggered doomed attempts
// against every remaining instance.
func (c *SearXNGClient) Search(ctx context.Context, query string, opts *SearchOptions) (*types.SearchResponse, error) {
	candidates := c.buildCandidates()
	if len(candidates) == 0 {
		return nil, fmt.Errorf("no SearXNG URLs configured")
	}
	var lastErr error
	for _, baseURL := range candidates {
		result, err := c.searchWithURL(ctx, baseURL, query, opts)
		if err == nil {
			return result, nil
		}
		lastErr = err
		// Do not fail over when the caller has already given up.
		if ctx.Err() != nil {
			break
		}
	}
	return nil, fmt.Errorf("all SearXNG instances failed: %w", lastErr)
}
// buildCandidates assembles the ordered, deduplicated list of base URLs to
// try: the primary first (defaulting to http:// when no scheme is given),
// then each non-blank fallback (defaulting to https://).
func (c *SearXNGClient) buildCandidates() []string {
	candidates := make([]string, 0)

	if c.primaryURL != "" {
		primary := strings.TrimSuffix(c.primaryURL, "/")
		if !strings.HasPrefix(primary, "http") {
			primary = "http://" + primary
		}
		candidates = append(candidates, primary)
	}

	for _, raw := range c.fallbackURLs {
		fb := strings.TrimSpace(raw)
		if fb == "" {
			continue
		}
		fb = strings.TrimSuffix(fb, "/")
		if !strings.HasPrefix(fb, "http") {
			fb = "https://" + fb
		}
		if !contains(candidates, fb) {
			candidates = append(candidates, fb)
		}
	}
	return candidates
}
// searchWithURL performs one GET /search request against a single SearXNG
// instance and decodes its JSON payload into a SearchResponse. Non-200
// responses and transport/decoding failures are returned as errors.
func (c *SearXNGClient) searchWithURL(ctx context.Context, baseURL, query string, opts *SearchOptions) (*types.SearchResponse, error) {
	params := url.Values{
		"format": {"json"},
		"q":      {query},
	}
	if opts != nil {
		if len(opts.Engines) > 0 {
			params.Set("engines", strings.Join(opts.Engines, ","))
		}
		if len(opts.Categories) > 0 {
			params.Set("categories", strings.Join(opts.Categories, ","))
		}
		if opts.PageNo > 0 {
			params.Set("pageno", fmt.Sprintf("%d", opts.PageNo))
		}
		if opts.Language != "" {
			params.Set("language", opts.Language)
		}
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL+"/search?"+params.Encode(), nil)
	if err != nil {
		return nil, err
	}
	resp, err := c.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("SearXNG returned status %d", resp.StatusCode)
	}

	// Decode only the fields we consume from the SearXNG payload.
	var payload struct {
		Results     []types.SearchResult `json:"results"`
		Suggestions []string             `json:"suggestions"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
		return nil, err
	}
	return &types.SearchResponse{
		Results:     payload.Results,
		Suggestions: payload.Suggestions,
	}, nil
}
// URL shape patterns for content classification, compiled once at package
// scope. Profile patterns are anchored at end-of-string so paths below a
// profile (e.g. vk.com/video...) do not match.
var (
	productPattern   = regexp.MustCompile(`ozon\.ru/product|wildberries\.ru/catalog/\d|aliexpress\.(ru|com)/item|market\.yandex`)
	videoPattern     = regexp.MustCompile(`rutube\.ru/video|vk\.com/video|vk\.com/clip|youtube\.com/watch|youtu\.be|dzen\.ru/video`)
	vkProfilePattern = regexp.MustCompile(`vk\.com/[a-zA-Z0-9_.]+$`)
	tgProfilePattern = regexp.MustCompile(`t\.me/[a-zA-Z0-9_]+$`)
)

// CategorizeResult classifies a search result by inspecting its URL shape
// and a few payload hints (iframe source, image source, engine category).
// Order matters: product beats video beats profile beats image; anything
// unmatched is an article.
func CategorizeResult(result *types.SearchResult) types.ContentCategory {
	lowerURL := strings.ToLower(result.URL)
	switch {
	case productPattern.MatchString(lowerURL):
		return types.CategoryProduct
	case videoPattern.MatchString(lowerURL), result.IframeSrc != "", result.Category == "videos":
		return types.CategoryVideo
	case tgProfilePattern.MatchString(lowerURL):
		return types.CategoryProfile
	case vkProfilePattern.MatchString(lowerURL) && !videoPattern.MatchString(lowerURL):
		return types.CategoryProfile
	case result.ImgSrc != "" && result.Category == "images":
		return types.CategoryImage
	default:
		return types.CategoryArticle
	}
}
// contains reports whether item occurs in slice (linear scan; the candidate
// URL lists here are tiny, so no need for a set).
func contains(slice []string, item string) bool {
	found := false
	for _, candidate := range slice {
		if candidate == item {
			found = true
			break
		}
	}
	return found
}