feat: Go backend, enhanced search, new widgets, Docker deploy

Major changes:
- Add Go backend (backend/) with microservices architecture
- Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler,
  proxy-manager, media-search, fastClassifier, language detection
- New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard,
  UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions
- Improved discover-svc with discover-db integration
- Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md)
- Library-svc: project_id schema migration
- Remove deprecated finance-svc and travel-svc
- Localization improvements across services

Made-with: Cursor
This commit is contained in:
home
2026-02-27 04:15:32 +03:00
parent 328d968f3f
commit 06fe57c765
285 changed files with 53132 additions and 1871 deletions

View File

@@ -0,0 +1,163 @@
package search
import (
"math"
"sort"
"strings"
"unicode"
"github.com/gooseek/backend/internal/types"
)
// RankedItem pairs a chunk with the relevance score assigned to it by
// RerankBM25 (BM25 score plus any title-match bonus).
type RankedItem struct {
	// Chunk is the original retrieved chunk, carried through unchanged.
	Chunk types.Chunk
	// Score is the computed relevance; higher means more relevant.
	Score float64
}
// RerankBM25 scores chunks against query using Okapi BM25 (k1=1.5, b=0.75)
// over the concatenation of chunk content and its "title" metadata, adds a
// flat +2.0 bonus per query term found as a substring of the title, and
// returns the topK highest-scoring chunks in descending score order.
//
// Edge cases: an empty chunk slice, a query that tokenizes to nothing, or a
// corpus whose documents all tokenize to nothing are returned unranked;
// topK is clamped to [0, len(chunks)].
func RerankBM25(chunks []types.Chunk, query string, topK int) []types.Chunk {
	if len(chunks) == 0 {
		return chunks
	}
	queryTerms := tokenize(query)
	if len(queryTerms) == 0 {
		return chunks
	}

	// Tokenize each document exactly once (content + title) and reuse the
	// result for df, tf, and length statistics. The previous version
	// tokenized content+title for tf/df but content only for the average
	// document length, which skewed the BM25 length normalization; both
	// now use the same token stream.
	docTokens := make([][]string, len(chunks))
	totalLen := 0
	df := make(map[string]int) // document frequency per term
	for i, chunk := range chunks {
		terms := tokenize(chunk.Content + " " + chunk.Metadata["title"])
		docTokens[i] = terms
		totalLen += len(terms)
		seen := make(map[string]bool, len(terms))
		for _, term := range terms {
			if !seen[term] {
				df[term]++
				seen[term] = true
			}
		}
	}
	avgDocLen := float64(totalLen) / float64(len(chunks))
	if avgDocLen == 0 {
		// Every document tokenized to nothing; the normalization term
		// below would divide by zero, so return the input unranked.
		return chunks
	}

	const (
		k1         = 1.5  // term-frequency saturation
		b          = 0.75 // document-length normalization strength
		titleBoost = 2.0  // flat bonus per query term appearing in the title
	)
	n := float64(len(chunks))

	ranked := make([]RankedItem, len(chunks))
	for i, chunk := range chunks {
		terms := docTokens[i]
		docLen := float64(len(terms))
		tf := make(map[string]int, len(terms))
		for _, term := range terms {
			tf[term]++
		}
		score := 0.0
		for _, qterm := range queryTerms {
			termFreq, ok := tf[qterm]
			if !ok {
				continue
			}
			docFreq := float64(df[qterm])
			// Clamp negative IDF, which occurs for terms present in
			// more than half the documents.
			idf := math.Log((n - docFreq + 0.5) / (docFreq + 0.5))
			if idf < 0 {
				idf = 0
			}
			tfNorm := float64(termFreq) * (k1 + 1) /
				(float64(termFreq) + k1*(1-b+b*docLen/avgDocLen))
			score += idf * tfNorm
		}
		if title, ok := chunk.Metadata["title"]; ok {
			titleLower := strings.ToLower(title)
			for _, qterm := range queryTerms {
				if strings.Contains(titleLower, qterm) {
					score += titleBoost
				}
			}
		}
		ranked[i] = RankedItem{Chunk: chunk, Score: score}
	}

	// Stable sort keeps the original retrieval order among equal scores,
	// making the result deterministic across runs.
	sort.SliceStable(ranked, func(i, j int) bool {
		return ranked[i].Score > ranked[j].Score
	})

	if topK < 0 {
		topK = 0 // a negative topK would panic in make below
	}
	if topK > len(ranked) {
		topK = len(ranked)
	}
	result := make([]types.Chunk, topK)
	for i := range result {
		result[i] = ranked[i].Chunk
	}
	return result
}
// tokenize lowercases text, splits it into maximal runs of letters and
// digits, and returns those runs that are at least two bytes long.
// Returns nil when no token qualifies.
func tokenize(text string) []string {
	isSeparator := func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsDigit(r)
	}
	var tokens []string
	for _, tok := range strings.FieldsFunc(strings.ToLower(text), isSeparator) {
		// Drop one-byte tokens (single ASCII letters/digits), matching
		// the minimum-length filter of the original implementation.
		if len(tok) >= 2 {
			tokens = append(tokens, tok)
		}
	}
	return tokens
}
// EstimateQueryComplexity returns a heuristic complexity score in [0, 1]
// for query: roughly one fifth per token, +0.2 if the query contains a
// question mark, and +0.3 if it contains the Russian conjunctions
// " и " (and) or " или " (or), capped at 1.0.
func EstimateQueryComplexity(query string) float64 {
	score := float64(len(tokenize(query))) / 5.0
	if strings.Contains(query, "?") {
		score += 0.2
	}
	hasConjunction := strings.Contains(query, " и ") ||
		strings.Contains(query, " или ")
	if hasConjunction {
		score += 0.3
	}
	if score > 1.0 {
		return 1.0
	}
	return score
}
// ComputeAdaptiveTopK picks how many results to keep after reranking.
// The base depends on mode ("speed" 10, "balanced" 20, "quality" 30,
// anything else 15); it is scaled up by as much as 50% with complexity
// and never exceeds totalResults.
func ComputeAdaptiveTopK(totalResults int, complexity float64, mode string) int {
	var base int
	switch mode {
	case "speed":
		base = 10
	case "balanced":
		base = 20
	case "quality":
		base = 30
	default:
		base = 15
	}
	k := int(float64(base) * (1 + complexity*0.5))
	if k > totalResults {
		return totalResults
	}
	return k
}