Major changes: - Add Go backend (backend/) with microservices architecture - Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler, proxy-manager, media-search, fastClassifier, language detection - New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard, UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions - Improved discover-svc with discover-db integration - Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md) - Library-svc: project_id schema migration - Remove deprecated finance-svc and travel-svc - Localization improvements across services Made-with: Cursor
512 lines
13 KiB
Go
512 lines
13 KiB
Go
package main
|
|
|
|
import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"log"
	"os"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/gofiber/fiber/v2"
	"github.com/gofiber/fiber/v2/middleware/cors"
	"github.com/gofiber/fiber/v2/middleware/logger"
	"github.com/gooseek/backend/internal/db"
	"github.com/gooseek/backend/internal/search"
	"github.com/gooseek/backend/pkg/cache"
	"github.com/gooseek/backend/pkg/config"
)
|
|
|
|
// DigestCitation is one numbered source reference attached to a Digest.
type DigestCitation struct {
	Index  int    `json:"index"`  // position of this citation in the digest's citation list
	URL    string `json:"url"`    // full source URL
	Title  string `json:"title"`  // source page title
	Domain string `json:"domain"` // source domain, e.g. "example.com"
}
|
|
|
|
// Digest is a clustered news digest for one (topic, region) pair.
// It is keyed in DiscoverStore by "topic:region:clusterTitle".
type Digest struct {
	Topic            string           `json:"topic"`            // e.g. "tech", "finance", "sports"
	Region           string           `json:"region"`           // e.g. "world", "russia", "eu"
	ClusterTitle     string           `json:"clusterTitle"`     // title of the news cluster; part of the store key
	SummaryRu        string           `json:"summaryRu"`        // summary text (Russian, per the field name — confirm with producers)
	Citations        []DigestCitation `json:"citations"`        // numbered source references
	SourcesCount     int              `json:"sourcesCount"`     // number of sources behind the cluster
	FollowUp         []string         `json:"followUp"`         // suggested follow-up questions
	Thumbnail        string           `json:"thumbnail"`        // thumbnail image URL
	ShortDescription string           `json:"shortDescription"` // short teaser; falls back to truncated SummaryRu in listings
	MainURL          string           `json:"mainUrl"`          // primary article URL; used for lookup by GetDigestByURL
	CreatedAt        time.Time        `json:"createdAt"`        // set by UpsertDigest on insert/replace
}
|
|
|
|
// ArticleSummary is a per-article list of extracted event strings,
// stored in DiscoverStore under a hash of the normalized URL.
type ArticleSummary struct {
	URL       string    `json:"url"`       // original (non-normalized) article URL
	Events    []string  `json:"events"`    // extracted event/highlight strings
	CreatedAt time.Time `json:"createdAt"` // set by SaveArticleSummary
}
|
|
|
|
// DiscoverStore is an in-memory, mutex-guarded store for digests and
// article summaries. It is the fallback backend when Postgres/Redis are
// unavailable. Safe for concurrent use.
type DiscoverStore struct {
	digests          map[string]*Digest         // key: "topic:region:clusterTitle"
	articleSummaries map[string]*ArticleSummary // key: articleSummaryKey(url) (hash of normalized URL)
	mu               sync.RWMutex               // guards both maps
}
|
|
|
|
func NewDiscoverStore() *DiscoverStore {
|
|
return &DiscoverStore{
|
|
digests: make(map[string]*Digest),
|
|
articleSummaries: make(map[string]*ArticleSummary),
|
|
}
|
|
}
|
|
|
|
func (s *DiscoverStore) GetDigest(topic, region, title string) *Digest {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
key := fmt.Sprintf("%s:%s:%s", topic, region, title)
|
|
return s.digests[key]
|
|
}
|
|
|
|
func (s *DiscoverStore) GetDigestByURL(url string) *Digest {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
normalizedURL := normalizeURL(url)
|
|
for _, d := range s.digests {
|
|
if normalizeURL(d.MainURL) == normalizedURL {
|
|
return d
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (s *DiscoverStore) GetDigests(topic, region string) []*Digest {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
result := make([]*Digest, 0)
|
|
prefix := fmt.Sprintf("%s:%s:", topic, region)
|
|
for k, d := range s.digests {
|
|
if strings.HasPrefix(k, prefix) {
|
|
result = append(result, d)
|
|
}
|
|
}
|
|
return result
|
|
}
|
|
|
|
func (s *DiscoverStore) UpsertDigest(d *Digest) {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
key := fmt.Sprintf("%s:%s:%s", d.Topic, d.Region, d.ClusterTitle)
|
|
d.CreatedAt = time.Now()
|
|
s.digests[key] = d
|
|
}
|
|
|
|
func (s *DiscoverStore) DeleteDigests(topic, region string) int {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
count := 0
|
|
prefix := fmt.Sprintf("%s:%s:", topic, region)
|
|
for k := range s.digests {
|
|
if strings.HasPrefix(k, prefix) {
|
|
delete(s.digests, k)
|
|
count++
|
|
}
|
|
}
|
|
return count
|
|
}
|
|
|
|
func (s *DiscoverStore) GetArticleSummary(url string) *ArticleSummary {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
key := articleSummaryKey(url)
|
|
return s.articleSummaries[key]
|
|
}
|
|
|
|
func (s *DiscoverStore) SaveArticleSummary(url string, events []string) {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
key := articleSummaryKey(url)
|
|
s.articleSummaries[key] = &ArticleSummary{
|
|
URL: url,
|
|
Events: events,
|
|
CreatedAt: time.Now(),
|
|
}
|
|
}
|
|
|
|
func (s *DiscoverStore) DeleteArticleSummary(url string) bool {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
key := articleSummaryKey(url)
|
|
if _, ok := s.articleSummaries[key]; ok {
|
|
delete(s.articleSummaries, key)
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
func articleSummaryKey(url string) string {
|
|
hash := sha256.Sum256([]byte(normalizeURL(url)))
|
|
return hex.EncodeToString(hash[:16])
|
|
}
|
|
|
|
// normalizeURL canonicalizes a URL for comparison: trims surrounding
// whitespace, drops one trailing "/", then strips the "https://",
// "http://" and "www." prefixes in that order. Case is preserved.
func normalizeURL(url string) string {
	trimmed := strings.TrimSuffix(strings.TrimSpace(url), "/")
	for _, prefix := range []string{"https://", "http://", "www."} {
		trimmed = strings.TrimPrefix(trimmed, prefix)
	}
	return trimmed
}
|
|
|
|
func extractDomain(url string) string {
|
|
normalized := normalizeURL(url)
|
|
if idx := strings.Index(normalized, "/"); idx > 0 {
|
|
return normalized[:idx]
|
|
}
|
|
return normalized
|
|
}
|
|
|
|
// main wires up the discover-svc HTTP server: configuration, the
// always-available in-memory DiscoverStore, a SearXNG search client,
// optional PostgreSQL and Redis backends, and the Fiber routes.
// Postgres and Redis are best-effort: if either is unreachable the
// service logs a warning and keeps running against in-memory state.
func main() {
	cfg, err := config.Load()
	if err != nil {
		log.Fatal("Failed to load config:", err)
	}

	store := NewDiscoverStore()
	searchClient := search.NewSearXNGClient(cfg)

	var database *db.PostgresDB
	var digestRepo *db.DigestRepository
	var summaryRepo *db.ArticleSummaryRepository

	// Optional PostgreSQL: connect, run migrations (failures are logged but
	// not fatal), then build the repositories.
	if cfg.DatabaseURL != "" {
		database, err = db.NewPostgresDB(cfg.DatabaseURL)
		if err != nil {
			log.Printf("PostgreSQL unavailable: %v (falling back to in-memory)", err)
		} else {
			// Bounded window for migrations so a hung DB can't stall startup.
			ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
			if err := database.RunMigrations(ctx); err != nil {
				log.Printf("Migration warning: %v", err)
			}
			cancel()
			defer database.Close()

			digestRepo = db.NewDigestRepository(database)
			summaryRepo = db.NewArticleSummaryRepository(database)
			log.Println("PostgreSQL connected")
		}
	}

	// Optional Redis read-through cache for article summaries.
	var redisCache *cache.RedisCache
	if cfg.RedisURL != "" {
		redisCache, err = cache.NewRedisCache(cfg.RedisURL, "gooseek:discover")
		if err != nil {
			log.Printf("Redis cache unavailable: %v (falling back to in-memory)", err)
		} else {
			log.Printf("Redis cache connected")
			defer redisCache.Close()
		}
	}

	// digestRepo is not used by any handler below; summaryRepo is, so its
	// blank assignment is redundant but harmless.
	_ = digestRepo
	_ = summaryRepo

	app := fiber.New(fiber.Config{
		BodyLimit:    100 * 1024 * 1024, // 100 MiB request body cap
		ReadTimeout:  30 * time.Second,
		WriteTimeout: 30 * time.Second,
		IdleTimeout:  60 * time.Second,
	})

	app.Use(logger.New())
	app.Use(cors.New())

	// Liveness / readiness probes.
	app.Get("/health", func(c *fiber.Ctx) error {
		return c.JSON(fiber.Map{"status": "ok"})
	})

	app.Get("/ready", func(c *fiber.Ctx) error {
		return c.JSON(fiber.Map{"status": "ready"})
	})

	// Minimal Prometheus-text metrics: a single hard-coded up gauge.
	app.Get("/metrics", func(c *fiber.Ctx) error {
		c.Set("Content-Type", "text/plain; charset=utf-8")
		return c.SendString(
			"# HELP gooseek_up Service is up (1) or down (0)\n" +
				"# TYPE gooseek_up gauge\n" +
				"gooseek_up 1\n",
		)
	})

	// Fetch one digest, either by ?url= or by ?topic=&region=&title=.
	// Serves from the in-memory store only (digestRepo is not consulted).
	app.Get("/api/v1/discover/digest", func(c *fiber.Ctx) error {
		url := c.Query("url")
		if url != "" {
			digest := store.GetDigestByURL(url)
			if digest == nil {
				return c.Status(404).JSON(fiber.Map{"message": "digest not found"})
			}
			return c.JSON(digest)
		}

		topic := c.Query("topic")
		region := c.Query("region")
		title := c.Query("title")

		if topic == "" || region == "" || title == "" {
			return c.Status(400).JSON(fiber.Map{"message": "topic, region, title (or url) required"})
		}

		digest := store.GetDigest(topic, region, title)
		if digest == nil {
			return c.Status(404).JSON(fiber.Map{"message": "digest not found"})
		}

		return c.JSON(digest)
	})

	// Create or replace a digest (in-memory only). Responds 204 on success.
	app.Post("/api/v1/discover/digest", func(c *fiber.Ctx) error {
		var d Digest
		if err := c.BodyParser(&d); err != nil {
			return c.Status(400).JSON(fiber.Map{"error": "Invalid request body"})
		}

		if d.Topic == "" || d.Region == "" || d.ClusterTitle == "" || d.SummaryRu == "" {
			return c.Status(400).JSON(fiber.Map{"message": "topic, region, clusterTitle, summaryRu required"})
		}

		store.UpsertDigest(&d)
		return c.Status(204).Send(nil)
	})

	// Bulk-delete all digests for a topic/region; returns the count removed.
	app.Delete("/api/v1/discover/digest", func(c *fiber.Ctx) error {
		topic := c.Query("topic")
		region := c.Query("region")

		if topic == "" || region == "" {
			return c.Status(400).JSON(fiber.Map{"message": "topic, region required"})
		}

		deleted := store.DeleteDigests(topic, region)
		return c.JSON(fiber.Map{"deleted": deleted})
	})

	// Article-summary lookup, tiered: Redis cache → Postgres (re-priming the
	// cache on a hit) → in-memory store → 404.
	app.Get("/api/v1/discover/article-summary", func(c *fiber.Ctx) error {
		url := c.Query("url")
		if url == "" {
			return c.Status(400).JSON(fiber.Map{"message": "url required"})
		}

		if redisCache != nil {
			events, err := redisCache.GetCachedArticleSummary(c.Context(), url)
			if err == nil && len(events) > 0 {
				return c.JSON(fiber.Map{"events": events})
			}
		}

		if summaryRepo != nil {
			summary, err := summaryRepo.GetByURL(c.Context(), url)
			if err == nil && summary != nil {
				if redisCache != nil {
					// Best-effort cache re-prime; error deliberately ignored.
					redisCache.CacheArticleSummary(c.Context(), url, summary.Events, 24*time.Hour)
				}
				return c.JSON(fiber.Map{"events": summary.Events})
			}
		}

		summary := store.GetArticleSummary(url)
		if summary == nil {
			return c.Status(404).JSON(fiber.Map{"message": "not found"})
		}

		return c.JSON(fiber.Map{"events": summary.Events})
	})

	// Save an article summary to every available backend: in-memory always,
	// Postgres (7-day TTL) and Redis (24h TTL) when configured.
	app.Post("/api/v1/discover/article-summary", func(c *fiber.Ctx) error {
		var body struct {
			URL    string   `json:"url"`
			Events []string `json:"events"`
		}

		if err := c.BodyParser(&body); err != nil {
			return c.Status(400).JSON(fiber.Map{"error": "Invalid request body"})
		}

		if body.URL == "" || len(body.Events) == 0 {
			return c.Status(400).JSON(fiber.Map{"message": "url and events[] required"})
		}

		store.SaveArticleSummary(body.URL, body.Events)

		if summaryRepo != nil {
			ttl := 7 * 24 * time.Hour
			if err := summaryRepo.Save(c.Context(), body.URL, body.Events, ttl); err != nil {
				log.Printf("postgres save article-summary error: %v", err)
			}
		}

		if redisCache != nil {
			ttl := 24 * time.Hour
			if err := redisCache.CacheArticleSummary(c.Context(), body.URL, body.Events, ttl); err != nil {
				log.Printf("redis cache article-summary error: %v", err)
			}
		}

		// URL truncated to 60 bytes for the log line.
		// NOTE(review): byte slicing can split a multi-byte UTF-8 rune — cosmetic only.
		log.Printf("article-summary saved: %s (%d events)", body.URL[:min(60, len(body.URL))], len(body.Events))
		return c.Status(204).Send(nil)
	})

	// Delete an article summary from the in-memory store only (Postgres and
	// Redis entries are left to expire). Always responds 204.
	app.Delete("/api/v1/discover/article-summary", func(c *fiber.Ctx) error {
		url := c.Query("url")
		if url == "" {
			return c.Status(400).JSON(fiber.Map{"message": "url required"})
		}

		deleted := store.DeleteArticleSummary(url)
		log.Printf("article-summary deleted: %s (deleted=%v)", url[:min(60, len(url))], deleted)
		return c.Status(204).Send(nil)
	})

	// Ad-hoc search proxy: forwards q to SearXNG and returns at most 10 hits.
	app.Get("/api/v1/discover/search", func(c *fiber.Ctx) error {
		q := c.Query("q")
		if q == "" {
			return c.Status(400).JSON(fiber.Map{"message": "Query q is required"})
		}

		ctx, cancel := context.WithTimeout(context.Background(), cfg.SearchTimeout)
		defer cancel()

		result, err := searchClient.Search(ctx, q, &search.SearchOptions{PageNo: 1})
		if err != nil {
			return c.Status(503).JSON(fiber.Map{"message": "Search failed"})
		}

		if len(result.Results) > 10 {
			result.Results = result.Results[:10]
		}

		return c.JSON(fiber.Map{"results": result.Results})
	})

	// Discover feed: serve prepared digests when any exist for the
	// topic/region, otherwise fall back to a live news search (max 7 items).
	app.Get("/api/v1/discover", func(c *fiber.Ctx) error {
		topic := c.Query("topic", "tech")
		region := c.Query("region", "world")

		digests := store.GetDigests(topic, region)
		if len(digests) > 0 {
			blogs := make([]fiber.Map, len(digests))
			for i, d := range digests {
				// Prefer the short description; otherwise truncate the summary.
				// NOTE(review): SummaryRu[:200] slices bytes and can split a
				// multi-byte Cyrillic rune mid-character — confirm acceptable.
				content := d.ShortDescription
				if content == "" && len(d.SummaryRu) > 200 {
					content = d.SummaryRu[:200] + "…"
				} else if content == "" {
					content = d.SummaryRu
				}

				blogs[i] = fiber.Map{
					"title":        d.ClusterTitle,
					"content":      content,
					"url":          d.MainURL,
					"thumbnail":    d.Thumbnail,
					"sourcesCount": d.SourcesCount,
					"digestId":     fmt.Sprintf("%s:%s:%s", d.Topic, d.Region, d.ClusterTitle),
				}
			}
			return c.JSON(fiber.Map{"blogs": blogs})
		}

		// Fallback: live search. Double the usual timeout for the news query.
		ctx, cancel := context.WithTimeout(context.Background(), cfg.SearchTimeout*2)
		defer cancel()

		// Only the first canned query is used here.
		queries := getQueriesForTopic(topic, region)
		results, err := searchClient.Search(ctx, queries[0], &search.SearchOptions{
			Categories: []string{"news"},
			PageNo:     1,
		})
		if err != nil {
			return c.Status(503).JSON(fiber.Map{"message": "Search failed"})
		}

		blogs := make([]fiber.Map, 0, 7)
		for i, r := range results.Results {
			if i >= 7 {
				break
			}
			// Thumbnail fallback chain: Thumbnail → ThumbnailSrc → ImgSrc.
			thumbnail := r.Thumbnail
			if thumbnail == "" {
				thumbnail = r.ThumbnailSrc
			}
			if thumbnail == "" {
				thumbnail = r.ImgSrc
			}

			content := r.Content
			if content == "" {
				content = r.Title
			}
			// NOTE(review): byte-based truncation; may split a UTF-8 rune.
			if len(content) > 300 {
				content = content[:300] + "…"
			}

			blogs = append(blogs, fiber.Map{
				"title":     r.Title,
				"content":   content,
				"url":       r.URL,
				"thumbnail": thumbnail,
			})
		}

		return c.JSON(fiber.Map{"blogs": blogs})
	})

	port := getEnvInt("DISCOVER_SVC_PORT", 3002)
	log.Printf("discover-svc listening on :%d", port)
	log.Fatal(app.Listen(fmt.Sprintf(":%d", port)))
}
|
|
|
|
// getQueriesForTopic returns the canned search queries for a topic/region
// pair. An unknown region falls back to the topic's "world" queries; an
// unknown topic falls back to a single generic query.
func getQueriesForTopic(topic, region string) []string {
	catalog := map[string]map[string][]string{
		"tech": {
			"world":  {"technology news AI innovation"},
			"russia": {"технологии новости IT инновации"},
			"eu":     {"technology news Europe AI"},
		},
		"finance": {
			"world":  {"finance news economy markets"},
			"russia": {"финансы новости экономика рынки"},
			"eu":     {"finance news Europe economy"},
		},
		"sports": {
			"world":  {"sports news football Olympics"},
			"russia": {"спорт новости футбол хоккей"},
			"eu":     {"sports news football Champions League"},
		},
	}

	byRegion, known := catalog[topic]
	if !known {
		return []string{"news today"}
	}
	if regional, ok := byRegion[region]; ok {
		return regional
	}
	if worldwide, ok := byRegion["world"]; ok {
		return worldwide
	}
	return []string{"news today"}
}
|
|
|
|
func getEnvInt(key string, defaultValue int) int {
|
|
if val := os.Getenv(key); val != "" {
|
|
var result int
|
|
if _, err := fmt.Sscanf(val, "%d", &result); err == nil {
|
|
return result
|
|
}
|
|
}
|
|
return defaultValue
|
|
}
|
|
|
|
// min returns the smaller of a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
|