package main import ( "context" "crypto/sha256" "encoding/hex" "fmt" "log" "os" "strings" "sync" "time" "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/cors" "github.com/gofiber/fiber/v2/middleware/logger" "github.com/gooseek/backend/internal/db" "github.com/gooseek/backend/internal/search" "github.com/gooseek/backend/pkg/cache" "github.com/gooseek/backend/pkg/config" ) type DigestCitation struct { Index int `json:"index"` URL string `json:"url"` Title string `json:"title"` Domain string `json:"domain"` } type Digest struct { Topic string `json:"topic"` Region string `json:"region"` ClusterTitle string `json:"clusterTitle"` SummaryRu string `json:"summaryRu"` Citations []DigestCitation `json:"citations"` SourcesCount int `json:"sourcesCount"` FollowUp []string `json:"followUp"` Thumbnail string `json:"thumbnail"` ShortDescription string `json:"shortDescription"` MainURL string `json:"mainUrl"` CreatedAt time.Time `json:"createdAt"` } type ArticleSummary struct { URL string `json:"url"` Events []string `json:"events"` CreatedAt time.Time `json:"createdAt"` } type DiscoverStore struct { digests map[string]*Digest articleSummaries map[string]*ArticleSummary mu sync.RWMutex } func NewDiscoverStore() *DiscoverStore { return &DiscoverStore{ digests: make(map[string]*Digest), articleSummaries: make(map[string]*ArticleSummary), } } func (s *DiscoverStore) GetDigest(topic, region, title string) *Digest { s.mu.RLock() defer s.mu.RUnlock() key := fmt.Sprintf("%s:%s:%s", topic, region, title) return s.digests[key] } func (s *DiscoverStore) GetDigestByURL(url string) *Digest { s.mu.RLock() defer s.mu.RUnlock() normalizedURL := normalizeURL(url) for _, d := range s.digests { if normalizeURL(d.MainURL) == normalizedURL { return d } } return nil } func (s *DiscoverStore) GetDigests(topic, region string) []*Digest { s.mu.RLock() defer s.mu.RUnlock() result := make([]*Digest, 0) prefix := fmt.Sprintf("%s:%s:", topic, region) for k, d := range s.digests { if strings.HasPrefix(k, prefix) { result = append(result, d) } } return result } func (s *DiscoverStore) UpsertDigest(d *Digest) { s.mu.Lock() defer s.mu.Unlock() key := fmt.Sprintf("%s:%s:%s", d.Topic, d.Region, d.ClusterTitle) d.CreatedAt = time.Now() s.digests[key] = d } func (s *DiscoverStore) DeleteDigests(topic, region string) int { s.mu.Lock() defer s.mu.Unlock() count := 0 prefix := fmt.Sprintf("%s:%s:", topic, region) for k := range s.digests { if strings.HasPrefix(k, prefix) { delete(s.digests, k) count++ } } return count } func (s *DiscoverStore) GetArticleSummary(url string) *ArticleSummary { s.mu.RLock() defer s.mu.RUnlock() key := articleSummaryKey(url) return s.articleSummaries[key] } func (s *DiscoverStore) SaveArticleSummary(url string, events []string) { s.mu.Lock() defer s.mu.Unlock() key := articleSummaryKey(url) s.articleSummaries[key] = &ArticleSummary{ URL: url, Events: events, CreatedAt: time.Now(), } } func (s *DiscoverStore) DeleteArticleSummary(url string) bool { s.mu.Lock() defer s.mu.Unlock() key := articleSummaryKey(url) if _, ok := s.articleSummaries[key]; ok { delete(s.articleSummaries, key) return true } return false } func articleSummaryKey(url string) string { hash := sha256.Sum256([]byte(normalizeURL(url))) return hex.EncodeToString(hash[:16]) } func normalizeURL(url string) string { url = strings.TrimSpace(url) url = strings.TrimSuffix(url, "/") url = strings.TrimPrefix(url, "https://") url = strings.TrimPrefix(url, "http://") url = strings.TrimPrefix(url, "www.") return url } func extractDomain(url string) string { normalized := normalizeURL(url) if idx := strings.Index(normalized, "/"); idx > 0 { return normalized[:idx] } return normalized } func main() { cfg, err := config.Load() if err != nil { log.Fatal("Failed to load config:", err) } store := NewDiscoverStore() searchClient := search.NewSearXNGClient(cfg) var database *db.PostgresDB var digestRepo *db.DigestRepository var summaryRepo *db.ArticleSummaryRepository if cfg.DatabaseURL != "" { database, err = db.NewPostgresDB(cfg.DatabaseURL) if err != nil { log.Printf("PostgreSQL unavailable: %v (falling back to in-memory)", err) } else { ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) if err := database.RunMigrations(ctx); err != nil { log.Printf("Migration warning: %v", err) } cancel() defer database.Close() digestRepo = db.NewDigestRepository(database) summaryRepo = db.NewArticleSummaryRepository(database) log.Println("PostgreSQL connected") } } var redisCache *cache.RedisCache if cfg.RedisURL != "" { redisCache, err = cache.NewRedisCache(cfg.RedisURL, "gooseek:discover") if err != nil { log.Printf("Redis cache unavailable: %v (falling back to in-memory)", err) } else { log.Printf("Redis cache connected") defer redisCache.Close() } } _ = digestRepo _ = summaryRepo app := fiber.New(fiber.Config{ BodyLimit: 100 * 1024 * 1024, ReadTimeout: 30 * time.Second, WriteTimeout: 30 * time.Second, IdleTimeout: 60 * time.Second, }) app.Use(logger.New()) app.Use(cors.New()) app.Get("/health", func(c *fiber.Ctx) error { return c.JSON(fiber.Map{"status": "ok"}) }) app.Get("/ready", func(c *fiber.Ctx) error { return c.JSON(fiber.Map{"status": "ready"}) }) app.Get("/metrics", func(c *fiber.Ctx) error { c.Set("Content-Type", "text/plain; charset=utf-8") return c.SendString( "# HELP gooseek_up Service is up (1) or down (0)\n" + "# TYPE gooseek_up gauge\n" + "gooseek_up 1\n", ) }) app.Get("/api/v1/discover/digest", func(c *fiber.Ctx) error { url := c.Query("url") if url != "" { digest := store.GetDigestByURL(url) if digest == nil { return c.Status(404).JSON(fiber.Map{"message": "digest not found"}) } return c.JSON(digest) } topic := c.Query("topic") region := c.Query("region") title := c.Query("title") if topic == "" || region == "" || title == "" { return c.Status(400).JSON(fiber.Map{"message": "topic, region, title (or url) required"}) } digest := store.GetDigest(topic, region, title) if digest == nil { return c.Status(404).JSON(fiber.Map{"message": "digest not found"}) } return c.JSON(digest) }) app.Post("/api/v1/discover/digest", func(c *fiber.Ctx) error { var d Digest if err := c.BodyParser(&d); err != nil { return c.Status(400).JSON(fiber.Map{"error": "Invalid request body"}) } if d.Topic == "" || d.Region == "" || d.ClusterTitle == "" || d.SummaryRu == "" { return c.Status(400).JSON(fiber.Map{"message": "topic, region, clusterTitle, summaryRu required"}) } store.UpsertDigest(&d) return c.Status(204).Send(nil) }) app.Delete("/api/v1/discover/digest", func(c *fiber.Ctx) error { topic := c.Query("topic") region := c.Query("region") if topic == "" || region == "" { return c.Status(400).JSON(fiber.Map{"message": "topic, region required"}) } deleted := store.DeleteDigests(topic, region) return c.JSON(fiber.Map{"deleted": deleted}) }) app.Get("/api/v1/discover/article-summary", func(c *fiber.Ctx) error { url := c.Query("url") if url == "" { return c.Status(400).JSON(fiber.Map{"message": "url required"}) } if redisCache != nil { events, err := redisCache.GetCachedArticleSummary(c.Context(), url) if err == nil && len(events) > 0 { return c.JSON(fiber.Map{"events": events}) } } if summaryRepo != nil { summary, err := summaryRepo.GetByURL(c.Context(), url) if err == nil && summary != nil { if redisCache != nil { redisCache.CacheArticleSummary(c.Context(), url, summary.Events, 24*time.Hour) } return c.JSON(fiber.Map{"events": summary.Events}) } } summary := store.GetArticleSummary(url) if summary == nil { return c.Status(404).JSON(fiber.Map{"message": "not found"}) } return c.JSON(fiber.Map{"events": summary.Events}) }) app.Post("/api/v1/discover/article-summary", func(c *fiber.Ctx) error { var body struct { URL string `json:"url"` Events []string `json:"events"` } if err := c.BodyParser(&body); err != nil { return c.Status(400).JSON(fiber.Map{"error": "Invalid request body"}) } if body.URL == "" || len(body.Events) == 0 { return c.Status(400).JSON(fiber.Map{"message": "url and events[] required"}) } store.SaveArticleSummary(body.URL, body.Events) if summaryRepo != nil { ttl := 7 * 24 * time.Hour if err := summaryRepo.Save(c.Context(), body.URL, body.Events, ttl); err != nil { log.Printf("postgres save article-summary error: %v", err) } } if redisCache != nil { ttl := 24 * time.Hour if err := redisCache.CacheArticleSummary(c.Context(), body.URL, body.Events, ttl); err != nil { log.Printf("redis cache article-summary error: %v", err) } } log.Printf("article-summary saved: %s (%d events)", body.URL[:min(60, len(body.URL))], len(body.Events)) return c.Status(204).Send(nil) }) app.Delete("/api/v1/discover/article-summary", func(c *fiber.Ctx) error { url := c.Query("url") if url == "" { return c.Status(400).JSON(fiber.Map{"message": "url required"}) } deleted := store.DeleteArticleSummary(url) log.Printf("article-summary deleted: %s (deleted=%v)", url[:min(60, len(url))], deleted) return c.Status(204).Send(nil) }) app.Get("/api/v1/discover/search", func(c *fiber.Ctx) error { q := c.Query("q") if q == "" { return c.Status(400).JSON(fiber.Map{"message": "Query q is required"}) } ctx, cancel := context.WithTimeout(context.Background(), cfg.SearchTimeout) defer cancel() result, err := searchClient.Search(ctx, q, &search.SearchOptions{PageNo: 1}) if err != nil { return c.Status(503).JSON(fiber.Map{"message": "Search failed"}) } if len(result.Results) > 10 { result.Results = result.Results[:10] } return c.JSON(fiber.Map{"results": result.Results}) }) app.Get("/api/v1/discover", func(c *fiber.Ctx) error { topic := c.Query("topic", "tech") region := c.Query("region", "world") digests := store.GetDigests(topic, region) if len(digests) > 0 { blogs := make([]fiber.Map, len(digests)) for i, d := range digests { content := d.ShortDescription if content == "" && len(d.SummaryRu) > 200 { content = d.SummaryRu[:200] + "…" } else if content == "" { content = d.SummaryRu } blogs[i] = fiber.Map{ "title": d.ClusterTitle, "content": content, "url": d.MainURL, "thumbnail": d.Thumbnail, "sourcesCount": d.SourcesCount, "digestId": fmt.Sprintf("%s:%s:%s", d.Topic, d.Region, d.ClusterTitle), } } return c.JSON(fiber.Map{"blogs": blogs}) } ctx, cancel := context.WithTimeout(context.Background(), cfg.SearchTimeout*2) defer cancel() queries := getQueriesForTopic(topic, region) results, err := searchClient.Search(ctx, queries[0], &search.SearchOptions{ Categories: []string{"news"}, PageNo: 1, }) if err != nil { return c.Status(503).JSON(fiber.Map{"message": "Search failed"}) } blogs := make([]fiber.Map, 0, 7) for i, r := range results.Results { if i >= 7 { break } thumbnail := r.Thumbnail if thumbnail == "" { thumbnail = r.ThumbnailSrc } if thumbnail == "" { thumbnail = r.ImgSrc } content := r.Content if content == "" { content = r.Title } if len(content) > 300 { content = content[:300] + "…" } blogs = append(blogs, fiber.Map{ "title": r.Title, "content": content, "url": r.URL, "thumbnail": thumbnail, }) } return c.JSON(fiber.Map{"blogs": blogs}) }) port := getEnvInt("DISCOVER_SVC_PORT", 3002) log.Printf("discover-svc listening on :%d", port) log.Fatal(app.Listen(fmt.Sprintf(":%d", port))) } func getQueriesForTopic(topic, region string) []string { queries := map[string]map[string][]string{ "tech": { "world": {"technology news AI innovation"}, "russia": {"технологии новости IT инновации"}, "eu": {"technology news Europe AI"}, }, "finance": { "world": {"finance news economy markets"}, "russia": {"финансы новости экономика рынки"}, "eu": {"finance news Europe economy"}, }, "sports": { "world": {"sports news football Olympics"}, "russia": {"спорт новости футбол хоккей"}, "eu": {"sports news football Champions League"}, }, } if topicQueries, ok := queries[topic]; ok { if regionQueries, ok := topicQueries[region]; ok { return regionQueries } if defaultQueries, ok := topicQueries["world"]; ok { return defaultQueries } } return []string{"news today"} } func getEnvInt(key string, defaultValue int) int { if val := os.Getenv(key); val != "" { var result int if _, err := fmt.Sscanf(val, "%d", &result); err == nil { return result } } return defaultValue } func min(a, b int) int { if a < b { return a } return b }