feat: Go backend, enhanced search, new widgets, Docker deploy

Major changes: - Add Go backend (backend/) with microservices architecture - Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler, proxy-manager, media-search, fastClassifier, language detection - New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard, UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions - Improved discover-svc with discover-db integration - Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md) - Library-svc: project_id schema migration - Remove deprecated finance-svc and travel-svc - Localization improvements across services Made-with: Cursor
2026-02-27 04:15:32 +03:00
parent 328d968f3f
commit 06fe57c765
285 changed files with 53132 additions and 1871 deletions
--- a/backend/internal/podcast/generator.go
+++ b/backend/internal/podcast/generator.go
@@ -0,0 +1,507 @@
+package podcast
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/google/uuid"
+	"github.com/gooseek/backend/internal/llm"
+)
+
+// PodcastType identifies the editorial format of a podcast episode. It is
+// serialized as the plain string value in the Podcast JSON ("type").
+type PodcastType string
+
+// Podcast formats known to this package.
+const (
+	PodcastDaily     PodcastType = "daily"
+	PodcastWeekly    PodcastType = "weekly"
+	PodcastTopicDeep PodcastType = "topic_deep"
+	PodcastBreaking  PodcastType = "breaking"
+)
+
+// VoiceStyle names the delivery style requested for synthesized speech. It is
+// carried in VoiceConfig.Style.
+type VoiceStyle string
+
+// Voice styles known to this package. VoiceNeutral is the style used by the
+// default voice configuration.
+const (
+	VoiceNeutral      VoiceStyle = "neutral"
+	VoiceEnthusiastic VoiceStyle = "enthusiastic"
+	VoiceProfessional VoiceStyle = "professional"
+	VoiceCasual       VoiceStyle = "casual"
+	VoiceStorytelling VoiceStyle = "storytelling"
+)
+
+// Podcast is a generated episode: its metadata, the full transcript, the
+// per-segment breakdown and the voice settings to use for synthesis. The JSON
+// tags define the serialized form produced by ToJSON and read by ParsePodcast.
+type Podcast struct {
+	// ID is a UUID string assigned when the episode is generated.
+	ID           string           `json:"id"`
+	Title        string           `json:"title"`
+	Description  string           `json:"description"`
+	Type         PodcastType      `json:"type"`
+	// Date is the day the episode covers.
+	Date         time.Time        `json:"date"`
+	// Duration is the episode length in seconds.
+	Duration     int              `json:"durationSeconds"`
+	// AudioURL is not populated by the generator code in this file.
+	AudioURL     string           `json:"audioUrl,omitempty"`
+	// Transcript is the full spoken text; it is what gets sent to the TTS client.
+	Transcript   string           `json:"transcript"`
+	Segments     []PodcastSegment `json:"segments"`
+	Topics       []string         `json:"topics"`
+	Sources      []Source         `json:"sources"`
+	// Thumbnail is not populated by the generator code in this file.
+	Thumbnail    string           `json:"thumbnail,omitempty"`
+	// Status starts as StatusDraft and is advanced by GenerateAudio.
+	Status       PodcastStatus    `json:"status"`
+	GeneratedAt  time.Time        `json:"generatedAt"`
+	// PublishedAt is nil until set; it is not set by the code in this file.
+	PublishedAt  *time.Time       `json:"publishedAt,omitempty"`
+	Locale       string           `json:"locale"`
+	VoiceConfig  VoiceConfig      `json:"voiceConfig"`
+}
+
+// PodcastStatus describes where an episode is in its lifecycle.
+type PodcastStatus string
+
+// Lifecycle states. In this file, new episodes start as StatusDraft and
+// GenerateAudio moves them through StatusGenerating to StatusReady or
+// StatusFailed; StatusPublished is not assigned by the visible code.
+const (
+	StatusDraft      PodcastStatus = "draft"
+	StatusGenerating PodcastStatus = "generating"
+	StatusReady      PodcastStatus = "ready"
+	StatusPublished  PodcastStatus = "published"
+	StatusFailed     PodcastStatus = "failed"
+)
+
+// PodcastSegment is one contiguous part of an episode script.
+type PodcastSegment struct {
+	// ID is a UUID string assigned when the segment is created.
+	ID         string    `json:"id"`
+	// Type is a free-form label; the LLM prompt in this file asks for one of
+	// intro, news, analysis, transition or outro.
+	Type       string    `json:"type"`
+	Title      string    `json:"title"`
+	// Content is the text to be spoken for this segment.
+	Content    string    `json:"content"`
+	// Duration is the segment length in seconds.
+	Duration   int       `json:"durationSeconds"`
+	// StartTime and EndTime are offsets in seconds from the start of the episode.
+	StartTime  int       `json:"startTime"`
+	EndTime    int       `json:"endTime"`
+	Sources    []Source  `json:"sources,omitempty"`
+	Highlights []string  `json:"highlights,omitempty"`
+}
+
+// Source is a reference to material an episode or segment draws on.
+type Source struct {
+	Title     string `json:"title"`
+	URL       string `json:"url"`
+	Publisher string `json:"publisher"`
+	// Date is an optional free-form string; no format is enforced in this file.
+	Date      string `json:"date,omitempty"`
+}
+
+// VoiceConfig describes the voice to use when synthesizing an episode.
+//
+// NOTE(review): of these fields, only VoiceID is read by the ElevenLabsTTS
+// implementation in this file; the others are carried along but not applied.
+type VoiceConfig struct {
+	// Provider names the TTS backend, e.g. "elevenlabs".
+	Provider   string     `json:"provider"`
+	// VoiceID is the provider-specific voice identifier.
+	VoiceID    string     `json:"voiceId"`
+	Style      VoiceStyle `json:"style"`
+	// Speed and Pitch default to 1.0 in the generator's default voice.
+	Speed      float64    `json:"speed"`
+	Pitch      float64    `json:"pitch"`
+	// Language is a language code such as "ru".
+	Language   string     `json:"language"`
+}
+
+// PodcastGenerator produces podcast scripts with an LLM and turns them into
+// audio through a TTSClient. Construct it with NewPodcastGenerator so that
+// configuration defaults are applied.
+type PodcastGenerator struct {
+	// llm writes the episode script.
+	llm        llm.Client
+	// ttsClient may be nil, in which case GenerateAudio returns an error.
+	ttsClient  TTSClient
+	// httpClient is created by the constructor; it is not used by the methods
+	// visible in this file.
+	httpClient *http.Client
+	config     GeneratorConfig
+}
+
+// GeneratorConfig holds the tunable settings of a PodcastGenerator. Zero
+// values are replaced with defaults by NewPodcastGenerator.
+type GeneratorConfig struct {
+	// DefaultDuration is the target episode length in seconds used when a
+	// request does not specify one (constructor default: 300).
+	DefaultDuration int
+	// MaxDuration is the upper bound on episode length (constructor default:
+	// 1800); presumably seconds, like DefaultDuration.
+	MaxDuration     int
+	// DefaultVoice is used when a request carries no VoiceConfig of its own.
+	DefaultVoice    VoiceConfig
+	// OutputDir is not read by the code visible in this file.
+	OutputDir       string
+}
+
+// TTSClient converts text to speech. Implementations in this file are
+// ElevenLabsTTS and DummyTTS.
+type TTSClient interface {
+	// GenerateSpeech synthesizes text with the given voice settings and returns
+	// the encoded audio bytes.
+	GenerateSpeech(ctx context.Context, text string, config VoiceConfig) ([]byte, error)
+}
+
+func NewPodcastGenerator(llmClient llm.Client, ttsClient TTSClient, cfg GeneratorConfig) *PodcastGenerator {
+	if cfg.DefaultDuration == 0 {
+		cfg.DefaultDuration = 300
+	}
+	if cfg.MaxDuration == 0 {
+		cfg.MaxDuration = 1800
+	}
+	if cfg.DefaultVoice.Provider == "" {
+		cfg.DefaultVoice = VoiceConfig{
+			Provider: "elevenlabs",
+			VoiceID:  "21m00Tcm4TlvDq8ikWAM",
+			Style:    VoiceNeutral,
+			Speed:    1.0,
+			Pitch:    1.0,
+			Language: "ru",
+		}
+	}
+
+	return &PodcastGenerator{
+		llm:        llmClient,
+		ttsClient:  ttsClient,
+		httpClient: &http.Client{Timeout: 60 * time.Second},
+		config:     cfg,
+	}
+}
+
+// GenerateOptions describes a single episode generation request.
+type GenerateOptions struct {
+	Type          PodcastType
+	// Topics is passed to the LLM prompt and copied onto the resulting Podcast.
+	Topics        []string
+	// NewsItems is the material the script should cover.
+	NewsItems     []NewsItem
+	// Date defaults to the current time when zero.
+	Date          time.Time
+	// Duration is the target length in seconds; zero selects the generator's
+	// configured default.
+	Duration      int
+	// Locale defaults to "ru" when empty.
+	Locale        string
+	// VoiceConfig, when non-nil, overrides the generator's default voice.
+	VoiceConfig   *VoiceConfig
+	// NOTE(review): IncludeIntro, IncludeOutro and PersonalizeFor are not read
+	// by the generator code visible in this file.
+	IncludeIntro  bool
+	IncludeOutro  bool
+	PersonalizeFor string
+}
+
+// NewsItem is one piece of news offered to the script generator. The whole
+// slice of items is JSON-encoded into the LLM prompt using these tags.
+type NewsItem struct {
+	Title       string   `json:"title"`
+	Summary     string   `json:"summary"`
+	URL         string   `json:"url"`
+	// Source is the name of the outlet that published the item.
+	Source      string   `json:"source"`
+	// PublishedAt is a free-form string; no format is enforced in this file.
+	PublishedAt string   `json:"publishedAt"`
+	Topics      []string `json:"topics"`
+	// Importance is a ranking hint; its scale is not defined in this file —
+	// TODO confirm with the producer of these items.
+	Importance  int      `json:"importance"`
+}
+
+// GenerateDailyPodcast writes a script for the given options and returns it
+// as a draft Podcast. No audio is produced here; call GenerateAudio for that.
+//
+// Missing options are defaulted: Type becomes PodcastDaily, Date becomes the
+// current time, a non-positive Duration becomes the configured default, and
+// Locale becomes "ru". Duration is capped at the configured MaxDuration when
+// one is set. The voice is opts.VoiceConfig if provided, otherwise the
+// generator's default voice.
+//
+// It returns an error only when script generation fails.
+func (g *PodcastGenerator) GenerateDailyPodcast(ctx context.Context, opts GenerateOptions) (*Podcast, error) {
+	// Without this, callers that omit Type get an episode with an empty type.
+	if opts.Type == "" {
+		opts.Type = PodcastDaily
+	}
+	if opts.Date.IsZero() {
+		opts.Date = time.Now()
+	}
+	if opts.Duration <= 0 {
+		opts.Duration = g.config.DefaultDuration
+	}
+	// Enforce the configured ceiling; a zero MaxDuration means "no limit".
+	if limit := g.config.MaxDuration; limit > 0 && opts.Duration > limit {
+		opts.Duration = limit
+	}
+	if opts.Locale == "" {
+		opts.Locale = "ru"
+	}
+
+	script, err := g.generateScript(ctx, opts)
+	if err != nil {
+		return nil, fmt.Errorf("failed to generate script: %w", err)
+	}
+
+	voice := g.config.DefaultVoice
+	if opts.VoiceConfig != nil {
+		voice = *opts.VoiceConfig
+	}
+
+	return &Podcast{
+		ID:          uuid.New().String(),
+		Title:       script.Title,
+		Description: script.Description,
+		Type:        opts.Type,
+		Date:        opts.Date,
+		Duration:    opts.Duration,
+		Transcript:  script.FullText,
+		Segments:    script.Segments,
+		Topics:      opts.Topics,
+		Sources:     script.Sources,
+		Status:      StatusDraft,
+		GeneratedAt: time.Now(),
+		Locale:      opts.Locale,
+		VoiceConfig: voice,
+	}, nil
+}
+
+// PodcastScript is the intermediate result of script generation, before it is
+// wrapped into a Podcast.
+type PodcastScript struct {
+	Title       string
+	Description string
+	// FullText is the complete spoken text of the episode.
+	FullText    string
+	Segments    []PodcastSegment
+	// Sources lists the sources cited across the script.
+	Sources     []Source
+}
+
+// generateScript asks the LLM for an episode script covering opts.NewsItems
+// and converts its JSON reply into a PodcastScript.
+//
+// Each segment's duration is estimated from its word count at 2.5 words per
+// second with a 10-second floor, and start/end offsets are accumulated in
+// order. FullText is the segment contents joined by blank lines, and
+// script-level Sources collects every segment's sources.
+//
+// If the reply cannot be decoded, or decodes to a script with no segments
+// (which is what happens when the reply contains no JSON object at all), the
+// built-in default script is returned instead. An error is returned when the
+// news items cannot be encoded or the LLM call itself fails.
+func (g *PodcastGenerator) generateScript(ctx context.Context, opts GenerateOptions) (*PodcastScript, error) {
+	const (
+		avgWordsPerSecond = 2.5
+		minSegmentSeconds = 10
+	)
+
+	langInstruction := ""
+	if opts.Locale == "ru" {
+		langInstruction = "Generate the entire script in Russian language. Use natural Russian speech patterns."
+	}
+
+	newsJSON, err := json.Marshal(opts.NewsItems)
+	if err != nil {
+		return nil, fmt.Errorf("encoding news items: %w", err)
+	}
+
+	prompt := fmt.Sprintf(`Create a podcast script for a daily news digest.
+
+Date: %s
+Duration target: %d seconds (approximately %d minutes)
+Topics: %v
+%s
+
+News items to cover:
+%s
+
+Create an engaging podcast script with these requirements:
+1. Start with a catchy introduction greeting the audience
+2. Cover the most important news first
+3. Transition smoothly between stories
+4. Add brief analysis or context where appropriate
+5. End with a summary and sign-off
+
+The script should sound natural when read aloud - use conversational language, not formal news anchor style.
+
+Respond in JSON format:
+{
+  "title": "Podcast title for this episode",
+  "description": "Brief episode description",
+  "segments": [
+    {
+      "type": "intro|news|analysis|transition|outro",
+      "title": "Segment title",
+      "content": "Full text to be spoken",
+      "highlights": ["Key point 1", "Key point 2"],
+      "sources": [{"title": "Source title", "url": "url", "publisher": "publisher"}]
+    }
+  ]
+}`, opts.Date.Format("2006-01-02"), opts.Duration, opts.Duration/60, opts.Topics, langInstruction, string(newsJSON))
+
+	result, err := g.llm.GenerateText(ctx, llm.StreamRequest{
+		Messages: []llm.Message{{Role: "user", Content: prompt}},
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	// Sources decode straight into Source: its JSON tags match the shape the
+	// prompt asks for (an optional "date" is kept if the model supplies one).
+	var parsed struct {
+		Title       string `json:"title"`
+		Description string `json:"description"`
+		Segments    []struct {
+			Type       string   `json:"type"`
+			Title      string   `json:"title"`
+			Content    string   `json:"content"`
+			Highlights []string `json:"highlights"`
+			Sources    []Source `json:"sources"`
+		} `json:"segments"`
+	}
+
+	// extractJSON yields "{}" when the reply has no JSON object, and that
+	// decodes without error — so an empty segment list must also trigger the
+	// fallback, otherwise the caller receives a blank episode.
+	if err := json.Unmarshal([]byte(extractJSON(result)), &parsed); err != nil || len(parsed.Segments) == 0 {
+		return g.generateDefaultScript(opts)
+	}
+
+	script := &PodcastScript{
+		Title:       parsed.Title,
+		Description: parsed.Description,
+		Segments:    make([]PodcastSegment, 0, len(parsed.Segments)),
+		Sources:     make([]Source, 0),
+	}
+
+	spoken := make([]string, 0, len(parsed.Segments))
+	currentTime := 0
+
+	for _, seg := range parsed.Segments {
+		wordCount := len(strings.Fields(seg.Content))
+		segDuration := int(float64(wordCount) / avgWordsPerSecond)
+		if segDuration < minSegmentSeconds {
+			segDuration = minSegmentSeconds
+		}
+
+		segment := PodcastSegment{
+			ID:         uuid.New().String(),
+			Type:       seg.Type,
+			Title:      seg.Title,
+			Content:    seg.Content,
+			Duration:   segDuration,
+			StartTime:  currentTime,
+			EndTime:    currentTime + segDuration,
+			Highlights: seg.Highlights,
+		}
+		// Appending copies the sources and leaves segment.Sources nil when
+		// the segment cites nothing.
+		segment.Sources = append(segment.Sources, seg.Sources...)
+		script.Sources = append(script.Sources, seg.Sources...)
+
+		script.Segments = append(script.Segments, segment)
+		spoken = append(spoken, seg.Content)
+		currentTime += segDuration
+	}
+
+	script.FullText = strings.Join(spoken, "\n\n")
+
+	return script, nil
+}
+
+// generateDefaultScript builds a fixed-template Russian script without the
+// LLM: a greeting, each news item as "Title. Summary", and a sign-off. It is
+// the fallback used when the LLM reply cannot be turned into a script.
+//
+// The intro and outro are budgeted 15 seconds each and the news segment gets
+// the remainder of opts.Duration, never less than zero. Segment offsets are
+// laid out back to back, and every news item is recorded as a source. The
+// returned error is always nil.
+//
+// NOTE(review): the date is formatted with Go's English month names even
+// though the surrounding text is Russian.
+func (g *PodcastGenerator) generateDefaultScript(opts GenerateOptions) (*PodcastScript, error) {
+	const (
+		introSeconds = 15
+		outroSeconds = 15
+	)
+
+	date := opts.Date.Format("2 January 2006")
+
+	intro := fmt.Sprintf("Добрый день! С вами GooSeek Daily — ваш ежедневный подкаст с главными новостями. Сегодня %s, и вот что происходит в мире.", date)
+	outro := "На этом всё на сегодня. Спасибо, что слушаете GooSeek Daily! Подписывайтесь на наш подкаст и до встречи завтра."
+
+	var newsContent strings.Builder
+	sources := make([]Source, 0, len(opts.NewsItems))
+	for i, news := range opts.NewsItems {
+		if i > 0 {
+			newsContent.WriteString("\n\n")
+		}
+		fmt.Fprintf(&newsContent, "%s. %s", news.Title, news.Summary)
+		sources = append(sources, Source{
+			Title:     news.Title,
+			URL:       news.URL,
+			Publisher: news.Source,
+			Date:      news.PublishedAt,
+		})
+	}
+	newsText := newsContent.String()
+
+	// A target shorter than intro+outro would otherwise give the news segment
+	// a negative duration.
+	newsSeconds := opts.Duration - introSeconds - outroSeconds
+	if newsSeconds < 0 {
+		newsSeconds = 0
+	}
+
+	newsStart := introSeconds
+	outroStart := newsStart + newsSeconds
+
+	return &PodcastScript{
+		Title:       fmt.Sprintf("GooSeek Daily — %s", date),
+		Description: "Ежедневный подкаст с главными новостями",
+		FullText:    fmt.Sprintf("%s\n\n%s\n\n%s", intro, newsText, outro),
+		Segments: []PodcastSegment{
+			{
+				ID:        uuid.New().String(),
+				Type:      "intro",
+				Title:     "Вступление",
+				Content:   intro,
+				Duration:  introSeconds,
+				StartTime: 0,
+				EndTime:   newsStart,
+			},
+			{
+				ID:        uuid.New().String(),
+				Type:      "news",
+				Title:     "Новости",
+				Content:   newsText,
+				Duration:  newsSeconds,
+				StartTime: newsStart,
+				EndTime:   outroStart,
+				Sources:   sources,
+			},
+			{
+				ID:        uuid.New().String(),
+				Type:      "outro",
+				Title:     "Завершение",
+				Content:   outro,
+				Duration:  outroSeconds,
+				StartTime: outroStart,
+				EndTime:   outroStart + outroSeconds,
+			},
+		},
+		Sources: sources,
+	}, nil
+}
+
+// GenerateAudio synthesizes the podcast's transcript with the configured TTS
+// client, using the podcast's own VoiceConfig, and returns the audio bytes.
+//
+// The podcast's Status is updated in place: StatusGenerating while the request
+// is running, then StatusReady on success or StatusFailed on error. An error
+// is returned when no TTS client is configured, when podcast is nil, or when
+// synthesis fails; in the first two cases the status is not touched.
+func (g *PodcastGenerator) GenerateAudio(ctx context.Context, podcast *Podcast) ([]byte, error) {
+	if g.ttsClient == nil {
+		return nil, fmt.Errorf("TTS client not configured")
+	}
+	// Guard against a nil episode instead of panicking on the status write.
+	if podcast == nil {
+		return nil, fmt.Errorf("podcast is nil")
+	}
+
+	podcast.Status = StatusGenerating
+
+	audioData, err := g.ttsClient.GenerateSpeech(ctx, podcast.Transcript, podcast.VoiceConfig)
+	if err != nil {
+		podcast.Status = StatusFailed
+		return nil, fmt.Errorf("failed to generate audio: %w", err)
+	}
+
+	podcast.Status = StatusReady
+
+	return audioData, nil
+}
+
+func (g *PodcastGenerator) GenerateWeeklySummary(ctx context.Context, weeklyNews []NewsItem, locale string) (*Podcast, error) {
+	return g.GenerateDailyPodcast(ctx, GenerateOptions{
+		Type:         PodcastWeekly,
+		NewsItems:    weeklyNews,
+		Duration:     900,
+		Locale:       locale,
+		IncludeIntro: true,
+		IncludeOutro: true,
+	})
+}
+
+func (g *PodcastGenerator) GenerateTopicDeepDive(ctx context.Context, topic string, articles []NewsItem, locale string) (*Podcast, error) {
+	return g.GenerateDailyPodcast(ctx, GenerateOptions{
+		Type:         PodcastTopicDeep,
+		Topics:       []string{topic},
+		NewsItems:    articles,
+		Duration:     600,
+		Locale:       locale,
+		IncludeIntro: true,
+		IncludeOutro: true,
+	})
+}
+
+// extractJSON returns the first complete top-level JSON object found in text,
+// from its opening '{' to the matching '}'. Any prose before or after the
+// object is dropped. It returns "{}" when text contains no '{' or the object
+// is never closed.
+//
+// Braces inside JSON string literals are ignored when matching, and
+// backslash escapes are honoured so that an escaped quote does not end the
+// string. The result is not validated as JSON.
+func extractJSON(text string) string {
+	start := strings.IndexByte(text, '{')
+	if start == -1 {
+		return "{}"
+	}
+
+	depth := 0
+	inString := false
+	escaped := false
+
+	for i := start; i < len(text); i++ {
+		c := text[i]
+
+		if inString {
+			switch {
+			case escaped:
+				// The character after a backslash is literal, whatever it is.
+				escaped = false
+			case c == '\\':
+				escaped = true
+			case c == '"':
+				inString = false
+			}
+			continue
+		}
+
+		switch c {
+		case '"':
+			inString = true
+		case '{':
+			depth++
+		case '}':
+			depth--
+			if depth == 0 {
+				return text[start : i+1]
+			}
+		}
+	}
+
+	return "{}"
+}
+
+// ToJSON encodes the podcast as JSON using the struct's field tags.
+func (p *Podcast) ToJSON() ([]byte, error) {
+	encoded, err := json.Marshal(p)
+	return encoded, err
+}
+
+func ParsePodcast(data []byte) (*Podcast, error) {
+	var podcast Podcast
+	if err := json.Unmarshal(data, &podcast); err != nil {
+		return nil, err
+	}
+	return &podcast, nil
+}
+
+// ElevenLabsTTS is a TTSClient backed by the ElevenLabs HTTP API.
+type ElevenLabsTTS struct {
+	// apiKey is sent in the xi-api-key request header.
+	apiKey     string
+	httpClient *http.Client
+	// baseURL is the API root that endpoint paths are appended to.
+	baseURL    string
+}
+
+func NewElevenLabsTTS(apiKey string) *ElevenLabsTTS {
+	return &ElevenLabsTTS{
+		apiKey:     apiKey,
+		httpClient: &http.Client{Timeout: 120 * time.Second},
+		baseURL:    "https://api.elevenlabs.io/v1",
+	}
+}
+
+func (t *ElevenLabsTTS) GenerateSpeech(ctx context.Context, text string, config VoiceConfig) ([]byte, error) {
+	voiceID := config.VoiceID
+	if voiceID == "" {
+		voiceID = "21m00Tcm4TlvDq8ikWAM"
+	}
+
+	url := fmt.Sprintf("%s/text-to-speech/%s", t.baseURL, voiceID)
+
+	body := map[string]interface{}{
+		"text":     text,
+		"model_id": "eleven_multilingual_v2",
+		"voice_settings": map[string]interface{}{
+			"stability":        0.5,
+			"similarity_boost": 0.75,
+			"style":            0.5,
+			"use_speaker_boost": true,
+		},
+	}
+
+	bodyJSON, _ := json.Marshal(body)
+
+	req, err := http.NewRequestWithContext(ctx, "POST", url, strings.NewReader(string(bodyJSON)))
+	if err != nil {
+		return nil, err
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("xi-api-key", t.apiKey)
+	req.Header.Set("Accept", "audio/mpeg")
+
+	resp, err := t.httpClient.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("ElevenLabs API error: %d", resp.StatusCode)
+	}
+
+	var audioData []byte
+	buf := make([]byte, 32*1024)
+	for {
+		n, err := resp.Body.Read(buf)
+		if n > 0 {
+			audioData = append(audioData, buf[:n]...)
+		}
+		if err != nil {
+			break
+		}
+	}
+
+	return audioData, nil
+}
+
+// DummyTTS is a TTSClient that performs no synthesis; its GenerateSpeech
+// returns empty audio and no error.
+type DummyTTS struct{}
+
+// GenerateSpeech ignores its arguments and returns an empty, non-nil byte
+// slice with a nil error.
+func (t *DummyTTS) GenerateSpeech(ctx context.Context, text string, config VoiceConfig) ([]byte, error) {
+	silence := make([]byte, 0)
+	return silence, nil
+}