// Package podcast builds news podcast episodes: it asks an LLM for a
// structured script and can turn the resulting transcript into audio through
// a pluggable text-to-speech client.
package podcast

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"

	"github.com/google/uuid"
	"github.com/gooseek/backend/internal/llm"
)

// defaultElevenLabsVoiceID is the ElevenLabs voice used whenever no voice ID
// has been configured.
const defaultElevenLabsVoiceID = "21m00Tcm4TlvDq8ikWAM"

// PodcastType identifies the kind of episode being produced.
type PodcastType string

// Supported podcast types.
const (
	PodcastDaily     PodcastType = "daily"
	PodcastWeekly    PodcastType = "weekly"
	PodcastTopicDeep PodcastType = "topic_deep"
	PodcastBreaking  PodcastType = "breaking"
)

// VoiceStyle names a delivery style for synthesized speech.
type VoiceStyle string

// Supported voice styles.
const (
	VoiceNeutral      VoiceStyle = "neutral"
	VoiceEnthusiastic VoiceStyle = "enthusiastic"
	VoiceProfessional VoiceStyle = "professional"
	VoiceCasual       VoiceStyle = "casual"
	VoiceStorytelling VoiceStyle = "storytelling"
)

// Podcast is a generated episode together with its script, metadata and
// voice settings. Duration is expressed in seconds.
type Podcast struct {
	ID          string           `json:"id"`
	Title       string           `json:"title"`
	Description string           `json:"description"`
	Type        PodcastType      `json:"type"`
	Date        time.Time        `json:"date"`
	Duration    int              `json:"durationSeconds"`
	AudioURL    string           `json:"audioUrl,omitempty"`
	Transcript  string           `json:"transcript"`
	Segments    []PodcastSegment `json:"segments"`
	Topics      []string         `json:"topics"`
	Sources     []Source         `json:"sources"`
	Thumbnail   string           `json:"thumbnail,omitempty"`
	Status      PodcastStatus    `json:"status"`
	GeneratedAt time.Time        `json:"generatedAt"`
	PublishedAt *time.Time       `json:"publishedAt,omitempty"`
	Locale      string           `json:"locale"`
	VoiceConfig VoiceConfig      `json:"voiceConfig"`
}

// PodcastStatus describes where an episode is in its lifecycle.
type PodcastStatus string

// Podcast lifecycle states.
const (
	StatusDraft      PodcastStatus = "draft"
	StatusGenerating PodcastStatus = "generating"
	StatusReady      PodcastStatus = "ready"
	StatusPublished  PodcastStatus = "published"
	StatusFailed     PodcastStatus = "failed"
)

// PodcastSegment is one spoken part of an episode. Duration, StartTime and
// EndTime are in seconds; StartTime and EndTime are offsets from the start of
// the episode.
type PodcastSegment struct {
	ID         string   `json:"id"`
	Type       string   `json:"type"`
	Title      string   `json:"title"`
	Content    string   `json:"content"`
	Duration   int      `json:"durationSeconds"`
	StartTime  int      `json:"startTime"`
	EndTime    int      `json:"endTime"`
	Sources    []Source `json:"sources,omitempty"`
	Highlights []string `json:"highlights,omitempty"`
}

// Source is a reference to material cited by an episode or segment.
type Source struct {
	Title     string `json:"title"`
	URL       string `json:"url"`
	Publisher string `json:"publisher"`
	Date      string `json:"date,omitempty"`
}

// VoiceConfig selects the text-to-speech provider and voice parameters.
// Which fields are honoured depends on the TTSClient implementation; the
// ElevenLabsTTS client in this file reads only VoiceID.
type VoiceConfig struct {
	Provider string     `json:"provider"`
	VoiceID  string     `json:"voiceId"`
	Style    VoiceStyle `json:"style"`
	Speed    float64    `json:"speed"`
	Pitch    float64    `json:"pitch"`
	Language string     `json:"language"`
}

// PodcastGenerator produces podcast scripts with an LLM and, optionally,
// audio with a TTS client. The httpClient field is initialised by
// NewPodcastGenerator but is not used by any method in this file.
type PodcastGenerator struct {
	llm        llm.Client
	ttsClient  TTSClient
	httpClient *http.Client
	config     GeneratorConfig
}

// GeneratorConfig holds defaults for a PodcastGenerator. Durations are in
// seconds. Only DefaultDuration and DefaultVoice are read by the methods in
// this file.
type GeneratorConfig struct {
	DefaultDuration int
	MaxDuration     int
	DefaultVoice    VoiceConfig
	OutputDir       string
}

// TTSClient converts text to encoded audio bytes.
type TTSClient interface {
	GenerateSpeech(ctx context.Context, text string, config VoiceConfig) ([]byte, error)
}

// Compile-time checks that the bundled clients satisfy TTSClient.
var (
	_ TTSClient = (*ElevenLabsTTS)(nil)
	_ TTSClient = (*DummyTTS)(nil)
)

// NewPodcastGenerator returns a generator using the given LLM and TTS
// clients. Zero values in cfg are replaced with defaults: DefaultDuration
// 300, MaxDuration 1800, and, when cfg.DefaultVoice.Provider is empty, a
// neutral Russian ElevenLabs voice (the whole DefaultVoice value is replaced
// in that case).
func NewPodcastGenerator(llmClient llm.Client, ttsClient TTSClient, cfg GeneratorConfig) *PodcastGenerator {
	if cfg.DefaultDuration == 0 {
		cfg.DefaultDuration = 300
	}
	if cfg.MaxDuration == 0 {
		cfg.MaxDuration = 1800
	}
	if cfg.DefaultVoice.Provider == "" {
		cfg.DefaultVoice = VoiceConfig{
			Provider: "elevenlabs",
			VoiceID:  defaultElevenLabsVoiceID,
			Style:    VoiceNeutral,
			Speed:    1.0,
			Pitch:    1.0,
			Language: "ru",
		}
	}

	return &PodcastGenerator{
		llm:        llmClient,
		ttsClient:  ttsClient,
		httpClient: &http.Client{Timeout: 60 * time.Second},
		config:     cfg,
	}
}

// GenerateOptions controls a single generation run. Duration is in seconds.
// IncludeIntro, IncludeOutro and PersonalizeFor are not read by the code in
// this file.
type GenerateOptions struct {
	Type           PodcastType
	Topics         []string
	NewsItems      []NewsItem
	Date           time.Time
	Duration       int
	Locale         string
	VoiceConfig    *VoiceConfig
	IncludeIntro   bool
	IncludeOutro   bool
	PersonalizeFor string
}

// NewsItem is one piece of news offered to the script generator.
type NewsItem struct {
	Title       string   `json:"title"`
	Summary     string   `json:"summary"`
	URL         string   `json:"url"`
	Source      string   `json:"source"`
	PublishedAt string   `json:"publishedAt"`
	Topics      []string `json:"topics"`
	Importance  int      `json:"importance"`
}

// GenerateDailyPodcast generates a script for opts and wraps it in a draft
// Podcast. Unset options are defaulted: Type to PodcastDaily, Date to now, a
// non-positive Duration to the configured default, and Locale to "ru". The
// voice is opts.VoiceConfig when provided, otherwise the generator default.
// No audio is produced here; see GenerateAudio.
func (g *PodcastGenerator) GenerateDailyPodcast(ctx context.Context, opts GenerateOptions) (*Podcast, error) {
	if opts.Type == "" {
		opts.Type = PodcastDaily
	}
	if opts.Date.IsZero() {
		opts.Date = time.Now()
	}
	// A negative duration is as meaningless as a missing one.
	if opts.Duration <= 0 {
		opts.Duration = g.config.DefaultDuration
	}
	if opts.Locale == "" {
		opts.Locale = "ru"
	}

	script, err := g.generateScript(ctx, opts)
	if err != nil {
		return nil, fmt.Errorf("failed to generate script: %w", err)
	}

	podcast := &Podcast{
		ID:          uuid.New().String(),
		Title:       script.Title,
		Description: script.Description,
		Type:        opts.Type,
		Date:        opts.Date,
		Duration:    opts.Duration,
		Transcript:  script.FullText,
		Segments:    script.Segments,
		Topics:      opts.Topics,
		Sources:     script.Sources,
		Status:      StatusDraft,
		GeneratedAt: time.Now(),
		Locale:      opts.Locale,
		VoiceConfig: g.config.DefaultVoice,
	}
	if opts.VoiceConfig != nil {
		podcast.VoiceConfig = *opts.VoiceConfig
	}

	return podcast, nil
}

// PodcastScript is the textual result of script generation: the full
// transcript plus its segments and the sources they cite.
type PodcastScript struct {
	Title       string
	Description string
	FullText    string
	Segments    []PodcastSegment
	Sources     []Source
}

// generateScript asks the LLM for a JSON script covering opts.NewsItems and
// converts the reply into a PodcastScript. Segment durations are estimated
// from word counts. If the reply cannot be parsed, or contains no segments,
// the templated script from generateDefaultScript is returned instead. An
// error is returned when the news items cannot be encoded or the LLM call
// fails.
func (g *PodcastGenerator) generateScript(ctx context.Context, opts GenerateOptions) (*PodcastScript, error) {
	const (
		// avgWordsPerSecond is the speaking pace assumed when estimating
		// how long a segment takes to read aloud.
		avgWordsPerSecond = 2.5
		// minSegmentDuration is the floor, in seconds, for any segment.
		minSegmentDuration = 10
	)

	langInstruction := ""
	if opts.Locale == "ru" {
		langInstruction = "Generate the entire script in Russian language. Use natural Russian speech patterns."
	}

	newsJSON, err := json.Marshal(opts.NewsItems)
	if err != nil {
		return nil, fmt.Errorf("encoding news items: %w", err)
	}

	prompt := fmt.Sprintf(`Create a podcast script for a daily news digest.

Date: %s
Duration target: %d seconds (approximately %d minutes)
Topics: %v

%s

News items to cover:
%s

Create an engaging podcast script with these requirements:
1. Start with a catchy introduction greeting the audience
2. Cover the most important news first
3. Transition smoothly between stories
4. Add brief analysis or context where appropriate
5. End with a summary and sign-off

The script should sound natural when read aloud - use conversational language, not formal news anchor style.

Respond in JSON format:
{
  "title": "Podcast title for this episode",
  "description": "Brief episode description",
  "segments": [
    {
      "type": "intro|news|analysis|transition|outro",
      "title": "Segment title",
      "content": "Full text to be spoken",
      "highlights": ["Key point 1", "Key point 2"],
      "sources": [{"title": "Source title", "url": "url", "publisher": "publisher"}]
    }
  ]
}`,
		opts.Date.Format("2006-01-02"),
		opts.Duration,
		opts.Duration/60,
		opts.Topics,
		langInstruction,
		string(newsJSON),
	)

	result, err := g.llm.GenerateText(ctx, llm.StreamRequest{
		Messages: []llm.Message{{Role: "user", Content: prompt}},
	})
	if err != nil {
		return nil, err
	}

	var parsed struct {
		Title       string `json:"title"`
		Description string `json:"description"`
		Segments    []struct {
			Type       string   `json:"type"`
			Title      string   `json:"title"`
			Content    string   `json:"content"`
			Highlights []string `json:"highlights"`
			Sources    []struct {
				Title     string `json:"title"`
				URL       string `json:"url"`
				Publisher string `json:"publisher"`
			} `json:"sources"`
		} `json:"segments"`
	}
	if err := json.Unmarshal([]byte(extractJSON(result)), &parsed); err != nil {
		// Best effort: an unparseable reply degrades to the templated script.
		return g.generateDefaultScript(opts)
	}
	// extractJSON yields "{}" when the reply holds no JSON object; that
	// parses cleanly but would produce an empty, untitled episode.
	if len(parsed.Segments) == 0 {
		return g.generateDefaultScript(opts)
	}

	script := &PodcastScript{
		Title:       parsed.Title,
		Description: parsed.Description,
		Segments:    make([]PodcastSegment, 0, len(parsed.Segments)),
		Sources:     make([]Source, 0),
	}

	var fullText strings.Builder
	currentTime := 0
	for i, seg := range parsed.Segments {
		wordCount := len(strings.Fields(seg.Content))
		segDuration := int(float64(wordCount) / avgWordsPerSecond)
		if segDuration < minSegmentDuration {
			segDuration = minSegmentDuration
		}

		segment := PodcastSegment{
			ID:         uuid.New().String(),
			Type:       seg.Type,
			Title:      seg.Title,
			Content:    seg.Content,
			Duration:   segDuration,
			StartTime:  currentTime,
			EndTime:    currentTime + segDuration,
			Highlights: seg.Highlights,
		}
		for _, src := range seg.Sources {
			source := Source{
				Title:     src.Title,
				URL:       src.URL,
				Publisher: src.Publisher,
			}
			segment.Sources = append(segment.Sources, source)
			script.Sources = append(script.Sources, source)
		}
		script.Segments = append(script.Segments, segment)

		if i > 0 {
			fullText.WriteString("\n\n")
		}
		fullText.WriteString(seg.Content)

		currentTime += segDuration
	}
	script.FullText = fullText.String()

	return script, nil
}

// generateDefaultScript builds a fixed-template Russian script (intro, one
// news segment listing every item's title and summary, outro) without
// calling the LLM. The intro and outro take 15 seconds each; the news segment
// gets the rest of opts.Duration, never less than zero. It always returns a
// nil error.
func (g *PodcastGenerator) generateDefaultScript(opts GenerateOptions) (*PodcastScript, error) {
	// edgeDuration is the length, in seconds, of the intro and of the outro.
	const edgeDuration = 15

	date := opts.Date.Format("2 January 2006")
	intro := fmt.Sprintf("Добрый день! С вами GooSeek Daily — ваш ежедневный подкаст с главными новостями. Сегодня %s, и вот что происходит в мире.", date)

	var newsContent strings.Builder
	for i, news := range opts.NewsItems {
		if i > 0 {
			newsContent.WriteString("\n\n")
		}
		fmt.Fprintf(&newsContent, "%s. %s", news.Title, news.Summary)
	}
	newsText := newsContent.String()

	outro := "На этом всё на сегодня. Спасибо, что слушаете GooSeek Daily! Подписывайтесь на наш подкаст и до встречи завтра."

	// Short target durations must not yield a negative segment length.
	newsDuration := opts.Duration - 2*edgeDuration
	if newsDuration < 0 {
		newsDuration = 0
	}
	newsStart := edgeDuration
	outroStart := newsStart + newsDuration

	return &PodcastScript{
		Title:       fmt.Sprintf("GooSeek Daily — %s", date),
		Description: "Ежедневный подкаст с главными новостями",
		FullText:    fmt.Sprintf("%s\n\n%s\n\n%s", intro, newsText, outro),
		Segments: []PodcastSegment{
			{
				ID:        uuid.New().String(),
				Type:      "intro",
				Title:     "Вступление",
				Content:   intro,
				Duration:  edgeDuration,
				StartTime: 0,
				EndTime:   newsStart,
			},
			{
				ID:        uuid.New().String(),
				Type:      "news",
				Title:     "Новости",
				Content:   newsText,
				Duration:  newsDuration,
				StartTime: newsStart,
				EndTime:   outroStart,
			},
			{
				ID:        uuid.New().String(),
				Type:      "outro",
				Title:     "Завершение",
				Content:   outro,
				Duration:  edgeDuration,
				StartTime: outroStart,
				EndTime:   outroStart + edgeDuration,
			},
		},
		// Non-nil so it serializes as [] like the LLM-backed script does.
		Sources: make([]Source, 0),
	}, nil
}

// GenerateAudio synthesizes podcast.Transcript with the configured TTS client
// and returns the audio bytes. It updates podcast.Status to StatusGenerating
// while running, then to StatusReady on success or StatusFailed on error. It
// returns an error when no TTS client is configured or podcast is nil.
func (g *PodcastGenerator) GenerateAudio(ctx context.Context, podcast *Podcast) ([]byte, error) {
	if g.ttsClient == nil {
		return nil, fmt.Errorf("TTS client not configured")
	}
	if podcast == nil {
		return nil, fmt.Errorf("podcast is nil")
	}

	podcast.Status = StatusGenerating

	audioData, err := g.ttsClient.GenerateSpeech(ctx, podcast.Transcript, podcast.VoiceConfig)
	if err != nil {
		podcast.Status = StatusFailed
		return nil, fmt.Errorf("failed to generate audio: %w", err)
	}

	podcast.Status = StatusReady
	return audioData, nil
}

// GenerateWeeklySummary generates a PodcastWeekly episode from weeklyNews
// with a 900-second target duration.
func (g *PodcastGenerator) GenerateWeeklySummary(ctx context.Context, weeklyNews []NewsItem, locale string) (*Podcast, error) {
	return g.GenerateDailyPodcast(ctx, GenerateOptions{
		Type:         PodcastWeekly,
		NewsItems:    weeklyNews,
		Duration:     900,
		Locale:       locale,
		IncludeIntro: true,
		IncludeOutro: true,
	})
}

// GenerateTopicDeepDive generates a PodcastTopicDeep episode about topic from
// articles with a 600-second target duration.
func (g *PodcastGenerator) GenerateTopicDeepDive(ctx context.Context, topic string, articles []NewsItem, locale string) (*Podcast, error) {
	return g.GenerateDailyPodcast(ctx, GenerateOptions{
		Type:         PodcastTopicDeep,
		Topics:       []string{topic},
		NewsItems:    articles,
		Duration:     600,
		Locale:       locale,
		IncludeIntro: true,
		IncludeOutro: true,
	})
}

// extractJSON returns the first balanced {...} object found in text, or "{}"
// when text contains no opening brace or the object never closes. Braces
// inside JSON string literals (including after escaped quotes) are ignored so
// that spoken text containing "{" or "}" does not cut the object short.
func extractJSON(text string) string {
	start := strings.IndexByte(text, '{')
	if start == -1 {
		return "{}"
	}

	depth := 0
	inString := false
	escaped := false
	for i := start; i < len(text); i++ {
		c := text[i]

		if inString {
			switch {
			case escaped:
				// The previous backslash consumes this byte.
				escaped = false
			case c == '\\':
				escaped = true
			case c == '"':
				inString = false
			}
			continue
		}

		switch c {
		case '"':
			inString = true
		case '{':
			depth++
		case '}':
			depth--
			if depth == 0 {
				return text[start : i+1]
			}
		}
	}

	return "{}"
}

// ToJSON serializes the podcast to JSON.
func (p *Podcast) ToJSON() ([]byte, error) {
	return json.Marshal(p)
}

// ParsePodcast decodes a podcast from JSON produced by ToJSON.
func ParsePodcast(data []byte) (*Podcast, error) {
	var podcast Podcast
	if err := json.Unmarshal(data, &podcast); err != nil {
		return nil, err
	}
	return &podcast, nil
}

// ElevenLabsTTS is a TTSClient backed by the ElevenLabs HTTP API.
type ElevenLabsTTS struct {
	apiKey     string
	httpClient *http.Client
	baseURL    string
}

// NewElevenLabsTTS returns an ElevenLabs client that authenticates with
// apiKey and uses a 120-second HTTP timeout.
func NewElevenLabsTTS(apiKey string) *ElevenLabsTTS {
	return &ElevenLabsTTS{
		apiKey:     apiKey,
		httpClient: &http.Client{Timeout: 120 * time.Second},
		baseURL:    "https://api.elevenlabs.io/v1",
	}
}

// GenerateSpeech posts text to the ElevenLabs text-to-speech endpoint and
// returns the response body, requested as audio/mpeg. Only config.VoiceID is
// consulted; an empty ID selects the package default voice. It returns an
// error if the request cannot be built or sent, if the API responds with a
// status other than 200, or if the body cannot be read completely.
func (t *ElevenLabsTTS) GenerateSpeech(ctx context.Context, text string, config VoiceConfig) ([]byte, error) {
	voiceID := config.VoiceID
	if voiceID == "" {
		voiceID = defaultElevenLabsVoiceID
	}
	endpoint := fmt.Sprintf("%s/text-to-speech/%s", t.baseURL, voiceID)

	payload, err := json.Marshal(map[string]interface{}{
		"text":     text,
		"model_id": "eleven_multilingual_v2",
		"voice_settings": map[string]interface{}{
			"stability":         0.5,
			"similarity_boost":  0.75,
			"style":             0.5,
			"use_speaker_boost": true,
		},
	})
	if err != nil {
		return nil, fmt.Errorf("encoding ElevenLabs request: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("xi-api-key", t.apiKey)
	req.Header.Set("Accept", "audio/mpeg")

	resp, err := t.httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("ElevenLabs API error: %d", resp.StatusCode)
	}

	// A read failure mid-stream must surface as an error rather than as
	// silently truncated audio.
	audioData, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading ElevenLabs audio response: %w", err)
	}
	return audioData, nil
}

// DummyTTS is a TTSClient that produces no audio.
type DummyTTS struct{}

// GenerateSpeech ignores its arguments and returns an empty byte slice.
func (t *DummyTTS) GenerateSpeech(ctx context.Context, text string, config VoiceConfig) ([]byte, error) {
	return []byte{}, nil
}