Files
gooseek/backend/internal/podcast/generator.go
home 06fe57c765 feat: Go backend, enhanced search, new widgets, Docker deploy
Major changes:
- Add Go backend (backend/) with microservices architecture
- Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler,
  proxy-manager, media-search, fastClassifier, language detection
- New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard,
  UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions
- Improved discover-svc with discover-db integration
- Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md)
- Library-svc: project_id schema migration
- Remove deprecated finance-svc and travel-svc
- Localization improvements across services

Made-with: Cursor
2026-02-27 04:15:32 +03:00

508 lines
13 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package podcast
import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"

	"github.com/google/uuid"
	"github.com/gooseek/backend/internal/llm"
)
// PodcastType names the kind of episode; it is serialized as the "type" field
// of a Podcast.
type PodcastType string
// Known podcast types.
const (
// PodcastDaily is the daily episode type.
PodcastDaily PodcastType = "daily"
// PodcastWeekly is the type assigned by GenerateWeeklySummary.
PodcastWeekly PodcastType = "weekly"
// PodcastTopicDeep is the type assigned by GenerateTopicDeepDive.
PodcastTopicDeep PodcastType = "topic_deep"
// PodcastBreaking is declared for breaking-news episodes.
// NOTE(review): no code visible in this file produces this type.
PodcastBreaking PodcastType = "breaking"
)
// VoiceStyle names a speaking style; it is serialized as the "style" field of
// a VoiceConfig.
type VoiceStyle string
// Known voice styles. VoiceNeutral is the style used by the built-in default
// voice in NewPodcastGenerator.
const (
VoiceNeutral VoiceStyle = "neutral"
VoiceEnthusiastic VoiceStyle = "enthusiastic"
VoiceProfessional VoiceStyle = "professional"
VoiceCasual VoiceStyle = "casual"
VoiceStorytelling VoiceStyle = "storytelling"
)
// Podcast is a generated episode: its metadata, the script text, the segment
// breakdown and the voice settings to use for audio synthesis. It round-trips
// through JSON via ToJSON and ParsePodcast.
type Podcast struct {
// ID is a UUID string assigned when the podcast is generated.
ID string `json:"id"`
Title string `json:"title"`
Description string `json:"description"`
Type PodcastType `json:"type"`
// Date is the date the episode covers.
Date time.Time `json:"date"`
// Duration is the episode length in seconds.
Duration int `json:"durationSeconds"`
// AudioURL is omitted from JSON when empty.
// NOTE(review): not populated by the code visible in this file.
AudioURL string `json:"audioUrl,omitempty"`
// Transcript is the full script text; GenerateAudio sends it to the TTS client.
Transcript string `json:"transcript"`
Segments []PodcastSegment `json:"segments"`
Topics []string `json:"topics"`
Sources []Source `json:"sources"`
// Thumbnail is omitted from JSON when empty.
// NOTE(review): not populated by the code visible in this file.
Thumbnail string `json:"thumbnail,omitempty"`
Status PodcastStatus `json:"status"`
// GeneratedAt is the wall-clock time at which the podcast value was built.
GeneratedAt time.Time `json:"generatedAt"`
// PublishedAt is nil (and omitted from JSON) when unset.
PublishedAt *time.Time `json:"publishedAt,omitempty"`
Locale string `json:"locale"`
VoiceConfig VoiceConfig `json:"voiceConfig"`
}
// PodcastStatus is the lifecycle state of a Podcast, serialized as its
// "status" field.
type PodcastStatus string
// Known podcast statuses.
const (
// StatusDraft is the status of a podcast returned by GenerateDailyPodcast.
StatusDraft PodcastStatus = "draft"
// StatusGenerating is set by GenerateAudio while speech synthesis runs.
StatusGenerating PodcastStatus = "generating"
// StatusReady is set by GenerateAudio after speech synthesis succeeds.
StatusReady PodcastStatus = "ready"
// StatusPublished is declared for published episodes.
// NOTE(review): not set by the code visible in this file.
StatusPublished PodcastStatus = "published"
// StatusFailed is set by GenerateAudio when speech synthesis returns an error.
StatusFailed PodcastStatus = "failed"
)
// PodcastSegment is one part of an episode script (for example an intro, a
// news item or an outro) together with its timing and attribution.
type PodcastSegment struct {
// ID is a UUID string assigned when the segment is built.
ID string `json:"id"`
// Type is a free-form label; the LLM prompt in generateScript asks for one of
// intro, news, analysis, transition or outro.
Type string `json:"type"`
Title string `json:"title"`
// Content is the text to be spoken for this segment.
Content string `json:"content"`
// Duration is the segment length in seconds.
Duration int `json:"durationSeconds"`
// StartTime and EndTime are offsets in seconds from the start of the episode,
// as computed in generateScript from the accumulated segment durations.
StartTime int `json:"startTime"`
EndTime int `json:"endTime"`
Sources []Source `json:"sources,omitempty"`
Highlights []string `json:"highlights,omitempty"`
}
// Source is a reference to material cited by a podcast or one of its segments.
type Source struct {
Title string `json:"title"`
URL string `json:"url"`
Publisher string `json:"publisher"`
// Date is a free-form string and is omitted from JSON when empty.
Date string `json:"date,omitempty"`
}
// VoiceConfig describes the voice a TTSClient should use. The ElevenLabsTTS
// implementation in this file reads only VoiceID; the remaining fields are
// carried along and serialized with the Podcast.
type VoiceConfig struct {
// Provider names the TTS backend; the built-in default is "elevenlabs".
Provider string `json:"provider"`
// VoiceID is the provider-specific voice identifier.
VoiceID string `json:"voiceId"`
Style VoiceStyle `json:"style"`
// Speed and Pitch are both 1.0 in the built-in default voice.
Speed float64 `json:"speed"`
Pitch float64 `json:"pitch"`
// Language is a language code; the built-in default is "ru".
Language string `json:"language"`
}
// PodcastGenerator produces podcast scripts with an LLM and turns them into
// audio with a TTS client. Construct it with NewPodcastGenerator so that the
// configuration defaults are applied.
type PodcastGenerator struct {
// llm writes the episode script.
llm llm.Client
// ttsClient synthesizes audio; it may be nil, in which case GenerateAudio
// returns an error.
ttsClient TTSClient
// httpClient is created by NewPodcastGenerator with a 60-second timeout.
// NOTE(review): not used by the code visible in this file.
httpClient *http.Client
config GeneratorConfig
}
// GeneratorConfig holds PodcastGenerator settings. Zero-valued fields are
// replaced with defaults by NewPodcastGenerator.
type GeneratorConfig struct {
// DefaultDuration is the episode length in seconds used when the caller does
// not request one; NewPodcastGenerator defaults it to 300.
DefaultDuration int
// MaxDuration is, going by its name, the longest allowed episode in seconds;
// NewPodcastGenerator defaults it to 1800.
MaxDuration int
// DefaultVoice is used when GenerateOptions.VoiceConfig is nil. It is replaced
// wholesale by the built-in voice when its Provider is empty.
DefaultVoice VoiceConfig
// OutputDir is presumably where generated audio should be written.
// NOTE(review): not read by the code visible in this file.
OutputDir string
}
// TTSClient converts text to speech. Implementations in this file are
// ElevenLabsTTS and DummyTTS.
type TTSClient interface {
// GenerateSpeech synthesizes text using the given voice configuration and
// returns the raw audio bytes.
GenerateSpeech(ctx context.Context, text string, config VoiceConfig) ([]byte, error)
}
// NewPodcastGenerator builds a PodcastGenerator around the given LLM and TTS
// clients, filling in defaults for unset configuration values:
//
//   - DefaultDuration falls back to 300 seconds;
//   - MaxDuration falls back to 1800 seconds;
//   - DefaultVoice is replaced by a neutral ElevenLabs voice in Russian when
//     its Provider is empty.
//
// cfg is received by value, so the caller's copy is left untouched. ttsClient
// may be nil; GenerateAudio then reports that TTS is not configured.
func NewPodcastGenerator(llmClient llm.Client, ttsClient TTSClient, cfg GeneratorConfig) *PodcastGenerator {
	const (
		fallbackDuration    = 300
		fallbackMaxDuration = 1800
	)

	gen := &PodcastGenerator{
		llm:        llmClient,
		ttsClient:  ttsClient,
		httpClient: &http.Client{Timeout: 60 * time.Second},
	}

	if cfg.DefaultDuration == 0 {
		cfg.DefaultDuration = fallbackDuration
	}
	if cfg.MaxDuration == 0 {
		cfg.MaxDuration = fallbackMaxDuration
	}
	if cfg.DefaultVoice.Provider == "" {
		var voice VoiceConfig
		voice.Provider = "elevenlabs"
		voice.VoiceID = "21m00Tcm4TlvDq8ikWAM"
		voice.Style = VoiceNeutral
		voice.Speed = 1.0
		voice.Pitch = 1.0
		voice.Language = "ru"
		cfg.DefaultVoice = voice
	}

	gen.config = cfg
	return gen
}
// GenerateOptions describes a single podcast generation request.
type GenerateOptions struct {
// Type is copied to the resulting Podcast.
Type PodcastType
// Topics are included in the LLM prompt and copied to the resulting Podcast.
Topics []string
// NewsItems are the stories to cover; they are JSON-encoded into the LLM prompt.
NewsItems []NewsItem
// Date is the episode date; the zero value means "now".
Date time.Time
// Duration is the target length in seconds; zero selects the generator's
// DefaultDuration.
Duration int
// Locale defaults to "ru" when empty.
Locale string
// VoiceConfig overrides the generator's default voice when non-nil.
VoiceConfig *VoiceConfig
// NOTE(review): IncludeIntro, IncludeOutro and PersonalizeFor are not read by
// the code visible in this file.
IncludeIntro bool
IncludeOutro bool
PersonalizeFor string
}
// NewsItem is one story offered to the generator. Items are JSON-encoded with
// these field names and embedded in the LLM prompt.
type NewsItem struct {
Title string `json:"title"`
Summary string `json:"summary"`
URL string `json:"url"`
// Source is the name of the outlet that published the story.
Source string `json:"source"`
// PublishedAt is a free-form timestamp string; it is not parsed here.
PublishedAt string `json:"publishedAt"`
Topics []string `json:"topics"`
// Importance is a numeric ranking hint.
// NOTE(review): scale and ordering are not established in this file.
Importance int `json:"importance"`
}
// GenerateDailyPodcast writes an episode script for opts and wraps it in a
// draft Podcast. No audio is produced; call GenerateAudio for that.
//
// Unset options are normalized before the script is generated:
//   - Type defaults to PodcastDaily, so the podcast never carries an empty type;
//   - Date defaults to the current time;
//   - a non-positive Duration becomes the configured DefaultDuration;
//   - Duration is capped at the configured MaxDuration (when that is positive);
//   - Locale defaults to "ru".
//
// The voice is opts.VoiceConfig when provided, otherwise the generator's
// default voice. An error is returned only when script generation fails.
func (g *PodcastGenerator) GenerateDailyPodcast(ctx context.Context, opts GenerateOptions) (*Podcast, error) {
	if opts.Type == "" {
		opts.Type = PodcastDaily
	}
	if opts.Date.IsZero() {
		opts.Date = time.Now()
	}
	if opts.Duration <= 0 {
		opts.Duration = g.config.DefaultDuration
	}
	// Enforce the configured ceiling; a zero MaxDuration (generator built
	// without NewPodcastGenerator) means "no limit".
	if limit := g.config.MaxDuration; limit > 0 && opts.Duration > limit {
		opts.Duration = limit
	}
	if opts.Locale == "" {
		opts.Locale = "ru"
	}

	script, err := g.generateScript(ctx, opts)
	if err != nil {
		return nil, fmt.Errorf("failed to generate script: %w", err)
	}

	voice := g.config.DefaultVoice
	if opts.VoiceConfig != nil {
		voice = *opts.VoiceConfig
	}

	return &Podcast{
		ID:          uuid.New().String(),
		Title:       script.Title,
		Description: script.Description,
		Type:        opts.Type,
		Date:        opts.Date,
		Duration:    opts.Duration,
		Transcript:  script.FullText,
		Segments:    script.Segments,
		Topics:      opts.Topics,
		Sources:     script.Sources,
		Status:      StatusDraft,
		GeneratedAt: time.Now(),
		Locale:      opts.Locale,
		VoiceConfig: voice,
	}, nil
}
// PodcastScript is the intermediate result of script generation, before it is
// wrapped in a Podcast.
type PodcastScript struct {
Title string
Description string
// FullText is the complete text to be spoken; it becomes Podcast.Transcript.
FullText string
Segments []PodcastSegment
// Sources lists the references cited by the script.
Sources []Source
}
// generateScript asks the LLM for an episode script covering opts.NewsItems
// and converts the JSON reply into a PodcastScript.
//
// Segment durations are estimated from the word count of each segment at 2.5
// words per second with a 10-second floor, and start/end offsets are the
// running sum of those estimates. FullText is the segment contents joined by
// blank lines, and Sources collects every source cited by any segment.
//
// When the reply cannot be used — it is not valid JSON, or it contains no
// segments (which is also what happens when no JSON object is found at all,
// because extractJSON then yields "{}") — the template-based script from
// generateDefaultScript is returned instead. An error is returned only when
// the news items cannot be encoded or the LLM call itself fails.
func (g *PodcastGenerator) generateScript(ctx context.Context, opts GenerateOptions) (*PodcastScript, error) {
	const (
		avgWordsPerSecond = 2.5 // assumed speaking pace
		minSegmentSeconds = 10  // floor for any segment's estimated duration
	)

	langInstruction := ""
	if opts.Locale == "ru" {
		langInstruction = "Generate the entire script in Russian language. Use natural Russian speech patterns."
	}

	newsJSON, err := json.Marshal(opts.NewsItems)
	if err != nil {
		return nil, fmt.Errorf("encoding news items: %w", err)
	}

	prompt := fmt.Sprintf(`Create a podcast script for a daily news digest.
Date: %s
Duration target: %d seconds (approximately %d minutes)
Topics: %v
%s
News items to cover:
%s
Create an engaging podcast script with these requirements:
1. Start with a catchy introduction greeting the audience
2. Cover the most important news first
3. Transition smoothly between stories
4. Add brief analysis or context where appropriate
5. End with a summary and sign-off
The script should sound natural when read aloud - use conversational language, not formal news anchor style.
Respond in JSON format:
{
"title": "Podcast title for this episode",
"description": "Brief episode description",
"segments": [
{
"type": "intro|news|analysis|transition|outro",
"title": "Segment title",
"content": "Full text to be spoken",
"highlights": ["Key point 1", "Key point 2"],
"sources": [{"title": "Source title", "url": "url", "publisher": "publisher"}]
}
]
}`, opts.Date.Format("2006-01-02"), opts.Duration, opts.Duration/60, opts.Topics, langInstruction, string(newsJSON))

	result, err := g.llm.GenerateText(ctx, llm.StreamRequest{
		Messages: []llm.Message{{Role: "user", Content: prompt}},
	})
	if err != nil {
		return nil, err
	}

	var parsed struct {
		Title       string `json:"title"`
		Description string `json:"description"`
		Segments    []struct {
			Type       string   `json:"type"`
			Title      string   `json:"title"`
			Content    string   `json:"content"`
			Highlights []string `json:"highlights"`
			Sources    []struct {
				Title     string `json:"title"`
				URL       string `json:"url"`
				Publisher string `json:"publisher"`
			} `json:"sources"`
		} `json:"segments"`
	}
	// A reply without segments is as unusable as malformed JSON: it would
	// produce an episode with no title and an empty transcript.
	if err := json.Unmarshal([]byte(extractJSON(result)), &parsed); err != nil || len(parsed.Segments) == 0 {
		return g.generateDefaultScript(opts)
	}

	script := &PodcastScript{
		Title:       parsed.Title,
		Description: parsed.Description,
		Segments:    make([]PodcastSegment, 0, len(parsed.Segments)),
		Sources:     make([]Source, 0),
	}

	var fullText strings.Builder
	currentTime := 0
	for i, seg := range parsed.Segments {
		wordCount := len(strings.Fields(seg.Content))
		segDuration := int(float64(wordCount) / avgWordsPerSecond)
		if segDuration < minSegmentSeconds {
			segDuration = minSegmentSeconds
		}

		segment := PodcastSegment{
			ID:         uuid.New().String(),
			Type:       seg.Type,
			Title:      seg.Title,
			Content:    seg.Content,
			Duration:   segDuration,
			StartTime:  currentTime,
			EndTime:    currentTime + segDuration,
			Highlights: seg.Highlights,
		}
		for _, src := range seg.Sources {
			source := Source{
				Title:     src.Title,
				URL:       src.URL,
				Publisher: src.Publisher,
			}
			segment.Sources = append(segment.Sources, source)
			script.Sources = append(script.Sources, source)
		}
		script.Segments = append(script.Segments, segment)

		fullText.WriteString(seg.Content)
		if i < len(parsed.Segments)-1 {
			fullText.WriteString("\n\n")
		}
		currentTime += segDuration
	}
	script.FullText = fullText.String()

	return script, nil
}
// generateDefaultScript builds a fixed-template Russian script without using
// the LLM: a greeting, every news item rendered as "Title. Summary", and a
// sign-off. It is the fallback for unusable LLM replies and always returns a
// nil error.
//
// The intro and outro are budgeted 15 seconds each and the news segment gets
// the rest of opts.Duration, with a 10-second floor so that a short requested
// duration cannot yield a negative segment length. Segments carry consecutive
// start/end offsets, and news items that have a URL are listed as sources of
// both the news segment and the script.
//
// NOTE(review): the date is formatted with Go's English month names and then
// embedded in Russian text (for example "Сегодня 27 February 2026").
func (g *PodcastGenerator) generateDefaultScript(opts GenerateOptions) (*PodcastScript, error) {
	const (
		introSeconds   = 15
		outroSeconds   = 15
		minNewsSeconds = 10
	)

	date := opts.Date.Format("2 January 2006")
	intro := fmt.Sprintf("Добрый день! С вами GooSeek Daily — ваш ежедневный подкаст с главными новостями. Сегодня %s, и вот что происходит в мире.", date)
	outro := "На этом всё на сегодня. Спасибо, что слушаете GooSeek Daily! Подписывайтесь на наш подкаст и до встречи завтра."

	var newsContent strings.Builder
	sources := make([]Source, 0, len(opts.NewsItems))
	for i, news := range opts.NewsItems {
		if i > 0 {
			newsContent.WriteString("\n\n")
		}
		fmt.Fprintf(&newsContent, "%s. %s", news.Title, news.Summary)
		if news.URL != "" {
			sources = append(sources, Source{
				Title:     news.Title,
				URL:       news.URL,
				Publisher: news.Source,
				Date:      news.PublishedAt,
			})
		}
	}
	newsText := newsContent.String()

	newsSeconds := opts.Duration - introSeconds - outroSeconds
	if newsSeconds < minNewsSeconds {
		newsSeconds = minNewsSeconds
	}
	newsStart := introSeconds
	outroStart := newsStart + newsSeconds

	return &PodcastScript{
		Title:       fmt.Sprintf("GooSeek Daily — %s", date),
		Description: "Ежедневный подкаст с главными новостями",
		FullText:    fmt.Sprintf("%s\n\n%s\n\n%s", intro, newsText, outro),
		Segments: []PodcastSegment{
			{
				ID:        uuid.New().String(),
				Type:      "intro",
				Title:     "Вступление",
				Content:   intro,
				Duration:  introSeconds,
				StartTime: 0,
				EndTime:   newsStart,
			},
			{
				ID:        uuid.New().String(),
				Type:      "news",
				Title:     "Новости",
				Content:   newsText,
				Duration:  newsSeconds,
				StartTime: newsStart,
				EndTime:   outroStart,
				Sources:   sources,
			},
			{
				ID:        uuid.New().String(),
				Type:      "outro",
				Title:     "Завершение",
				Content:   outro,
				Duration:  outroSeconds,
				StartTime: outroStart,
				EndTime:   outroStart + outroSeconds,
			},
		},
		Sources: sources,
	}, nil
}
// GenerateAudio synthesizes the podcast's transcript with the configured TTS
// client and returns the raw audio bytes.
//
// The podcast's Status is updated in place: StatusGenerating while the
// request runs, then StatusReady on success or StatusFailed on error. The
// audio is only returned; it is not stored on the podcast.
//
// An error is returned when no TTS client is configured, when podcast is nil
// (previously a nil-pointer panic), or when the TTS client fails.
func (g *PodcastGenerator) GenerateAudio(ctx context.Context, podcast *Podcast) ([]byte, error) {
	if g.ttsClient == nil {
		return nil, fmt.Errorf("TTS client not configured")
	}
	if podcast == nil {
		return nil, fmt.Errorf("podcast is nil")
	}

	podcast.Status = StatusGenerating
	audioData, err := g.ttsClient.GenerateSpeech(ctx, podcast.Transcript, podcast.VoiceConfig)
	if err != nil {
		podcast.Status = StatusFailed
		return nil, fmt.Errorf("failed to generate audio: %w", err)
	}

	podcast.Status = StatusReady
	return audioData, nil
}
// GenerateWeeklySummary produces a weekly-type podcast covering weeklyNews in
// the given locale, targeting 900 seconds. It is a preset on top of
// GenerateDailyPodcast and shares its defaults and error behavior.
func (g *PodcastGenerator) GenerateWeeklySummary(ctx context.Context, weeklyNews []NewsItem, locale string) (*Podcast, error) {
	var weekly GenerateOptions
	weekly.Type = PodcastWeekly
	weekly.NewsItems = weeklyNews
	weekly.Duration = 900
	weekly.Locale = locale
	weekly.IncludeIntro = true
	weekly.IncludeOutro = true

	return g.GenerateDailyPodcast(ctx, weekly)
}
// GenerateTopicDeepDive produces a topic_deep-type podcast about a single
// topic, based on articles and written in the given locale, targeting 600
// seconds. It is a preset on top of GenerateDailyPodcast and shares its
// defaults and error behavior.
func (g *PodcastGenerator) GenerateTopicDeepDive(ctx context.Context, topic string, articles []NewsItem, locale string) (*Podcast, error) {
	var deepDive GenerateOptions
	deepDive.Type = PodcastTopicDeep
	deepDive.Topics = []string{topic}
	deepDive.NewsItems = articles
	deepDive.Duration = 600
	deepDive.Locale = locale
	deepDive.IncludeIntro = true
	deepDive.IncludeOutro = true

	return g.GenerateDailyPodcast(ctx, deepDive)
}
// extractJSON returns the first brace-balanced {...} span found in text, so
// that a JSON object can be recovered from a reply that surrounds it with
// prose or code fences. It returns "{}" when text contains no opening brace
// or the braces never balance.
//
// Braces inside JSON string literals are ignored, including strings that
// contain escaped quotes; counting them would cut the object short whenever a
// string value contains "{" or "}". The result is not validated as JSON —
// callers still need to unmarshal it.
func extractJSON(text string) string {
	start := strings.Index(text, "{")
	if start == -1 {
		return "{}"
	}

	depth := 0
	inString := false
	escaped := false
	for i := start; i < len(text); i++ {
		c := text[i]

		if inString {
			switch {
			case escaped:
				// The character after a backslash never terminates the string.
				escaped = false
			case c == '\\':
				escaped = true
			case c == '"':
				inString = false
			}
			continue
		}

		switch c {
		case '"':
			inString = true
		case '{':
			depth++
		case '}':
			depth--
			if depth == 0 {
				return text[start : i+1]
			}
		}
	}
	return "{}"
}
// ToJSON encodes the podcast as JSON using the field names declared in the
// struct tags. It is the inverse of ParsePodcast.
func (p *Podcast) ToJSON() ([]byte, error) {
	encoded, err := json.Marshal(p)
	return encoded, err
}
// ParsePodcast decodes a JSON document, such as one produced by
// Podcast.ToJSON, into a new Podcast. On a decoding error it returns a nil
// podcast together with that error.
func ParsePodcast(data []byte) (*Podcast, error) {
	decoded := new(Podcast)
	if err := json.Unmarshal(data, decoded); err != nil {
		return nil, err
	}
	return decoded, nil
}
// ElevenLabsTTS is a TTSClient that calls the ElevenLabs text-to-speech HTTP
// API. Create it with NewElevenLabsTTS.
type ElevenLabsTTS struct {
// apiKey is sent in the "xi-api-key" request header.
apiKey string
httpClient *http.Client
// baseURL is the API root, without a trailing slash.
baseURL string
}
// NewElevenLabsTTS returns an ElevenLabs client that authenticates with
// apiKey, targets the public v1 API endpoint and uses an HTTP client with a
// 120-second timeout.
func NewElevenLabsTTS(apiKey string) *ElevenLabsTTS {
	client := new(ElevenLabsTTS)
	client.apiKey = apiKey
	client.baseURL = "https://api.elevenlabs.io/v1"
	client.httpClient = &http.Client{Timeout: 120 * time.Second}
	return client
}
// GenerateSpeech sends text to the ElevenLabs text-to-speech endpoint and
// returns the audio bytes of the response (requested as audio/mpeg).
//
// Only config.VoiceID is used; when it is empty a built-in default voice is
// selected. The model and voice settings are fixed.
//
// An error is returned when the request cannot be built or sent, when the
// API answers with a status other than 200, or when reading the response
// body fails. A failed read is reported rather than returning the bytes
// received so far, so callers never get truncated audio as a success.
func (t *ElevenLabsTTS) GenerateSpeech(ctx context.Context, text string, config VoiceConfig) ([]byte, error) {
	voiceID := config.VoiceID
	if voiceID == "" {
		voiceID = "21m00Tcm4TlvDq8ikWAM"
	}
	endpoint := fmt.Sprintf("%s/text-to-speech/%s", t.baseURL, voiceID)

	payload := map[string]interface{}{
		"text":     text,
		"model_id": "eleven_multilingual_v2",
		"voice_settings": map[string]interface{}{
			"stability":         0.5,
			"similarity_boost":  0.75,
			"style":             0.5,
			"use_speaker_boost": true,
		},
	}
	bodyJSON, err := json.Marshal(payload)
	if err != nil {
		return nil, fmt.Errorf("encoding ElevenLabs request: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, strings.NewReader(string(bodyJSON)))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("xi-api-key", t.apiKey)
	req.Header.Set("Accept", "audio/mpeg")

	resp, err := t.httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("ElevenLabs API error: %d", resp.StatusCode)
	}

	audioData, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading ElevenLabs audio response: %w", err)
	}
	return audioData, nil
}
// DummyTTS is a TTSClient that performs no synthesis. It can stand in for a
// real provider where audio output is not needed.
type DummyTTS struct{}

// GenerateSpeech ignores all of its arguments and returns an empty, non-nil
// byte slice with a nil error.
func (t *DummyTTS) GenerateSpeech(ctx context.Context, text string, config VoiceConfig) ([]byte, error) {
	silence := make([]byte, 0)
	return silence, nil
}