Major changes: - Add Go backend (backend/) with microservices architecture - Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler, proxy-manager, media-search, fastClassifier, language detection - New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard, UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions - Improved discover-svc with discover-db integration - Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md) - Library-svc: project_id schema migration - Remove deprecated finance-svc and travel-svc - Localization improvements across services Made-with: Cursor
508 lines
13 KiB
Go
508 lines
13 KiB
Go
package podcast
|
||
|
||
import (
|
||
"context"
|
||
"encoding/json"
|
||
"fmt"
|
||
"net/http"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/gooseek/backend/internal/llm"
|
||
"github.com/google/uuid"
|
||
)
|
||
|
||
type PodcastType string
|
||
|
||
const (
|
||
PodcastDaily PodcastType = "daily"
|
||
PodcastWeekly PodcastType = "weekly"
|
||
PodcastTopicDeep PodcastType = "topic_deep"
|
||
PodcastBreaking PodcastType = "breaking"
|
||
)
|
||
|
||
type VoiceStyle string
|
||
|
||
const (
|
||
VoiceNeutral VoiceStyle = "neutral"
|
||
VoiceEnthusiastic VoiceStyle = "enthusiastic"
|
||
VoiceProfessional VoiceStyle = "professional"
|
||
VoiceCasual VoiceStyle = "casual"
|
||
VoiceStorytelling VoiceStyle = "storytelling"
|
||
)
|
||
|
||
type Podcast struct {
|
||
ID string `json:"id"`
|
||
Title string `json:"title"`
|
||
Description string `json:"description"`
|
||
Type PodcastType `json:"type"`
|
||
Date time.Time `json:"date"`
|
||
Duration int `json:"durationSeconds"`
|
||
AudioURL string `json:"audioUrl,omitempty"`
|
||
Transcript string `json:"transcript"`
|
||
Segments []PodcastSegment `json:"segments"`
|
||
Topics []string `json:"topics"`
|
||
Sources []Source `json:"sources"`
|
||
Thumbnail string `json:"thumbnail,omitempty"`
|
||
Status PodcastStatus `json:"status"`
|
||
GeneratedAt time.Time `json:"generatedAt"`
|
||
PublishedAt *time.Time `json:"publishedAt,omitempty"`
|
||
Locale string `json:"locale"`
|
||
VoiceConfig VoiceConfig `json:"voiceConfig"`
|
||
}
|
||
|
||
type PodcastStatus string
|
||
|
||
const (
|
||
StatusDraft PodcastStatus = "draft"
|
||
StatusGenerating PodcastStatus = "generating"
|
||
StatusReady PodcastStatus = "ready"
|
||
StatusPublished PodcastStatus = "published"
|
||
StatusFailed PodcastStatus = "failed"
|
||
)
|
||
|
||
type PodcastSegment struct {
|
||
ID string `json:"id"`
|
||
Type string `json:"type"`
|
||
Title string `json:"title"`
|
||
Content string `json:"content"`
|
||
Duration int `json:"durationSeconds"`
|
||
StartTime int `json:"startTime"`
|
||
EndTime int `json:"endTime"`
|
||
Sources []Source `json:"sources,omitempty"`
|
||
Highlights []string `json:"highlights,omitempty"`
|
||
}
|
||
|
||
type Source struct {
|
||
Title string `json:"title"`
|
||
URL string `json:"url"`
|
||
Publisher string `json:"publisher"`
|
||
Date string `json:"date,omitempty"`
|
||
}
|
||
|
||
type VoiceConfig struct {
|
||
Provider string `json:"provider"`
|
||
VoiceID string `json:"voiceId"`
|
||
Style VoiceStyle `json:"style"`
|
||
Speed float64 `json:"speed"`
|
||
Pitch float64 `json:"pitch"`
|
||
Language string `json:"language"`
|
||
}
|
||
|
||
type PodcastGenerator struct {
|
||
llm llm.Client
|
||
ttsClient TTSClient
|
||
httpClient *http.Client
|
||
config GeneratorConfig
|
||
}
|
||
|
||
type GeneratorConfig struct {
|
||
DefaultDuration int
|
||
MaxDuration int
|
||
DefaultVoice VoiceConfig
|
||
OutputDir string
|
||
}
|
||
|
||
type TTSClient interface {
|
||
GenerateSpeech(ctx context.Context, text string, config VoiceConfig) ([]byte, error)
|
||
}
|
||
|
||
func NewPodcastGenerator(llmClient llm.Client, ttsClient TTSClient, cfg GeneratorConfig) *PodcastGenerator {
|
||
if cfg.DefaultDuration == 0 {
|
||
cfg.DefaultDuration = 300
|
||
}
|
||
if cfg.MaxDuration == 0 {
|
||
cfg.MaxDuration = 1800
|
||
}
|
||
if cfg.DefaultVoice.Provider == "" {
|
||
cfg.DefaultVoice = VoiceConfig{
|
||
Provider: "elevenlabs",
|
||
VoiceID: "21m00Tcm4TlvDq8ikWAM",
|
||
Style: VoiceNeutral,
|
||
Speed: 1.0,
|
||
Pitch: 1.0,
|
||
Language: "ru",
|
||
}
|
||
}
|
||
|
||
return &PodcastGenerator{
|
||
llm: llmClient,
|
||
ttsClient: ttsClient,
|
||
httpClient: &http.Client{Timeout: 60 * time.Second},
|
||
config: cfg,
|
||
}
|
||
}
|
||
|
||
type GenerateOptions struct {
|
||
Type PodcastType
|
||
Topics []string
|
||
NewsItems []NewsItem
|
||
Date time.Time
|
||
Duration int
|
||
Locale string
|
||
VoiceConfig *VoiceConfig
|
||
IncludeIntro bool
|
||
IncludeOutro bool
|
||
PersonalizeFor string
|
||
}
|
||
|
||
type NewsItem struct {
|
||
Title string `json:"title"`
|
||
Summary string `json:"summary"`
|
||
URL string `json:"url"`
|
||
Source string `json:"source"`
|
||
PublishedAt string `json:"publishedAt"`
|
||
Topics []string `json:"topics"`
|
||
Importance int `json:"importance"`
|
||
}
|
||
|
||
func (g *PodcastGenerator) GenerateDailyPodcast(ctx context.Context, opts GenerateOptions) (*Podcast, error) {
|
||
if opts.Date.IsZero() {
|
||
opts.Date = time.Now()
|
||
}
|
||
if opts.Duration == 0 {
|
||
opts.Duration = g.config.DefaultDuration
|
||
}
|
||
if opts.Locale == "" {
|
||
opts.Locale = "ru"
|
||
}
|
||
|
||
script, err := g.generateScript(ctx, opts)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to generate script: %w", err)
|
||
}
|
||
|
||
podcast := &Podcast{
|
||
ID: uuid.New().String(),
|
||
Title: script.Title,
|
||
Description: script.Description,
|
||
Type: opts.Type,
|
||
Date: opts.Date,
|
||
Duration: opts.Duration,
|
||
Transcript: script.FullText,
|
||
Segments: script.Segments,
|
||
Topics: opts.Topics,
|
||
Sources: script.Sources,
|
||
Status: StatusDraft,
|
||
GeneratedAt: time.Now(),
|
||
Locale: opts.Locale,
|
||
VoiceConfig: g.config.DefaultVoice,
|
||
}
|
||
|
||
if opts.VoiceConfig != nil {
|
||
podcast.VoiceConfig = *opts.VoiceConfig
|
||
}
|
||
|
||
return podcast, nil
|
||
}
|
||
|
||
type PodcastScript struct {
|
||
Title string
|
||
Description string
|
||
FullText string
|
||
Segments []PodcastSegment
|
||
Sources []Source
|
||
}
|
||
|
||
func (g *PodcastGenerator) generateScript(ctx context.Context, opts GenerateOptions) (*PodcastScript, error) {
|
||
locale := opts.Locale
|
||
langInstruction := ""
|
||
if locale == "ru" {
|
||
langInstruction = "Generate the entire script in Russian language. Use natural Russian speech patterns."
|
||
}
|
||
|
||
newsJSON, _ := json.Marshal(opts.NewsItems)
|
||
|
||
prompt := fmt.Sprintf(`Create a podcast script for a daily news digest.
|
||
|
||
Date: %s
|
||
Duration target: %d seconds (approximately %d minutes)
|
||
Topics: %v
|
||
%s
|
||
|
||
News items to cover:
|
||
%s
|
||
|
||
Create an engaging podcast script with these requirements:
|
||
1. Start with a catchy introduction greeting the audience
|
||
2. Cover the most important news first
|
||
3. Transition smoothly between stories
|
||
4. Add brief analysis or context where appropriate
|
||
5. End with a summary and sign-off
|
||
|
||
The script should sound natural when read aloud - use conversational language, not formal news anchor style.
|
||
|
||
Respond in JSON format:
|
||
{
|
||
"title": "Podcast title for this episode",
|
||
"description": "Brief episode description",
|
||
"segments": [
|
||
{
|
||
"type": "intro|news|analysis|transition|outro",
|
||
"title": "Segment title",
|
||
"content": "Full text to be spoken",
|
||
"highlights": ["Key point 1", "Key point 2"],
|
||
"sources": [{"title": "Source title", "url": "url", "publisher": "publisher"}]
|
||
}
|
||
]
|
||
}`, opts.Date.Format("2006-01-02"), opts.Duration, opts.Duration/60, opts.Topics, langInstruction, string(newsJSON))
|
||
|
||
result, err := g.llm.GenerateText(ctx, llm.StreamRequest{
|
||
Messages: []llm.Message{{Role: "user", Content: prompt}},
|
||
})
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
jsonStr := extractJSON(result)
|
||
|
||
var parsed struct {
|
||
Title string `json:"title"`
|
||
Description string `json:"description"`
|
||
Segments []struct {
|
||
Type string `json:"type"`
|
||
Title string `json:"title"`
|
||
Content string `json:"content"`
|
||
Highlights []string `json:"highlights"`
|
||
Sources []struct {
|
||
Title string `json:"title"`
|
||
URL string `json:"url"`
|
||
Publisher string `json:"publisher"`
|
||
} `json:"sources"`
|
||
} `json:"segments"`
|
||
}
|
||
|
||
if err := json.Unmarshal([]byte(jsonStr), &parsed); err != nil {
|
||
return g.generateDefaultScript(opts)
|
||
}
|
||
|
||
script := &PodcastScript{
|
||
Title: parsed.Title,
|
||
Description: parsed.Description,
|
||
Segments: make([]PodcastSegment, 0),
|
||
Sources: make([]Source, 0),
|
||
}
|
||
|
||
var fullTextBuilder strings.Builder
|
||
currentTime := 0
|
||
avgWordsPerSecond := 2.5
|
||
|
||
for i, seg := range parsed.Segments {
|
||
wordCount := len(strings.Fields(seg.Content))
|
||
segDuration := int(float64(wordCount) / avgWordsPerSecond)
|
||
if segDuration < 10 {
|
||
segDuration = 10
|
||
}
|
||
|
||
segment := PodcastSegment{
|
||
ID: uuid.New().String(),
|
||
Type: seg.Type,
|
||
Title: seg.Title,
|
||
Content: seg.Content,
|
||
Duration: segDuration,
|
||
StartTime: currentTime,
|
||
EndTime: currentTime + segDuration,
|
||
Highlights: seg.Highlights,
|
||
}
|
||
|
||
for _, src := range seg.Sources {
|
||
source := Source{
|
||
Title: src.Title,
|
||
URL: src.URL,
|
||
Publisher: src.Publisher,
|
||
}
|
||
segment.Sources = append(segment.Sources, source)
|
||
script.Sources = append(script.Sources, source)
|
||
}
|
||
|
||
script.Segments = append(script.Segments, segment)
|
||
|
||
fullTextBuilder.WriteString(seg.Content)
|
||
if i < len(parsed.Segments)-1 {
|
||
fullTextBuilder.WriteString("\n\n")
|
||
}
|
||
|
||
currentTime += segDuration
|
||
}
|
||
|
||
script.FullText = fullTextBuilder.String()
|
||
|
||
return script, nil
|
||
}
|
||
|
||
func (g *PodcastGenerator) generateDefaultScript(opts GenerateOptions) (*PodcastScript, error) {
|
||
date := opts.Date.Format("2 January 2006")
|
||
|
||
intro := fmt.Sprintf("Добрый день! С вами GooSeek Daily — ваш ежедневный подкаст с главными новостями. Сегодня %s, и вот что происходит в мире.", date)
|
||
|
||
var newsContent strings.Builder
|
||
for i, news := range opts.NewsItems {
|
||
if i > 0 {
|
||
newsContent.WriteString("\n\n")
|
||
}
|
||
newsContent.WriteString(fmt.Sprintf("%s. %s", news.Title, news.Summary))
|
||
}
|
||
|
||
outro := "На этом всё на сегодня. Спасибо, что слушаете GooSeek Daily! Подписывайтесь на наш подкаст и до встречи завтра."
|
||
|
||
return &PodcastScript{
|
||
Title: fmt.Sprintf("GooSeek Daily — %s", date),
|
||
Description: "Ежедневный подкаст с главными новостями",
|
||
FullText: fmt.Sprintf("%s\n\n%s\n\n%s", intro, newsContent.String(), outro),
|
||
Segments: []PodcastSegment{
|
||
{ID: uuid.New().String(), Type: "intro", Title: "Вступление", Content: intro, Duration: 15},
|
||
{ID: uuid.New().String(), Type: "news", Title: "Новости", Content: newsContent.String(), Duration: opts.Duration - 30},
|
||
{ID: uuid.New().String(), Type: "outro", Title: "Завершение", Content: outro, Duration: 15},
|
||
},
|
||
}, nil
|
||
}
|
||
|
||
func (g *PodcastGenerator) GenerateAudio(ctx context.Context, podcast *Podcast) ([]byte, error) {
|
||
if g.ttsClient == nil {
|
||
return nil, fmt.Errorf("TTS client not configured")
|
||
}
|
||
|
||
podcast.Status = StatusGenerating
|
||
|
||
audioData, err := g.ttsClient.GenerateSpeech(ctx, podcast.Transcript, podcast.VoiceConfig)
|
||
if err != nil {
|
||
podcast.Status = StatusFailed
|
||
return nil, fmt.Errorf("failed to generate audio: %w", err)
|
||
}
|
||
|
||
podcast.Status = StatusReady
|
||
|
||
return audioData, nil
|
||
}
|
||
|
||
func (g *PodcastGenerator) GenerateWeeklySummary(ctx context.Context, weeklyNews []NewsItem, locale string) (*Podcast, error) {
|
||
return g.GenerateDailyPodcast(ctx, GenerateOptions{
|
||
Type: PodcastWeekly,
|
||
NewsItems: weeklyNews,
|
||
Duration: 900,
|
||
Locale: locale,
|
||
IncludeIntro: true,
|
||
IncludeOutro: true,
|
||
})
|
||
}
|
||
|
||
func (g *PodcastGenerator) GenerateTopicDeepDive(ctx context.Context, topic string, articles []NewsItem, locale string) (*Podcast, error) {
|
||
return g.GenerateDailyPodcast(ctx, GenerateOptions{
|
||
Type: PodcastTopicDeep,
|
||
Topics: []string{topic},
|
||
NewsItems: articles,
|
||
Duration: 600,
|
||
Locale: locale,
|
||
IncludeIntro: true,
|
||
IncludeOutro: true,
|
||
})
|
||
}
|
||
|
||
func extractJSON(text string) string {
|
||
start := strings.Index(text, "{")
|
||
if start == -1 {
|
||
return "{}"
|
||
}
|
||
|
||
depth := 0
|
||
for i := start; i < len(text); i++ {
|
||
if text[i] == '{' {
|
||
depth++
|
||
} else if text[i] == '}' {
|
||
depth--
|
||
if depth == 0 {
|
||
return text[start : i+1]
|
||
}
|
||
}
|
||
}
|
||
|
||
return "{}"
|
||
}
|
||
|
||
func (p *Podcast) ToJSON() ([]byte, error) {
|
||
return json.Marshal(p)
|
||
}
|
||
|
||
func ParsePodcast(data []byte) (*Podcast, error) {
|
||
var podcast Podcast
|
||
if err := json.Unmarshal(data, &podcast); err != nil {
|
||
return nil, err
|
||
}
|
||
return &podcast, nil
|
||
}
|
||
|
||
type ElevenLabsTTS struct {
|
||
apiKey string
|
||
httpClient *http.Client
|
||
baseURL string
|
||
}
|
||
|
||
func NewElevenLabsTTS(apiKey string) *ElevenLabsTTS {
|
||
return &ElevenLabsTTS{
|
||
apiKey: apiKey,
|
||
httpClient: &http.Client{Timeout: 120 * time.Second},
|
||
baseURL: "https://api.elevenlabs.io/v1",
|
||
}
|
||
}
|
||
|
||
func (t *ElevenLabsTTS) GenerateSpeech(ctx context.Context, text string, config VoiceConfig) ([]byte, error) {
|
||
voiceID := config.VoiceID
|
||
if voiceID == "" {
|
||
voiceID = "21m00Tcm4TlvDq8ikWAM"
|
||
}
|
||
|
||
url := fmt.Sprintf("%s/text-to-speech/%s", t.baseURL, voiceID)
|
||
|
||
body := map[string]interface{}{
|
||
"text": text,
|
||
"model_id": "eleven_multilingual_v2",
|
||
"voice_settings": map[string]interface{}{
|
||
"stability": 0.5,
|
||
"similarity_boost": 0.75,
|
||
"style": 0.5,
|
||
"use_speaker_boost": true,
|
||
},
|
||
}
|
||
|
||
bodyJSON, _ := json.Marshal(body)
|
||
|
||
req, err := http.NewRequestWithContext(ctx, "POST", url, strings.NewReader(string(bodyJSON)))
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
req.Header.Set("Content-Type", "application/json")
|
||
req.Header.Set("xi-api-key", t.apiKey)
|
||
req.Header.Set("Accept", "audio/mpeg")
|
||
|
||
resp, err := t.httpClient.Do(req)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
defer resp.Body.Close()
|
||
|
||
if resp.StatusCode != http.StatusOK {
|
||
return nil, fmt.Errorf("ElevenLabs API error: %d", resp.StatusCode)
|
||
}
|
||
|
||
var audioData []byte
|
||
buf := make([]byte, 32*1024)
|
||
for {
|
||
n, err := resp.Body.Read(buf)
|
||
if n > 0 {
|
||
audioData = append(audioData, buf[:n]...)
|
||
}
|
||
if err != nil {
|
||
break
|
||
}
|
||
}
|
||
|
||
return audioData, nil
|
||
}
|
||
|
||
type DummyTTS struct{}
|
||
|
||
func (t *DummyTTS) GenerateSpeech(ctx context.Context, text string, config VoiceConfig) ([]byte, error) {
|
||
return []byte{}, nil
|
||
}
|