feat: Go backend, enhanced search, new widgets, Docker deploy

Major changes:
- Add Go backend (backend/) with microservices architecture
- Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler,
  proxy-manager, media-search, fastClassifier, language detection
- New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard,
  UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions
- Improved discover-svc with discover-db integration
- Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md)
- Library-svc: project_id schema migration
- Remove deprecated finance-svc and travel-svc
- Localization improvements across services

Made-with: Cursor
This commit is contained in:
home
2026-02-27 04:15:32 +03:00
parent 328d968f3f
commit 06fe57c765
285 changed files with 53132 additions and 1871 deletions

View File

@@ -0,0 +1,507 @@
package podcast
import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"

	"github.com/google/uuid"
	"github.com/gooseek/backend/internal/llm"
)
// PodcastType names the kind of episode a Podcast represents. The underlying
// string is the value stored in the Podcast's "type" JSON field.
type PodcastType string
// Podcast types declared by this package. In this file PodcastWeekly is used
// by GenerateWeeklySummary and PodcastTopicDeep by GenerateTopicDeepDive.
const (
PodcastDaily PodcastType = "daily"
PodcastWeekly PodcastType = "weekly"
PodcastTopicDeep PodcastType = "topic_deep"
PodcastBreaking PodcastType = "breaking"
)
// VoiceStyle labels the speaking style stored in VoiceConfig.Style. The
// underlying string is the value written to JSON.
type VoiceStyle string
// Voice styles declared by this package. VoiceNeutral is the style of the
// default voice installed by NewPodcastGenerator.
const (
VoiceNeutral VoiceStyle = "neutral"
VoiceEnthusiastic VoiceStyle = "enthusiastic"
VoiceProfessional VoiceStyle = "professional"
VoiceCasual VoiceStyle = "casual"
VoiceStorytelling VoiceStyle = "storytelling"
)
// Podcast is a generated episode: metadata, the full transcript, the
// per-segment breakdown and the voice settings used for synthesis. It is the
// value serialized by ToJSON and restored by ParsePodcast.
type Podcast struct {
// ID is a UUID string assigned when the podcast is generated.
ID string `json:"id"`
Title string `json:"title"`
Description string `json:"description"`
Type PodcastType `json:"type"`
// Date is the day the episode covers (taken from GenerateOptions.Date).
Date time.Time `json:"date"`
// Duration is the episode length in seconds (JSON key "durationSeconds").
Duration int `json:"durationSeconds"`
AudioURL string `json:"audioUrl,omitempty"`
// Transcript is the full spoken text; GenerateAudio sends it to the TTS client.
Transcript string `json:"transcript"`
Segments []PodcastSegment `json:"segments"`
Topics []string `json:"topics"`
Sources []Source `json:"sources"`
Thumbnail string `json:"thumbnail,omitempty"`
// Status is the lifecycle state; GenerateAudio updates it in place.
Status PodcastStatus `json:"status"`
// GeneratedAt is the wall-clock time at which the podcast value was built.
GeneratedAt time.Time `json:"generatedAt"`
// PublishedAt is optional and omitted from JSON when nil.
PublishedAt *time.Time `json:"publishedAt,omitempty"`
Locale string `json:"locale"`
VoiceConfig VoiceConfig `json:"voiceConfig"`
}
// PodcastStatus is the lifecycle state recorded in Podcast.Status. The
// underlying string is the value written to JSON.
type PodcastStatus string
// Lifecycle states. In this file GenerateDailyPodcast creates podcasts as
// StatusDraft, and GenerateAudio moves them to StatusGenerating and then to
// StatusReady or StatusFailed. StatusPublished is not assigned in this file.
const (
StatusDraft PodcastStatus = "draft"
StatusGenerating PodcastStatus = "generating"
StatusReady PodcastStatus = "ready"
StatusPublished PodcastStatus = "published"
StatusFailed PodcastStatus = "failed"
)
// PodcastSegment is one contiguous part of an episode script.
type PodcastSegment struct {
// ID is a UUID string assigned when the segment is created.
ID string `json:"id"`
// Type is a free-form label; the script prompt asks the LLM for one of
// intro, news, analysis, transition or outro.
Type string `json:"type"`
Title string `json:"title"`
// Content is the text to be spoken for this segment.
Content string `json:"content"`
// Duration is the segment length in seconds (JSON key "durationSeconds").
Duration int `json:"durationSeconds"`
// StartTime and EndTime are offsets in seconds; generateScript fills them
// by accumulating the durations of the preceding segments.
StartTime int `json:"startTime"`
EndTime int `json:"endTime"`
Sources []Source `json:"sources,omitempty"`
Highlights []string `json:"highlights,omitempty"`
}
// Source is a reference to material cited by a podcast or by one of its
// segments.
type Source struct {
Title string `json:"title"`
URL string `json:"url"`
Publisher string `json:"publisher"`
// Date is kept as an unparsed string and omitted from JSON when empty.
Date string `json:"date,omitempty"`
}
// VoiceConfig describes the voice used to synthesize a podcast. It is stored
// on the Podcast and passed to TTSClient.GenerateSpeech.
//
// NOTE(review): the ElevenLabs client in this file reads only VoiceID; how the
// other fields are interpreted is up to the TTSClient implementation.
type VoiceConfig struct {
Provider string `json:"provider"`
VoiceID string `json:"voiceId"`
Style VoiceStyle `json:"style"`
Speed float64 `json:"speed"`
Pitch float64 `json:"pitch"`
Language string `json:"language"`
}
// PodcastGenerator builds podcast scripts with an LLM and turns transcripts
// into audio through a TTSClient. Construct it with NewPodcastGenerator so the
// configuration defaults are applied.
type PodcastGenerator struct {
// llm produces the script text (see generateScript).
llm llm.Client
// ttsClient may be nil; GenerateAudio then returns an error.
ttsClient TTSClient
// httpClient is created by NewPodcastGenerator with a 60-second timeout.
httpClient *http.Client
config GeneratorConfig
}
// GeneratorConfig holds the tunables of a PodcastGenerator. Zero values are
// replaced with defaults by NewPodcastGenerator.
type GeneratorConfig struct {
// DefaultDuration is the target episode length in seconds used when the
// caller does not request one; NewPodcastGenerator sets 300 when it is zero.
DefaultDuration int
// MaxDuration is the upper bound for episode length in seconds;
// NewPodcastGenerator sets 1800 when it is zero.
MaxDuration int
// DefaultVoice is used when GenerateOptions.VoiceConfig is nil. It is
// replaced wholesale by a built-in voice when its Provider is empty.
DefaultVoice VoiceConfig
// OutputDir is carried in the config; nothing in this file reads it.
OutputDir string
}
// TTSClient converts text to speech. ElevenLabsTTS and DummyTTS in this file
// both provide a GenerateSpeech method with this signature.
type TTSClient interface {
// GenerateSpeech returns the synthesized audio bytes for text using the
// given voice configuration.
GenerateSpeech(ctx context.Context, text string, config VoiceConfig) ([]byte, error)
}
// NewPodcastGenerator wires an LLM client and a TTS client into a
// PodcastGenerator and fills in configuration defaults.
//
// Defaults applied to cfg (which is received by value, so the caller's copy is
// untouched):
//   - DefaultDuration: 300 when zero
//   - MaxDuration: 1800 when zero
//   - DefaultVoice: a built-in ElevenLabs voice when DefaultVoice.Provider is
//     empty (the whole VoiceConfig is replaced, not merged)
//
// ttsClient may be nil; audio generation then fails with an error.
func NewPodcastGenerator(llmClient llm.Client, ttsClient TTSClient, cfg GeneratorConfig) *PodcastGenerator {
	gen := &PodcastGenerator{
		llm:        llmClient,
		ttsClient:  ttsClient,
		httpClient: &http.Client{Timeout: 60 * time.Second},
	}

	if cfg.DefaultDuration == 0 {
		cfg.DefaultDuration = 300
	}
	if cfg.MaxDuration == 0 {
		cfg.MaxDuration = 1800
	}
	if cfg.DefaultVoice.Provider == "" {
		var voice VoiceConfig
		voice.Provider = "elevenlabs"
		voice.VoiceID = "21m00Tcm4TlvDq8ikWAM"
		voice.Style = VoiceNeutral
		voice.Speed = 1.0
		voice.Pitch = 1.0
		voice.Language = "ru"
		cfg.DefaultVoice = voice
	}

	gen.config = cfg
	return gen
}
// GenerateOptions describes a single podcast generation request.
type GenerateOptions struct {
// Type is copied onto the resulting Podcast.
Type PodcastType
// Topics is included in the LLM prompt and copied onto the Podcast.
Topics []string
// NewsItems is the material the script is built from; it is sent to the
// LLM as JSON.
NewsItems []NewsItem
// Date is the day the episode covers; the zero value means "now".
Date time.Time
// Duration is the target length in seconds; zero selects the generator's
// DefaultDuration.
Duration int
// Locale selects the script language; empty means "ru".
Locale string
// VoiceConfig overrides the generator's default voice when non-nil.
VoiceConfig *VoiceConfig
// NOTE(review): IncludeIntro, IncludeOutro and PersonalizeFor are not read
// by the generation code in this file — confirm whether they are consumed
// elsewhere.
IncludeIntro bool
IncludeOutro bool
PersonalizeFor string
}
// NewsItem is one piece of news offered to the script generator. Items are
// marshaled to JSON with these tags and embedded in the LLM prompt.
type NewsItem struct {
Title string `json:"title"`
Summary string `json:"summary"`
URL string `json:"url"`
// Source is the name of the publishing outlet.
Source string `json:"source"`
// PublishedAt is kept as an unparsed string.
PublishedAt string `json:"publishedAt"`
Topics []string `json:"topics"`
// NOTE(review): the scale and direction of Importance are not defined in
// this file — confirm against the producer of these items.
Importance int `json:"importance"`
}
// GenerateDailyPodcast generates a script for opts and wraps it in a draft
// Podcast. No audio is produced here; call GenerateAudio for that.
//
// Defaults applied to opts before generation:
//   - Type: PodcastDaily when empty, so the result never carries an empty type
//   - Date: time.Now() when zero
//   - Duration: the generator's DefaultDuration when zero or negative, then
//     capped at the generator's MaxDuration when that limit is positive
//   - Locale: "ru" when empty
//
// The podcast uses opts.VoiceConfig when it is non-nil and the generator's
// default voice otherwise. An error is returned only when script generation
// fails.
func (g *PodcastGenerator) GenerateDailyPodcast(ctx context.Context, opts GenerateOptions) (*Podcast, error) {
	if opts.Type == "" {
		opts.Type = PodcastDaily
	}
	if opts.Date.IsZero() {
		opts.Date = time.Now()
	}
	if opts.Duration <= 0 {
		opts.Duration = g.config.DefaultDuration
	}
	// Enforce the configured ceiling. A non-positive limit (e.g. a generator
	// built without NewPodcastGenerator) means "no limit".
	if limit := g.config.MaxDuration; limit > 0 && opts.Duration > limit {
		opts.Duration = limit
	}
	if opts.Locale == "" {
		opts.Locale = "ru"
	}

	script, err := g.generateScript(ctx, opts)
	if err != nil {
		return nil, fmt.Errorf("failed to generate script: %w", err)
	}

	podcast := &Podcast{
		ID:          uuid.New().String(),
		Title:       script.Title,
		Description: script.Description,
		Type:        opts.Type,
		Date:        opts.Date,
		Duration:    opts.Duration,
		Transcript:  script.FullText,
		Segments:    script.Segments,
		Topics:      opts.Topics,
		Sources:     script.Sources,
		Status:      StatusDraft,
		GeneratedAt: time.Now(),
		Locale:      opts.Locale,
		VoiceConfig: g.config.DefaultVoice,
	}
	if opts.VoiceConfig != nil {
		podcast.VoiceConfig = *opts.VoiceConfig
	}
	return podcast, nil
}
// PodcastScript is the intermediate result of script generation, before it is
// copied into a Podcast.
type PodcastScript struct {
Title string
Description string
// FullText is the complete spoken text of the episode.
FullText string
Segments []PodcastSegment
// Sources lists the references cited by the script.
Sources []Source
}
// generateScript asks the LLM for a structured podcast script covering
// opts.NewsItems and converts the JSON reply into a PodcastScript.
//
// The prompt carries the date, the target duration, the topics and the news
// items (as JSON). When opts.Locale is "ru" an extra instruction requests a
// Russian script.
//
// Segment timing is estimated from the word count of each segment at 2.5 words
// per second, with a 10-second minimum; StartTime and EndTime are cumulative.
// FullText is the segment contents joined by blank lines, and Sources is the
// concatenation of all per-segment sources.
//
// If the reply cannot be decoded, or decodes to a script without any segments
// (for example because it contained no JSON object at all), the built-in
// script from generateDefaultScript is returned instead. An error is returned
// when the news items cannot be encoded or the LLM call fails.
func (g *PodcastGenerator) generateScript(ctx context.Context, opts GenerateOptions) (*PodcastScript, error) {
	// Estimated speaking rate and the shortest duration assigned to a segment.
	const (
		avgWordsPerSecond  = 2.5
		minSegmentDuration = 10
	)

	langInstruction := ""
	if opts.Locale == "ru" {
		langInstruction = "Generate the entire script in Russian language. Use natural Russian speech patterns."
	}

	newsJSON, err := json.Marshal(opts.NewsItems)
	if err != nil {
		return nil, fmt.Errorf("encoding news items: %w", err)
	}

	prompt := fmt.Sprintf(`Create a podcast script for a daily news digest.
Date: %s
Duration target: %d seconds (approximately %d minutes)
Topics: %v
%s
News items to cover:
%s
Create an engaging podcast script with these requirements:
1. Start with a catchy introduction greeting the audience
2. Cover the most important news first
3. Transition smoothly between stories
4. Add brief analysis or context where appropriate
5. End with a summary and sign-off
The script should sound natural when read aloud - use conversational language, not formal news anchor style.
Respond in JSON format:
{
"title": "Podcast title for this episode",
"description": "Brief episode description",
"segments": [
{
"type": "intro|news|analysis|transition|outro",
"title": "Segment title",
"content": "Full text to be spoken",
"highlights": ["Key point 1", "Key point 2"],
"sources": [{"title": "Source title", "url": "url", "publisher": "publisher"}]
}
]
}`, opts.Date.Format("2006-01-02"), opts.Duration, opts.Duration/60, opts.Topics, langInstruction, string(newsJSON))

	result, err := g.llm.GenerateText(ctx, llm.StreamRequest{
		Messages: []llm.Message{{Role: "user", Content: prompt}},
	})
	if err != nil {
		return nil, err
	}

	var parsed struct {
		Title       string `json:"title"`
		Description string `json:"description"`
		Segments    []struct {
			Type       string   `json:"type"`
			Title      string   `json:"title"`
			Content    string   `json:"content"`
			Highlights []string `json:"highlights"`
			Sources    []struct {
				Title     string `json:"title"`
				URL       string `json:"url"`
				Publisher string `json:"publisher"`
			} `json:"sources"`
		} `json:"segments"`
	}
	if err := json.Unmarshal([]byte(extractJSON(result)), &parsed); err != nil {
		return g.generateDefaultScript(opts)
	}
	// A reply without usable JSON decodes "successfully" into an empty value;
	// treat that like a parse failure rather than returning an empty episode.
	if len(parsed.Segments) == 0 {
		return g.generateDefaultScript(opts)
	}

	script := &PodcastScript{
		Title:       parsed.Title,
		Description: parsed.Description,
		Segments:    make([]PodcastSegment, 0, len(parsed.Segments)),
		Sources:     make([]Source, 0),
	}

	var fullText strings.Builder
	currentTime := 0
	for i, seg := range parsed.Segments {
		wordCount := len(strings.Fields(seg.Content))
		segDuration := int(float64(wordCount) / avgWordsPerSecond)
		if segDuration < minSegmentDuration {
			segDuration = minSegmentDuration
		}

		segment := PodcastSegment{
			ID:         uuid.New().String(),
			Type:       seg.Type,
			Title:      seg.Title,
			Content:    seg.Content,
			Duration:   segDuration,
			StartTime:  currentTime,
			EndTime:    currentTime + segDuration,
			Highlights: seg.Highlights,
		}
		for _, src := range seg.Sources {
			source := Source{
				Title:     src.Title,
				URL:       src.URL,
				Publisher: src.Publisher,
			}
			segment.Sources = append(segment.Sources, source)
			script.Sources = append(script.Sources, source)
		}
		script.Segments = append(script.Segments, segment)

		fullText.WriteString(seg.Content)
		if i < len(parsed.Segments)-1 {
			fullText.WriteString("\n\n")
		}
		currentTime += segDuration
	}
	script.FullText = fullText.String()
	return script, nil
}
// generateDefaultScript builds a fixed-template Russian script without calling
// the LLM. It is the fallback used when the LLM reply cannot be used.
//
// The script has three segments: a 15-second intro, a news segment that lists
// every item as "Title. Summary", and a 15-second outro. The news segment gets
// whatever remains of opts.Duration after intro and outro, never less than
// zero. StartTime and EndTime are filled cumulatively, and every news item is
// recorded as a Source on both the news segment and the script.
//
// The returned error is always nil; it exists so the call can be returned
// directly from generateScript.
func (g *PodcastGenerator) generateDefaultScript(opts GenerateOptions) (*PodcastScript, error) {
	const (
		introDuration = 15
		outroDuration = 15
	)

	date := opts.Date.Format("2 January 2006")
	intro := fmt.Sprintf("Добрый день! С вами GooSeek Daily — ваш ежедневный подкаст с главными новостями. Сегодня %s, и вот что происходит в мире.", date)
	outro := "На этом всё на сегодня. Спасибо, что слушаете GooSeek Daily! Подписывайтесь на наш подкаст и до встречи завтра."

	var newsContent strings.Builder
	sources := make([]Source, 0, len(opts.NewsItems))
	for i, news := range opts.NewsItems {
		if i > 0 {
			newsContent.WriteString("\n\n")
		}
		fmt.Fprintf(&newsContent, "%s. %s", news.Title, news.Summary)
		sources = append(sources, Source{
			Title:     news.Title,
			URL:       news.URL,
			Publisher: news.Source,
			Date:      news.PublishedAt,
		})
	}
	newsText := newsContent.String()

	// A target shorter than intro+outro must not yield a negative duration.
	newsDuration := opts.Duration - introDuration - outroDuration
	if newsDuration < 0 {
		newsDuration = 0
	}
	newsEnd := introDuration + newsDuration

	// The segment gets its own copy so later appends cannot alias the
	// script-level list.
	var newsSources []Source
	if len(sources) > 0 {
		newsSources = append(newsSources, sources...)
	}

	return &PodcastScript{
		Title:       fmt.Sprintf("GooSeek Daily — %s", date),
		Description: "Ежедневный подкаст с главными новостями",
		FullText:    fmt.Sprintf("%s\n\n%s\n\n%s", intro, newsText, outro),
		Segments: []PodcastSegment{
			{
				ID:        uuid.New().String(),
				Type:      "intro",
				Title:     "Вступление",
				Content:   intro,
				Duration:  introDuration,
				StartTime: 0,
				EndTime:   introDuration,
			},
			{
				ID:        uuid.New().String(),
				Type:      "news",
				Title:     "Новости",
				Content:   newsText,
				Duration:  newsDuration,
				StartTime: introDuration,
				EndTime:   newsEnd,
				Sources:   newsSources,
			},
			{
				ID:        uuid.New().String(),
				Type:      "outro",
				Title:     "Завершение",
				Content:   outro,
				Duration:  outroDuration,
				StartTime: newsEnd,
				EndTime:   newsEnd + outroDuration,
			},
		},
		Sources: sources,
	}, nil
}
// GenerateAudio synthesizes podcast.Transcript with the configured TTS client
// using podcast.VoiceConfig and returns the audio bytes.
//
// podcast.Status is updated in place: StatusGenerating while the request is in
// flight, then StatusReady on success or StatusFailed on error. When no TTS
// client is configured an error is returned and the status is left untouched.
func (g *PodcastGenerator) GenerateAudio(ctx context.Context, podcast *Podcast) ([]byte, error) {
	tts := g.ttsClient
	if tts == nil {
		return nil, fmt.Errorf("TTS client not configured")
	}

	podcast.Status = StatusGenerating
	audio, ttsErr := tts.GenerateSpeech(ctx, podcast.Transcript, podcast.VoiceConfig)
	if ttsErr == nil {
		podcast.Status = StatusReady
		return audio, nil
	}

	podcast.Status = StatusFailed
	return nil, fmt.Errorf("failed to generate audio: %w", ttsErr)
}
// GenerateWeeklySummary builds a PodcastWeekly episode from weeklyNews in the
// given locale. It delegates to GenerateDailyPodcast, requesting a 900-second
// target duration with the intro and outro flags set.
func (g *PodcastGenerator) GenerateWeeklySummary(ctx context.Context, weeklyNews []NewsItem, locale string) (*Podcast, error) {
	var opts GenerateOptions
	opts.Type = PodcastWeekly
	opts.NewsItems = weeklyNews
	opts.Duration = 900
	opts.Locale = locale
	opts.IncludeIntro = true
	opts.IncludeOutro = true
	return g.GenerateDailyPodcast(ctx, opts)
}
// GenerateTopicDeepDive builds a PodcastTopicDeep episode about a single topic
// from the supplied articles in the given locale. It delegates to
// GenerateDailyPodcast, requesting a 600-second target duration with the intro
// and outro flags set.
func (g *PodcastGenerator) GenerateTopicDeepDive(ctx context.Context, topic string, articles []NewsItem, locale string) (*Podcast, error) {
	var opts GenerateOptions
	opts.Type = PodcastTopicDeep
	opts.Topics = []string{topic}
	opts.NewsItems = articles
	opts.Duration = 600
	opts.Locale = locale
	opts.IncludeIntro = true
	opts.IncludeOutro = true
	return g.GenerateDailyPodcast(ctx, opts)
}
// extractJSON returns the first balanced {...} object found in text, or "{}"
// when text contains no opening brace or the object is never closed.
//
// Scanning starts at the first '{'. Braces inside JSON string literals are
// ignored, and backslash escapes inside strings are honoured, so values such
// as "}" or "\"{" do not end or extend the object prematurely. The scan is
// byte-wise, which is safe for UTF-8 because every byte it inspects is ASCII.
func extractJSON(text string) string {
	start := strings.Index(text, "{")
	if start == -1 {
		return "{}"
	}

	depth := 0
	inString := false
	escaped := false
	for i := start; i < len(text); i++ {
		c := text[i]

		if inString {
			switch {
			case escaped:
				// The byte after a backslash is consumed verbatim.
				escaped = false
			case c == '\\':
				escaped = true
			case c == '"':
				inString = false
			}
			continue
		}

		switch c {
		case '"':
			inString = true
		case '{':
			depth++
		case '}':
			depth--
			if depth == 0 {
				return text[start : i+1]
			}
		}
	}
	return "{}"
}
// ToJSON encodes the podcast with encoding/json using the struct's field tags
// and returns the encoder's result unchanged.
func (p *Podcast) ToJSON() ([]byte, error) {
	encoded, err := json.Marshal(p)
	return encoded, err
}
// ParsePodcast decodes JSON data into a new Podcast. It returns a nil podcast
// together with the decoder's error when data is not valid for the type.
func ParsePodcast(data []byte) (*Podcast, error) {
	decoded := new(Podcast)
	err := json.Unmarshal(data, decoded)
	if err != nil {
		return nil, err
	}
	return decoded, nil
}
// ElevenLabsTTS is a client for the ElevenLabs text-to-speech HTTP API. Create
// it with NewElevenLabsTTS.
type ElevenLabsTTS struct {
// apiKey is sent in the "xi-api-key" request header.
apiKey string
httpClient *http.Client
// baseURL is the API root, without a trailing slash.
baseURL string
}
// NewElevenLabsTTS returns an ElevenLabs client that authenticates with apiKey,
// targets https://api.elevenlabs.io/v1 and uses an HTTP client with a
// 120-second timeout.
func NewElevenLabsTTS(apiKey string) *ElevenLabsTTS {
	client := &http.Client{Timeout: 120 * time.Second}

	tts := new(ElevenLabsTTS)
	tts.apiKey = apiKey
	tts.httpClient = client
	tts.baseURL = "https://api.elevenlabs.io/v1"
	return tts
}
// GenerateSpeech synthesizes text through the ElevenLabs text-to-speech
// endpoint and returns the raw response body, requested as audio/mpeg.
//
// Only config.VoiceID is consulted; an empty value selects a built-in default
// voice. The model and voice settings sent to the API are fixed in the request
// body below.
//
// An error is returned when the request cannot be encoded, built or sent, when
// the API answers with a status other than 200, or when the response body
// cannot be read to the end. A partially read body is never returned as
// success.
func (t *ElevenLabsTTS) GenerateSpeech(ctx context.Context, text string, config VoiceConfig) ([]byte, error) {
	voiceID := config.VoiceID
	if voiceID == "" {
		voiceID = "21m00Tcm4TlvDq8ikWAM"
	}
	url := fmt.Sprintf("%s/text-to-speech/%s", t.baseURL, voiceID)

	body := map[string]interface{}{
		"text":     text,
		"model_id": "eleven_multilingual_v2",
		"voice_settings": map[string]interface{}{
			"stability":         0.5,
			"similarity_boost":  0.75,
			"style":             0.5,
			"use_speaker_boost": true,
		},
	}
	bodyJSON, err := json.Marshal(body)
	if err != nil {
		return nil, fmt.Errorf("encoding ElevenLabs request: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, "POST", url, strings.NewReader(string(bodyJSON)))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("xi-api-key", t.apiKey)
	req.Header.Set("Accept", "audio/mpeg")

	resp, err := t.httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("ElevenLabs API error: %d", resp.StatusCode)
	}

	// io.ReadAll treats io.EOF as success and reports every other read error,
	// so a dropped connection or timeout cannot pass as a complete clip.
	audioData, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading ElevenLabs audio: %w", err)
	}
	return audioData, nil
}
// DummyTTS is a TTS implementation that performs no synthesis. It is a
// stand-in for a real TTSClient where no audio is needed.
type DummyTTS struct{}

// GenerateSpeech ignores all of its arguments and returns an empty, non-nil
// byte slice with a nil error.
func (*DummyTTS) GenerateSpeech(ctx context.Context, text string, config VoiceConfig) ([]byte, error) {
	silence := make([]byte, 0)
	return silence, nil
}