Files
gooseek/backend/internal/discover/personalization.go
home 06fe57c765 feat: Go backend, enhanced search, new widgets, Docker deploy
Major changes:
- Add Go backend (backend/) with microservices architecture
- Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler,
  proxy-manager, media-search, fastClassifier, language detection
- New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard,
  UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions
- Improved discover-svc with discover-db integration
- Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md)
- Library-svc: project_id schema migration
- Remove deprecated finance-svc and travel-svc
- Localization improvements across services

Made-with: Cursor
2026-02-27 04:15:32 +03:00

692 lines
18 KiB
Go

package discover
import (
"context"
"encoding/json"
"fmt"
"math"
"sort"
"strings"
"sync"
"time"
)
// UserInterests is the per-user personalization profile that the engine loads
// from and saves to a UserInterestStore. It is serialized as JSON.
type UserInterests struct {
UserID string `json:"userId"`
// Topics maps a topic name to its interest weight. RecordView adds
// engagement-derived weight, decayInterests shrinks it, and
// UpdateTopicPreference overwrites it.
Topics map[string]float64 `json:"topics"`
// Sources maps a source name to its interest weight (updated by RecordView).
Sources map[string]float64 `json:"sources"`
// Keywords maps a keyword to its interest weight (updated by RecordView).
Keywords map[string]float64 `json:"keywords"`
// ViewHistory holds recorded views, newest first; RecordView caps it at 500.
ViewHistory []ViewEvent `json:"viewHistory"`
// SavedArticles holds the URLs of view events that had Saved set.
SavedArticles []string `json:"savedArticles"`
// BlockedSources lists sources that filterBlockedContent removes from feeds
// (compared case-insensitively).
BlockedSources []string `json:"blockedSources"`
// BlockedTopics lists topics that filterBlockedContent removes from feeds
// (compared case-insensitively).
BlockedTopics []string `json:"blockedTopics"`
PreferredLang string `json:"preferredLang"`
// Region is passed to ContentRepository.GetTrending when a feed is built.
Region string `json:"region"`
ReadingLevel string `json:"readingLevel"`
Notifications NotificationPrefs `json:"notifications"`
// LastUpdated is set to the current time by every engine method that
// modifies the profile.
LastUpdated time.Time `json:"lastUpdated"`
// CustomCategories are user-defined keyword groups that groupByCategory
// turns into extra feed categories.
CustomCategories []CustomCategory `json:"customCategories,omitempty"`
}
// ViewEvent describes one article view reported to RecordView.
type ViewEvent struct {
ArticleID string `json:"articleId"`
// URL identifies the article for the read/saved checks in this file.
URL string `json:"url"`
Topic string `json:"topic"`
Source string `json:"source"`
Keywords []string `json:"keywords"`
// TimeSpent is the reading time in seconds (see the JSON tag).
TimeSpent int `json:"timeSpentSeconds"`
Completed bool `json:"completed"`
Saved bool `json:"saved"`
Shared bool `json:"shared"`
Timestamp time.Time `json:"timestamp"`
// Engagement is overwritten by RecordView with the result of
// calculateEngagement; any caller-supplied value is discarded.
Engagement float64 `json:"engagement"`
}
// NotificationPrefs holds the notification settings stored with a user
// profile. No code visible in this file reads these fields.
type NotificationPrefs struct {
Enabled bool `json:"enabled"`
DailyDigest bool `json:"dailyDigest"`
// NOTE(review): the expected format of DigestTime is not visible here — confirm.
DigestTime string `json:"digestTime"`
BreakingNews bool `json:"breakingNews"`
TopicAlerts []string `json:"topicAlerts"`
// NOTE(review): the accepted Frequency values are not visible here — confirm.
Frequency string `json:"frequency"`
}
// CustomCategory is a user-defined feed category.
type CustomCategory struct {
// ID and Name become the ID and Name of the resulting FeedCategory.
ID string `json:"id"`
Name string `json:"name"`
// Keywords are matched by containsKeyword against an item's title, summary
// and keyword list; one match is enough to include the item.
Keywords []string `json:"keywords"`
// Sources and Weight are stored but not read by any code visible in this file.
Sources []string `json:"sources"`
Weight float64 `json:"weight"`
}
// PersonalizedFeed is the result returned by GenerateForYouFeed.
type PersonalizedFeed struct {
UserID string `json:"userId"`
// Items is the ranked item list, truncated to PersonalizationConfig.MaxFeedItems.
Items []FeedItem `json:"items"`
// Categories groups the same items by topic and by custom category.
Categories []FeedCategory `json:"categories"`
// TrendingIn is filled with the user's top interest topics (up to five).
TrendingIn []string `json:"trendingIn"`
UpdatedAt time.Time `json:"updatedAt"`
// NextUpdate is set 15 minutes after the feed is generated.
NextUpdate time.Time `json:"nextUpdate"`
}
// FeedItem is one article in a feed. Content fields come from the
// ContentRepository; the fields noted below are filled in by the engine.
type FeedItem struct {
ID string `json:"id"`
// URL is the key used for de-duplication and for the read/saved checks.
URL string `json:"url"`
Title string `json:"title"`
Summary string `json:"summary"`
Thumbnail string `json:"thumbnail"`
Source string `json:"source"`
SourceLogo string `json:"sourceLogo"`
Topic string `json:"topic"`
Keywords []string `json:"keywords"`
PublishedAt time.Time `json:"publishedAt"`
// RelevanceScore is set by GenerateForYouFeed from calculateRelevance.
RelevanceScore float64 `json:"relevanceScore"`
// Reason is the user-facing explanation produced by explainRecommendation.
Reason string `json:"reason"`
SourcesCount int `json:"sourcesCount"`
// ReadTime is the estimated reading time in minutes (see the JSON tag).
ReadTime int `json:"readTimeMinutes"`
HasDigest bool `json:"hasDigest"`
IsBreaking bool `json:"isBreaking"`
// IsTrending is set by GenerateForYouFeed on items returned by GetTrending.
IsTrending bool `json:"isTrending"`
// IsSaved and IsRead are set by GenerateForYouFeed from the user's saved
// articles and view history.
IsSaved bool `json:"isSaved"`
IsRead bool `json:"isRead"`
}
// FeedCategory is a named group of feed items produced by groupByCategory.
type FeedCategory struct {
// ID is the topic name for topic categories, or CustomCategory.ID for custom ones.
ID string `json:"id"`
Name string `json:"name"`
// Icon is an emoji and Color a hex colour string chosen by groupByCategory.
Icon string `json:"icon"`
Color string `json:"color"`
Items []FeedItem `json:"items"`
// IsCustom is true for categories built from a CustomCategory.
IsCustom bool `json:"isCustom"`
}
// PersonalizationEngine builds personalized feeds and maintains user interest
// profiles on top of a UserInterestStore and a ContentRepository.
type PersonalizationEngine struct {
userStore UserInterestStore
contentRepo ContentRepository
// NOTE(review): mu is not locked by any method visible in this file — confirm
// whether it is used elsewhere or can be removed.
mu sync.RWMutex
config PersonalizationConfig
}
// PersonalizationConfig holds the tunable parameters of the engine.
type PersonalizationConfig struct {
// MaxFeedItems caps the number of items in a generated feed.
MaxFeedItems int
// DecayFactor multiplies every interest weight on each RecordView call.
DecayFactor float64
// RecencyWeight scales the recency component of the relevance score.
RecencyWeight float64
// EngagementWeight is not read by any code visible in this file.
EngagementWeight float64
// TopicMatchWeight scales the user's topic weight in the relevance score.
TopicMatchWeight float64
// SourceTrustWeight scales the user's source weight in the relevance score.
SourceTrustWeight float64
// DiversityFactor is not read by any code visible in this file.
DiversityFactor float64
// TrendingBoost multiplies the relevance score of trending items.
TrendingBoost float64
// BreakingBoost multiplies the relevance score of breaking items.
BreakingBoost float64
}
// UserInterestStore persists UserInterests profiles.
//
// GenerateForYouFeed, RecordView and UpdateTopicPreference treat any error
// from Get as "no profile yet" and start from an empty profile; the other
// engine methods return the error to the caller.
// NOTE(review): the interface does not define how "not found" is reported —
// confirm against the implementations.
type UserInterestStore interface {
Get(ctx context.Context, userID string) (*UserInterests, error)
Save(ctx context.Context, interests *UserInterests) error
// Delete is not called by any code visible in this file.
Delete(ctx context.Context, userID string) error
}
// ContentRepository supplies candidate items for feed generation.
type ContentRepository interface {
// GetLatestContent is called with the user's top topics and a limit of 30.
GetLatestContent(ctx context.Context, topics []string, limit int) ([]FeedItem, error)
// GetTrending is called with the user's region and a limit of 20.
GetTrending(ctx context.Context, region string, limit int) ([]FeedItem, error)
// GetByKeywords is called with the user's top keywords and a limit of 15,
// and only when the user has at least one keyword.
GetByKeywords(ctx context.Context, keywords []string, limit int) ([]FeedItem, error)
}
// DefaultConfig returns the engine's baseline configuration: a 50-item feed,
// a 0.95 interest decay per recorded view, the default scoring weights, and
// the multipliers applied to trending and breaking items.
func DefaultConfig() PersonalizationConfig {
	var cfg PersonalizationConfig

	// Feed size and interest ageing.
	cfg.MaxFeedItems = 50
	cfg.DecayFactor = 0.95

	// Scoring weights.
	cfg.RecencyWeight = 0.25
	cfg.EngagementWeight = 0.20
	cfg.TopicMatchWeight = 0.30
	cfg.SourceTrustWeight = 0.15
	cfg.DiversityFactor = 0.10

	// Score multipliers.
	cfg.TrendingBoost = 1.5
	cfg.BreakingBoost = 2.0

	return cfg
}
// NewPersonalizationEngine wires a PersonalizationEngine to the given profile
// store, content repository and configuration. The arguments are stored as
// given; nothing is validated or copied beyond the config value itself.
func NewPersonalizationEngine(userStore UserInterestStore, contentRepo ContentRepository, cfg PersonalizationConfig) *PersonalizationEngine {
	engine := new(PersonalizationEngine)
	engine.userStore = userStore
	engine.contentRepo = contentRepo
	engine.config = cfg
	return engine
}
// GenerateForYouFeed builds the personalized "For You" feed for userID.
//
// It loads the user's interest profile (falling back to an empty profile with
// the default locale when none can be loaded), fetches candidates from the
// content repository concurrently — latest content for the top topics,
// trending content for the user's region and, when the user has keywords,
// keyword matches — then de-duplicates, removes blocked content, scores,
// sorts by relevance, applies the diversity pass, truncates to
// config.MaxFeedItems and groups the result into categories.
//
// Individual repository failures are tolerated. An error is returned only
// when every fetch that was started failed, so callers can tell an outage
// from a genuinely empty feed.
func (e *PersonalizationEngine) GenerateForYouFeed(ctx context.Context, userID string) (*PersonalizedFeed, error) {
	interests, err := e.userStore.Get(ctx, userID)
	if err != nil || interests == nil {
		// Best effort: a profile that cannot be loaded is treated as a new
		// user rather than failing the request. The nil check also covers a
		// store that reports "not found" as (nil, nil).
		interests = &UserInterests{
			UserID:        userID,
			Topics:        make(map[string]float64),
			Sources:       make(map[string]float64),
			Keywords:      make(map[string]float64),
			PreferredLang: "ru",
			Region:        "russia",
		}
	}

	topTopics := e.getTopInterests(interests.Topics, 5)
	topKeywords := e.getTopKeywords(interests.Keywords, 10)

	// Each goroutine writes only to its own slot, so no mutex is needed and
	// the merge order below does not depend on goroutine scheduling.
	type fetchResult struct {
		items []FeedItem
		err   error
	}
	var trending, latest, byKeyword fetchResult
	launched := 2

	var wg sync.WaitGroup
	wg.Add(2)
	go func() {
		defer wg.Done()
		latest.items, latest.err = e.contentRepo.GetLatestContent(ctx, topTopics, 30)
	}()
	go func() {
		defer wg.Done()
		trending.items, trending.err = e.contentRepo.GetTrending(ctx, interests.Region, 20)
		for i := range trending.items {
			trending.items[i].IsTrending = true
		}
	}()
	if len(topKeywords) > 0 {
		launched++
		wg.Add(1)
		go func() {
			defer wg.Done()
			byKeyword.items, byKeyword.err = e.contentRepo.GetByKeywords(ctx, topKeywords, 15)
		}()
	}
	wg.Wait()

	// Trending results go first: de-duplication keeps the first occurrence of
	// a URL, so an article present in several result sets keeps IsTrending.
	var (
		allItems []FeedItem
		firstErr error
		failed   int
	)
	for _, res := range []fetchResult{trending, latest, byKeyword} {
		if res.err != nil {
			failed++
			if firstErr == nil {
				firstErr = res.err
			}
			continue
		}
		allItems = append(allItems, res.items...)
	}
	if failed == launched {
		return nil, fmt.Errorf("generating feed for user %q: %w", userID, firstErr)
	}

	allItems = e.deduplicateItems(allItems)
	allItems = e.filterBlockedContent(allItems, interests)

	for i := range allItems {
		allItems[i].RelevanceScore = e.calculateRelevance(allItems[i], interests)
		allItems[i].Reason = e.explainRecommendation(allItems[i], interests)
		allItems[i].IsRead = e.isArticleRead(allItems[i].URL, interests)
		allItems[i].IsSaved = e.isArticleSaved(allItems[i].URL, interests)
	}

	// Stable sort keeps the deterministic merge order for equal scores.
	sort.SliceStable(allItems, func(i, j int) bool {
		return allItems[i].RelevanceScore > allItems[j].RelevanceScore
	})

	allItems = e.applyDiversity(allItems)
	if len(allItems) > e.config.MaxFeedItems {
		allItems = allItems[:e.config.MaxFeedItems]
	}

	now := time.Now()
	return &PersonalizedFeed{
		UserID:     userID,
		Items:      allItems,
		Categories: e.groupByCategory(allItems, interests),
		TrendingIn: topTopics,
		UpdatedAt:  now,
		NextUpdate: now.Add(15 * time.Minute),
	}, nil
}
// RecordView folds a single article view into the user's interest profile and
// saves the profile.
//
// The event's Engagement is recomputed with calculateEngagement, the event is
// prepended to the view history (capped at 500 entries), the topic, source
// and keyword weights are increased in proportion to the engagement, the URL
// is remembered when the article was saved, and all weights are then decayed.
//
// A profile that cannot be loaded is replaced by an empty one, except when
// ctx is already done: then the load error is returned, so a cancelled
// request never overwrites the stored profile with a blank one.
func (e *PersonalizationEngine) RecordView(ctx context.Context, userID string, event ViewEvent) error {
	const maxViewHistory = 500

	interests, err := e.userStore.Get(ctx, userID)
	if err != nil {
		if ctx.Err() != nil {
			return fmt.Errorf("loading interests for user %q: %w", userID, err)
		}
		interests = nil
	}
	if interests == nil {
		interests = &UserInterests{UserID: userID}
	}
	// Profiles decoded from JSON can carry nil maps; writing to one panics.
	if interests.Topics == nil {
		interests.Topics = make(map[string]float64)
	}
	if interests.Sources == nil {
		interests.Sources = make(map[string]float64)
	}
	if interests.Keywords == nil {
		interests.Keywords = make(map[string]float64)
	}

	event.Engagement = e.calculateEngagement(event)

	interests.ViewHistory = append([]ViewEvent{event}, interests.ViewHistory...)
	if len(interests.ViewHistory) > maxViewHistory {
		interests.ViewHistory = interests.ViewHistory[:maxViewHistory]
	}

	// Empty names are skipped so the profile does not accumulate a "" entry
	// that would later be requested from the content repository.
	if event.Topic != "" {
		interests.Topics[event.Topic] += event.Engagement * 0.1
	}
	if event.Source != "" {
		interests.Sources[event.Source] += event.Engagement * 0.05
	}

	// Keywords are stored lower-cased because calculateRelevance and
	// explainRecommendation look them up lower-cased.
	keywordWeight := event.Engagement * 0.02
	for _, kw := range event.Keywords {
		if key := strings.ToLower(kw); key != "" {
			interests.Keywords[key] += keywordWeight
		}
	}

	if event.Saved && event.URL != "" && !e.isArticleSaved(event.URL, interests) {
		interests.SavedArticles = append(interests.SavedArticles, event.URL)
	}

	interests.LastUpdated = time.Now()
	e.decayInterests(interests)
	return e.userStore.Save(ctx, interests)
}
// UpdateTopicPreference sets the user's weight for topic to weight,
// overwriting any learned value, and saves the profile.
//
// A profile that cannot be loaded is replaced by an empty one, except when
// ctx is already done: then the load error is returned, so a cancelled
// request never overwrites the stored profile with a blank one.
func (e *PersonalizationEngine) UpdateTopicPreference(ctx context.Context, userID, topic string, weight float64) error {
	interests, err := e.userStore.Get(ctx, userID)
	if err != nil {
		if ctx.Err() != nil {
			return fmt.Errorf("loading interests for user %q: %w", userID, err)
		}
		interests = nil
	}
	if interests == nil {
		interests = &UserInterests{
			UserID:   userID,
			Topics:   make(map[string]float64),
			Sources:  make(map[string]float64),
			Keywords: make(map[string]float64),
		}
	}
	// A profile decoded from JSON can carry a nil map; writing to it panics.
	if interests.Topics == nil {
		interests.Topics = make(map[string]float64)
	}

	interests.Topics[topic] = weight
	interests.LastUpdated = time.Now()
	return e.userStore.Save(ctx, interests)
}
// BlockSource adds source to the user's blocked sources and saves the
// profile. It is a no-op when the source is already blocked.
//
// The duplicate check is case-insensitive, matching filterBlockedContent,
// which compares sources case-insensitively; without it "CNN" and "cnn"
// would both be stored. Errors from the store are returned unchanged.
func (e *PersonalizationEngine) BlockSource(ctx context.Context, userID, source string) error {
	interests, err := e.userStore.Get(ctx, userID)
	if err != nil {
		return err
	}
	if interests == nil {
		return fmt.Errorf("blocking source for user %q: store returned no profile", userID)
	}

	for _, blocked := range interests.BlockedSources {
		if strings.EqualFold(blocked, source) {
			return nil
		}
	}

	interests.BlockedSources = append(interests.BlockedSources, source)
	interests.LastUpdated = time.Now()
	return e.userStore.Save(ctx, interests)
}
// BlockTopic adds topic to the user's blocked topics, removes any learned
// weight for it, and saves the profile.
//
// Matching is case-insensitive, consistent with filterBlockedContent: the
// topic is not added twice under a different spelling, and every Topics key
// that equals it ignoring case is removed so a blocked topic can no longer be
// selected as a top interest. When the topic is already blocked and no weight
// is left to remove, nothing is saved. Errors from the store are returned
// unchanged.
func (e *PersonalizationEngine) BlockTopic(ctx context.Context, userID, topic string) error {
	interests, err := e.userStore.Get(ctx, userID)
	if err != nil {
		return err
	}
	if interests == nil {
		return fmt.Errorf("blocking topic for user %q: store returned no profile", userID)
	}

	alreadyBlocked := false
	for _, blocked := range interests.BlockedTopics {
		if strings.EqualFold(blocked, topic) {
			alreadyBlocked = true
			break
		}
	}

	// Deleting from a map while ranging over it is safe in Go.
	purged := false
	for name := range interests.Topics {
		if strings.EqualFold(name, topic) {
			delete(interests.Topics, name)
			purged = true
		}
	}

	if alreadyBlocked && !purged {
		return nil
	}
	if !alreadyBlocked {
		interests.BlockedTopics = append(interests.BlockedTopics, topic)
	}
	interests.LastUpdated = time.Now()
	return e.userStore.Save(ctx, interests)
}
// AddCustomCategory stores category in the user's profile and saves it.
//
// When a category with the same non-empty ID already exists it is replaced in
// place, so adding the same category twice does not produce duplicate feed
// categories. Categories with an empty ID are always appended. Errors from
// the store are returned unchanged.
func (e *PersonalizationEngine) AddCustomCategory(ctx context.Context, userID string, category CustomCategory) error {
	interests, err := e.userStore.Get(ctx, userID)
	if err != nil {
		return err
	}
	if interests == nil {
		return fmt.Errorf("adding custom category for user %q: store returned no profile", userID)
	}

	replaced := false
	if category.ID != "" {
		for i := range interests.CustomCategories {
			if interests.CustomCategories[i].ID == category.ID {
				interests.CustomCategories[i] = category
				replaced = true
				break
			}
		}
	}
	if !replaced {
		interests.CustomCategories = append(interests.CustomCategories, category)
	}

	interests.LastUpdated = time.Now()
	return e.userStore.Save(ctx, interests)
}
// GetUserTopics returns the user's topic weights.
//
// The result is a copy, so callers can modify it without changing the profile
// object held by the store. A nil Topics map is returned as nil. Errors from
// the store are returned unchanged.
func (e *PersonalizationEngine) GetUserTopics(ctx context.Context, userID string) (map[string]float64, error) {
	interests, err := e.userStore.Get(ctx, userID)
	if err != nil {
		return nil, err
	}
	if interests == nil {
		return nil, fmt.Errorf("loading topics for user %q: store returned no profile", userID)
	}
	if interests.Topics == nil {
		return nil, nil
	}

	topics := make(map[string]float64, len(interests.Topics))
	for name, weight := range interests.Topics {
		topics[name] = weight
	}
	return topics, nil
}
// calculateRelevance scores item for a user.
//
// The score is the sum of:
//   - the user's weight for the item's topic  × config.TopicMatchWeight
//   - the user's weight for the item's source × config.SourceTrustWeight
//   - the summed user weights of the item's keywords (looked up lower-cased) × 0.1
//   - a recency term in [0, 1] × config.RecencyWeight, falling linearly from 1
//     at publication to 0 after one week
//
// The sum is then multiplied by config.TrendingBoost for trending items and
// by config.BreakingBoost for breaking items.
//
// The recency term is clamped at 1, so an item whose PublishedAt lies in the
// future (clock skew, bad feed data) cannot score above a just-published one.
func (e *PersonalizationEngine) calculateRelevance(item FeedItem, interests *UserInterests) float64 {
	const (
		keywordMatchWeight = 0.1
		recencyWindowHours = 168.0 // one week
	)

	score := 0.0
	if topicScore, ok := interests.Topics[item.Topic]; ok {
		score += topicScore * e.config.TopicMatchWeight
	}
	if sourceScore, ok := interests.Sources[item.Source]; ok {
		score += sourceScore * e.config.SourceTrustWeight
	}

	keywordScore := 0.0
	for _, kw := range item.Keywords {
		keywordScore += interests.Keywords[strings.ToLower(kw)]
	}
	score += keywordScore * keywordMatchWeight

	ageHours := time.Since(item.PublishedAt).Hours()
	recency := math.Min(1.0, math.Max(0.0, 1.0-ageHours/recencyWindowHours))
	score += recency * e.config.RecencyWeight

	if item.IsTrending {
		score *= e.config.TrendingBoost
	}
	if item.IsBreaking {
		score *= e.config.BreakingBoost
	}
	return score
}
// calculateEngagement turns a view event into an engagement score in [0, 1].
//
// Reading time contributes up to 0.4 (reached at 300 seconds; non-positive
// times contribute nothing); completing, saving and sharing the article add
// 0.3, 0.2 and 0.1 respectively.
func (e *PersonalizationEngine) calculateEngagement(event ViewEvent) float64 {
	var total float64

	if seconds := event.TimeSpent; seconds > 0 {
		total += math.Min(1.0, float64(seconds)/300.0) * 0.4
	}

	// Flat bonuses, applied in a fixed order.
	signals := [...]struct {
		fired bool
		bonus float64
	}{
		{event.Completed, 0.3},
		{event.Saved, 0.2},
		{event.Shared, 0.1},
	}
	for _, s := range signals {
		if s.fired {
			total += s.bonus
		}
	}
	return total
}
// explainRecommendation returns the user-facing (Russian) reason shown next
// to a feed item. The first matching rule wins: breaking news, trending,
// a topic weight above 0.5, a source weight above 0.3, the first item keyword
// whose weight (looked up lower-cased) is above 0.2, and finally a generic
// fallback.
func (e *PersonalizationEngine) explainRecommendation(item FeedItem, interests *UserInterests) string {
	// A missing map key reads as 0, which never passes the thresholds below.
	switch {
	case item.IsBreaking:
		return "Срочная новость"
	case item.IsTrending:
		return "Популярно сейчас"
	case interests.Topics[item.Topic] > 0.5:
		return fmt.Sprintf("Из вашей категории: %s", item.Topic)
	case interests.Sources[item.Source] > 0.3:
		return fmt.Sprintf("Из источника, который вы читаете: %s", item.Source)
	}

	for idx := range item.Keywords {
		keyword := item.Keywords[idx]
		if interests.Keywords[strings.ToLower(keyword)] > 0.2 {
			return fmt.Sprintf("По вашему интересу: %s", keyword)
		}
	}
	return "Рекомендуем для вас"
}
// getTopInterests returns up to limit keys of interests, ordered by
// descending weight. Equal weights are ordered by key so the result is
// deterministic regardless of map iteration order. A non-positive limit or an
// empty map yields an empty, non-nil slice.
func (e *PersonalizationEngine) getTopInterests(interests map[string]float64, limit int) []string {
	if limit <= 0 || len(interests) == 0 {
		return []string{}
	}

	type entry struct {
		name   string
		weight float64
	}
	ranked := make([]entry, 0, len(interests))
	for name, weight := range interests {
		ranked = append(ranked, entry{name, weight})
	}
	sort.Slice(ranked, func(i, j int) bool {
		if ranked[i].weight != ranked[j].weight {
			return ranked[i].weight > ranked[j].weight
		}
		return ranked[i].name < ranked[j].name
	})

	if len(ranked) > limit {
		ranked = ranked[:limit]
	}
	result := make([]string, len(ranked))
	for i := range ranked {
		result[i] = ranked[i].name
	}
	return result
}
// getTopKeywords returns up to limit keywords with the highest weights. It
// delegates to getTopInterests.
func (e *PersonalizationEngine) getTopKeywords(keywords map[string]float64, limit int) []string {
return e.getTopInterests(keywords, limit)
}
// deduplicateItems removes repeated articles while keeping the first
// occurrence and the original order.
//
// Articles are identified by URL, or by ID when the URL is empty; items with
// neither cannot be identified and are all kept. When a later duplicate is
// marked trending or breaking, the flag is carried over to the kept item, so
// the result does not depend on which copy came first.
func (e *PersonalizationEngine) deduplicateItems(items []FeedItem) []FeedItem {
	firstIndex := make(map[string]int, len(items))
	result := make([]FeedItem, 0, len(items))

	for _, item := range items {
		var key string
		switch {
		case item.URL != "":
			key = "url:" + item.URL
		case item.ID != "":
			key = "id:" + item.ID
		default:
			result = append(result, item)
			continue
		}

		if idx, seen := firstIndex[key]; seen {
			result[idx].IsTrending = result[idx].IsTrending || item.IsTrending
			result[idx].IsBreaking = result[idx].IsBreaking || item.IsBreaking
			continue
		}
		firstIndex[key] = len(result)
		result = append(result, item)
	}
	return result
}
// filterBlockedContent returns the items whose source and topic are not in
// the user's block lists. Both comparisons ignore case. The result is a new
// slice; the input is left untouched.
func (e *PersonalizationEngine) filterBlockedContent(items []FeedItem, interests *UserInterests) []FeedItem {
	// lowerSet builds a lookup set of lower-cased names.
	lowerSet := func(names []string) map[string]struct{} {
		set := make(map[string]struct{})
		for idx := range names {
			set[strings.ToLower(names[idx])] = struct{}{}
		}
		return set
	}
	bannedSources := lowerSet(interests.BlockedSources)
	bannedTopics := lowerSet(interests.BlockedTopics)

	kept := make([]FeedItem, 0, len(items))
	for idx := range items {
		_, sourceBanned := bannedSources[strings.ToLower(items[idx].Source)]
		_, topicBanned := bannedTopics[strings.ToLower(items[idx].Topic)]
		if !sourceBanned && !topicBanned {
			kept = append(kept, items[idx])
		}
	}
	return kept
}
// applyDiversity limits how many items of one topic or one source appear at
// the front of an already ranked list.
//
// Lists of ten items or fewer are returned unchanged. Otherwise each topic may
// take at most len/5 slots and each source at most len/4 (never fewer than
// three each); items over a quota are moved behind the rest, in their original
// order, and are appended only while the result has fewer than
// config.MaxFeedItems entries.
func (e *PersonalizationEngine) applyDiversity(items []FeedItem) []FeedItem {
	total := len(items)
	if total <= 10 {
		return items
	}

	topicQuota, sourceQuota := total/5, total/4
	if topicQuota < 3 {
		topicQuota = 3
	}
	if sourceQuota < 3 {
		sourceQuota = 3
	}

	perTopic := map[string]int{}
	perSource := map[string]int{}
	picked := make([]FeedItem, 0, total)
	var overflow []FeedItem

	for idx := range items {
		candidate := items[idx]
		if perTopic[candidate.Topic] < topicQuota && perSource[candidate.Source] < sourceQuota {
			perTopic[candidate.Topic]++
			perSource[candidate.Source]++
			picked = append(picked, candidate)
		} else {
			overflow = append(overflow, candidate)
		}
	}

	// Top up with over-quota items while there is room under the feed cap.
	room := e.config.MaxFeedItems - len(picked)
	if room > len(overflow) {
		room = len(overflow)
	}
	if room > 0 {
		picked = append(picked, overflow[:room]...)
	}
	return picked
}
// groupByCategory arranges feed items into display categories.
//
// One category is created per topic that has at least two items, with an icon
// and colour looked up by lower-cased topic name (a newspaper icon and grey
// otherwise). One more category is created for every user-defined custom
// category that matches at least one item through containsKeyword. The
// categories are ordered by the user's weight for the category ID in
// interests.Topics, highest first.
//
// The order is deterministic: categories with equal weight keep the order in
// which their topic first appears in items, followed by custom categories in
// the order they are stored in the profile.
// NOTE(review): CustomCategory.Sources and CustomCategory.Weight are not used
// for matching or ordering here — confirm whether that is intended.
func (e *PersonalizationEngine) groupByCategory(items []FeedItem, interests *UserInterests) []FeedCategory {
	type categoryStyle struct {
		Icon  string
		Color string
	}
	styles := map[string]categoryStyle{
		"tech":          {"💻", "#3B82F6"},
		"finance":       {"💰", "#10B981"},
		"sports":        {"⚽", "#F59E0B"},
		"politics":      {"🏛️", "#6366F1"},
		"science":       {"🔬", "#8B5CF6"},
		"health":        {"🏥", "#EC4899"},
		"entertainment": {"🎬", "#F97316"},
		"world":         {"🌍", "#14B8A6"},
		"business":      {"📊", "#6B7280"},
		"culture":       {"🎭", "#A855F7"},
	}
	fallbackStyle := categoryStyle{"📰", "#6B7280"}

	// Remember the order in which topics first appear so the output does not
	// depend on map iteration order.
	byTopic := make(map[string][]FeedItem)
	var topicOrder []string
	for _, item := range items {
		if _, seen := byTopic[item.Topic]; !seen {
			topicOrder = append(topicOrder, item.Topic)
		}
		byTopic[item.Topic] = append(byTopic[item.Topic], item)
	}

	categories := make([]FeedCategory, 0, len(topicOrder)+len(interests.CustomCategories))
	for _, topic := range topicOrder {
		topicItems := byTopic[topic]
		if len(topicItems) < 2 {
			continue
		}
		style, ok := styles[strings.ToLower(topic)]
		if !ok {
			style = fallbackStyle
		}
		categories = append(categories, FeedCategory{
			ID:    topic,
			Name:  topic,
			Icon:  style.Icon,
			Color: style.Color,
			Items: topicItems,
		})
	}

	for _, custom := range interests.CustomCategories {
		customItems := make([]FeedItem, 0)
		for _, item := range items {
			for _, kw := range custom.Keywords {
				if containsKeyword(item, kw) {
					customItems = append(customItems, item)
					break
				}
			}
		}
		if len(customItems) > 0 {
			categories = append(categories, FeedCategory{
				ID:       custom.ID,
				Name:     custom.Name,
				Icon:     "⭐",
				Color:    "#FBBF24",
				Items:    customItems,
				IsCustom: true,
			})
		}
	}

	// Stable sort: ties keep the deterministic order built above.
	sort.SliceStable(categories, func(i, j int) bool {
		return interests.Topics[categories[i].ID] > interests.Topics[categories[j].ID]
	})
	return categories
}
// decayInterests ages the profile in place: every topic, source and keyword
// weight is multiplied by config.DecayFactor, and entries whose new weight is
// below 0.01 are removed.
func (e *PersonalizationEngine) decayInterests(interests *UserInterests) {
	factor := e.config.DecayFactor

	weightSets := []map[string]float64{
		interests.Topics,
		interests.Sources,
		interests.Keywords,
	}
	for _, weights := range weightSets {
		// Deleting the current key while ranging over a map is safe in Go.
		for key, weight := range weights {
			decayed := weight * factor
			if decayed < 0.01 {
				delete(weights, key)
				continue
			}
			weights[key] = decayed
		}
	}
}
// isArticleRead reports whether the user's view history contains an event
// with exactly the given URL.
func (e *PersonalizationEngine) isArticleRead(url string, interests *UserInterests) bool {
	history := interests.ViewHistory
	for idx := 0; idx < len(history); idx++ {
		if history[idx].URL == url {
			return true
		}
	}
	return false
}
// isArticleSaved reports whether the user's saved articles contain exactly
// the given URL.
func (e *PersonalizationEngine) isArticleSaved(url string, interests *UserInterests) bool {
	saved := interests.SavedArticles
	for idx := 0; idx < len(saved); idx++ {
		if saved[idx] == url {
			return true
		}
	}
	return false
}
// containsKeyword reports whether keyword matches item, ignoring case: as a
// substring of the title or summary, or as an exact match of one of the
// item's keywords.
//
// An empty or whitespace-only keyword never matches. Without this guard it
// would match every item, because every string contains the empty string.
func containsKeyword(item FeedItem, keyword string) bool {
	if strings.TrimSpace(keyword) == "" {
		return false
	}

	kw := strings.ToLower(keyword)
	if strings.Contains(strings.ToLower(item.Title), kw) ||
		strings.Contains(strings.ToLower(item.Summary), kw) {
		return true
	}
	for _, itemKw := range item.Keywords {
		if strings.ToLower(itemKw) == kw {
			return true
		}
	}
	return false
}
// ToJSON encodes the profile as JSON using the struct's json tags.
func (u *UserInterests) ToJSON() ([]byte, error) {
return json.Marshal(u)
}
// ParseUserInterests decodes a JSON-encoded profile.
//
// The Topics, Sources and Keywords maps of the result are never nil, even
// when the JSON omits them or sets them to null, so the profile can be
// written to straight away (writing to a nil map panics). On malformed input
// it returns a nil profile and an error wrapping the decoder's error.
func ParseUserInterests(data []byte) (*UserInterests, error) {
	var interests UserInterests
	if err := json.Unmarshal(data, &interests); err != nil {
		return nil, fmt.Errorf("parsing user interests: %w", err)
	}

	if interests.Topics == nil {
		interests.Topics = make(map[string]float64)
	}
	if interests.Sources == nil {
		interests.Sources = make(map[string]float64)
	}
	if interests.Keywords == nil {
		interests.Keywords = make(map[string]float64)
	}
	return &interests, nil
}