Files
gooseek/backend/internal/learning/course_autogen.go
home ab48a0632b
Some checks failed
Build and Deploy GooSeek / build-backend (push) Failing after 1m4s
Build and Deploy GooSeek / build-webui (push) Failing after 1m2s
Build and Deploy GooSeek / deploy (push) Has been skipped
feat: CI/CD pipeline + Learning/Medicine/Travel services
- Add Gitea Actions workflow for automated build & deploy
- Add K8s manifests: webui, travel-svc, medicine-svc, sandbox-svc
- Update kustomization for localhost:5000 registry
- Add ingress for gooseek.ru and api.gooseek.ru
- Learning cabinet with onboarding, courses, sandbox integration
- Medicine service with symptom analysis and doctor matching
- Travel service with itinerary planning
- Server setup scripts (NVIDIA/CUDA, K3s, Gitea runner)

Made-with: Cursor
2026-03-02 20:25:44 +03:00

557 lines
16 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package learning
import (
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"log"
"regexp"
"strings"
"time"
"unicode"
"github.com/gooseek/backend/internal/db"
"github.com/gooseek/backend/internal/llm"
"github.com/gooseek/backend/internal/search"
)
// CourseAutoGenConfig bundles the dependencies a CourseAutoGenerator needs.
type CourseAutoGenConfig struct {
// LLM is the text-generation client used for the trend-analysis and
// course-design prompts.
LLM llm.Client
// Repo stores trend candidates and the generated courses.
Repo *db.LearningRepository
// SearchClient is optional: when it is nil the generator skips web research
// and builds its prompts without search snippets.
SearchClient *search.SearXNGClient
}
// CourseAutoGenerator periodically collects trending topics and turns the
// best candidates into published courses.
type CourseAutoGenerator struct {
// cfg holds the LLM client, repository and optional search client.
cfg CourseAutoGenConfig
}
// NewCourseAutoGenerator returns a generator that uses the given configuration.
// The configuration is stored as-is; no validation is performed here.
func NewCourseAutoGenerator(cfg CourseAutoGenConfig) *CourseAutoGenerator {
	gen := new(CourseAutoGenerator)
	gen.cfg = cfg
	return gen
}
// StartBackground runs the generation loop and blocks until ctx is cancelled.
//
// It waits 30 seconds, runs one generation cycle immediately, and then runs a
// further cycle every two hours. Callers that do not want to block should
// invoke it in its own goroutine.
func (g *CourseAutoGenerator) StartBackground(ctx context.Context) {
	log.Println("[course-autogen] starting background course generation")

	// Warm-up delay before the first cycle. Wait on the context as well so a
	// shutdown during warm-up returns immediately instead of sleeping the
	// full 30 seconds and then starting a cycle with a dead context.
	warmup := time.NewTimer(30 * time.Second)
	select {
	case <-ctx.Done():
		warmup.Stop()
		return
	case <-warmup.C:
	}

	ticker := time.NewTicker(2 * time.Hour)
	defer ticker.Stop()

	g.runCycle(ctx)
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			g.runCycle(ctx)
		}
	}
}
// runCycle performs one generation cycle bounded by a 30-minute timeout:
// it refreshes the trend candidates and then tries to design and publish up
// to three courses from the top-ranked trends.
//
// Errors are logged rather than returned; a failed trend collection does not
// prevent previously stored trends from being processed.
func (g *CourseAutoGenerator) runCycle(ctx context.Context) {
	const maxCoursesPerCycle = 3

	log.Println("[course-autogen] running generation cycle")

	cycleCtx, cancel := context.WithTimeout(ctx, 30*time.Minute)
	defer cancel()

	if err := g.collectTrends(cycleCtx); err != nil {
		log.Printf("[course-autogen] trend collection error: %v", err)
	}

	for i := 0; i < maxCoursesPerCycle; i++ {
		trend, err := g.cfg.Repo.PickTopTrend(cycleCtx)
		if err != nil {
			// Report repository failures as such instead of disguising
			// them as an empty queue.
			log.Printf("[course-autogen] picking top trend failed: %v", err)
			break
		}
		if trend == nil {
			log.Printf("[course-autogen] no more trends to process")
			break
		}

		if err := g.designAndPublishCourse(cycleCtx, trend); err != nil {
			log.Printf("[course-autogen] course design error for '%s': %v", trend.Topic, err)
			continue
		}

		// No need to pause after the final iteration.
		if i == maxCoursesPerCycle-1 {
			break
		}

		// Pause between successful courses, but stop right away when the
		// cycle is cancelled or times out.
		pause := time.NewTimer(5 * time.Second)
		select {
		case <-cycleCtx.Done():
			pause.Stop()
			return
		case <-pause.C:
		}
	}
}
// collectTrends asks the LLM for five course topics (optionally grounded in
// web search snippets) and stores every topic whose fingerprint is not yet
// known as a new trend candidate.
//
// If the first answer cannot be parsed or contains no trends, the request is
// repeated once with a stricter "JSON only" instruction. It returns an error
// when the LLM call fails or no usable trend list can be obtained; failures to
// store individual trends are logged and skipped.
func (g *CourseAutoGenerator) collectTrends(ctx context.Context) error {
	var webContext string
	if g.cfg.SearchClient != nil {
		webContext = g.searchTrendData(ctx)
	}

	prompt := `Ты — аналитик трендов IT-индустрии и образования в России и мире.`
	if webContext != "" {
		prompt += "\n\nРЕАЛЬНЫЕ ДАННЫЕ ИЗ ИНТЕРНЕТА:\n" + webContext
	}
	prompt += `
На основе реальных данных выбери 5 уникальных тем для курсов:
КРИТЕРИИ:
1. Актуальны на рынке РФ (вакансии hh.ru, habr, стеки)
2. НЕ банальные ("Основы Python", "HTML для начинающих" — НЕТ)
3. Практическая ценность для карьеры и зарплаты
4. Уникальность — чего нет на Stepik/Coursera/Skillbox
5. Тренды 2025-2026: AI/ML ops, platform engineering, Rust, WebAssembly, edge computing и т.д.
Категории: programming, devops, data, ai_ml, security, product, design, management, fintech, gamedev, mobile, blockchain, iot, other
Ответь строго JSON:
{
"trends": [
{
"topic": "Конкретное название курса",
"category": "категория",
"why_unique": "Почему этот курс уникален и привлечёт пользователей",
"demand_signals": ["сигнал спроса 1", "сигнал спроса 2"],
"target_salary": "ожидаемая зарплата после курса",
"score": 0.85
}
]
}`

	result, err := generateTextWithRetry(ctx, g.cfg.LLM, llm.StreamRequest{
		Messages: []llm.Message{{Role: "user", Content: prompt}},
	}, 2, 2*time.Second)
	if err != nil {
		return err
	}

	jsonStr := extractJSONBlock(result)
	var parsed struct {
		Trends []struct {
			Topic         string   `json:"topic"`
			Category      string   `json:"category"`
			WhyUnique     string   `json:"why_unique"`
			DemandSignals []string `json:"demand_signals"`
			TargetSalary  string   `json:"target_salary"`
			Score         float64  `json:"score"`
		} `json:"trends"`
	}
	if err := json.Unmarshal([]byte(jsonStr), &parsed); err != nil || len(parsed.Trends) == 0 {
		// Try a strict repair prompt once (common provider failure mode: extra prose / malformed JSON)
		repairPrompt := "Верни ответ СТРОГО как JSON без текста. " + prompt
		repaired, rerr := generateTextWithRetry(ctx, g.cfg.LLM, llm.StreamRequest{
			Messages: []llm.Message{{Role: "user", Content: repairPrompt}},
		}, 1, 2*time.Second)
		if rerr != nil {
			// err is nil when the first answer parsed but held no trends;
			// wrapping nil with %w would print "%!w(<nil>)", so surface
			// the repair error in that case.
			if err == nil {
				return fmt.Errorf("failed to parse trends: empty trends (repair request failed: %w)", rerr)
			}
			return fmt.Errorf("failed to parse trends: %w", err)
		}

		// Drop anything a failed first decode may have left behind so it
		// cannot be mistaken for the repaired result.
		parsed.Trends = nil
		jsonStr = extractJSONBlock(repaired)
		if uerr := json.Unmarshal([]byte(jsonStr), &parsed); uerr != nil || len(parsed.Trends) == 0 {
			if uerr != nil {
				return fmt.Errorf("failed to parse trends: %w", uerr)
			}
			return fmt.Errorf("failed to parse trends: empty trends")
		}
	}

	saved := 0
	for _, t := range parsed.Trends {
		// A blank topic cannot become a course and would make all such
		// entries share one fingerprint.
		if strings.TrimSpace(t.Topic) == "" {
			continue
		}

		fp := generateFingerprint(t.Topic)
		exists, err := g.cfg.Repo.FingerprintExists(ctx, fp)
		if err != nil {
			// Deduplication is best-effort: log and still try to store.
			log.Printf("[course-autogen] fingerprint lookup failed for '%s': %v", t.Topic, err)
		}
		if exists {
			continue
		}

		signals, err := json.Marshal(map[string]interface{}{
			"why_unique":     t.WhyUnique,
			"demand_signals": t.DemandSignals,
			"target_salary":  t.TargetSalary,
		})
		if err != nil {
			log.Printf("[course-autogen] encoding signals for '%s' failed: %v", t.Topic, err)
			continue
		}

		trend := &db.LearningTrendCandidate{
			Topic:       t.Topic,
			Category:    t.Category,
			Signals:     signals,
			Score:       t.Score,
			Fingerprint: fp,
		}
		if err := g.cfg.Repo.CreateTrend(ctx, trend); err != nil {
			log.Printf("[course-autogen] saving trend '%s' failed: %v", t.Topic, err)
			continue
		}
		saved++
	}

	log.Printf("[course-autogen] saved %d new trend candidates", saved)
	return nil
}
// searchTrendData runs a fixed set of web searches about IT learning trends
// and returns the result snippets joined by "\n---\n".
//
// Each query gets its own 15-second timeout. Each snippet is capped at 300
// bytes and the combined text at 3000 bytes. Failed queries are logged and
// skipped; an empty string is returned when no query produced results.
// The caller must ensure g.cfg.SearchClient is non-nil.
func (g *CourseAutoGenerator) searchTrendData(ctx context.Context) string {
	queries := []string{
		"IT тренды обучение 2025 2026 Россия",
		"самые востребованные IT навыки вакансии hh.ru",
		"новые технологии программирование курсы",
	}

	var results []string
	for _, q := range queries {
		searchCtx, cancel := context.WithTimeout(ctx, 15*time.Second)
		resp, err := g.cfg.SearchClient.Search(searchCtx, q, &search.SearchOptions{
			Categories: []string{"general"},
			PageNo:     1,
		})
		cancel()
		if err != nil {
			log.Printf("[course-autogen] trend search failed for '%s': %v", q, err)
			continue
		}

		for _, r := range resp.Results {
			snippet := r.Title + ": " + r.Content
			if len(snippet) > 300 {
				// A byte cut can land inside a multi-byte rune (the text
				// is largely Cyrillic); ToValidUTF8 drops the broken
				// tail, along with any other invalid bytes.
				snippet = strings.ToValidUTF8(snippet[:300], "")
			}
			results = append(results, snippet)
		}
	}

	if len(results) == 0 {
		return ""
	}

	combined := strings.Join(results, "\n---\n")
	if len(combined) > 3000 {
		combined = strings.ToValidUTF8(combined[:3000], "")
	}
	return combined
}
// designAndPublishCourse asks the LLM to design a complete course (metadata,
// outline and landing page) for the given trend and stores it with status
// "published".
//
// It returns nil without doing anything when the topic's fingerprint already
// exists. Otherwise it makes up to three attempts; an attempt fails when the
// LLM call fails, the answer is not valid JSON, the artifacts do not pass
// validateCourseArtifacts, or the course cannot be saved. When all attempts
// fail (or ctx is cancelled) the trend is marked as failed with the last error,
// truncated to 800 bytes, and that error is returned.
//
// NOTE(review): in the "fingerprint already exists" case the trend is neither
// marked failed nor otherwise updated here — confirm PickTopTrend will not
// hand out the same trend again.
func (g *CourseAutoGenerator) designAndPublishCourse(ctx context.Context, trend *db.LearningTrendCandidate) error {
	log.Printf("[course-autogen] designing course: %s", trend.Topic)

	fp := generateFingerprint(trend.Topic)
	exists, lookupErr := g.cfg.Repo.FingerprintExists(ctx, fp)
	if lookupErr != nil {
		// Deduplication is best-effort: log and carry on with the design.
		log.Printf("[course-autogen] fingerprint lookup failed for '%s': %v", trend.Topic, lookupErr)
	}
	if exists {
		return nil
	}

	var marketResearch string
	if g.cfg.SearchClient != nil {
		marketResearch = g.researchCourseTopic(ctx, trend.Topic)
	}

	// The prompt is the same for every attempt, so build it once.
	prompt := fmt.Sprintf(`Ты — ведущий методолог обучения в IT. Спроектируй профессиональный курс.
Тема: %s
Категория: %s`, trend.Topic, trend.Category)
	if marketResearch != "" {
		prompt += "\n\nИССЛЕДОВАНИЕ РЫНКА:\n" + marketResearch
	}
	prompt += `
ТРЕБОВАНИЯ:
1. Минимум теории, максимум боевой практики (как на реальных проектах в РФ)
2. Каждый модуль — практическое задание из реального проекта
3. Уровень: от базового до продвинутого
4. Курс должен быть уникальным — не копия Stepik/Coursera
5. Лендинг должен ПРОДАВАТЬ — конкретные выгоды, зарплаты, результаты
6. Outline должен быть детальным — 8-12 модулей
Ответь строго JSON:
{
"title": "Привлекательное название курса",
"slug": "slug-without-spaces",
"short_description": "Краткое описание 2-3 предложения. Конкретика, не вода.",
"difficulty": "beginner|intermediate|advanced",
"duration_hours": 40,
"tags": ["тег1", "тег2"],
"outline": {
"modules": [
{
"index": 0,
"title": "Название модуля",
"description": "Описание + что делаем на практике",
"skills": ["навык"],
"estimated_hours": 4,
"practice_focus": "Конкретная практическая задача"
}
]
},
"landing": {
"hero_title": "Заголовок лендинга (продающий)",
"hero_subtitle": "Подзаголовок с конкретной выгодой",
"benefits": ["Конкретная выгода 1", "Выгода 2", "Выгода 3", "Выгода 4"],
"target_audience": "Для кого этот курс — конкретно",
"outcomes": ["Результат 1 с цифрами", "Результат 2"],
"salary_range": "Ожидаемая зарплата после курса",
"prerequisites": "Что нужно знать заранее",
"faq": [
{"question": "Вопрос?", "answer": "Ответ"}
]
}
}`

	var lastErr error
	for attempt := 0; attempt < 3; attempt++ {
		// Once the context is done every further attempt would fail the
		// same way, so stop retrying.
		if err := ctx.Err(); err != nil {
			lastErr = err
			break
		}

		result, err := generateTextWithRetry(ctx, g.cfg.LLM, llm.StreamRequest{
			Messages: []llm.Message{{Role: "user", Content: prompt}},
		}, 2, 2*time.Second)
		if err != nil {
			lastErr = err
			continue
		}

		jsonStr := extractJSONBlock(result)
		var parsed struct {
			Title            string          `json:"title"`
			Slug             string          `json:"slug"`
			ShortDescription string          `json:"short_description"`
			Difficulty       string          `json:"difficulty"`
			DurationHours    int             `json:"duration_hours"`
			Tags             []string        `json:"tags"`
			Outline          json.RawMessage `json:"outline"`
			Landing          json.RawMessage `json:"landing"`
		}
		if err := json.Unmarshal([]byte(jsonStr), &parsed); err != nil {
			lastErr = fmt.Errorf("failed to parse course design: %w", err)
			continue
		}

		// Missing sections become empty objects so validation reports a
		// precise reason instead of a JSON syntax error.
		outlineJSON := parsed.Outline
		if outlineJSON == nil {
			outlineJSON = json.RawMessage("{}")
		}
		landingJSON := parsed.Landing
		if landingJSON == nil {
			landingJSON = json.RawMessage("{}")
		}

		if err := validateCourseArtifacts(parsed.Title, parsed.ShortDescription, outlineJSON, landingJSON); err != nil {
			lastErr = err
			continue
		}

		slug := sanitizeSlug(parsed.Slug)
		if slug == "" {
			slug = sanitizeSlug(parsed.Title)
		}
		slug = g.ensureUniqueSlug(ctx, slug)

		// Fall back to the default for a missing or nonsensical duration.
		if parsed.DurationHours <= 0 {
			parsed.DurationHours = 40
		}
		parsed.Difficulty = normalizeDifficulty(parsed.Difficulty)

		course := &db.LearningCourse{
			Slug:             slug,
			Title:            strings.TrimSpace(parsed.Title),
			ShortDescription: strings.TrimSpace(parsed.ShortDescription),
			Category:         trend.Category,
			Tags:             parsed.Tags,
			Difficulty:       parsed.Difficulty,
			DurationHours:    parsed.DurationHours,
			BaseOutline:      outlineJSON,
			Landing:          landingJSON,
			Fingerprint:      fp,
			Status:           "published",
		}
		if err := g.cfg.Repo.CreateCourse(ctx, course); err != nil {
			lastErr = fmt.Errorf("failed to save course: %w", err)
			continue
		}

		log.Printf("[course-autogen] published course: %s (%s)", course.Title, course.Slug)
		return nil
	}

	if lastErr == nil {
		lastErr = errors.New("unknown course design failure")
	}
	if err := g.cfg.Repo.MarkTrendFailed(ctx, trend.ID, truncateErr(lastErr.Error(), 800)); err != nil {
		// The design error is what the caller needs; only log this one.
		log.Printf("[course-autogen] marking trend '%s' as failed did not succeed: %v", trend.Topic, err)
	}
	return lastErr
}
// researchCourseTopic runs two web searches for the given topic (course
// programmes and vacancies/salaries) and returns the result snippets joined
// by "\n---\n".
//
// Each query gets its own 10-second timeout. Each snippet is capped at 250
// bytes and the combined text at 2000 bytes. Failed queries are logged and
// skipped; an empty string is returned when no query produced results.
// The caller must ensure g.cfg.SearchClient is non-nil.
func (g *CourseAutoGenerator) researchCourseTopic(ctx context.Context, topic string) string {
	queries := []string{
		topic + " курс программа обучение",
		topic + " вакансии зарплата Россия",
	}

	var results []string
	for _, q := range queries {
		searchCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
		resp, err := g.cfg.SearchClient.Search(searchCtx, q, &search.SearchOptions{
			Categories: []string{"general"},
			PageNo:     1,
		})
		cancel()
		if err != nil {
			log.Printf("[course-autogen] topic research search failed for '%s': %v", q, err)
			continue
		}

		for _, r := range resp.Results {
			snippet := r.Title + ": " + r.Content
			if len(snippet) > 250 {
				// A byte cut can land inside a multi-byte rune (the text
				// is largely Cyrillic); ToValidUTF8 drops the broken
				// tail, along with any other invalid bytes.
				snippet = strings.ToValidUTF8(snippet[:250], "")
			}
			results = append(results, snippet)
		}
	}

	if len(results) == 0 {
		return ""
	}

	combined := strings.Join(results, "\n---\n")
	if len(combined) > 2000 {
		combined = strings.ToValidUTF8(combined[:2000], "")
	}
	return combined
}
// generateFingerprint returns a 32-character hex identifier for a topic:
// the first 16 bytes of the SHA-256 digest of the topic after trimming
// surrounding whitespace and lower-casing it. Topics that differ only in
// case or surrounding whitespace therefore share a fingerprint.
func generateFingerprint(topic string) string {
	key := strings.TrimSpace(topic)
	key = strings.ToLower(key)

	digest := sha256.Sum256([]byte(key))
	prefix := digest[:16]
	return hex.EncodeToString(prefix)
}
// slugHyphenRun matches a run of one or more hyphens. It is compiled once at
// package initialisation instead of on every sanitizeSlug call.
var slugHyphenRun = regexp.MustCompile(`-+`)

// sanitizeSlug converts arbitrary text into a URL-friendly slug.
//
// The input is trimmed and lower-cased; Unicode letters and digits are kept,
// spaces, hyphens and underscores become hyphens, and every other character
// is dropped. Runs of hyphens are collapsed and leading/trailing hyphens are
// removed. The result is at most 100 bytes long, is always valid UTF-8, and
// may be empty.
func sanitizeSlug(s string) string {
	s = strings.ToLower(strings.TrimSpace(s))

	var b strings.Builder
	b.Grow(len(s))
	for _, r := range s {
		switch {
		case unicode.IsLetter(r) || unicode.IsDigit(r):
			b.WriteRune(r)
		case r == ' ' || r == '-' || r == '_':
			b.WriteByte('-')
		}
	}

	slug := slugHyphenRun.ReplaceAllString(b.String(), "-")
	slug = strings.Trim(slug, "-")

	if len(slug) > 100 {
		// Non-ASCII letters (e.g. Cyrillic) are multi-byte, so a plain
		// byte cut may end inside a rune; drop the broken tail. The cut
		// may also expose a hyphen at the end, so trim again.
		slug = strings.ToValidUTF8(slug[:100], "")
		slug = strings.TrimRight(slug, "-")
	}
	return slug
}
// ensureUniqueSlug returns a slug derived from base that the repository
// reports as unused.
//
// An empty base is replaced by "course". The base itself is tried first,
// followed by "<base>-2" … "<base>-20"; a lookup error is treated the same as
// "already taken". If none of the 20 candidates is free, the base is returned
// with a suffix derived from the current Unix time (modulo 10000), which is
// not checked against the repository.
func (g *CourseAutoGenerator) ensureUniqueSlug(ctx context.Context, base string) string {
	if base == "" {
		base = "course"
	}

	candidate := base
	for suffix := 2; suffix <= 21; suffix++ {
		taken, err := g.cfg.Repo.SlugExists(ctx, candidate)
		if err != nil || taken {
			candidate = fmt.Sprintf("%s-%d", base, suffix)
			continue
		}
		return candidate
	}

	stamp := time.Now().Unix() % 10000
	return fmt.Sprintf("%s-%d", base, stamp)
}
// normalizeDifficulty maps a free-form difficulty label to one of
// "beginner", "intermediate" or "advanced". Matching ignores case and
// surrounding whitespace; anything unrecognised becomes "intermediate".
func normalizeDifficulty(d string) string {
	level := strings.ToLower(strings.TrimSpace(d))
	if level == "beginner" || level == "intermediate" || level == "advanced" {
		return level
	}
	return "intermediate"
}
// validateCourseArtifacts checks that generated course content is complete
// enough to publish and returns the first problem found, or nil.
//
// Checks, in order:
//   - title is non-blank;
//   - short description is at least 40 bytes after trimming;
//   - outline decodes and has 8-12 modules, each with a non-blank title and
//     practice_focus;
//   - landing decodes and has hero_title and hero_subtitle, at least three
//     benefits and two outcomes, a salary_range, and at least one FAQ entry
//     whose first question is non-blank.
func validateCourseArtifacts(title, short string, outlineJSON, landingJSON json.RawMessage) error {
	type moduleSpec struct {
		Index          int      `json:"index"`
		Title          string   `json:"title"`
		Description    string   `json:"description"`
		Skills         []string `json:"skills"`
		EstimatedHours int      `json:"estimated_hours"`
		PracticeFocus  string   `json:"practice_focus"`
	}
	type outlineSpec struct {
		Modules []moduleSpec `json:"modules"`
	}
	type faqEntry struct {
		Question string `json:"question"`
		Answer   string `json:"answer"`
	}
	type landingSpec struct {
		HeroTitle    string     `json:"hero_title"`
		HeroSubtitle string     `json:"hero_subtitle"`
		Benefits     []string   `json:"benefits"`
		Outcomes     []string   `json:"outcomes"`
		SalaryRange  string     `json:"salary_range"`
		FAQ          []faqEntry `json:"faq"`
	}
	blank := func(s string) bool { return strings.TrimSpace(s) == "" }

	// Top-level text fields.
	if blank(title) {
		return errors.New("course title is empty")
	}
	if trimmed := strings.TrimSpace(short); len(trimmed) < 40 {
		return errors.New("short_description слишком короткое (нужна конкретика)")
	}

	// Outline: module count and mandatory per-module fields.
	var plan outlineSpec
	if err := json.Unmarshal(outlineJSON, &plan); err != nil {
		return fmt.Errorf("outline JSON invalid: %w", err)
	}
	if count := len(plan.Modules); count < 8 || count > 12 {
		return fmt.Errorf("outline modules count must be 8-12, got %d", count)
	}
	for idx := range plan.Modules {
		mod := plan.Modules[idx]
		if blank(mod.Title) || blank(mod.PracticeFocus) {
			return fmt.Errorf("outline module[%d] missing title/practice_focus", idx)
		}
	}

	// Landing page: the first failing rule wins.
	var page landingSpec
	if err := json.Unmarshal(landingJSON, &page); err != nil {
		return fmt.Errorf("landing JSON invalid: %w", err)
	}
	switch {
	case blank(page.HeroTitle), blank(page.HeroSubtitle):
		return errors.New("landing missing hero_title/hero_subtitle")
	case len(page.Benefits) < 3, len(page.Outcomes) < 2:
		return errors.New("landing benefits/outcomes недостаточно конкретные")
	case blank(page.SalaryRange):
		return errors.New("landing missing salary_range")
	case len(page.FAQ) < 1 || blank(page.FAQ[0].Question):
		return errors.New("landing FAQ missing")
	}
	return nil
}
// generateTextWithRetry calls client.GenerateText until it yields a non-blank
// answer, making at most retries+1 attempts.
//
// Before each retry it waits baseDelay, doubling on every further retry
// (baseDelay, 2*baseDelay, 4*baseDelay, …). If ctx is done during a wait, the
// context error is returned immediately. When every attempt fails, the error
// of the last attempt is returned, or "empty response" if that attempt
// produced no error but only blank text.
func generateTextWithRetry(ctx context.Context, client llm.Client, req llm.StreamRequest, retries int, baseDelay time.Duration) (string, error) {
	var lastErr error

	for attempt := 0; attempt <= retries; attempt++ {
		if attempt > 0 {
			// Exponential backoff: the delay doubles with each retry.
			factor := time.Duration(1 << uint(attempt-1))
			wait := time.NewTimer(baseDelay * factor)
			select {
			case <-ctx.Done():
				wait.Stop()
				return "", ctx.Err()
			case <-wait.C:
			}
		}

		text, err := client.GenerateText(ctx, req)
		if err != nil || strings.TrimSpace(text) == "" {
			lastErr = err
			continue
		}
		return text, nil
	}

	if lastErr != nil {
		return "", lastErr
	}
	return "", errors.New("empty response")
}
// truncateErr trims surrounding whitespace from s and, if the result is
// longer than max bytes, cuts it to at most max bytes and appends "...".
//
// The cut never leaves a partial multi-byte character behind: error texts in
// this package contain non-ASCII (Russian) messages, and a plain byte slice
// could otherwise produce invalid UTF-8. Any other invalid bytes in the kept
// prefix are dropped as well. A negative max is treated as zero.
func truncateErr(s string, max int) string {
	s = strings.TrimSpace(s)
	if max < 0 {
		max = 0
	}
	if len(s) <= max {
		return s
	}
	return strings.ToValidUTF8(s[:max], "") + "..."
}