Files
gooseek/backend/internal/pages/generator.go
home 06fe57c765 feat: Go backend, enhanced search, new widgets, Docker deploy
Major changes:
- Add Go backend (backend/) with microservices architecture
- Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler,
  proxy-manager, media-search, fastClassifier, language detection
- New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard,
  UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions
- Improved discover-svc with discover-db integration
- Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md)
- Library-svc: project_id schema migration
- Remove deprecated finance-svc and travel-svc
- Localization improvements across services

Made-with: Cursor
2026-02-27 04:15:32 +03:00

319 lines
7.9 KiB
Go

package pages
import (
	"context"
	"encoding/json"
	"fmt"
	"html"
	"strings"
	"time"
	"unicode/utf8"

	"github.com/google/uuid"
	"github.com/gooseek/backend/internal/llm"
	"github.com/gooseek/backend/internal/types"
)
// Page is a generated article assembled from a search thread: a titled,
// ordered list of sections plus the numbered sources cited in the text.
// JSON tags match the web frontend's camelCase field names.
type Page struct {
	ID     string `json:"id"`
	UserID string `json:"userId"`
	// ThreadID links the page back to the originating search thread, if any.
	ThreadID string `json:"threadId,omitempty"`
	Title    string `json:"title"`
	Subtitle string `json:"subtitle,omitempty"`
	// Sections are rendered in Order; Sources are the numbered citations
	// referenced as [1], [2], ... from section content.
	Sections  []PageSection `json:"sections"`
	Sources   []PageSource  `json:"sources"`
	Thumbnail string        `json:"thumbnail,omitempty"`
	// IsPublic/ShareID/ViewCount support sharing; they are not populated by
	// the generator itself — presumably set by a higher layer (TODO confirm).
	IsPublic  bool      `json:"isPublic"`
	ShareID   string    `json:"shareId,omitempty"`
	ViewCount int       `json:"viewCount"`
	CreatedAt time.Time `json:"createdAt"`
	UpdatedAt time.Time `json:"updatedAt"`
}
// PageSection is one titled chunk of article content. The generator only
// produces Type "text"; Order is the 1-based position within the page.
type PageSection struct {
	ID      string `json:"id"`
	Type    string `json:"type"`
	Title   string `json:"title,omitempty"`
	Content string `json:"content"`
	// ImageURL is optional illustration for the section; not set by
	// parseStructure in this file.
	ImageURL string `json:"imageUrl,omitempty"`
	Order    int    `json:"order"`
}
// PageSource is a numbered citation attached to a page. Index is 1-based so
// it lines up with the [n] citation markers in section content.
type PageSource struct {
	Index  int    `json:"index"`
	URL    string `json:"url"`
	Title  string `json:"title"`
	Domain string `json:"domain"`
	// Favicon is optional; GenerateFromThread does not populate it.
	Favicon string `json:"favicon,omitempty"`
}
// PageGeneratorConfig configures a PageGenerator.
type PageGeneratorConfig struct {
	// LLMClient produces the article structure text; required.
	LLMClient llm.Client
	// Locale selects the output language ("ru" switches the prompt to
	// request Russian; anything else defaults to English).
	Locale string
	// Style and Audience are free-text prompt hints; empty values default
	// to "informative" and "general" respectively.
	Style    string
	Audience string
}
// PageGenerator turns a completed search thread (query, answer, sources)
// into a shareable Page via an LLM call.
type PageGenerator struct {
	cfg PageGeneratorConfig
}
// NewPageGenerator returns a PageGenerator that uses the given config.
func NewPageGenerator(cfg PageGeneratorConfig) *PageGenerator {
	gen := PageGenerator{cfg: cfg}
	return &gen
}
// GenerateFromThread builds a structured Page from a search thread's query,
// final answer, and supporting source chunks. It asks the LLM for a
// marker-based outline (TITLE:/SUBTITLE:/SECTION: lines), parses it into
// sections, and attaches up to 20 numbered sources.
//
// Fixes vs. previous revision: CreatedAt/UpdatedAt now come from a single
// time.Now() call (they could previously differ by nanoseconds), and an
// empty parsed title falls back to the query so a malformed LLM response
// never yields an untitled page.
func (g *PageGenerator) GenerateFromThread(ctx context.Context, query string, answer string, sources []types.Chunk) (*Page, error) {
	structurePrompt := g.buildStructurePrompt(query, answer, sources)
	structure, err := g.cfg.LLMClient.GenerateText(ctx, llm.StreamRequest{
		Messages: []llm.Message{
			{Role: "user", Content: structurePrompt},
		},
	})
	if err != nil {
		return nil, fmt.Errorf("failed to generate structure: %w", err)
	}

	page := g.parseStructure(structure)
	page.ID = uuid.New().String()

	// Single timestamp so a freshly generated page has CreatedAt == UpdatedAt.
	now := time.Now()
	page.CreatedAt = now
	page.UpdatedAt = now

	// Robustness: the model may omit the TITLE: line entirely.
	if page.Title == "" {
		page.Title = query
	}

	// Cap at 20 sources; Index is 1-based to match [n] citations in content.
	for i, src := range sources {
		if i >= 20 {
			break
		}
		url := src.Metadata["url"]
		page.Sources = append(page.Sources, PageSource{
			Index:  i + 1,
			URL:    url,
			Title:  src.Metadata["title"],
			Domain: extractDomain(url),
		})
	}
	return page, nil
}
// buildStructurePrompt assembles the article-generation prompt from the
// user's query, the (truncated) research answer, and up to 15 source
// excerpts, applying the configured locale, style, and audience hints.
func (g *PageGenerator) buildStructurePrompt(query, answer string, sources []types.Chunk) string {
	// Include at most the first 15 sources, each truncated to 300 bytes.
	limit := len(sources)
	if limit > 15 {
		limit = 15
	}
	var refs strings.Builder
	for i := 0; i < limit; i++ {
		refs.WriteString(fmt.Sprintf("[%d] %s\n%s\n\n", i+1, sources[i].Metadata["title"], truncate(sources[i].Content, 300)))
	}

	// Only Russian gets an explicit language instruction; everything else
	// defaults to the model's own (English) output.
	var langInstr string
	if g.cfg.Locale == "ru" {
		langInstr = "Write in Russian."
	}

	style, audience := g.cfg.Style, g.cfg.Audience
	if style == "" {
		style = "informative"
	}
	if audience == "" {
		audience = "general"
	}

	return fmt.Sprintf(`Create a well-structured article from this research.
Topic: %s
Research findings:
%s
Sources:
%s
%s
Style: %s
Target audience: %s
Generate the article in this exact format:
TITLE: [compelling title]
SUBTITLE: [brief subtitle]
SECTION: Introduction
[2-3 paragraphs introducing the topic]
SECTION: [Topic Name 1]
[detailed content with citations [1], [2], etc.]
SECTION: [Topic Name 2]
[detailed content with citations]
SECTION: [Topic Name 3]
[detailed content with citations]
SECTION: Conclusion
[summary and key takeaways]
SECTION: Key Points
- [bullet point 1]
- [bullet point 2]
- [bullet point 3]
Requirements:
- Use citations [1], [2], etc. throughout
- Make it comprehensive but readable
- Include specific facts and data
- Keep sections focused and well-organized`, query, truncate(answer, 2000), refs.String(), langInstr, style, audience)
}
// parseStructure converts the model's marker-based plain-text output
// (TITLE:, SUBTITLE:, and SECTION: header lines followed by body lines)
// into a Page with ordered "text" sections. Body lines that appear before
// the first SECTION: marker are discarded.
func (g *PageGenerator) parseStructure(text string) *Page {
	page := &Page{Sections: make([]PageSection, 0)}

	var (
		section      *PageSection
		body         strings.Builder
		sectionCount int
	)

	// flush finalizes the in-progress section, if any, onto the page.
	flush := func() {
		if section == nil {
			return
		}
		section.Content = strings.TrimSpace(body.String())
		page.Sections = append(page.Sections, *section)
		body.Reset()
	}

	for _, raw := range strings.Split(text, "\n") {
		line := strings.TrimSpace(raw)
		switch {
		case strings.HasPrefix(line, "TITLE:"):
			page.Title = strings.TrimSpace(strings.TrimPrefix(line, "TITLE:"))
		case strings.HasPrefix(line, "SUBTITLE:"):
			page.Subtitle = strings.TrimSpace(strings.TrimPrefix(line, "SUBTITLE:"))
		case strings.HasPrefix(line, "SECTION:"):
			flush()
			sectionCount++
			section = &PageSection{
				ID:    uuid.New().String(),
				Type:  "text",
				Title: strings.TrimSpace(strings.TrimPrefix(line, "SECTION:")),
				Order: sectionCount,
			}
		case section != nil:
			// Regular content line inside the current section.
			body.WriteString(line)
			body.WriteString("\n")
		}
	}
	flush()
	return page
}
// ExportToMarkdown renders the page as a Markdown document: an H1 title,
// an optional italic subtitle, one H2 per section, and a numbered list of
// source links after a horizontal rule.
func (g *PageGenerator) ExportToMarkdown(page *Page) string {
	var out strings.Builder
	out.WriteString("# " + page.Title + "\n\n")
	if sub := page.Subtitle; sub != "" {
		out.WriteString("*" + sub + "*\n\n")
	}
	for i := range page.Sections {
		sec := &page.Sections[i]
		out.WriteString("## " + sec.Title + "\n\n")
		out.WriteString(sec.Content + "\n\n")
	}
	out.WriteString("---\n\n## Sources\n\n")
	for _, src := range page.Sources {
		out.WriteString(fmt.Sprintf("%d. [%s](%s)\n", src.Index, src.Title, src.URL))
	}
	return out.String()
}
// ExportToHTML renders the page as a standalone HTML document with inline
// styles.
//
// Security fix: all page-derived text (titles, section content, source
// URLs/titles/domains) originates from LLM output and crawled web pages —
// untrusted input — and is now passed through html.EscapeString before
// being interpolated into markup, preventing HTML/script injection. The
// builder was also renamed from `html` to `b`, since the old name shadowed
// the `html` standard-library package.
func (g *PageGenerator) ExportToHTML(page *Page) string {
	var b strings.Builder
	b.WriteString("<!DOCTYPE html>\n<html>\n<head>\n")
	b.WriteString(fmt.Sprintf("<title>%s</title>\n", html.EscapeString(page.Title)))
	b.WriteString("<style>\n")
	b.WriteString(`body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; line-height: 1.6; }
h1 { color: #1a1a1a; border-bottom: 2px solid #007bff; padding-bottom: 10px; }
h2 { color: #333; margin-top: 30px; }
.subtitle { color: #666; font-style: italic; margin-bottom: 30px; }
.sources { background: #f5f5f5; padding: 20px; border-radius: 8px; margin-top: 40px; }
.sources a { color: #007bff; text-decoration: none; }
.sources a:hover { text-decoration: underline; }
`)
	b.WriteString("</style>\n</head>\n<body>\n")
	b.WriteString(fmt.Sprintf("<h1>%s</h1>\n", html.EscapeString(page.Title)))
	if page.Subtitle != "" {
		b.WriteString(fmt.Sprintf("<p class=\"subtitle\">%s</p>\n", html.EscapeString(page.Subtitle)))
	}
	for _, section := range page.Sections {
		b.WriteString(fmt.Sprintf("<h2>%s</h2>\n", html.EscapeString(section.Title)))
		// Blank-line-separated paragraphs; a paragraph starting with "- "
		// is rendered as a bullet list.
		for _, p := range strings.Split(section.Content, "\n\n") {
			p = strings.TrimSpace(p)
			if p == "" {
				continue
			}
			if strings.HasPrefix(p, "- ") {
				b.WriteString("<ul>\n")
				for _, item := range strings.Split(p, "\n") {
					item = strings.TrimPrefix(item, "- ")
					b.WriteString(fmt.Sprintf("<li>%s</li>\n", html.EscapeString(item)))
				}
				b.WriteString("</ul>\n")
			} else {
				b.WriteString(fmt.Sprintf("<p>%s</p>\n", html.EscapeString(p)))
			}
		}
	}
	b.WriteString("<div class=\"sources\">\n<h3>Sources</h3>\n<ol>\n")
	for _, src := range page.Sources {
		b.WriteString(fmt.Sprintf("<li><a href=\"%s\" target=\"_blank\">%s</a> (%s)</li>\n",
			html.EscapeString(src.URL), html.EscapeString(src.Title), html.EscapeString(src.Domain)))
	}
	b.WriteString("</ol>\n</div>\n")
	b.WriteString("</body>\n</html>")
	return b.String()
}
// ToJSON serializes the page as indented JSON (single-space indent).
func (g *PageGenerator) ToJSON(page *Page) (string, error) {
	encoded, err := json.MarshalIndent(page, "", " ")
	if err != nil {
		return "", err
	}
	return string(encoded), nil
}
// truncate shortens s to at most maxLen bytes, appending "..." when it was
// cut. maxLen is a byte budget, not a rune count.
//
// Fix: the previous version sliced blindly at s[:maxLen], which could split
// a multi-byte UTF-8 sequence (likely for the Russian content this package
// supports) and emit invalid UTF-8 into prompts and exports. The cut point
// now backs up to the nearest rune boundary.
func truncate(s string, maxLen int) string {
	if len(s) <= maxLen {
		return s
	}
	cut := maxLen
	// Step back over UTF-8 continuation bytes to a rune start.
	for cut > 0 && !utf8.RuneStart(s[cut]) {
		cut--
	}
	return s[:cut] + "..."
}
// extractDomain pulls a bare host out of an http(s) URL for display:
// scheme, leading "www.", and anything from the first path/query/fragment
// separator onward are stripped. Lightweight by design — it does not use
// net/url and assumes reasonably well-formed input.
func extractDomain(url string) string {
	url = strings.TrimPrefix(url, "https://")
	url = strings.TrimPrefix(url, "http://")
	url = strings.TrimPrefix(url, "www.")
	// Cut at "/", "?", or "#": previously only "/" was handled, so a URL
	// like "example.com?q=1" leaked its query string into the domain.
	if idx := strings.IndexAny(url, "/?#"); idx > 0 {
		return url[:idx]
	}
	return url
}