Major changes: - Add Go backend (backend/) with microservices architecture - Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler, proxy-manager, media-search, fastClassifier, language detection - New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard, UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions - Improved discover-svc with discover-db integration - Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md) - Library-svc: project_id schema migration - Remove deprecated finance-svc and travel-svc - Localization improvements across services Made-with: Cursor
319 lines
7.9 KiB
Go
319 lines
7.9 KiB
Go
package pages
|
|
|
|
import (
	"context"
	"encoding/json"
	"fmt"
	"html"
	"strings"
	"time"

	"github.com/google/uuid"
	"github.com/gooseek/backend/internal/llm"
	"github.com/gooseek/backend/internal/types"
)
|
|
|
|
type Page struct {
|
|
ID string `json:"id"`
|
|
UserID string `json:"userId"`
|
|
ThreadID string `json:"threadId,omitempty"`
|
|
Title string `json:"title"`
|
|
Subtitle string `json:"subtitle,omitempty"`
|
|
Sections []PageSection `json:"sections"`
|
|
Sources []PageSource `json:"sources"`
|
|
Thumbnail string `json:"thumbnail,omitempty"`
|
|
IsPublic bool `json:"isPublic"`
|
|
ShareID string `json:"shareId,omitempty"`
|
|
ViewCount int `json:"viewCount"`
|
|
CreatedAt time.Time `json:"createdAt"`
|
|
UpdatedAt time.Time `json:"updatedAt"`
|
|
}
|
|
|
|
// PageSection is one titled block of a Page.
//
// Sections produced by parseStructure always have Type "text", a freshly
// generated ID, and a 1-based Order that follows their position in the LLM
// output. ImageURL is not populated by the code in this file.
type PageSection struct {
	ID       string `json:"id"`
	Type     string `json:"type"`
	Title    string `json:"title,omitempty"`
	Content  string `json:"content"`
	ImageURL string `json:"imageUrl,omitempty"`
	Order    int    `json:"order"`
}
|
|
|
|
// PageSource is one reference listed at the end of a Page.
//
// GenerateFromThread numbers sources from 1 (Index) so they line up with the
// "[n]" citations requested in the prompt, and derives Domain from URL via
// extractDomain. Favicon is not populated by the code in this file.
type PageSource struct {
	Index   int    `json:"index"`
	URL     string `json:"url"`
	Title   string `json:"title"`
	Domain  string `json:"domain"`
	Favicon string `json:"favicon,omitempty"`
}
|
|
|
|
type PageGeneratorConfig struct {
|
|
LLMClient llm.Client
|
|
Locale string
|
|
Style string
|
|
Audience string
|
|
}
|
|
|
|
type PageGenerator struct {
|
|
cfg PageGeneratorConfig
|
|
}
|
|
|
|
func NewPageGenerator(cfg PageGeneratorConfig) *PageGenerator {
|
|
return &PageGenerator{cfg: cfg}
|
|
}
|
|
|
|
func (g *PageGenerator) GenerateFromThread(ctx context.Context, query string, answer string, sources []types.Chunk) (*Page, error) {
|
|
structurePrompt := g.buildStructurePrompt(query, answer, sources)
|
|
|
|
structure, err := g.cfg.LLMClient.GenerateText(ctx, llm.StreamRequest{
|
|
Messages: []llm.Message{
|
|
{Role: "user", Content: structurePrompt},
|
|
},
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to generate structure: %w", err)
|
|
}
|
|
|
|
page := g.parseStructure(structure)
|
|
page.ID = uuid.New().String()
|
|
page.CreatedAt = time.Now()
|
|
page.UpdatedAt = time.Now()
|
|
|
|
for i, src := range sources {
|
|
if i >= 20 {
|
|
break
|
|
}
|
|
url := src.Metadata["url"]
|
|
title := src.Metadata["title"]
|
|
page.Sources = append(page.Sources, PageSource{
|
|
Index: i + 1,
|
|
URL: url,
|
|
Title: title,
|
|
Domain: extractDomain(url),
|
|
})
|
|
}
|
|
|
|
return page, nil
|
|
}
|
|
|
|
func (g *PageGenerator) buildStructurePrompt(query, answer string, sources []types.Chunk) string {
|
|
var sourcesText strings.Builder
|
|
for i, s := range sources {
|
|
if i >= 15 {
|
|
break
|
|
}
|
|
sourcesText.WriteString(fmt.Sprintf("[%d] %s\n%s\n\n", i+1, s.Metadata["title"], truncate(s.Content, 300)))
|
|
}
|
|
|
|
langInstr := ""
|
|
if g.cfg.Locale == "ru" {
|
|
langInstr = "Write in Russian."
|
|
}
|
|
|
|
style := g.cfg.Style
|
|
if style == "" {
|
|
style = "informative"
|
|
}
|
|
|
|
audience := g.cfg.Audience
|
|
if audience == "" {
|
|
audience = "general"
|
|
}
|
|
|
|
return fmt.Sprintf(`Create a well-structured article from this research.
|
|
|
|
Topic: %s
|
|
|
|
Research findings:
|
|
%s
|
|
|
|
Sources:
|
|
%s
|
|
|
|
%s
|
|
|
|
Style: %s
|
|
Target audience: %s
|
|
|
|
Generate the article in this exact format:
|
|
|
|
TITLE: [compelling title]
|
|
SUBTITLE: [brief subtitle]
|
|
|
|
SECTION: Introduction
|
|
[2-3 paragraphs introducing the topic]
|
|
|
|
SECTION: [Topic Name 1]
|
|
[detailed content with citations [1], [2], etc.]
|
|
|
|
SECTION: [Topic Name 2]
|
|
[detailed content with citations]
|
|
|
|
SECTION: [Topic Name 3]
|
|
[detailed content with citations]
|
|
|
|
SECTION: Conclusion
|
|
[summary and key takeaways]
|
|
|
|
SECTION: Key Points
|
|
- [bullet point 1]
|
|
- [bullet point 2]
|
|
- [bullet point 3]
|
|
|
|
Requirements:
|
|
- Use citations [1], [2], etc. throughout
|
|
- Make it comprehensive but readable
|
|
- Include specific facts and data
|
|
- Keep sections focused and well-organized`, query, truncate(answer, 2000), sourcesText.String(), langInstr, style, audience)
|
|
}
|
|
|
|
func (g *PageGenerator) parseStructure(text string) *Page {
|
|
page := &Page{
|
|
Sections: make([]PageSection, 0),
|
|
}
|
|
|
|
lines := strings.Split(text, "\n")
|
|
var currentSection *PageSection
|
|
var contentBuilder strings.Builder
|
|
order := 0
|
|
|
|
for _, line := range lines {
|
|
line = strings.TrimSpace(line)
|
|
|
|
if strings.HasPrefix(line, "TITLE:") {
|
|
page.Title = strings.TrimSpace(strings.TrimPrefix(line, "TITLE:"))
|
|
continue
|
|
}
|
|
|
|
if strings.HasPrefix(line, "SUBTITLE:") {
|
|
page.Subtitle = strings.TrimSpace(strings.TrimPrefix(line, "SUBTITLE:"))
|
|
continue
|
|
}
|
|
|
|
if strings.HasPrefix(line, "SECTION:") {
|
|
if currentSection != nil {
|
|
currentSection.Content = strings.TrimSpace(contentBuilder.String())
|
|
page.Sections = append(page.Sections, *currentSection)
|
|
contentBuilder.Reset()
|
|
}
|
|
|
|
order++
|
|
currentSection = &PageSection{
|
|
ID: uuid.New().String(),
|
|
Type: "text",
|
|
Title: strings.TrimSpace(strings.TrimPrefix(line, "SECTION:")),
|
|
Order: order,
|
|
}
|
|
continue
|
|
}
|
|
|
|
if currentSection != nil {
|
|
contentBuilder.WriteString(line)
|
|
contentBuilder.WriteString("\n")
|
|
}
|
|
}
|
|
|
|
if currentSection != nil {
|
|
currentSection.Content = strings.TrimSpace(contentBuilder.String())
|
|
page.Sections = append(page.Sections, *currentSection)
|
|
}
|
|
|
|
return page
|
|
}
|
|
|
|
func (g *PageGenerator) ExportToMarkdown(page *Page) string {
|
|
var md strings.Builder
|
|
|
|
md.WriteString("# " + page.Title + "\n\n")
|
|
if page.Subtitle != "" {
|
|
md.WriteString("*" + page.Subtitle + "*\n\n")
|
|
}
|
|
|
|
for _, section := range page.Sections {
|
|
md.WriteString("## " + section.Title + "\n\n")
|
|
md.WriteString(section.Content + "\n\n")
|
|
}
|
|
|
|
md.WriteString("---\n\n## Sources\n\n")
|
|
for _, src := range page.Sources {
|
|
md.WriteString(fmt.Sprintf("%d. [%s](%s)\n", src.Index, src.Title, src.URL))
|
|
}
|
|
|
|
return md.String()
|
|
}
|
|
|
|
func (g *PageGenerator) ExportToHTML(page *Page) string {
|
|
var html strings.Builder
|
|
|
|
html.WriteString("<!DOCTYPE html>\n<html>\n<head>\n")
|
|
html.WriteString(fmt.Sprintf("<title>%s</title>\n", page.Title))
|
|
html.WriteString("<style>\n")
|
|
html.WriteString(`body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; line-height: 1.6; }
|
|
h1 { color: #1a1a1a; border-bottom: 2px solid #007bff; padding-bottom: 10px; }
|
|
h2 { color: #333; margin-top: 30px; }
|
|
.subtitle { color: #666; font-style: italic; margin-bottom: 30px; }
|
|
.sources { background: #f5f5f5; padding: 20px; border-radius: 8px; margin-top: 40px; }
|
|
.sources a { color: #007bff; text-decoration: none; }
|
|
.sources a:hover { text-decoration: underline; }
|
|
`)
|
|
html.WriteString("</style>\n</head>\n<body>\n")
|
|
|
|
html.WriteString(fmt.Sprintf("<h1>%s</h1>\n", page.Title))
|
|
if page.Subtitle != "" {
|
|
html.WriteString(fmt.Sprintf("<p class=\"subtitle\">%s</p>\n", page.Subtitle))
|
|
}
|
|
|
|
for _, section := range page.Sections {
|
|
html.WriteString(fmt.Sprintf("<h2>%s</h2>\n", section.Title))
|
|
paragraphs := strings.Split(section.Content, "\n\n")
|
|
for _, p := range paragraphs {
|
|
p = strings.TrimSpace(p)
|
|
if p != "" {
|
|
if strings.HasPrefix(p, "- ") {
|
|
html.WriteString("<ul>\n")
|
|
for _, item := range strings.Split(p, "\n") {
|
|
item = strings.TrimPrefix(item, "- ")
|
|
html.WriteString(fmt.Sprintf("<li>%s</li>\n", item))
|
|
}
|
|
html.WriteString("</ul>\n")
|
|
} else {
|
|
html.WriteString(fmt.Sprintf("<p>%s</p>\n", p))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
html.WriteString("<div class=\"sources\">\n<h3>Sources</h3>\n<ol>\n")
|
|
for _, src := range page.Sources {
|
|
html.WriteString(fmt.Sprintf("<li><a href=\"%s\" target=\"_blank\">%s</a> (%s)</li>\n", src.URL, src.Title, src.Domain))
|
|
}
|
|
html.WriteString("</ol>\n</div>\n")
|
|
|
|
html.WriteString("</body>\n</html>")
|
|
|
|
return html.String()
|
|
}
|
|
|
|
func (g *PageGenerator) ToJSON(page *Page) (string, error) {
|
|
data, err := json.MarshalIndent(page, "", " ")
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return string(data), nil
|
|
}
|
|
|
|
// truncate returns s unchanged when it is at most maxLen bytes long;
// otherwise it returns a prefix of at most maxLen bytes followed by "...".
//
// The cut is moved back to the nearest rune boundary, so a multi-byte UTF-8
// character (for example Cyrillic text) is never split in half. A zero or
// negative maxLen keeps nothing of s and yields just "..." for non-empty
// input instead of panicking.
func truncate(s string, maxLen int) string {
	if len(s) <= maxLen {
		return s
	}

	// Ranging over a string yields the byte offset at which each rune starts;
	// keep the last offset that still fits within maxLen.
	cut := 0
	for i := range s {
		if i > maxLen {
			break
		}
		cut = i
	}

	return s[:cut] + "..."
}
|
|
|
|
// extractDomain returns the host part of url for display purposes.
//
// It strips a leading "https://" or "http://" and a leading "www.", then cuts
// the result at the first "/", "?" or "#", so a URL with a query string or
// fragment but no path ("https://example.com?q=1") still yields just the
// host. A port, if present, is kept. Input that does not look like a URL is
// returned with only the prefixes removed.
func extractDomain(url string) string {
	host := strings.TrimPrefix(url, "https://")
	host = strings.TrimPrefix(host, "http://")
	host = strings.TrimPrefix(host, "www.")

	if end := strings.IndexAny(host, "/?#"); end > 0 {
		return host[:end]
	}
	return host
}
|