feat: Go backend, enhanced search, new widgets, Docker deploy
Major changes: - Add Go backend (backend/) with microservices architecture - Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler, proxy-manager, media-search, fastClassifier, language detection - New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard, UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions - Improved discover-svc with discover-db integration - Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md) - Library-svc: project_id schema migration - Remove deprecated finance-svc and travel-svc - Localization improvements across services Made-with: Cursor
This commit is contained in:
318
backend/internal/pages/generator.go
Normal file
318
backend/internal/pages/generator.go
Normal file
@@ -0,0 +1,318 @@
|
||||
package pages
|
||||
|
||||
import (
	"context"
	"encoding/json"
	"fmt"
	"html"
	"strings"
	"time"

	"github.com/google/uuid"
	"github.com/gooseek/backend/internal/llm"
	"github.com/gooseek/backend/internal/types"
)
|
||||
|
||||
type Page struct {
|
||||
ID string `json:"id"`
|
||||
UserID string `json:"userId"`
|
||||
ThreadID string `json:"threadId,omitempty"`
|
||||
Title string `json:"title"`
|
||||
Subtitle string `json:"subtitle,omitempty"`
|
||||
Sections []PageSection `json:"sections"`
|
||||
Sources []PageSource `json:"sources"`
|
||||
Thumbnail string `json:"thumbnail,omitempty"`
|
||||
IsPublic bool `json:"isPublic"`
|
||||
ShareID string `json:"shareId,omitempty"`
|
||||
ViewCount int `json:"viewCount"`
|
||||
CreatedAt time.Time `json:"createdAt"`
|
||||
UpdatedAt time.Time `json:"updatedAt"`
|
||||
}
|
||||
|
||||
// PageSection is one titled block of a Page.
type PageSection struct {
	// ID is a per-section UUID assigned while parsing the LLM output.
	ID string `json:"id"`
	// Type is the section kind; the parser in this package always emits "text".
	Type string `json:"type"`
	// Title is the text following the "SECTION:" marker.
	Title string `json:"title,omitempty"`
	// Content is the whitespace-trimmed body collected up to the next marker.
	Content string `json:"content"`
	// ImageURL is optional and is not set by the parser in this package.
	ImageURL string `json:"imageUrl,omitempty"`
	// Order is the 1-based position of the section within the page.
	Order int `json:"order"`
}
|
||||
|
||||
// PageSource is a single reference listed at the end of a Page.
type PageSource struct {
	// Index is the 1-based citation number of the source.
	Index int `json:"index"`
	// URL is the source address as found in the chunk metadata.
	URL string `json:"url"`
	// Title is the source title as found in the chunk metadata.
	Title string `json:"title"`
	// Domain is the host part derived from URL by extractDomain.
	Domain string `json:"domain"`
	// Favicon is optional and is not set by GenerateFromThread.
	Favicon string `json:"favicon,omitempty"`
}
|
||||
|
||||
type PageGeneratorConfig struct {
|
||||
LLMClient llm.Client
|
||||
Locale string
|
||||
Style string
|
||||
Audience string
|
||||
}
|
||||
|
||||
type PageGenerator struct {
|
||||
cfg PageGeneratorConfig
|
||||
}
|
||||
|
||||
func NewPageGenerator(cfg PageGeneratorConfig) *PageGenerator {
|
||||
return &PageGenerator{cfg: cfg}
|
||||
}
|
||||
|
||||
func (g *PageGenerator) GenerateFromThread(ctx context.Context, query string, answer string, sources []types.Chunk) (*Page, error) {
|
||||
structurePrompt := g.buildStructurePrompt(query, answer, sources)
|
||||
|
||||
structure, err := g.cfg.LLMClient.GenerateText(ctx, llm.StreamRequest{
|
||||
Messages: []llm.Message{
|
||||
{Role: "user", Content: structurePrompt},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generate structure: %w", err)
|
||||
}
|
||||
|
||||
page := g.parseStructure(structure)
|
||||
page.ID = uuid.New().String()
|
||||
page.CreatedAt = time.Now()
|
||||
page.UpdatedAt = time.Now()
|
||||
|
||||
for i, src := range sources {
|
||||
if i >= 20 {
|
||||
break
|
||||
}
|
||||
url := src.Metadata["url"]
|
||||
title := src.Metadata["title"]
|
||||
page.Sources = append(page.Sources, PageSource{
|
||||
Index: i + 1,
|
||||
URL: url,
|
||||
Title: title,
|
||||
Domain: extractDomain(url),
|
||||
})
|
||||
}
|
||||
|
||||
return page, nil
|
||||
}
|
||||
|
||||
func (g *PageGenerator) buildStructurePrompt(query, answer string, sources []types.Chunk) string {
|
||||
var sourcesText strings.Builder
|
||||
for i, s := range sources {
|
||||
if i >= 15 {
|
||||
break
|
||||
}
|
||||
sourcesText.WriteString(fmt.Sprintf("[%d] %s\n%s\n\n", i+1, s.Metadata["title"], truncate(s.Content, 300)))
|
||||
}
|
||||
|
||||
langInstr := ""
|
||||
if g.cfg.Locale == "ru" {
|
||||
langInstr = "Write in Russian."
|
||||
}
|
||||
|
||||
style := g.cfg.Style
|
||||
if style == "" {
|
||||
style = "informative"
|
||||
}
|
||||
|
||||
audience := g.cfg.Audience
|
||||
if audience == "" {
|
||||
audience = "general"
|
||||
}
|
||||
|
||||
return fmt.Sprintf(`Create a well-structured article from this research.
|
||||
|
||||
Topic: %s
|
||||
|
||||
Research findings:
|
||||
%s
|
||||
|
||||
Sources:
|
||||
%s
|
||||
|
||||
%s
|
||||
|
||||
Style: %s
|
||||
Target audience: %s
|
||||
|
||||
Generate the article in this exact format:
|
||||
|
||||
TITLE: [compelling title]
|
||||
SUBTITLE: [brief subtitle]
|
||||
|
||||
SECTION: Introduction
|
||||
[2-3 paragraphs introducing the topic]
|
||||
|
||||
SECTION: [Topic Name 1]
|
||||
[detailed content with citations [1], [2], etc.]
|
||||
|
||||
SECTION: [Topic Name 2]
|
||||
[detailed content with citations]
|
||||
|
||||
SECTION: [Topic Name 3]
|
||||
[detailed content with citations]
|
||||
|
||||
SECTION: Conclusion
|
||||
[summary and key takeaways]
|
||||
|
||||
SECTION: Key Points
|
||||
- [bullet point 1]
|
||||
- [bullet point 2]
|
||||
- [bullet point 3]
|
||||
|
||||
Requirements:
|
||||
- Use citations [1], [2], etc. throughout
|
||||
- Make it comprehensive but readable
|
||||
- Include specific facts and data
|
||||
- Keep sections focused and well-organized`, query, truncate(answer, 2000), sourcesText.String(), langInstr, style, audience)
|
||||
}
|
||||
|
||||
func (g *PageGenerator) parseStructure(text string) *Page {
|
||||
page := &Page{
|
||||
Sections: make([]PageSection, 0),
|
||||
}
|
||||
|
||||
lines := strings.Split(text, "\n")
|
||||
var currentSection *PageSection
|
||||
var contentBuilder strings.Builder
|
||||
order := 0
|
||||
|
||||
for _, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
|
||||
if strings.HasPrefix(line, "TITLE:") {
|
||||
page.Title = strings.TrimSpace(strings.TrimPrefix(line, "TITLE:"))
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.HasPrefix(line, "SUBTITLE:") {
|
||||
page.Subtitle = strings.TrimSpace(strings.TrimPrefix(line, "SUBTITLE:"))
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.HasPrefix(line, "SECTION:") {
|
||||
if currentSection != nil {
|
||||
currentSection.Content = strings.TrimSpace(contentBuilder.String())
|
||||
page.Sections = append(page.Sections, *currentSection)
|
||||
contentBuilder.Reset()
|
||||
}
|
||||
|
||||
order++
|
||||
currentSection = &PageSection{
|
||||
ID: uuid.New().String(),
|
||||
Type: "text",
|
||||
Title: strings.TrimSpace(strings.TrimPrefix(line, "SECTION:")),
|
||||
Order: order,
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if currentSection != nil {
|
||||
contentBuilder.WriteString(line)
|
||||
contentBuilder.WriteString("\n")
|
||||
}
|
||||
}
|
||||
|
||||
if currentSection != nil {
|
||||
currentSection.Content = strings.TrimSpace(contentBuilder.String())
|
||||
page.Sections = append(page.Sections, *currentSection)
|
||||
}
|
||||
|
||||
return page
|
||||
}
|
||||
|
||||
func (g *PageGenerator) ExportToMarkdown(page *Page) string {
|
||||
var md strings.Builder
|
||||
|
||||
md.WriteString("# " + page.Title + "\n\n")
|
||||
if page.Subtitle != "" {
|
||||
md.WriteString("*" + page.Subtitle + "*\n\n")
|
||||
}
|
||||
|
||||
for _, section := range page.Sections {
|
||||
md.WriteString("## " + section.Title + "\n\n")
|
||||
md.WriteString(section.Content + "\n\n")
|
||||
}
|
||||
|
||||
md.WriteString("---\n\n## Sources\n\n")
|
||||
for _, src := range page.Sources {
|
||||
md.WriteString(fmt.Sprintf("%d. [%s](%s)\n", src.Index, src.Title, src.URL))
|
||||
}
|
||||
|
||||
return md.String()
|
||||
}
|
||||
|
||||
func (g *PageGenerator) ExportToHTML(page *Page) string {
|
||||
var html strings.Builder
|
||||
|
||||
html.WriteString("<!DOCTYPE html>\n<html>\n<head>\n")
|
||||
html.WriteString(fmt.Sprintf("<title>%s</title>\n", page.Title))
|
||||
html.WriteString("<style>\n")
|
||||
html.WriteString(`body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; line-height: 1.6; }
|
||||
h1 { color: #1a1a1a; border-bottom: 2px solid #007bff; padding-bottom: 10px; }
|
||||
h2 { color: #333; margin-top: 30px; }
|
||||
.subtitle { color: #666; font-style: italic; margin-bottom: 30px; }
|
||||
.sources { background: #f5f5f5; padding: 20px; border-radius: 8px; margin-top: 40px; }
|
||||
.sources a { color: #007bff; text-decoration: none; }
|
||||
.sources a:hover { text-decoration: underline; }
|
||||
`)
|
||||
html.WriteString("</style>\n</head>\n<body>\n")
|
||||
|
||||
html.WriteString(fmt.Sprintf("<h1>%s</h1>\n", page.Title))
|
||||
if page.Subtitle != "" {
|
||||
html.WriteString(fmt.Sprintf("<p class=\"subtitle\">%s</p>\n", page.Subtitle))
|
||||
}
|
||||
|
||||
for _, section := range page.Sections {
|
||||
html.WriteString(fmt.Sprintf("<h2>%s</h2>\n", section.Title))
|
||||
paragraphs := strings.Split(section.Content, "\n\n")
|
||||
for _, p := range paragraphs {
|
||||
p = strings.TrimSpace(p)
|
||||
if p != "" {
|
||||
if strings.HasPrefix(p, "- ") {
|
||||
html.WriteString("<ul>\n")
|
||||
for _, item := range strings.Split(p, "\n") {
|
||||
item = strings.TrimPrefix(item, "- ")
|
||||
html.WriteString(fmt.Sprintf("<li>%s</li>\n", item))
|
||||
}
|
||||
html.WriteString("</ul>\n")
|
||||
} else {
|
||||
html.WriteString(fmt.Sprintf("<p>%s</p>\n", p))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
html.WriteString("<div class=\"sources\">\n<h3>Sources</h3>\n<ol>\n")
|
||||
for _, src := range page.Sources {
|
||||
html.WriteString(fmt.Sprintf("<li><a href=\"%s\" target=\"_blank\">%s</a> (%s)</li>\n", src.URL, src.Title, src.Domain))
|
||||
}
|
||||
html.WriteString("</ol>\n</div>\n")
|
||||
|
||||
html.WriteString("</body>\n</html>")
|
||||
|
||||
return html.String()
|
||||
}
|
||||
|
||||
func (g *PageGenerator) ToJSON(page *Page) (string, error) {
|
||||
data, err := json.MarshalIndent(page, "", " ")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(data), nil
|
||||
}
|
||||
|
||||
// truncate shortens s to at most maxLen bytes and appends "..." when
// anything was removed; strings that already fit are returned unchanged.
//
// The cut is moved back to the nearest rune boundary so a multi-byte UTF-8
// character (for example Cyrillic text) is never split, which would produce
// invalid UTF-8. A maxLen of zero or less yields just "..." for any string
// that does not fit.
func truncate(s string, maxLen int) string {
	if len(s) <= maxLen {
		return s
	}

	// Ranging over a string yields the byte offset of each rune start; keep
	// the last one that does not exceed maxLen.
	cut := 0
	for i := range s {
		if i > maxLen {
			break
		}
		cut = i
	}
	return s[:cut] + "..."
}
|
||||
|
||||
// extractDomain returns the host portion of url for display purposes.
//
// It strips a leading "https://" or "http://" scheme and a leading "www.",
// then cuts the remainder at the first path, query or fragment delimiter
// ('/', '?' or '#'), so "https://example.com?x=1" yields "example.com". A
// port, if present, is kept. Input without any delimiter is returned as is
// after prefix stripping; an empty string yields an empty string.
func extractDomain(url string) string {
	for _, prefix := range []string{"https://", "http://", "www."} {
		url = strings.TrimPrefix(url, prefix)
	}
	// idx > 0 (not >= 0) keeps the input untouched when it starts with a
	// delimiter, i.e. when there is no host to extract.
	if idx := strings.IndexAny(url, "/?#"); idx > 0 {
		return url[:idx]
	}
	return url
}
|
||||
Reference in New Issue
Block a user