feat: Go backend, enhanced search, new widgets, Docker deploy

Major changes:
- Add Go backend (backend/) with microservices architecture
- Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler,
  proxy-manager, media-search, fastClassifier, language detection
- New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard,
  UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions
- Improved discover-svc with discover-db integration
- Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md)
- Library-svc: project_id schema migration
- Remove deprecated finance-svc and travel-svc
- Localization improvements across services

Made-with: Cursor
This commit is contained in:
home
2026-02-27 04:15:32 +03:00
parent 328d968f3f
commit 06fe57c765
285 changed files with 53132 additions and 1871 deletions

View File

@@ -0,0 +1,343 @@
package files
import (
"bytes"
"context"
"encoding/base64"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"strings"
"github.com/gooseek/backend/internal/llm"
"github.com/ledongthuc/pdf"
)
// FileAnalyzer extracts text from uploaded files (PDF, image, plain text)
// and uses an LLM client to produce summaries, key points, and image
// descriptions. Files are persisted under storagePath.
type FileAnalyzer struct {
	llmClient   llm.Client // backend used for summarization and image description
	storagePath string     // directory where uploaded files are saved (see SaveFile)
}
// AnalysisResult is the normalized output of analyzing any supported file type.
type AnalysisResult struct {
	FileType      string                 `json:"fileType"`      // "pdf", "image", or "text"
	ExtractedText string                 `json:"extractedText"` // raw extracted text (for images: the LLM description)
	Summary       string                 `json:"summary"`       // LLM-generated summary; empty if generation failed
	KeyPoints     []string               `json:"keyPoints"`     // LLM-generated bullets; nil if unavailable
	Metadata      map[string]interface{} `json:"metadata"`      // type-specific extras (page count, byte size, ...)
}
// NewFileAnalyzer builds a FileAnalyzer backed by the given LLM client.
// An empty storagePath falls back to /tmp/gooseek-files. The storage
// directory is created eagerly, best-effort.
func NewFileAnalyzer(llmClient llm.Client, storagePath string) *FileAnalyzer {
	dir := storagePath
	if dir == "" {
		dir = "/tmp/gooseek-files"
	}
	// Best-effort: a failure here surfaces later when SaveFile tries to write.
	_ = os.MkdirAll(dir, 0755)
	return &FileAnalyzer{llmClient: llmClient, storagePath: dir}
}
// AnalyzeFile dispatches to the type-specific analyzer based on the MIME
// type prefix. Unsupported MIME types yield an error.
func (fa *FileAnalyzer) AnalyzeFile(ctx context.Context, filePath string, fileType string) (*AnalysisResult, error) {
	if strings.HasPrefix(fileType, "application/pdf") {
		return fa.analyzePDF(ctx, filePath)
	}
	if strings.HasPrefix(fileType, "image/") {
		return fa.analyzeImage(ctx, filePath, fileType)
	}
	if strings.HasPrefix(fileType, "text/") {
		return fa.analyzeText(ctx, filePath)
	}
	return nil, fmt.Errorf("unsupported file type: %s", fileType)
}
// analyzePDF extracts text from a PDF and asks the LLM for a summary.
// Summarization is best-effort: on LLM failure the extracted text is still
// returned with empty summary fields.
func (fa *FileAnalyzer) analyzePDF(ctx context.Context, filePath string) (*AnalysisResult, error) {
	text, metadata, err := extractPDFContent(filePath)
	if err != nil {
		return nil, fmt.Errorf("failed to extract PDF content: %w", err)
	}
	const maxChars = 50000
	if len(text) > maxChars {
		text = text[:maxChars] + "\n\n[Content truncated...]"
	}
	result := &AnalysisResult{
		FileType:      "pdf",
		ExtractedText: text,
		Metadata:      metadata,
	}
	// A failed summary call leaves Summary/KeyPoints at their zero values.
	if summary, keyPoints, sumErr := fa.generateSummary(ctx, text, "PDF document"); sumErr == nil {
		result.Summary = summary
		result.KeyPoints = keyPoints
	}
	return result, nil
}
// extractPDFContent pulls plain text out of every page of the PDF at
// filePath, stopping once roughly 100k characters have been collected.
// Pages that cannot be parsed are skipped rather than failing the whole
// document. Metadata carries the total page count.
func extractPDFContent(filePath string) (string, map[string]interface{}, error) {
	f, r, err := pdf.Open(filePath)
	if err != nil {
		return "", nil, err
	}
	defer f.Close()

	const maxExtract = 100000
	var sb strings.Builder
	total := r.NumPage()
	for pageNum := 1; pageNum <= total; pageNum++ {
		page := r.Page(pageNum)
		if page.V.IsNull() {
			continue
		}
		pageText, perr := page.GetPlainText(nil)
		if perr != nil {
			// Skip unreadable pages; partial extraction is still useful.
			continue
		}
		sb.WriteString(pageText)
		sb.WriteString("\n\n")
		if sb.Len() > maxExtract {
			break
		}
	}
	return sb.String(), map[string]interface{}{"numPages": total}, nil
}
// analyzeImage base64-encodes the image and asks the vision-capable LLM for
// a description, which doubles as both the extracted text and the summary.
func (fa *FileAnalyzer) analyzeImage(ctx context.Context, filePath string, mimeType string) (*AnalysisResult, error) {
	raw, err := os.ReadFile(filePath)
	if err != nil {
		return nil, fmt.Errorf("failed to read image: %w", err)
	}
	encoded := base64.StdEncoding.EncodeToString(raw)
	// Best-effort: fall back to a placeholder if the vision call fails.
	description, descErr := fa.describeImage(ctx, encoded, mimeType)
	if descErr != nil {
		description = "Image analysis unavailable"
	}
	return &AnalysisResult{
		FileType:      "image",
		ExtractedText: description,
		Summary:       description,
		KeyPoints:     extractKeyPointsFromDescription(description),
		Metadata:      map[string]interface{}{"size": len(raw)},
	}, nil
}
// describeImage sends the base64-encoded image to the LLM with an
// OCR/analysis prompt and returns the model's textual description.
func (fa *FileAnalyzer) describeImage(ctx context.Context, base64Image, mimeType string) (string, error) {
	prompt := `Analyze this image and provide:
1. A detailed description of what's shown
2. Any text visible in the image (OCR)
3. Key elements and their relationships
4. Any data, charts, or diagrams and their meaning
Be thorough but concise.`
	messages := []llm.Message{
		{
			// Use the shared role constant, consistent with generateSummary,
			// instead of the bare "user" literal.
			Role:    llm.RoleUser,
			Content: prompt,
			Images: []llm.ImageContent{
				{
					Type:     mimeType,
					Data:     base64Image,
					IsBase64: true,
				},
			},
		},
	}
	result, err := fa.llmClient.GenerateText(ctx, llm.StreamRequest{
		Messages: messages,
	})
	if err != nil {
		return "", err
	}
	return result, nil
}
// analyzeText reads a plain-text file, truncates very large content, and
// attaches a best-effort LLM summary along with size/line-count metadata.
func (fa *FileAnalyzer) analyzeText(ctx context.Context, filePath string) (*AnalysisResult, error) {
	raw, err := os.ReadFile(filePath)
	if err != nil {
		return nil, fmt.Errorf("failed to read file: %w", err)
	}
	text := string(raw)
	const maxChars = 50000
	if len(text) > maxChars {
		text = text[:maxChars] + "\n\n[Content truncated...]"
	}
	result := &AnalysisResult{
		FileType:      "text",
		ExtractedText: text,
		Metadata: map[string]interface{}{
			"size":      len(raw),
			"lineCount": strings.Count(text, "\n") + 1,
		},
	}
	// Summarization is best-effort; failures leave the summary fields empty.
	if summary, keyPoints, sumErr := fa.generateSummary(ctx, text, "text document"); sumErr == nil {
		result.Summary = summary
		result.KeyPoints = keyPoints
	}
	return result, nil
}
// generateSummary asks the LLM for a 2-3 paragraph summary plus bullet key
// points for the given document text. Very short texts (<100 chars) are
// returned as-is with no key points; input is capped at 15k characters
// before prompting.
func (fa *FileAnalyzer) generateSummary(ctx context.Context, text, docType string) (string, []string, error) {
	if len(text) < 100 {
		// Too short to be worth an LLM round trip.
		return text, nil, nil
	}
	const promptCap = 15000
	input := text
	if len(input) > promptCap {
		input = input[:promptCap] + "\n\n[Content truncated for analysis...]"
	}
	prompt := fmt.Sprintf(`Analyze this %s and provide:
1. A concise summary (2-3 paragraphs)
2. 5-7 key points as bullet points
Document content:
%s
Format your response as:
SUMMARY:
[your summary here]
KEY POINTS:
- [point 1]
- [point 2]
...`, docType, input)
	response, err := fa.llmClient.GenerateText(ctx, llm.StreamRequest{
		Messages: []llm.Message{{Role: llm.RoleUser, Content: prompt}},
	})
	if err != nil {
		return "", nil, err
	}
	summary, keyPoints := parseSummaryResponse(response)
	return summary, keyPoints, nil
}
// parseSummaryResponse splits an LLM response of the form
// "SUMMARY: ... KEY POINTS: - ..." into the summary text and a slice of
// bullet points. Bullet markers "-", "•", and "*" are recognized. Responses
// without a KEY POINTS section yield the whole text (minus any SUMMARY:
// label) and nil key points.
func parseSummaryResponse(response string) (string, []string) {
	summaryPart, keyPointsPart, hasKeyPoints := strings.Cut(response, "KEY POINTS:")
	// Fix: strip the "SUMMARY:" label even when the key-points section is
	// missing — the original returned it verbatim in that case.
	summary := strings.TrimSpace(strings.TrimPrefix(strings.TrimSpace(summaryPart), "SUMMARY:"))
	if !hasKeyPoints {
		return summary, nil
	}
	var keyPoints []string
	for _, line := range strings.Split(keyPointsPart, "\n") {
		line = strings.TrimSpace(line)
		if !strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "•") && !strings.HasPrefix(line, "*") {
			continue
		}
		point := strings.TrimPrefix(strings.TrimPrefix(strings.TrimPrefix(line, "-"), "•"), "*")
		if point = strings.TrimSpace(point); point != "" {
			keyPoints = append(keyPoints, point)
		}
	}
	return summary, keyPoints
}
// extractKeyPointsFromDescription derives key points heuristically from an
// image description by taking the non-trivial sentences (longer than 20
// chars) among the first five. No LLM call is involved.
func extractKeyPointsFromDescription(description string) []string {
	const maxSentences = 5
	var points []string
	for i, s := range strings.Split(description, ".") {
		if i >= maxSentences {
			// Fix: the original kept iterating every sentence even though
			// only the first five could ever qualify.
			break
		}
		s = strings.TrimSpace(s)
		if len(s) > 20 {
			points = append(points, s+".")
		}
	}
	return points
}
// DetectMimeType maps a filename extension to a MIME type, falling back to
// sniffing the leading bytes of content for unrecognized extensions.
func DetectMimeType(filename string, content []byte) string {
	knownTypes := map[string]string{
		".pdf":  "application/pdf",
		".png":  "image/png",
		".jpg":  "image/jpeg",
		".jpeg": "image/jpeg",
		".gif":  "image/gif",
		".webp": "image/webp",
		".txt":  "text/plain",
		".md":   "text/markdown",
		".csv":  "text/csv",
		".json": "application/json",
	}
	if mimeType, ok := knownTypes[strings.ToLower(filepath.Ext(filename))]; ok {
		return mimeType
	}
	// http.DetectContentType considers at most the first 512 bytes.
	sniffLen := len(content)
	if sniffLen > 512 {
		sniffLen = 512
	}
	return http.DetectContentType(content[:sniffLen])
}
// min returns the smaller of two ints.
//
// NOTE: Go 1.21+ provides a built-in min that this package-level definition
// shadows; it is kept so existing callers keep compiling unchanged.
func min(a, b int) int {
	if a > b {
		return b
	}
	return a
}
// SaveFile persists content under the analyzer's storage directory using the
// base name of filename, returning the destination path and bytes written.
func (fa *FileAnalyzer) SaveFile(filename string, content io.Reader) (string, int64, error) {
	// filepath.Base strips directory components, preventing path traversal
	// via names like "../../etc/passwd".
	safeName := filepath.Base(filename)
	destPath := filepath.Join(fa.storagePath, safeName)
	file, err := os.Create(destPath)
	if err != nil {
		return "", 0, err
	}
	defer file.Close()
	// NOTE(review): buf receives a full in-memory copy of the upload via
	// io.MultiWriter but is never read afterwards — this doubles memory use
	// for large files. Candidate for removal (io.Copy(file, content)
	// directly), together with the then-unused "bytes" import.
	var buf bytes.Buffer
	size, err := io.Copy(io.MultiWriter(file, &buf), content)
	if err != nil {
		return "", 0, err
	}
	return destPath, size, nil
}
// DeleteFile removes a previously saved file. The path is cleaned and must
// resolve to a location inside the analyzer's storage directory; anything
// else is rejected with an error.
func (fa *FileAnalyzer) DeleteFile(filePath string) error {
	cleaned := filepath.Clean(filePath)
	// Fix: the original bare HasPrefix check accepted escapes such as
	// "<storage>/../../etc/passwd" (os.Remove resolves "..") and sibling
	// directories like "<storage>-evil". Rel-based containment closes both.
	rel, err := filepath.Rel(fa.storagePath, cleaned)
	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return fmt.Errorf("invalid file path")
	}
	return os.Remove(cleaned)
}