- Add Gitea Actions workflow for automated build & deploy - Add K8s manifests: webui, travel-svc, medicine-svc, sandbox-svc - Update kustomization for localhost:5000 registry - Add ingress for gooseek.ru and api.gooseek.ru - Learning cabinet with onboarding, courses, sandbox integration - Medicine service with symptom analysis and doctor matching - Travel service with itinerary planning - Server setup scripts (NVIDIA/CUDA, K3s, Gitea runner) Made-with: Cursor
518 lines
14 KiB
Go
518 lines
14 KiB
Go
package agent
|
||
|
||
import (
|
||
"context"
|
||
"encoding/json"
|
||
"fmt"
|
||
"io"
|
||
"log"
|
||
"net/http"
|
||
"regexp"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/gooseek/backend/internal/llm"
|
||
"github.com/gooseek/backend/internal/search"
|
||
"github.com/google/uuid"
|
||
)
|
||
|
||
// CollectEventsEnriched collects real upcoming events/activities for the destination.
|
||
// Pipeline: SearXNG (event-focused queries) -> Crawl4AI -> LLM extraction -> geocode.
|
||
// Only returns actual events (concerts, exhibitions, festivals, etc.), NOT news articles.
|
||
func CollectEventsEnriched(ctx context.Context, cfg TravelOrchestratorConfig, brief *TripBrief) ([]EventCard, error) {
|
||
if cfg.SearchClient == nil {
|
||
return nil, nil
|
||
}
|
||
|
||
rawResults := searchForEvents(ctx, cfg.SearchClient, brief)
|
||
if len(rawResults) == 0 {
|
||
log.Printf("[travel-events] no search results found")
|
||
return nil, nil
|
||
}
|
||
|
||
log.Printf("[travel-events] found %d raw search results", len(rawResults))
|
||
|
||
var crawledContent []crawledPage
|
||
if cfg.Crawl4AIURL != "" {
|
||
crawledContent = crawlEventPages(ctx, cfg.Crawl4AIURL, rawResults)
|
||
}
|
||
|
||
events := extractEventsWithLLM(ctx, cfg.LLM, brief, rawResults, crawledContent)
|
||
|
||
events = geocodeEvents(ctx, cfg, brief, events)
|
||
|
||
events = deduplicateEvents(events)
|
||
|
||
events = filterFreshEvents(events, brief.StartDate)
|
||
|
||
// Hard filter: drop events that ended up in another city/country due to ambiguous geocoding.
|
||
destGeo := geocodeDestinations(ctx, cfg, brief)
|
||
events = filterEventsNearDestinations(events, destGeo, 250)
|
||
|
||
if len(events) > 15 {
|
||
events = events[:15]
|
||
}
|
||
|
||
log.Printf("[travel-events] returning %d events", len(events))
|
||
return events, nil
|
||
}
|
||
|
||
// crawledPage holds the result of fetching one web page through the Crawl4AI
// service: the page URL plus the title and markdown content extracted from the
// crawler's JSON response (see crawlSinglePage / extractCrawledMarkdown).
type crawledPage struct {
	URL     string // page that was crawled
	Title   string // title reported by the crawler; may be empty
	Content string // extracted markdown; crawlSinglePage caps it at 10000 bytes
}
|
||
|
||
// eventSearchResult is one deduplicated SearXNG hit that passed the
// news-URL and freshness filters in searchForEvents.
type eventSearchResult struct {
	Title         string // result title as returned by the search engine
	URL           string // unique key for deduplication; never empty
	Content       string // snippet text; fed (truncated) into the LLM prompt
	PublishedDate string // raw date string from SearXNG; format varies by engine, may be empty
	Engine        string // which SearXNG engine produced the hit
}
|
||
|
||
func searchForEvents(ctx context.Context, client *search.SearXNGClient, brief *TripBrief) []eventSearchResult {
|
||
var results []eventSearchResult
|
||
seen := make(map[string]bool)
|
||
|
||
for _, dest := range brief.Destinations {
|
||
queries := generateEventQueries(dest, brief.StartDate, brief.EndDate)
|
||
|
||
for _, q := range queries {
|
||
searchCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
||
resp, err := client.Search(searchCtx, q, &search.SearchOptions{
|
||
Categories: []string{"general"},
|
||
PageNo: 1,
|
||
})
|
||
cancel()
|
||
|
||
if err != nil {
|
||
log.Printf("[travel-events] search error for '%s': %v", q, err)
|
||
continue
|
||
}
|
||
|
||
for _, r := range resp.Results {
|
||
if r.URL == "" || seen[r.URL] {
|
||
continue
|
||
}
|
||
if isNewsArticleURL(r.URL) || isOldContent(r.PublishedDate) {
|
||
continue
|
||
}
|
||
seen[r.URL] = true
|
||
results = append(results, eventSearchResult{
|
||
Title: r.Title,
|
||
URL: r.URL,
|
||
Content: r.Content,
|
||
PublishedDate: r.PublishedDate,
|
||
Engine: r.Engine,
|
||
})
|
||
}
|
||
}
|
||
}
|
||
|
||
return results
|
||
}
|
||
|
||
// generateEventQueries builds Russian-language search queries ("афиша",
// "мероприятия", plus site-restricted ticket portals) for one destination.
// Month/year come from startDate when it parses as YYYY-MM...; otherwise the
// current month/year are used. endDate is currently unused but kept so the
// signature stays stable for callers.
func generateEventQueries(destination, startDate, endDate string) []string {
	var month, year string
	if parts := strings.Split(startDate, "-"); len(startDate) >= 7 && len(parts) >= 2 {
		ruByNumber := map[string]string{
			"01": "январь", "02": "февраль", "03": "март",
			"04": "апрель", "05": "май", "06": "июнь",
			"07": "июль", "08": "август", "09": "сентябрь",
			"10": "октябрь", "11": "ноябрь", "12": "декабрь",
		}
		year, month = parts[0], ruByNumber[parts[1]]
	}

	// Fall back to "now" for anything the start date did not supply
	// (unparseable date, or a month number outside 01-12).
	if year == "" {
		year = time.Now().Format("2006")
	}
	if month == "" {
		ruByIndex := []string{"", "январь", "февраль", "март", "апрель", "май", "июнь",
			"июль", "август", "сентябрь", "октябрь", "ноябрь", "декабрь"}
		month = ruByIndex[time.Now().Month()]
	}

	return []string{
		fmt.Sprintf("афиша %s %s %s концерты выставки", destination, month, year),
		fmt.Sprintf("мероприятия %s %s %s расписание", destination, month, year),
		fmt.Sprintf("куда сходить %s %s %s", destination, month, year),
		fmt.Sprintf("site:afisha.ru %s %s", destination, month),
		fmt.Sprintf("site:kassir.ru %s %s %s", destination, month, year),
	}
}
|
||
|
||
// isNewsArticleURL reports whether the URL looks like a news article rather
// than an event listing, by substring-matching (case-insensitively) against
// known news-site domains and typical news path segments.
func isNewsArticleURL(u string) bool {
	lowered := strings.ToLower(u)
	for _, marker := range []string{
		"/news/", "/novosti/", "/article/", "/stati/",
		"ria.ru", "tass.ru", "rbc.ru", "lenta.ru", "gazeta.ru",
		"interfax.ru", "kommersant.ru", "iz.ru", "mk.ru",
		"regnum.ru", "aif.ru", "kp.ru",
	} {
		if strings.Contains(lowered, marker) {
			return true
		}
	}
	return false
}
|
||
|
||
// isOldContent reports whether publishedDate parses under one of the known
// layouts and is more than six months in the past. Empty or unparseable
// dates are treated as fresh (false) so results are not discarded on
// missing metadata.
func isOldContent(publishedDate string) bool {
	if publishedDate == "" {
		return false
	}
	cutoff := time.Now().AddDate(0, -6, 0)
	for _, layout := range []string{
		"2006-01-02T15:04:05Z",
		"2006-01-02T15:04:05-07:00",
		"2006-01-02",
		"02.01.2006",
	} {
		parsed, err := time.Parse(layout, publishedDate)
		if err != nil {
			continue
		}
		// First layout that parses decides the answer.
		return parsed.Before(cutoff)
	}
	return false
}
|
||
|
||
func filterFreshEvents(events []EventCard, tripStartDate string) []EventCard {
|
||
if tripStartDate == "" {
|
||
return events
|
||
}
|
||
tripStart, err := time.Parse("2006-01-02", tripStartDate)
|
||
if err != nil {
|
||
return events
|
||
}
|
||
|
||
cutoff := tripStart.AddDate(0, -1, 0)
|
||
var fresh []EventCard
|
||
for _, e := range events {
|
||
if e.DateEnd != "" {
|
||
if endDate, err := time.Parse("2006-01-02", e.DateEnd); err == nil {
|
||
if endDate.Before(cutoff) {
|
||
continue
|
||
}
|
||
}
|
||
}
|
||
if e.DateStart != "" {
|
||
if startDate, err := time.Parse("2006-01-02", e.DateStart); err == nil {
|
||
twoMonthsAfterTrip := tripStart.AddDate(0, 2, 0)
|
||
if startDate.After(twoMonthsAfterTrip) {
|
||
continue
|
||
}
|
||
}
|
||
}
|
||
fresh = append(fresh, e)
|
||
}
|
||
return fresh
|
||
}
|
||
|
||
func crawlEventPages(ctx context.Context, crawl4aiURL string, results []eventSearchResult) []crawledPage {
|
||
maxCrawl := 4
|
||
if len(results) < maxCrawl {
|
||
maxCrawl = len(results)
|
||
}
|
||
|
||
var pages []crawledPage
|
||
|
||
for _, r := range results[:maxCrawl] {
|
||
crawlCtx, cancel := context.WithTimeout(ctx, 15*time.Second)
|
||
page, err := crawlSinglePage(crawlCtx, crawl4aiURL, r.URL)
|
||
cancel()
|
||
|
||
if err != nil {
|
||
log.Printf("[travel-events] crawl failed for %s: %v", r.URL, err)
|
||
continue
|
||
}
|
||
|
||
if page != nil && len(page.Content) > 100 {
|
||
pages = append(pages, *page)
|
||
}
|
||
}
|
||
|
||
return pages
|
||
}
|
||
|
||
func crawlSinglePage(ctx context.Context, crawl4aiURL, pageURL string) (*crawledPage, error) {
|
||
reqBody := fmt.Sprintf(`{
|
||
"urls": ["%s"],
|
||
"crawler_config": {
|
||
"type": "CrawlerRunConfig",
|
||
"params": {
|
||
"cache_mode": "default",
|
||
"page_timeout": 15000
|
||
}
|
||
}
|
||
}`, pageURL)
|
||
|
||
req, err := http.NewRequestWithContext(ctx, "POST", crawl4aiURL+"/crawl", strings.NewReader(reqBody))
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
req.Header.Set("Content-Type", "application/json")
|
||
|
||
client := &http.Client{Timeout: 20 * time.Second}
|
||
resp, err := client.Do(req)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
defer resp.Body.Close()
|
||
|
||
if resp.StatusCode != http.StatusOK {
|
||
return nil, fmt.Errorf("crawl4ai returned %d", resp.StatusCode)
|
||
}
|
||
|
||
body, err := io.ReadAll(resp.Body)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
content := extractCrawledMarkdown(string(body))
|
||
title := extractCrawledTitle(string(body))
|
||
|
||
if len(content) > 10000 {
|
||
content = content[:10000]
|
||
}
|
||
|
||
return &crawledPage{
|
||
URL: pageURL,
|
||
Title: title,
|
||
Content: content,
|
||
}, nil
|
||
}
|
||
|
||
// extractCrawledMarkdown pulls the markdown body out of a Crawl4AI JSON
// response, preferring "raw_markdown" over "markdown" in the first result.
// Returns "" when the response is not valid JSON or has no results.
func extractCrawledMarkdown(response string) string {
	var parsed struct {
		Results []struct {
			RawMarkdown string `json:"raw_markdown"`
			Markdown    string `json:"markdown"`
		} `json:"results"`
	}

	if json.Unmarshal([]byte(response), &parsed) != nil || len(parsed.Results) == 0 {
		return ""
	}

	first := parsed.Results[0]
	if first.RawMarkdown != "" {
		return first.RawMarkdown
	}
	return first.Markdown
}
|
||
|
||
// extractCrawledTitle pulls the title of the first result out of a Crawl4AI
// JSON response, or "" when the response is malformed or empty.
func extractCrawledTitle(response string) string {
	var parsed struct {
		Results []struct {
			Title string `json:"title"`
		} `json:"results"`
	}

	if json.Unmarshal([]byte(response), &parsed) != nil || len(parsed.Results) == 0 {
		return ""
	}
	return parsed.Results[0].Title
}
|
||
|
||
// extractEventsWithLLM builds a Russian-language prompt from the search
// snippets (up to 10, truncated to 300 chars each) and crawled pages (up to
// 3, truncated to 2000 chars each), asks the LLM to emit a strict JSON array
// of events, and parses it into EventCard values. Returns nil on LLM error,
// missing JSON, or an unrecoverable parse failure.
func extractEventsWithLLM(ctx context.Context, llmClient llm.Client, brief *TripBrief, searchResults []eventSearchResult, crawled []crawledPage) []EventCard {
	var contextBuilder strings.Builder

	// Section 1 of the prompt context: raw search snippets.
	contextBuilder.WriteString("Данные об афише и мероприятиях:\n\n")
	maxSearch := 10
	if len(searchResults) < maxSearch {
		maxSearch = len(searchResults)
	}
	for i := 0; i < maxSearch; i++ {
		r := searchResults[i]
		contextBuilder.WriteString(fmt.Sprintf("### %s\nURL: %s\n%s\n\n", r.Title, r.URL, truncateStr(r.Content, 300)))
	}

	// Section 2 (optional): fuller page contents from the crawler.
	if len(crawled) > 0 {
		contextBuilder.WriteString("\nПодробности со страниц:\n\n")
		maxCrawled := 3
		if len(crawled) < maxCrawled {
			maxCrawled = len(crawled)
		}
		for i := 0; i < maxCrawled; i++ {
			p := crawled[i]
			contextBuilder.WriteString(fmt.Sprintf("### %s (%s)\n%s\n\n", p.Title, p.URL, truncateStr(p.Content, 2000)))
		}
	}

	// Used in the prompt to forbid events predating the current year.
	currentYear := time.Now().Format("2006")

	// Prompt (Russian): extract ONLY real events for the destinations/dates,
	// forbid news articles / stale / invented events, and answer with a bare
	// JSON array in the schema shown, max 10 items.
	prompt := fmt.Sprintf(`Извлеки ТОЛЬКО реальные МЕРОПРИЯТИЯ (концерты, выставки, фестивали, спектакли, спортивные события) в %s на %s — %s.

%s

СТРОГО ЗАПРЕЩЕНО:
- Новостные статьи, обзоры, блог-посты — это НЕ мероприятия
- Устаревшие события (до %s года)
- Выдуманные мероприятия

JSON (ТОЛЬКО массив, без текста):
[{"id":"evt-1","title":"Название","description":"Что за мероприятие, 1 предложение","dateStart":"YYYY-MM-DD","dateEnd":"YYYY-MM-DD","price":500,"currency":"RUB","url":"https://...","address":"Город, Площадка, адрес","tags":["концерт"]}]

Правила:
- ТОЛЬКО конкретные мероприятия с названием, местом и датой
- dateStart/dateEnd в формате YYYY-MM-DD, если дата неизвестна — ""
- price в рублях, 0 если неизвестна
- address — точный адрес площадки для геокодинга
- tags: концерт, выставка, фестиваль, спектакль, спорт, кино, мастер-класс, экскурсия
- Максимум 10 мероприятий`,
		strings.Join(brief.Destinations, ", "),
		brief.StartDate,
		brief.EndDate,
		contextBuilder.String(),
		currentYear,
	)

	llmCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()

	// Low temperature: extraction task, not creative generation.
	response, err := llmClient.GenerateText(llmCtx, llm.StreamRequest{
		Messages: []llm.Message{{Role: llm.RoleUser, Content: prompt}},
		Options:  llm.StreamOptions{MaxTokens: 3000, Temperature: 0.1},
	})
	if err != nil {
		log.Printf("[travel-events] LLM extraction failed: %v", err)
		return nil
	}

	// Grab the outermost [...] span — LLMs often wrap JSON in prose.
	jsonMatch := regexp.MustCompile(`\[[\s\S]*\]`).FindString(response)
	if jsonMatch == "" {
		log.Printf("[travel-events] no JSON array in LLM response (len=%d)", len(response))
		return nil
	}

	var events []EventCard
	if err := json.Unmarshal([]byte(jsonMatch), &events); err != nil {
		log.Printf("[travel-events] JSON parse error: %v", err)
		// Whole-array parse failed (e.g. truncated output); salvage
		// individual objects before giving up.
		events = tryPartialEventParse(jsonMatch)
		if len(events) == 0 {
			return nil
		}
	}

	// Ensure every card has a stable ID even if the LLM omitted one.
	for i := range events {
		if events[i].ID == "" {
			events[i].ID = uuid.New().String()
		}
	}

	log.Printf("[travel-events] extracted %d events from LLM", len(events))
	return events
}
|
||
|
||
func tryPartialEventParse(jsonStr string) []EventCard {
|
||
var events []EventCard
|
||
objRegex := regexp.MustCompile(`\{[^{}]*"title"\s*:\s*"[^"]+[^{}]*\}`)
|
||
matches := objRegex.FindAllString(jsonStr, -1)
|
||
for _, m := range matches {
|
||
var e EventCard
|
||
if err := json.Unmarshal([]byte(m), &e); err == nil && e.Title != "" {
|
||
events = append(events, e)
|
||
}
|
||
}
|
||
if len(events) > 0 {
|
||
log.Printf("[travel-events] partial parse recovered %d events", len(events))
|
||
}
|
||
return events
|
||
}
|
||
|
||
func geocodeEvents(ctx context.Context, cfg TravelOrchestratorConfig, brief *TripBrief, events []EventCard) []EventCard {
|
||
destSuffix := strings.Join(brief.Destinations, ", ")
|
||
for i := range events {
|
||
if events[i].Address == "" || (events[i].Lat != 0 && events[i].Lng != 0) {
|
||
continue
|
||
}
|
||
|
||
queries := []string{events[i].Address}
|
||
if destSuffix != "" && !strings.Contains(strings.ToLower(events[i].Address), strings.ToLower(destSuffix)) {
|
||
queries = append(queries, fmt.Sprintf("%s, %s", events[i].Address, destSuffix))
|
||
}
|
||
queries = append(queries, fmt.Sprintf("%s, %s", events[i].Title, destSuffix))
|
||
|
||
var lastErr error
|
||
for _, q := range queries {
|
||
geoCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||
geo, err := cfg.TravelData.Geocode(geoCtx, q)
|
||
cancel()
|
||
if err != nil {
|
||
lastErr = err
|
||
continue
|
||
}
|
||
events[i].Lat = geo.Lat
|
||
events[i].Lng = geo.Lng
|
||
break
|
||
}
|
||
|
||
if events[i].Lat == 0 && events[i].Lng == 0 {
|
||
if lastErr != nil {
|
||
log.Printf("[travel-events] geocode failed for '%s': %v", events[i].Address, lastErr)
|
||
} else {
|
||
log.Printf("[travel-events] geocode failed for '%s'", events[i].Address)
|
||
}
|
||
continue
|
||
}
|
||
}
|
||
|
||
return events
|
||
}
|
||
|
||
func filterEventsNearDestinations(events []EventCard, destinations []destGeoEntry, maxKm float64) []EventCard {
|
||
if len(destinations) == 0 {
|
||
return events
|
||
}
|
||
filtered := make([]EventCard, 0, len(events))
|
||
for _, e := range events {
|
||
if e.Lat == 0 && e.Lng == 0 {
|
||
continue
|
||
}
|
||
minD := 1e18
|
||
for _, d := range destinations {
|
||
if d.Lat == 0 && d.Lng == 0 {
|
||
continue
|
||
}
|
||
dd := distanceKm(e.Lat, e.Lng, d.Lat, d.Lng)
|
||
if dd < minD {
|
||
minD = dd
|
||
}
|
||
}
|
||
if minD <= maxKm {
|
||
filtered = append(filtered, e)
|
||
} else {
|
||
log.Printf("[travel-events] dropped far event '%s' (%.0fkm from destinations)", e.Title, minD)
|
||
}
|
||
}
|
||
return filtered
|
||
}
|
||
|
||
func deduplicateEvents(events []EventCard) []EventCard {
|
||
seen := make(map[string]bool)
|
||
var unique []EventCard
|
||
|
||
for _, e := range events {
|
||
key := strings.ToLower(e.Title)
|
||
if len(key) > 50 {
|
||
key = key[:50]
|
||
}
|
||
if seen[key] {
|
||
continue
|
||
}
|
||
seen[key] = true
|
||
unique = append(unique, e)
|
||
}
|
||
|
||
return unique
|
||
}
|