package agent

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
	"regexp"
	"strings"
	"time"

	"github.com/google/uuid"

	"github.com/gooseek/backend/internal/llm"
	"github.com/gooseek/backend/internal/search"
)

// crawlClient is shared by all Crawl4AI requests so the underlying transport
// can reuse TCP/TLS connections; a per-request http.Client defeats pooling.
var crawlClient = &http.Client{Timeout: 20 * time.Second}

var (
	// llmJSONArrayRe pulls the first JSON array out of a free-form LLM response.
	llmJSONArrayRe = regexp.MustCompile(`\[[\s\S]*\]`)
	// llmEventObjRe matches individual flat JSON objects containing a "title"
	// key, used to salvage events when the full array fails to parse.
	llmEventObjRe = regexp.MustCompile(`\{[^{}]*"title"\s*:\s*"[^"]+[^{}]*\}`)
)

// CollectEventsEnriched collects real upcoming events/activities for the destination.
// Pipeline: SearXNG (event-focused queries) -> Crawl4AI -> LLM extraction -> geocode.
// Only returns actual events (concerts, exhibitions, festivals, etc.), NOT news articles.
func CollectEventsEnriched(ctx context.Context, cfg TravelOrchestratorConfig, brief *TripBrief) ([]EventCard, error) {
	if cfg.SearchClient == nil {
		return nil, nil
	}
	rawResults := searchForEvents(ctx, cfg.SearchClient, brief)
	if len(rawResults) == 0 {
		log.Printf("[travel-events] no search results found")
		return nil, nil
	}
	log.Printf("[travel-events] found %d raw search results", len(rawResults))

	// Crawling is optional enrichment: skipped entirely when Crawl4AI is not configured.
	var crawledContent []crawledPage
	if cfg.Crawl4AIURL != "" {
		crawledContent = crawlEventPages(ctx, cfg.Crawl4AIURL, rawResults)
	}

	events := extractEventsWithLLM(ctx, cfg.LLM, brief, rawResults, crawledContent)
	events = geocodeEvents(ctx, cfg, brief, events)
	events = deduplicateEvents(events)
	events = filterFreshEvents(events, brief.StartDate)

	// Hard filter: drop events that ended up in another city/country due to
	// ambiguous geocoding.
	destGeo := geocodeDestinations(ctx, cfg, brief)
	events = filterEventsNearDestinations(events, destGeo, 250)

	if len(events) > 15 {
		events = events[:15]
	}
	log.Printf("[travel-events] returning %d events", len(events))
	return events, nil
}

// crawledPage is a page fetched through Crawl4AI, reduced to markdown text.
type crawledPage struct {
	URL     string
	Title   string
	Content string
}

// eventSearchResult is a single SearXNG hit kept after news/staleness filtering.
type eventSearchResult struct {
	Title         string
	URL           string
	Content       string
	PublishedDate string
	Engine        string
}

// searchForEvents runs event-focused queries for every destination and returns
// deduplicated results, skipping news articles and stale content.
func searchForEvents(ctx context.Context, client *search.SearXNGClient, brief *TripBrief) []eventSearchResult {
	var results []eventSearchResult
	seen := make(map[string]bool) // keyed by URL to dedupe across queries
	for _, dest := range brief.Destinations {
		queries := generateEventQueries(dest, brief.StartDate, brief.EndDate)
		for _, q := range queries {
			searchCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
			resp, err := client.Search(searchCtx, q, &search.SearchOptions{
				Categories: []string{"general"},
				PageNo:     1,
			})
			cancel()
			if err != nil {
				// Best-effort: a single failed query must not abort the collection.
				log.Printf("[travel-events] search error for '%s': %v", q, err)
				continue
			}
			for _, r := range resp.Results {
				if r.URL == "" || seen[r.URL] {
					continue
				}
				if isNewsArticleURL(r.URL) || isOldContent(r.PublishedDate) {
					continue
				}
				seen[r.URL] = true
				results = append(results, eventSearchResult{
					Title:         r.Title,
					URL:           r.URL,
					Content:       r.Content,
					PublishedDate: r.PublishedDate,
					Engine:        r.Engine,
				})
			}
		}
	}
	return results
}

// generateEventQueries builds Russian-language event-listing queries for one
// destination, using the trip's month/year when parseable (falling back to the
// current month/year).
func generateEventQueries(destination, startDate, endDate string) []string {
	month := ""
	year := ""
	// startDate is expected as "YYYY-MM-DD"; len >= 7 guarantees "YYYY-MM".
	if len(startDate) >= 7 {
		parts := strings.Split(startDate, "-")
		if len(parts) >= 2 {
			year = parts[0]
			monthNum := parts[1]
			monthNames := map[string]string{
				"01": "январь", "02": "февраль", "03": "март", "04": "апрель",
				"05": "май", "06": "июнь", "07": "июль", "08": "август",
				"09": "сентябрь", "10": "октябрь", "11": "ноябрь", "12": "декабрь",
			}
			month = monthNames[monthNum]
		}
	}
	if year == "" {
		year = time.Now().Format("2006")
	}
	if month == "" {
		// Index 0 is a placeholder so time.Month (1-based) indexes directly.
		monthNames := []string{"", "январь", "февраль", "март", "апрель", "май", "июнь",
			"июль", "август", "сентябрь", "октябрь", "ноябрь", "декабрь"}
		month = monthNames[time.Now().Month()]
	}
	queries := []string{
		fmt.Sprintf("афиша %s %s %s концерты выставки", destination, month, year),
		fmt.Sprintf("мероприятия %s %s %s расписание", destination, month, year),
		fmt.Sprintf("куда сходить %s %s %s", destination, month, year),
		fmt.Sprintf("site:afisha.ru %s %s", destination, month),
		fmt.Sprintf("site:kassir.ru %s %s %s", destination, month, year),
	}
	return queries
}

// isNewsArticleURL reports whether the URL looks like a news article or a
// known Russian news outlet — these are explicitly NOT events.
func isNewsArticleURL(u string) bool {
	newsPatterns := []string{
		"/news/", "/novosti/", "/article/", "/stati/",
		"ria.ru", "tass.ru", "rbc.ru", "lenta.ru", "gazeta.ru",
		"interfax.ru", "kommersant.ru", "iz.ru", "mk.ru", "regnum.ru",
		"aif.ru", "kp.ru",
	}
	lower := strings.ToLower(u)
	for _, p := range newsPatterns {
		if strings.Contains(lower, p) {
			return true
		}
	}
	return false
}

// isOldContent reports whether publishedDate parses to a date more than six
// months in the past. Unknown or unparseable dates are treated as fresh.
func isOldContent(publishedDate string) bool {
	if publishedDate == "" {
		return false
	}
	// time.RFC3339 covers both the trailing-"Z" and numeric-offset forms that
	// were previously listed as two separate layouts.
	formats := []string{
		time.RFC3339,
		"2006-01-02",
		"02.01.2006",
	}
	cutoff := time.Now().AddDate(0, -6, 0)
	for _, f := range formats {
		if t, err := time.Parse(f, publishedDate); err == nil {
			return t.Before(cutoff)
		}
	}
	return false
}

// filterFreshEvents keeps events relevant to the trip window: nothing that
// ended more than a month before the trip, nothing starting more than two
// months after it. Events with unparseable dates are kept.
func filterFreshEvents(events []EventCard, tripStartDate string) []EventCard {
	if tripStartDate == "" {
		return events
	}
	tripStart, err := time.Parse("2006-01-02", tripStartDate)
	if err != nil {
		return events
	}
	cutoff := tripStart.AddDate(0, -1, 0)
	twoMonthsAfterTrip := tripStart.AddDate(0, 2, 0) // loop-invariant
	var fresh []EventCard
	for _, e := range events {
		if e.DateEnd != "" {
			if endDate, err := time.Parse("2006-01-02", e.DateEnd); err == nil {
				if endDate.Before(cutoff) {
					continue
				}
			}
		}
		if e.DateStart != "" {
			if startDate, err := time.Parse("2006-01-02", e.DateStart); err == nil {
				if startDate.After(twoMonthsAfterTrip) {
					continue
				}
			}
		}
		fresh = append(fresh, e)
	}
	return fresh
}

// crawlEventPages fetches up to four of the search results through Crawl4AI,
// keeping only pages with substantive content (>100 chars of markdown).
func crawlEventPages(ctx context.Context, crawl4aiURL string, results []eventSearchResult) []crawledPage {
	maxCrawl := 4
	if len(results) < maxCrawl {
		maxCrawl = len(results)
	}
	var pages []crawledPage
	for _, r := range results[:maxCrawl] {
		crawlCtx, cancel := context.WithTimeout(ctx, 15*time.Second)
		page, err := crawlSinglePage(crawlCtx, crawl4aiURL, r.URL)
		cancel()
		if err != nil {
			log.Printf("[travel-events] crawl failed for %s: %v", r.URL, err)
			continue
		}
		if page != nil && len(page.Content) > 100 {
			pages = append(pages, *page)
		}
	}
	return pages
}

// crawlSinglePage asks Crawl4AI to fetch one page and returns its title and
// markdown content (truncated to 10000 bytes).
func crawlSinglePage(ctx context.Context, crawl4aiURL, pageURL string) (*crawledPage, error) {
	// Marshal the request body instead of interpolating the URL into a string
	// literal: a URL containing '"' or '\' would otherwise break (or inject
	// into) the JSON payload.
	payload := map[string]interface{}{
		"urls": []string{pageURL},
		"crawler_config": map[string]interface{}{
			"type": "CrawlerRunConfig",
			"params": map[string]interface{}{
				"cache_mode":   "default",
				"page_timeout": 15000,
			},
		},
	}
	reqBody, err := json.Marshal(payload)
	if err != nil {
		return nil, err
	}
	req, err := http.NewRequestWithContext(ctx, "POST", crawl4aiURL+"/crawl", strings.NewReader(string(reqBody)))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/json")
	resp, err := crawlClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("crawl4ai returned %d", resp.StatusCode)
	}
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	content := extractCrawledMarkdown(string(body))
	title := extractCrawledTitle(string(body))
	if len(content) > 10000 {
		content = content[:10000]
	}
	return &crawledPage{
		URL:     pageURL,
		Title:   title,
		Content: content,
	}, nil
}

// extractCrawledMarkdown pulls markdown from a Crawl4AI response, preferring
// raw_markdown over the processed markdown field.
func extractCrawledMarkdown(response string) string {
	var result struct {
		Results []struct {
			RawMarkdown string `json:"raw_markdown"`
			Markdown    string `json:"markdown"`
		} `json:"results"`
	}
	if err := json.Unmarshal([]byte(response), &result); err == nil && len(result.Results) > 0 {
		if result.Results[0].RawMarkdown != "" {
			return result.Results[0].RawMarkdown
		}
		return result.Results[0].Markdown
	}
	return ""
}

// extractCrawledTitle pulls the page title from a Crawl4AI response, or ""
// when the response cannot be parsed.
func extractCrawledTitle(response string) string {
	var result struct {
		Results []struct {
			Title string `json:"title"`
		} `json:"results"`
	}
	if err := json.Unmarshal([]byte(response), &result); err == nil && len(result.Results) > 0 {
		return result.Results[0].Title
	}
	return ""
}

// extractEventsWithLLM feeds the collected search snippets and crawled pages
// to the LLM and parses its JSON response into EventCards, falling back to a
// per-object partial parse when the full array is malformed.
func extractEventsWithLLM(ctx context.Context, llmClient llm.Client, brief *TripBrief, searchResults []eventSearchResult, crawled []crawledPage) []EventCard {
	var contextBuilder strings.Builder
	contextBuilder.WriteString("Данные об афише и мероприятиях:\n\n")
	maxSearch := 10
	if len(searchResults) < maxSearch {
		maxSearch = len(searchResults)
	}
	for i := 0; i < maxSearch; i++ {
		r := searchResults[i]
		contextBuilder.WriteString(fmt.Sprintf("### %s\nURL: %s\n%s\n\n", r.Title, r.URL, truncateStr(r.Content, 300)))
	}
	if len(crawled) > 0 {
		contextBuilder.WriteString("\nПодробности со страниц:\n\n")
		maxCrawled := 3
		if len(crawled) < maxCrawled {
			maxCrawled = len(crawled)
		}
		for i := 0; i < maxCrawled; i++ {
			p := crawled[i]
			contextBuilder.WriteString(fmt.Sprintf("### %s (%s)\n%s\n\n", p.Title, p.URL, truncateStr(p.Content, 2000)))
		}
	}
	currentYear := time.Now().Format("2006")
	prompt := fmt.Sprintf(`Извлеки ТОЛЬКО реальные МЕРОПРИЯТИЯ (концерты, выставки, фестивали, спектакли, спортивные события) в %s на %s — %s.

%s

СТРОГО ЗАПРЕЩЕНО:
- Новостные статьи, обзоры, блог-посты — это НЕ мероприятия
- Устаревшие события (до %s года)
- Выдуманные мероприятия

JSON (ТОЛЬКО массив, без текста):
[{"id":"evt-1","title":"Название","description":"Что за мероприятие, 1 предложение","dateStart":"YYYY-MM-DD","dateEnd":"YYYY-MM-DD","price":500,"currency":"RUB","url":"https://...","address":"Город, Площадка, адрес","tags":["концерт"]}]

Правила:
- ТОЛЬКО конкретные мероприятия с названием, местом и датой
- dateStart/dateEnd в формате YYYY-MM-DD, если дата неизвестна — ""
- price в рублях, 0 если неизвестна
- address — точный адрес площадки для геокодинга
- tags: концерт, выставка, фестиваль, спектакль, спорт, кино, мастер-класс, экскурсия
- Максимум 10 мероприятий`,
		strings.Join(brief.Destinations, ", "),
		brief.StartDate,
		brief.EndDate,
		contextBuilder.String(),
		currentYear,
	)
	llmCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()
	response, err := llmClient.GenerateText(llmCtx, llm.StreamRequest{
		Messages: []llm.Message{{Role: llm.RoleUser, Content: prompt}},
		Options:  llm.StreamOptions{MaxTokens: 3000, Temperature: 0.1},
	})
	if err != nil {
		log.Printf("[travel-events] LLM extraction failed: %v", err)
		return nil
	}
	jsonMatch := llmJSONArrayRe.FindString(response)
	if jsonMatch == "" {
		log.Printf("[travel-events] no JSON array in LLM response (len=%d)", len(response))
		return nil
	}
	var events []EventCard
	if err := json.Unmarshal([]byte(jsonMatch), &events); err != nil {
		log.Printf("[travel-events] JSON parse error: %v", err)
		events = tryPartialEventParse(jsonMatch)
		if len(events) == 0 {
			return nil
		}
	}
	for i := range events {
		if events[i].ID == "" {
			events[i].ID = uuid.New().String()
		}
	}
	log.Printf("[travel-events] extracted %d events from LLM", len(events))
	return events
}

// tryPartialEventParse salvages individual event objects from a malformed
// JSON array by matching flat objects that contain a "title" key.
func tryPartialEventParse(jsonStr string) []EventCard {
	var events []EventCard
	matches := llmEventObjRe.FindAllString(jsonStr, -1)
	for _, m := range matches {
		var e EventCard
		if err := json.Unmarshal([]byte(m), &e); err == nil && e.Title != "" {
			events = append(events, e)
		}
	}
	if len(events) > 0 {
		log.Printf("[travel-events] partial parse recovered %d events", len(events))
	}
	return events
}

// geocodeEvents fills in Lat/Lng for events that lack coordinates, trying the
// address, address+destination, and finally the event title as queries.
// Events that still cannot be geocoded are left with zero coordinates (and
// are later dropped by the distance filter).
func geocodeEvents(ctx context.Context, cfg TravelOrchestratorConfig, brief *TripBrief, events []EventCard) []EventCard {
	destSuffix := strings.Join(brief.Destinations, ", ")
	for i := range events {
		if events[i].Lat != 0 && events[i].Lng != 0 {
			continue // already has coordinates
		}
		var queries []string
		if addr := events[i].Address; addr != "" {
			queries = append(queries, addr)
			if destSuffix != "" && !strings.Contains(strings.ToLower(addr), strings.ToLower(destSuffix)) {
				queries = append(queries, fmt.Sprintf("%s, %s", addr, destSuffix))
			}
		}
		// Title-based fallback also gives address-less events a chance to be
		// geocoded instead of being dropped later for zero coordinates.
		queries = append(queries, fmt.Sprintf("%s, %s", events[i].Title, destSuffix))
		var lastErr error
		for _, q := range queries {
			geoCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
			geo, err := cfg.TravelData.Geocode(geoCtx, q)
			cancel()
			if err != nil {
				lastErr = err
				continue
			}
			events[i].Lat = geo.Lat
			events[i].Lng = geo.Lng
			break
		}
		if events[i].Lat == 0 && events[i].Lng == 0 {
			if lastErr != nil {
				log.Printf("[travel-events] geocode failed for '%s': %v", events[i].Address, lastErr)
			} else {
				log.Printf("[travel-events] geocode failed for '%s'", events[i].Address)
			}
			continue
		}
	}
	return events
}

// filterEventsNearDestinations drops events farther than maxKm from every
// geocoded destination; events without coordinates are dropped outright.
func filterEventsNearDestinations(events []EventCard, destinations []destGeoEntry, maxKm float64) []EventCard {
	if len(destinations) == 0 {
		return events
	}
	filtered := make([]EventCard, 0, len(events))
	for _, e := range events {
		if e.Lat == 0 && e.Lng == 0 {
			continue
		}
		minD := 1e18 // sentinel: larger than any real distance on Earth
		for _, d := range destinations {
			if d.Lat == 0 && d.Lng == 0 {
				continue
			}
			dd := distanceKm(e.Lat, e.Lng, d.Lat, d.Lng)
			if dd < minD {
				minD = dd
			}
		}
		if minD <= maxKm {
			filtered = append(filtered, e)
		} else {
			log.Printf("[travel-events] dropped far event '%s' (%.0fkm from destinations)", e.Title, minD)
		}
	}
	return filtered
}

// deduplicateEvents keeps the first event for each lowercased title,
// truncated to 50 bytes so near-identical long titles collapse together.
func deduplicateEvents(events []EventCard) []EventCard {
	seen := make(map[string]bool)
	var unique []EventCard
	for _, e := range events {
		key := strings.ToLower(e.Title)
		if len(key) > 50 {
			key = key[:50]
		}
		if seen[key] {
			continue
		}
		seen[key] = true
		unique = append(unique, e)
	}
	return unique
}