feat: travel service with 2GIS routing, POI, hotels + finance providers + UI overhaul
- Add travel-svc microservice (Amadeus, TravelPayouts, 2GIS, OpenRouteService)
- Add travel orchestrator with parallel collectors (events, POI, hotels, flights)
- Add 2GIS road routing with transport cost calculation (car/bus/taxi)
- Add TravelMap (2GIS MapGL) and TravelWidgets components
- Add useTravelChat hook for streaming travel agent responses
- Refactor finance heatmap providers
- Add SearXNG settings, API proxy routes, Docker compose updates
- Update Dockerfiles, config, types, and all UI pages for consistency

Made-with: Cursor
backend/internal/agent/travel_events_collector.go (new file, +467)
@@ -0,0 +1,467 @@
package agent

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
	"regexp"
	"strings"
	"time"

	"github.com/gooseek/backend/internal/llm"
	"github.com/gooseek/backend/internal/search"
	"github.com/google/uuid"
)

// CollectEventsEnriched collects real upcoming events/activities for the destination.
// Pipeline: SearXNG (event-focused queries) -> Crawl4AI -> LLM extraction -> geocode.
// Only returns actual events (concerts, exhibitions, festivals, etc.), NOT news articles.
func CollectEventsEnriched(ctx context.Context, cfg TravelOrchestratorConfig, brief *TripBrief) ([]EventCard, error) {
	if cfg.SearchClient == nil {
		return nil, nil
	}

	rawResults := searchForEvents(ctx, cfg.SearchClient, brief)
	if len(rawResults) == 0 {
		log.Printf("[travel-events] no search results found")
		return nil, nil
	}

	log.Printf("[travel-events] found %d raw search results", len(rawResults))

	var crawledContent []crawledPage
	if cfg.Crawl4AIURL != "" {
		crawledContent = crawlEventPages(ctx, cfg.Crawl4AIURL, rawResults)
	}

	events := extractEventsWithLLM(ctx, cfg.LLM, brief, rawResults, crawledContent)

	events = geocodeEvents(ctx, cfg, events)
	events = deduplicateEvents(events)
	events = filterFreshEvents(events, brief.StartDate)

	if len(events) > 15 {
		events = events[:15]
	}

	log.Printf("[travel-events] returning %d events", len(events))
	return events, nil
}
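
// crawledPage is the trimmed result of fetching one candidate page through Crawl4AI.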
type crawledPage struct {
	URL     string
	Title   string
	Content string
}
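
// eventSearchResult is a single deduplicated hit from SearXNG.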
type eventSearchResult struct {
	Title         string
	URL           string
	Content       string
	PublishedDate string
	Engine        string
}
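
// searchForEvents runs event-focused SearXNG queries for every destination in the
// brief and returns deduplicated results, skipping news articles and stale pages.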
func searchForEvents(ctx context.Context, client *search.SearXNGClient, brief *TripBrief) []eventSearchResult {
	var results []eventSearchResult
	seen := make(map[string]bool)

	for _, dest := range brief.Destinations {
		queries := generateEventQueries(dest, brief.StartDate, brief.EndDate)

		for _, q := range queries {
			searchCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
			resp, err := client.Search(searchCtx, q, &search.SearchOptions{
				Categories: []string{"general"},
				PageNo:     1,
			})
			cancel()

			if err != nil {
				log.Printf("[travel-events] search error for '%s': %v", q, err)
				continue
			}

			for _, r := range resp.Results {
				if r.URL == "" || seen[r.URL] {
					continue
				}
				if isNewsArticleURL(r.URL) || isOldContent(r.PublishedDate) {
					continue
				}
				seen[r.URL] = true
				results = append(results, eventSearchResult{
					Title:         r.Title,
					URL:           r.URL,
					Content:       r.Content,
					PublishedDate: r.PublishedDate,
					Engine:        r.Engine,
				})
			}
		}
	}

	return results
}
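
// generateEventQueries builds Russian-language search queries ("афиша", "мероприятия",
// site-scoped afisha.ru/kassir.ru) for the destination and the trip month/year,
// falling back to the current month and year when the start date is missing.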
func generateEventQueries(destination, startDate, endDate string) []string {
	month := ""
	year := ""
	if len(startDate) >= 7 {
		parts := strings.Split(startDate, "-")
		if len(parts) >= 2 {
			year = parts[0]
			monthNum := parts[1]
			monthNames := map[string]string{
				"01": "январь", "02": "февраль", "03": "март",
				"04": "апрель", "05": "май", "06": "июнь",
				"07": "июль", "08": "август", "09": "сентябрь",
				"10": "октябрь", "11": "ноябрь", "12": "декабрь",
			}
			month = monthNames[monthNum]
		}
	}
	if year == "" {
		year = time.Now().Format("2006")
	}
	if month == "" {
		monthNames := []string{"", "январь", "февраль", "март", "апрель", "май", "июнь",
			"июль", "август", "сентябрь", "октябрь", "ноябрь", "декабрь"}
		month = monthNames[time.Now().Month()]
	}

	queries := []string{
		fmt.Sprintf("афиша %s %s %s концерты выставки", destination, month, year),
		fmt.Sprintf("мероприятия %s %s %s расписание", destination, month, year),
		fmt.Sprintf("куда сходить %s %s %s", destination, month, year),
		fmt.Sprintf("site:afisha.ru %s %s", destination, month),
		fmt.Sprintf("site:kassir.ru %s %s %s", destination, month, year),
	}

	return queries
}
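
// isNewsArticleURL reports whether the URL points at a news site or news section,
// which the pipeline excludes because articles are not bookable events.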
func isNewsArticleURL(u string) bool {
	newsPatterns := []string{
		"/news/", "/novosti/", "/article/", "/stati/",
		"ria.ru", "tass.ru", "rbc.ru", "lenta.ru", "gazeta.ru",
		"interfax.ru", "kommersant.ru", "iz.ru", "mk.ru",
		"regnum.ru", "aif.ru", "kp.ru",
	}
	lower := strings.ToLower(u)
	for _, p := range newsPatterns {
		if strings.Contains(lower, p) {
			return true
		}
	}
	return false
}
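
// isOldContent reports whether a result's published date (in any of several common
// formats) is more than six months old; empty or unparseable dates are kept.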
func isOldContent(publishedDate string) bool {
	if publishedDate == "" {
		return false
	}
	formats := []string{
		"2006-01-02T15:04:05Z",
		"2006-01-02T15:04:05-07:00",
		"2006-01-02",
		"02.01.2006",
	}
	for _, f := range formats {
		if t, err := time.Parse(f, publishedDate); err == nil {
			sixMonthsAgo := time.Now().AddDate(0, -6, 0)
			return t.Before(sixMonthsAgo)
		}
	}
	return false
}
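
// filterFreshEvents drops events that end more than a month before the trip starts
// or begin more than two months after it; events with unparseable dates are kept.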
func filterFreshEvents(events []EventCard, tripStartDate string) []EventCard {
	if tripStartDate == "" {
		return events
	}
	tripStart, err := time.Parse("2006-01-02", tripStartDate)
	if err != nil {
		return events
	}

	cutoff := tripStart.AddDate(0, -1, 0)
	var fresh []EventCard
	for _, e := range events {
		if e.DateEnd != "" {
			if endDate, err := time.Parse("2006-01-02", e.DateEnd); err == nil {
				if endDate.Before(cutoff) {
					continue
				}
			}
		}
		if e.DateStart != "" {
			if startDate, err := time.Parse("2006-01-02", e.DateStart); err == nil {
				twoMonthsAfterTrip := tripStart.AddDate(0, 2, 0)
				if startDate.After(twoMonthsAfterTrip) {
					continue
				}
			}
		}
		fresh = append(fresh, e)
	}
	return fresh
}
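
// crawlEventPages fetches full page content for up to four of the top search
// results through Crawl4AI, skipping pages that fail or return almost no text.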
func crawlEventPages(ctx context.Context, crawl4aiURL string, results []eventSearchResult) []crawledPage {
	maxCrawl := 4
	if len(results) < maxCrawl {
		maxCrawl = len(results)
	}

	var pages []crawledPage

	for _, r := range results[:maxCrawl] {
		crawlCtx, cancel := context.WithTimeout(ctx, 15*time.Second)
		page, err := crawlSinglePage(crawlCtx, crawl4aiURL, r.URL)
		cancel()

		if err != nil {
			log.Printf("[travel-events] crawl failed for %s: %v", r.URL, err)
			continue
		}

		if page != nil && len(page.Content) > 100 {
			pages = append(pages, *page)
		}
	}

	return pages
}
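
// crawlSinglePage POSTs one URL to the Crawl4AI /crawl endpoint and returns the
// extracted title and markdown, truncating content to 10,000 characters.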
func crawlSinglePage(ctx context.Context, crawl4aiURL, pageURL string) (*crawledPage, error) {
	// Marshal the request instead of string-formatting it so that quotes or
	// other special characters in pageURL cannot break the JSON payload.
	reqBody, err := json.Marshal(map[string]interface{}{
		"urls": []string{pageURL},
		"crawler_config": map[string]interface{}{
			"type": "CrawlerRunConfig",
			"params": map[string]interface{}{
				"cache_mode":   "default",
				"page_timeout": 15000,
			},
		},
	})
	if err != nil {
		return nil, err
	}

	req, err := http.NewRequestWithContext(ctx, "POST", crawl4aiURL+"/crawl", strings.NewReader(string(reqBody)))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/json")

	client := &http.Client{Timeout: 20 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("crawl4ai returned %d", resp.StatusCode)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	content := extractCrawledMarkdown(string(body))
	title := extractCrawledTitle(string(body))

	if len(content) > 10000 {
		content = content[:10000]
	}

	return &crawledPage{
		URL:     pageURL,
		Title:   title,
		Content: content,
	}, nil
}
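
// extractCrawledMarkdown pulls the markdown body out of a Crawl4AI response,
// preferring raw_markdown when present.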
func extractCrawledMarkdown(response string) string {
	var result struct {
		Results []struct {
			RawMarkdown string `json:"raw_markdown"`
			Markdown    string `json:"markdown"`
		} `json:"results"`
	}

	if err := json.Unmarshal([]byte(response), &result); err == nil && len(result.Results) > 0 {
		if result.Results[0].RawMarkdown != "" {
			return result.Results[0].RawMarkdown
		}
		return result.Results[0].Markdown
	}

	return ""
}
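
// extractCrawledTitle pulls the page title out of a Crawl4AI response.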
func extractCrawledTitle(response string) string {
	var result struct {
		Results []struct {
			Title string `json:"title"`
		} `json:"results"`
	}

	if err := json.Unmarshal([]byte(response), &result); err == nil && len(result.Results) > 0 {
		return result.Results[0].Title
	}

	return ""
}
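
// extractEventsWithLLM asks the LLM to pull structured EventCard JSON out of the
// collected search snippets and crawled pages. The prompt is deliberately in
// Russian, matching the Russian-language event sources queried above.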
func extractEventsWithLLM(ctx context.Context, llmClient llm.Client, brief *TripBrief, searchResults []eventSearchResult, crawled []crawledPage) []EventCard {
	var contextBuilder strings.Builder

	contextBuilder.WriteString("Данные об афише и мероприятиях:\n\n")
	maxSearch := 10
	if len(searchResults) < maxSearch {
		maxSearch = len(searchResults)
	}
	for i := 0; i < maxSearch; i++ {
		r := searchResults[i]
		contextBuilder.WriteString(fmt.Sprintf("### %s\nURL: %s\n%s\n\n", r.Title, r.URL, truncateStr(r.Content, 300)))
	}

	if len(crawled) > 0 {
		contextBuilder.WriteString("\nПодробности со страниц:\n\n")
		maxCrawled := 3
		if len(crawled) < maxCrawled {
			maxCrawled = len(crawled)
		}
		for i := 0; i < maxCrawled; i++ {
			p := crawled[i]
			contextBuilder.WriteString(fmt.Sprintf("### %s (%s)\n%s\n\n", p.Title, p.URL, truncateStr(p.Content, 2000)))
		}
	}

	currentYear := time.Now().Format("2006")

	prompt := fmt.Sprintf(`Извлеки ТОЛЬКО реальные МЕРОПРИЯТИЯ (концерты, выставки, фестивали, спектакли, спортивные события) в %s на %s — %s.

%s

СТРОГО ЗАПРЕЩЕНО:
- Новостные статьи, обзоры, блог-посты — это НЕ мероприятия
- Устаревшие события (до %s года)
- Выдуманные мероприятия

JSON (ТОЛЬКО массив, без текста):
[{"id":"evt-1","title":"Название","description":"Что за мероприятие, 1 предложение","dateStart":"YYYY-MM-DD","dateEnd":"YYYY-MM-DD","price":500,"currency":"RUB","url":"https://...","address":"Город, Площадка, адрес","tags":["концерт"]}]

Правила:
- ТОЛЬКО конкретные мероприятия с названием, местом и датой
- dateStart/dateEnd в формате YYYY-MM-DD, если дата неизвестна — ""
- price в рублях, 0 если неизвестна
- address — точный адрес площадки для геокодинга
- tags: концерт, выставка, фестиваль, спектакль, спорт, кино, мастер-класс, экскурсия
- Максимум 10 мероприятий`,
		strings.Join(brief.Destinations, ", "),
		brief.StartDate,
		brief.EndDate,
		contextBuilder.String(),
		currentYear,
	)

	llmCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()

	response, err := llmClient.GenerateText(llmCtx, llm.StreamRequest{
		Messages: []llm.Message{{Role: llm.RoleUser, Content: prompt}},
		Options:  llm.StreamOptions{MaxTokens: 3000, Temperature: 0.1},
	})
	if err != nil {
		log.Printf("[travel-events] LLM extraction failed: %v", err)
		return nil
	}

	jsonMatch := regexp.MustCompile(`\[[\s\S]*\]`).FindString(response)
	if jsonMatch == "" {
		log.Printf("[travel-events] no JSON array in LLM response (len=%d)", len(response))
		return nil
	}

	var events []EventCard
	if err := json.Unmarshal([]byte(jsonMatch), &events); err != nil {
		log.Printf("[travel-events] JSON parse error: %v", err)
		events = tryPartialEventParse(jsonMatch)
		if len(events) == 0 {
			return nil
		}
	}

	for i := range events {
		if events[i].ID == "" {
			events[i].ID = uuid.New().String()
		}
	}

	log.Printf("[travel-events] extracted %d events from LLM", len(events))
	return events
}
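
// tryPartialEventParse salvages individual event objects from a malformed JSON
// array by matching and unmarshalling each {...} object separately.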
func tryPartialEventParse(jsonStr string) []EventCard {
	var events []EventCard
	objRegex := regexp.MustCompile(`\{[^{}]*"title"\s*:\s*"[^"]+[^{}]*\}`)
	matches := objRegex.FindAllString(jsonStr, -1)
	for _, m := range matches {
		var e EventCard
		if err := json.Unmarshal([]byte(m), &e); err == nil && e.Title != "" {
			events = append(events, e)
		}
	}
	if len(events) > 0 {
		log.Printf("[travel-events] partial parse recovered %d events", len(events))
	}
	return events
}
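
// geocodeEvents resolves venue addresses to coordinates via the travel data
// provider, leaving events untouched when they already have coordinates or
// when geocoding fails.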
func geocodeEvents(ctx context.Context, cfg TravelOrchestratorConfig, events []EventCard) []EventCard {
	for i := range events {
		if events[i].Address == "" || (events[i].Lat != 0 && events[i].Lng != 0) {
			continue
		}

		geoCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
		geo, err := cfg.TravelData.Geocode(geoCtx, events[i].Address)
		cancel()

		if err != nil {
			log.Printf("[travel-events] geocode failed for '%s': %v", events[i].Address, err)
			continue
		}

		events[i].Lat = geo.Lat
		events[i].Lng = geo.Lng
	}

	return events
}
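
// deduplicateEvents removes events whose titles match case-insensitively on the
// first 50 characters, keeping the first occurrence.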
func deduplicateEvents(events []EventCard) []EventCard {
	seen := make(map[string]bool)
	var unique []EventCard

	for _, e := range events {
		key := strings.ToLower(e.Title)
		if len(key) > 50 {
			key = key[:50]
		}
		if seen[key] {
			continue
		}
		seen[key] = true
		unique = append(unique, e)
	}

	return unique
}