Files
gooseek/backend/internal/agent/travel_poi_collector.go
home ab48a0632b
Some checks failed
Build and Deploy GooSeek / build-backend (push) Failing after 1m4s
Build and Deploy GooSeek / build-webui (push) Failing after 1m2s
Build and Deploy GooSeek / deploy (push) Has been skipped
feat: CI/CD pipeline + Learning/Medicine/Travel services
- Add Gitea Actions workflow for automated build & deploy
- Add K8s manifests: webui, travel-svc, medicine-svc, sandbox-svc
- Update kustomization for localhost:5000 registry
- Add ingress for gooseek.ru and api.gooseek.ru
- Learning cabinet with onboarding, courses, sandbox integration
- Medicine service with symptom analysis and doctor matching
- Travel service with itinerary planning
- Server setup scripts (NVIDIA/CUDA, K3s, Gitea runner)

Made-with: Cursor
2026-03-02 20:25:44 +03:00

762 lines
22 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package agent
import (
"context"
"encoding/json"
"fmt"
"log"
"math"
"regexp"
"strings"
"sync"
"time"
"github.com/gooseek/backend/internal/llm"
"github.com/gooseek/backend/internal/search"
"github.com/google/uuid"
)
// poiCategoryQueries maps an internal POI category key to the Russian-language
// search phrases used to query the 2GIS Places API for that category. Each
// phrase names a concrete organization type; searchPlacesFrom2GIS appends the
// destination name to the phrase and issues one search per phrase.
var poiCategoryQueries = map[string][]string{
// Sights: landmarks, monuments, historic buildings, observation decks.
"attraction": {
"достопримечательности",
"памятники",
"исторические здания",
"смотровые площадки",
},
// Museums, galleries, exhibitions.
"museum": {
"музеи",
"галереи",
"выставки",
},
// Parks, public gardens, embankments.
"park": {
"парки",
"скверы",
"сады",
"набережные",
},
// Restaurants and cafes.
"restaurant": {
"рестораны",
"кафе",
},
// Theaters, cinemas, philharmonic halls.
"theater": {
"театры",
"кинотеатры",
"филармония",
},
// Entertainment: water parks, zoos, amusement rides, bowling.
"entertainment": {
"развлечения",
"аквапарки",
"зоопарки",
"аттракционы",
"боулинг",
},
// Shopping malls, markets, souvenirs.
"shopping": {
"торговые центры",
"рынки",
"сувениры",
},
// Places of worship: temples, churches, cathedrals, mosques.
"religious": {
"храмы",
"церкви",
"соборы",
"мечети",
},
}
// CollectPOIsEnriched collects points of interest for the given destinations
// and returns at most 25 of them, all carrying coordinates.
//
// The pipeline is:
//  1. query 2GIS (cfg.TravelData) for every destination that has coordinates;
//  2. if fewer than 5 places were found and a search client is configured,
//     supplement them with POIs extracted from web search results by the LLM;
//  3. when a search client is configured, fill in missing descriptions and
//     photos;
//  4. geocode POIs that still lack coordinates, drop everything farther than
//     250 km from every destination, de-duplicate by name and discard POIs
//     that remain without coordinates.
//
// It returns (nil, nil) when cfg.TravelData is not configured. The error
// result is currently always nil; failures of individual phases are logged by
// the helpers and degrade the result instead of aborting it.
func CollectPOIsEnriched(ctx context.Context, cfg TravelOrchestratorConfig, brief *TripBrief, destinations []destGeoEntry) ([]POICard, error) {
	const (
		minPrimaryPOIs  = 5   // below this the 2GIS result is supplemented
		maxDistanceKm   = 250 // hard cut-off around destination centers
		maxReturnedPOIs = 25  // upper bound on the returned slice
	)

	if cfg.TravelData == nil {
		return nil, nil
	}

	// The category set depends only on the brief, so compute it once instead
	// of once per destination.
	categories := selectCategoriesForBrief(brief)

	var allPOIs []POICard

	// Phase 1: collect concrete places from 2GIS for each destination.
	for _, dest := range destinations {
		if dest.Lat == 0 && dest.Lng == 0 {
			continue // destination was never geocoded, nothing to search around
		}
		allPOIs = append(allPOIs, searchPlacesFrom2GIS(ctx, cfg, dest, categories)...)
	}
	log.Printf("[travel-poi] 2GIS returned %d places total", len(allPOIs))

	// Phase 2: if 2GIS returned too few, supplement with SearXNG + LLM extraction.
	if len(allPOIs) < minPrimaryPOIs && cfg.SearchClient != nil {
		log.Printf("[travel-poi] 2GIS returned only %d places, supplementing with SearXNG+LLM", len(allPOIs))
		allPOIs = append(allPOIs, collectPOIsFromSearch(ctx, cfg, brief, destinations)...)
	}

	// Phase 3: enrich POIs with descriptions and photos when search is available.
	if cfg.SearchClient != nil && len(allPOIs) > 0 {
		allPOIs = enrichPOIDescriptions(ctx, cfg, brief, allPOIs)
		allPOIs = enrichPOIPhotos(ctx, cfg, brief, allPOIs)
	}

	// Phase 4: fallback geocoding for POIs without coordinates.
	allPOIs = geocodePOIs(ctx, cfg, brief, allPOIs)

	// Hard filter: drop POIs that are far away from any destination center.
	// This prevents ambiguous geocoding from pulling in other cities/countries.
	allPOIs = filterPOIsNearDestinations(allPOIs, destinations, maxDistanceKm)
	allPOIs = deduplicatePOIs(allPOIs)

	// POIs without coordinates cannot be shown on the map. The distance filter
	// passes everything through when there are no destinations, so this check
	// is still required here.
	validPOIs := make([]POICard, 0, len(allPOIs))
	for _, p := range allPOIs {
		if p.Lat != 0 || p.Lng != 0 {
			validPOIs = append(validPOIs, p)
		}
	}
	if len(validPOIs) > maxReturnedPOIs {
		validPOIs = validPOIs[:maxReturnedPOIs]
	}

	log.Printf("[travel-poi] returning %d POIs with coordinates", len(validPOIs))
	return validPOIs, nil
}
// poiInterestCategoryRules lists, in a fixed order, the interest keywords
// (Russian, lower-case) recognised in a trip brief and the POI categories each
// keyword selects. A slice is used instead of a map so that the categories
// produced by selectCategoriesForBrief come out in a reproducible order.
var poiInterestCategoryRules = []struct {
	keyword    string
	categories []string
}{
	{"культура", []string{"museum", "theater", "attraction", "religious"}},
	{"музеи", []string{"museum"}},
	{"еда", []string{"restaurant"}},
	{"рестораны", []string{"restaurant"}},
	{"природа", []string{"park"}},
	{"парки", []string{"park"}},
	{"развлечения", []string{"entertainment"}},
	{"шопинг", []string{"shopping"}},
	{"история", []string{"attraction", "museum", "religious"}},
	{"архитектура", []string{"attraction", "religious"}},
	{"дети", []string{"entertainment", "park"}},
	{"семья", []string{"entertainment", "park", "museum"}},
	{"семейный", []string{"entertainment", "park", "museum"}},
	{"активный отдых", []string{"entertainment", "park"}},
	{"религия", []string{"religious"}},
	{"театр", []string{"theater"}},
	{"искусство", []string{"museum", "theater"}},
}

// defaultPOICategories returns a fresh copy of the category set used when the
// brief gives no usable hint about the traveller's interests.
func defaultPOICategories() []string {
	return []string{"attraction", "museum", "park", "restaurant", "theater", "entertainment"}
}

// selectCategoriesForBrief picks the POI categories relevant to the interests
// listed in brief.
//
// Every interest is lower-cased and matched by substring against the keywords
// in poiInterestCategoryRules; the categories of all matching rules are
// collected without duplicates, in interest order and then rule order.
// "attraction" is always part of the result. When brief is nil, has no
// interests, or none of the interests matches a keyword, the default category
// set is returned. The result is never empty.
func selectCategoriesForBrief(brief *TripBrief) []string {
	if brief == nil || len(brief.Interests) == 0 {
		return defaultPOICategories()
	}

	seen := make(map[string]bool)
	var categories []string
	for _, interest := range brief.Interests {
		lower := strings.ToLower(interest)
		for _, rule := range poiInterestCategoryRules {
			if !strings.Contains(lower, rule.keyword) {
				continue
			}
			for _, c := range rule.categories {
				if !seen[c] {
					seen[c] = true
					categories = append(categories, c)
				}
			}
		}
	}

	if len(categories) == 0 {
		return defaultPOICategories()
	}

	// Always include attractions as baseline.
	if !seen["attraction"] {
		categories = append(categories, "attraction")
	}
	return categories
}
// searchPlacesFrom2GIS queries the 2GIS Places API (cfg.TravelData) for
// concrete organizations around dest.
//
// For every requested category each phrase from poiCategoryQueries is combined
// with the destination name and searched concurrently, one goroutine per
// phrase, each with a 10-second timeout. Failed searches are logged and
// skipped. Results are merged under a mutex and de-duplicated by place ID and
// by place name; because the searches finish in arbitrary order, the order of
// the returned slice is not deterministic.
//
// Categories that are not keys of poiCategoryQueries are ignored.
func searchPlacesFrom2GIS(ctx context.Context, cfg TravelOrchestratorConfig, dest destGeoEntry, categories []string) []POICard {
	var (
		mu   sync.Mutex
		wg   sync.WaitGroup
		pois []POICard
		// IDs and names are tracked separately, and empty values are never
		// recorded: with a single shared map the first place lacking an ID
		// would mark "" as seen and hide every later place with an empty ID
		// or an empty name.
		seenIDs   = make(map[string]bool)
		seenNames = make(map[string]bool)
	)

	for _, category := range categories {
		// Ranging over the nil slice of an unknown category is a no-op.
		for _, q := range poiCategoryQueries[category] {
			wg.Add(1)
			go func(query, cat string) {
				defer wg.Done()

				searchCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
				defer cancel()

				fullQuery := fmt.Sprintf("%s %s", query, dest.Name)
				// 10000 is the search radius around the destination center
				// (presumably metres; confirm against SearchPlaces).
				places, err := cfg.TravelData.SearchPlaces(searchCtx, fullQuery, dest.Lat, dest.Lng, 10000)
				if err != nil {
					log.Printf("[travel-poi] 2GIS search error for '%s': %v", fullQuery, err)
					return
				}

				mu.Lock()
				defer mu.Unlock()

				for _, place := range places {
					if (place.ID != "" && seenIDs[place.ID]) || (place.Name != "" && seenNames[place.Name]) {
						continue
					}
					if place.ID != "" {
						seenIDs[place.ID] = true
					}
					if place.Name != "" {
						seenNames[place.Name] = true
					}

					// Prefix the address with the destination name, but do not
					// leave a dangling separator when either part is empty.
					address := strings.Trim(fmt.Sprintf("%s, %s", dest.Name, place.Address), ", ")

					pois = append(pois, POICard{
						ID:          place.ID,
						Name:        place.Name,
						Category:    mapPurposeToCategory(place.Purpose, place.Type, cat),
						Address:     address,
						Lat:         place.Lat,
						Lng:         place.Lng,
						Rating:      place.Rating,
						ReviewCount: place.ReviewCount,
						Schedule:    place.Schedule,
					})
				}
			}(q, category)
		}
	}

	wg.Wait()
	return pois
}
// mapPurposeToCategory maps a 2GIS purpose/type pair to one of our POI
// category keys.
//
// purpose is lower-cased and matched by substring against Russian word stems,
// first match wins. If no stem matches, itemType "attraction" yields
// "attraction"; otherwise fallbackCategory (the category of the query that
// found the place) is returned.
//
// The entertainment stems are tested before the park stems on purpose:
// "аквапарк" and "зоопарк" both contain "парк", so with the park case first
// water parks and zoos would be classified as parks and the entertainment
// stems for them could never match.
func mapPurposeToCategory(purpose, itemType, fallbackCategory string) string {
	lower := strings.ToLower(purpose)
	hasAny := func(stems ...string) bool {
		for _, stem := range stems {
			if strings.Contains(lower, stem) {
				return true
			}
		}
		return false
	}

	switch {
	case hasAny("музей", "галере", "выставк"):
		return "museum"
	case hasAny("ресторан", "кафе", "бар"):
		return "restaurant"
	case hasAny("развлеч", "аквапарк", "зоопарк", "аттракц"):
		return "entertainment"
	case hasAny("парк", "сквер", "сад"):
		return "park"
	case hasAny("театр", "кинотеатр", "филармон"):
		return "theater"
	case hasAny("храм", "церков", "собор", "мечет"):
		return "religious"
	case hasAny("торгов", "магазин", "рынок"):
		return "shopping"
	case hasAny("памятник", "достоприм"):
		return "attraction"
	}

	if itemType == "attraction" {
		return "attraction"
	}
	return fallbackCategory
}
// collectPOIsFromSearch is the web-search fallback used when 2GIS yields too
// few places. It runs the POI search queries for the brief, optionally crawls
// the top result pages when a Crawl4AI URL is configured, and hands both to the
// LLM extractor. It returns nil when no search client is configured or the
// search produced nothing. The destinations argument is currently not used.
func collectPOIsFromSearch(ctx context.Context, cfg TravelOrchestratorConfig, brief *TripBrief, destinations []destGeoEntry) []POICard {
	if cfg.SearchClient == nil {
		return nil
	}

	hits := searchForPOIs(ctx, cfg.SearchClient, brief)
	if len(hits) == 0 {
		return nil
	}

	// Crawling is optional: without a crawler the LLM only sees the snippets.
	var pages []crawledPage
	if cfg.Crawl4AIURL != "" {
		pages = crawlPOIPages(ctx, cfg.Crawl4AIURL, hits)
	}

	return extractPOIsWithLLM(ctx, cfg.LLM, brief, hits, pages)
}
// poiEnrichmentJSONArrayRe captures everything from the first '[' to the last
// ']' of an LLM reply, so that text surrounding the JSON array is ignored.
// It is compiled once at package scope rather than on every call.
var poiEnrichmentJSONArrayRe = regexp.MustCompile(`\[[\s\S]*\]`)

// enrichPOIDescriptions asks the LLM (cfg.LLM) for a short description, visit
// duration, entry price and rating for POIs that have no description yet, and
// merges the answers into pois in place.
//
// At most 15 POI names are sent in a single prompt; the call is limited to 30
// seconds. Answers are matched back to POIs by trimmed, case-insensitive name.
// Description, duration and price are taken from the answer when it provides a
// non-empty / positive value. The rating is only filled in when the POI has no
// rating of its own and the answer is within (0, 5], so that a rating that
// came from 2GIS is never replaced by the model's estimate.
//
// On any failure (LLM error, no JSON array in the reply, malformed JSON) the
// slice is returned unchanged; LLM and parse errors are logged.
func enrichPOIDescriptions(ctx context.Context, cfg TravelOrchestratorConfig, brief *TripBrief, pois []POICard) []POICard {
	const maxNamesPerPrompt = 15

	needsDescription := make([]int, 0, len(pois))
	for i := range pois {
		if pois[i].Description == "" {
			needsDescription = append(needsDescription, i)
		}
	}
	if len(needsDescription) == 0 {
		return pois
	}

	// Build the list of POI names for bulk enrichment via the LLM.
	names := make([]string, 0, maxNamesPerPrompt)
	for _, idx := range needsDescription {
		if len(names) == maxNamesPerPrompt {
			break
		}
		names = append(names, pois[idx].Name)
	}

	dest := strings.Join(brief.Destinations, ", ")
	prompt := fmt.Sprintf(`Ты — эксперт по туризму в %s. Для каждого места из списка напиши краткое описание (1-2 предложения), примерное время посещения в минутах и примерную стоимость входа в рублях (0 если бесплатно).
Места:
%s
Верни ТОЛЬКО JSON массив:
[
{
"name": "Точное название из списка",
"description": "Краткое описание",
"duration": число_минут,
"price": цена_в_рублях,
"rating": рейтинг_от_0_до_5
}
]
Правила:
- Описание должно быть информативным и привлекательным для туриста
- duration: музей 60-120 мин, парк 30-90 мин, ресторан 60 мин, памятник 15-30 мин
- price: 0 для открытых мест, реальные цены для музеев/театров
- rating: если знаешь реальный рейтинг — используй, иначе оцени по популярности (3.5-5.0)
- Верни ТОЛЬКО JSON, без пояснений`, dest, strings.Join(names, "\n"))

	enrichCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()

	response, err := cfg.LLM.GenerateText(enrichCtx, llm.StreamRequest{
		Messages: []llm.Message{{Role: llm.RoleUser, Content: prompt}},
		Options:  llm.StreamOptions{MaxTokens: 4096, Temperature: 0.2},
	})
	if err != nil {
		log.Printf("[travel-poi] LLM enrichment failed: %v", err)
		return pois
	}

	jsonMatch := poiEnrichmentJSONArrayRe.FindString(response)
	if jsonMatch == "" {
		return pois
	}

	var enrichments []struct {
		Name        string  `json:"name"`
		Description string  `json:"description"`
		Duration    int     `json:"duration"`
		Price       float64 `json:"price"`
		Rating      float64 `json:"rating"`
	}
	if err := json.Unmarshal([]byte(jsonMatch), &enrichments); err != nil {
		log.Printf("[travel-poi] enrichment JSON parse error: %v", err)
		return pois
	}

	// The model is asked to echo names exactly, but stray whitespace and case
	// differences are common, so compare on a normalized form.
	normalize := func(name string) string {
		return strings.ToLower(strings.TrimSpace(name))
	}

	byName := make(map[string]int, len(enrichments))
	for i, e := range enrichments {
		byName[normalize(e.Name)] = i
	}

	for _, idx := range needsDescription {
		eIdx, ok := byName[normalize(pois[idx].Name)]
		if !ok {
			continue
		}
		e := enrichments[eIdx]
		if e.Description != "" {
			pois[idx].Description = e.Description
		}
		if e.Duration > 0 {
			pois[idx].Duration = e.Duration
		}
		if e.Price > 0 {
			pois[idx].Price = e.Price
		}
		// Keep an existing rating: the model's value is only an estimate.
		if pois[idx].Rating == 0 && e.Rating > 0 && e.Rating <= 5 {
			pois[idx].Rating = e.Rating
		}
	}
	return pois
}
// enrichPOIPhotos attaches up to three image URLs to each of the first 15 POIs
// that have no photos yet, using the image category of the search client.
//
// One goroutine per POI runs an image search for "<name> <first destination>"
// with a 6-second timeout; search errors are silently ignored. For each result
// the first non-empty of ImgSrc, ThumbnailSrc and Thumbnail is used, and
// duplicates are skipped. When cfg.PhotoCache is set, the URLs are passed
// through CachePhotoBatch and whatever it returns is stored. A summary line is
// logged before returning the same slice.
func enrichPOIPhotos(ctx context.Context, cfg TravelOrchestratorConfig, brief *TripBrief, pois []POICard) []POICard {
	const (
		maxPOIs         = 15
		maxPhotosPerPOI = 3
	)

	var city string
	if len(brief.Destinations) > 0 {
		city = brief.Destinations[0]
	}

	limit := len(pois)
	if limit > maxPOIs {
		limit = maxPOIs
	}

	// firstNonEmpty returns the first candidate that is not the empty string.
	firstNonEmpty := func(candidates ...string) string {
		for _, c := range candidates {
			if c != "" {
				return c
			}
		}
		return ""
	}

	var (
		wg sync.WaitGroup
		mu sync.Mutex
	)
	for i := range pois[:limit] {
		if len(pois[i].Photos) != 0 {
			continue
		}

		wg.Add(1)
		go func(idx int) {
			defer wg.Done()

			term := pois[idx].Name
			if city != "" {
				term = pois[idx].Name + " " + city
			}

			searchCtx, cancel := context.WithTimeout(ctx, 6*time.Second)
			defer cancel()

			resp, err := cfg.SearchClient.Search(searchCtx, term, &search.SearchOptions{
				Categories: []string{"images"},
				PageNo:     1,
			})
			if err != nil {
				return
			}

			var urls []string
			taken := make(map[string]bool)
			for _, r := range resp.Results {
				if len(urls) >= maxPhotosPerPOI {
					break
				}
				link := firstNonEmpty(r.ImgSrc, r.ThumbnailSrc, r.Thumbnail)
				if link == "" || taken[link] {
					continue
				}
				taken[link] = true
				urls = append(urls, link)
			}
			if len(urls) == 0 {
				return
			}

			if cfg.PhotoCache != nil {
				slug := city
				if slug == "" {
					slug = "unknown"
				}
				// Uses the parent ctx, not the 6-second search context.
				urls = cfg.PhotoCache.CachePhotoBatch(ctx, slug, urls)
			}

			mu.Lock()
			pois[idx].Photos = urls
			mu.Unlock()
		}(i)
	}
	wg.Wait()

	// Statistics for the log line only.
	withPhotos, cached := 0, 0
	for i := range pois {
		if len(pois[i].Photos) == 0 {
			continue
		}
		withPhotos++
		for _, link := range pois[i].Photos {
			if strings.Contains(link, "storage.gooseek") || strings.Contains(link, "minio") {
				cached++
			}
		}
	}
	log.Printf("[travel-poi] enriched %d/%d POIs with photos (%d cached in MinIO)", withPhotos, len(pois), cached)

	return pois
}
// poiSearchResult is the subset of a web search hit that the POI pipeline
// keeps: searchForPOIs fills it from the search response, crawlPOIPages reads
// URL, and extractPOIsWithLLM puts Title, URL and Content into the prompt.
type poiSearchResult struct {
// Title is the result title as returned by the search engine.
Title string
// URL is the result link; searchForPOIs de-duplicates results by it.
URL string
// Content is the text snippet returned with the result.
Content string
// Engine is copied from the search result's Engine field.
Engine string
}
// searchForPOIs runs the general web-search queries produced by
// generatePOIQueries for every destination in brief and returns the hits,
// de-duplicated by URL, in the order they were found. Queries run one after
// another, each limited to 10 seconds; a failed query is logged and skipped.
func searchForPOIs(ctx context.Context, client *search.SearXNGClient, brief *TripBrief) []poiSearchResult {
	var collected []poiSearchResult
	visited := make(map[string]bool)

	// runQuery executes one search and appends its unseen hits to collected.
	runQuery := func(query string) {
		queryCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
		defer cancel()

		resp, err := client.Search(queryCtx, query, &search.SearchOptions{
			Categories: []string{"general"},
			PageNo:     1,
		})
		if err != nil {
			log.Printf("[travel-poi] search error for '%s': %v", query, err)
			return
		}

		for _, hit := range resp.Results {
			if hit.URL == "" || visited[hit.URL] {
				continue
			}
			visited[hit.URL] = true
			collected = append(collected, poiSearchResult{
				Title:   hit.Title,
				URL:     hit.URL,
				Content: hit.Content,
				Engine:  hit.Engine,
			})
		}
	}

	for _, destination := range brief.Destinations {
		for _, query := range generatePOIQueries(destination, brief.Interests) {
			runQuery(query)
		}
	}
	return collected
}
// generatePOIQueries builds the Russian-language web-search queries for one
// destination: three fixed queries (sights, restaurants, museums) followed by
// one "<interest> <destination> адреса" query per interest, in input order.
func generatePOIQueries(destination string, interests []string) []string {
	out := make([]string, 0, 3+len(interests))
	out = append(out,
		fmt.Sprintf("достопримечательности %s что посмотреть конкретные места", destination),
		fmt.Sprintf("лучшие рестораны %s рейтинг адреса", destination),
		fmt.Sprintf("музеи %s список адреса", destination),
	)
	for i := range interests {
		out = append(out, fmt.Sprintf("%s %s адреса", interests[i], destination))
	}
	return out
}
// crawlPOIPages fetches the pages behind the first five search results through
// the Crawl4AI service at crawl4aiURL, one after another with a 20-second
// timeout each. Failed crawls are logged and skipped; pages whose content is
// 100 bytes or shorter are discarded.
func crawlPOIPages(ctx context.Context, crawl4aiURL string, results []poiSearchResult) []crawledPage {
	const maxPages = 5

	targets := results
	if len(targets) > maxPages {
		targets = targets[:maxPages]
	}

	var fetched []crawledPage
	for i := range targets {
		pageURL := targets[i].URL

		crawlCtx, cancel := context.WithTimeout(ctx, 20*time.Second)
		page, err := crawlSinglePage(crawlCtx, crawl4aiURL, pageURL)
		cancel()

		if err != nil {
			log.Printf("[travel-poi] crawl failed for %s: %v", pageURL, err)
			continue
		}
		if page == nil || len(page.Content) <= 100 {
			continue
		}
		fetched = append(fetched, *page)
	}
	return fetched
}
// poiExtractionJSONArrayRe captures everything from the first '[' to the last
// ']' of an LLM reply, so that text surrounding the JSON array is ignored.
// It is compiled once at package scope rather than on every call.
var poiExtractionJSONArrayRe = regexp.MustCompile(`\[[\s\S]*\]`)

// extractPOIsWithLLM asks the LLM to extract concrete places from web search
// results and, when available, crawled page content.
//
// The prompt contains up to 15 search results (snippets cut to 500 via
// truncateStr) followed by every crawled page (content cut to 3000). The reply
// is expected to contain a JSON array that unmarshals into []POICard; POIs
// returned without an ID get a freshly generated UUID.
//
// It returns nil when the LLM call fails, when the reply contains no JSON
// array, or when the array cannot be parsed; LLM and parse errors are logged.
func extractPOIsWithLLM(ctx context.Context, llmClient llm.Client, brief *TripBrief, searchResults []poiSearchResult, crawled []crawledPage) []POICard {
	const (
		maxSearchResults = 15
		maxSnippetLen    = 500
		maxPageLen       = 3000
	)

	var contextBuilder strings.Builder
	contextBuilder.WriteString("Результаты поиска мест и организаций:\n\n")
	for i, r := range searchResults {
		if i >= maxSearchResults {
			break
		}
		fmt.Fprintf(&contextBuilder, "### %s\nURL: %s\n%s\n\n", r.Title, r.URL, truncateStr(r.Content, maxSnippetLen))
	}
	if len(crawled) > 0 {
		contextBuilder.WriteString("\nПодробное содержание страниц:\n\n")
		for _, p := range crawled {
			fmt.Fprintf(&contextBuilder, "### %s (%s)\n%s\n\n", p.Title, p.URL, truncateStr(p.Content, maxPageLen))
		}
	}

	// The "price" placeholder below previously read "ценахода_в_рублях_или_0",
	// a garbled form of "цена_входа" (entry price).
	prompt := fmt.Sprintf(`Ты — эксперт по туризму. Из предоставленного контента извлеки КОНКРЕТНЫЕ достопримечательности, рестораны, музеи, парки и интересные места в %s.
%s
КРИТИЧЕСКИ ВАЖНО:
- Извлекай ТОЛЬКО конкретные места с названиями (не статьи, не списки, не обзоры)
- Каждое место должно быть реальной организацией или объектом, который можно найти на карте
- НЕ включай заголовки статей типа "ТОП-25 достопримечательностей" — это НЕ место
- Адрес ОБЯЗАТЕЛЕН и должен включать город для геокодинга
Верни JSON массив:
[
{
"id": "уникальный id",
"name": "Конкретное название места (не статьи!)",
"description": "Краткое описание (1-2 предложения)",
"category": "attraction|restaurant|museum|park|theater|shopping|entertainment|religious|viewpoint",
"rating": число_от_0_до_5_или_0,
"address": "Город, улица, дом (точный адрес)",
"duration": время_посещения_в_минутах,
"price": цена_входа_в_рублях_или_0,
"currency": "RUB",
"url": "ссылка на источник"
}
]
Верни ТОЛЬКО JSON массив, без пояснений.`,
		strings.Join(brief.Destinations, ", "),
		contextBuilder.String(),
	)

	response, err := llmClient.GenerateText(ctx, llm.StreamRequest{
		Messages: []llm.Message{{Role: llm.RoleUser, Content: prompt}},
		Options:  llm.StreamOptions{MaxTokens: 4096, Temperature: 0.2},
	})
	if err != nil {
		log.Printf("[travel-poi] LLM extraction failed: %v", err)
		return nil
	}

	jsonMatch := poiExtractionJSONArrayRe.FindString(response)
	if jsonMatch == "" {
		return nil
	}

	var pois []POICard
	if err := json.Unmarshal([]byte(jsonMatch), &pois); err != nil {
		log.Printf("[travel-poi] JSON parse error: %v", err)
		return nil
	}

	// The model does not always provide an id; make sure every card has one.
	for i := range pois {
		if pois[i].ID == "" {
			pois[i].ID = uuid.New().String()
		}
	}
	return pois
}
// geocodePOIs fills in coordinates for POIs that have none, using
// cfg.TravelData.Geocode, and returns the same slice.
//
// A POI counts as "without coordinates" only when both Lat and Lng are zero,
// which is the convention used by the other helpers in this file. For such a
// POI the following queries are tried in order until one succeeds, each with a
// 5-second timeout:
//  1. the address as is;
//  2. the address followed by the joined destination names, unless the address
//     already contains them;
//  3. the name followed by the joined destination names (or the bare name when
//     the brief has no destinations).
//
// Geocoding runs sequentially. It stops early when ctx is cancelled, since
// every further request would fail immediately anyway.
func geocodePOIs(ctx context.Context, cfg TravelOrchestratorConfig, brief *TripBrief, pois []POICard) []POICard {
	destSuffix := strings.Join(brief.Destinations, ", ")
	destSuffixLower := strings.ToLower(destSuffix)

	for i := range pois {
		if err := ctx.Err(); err != nil {
			log.Printf("[travel-poi] geocoding stopped: %v", err)
			break
		}

		poi := &pois[i]
		if poi.Lat != 0 || poi.Lng != 0 {
			continue // already located
		}

		// Try geocoding by address first, then by name+destination.
		queries := make([]string, 0, 3)
		if poi.Address != "" {
			queries = append(queries, poi.Address)
			if destSuffix != "" && !strings.Contains(strings.ToLower(poi.Address), destSuffixLower) {
				queries = append(queries, fmt.Sprintf("%s, %s", poi.Address, destSuffix))
			}
		}
		if poi.Name != "" {
			if destSuffix != "" {
				queries = append(queries, fmt.Sprintf("%s, %s", poi.Name, destSuffix))
			} else {
				queries = append(queries, poi.Name)
			}
		}

		for _, query := range queries {
			geoCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
			geo, err := cfg.TravelData.Geocode(geoCtx, query)
			cancel()
			if err != nil {
				continue
			}
			poi.Lat = geo.Lat
			poi.Lng = geo.Lng
			log.Printf("[travel-poi] geocoded '%s' -> %.4f, %.4f", query, geo.Lat, geo.Lng)
			break
		}

		if poi.Lat == 0 && poi.Lng == 0 {
			log.Printf("[travel-poi] failed to geocode POI '%s' (address: '%s')", poi.Name, poi.Address)
		}
	}
	return pois
}
// distanceKm returns the great-circle distance in kilometres between two
// points given as latitude/longitude in degrees, computed with the haversine
// formula on a sphere of radius 6371 km.
func distanceKm(lat1, lng1, lat2, lng2 float64) float64 {
	const earthRadiusKm = 6371.0

	radians := func(deg float64) float64 { return deg * math.Pi / 180 }

	phi1, phi2 := radians(lat1), radians(lat2)
	sinHalfLat := math.Sin(radians(lat2-lat1) / 2)
	sinHalfLng := math.Sin(radians(lng2-lng1) / 2)

	// Haversine of the central angle between the two points.
	h := sinHalfLat*sinHalfLat + math.Cos(phi1)*math.Cos(phi2)*sinHalfLng*sinHalfLng
	angle := 2 * math.Atan2(math.Sqrt(h), math.Sqrt(1-h))

	return earthRadiusKm * angle
}
// filterPOIsNearDestinations keeps the POIs that lie within maxKm kilometres
// of at least one destination center and logs every POI it drops for being
// too far away.
//
// Destinations whose Lat and Lng are both zero have no known center and are
// ignored. When no destination has coordinates (including an empty list) there
// is nothing to measure against, so pois is returned unchanged; previously a
// non-empty list consisting only of such destinations caused every POI to be
// dropped. Otherwise POIs without coordinates are removed as well.
func filterPOIsNearDestinations(pois []POICard, destinations []destGeoEntry, maxKm float64) []POICard {
	centers := make([]destGeoEntry, 0, len(destinations))
	for _, d := range destinations {
		if d.Lat != 0 || d.Lng != 0 {
			centers = append(centers, d)
		}
	}
	if len(centers) == 0 {
		return pois
	}

	filtered := make([]POICard, 0, len(pois))
	for _, p := range pois {
		if p.Lat == 0 && p.Lng == 0 {
			continue
		}

		nearest := math.MaxFloat64
		for _, c := range centers {
			if d := distanceKm(p.Lat, p.Lng, c.Lat, c.Lng); d < nearest {
				nearest = d
			}
		}

		if nearest > maxKm {
			log.Printf("[travel-poi] dropped far POI '%s' (%.0fkm from destinations)", p.Name, nearest)
			continue
		}
		filtered = append(filtered, p)
	}
	return filtered
}
// deduplicatePOIs removes POIs whose names are equal after trimming
// surrounding whitespace, lower-casing and truncating to the first 50
// characters. The first occurrence of each name is kept and the input order is
// preserved.
//
// The truncation counts runes, not bytes: POI names are mostly Cyrillic, where
// a 50-byte limit would compare only about 25 characters and merge distinct
// places that share a long common prefix.
func deduplicatePOIs(pois []POICard) []POICard {
	const maxKeyRunes = 50

	seen := make(map[string]bool, len(pois))
	var unique []POICard
	for _, p := range pois {
		key := strings.ToLower(strings.TrimSpace(p.Name))
		if runes := []rune(key); len(runes) > maxKeyRunes {
			key = string(runes[:maxKeyRunes])
		}
		if seen[key] {
			continue
		}
		seen[key] = true
		unique = append(unique, p)
	}
	return unique
}