package agent

import (
	"context"
	"encoding/json"
	"fmt"
	"log"
	"regexp"
	"strconv"
	"strings"
	"time"

	"github.com/google/uuid"
	"github.com/gooseek/backend/internal/llm"
	"github.com/gooseek/backend/internal/search"
)

// Regular expressions used by the hotel pipeline. They are compiled once at
// package initialisation instead of on every call.
var (
	// hotelJSONArrayRe greedily grabs everything from the first '[' to the
	// last ']' in an LLM response.
	hotelJSONArrayRe = regexp.MustCompile(`\[[\s\S]*\]`)

	// hotelJSONObjectRe matches a flat (non-nested) JSON object that contains
	// a non-empty "name" field; used to salvage hotels from malformed JSON.
	hotelJSONObjectRe = regexp.MustCompile(`\{[^{}]*"name"\s*:\s*"[^"]+[^{}]*\}`)

	// hotelPriceRe matches a number followed by a rouble marker. Digit groups
	// may be separated by a plain space, a no-break space (U+00A0), a thin
	// space (U+2009) or a narrow no-break space (U+202F). Go's \s is
	// ASCII-only, so the Unicode separators have to be listed explicitly.
	// Newlines are deliberately not allowed inside the number so that digits
	// from two different lines are never glued together.
	hotelPriceRe = regexp.MustCompile(`(\d[\d \x{00A0}\x{2009}\x{202F}]*\d)[\s\x{00A0}\x{2009}\x{202F}]*(?:₽|руб|RUB|р\.)`)
)

// hotelExtractionPromptFormat is the fmt format of the LLM extraction prompt.
// Verbs, in order: destinations, stay description, travelers, rooms, search
// context, check-in date, check-out date.
const hotelExtractionPromptFormat = `Извлеки до 6 отелей в %s на %s для %d чел (%d номеров). ` +
	`%s ` +
	`JSON массив (ТОЛЬКО JSON, без текста): ` +
	`[{"id":"hotel-1","name":"Название","stars":3,"rating":8.5,"reviewCount":120,"pricePerNight":3500,"totalPrice":0,"currency":"RUB","address":"Город, ул. Улица, д. 1","bookingUrl":"https://...","amenities":["Wi-Fi","Завтрак"],"pros":["Центр города"],"checkIn":"%s","checkOut":"%s"}] ` +
	`Правила: ` +
	`- ТОЛЬКО реальные отели из текста ` +
	`- pricePerNight — за 1 номер за 1 ночь в рублях. Если не указана — оцени по звёздам: 1★=1500, 2★=2500, 3★=3500, 4★=5000, 5★=8000 ` +
	`- totalPrice=0 (рассчитается автоматически) ` +
	`- Адрес с городом для геокодинга ` +
	`- Максимум 6 отелей, компактный JSON`

// CollectHotelsEnriched searches for hotels via SearXNG + Crawl4AI + LLM.
//
// The pipeline is: web search, optional crawl of the top pages (only when
// cfg.Crawl4AIURL is set), LLM extraction, geocoding, de-duplication by name,
// and a distance filter of 250 km around the given destinations. At most 10
// hotels are returned.
//
// It returns (nil, nil) when no search client is configured, when brief is
// nil, or when the search produced no results. The error result is always nil
// in the current implementation.
func CollectHotelsEnriched(ctx context.Context, cfg TravelOrchestratorConfig, brief *TripBrief, destinations []destGeoEntry) ([]HotelCard, error) {
	// brief is dereferenced by every later stage, so a nil brief is treated
	// the same way as a missing search client.
	if cfg.SearchClient == nil || brief == nil {
		return nil, nil
	}

	rawResults := searchForHotels(ctx, cfg.SearchClient, brief)
	if len(rawResults) == 0 {
		return nil, nil
	}

	var crawledContent []crawledPage
	if cfg.Crawl4AIURL != "" {
		crawledContent = crawlHotelPages(ctx, cfg.Crawl4AIURL, rawResults)
	}

	hotels := extractHotelsWithLLM(ctx, cfg.LLM, brief, rawResults, crawledContent)
	hotels = geocodeHotels(ctx, cfg, hotels)
	hotels = deduplicateHotels(hotels)
	hotels = filterHotelsNearDestinations(hotels, destinations, 250)

	if len(hotels) > 10 {
		hotels = hotels[:10]
	}
	return hotels, nil
}

// hotelSearchResult is the subset of a search hit that the hotel pipeline
// keeps: title, URL, text snippet and the engine that produced it.
type hotelSearchResult struct {
	Title   string
	URL     string
	Content string
	Engine  string
}

// searchForHotels runs every generated query for every destination in brief
// and returns the hits de-duplicated by URL, in the order they were found.
// Each query gets its own 10-second timeout; failed queries are logged and
// skipped. Once ctx is done the results gathered so far are returned.
func searchForHotels(ctx context.Context, client *search.SearXNGClient, brief *TripBrief) []hotelSearchResult {
	var results []hotelSearchResult
	seen := make(map[string]bool)

	for _, dest := range brief.Destinations {
		queries := generateHotelQueries(dest, brief.StartDate, brief.EndDate, brief.Travelers, brief.TravelStyle)
		for _, q := range queries {
			// Every further search would fail immediately on a finished
			// context, so stop instead of logging one error per query.
			if ctx.Err() != nil {
				return results
			}

			searchCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
			resp, err := client.Search(searchCtx, q, &search.SearchOptions{
				Categories: []string{"general"},
				PageNo:     1,
			})
			cancel()
			if err != nil {
				log.Printf("[travel-hotels] search error for '%s': %v", q, err)
				continue
			}

			for _, r := range resp.Results {
				if r.URL == "" || seen[r.URL] {
					continue
				}
				seen[r.URL] = true
				results = append(results, hotelSearchResult{
					Title:   r.Title,
					URL:     r.URL,
					Content: r.Content,
					Engine:  r.Engine,
				})
			}
		}
	}
	return results
}

// generateHotelQueries builds the Russian-language search queries for one
// destination: three generic queries, one extra family query when there are
// three or more travelers, and one query chosen by style ("luxury", "budget",
// or anything else).
func generateHotelQueries(destination, startDate, endDate string, travelers int, style string) []string {
	dateStr := ""
	if startDate != "" {
		dateStr = startDate
		if endDate != "" && endDate != startDate {
			dateStr += " " + endDate
		}
	}

	familyStr := ""
	if travelers >= 3 {
		familyStr = "семейный "
	}

	queries := []string{
		fmt.Sprintf("%sотели %s цены бронирование %s", familyStr, destination, dateStr),
		fmt.Sprintf("гостиницы %s рейтинг отзывы %d гостей", destination, travelers),
		fmt.Sprintf("лучшие отели %s для туристов", destination),
	}

	if travelers >= 3 {
		queries = append(queries, fmt.Sprintf("семейные отели %s с детьми", destination))
	}

	switch style {
	case "luxury":
		queries = append(queries, fmt.Sprintf("5 звезд отели %s премиум", destination))
	case "budget":
		queries = append(queries, fmt.Sprintf("хостелы %s дешево %d человек", destination, travelers))
	default:
		queries = append(queries, fmt.Sprintf("где остановиться %s %d человек", destination, travelers))
	}
	return queries
}

// crawlHotelPages fetches the first five search results (or fewer) through
// crawlSinglePage, giving each page a 20-second timeout. Failed crawls are
// logged and skipped, and pages whose content is 100 bytes or shorter are
// discarded. Once ctx is done the pages gathered so far are returned.
func crawlHotelPages(ctx context.Context, crawl4aiURL string, results []hotelSearchResult) []crawledPage {
	maxCrawl := 5
	if len(results) < maxCrawl {
		maxCrawl = len(results)
	}

	var pages []crawledPage
	for _, r := range results[:maxCrawl] {
		if ctx.Err() != nil {
			break
		}

		crawlCtx, cancel := context.WithTimeout(ctx, 20*time.Second)
		page, err := crawlSinglePage(crawlCtx, crawl4aiURL, r.URL)
		cancel()
		if err != nil {
			log.Printf("[travel-hotels] crawl failed for %s: %v", r.URL, err)
			continue
		}
		if page != nil && len(page.Content) > 100 {
			pages = append(pages, *page)
		}
	}
	return pages
}

// extractHotelsWithLLM asks the LLM to turn search snippets and crawled page
// text into a JSON array of hotels, then fills in the booking fields (ID,
// dates, nights, rooms, guests, prices) on every parsed card.
//
// The prompt contains at most 10 search results (snippets passed through
// truncateStr with a limit of 300) and at most 3 crawled pages (limit 2000).
// The LLM call has a 30-second timeout. If the call fails, the response
// contains no JSON array, or neither the full nor the partial parse yields a
// hotel, the result of fallbackHotelsFromSearch is returned instead.
//
// NOTE(review): llmClient is used without a nil check — confirm callers always
// configure it.
func extractHotelsWithLLM(ctx context.Context, llmClient llm.Client, brief *TripBrief, searchResults []hotelSearchResult, crawled []crawledPage) []HotelCard {
	var contextBuilder strings.Builder
	contextBuilder.WriteString("Результаты поиска отелей:\n\n")

	maxSearch := 10
	if len(searchResults) < maxSearch {
		maxSearch = len(searchResults)
	}
	for _, r := range searchResults[:maxSearch] {
		fmt.Fprintf(&contextBuilder, "### %s\nURL: %s\n%s\n\n", r.Title, r.URL, truncateStr(r.Content, 300))
	}

	if len(crawled) > 0 {
		contextBuilder.WriteString("\nПодробное содержание:\n\n")
		maxCrawled := 3
		if len(crawled) < maxCrawled {
			maxCrawled = len(crawled)
		}
		for _, p := range crawled[:maxCrawled] {
			fmt.Fprintf(&contextBuilder, "### %s (%s)\n%s\n\n", p.Title, p.URL, truncateStr(p.Content, 2000))
		}
	}

	nights := calculateNights(brief.StartDate, brief.EndDate)
	nightsStr := "1 ночь"
	if brief.StartDate != "" && brief.EndDate != "" && brief.EndDate != brief.StartDate {
		nightsStr = fmt.Sprintf("с %s по %s (%d ночей)", brief.StartDate, brief.EndDate, nights)
	}

	travelers := brief.Travelers
	if travelers < 1 {
		travelers = 1
	}
	rooms := calculateRooms(travelers)

	prompt := fmt.Sprintf(hotelExtractionPromptFormat,
		strings.Join(brief.Destinations, ", "),
		nightsStr,
		travelers,
		rooms,
		contextBuilder.String(),
		brief.StartDate,
		brief.EndDate,
	)

	llmCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()

	response, err := llmClient.GenerateText(llmCtx, llm.StreamRequest{
		Messages: []llm.Message{{Role: llm.RoleUser, Content: prompt}},
		Options:  llm.StreamOptions{MaxTokens: 3000, Temperature: 0.1},
	})
	if err != nil {
		log.Printf("[travel-hotels] LLM extraction failed: %v", err)
		return fallbackHotelsFromSearch(searchResults, brief)
	}

	jsonMatch := hotelJSONArrayRe.FindString(response)
	if jsonMatch == "" {
		log.Printf("[travel-hotels] no JSON array found in LLM response (len=%d)", len(response))
		return fallbackHotelsFromSearch(searchResults, brief)
	}

	var hotels []HotelCard
	if err := json.Unmarshal([]byte(jsonMatch), &hotels); err != nil {
		log.Printf("[travel-hotels] JSON parse error: %v, response len=%d", err, len(jsonMatch))
		// The array as a whole is malformed (for example cut off by the token
		// limit); try to salvage the individual objects that are intact.
		hotels = tryPartialHotelParse(jsonMatch)
		if len(hotels) == 0 {
			return fallbackHotelsFromSearch(searchResults, brief)
		}
	}

	for i := range hotels {
		h := &hotels[i]
		if h.ID == "" {
			h.ID = uuid.New().String()
		}
		if h.CheckIn == "" {
			h.CheckIn = brief.StartDate
		}
		if h.CheckOut == "" {
			h.CheckOut = brief.EndDate
		}
		h.Nights = nights
		h.Rooms = rooms
		h.Guests = travelers

		// Derive whichever of the two prices is missing from the other one;
		// if both are missing, estimate from the star rating.
		if h.PricePerNight > 0 && h.TotalPrice == 0 {
			h.TotalPrice = h.PricePerNight * float64(nights) * float64(rooms)
		}
		if h.TotalPrice > 0 && h.PricePerNight == 0 && nights > 0 && rooms > 0 {
			h.PricePerNight = h.TotalPrice / float64(nights) / float64(rooms)
		}
		if h.PricePerNight == 0 {
			h.PricePerNight = estimatePriceByStars(h.Stars)
			h.TotalPrice = h.PricePerNight * float64(nights) * float64(rooms)
		}
	}

	log.Printf("[travel-hotels] extracted %d hotels from LLM", len(hotels))
	return hotels
}

// tryPartialHotelParse salvages hotels from a JSON array that failed to parse
// as a whole. It decodes every flat object containing a "name" field on its
// own and keeps those that unmarshal cleanly with a non-empty name. Objects
// with nested braces are not matched by the pattern.
func tryPartialHotelParse(jsonStr string) []HotelCard {
	var hotels []HotelCard
	for _, m := range hotelJSONObjectRe.FindAllString(jsonStr, -1) {
		var h HotelCard
		if err := json.Unmarshal([]byte(m), &h); err == nil && h.Name != "" {
			hotels = append(hotels, h)
		}
	}
	if len(hotels) > 0 {
		log.Printf("[travel-hotels] partial parse recovered %d hotels", len(hotels))
	}
	return hotels
}

// estimatePriceByStars returns a rough nightly price for the given star
// rating: 8000 for five or more stars, 5000 for four, 3500 for three, 2500
// for two and 2000 for anything else (including an unknown rating of 0).
//
// NOTE(review): the extraction prompt tells the LLM that 1★=1500, while a
// one-star hotel falls into the 2000 default here — confirm which is intended.
func estimatePriceByStars(stars int) float64 {
	switch {
	case stars >= 5:
		return 8000
	case stars == 4:
		return 5000
	case stars == 3:
		return 3500
	case stars == 2:
		return 2500
	default:
		return 2000
	}
}

// calculateNights returns the number of nights between two dates in
// "2006-01-02" format. It returns 1 when either date is empty or cannot be
// parsed, and never returns less than 1.
func calculateNights(startDate, endDate string) int {
	if startDate == "" || endDate == "" {
		return 1
	}
	start, err1 := time.Parse("2006-01-02", startDate)
	end, err2 := time.Parse("2006-01-02", endDate)
	if err1 != nil || err2 != nil {
		return 1
	}
	nights := int(end.Sub(start).Hours() / 24)
	if nights < 1 {
		return 1
	}
	return nights
}

// calculateRooms returns the number of rooms for a party assuming two guests
// per room, rounded up. Parties of two or fewer (including non-positive
// counts) get one room.
func calculateRooms(travelers int) int {
	if travelers <= 2 {
		return 1
	}
	return (travelers + 1) / 2
}

// cutHotelNameAtRune shortens s to at most maxBytes bytes without splitting a
// multi-byte UTF-8 sequence. Strings that already fit are returned unchanged.
func cutHotelNameAtRune(s string, maxBytes int) string {
	if len(s) <= maxBytes {
		return s
	}
	end := 0
	// Ranging over a string yields the byte offset at which each rune starts,
	// so the last offset not exceeding maxBytes is a safe place to cut.
	for i := range s {
		if i > maxBytes {
			break
		}
		end = i
	}
	return s[:end]
}

// fallbackHotelsFromSearch builds up to five placeholder hotel cards directly
// from raw search results; it is used when LLM extraction is unavailable.
// Each card takes the result title (capped at 80 bytes on a rune boundary) as
// its name, is assumed to have three stars, and uses the price found in the
// snippet or 3000 when none is found.
func fallbackHotelsFromSearch(results []hotelSearchResult, brief *TripBrief) []HotelCard {
	hotels := make([]HotelCard, 0, len(results))
	nights := calculateNights(brief.StartDate, brief.EndDate)
	travelers := brief.Travelers
	if travelers < 1 {
		travelers = 1
	}
	rooms := calculateRooms(travelers)

	for _, r := range results {
		if len(hotels) >= 5 {
			break
		}

		// Titles are frequently Cyrillic; a plain byte slice could cut a rune
		// in half and leave invalid UTF-8 in the card name.
		name := cutHotelNameAtRune(r.Title, 80)

		price := extractPriceFromSnippet(r.Content)
		if price == 0 {
			price = 3000
		}

		hotels = append(hotels, HotelCard{
			ID:            uuid.New().String(),
			Name:          name,
			Stars:         3,
			PricePerNight: price,
			TotalPrice:    price * float64(nights) * float64(rooms),
			Rooms:         rooms,
			Nights:        nights,
			Guests:        travelers,
			Currency:      "RUB",
			CheckIn:       brief.StartDate,
			CheckOut:      brief.EndDate,
			BookingURL:    r.URL,
		})
	}

	log.Printf("[travel-hotels] fallback: %d hotels from search results", len(hotels))
	return hotels
}

// extractPriceFromSnippet returns the first rouble price found in text, or 0
// when there is none. A price is a number of at least two digits followed by
// "₽", "руб", "RUB" or "р."; digit groups may be separated by ordinary or
// no-break spaces (e.g. "3 500 ₽"). Only values strictly between 100 and
// 500000 are accepted.
func extractPriceFromSnippet(text string) float64 {
	match := hotelPriceRe.FindStringSubmatch(text)
	if len(match) < 2 {
		return 0
	}

	// Drop every group separator so that only the digits remain.
	digits := strings.Map(func(r rune) rune {
		if r >= '0' && r <= '9' {
			return r
		}
		return -1
	}, match[1])

	price, err := strconv.ParseFloat(digits, 64)
	if err != nil || price <= 100 || price >= 500000 {
		return 0
	}
	return price
}

// geocodeHotels fills in Lat/Lng for hotels that have an address but no
// coordinates yet, using cfg.TravelData.Geocode with a 5-second timeout per
// lookup. Failed lookups are logged and leave the hotel unchanged. The slice
// is modified in place and also returned. Once ctx is done no further lookups
// are attempted.
//
// NOTE(review): cfg.TravelData and the value returned by Geocode are used
// without nil checks — confirm neither can be nil here.
func geocodeHotels(ctx context.Context, cfg TravelOrchestratorConfig, hotels []HotelCard) []HotelCard {
	for i := range hotels {
		if hotels[i].Address == "" || (hotels[i].Lat != 0 && hotels[i].Lng != 0) {
			continue
		}
		if ctx.Err() != nil {
			break
		}

		geoCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
		geo, err := cfg.TravelData.Geocode(geoCtx, hotels[i].Address)
		cancel()
		if err != nil {
			log.Printf("[travel-hotels] geocode failed for '%s': %v", hotels[i].Address, err)
			continue
		}
		hotels[i].Lat = geo.Lat
		hotels[i].Lng = geo.Lng
	}
	return hotels
}

// filterHotelsNearDestinations keeps the hotels that lie within maxKm of at
// least one destination. Destinations with zero coordinates are ignored. When
// no destination has usable coordinates the hotels are returned unfiltered,
// because there is nothing to measure against. Otherwise hotels without
// coordinates are dropped, and hotels that are too far away are dropped and
// logged.
func filterHotelsNearDestinations(hotels []HotelCard, destinations []destGeoEntry, maxKm float64) []HotelCard {
	usable := make([]destGeoEntry, 0, len(destinations))
	for _, d := range destinations {
		if d.Lat == 0 && d.Lng == 0 {
			continue
		}
		usable = append(usable, d)
	}
	// Without this guard a list made only of un-geocoded destinations would
	// silently discard every hotel.
	if len(usable) == 0 {
		return hotels
	}

	filtered := make([]HotelCard, 0, len(hotels))
	for _, h := range hotels {
		if h.Lat == 0 && h.Lng == 0 {
			continue
		}

		nearest := distanceKm(h.Lat, h.Lng, usable[0].Lat, usable[0].Lng)
		for _, d := range usable[1:] {
			if dd := distanceKm(h.Lat, h.Lng, d.Lat, d.Lng); dd < nearest {
				nearest = dd
			}
		}

		if nearest <= maxKm {
			filtered = append(filtered, h)
		} else {
			log.Printf("[travel-hotels] dropped far hotel '%s' (%.0fkm from destinations)", h.Name, nearest)
		}
	}
	return filtered
}

// deduplicateHotels removes hotels whose names repeat, keeping the first
// occurrence and preserving order. Names are compared case-insensitively on
// the first 50 bytes of the lower-cased name.
func deduplicateHotels(hotels []HotelCard) []HotelCard {
	seen := make(map[string]bool)
	var unique []HotelCard
	for _, h := range hotels {
		key := strings.ToLower(h.Name)
		// The key is only used for map lookups, so a cut in the middle of a
		// multi-byte character is harmless here.
		if len(key) > 50 {
			key = key[:50]
		}
		if seen[key] {
			continue
		}
		seen[key] = true
		unique = append(unique, h)
	}
	return unique
}