Files
gooseek/backend/internal/agent/travel_hotels_collector.go
home ab48a0632b
Some checks failed
Build and Deploy GooSeek / build-backend (push) Failing after 1m4s
Build and Deploy GooSeek / build-webui (push) Failing after 1m2s
Build and Deploy GooSeek / deploy (push) Has been skipped
feat: CI/CD pipeline + Learning/Medicine/Travel services
- Add Gitea Actions workflow for automated build & deploy
- Add K8s manifests: webui, travel-svc, medicine-svc, sandbox-svc
- Update kustomization for localhost:5000 registry
- Add ingress for gooseek.ru and api.gooseek.ru
- Learning cabinet with onboarding, courses, sandbox integration
- Medicine service with symptom analysis and doctor matching
- Travel service with itinerary planning
- Server setup scripts (NVIDIA/CUDA, K3s, Gitea runner)

Made-with: Cursor
2026-03-02 20:25:44 +03:00

451 lines
12 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package agent
import (
"context"
"encoding/json"
"fmt"
"log"
"regexp"
"strings"
"time"
"github.com/gooseek/backend/internal/llm"
"github.com/gooseek/backend/internal/search"
"github.com/google/uuid"
)
// CollectHotelsEnriched searches for hotels via SearXNG + Crawl4AI + LLM.
//
// Pipeline: run web searches for every destination in brief, crawl the top
// hits when cfg.Crawl4AIURL is set, let the LLM extract hotel cards from the
// collected text, de-duplicate the cards by name, geocode their addresses,
// filter them by distance to the destinations (250 km limit) and cap the
// result at 10 hotels.
//
// It returns (nil, nil) when no search client is configured, when brief is
// nil, or when the searches yield nothing. The error result is always nil in
// the current implementation.
func CollectHotelsEnriched(ctx context.Context, cfg TravelOrchestratorConfig, brief *TripBrief, destinations []destGeoEntry) ([]HotelCard, error) {
	// brief is dereferenced by every later stage, so treat a missing brief
	// like a missing search client: nothing to collect.
	if cfg.SearchClient == nil || brief == nil {
		return nil, nil
	}

	rawResults := searchForHotels(ctx, cfg.SearchClient, brief)
	if len(rawResults) == 0 {
		return nil, nil
	}

	var crawledContent []crawledPage
	if cfg.Crawl4AIURL != "" {
		crawledContent = crawlHotelPages(ctx, cfg.Crawl4AIURL, rawResults)
	}

	hotels := extractHotelsWithLLM(ctx, cfg.LLM, brief, rawResults, crawledContent)

	// De-duplicate before geocoding: de-duplication keeps the first card per
	// name regardless of coordinates, so the outcome is the same, but no
	// geocoding round-trips are spent on cards that get discarded anyway.
	hotels = deduplicateHotels(hotels)
	hotels = geocodeHotels(ctx, cfg, hotels)
	hotels = filterHotelsNearDestinations(hotels, destinations, 250)

	if len(hotels) > 10 {
		hotels = hotels[:10]
	}
	return hotels, nil
}
// hotelSearchResult is a single web search hit kept for hotel extraction.
// searchForHotels fills it from the search response: Title, URL, Content and
// Engine are copied from the result's fields of the same names.
type hotelSearchResult struct {
	Title, URL, Content, Engine string
}
// searchForHotels runs every generated hotel query for every destination in
// brief and returns the hits in the order they were received, keeping only
// the first hit per URL and skipping hits without a URL.
//
// Each query gets its own 10-second timeout derived from ctx. A failed query
// is logged and skipped; it does not abort the remaining queries.
func searchForHotels(ctx context.Context, client *search.SearXNGClient, brief *TripBrief) []hotelSearchResult {
	var (
		collected []hotelSearchResult
		visited   = make(map[string]bool)
	)

	for _, destination := range brief.Destinations {
		for _, query := range generateHotelQueries(destination, brief.StartDate, brief.EndDate, brief.Travelers, brief.TravelStyle) {
			queryCtx, stop := context.WithTimeout(ctx, 10*time.Second)
			resp, err := client.Search(queryCtx, query, &search.SearchOptions{
				Categories: []string{"general"},
				PageNo:     1,
			})
			// Release the timer right away instead of deferring inside the loop.
			stop()
			if err != nil {
				log.Printf("[travel-hotels] search error for '%s': %v", query, err)
				continue
			}

			for _, hit := range resp.Results {
				if hit.URL != "" && !visited[hit.URL] {
					visited[hit.URL] = true
					collected = append(collected, hotelSearchResult{
						Title:   hit.Title,
						URL:     hit.URL,
						Content: hit.Content,
						Engine:  hit.Engine,
					})
				}
			}
		}
	}
	return collected
}
// generateHotelQueries builds the Russian-language search queries used to
// find hotels in destination.
//
// Three base queries are always produced; the first one carries the stay
// dates (start date, plus end date when it is set and differs) and a
// "family" prefix for parties of three or more. Parties of three or more get
// an extra family-hotels query, and one final query depends on style:
// "luxury", "budget", or a generic "where to stay" query for anything else.
func generateHotelQueries(destination, startDate, endDate string, travelers int, style string) []string {
	dates := startDate
	if startDate != "" && endDate != "" && endDate != startDate {
		dates = startDate + " " + endDate
	}

	isFamily := travelers >= 3
	prefix := ""
	if isFamily {
		prefix = "семейный "
	}

	out := make([]string, 0, 5)
	out = append(out,
		fmt.Sprintf("%sотели %s цены бронирование %s", prefix, destination, dates),
		fmt.Sprintf("гостиницы %s рейтинг отзывы %d гостей", destination, travelers),
		fmt.Sprintf("лучшие отели %s для туристов", destination),
	)
	if isFamily {
		out = append(out, fmt.Sprintf("семейные отели %s с детьми", destination))
	}

	switch style {
	case "luxury":
		out = append(out, fmt.Sprintf("5 звезд отели %s премиум", destination))
	case "budget":
		out = append(out, fmt.Sprintf("хостелы %s дешево %d человек", destination, travelers))
	default:
		out = append(out, fmt.Sprintf("где остановиться %s %d человек", destination, travelers))
	}
	return out
}
// crawlHotelPages fetches the first five search results (fewer if there are
// not that many) through the crawler at crawl4aiURL and returns the pages
// whose content is longer than 100 bytes.
//
// Every page is crawled sequentially with its own 20-second timeout derived
// from ctx. Crawl failures are logged and skipped.
func crawlHotelPages(ctx context.Context, crawl4aiURL string, results []hotelSearchResult) []crawledPage {
	const crawlLimit = 5

	candidates := results
	if len(candidates) > crawlLimit {
		candidates = candidates[:crawlLimit]
	}

	var fetched []crawledPage
	for i := range candidates {
		target := candidates[i].URL

		pageCtx, release := context.WithTimeout(ctx, 20*time.Second)
		page, err := crawlSinglePage(pageCtx, crawl4aiURL, target)
		release()

		switch {
		case err != nil:
			log.Printf("[travel-hotels] crawl failed for %s: %v", target, err)
		case page != nil && len(page.Content) > 100:
			fetched = append(fetched, *page)
		}
	}
	return fetched
}
// hotelJSONArrayRe matches from the first '[' to the last ']' of an LLM
// reply, i.e. the outermost JSON array candidate. It is compiled once at
// package scope rather than on every extraction call.
var hotelJSONArrayRe = regexp.MustCompile(`\[[\s\S]*\]`)

// extractHotelsWithLLM asks the LLM to turn search snippets and crawled page
// text into structured hotel cards for the trip described by brief.
//
// The LLM call is limited to 30 seconds. The reply is expected to contain a
// JSON array of hotels; if strict decoding fails, tryPartialHotelParse is
// used to salvage individual objects. Whenever no hotels can be obtained from
// the LLM (no client, call error, no JSON array, nothing parseable) the
// function falls back to fallbackHotelsFromSearch.
//
// Returned cards always have ID, check-in/check-out, nights, rooms, guests
// and a non-zero price per night filled in.
func extractHotelsWithLLM(ctx context.Context, llmClient llm.Client, brief *TripBrief, searchResults []hotelSearchResult, crawled []crawledPage) []HotelCard {
	// A missing LLM client would panic on the GenerateText call below; degrade
	// to search-based cards instead.
	if llmClient == nil {
		log.Printf("[travel-hotels] LLM client not configured, using search fallback")
		return fallbackHotelsFromSearch(searchResults, brief)
	}

	nightsStr := "1 ночь"
	nightsCount := calculateNights(brief.StartDate, brief.EndDate)
	if brief.StartDate != "" && brief.EndDate != "" && brief.EndDate != brief.StartDate {
		nightsStr = fmt.Sprintf("с %s по %s (%d ночей)", brief.StartDate, brief.EndDate, nightsCount)
	}

	travelers := brief.Travelers
	if travelers < 1 {
		travelers = 1
	}
	rooms := calculateRooms(travelers)

	prompt := fmt.Sprintf(`Извлеки до 6 отелей в %s на %s для %d чел (%d номеров).
%s
JSON массив (ТОЛЬКО JSON, без текста):
[{"id":"hotel-1","name":"Название","stars":3,"rating":8.5,"reviewCount":120,"pricePerNight":3500,"totalPrice":0,"currency":"RUB","address":"Город, ул. Улица, д. 1","bookingUrl":"https://...","amenities":["Wi-Fi","Завтрак"],"pros":["Центр города"],"checkIn":"%s","checkOut":"%s"}]
Правила:
- ТОЛЬКО реальные отели из текста
- pricePerNight — за 1 номер за 1 ночь в рублях. Если не указана — оцени по звёздам: 1★=1500, 2★=2500, 3★=3500, 4★=5000, 5★=8000
- totalPrice=0 (рассчитается автоматически)
- Адрес с городом для геокодинга
- Максимум 6 отелей, компактный JSON`,
		strings.Join(brief.Destinations, ", "),
		nightsStr,
		travelers,
		rooms,
		buildHotelLLMContext(searchResults, crawled),
		brief.StartDate,
		brief.EndDate,
	)

	llmCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()

	response, err := llmClient.GenerateText(llmCtx, llm.StreamRequest{
		Messages: []llm.Message{{Role: llm.RoleUser, Content: prompt}},
		Options:  llm.StreamOptions{MaxTokens: 3000, Temperature: 0.1},
	})
	if err != nil {
		log.Printf("[travel-hotels] LLM extraction failed: %v", err)
		return fallbackHotelsFromSearch(searchResults, brief)
	}

	jsonMatch := hotelJSONArrayRe.FindString(response)
	if jsonMatch == "" {
		log.Printf("[travel-hotels] no JSON array found in LLM response (len=%d)", len(response))
		return fallbackHotelsFromSearch(searchResults, brief)
	}

	var hotels []HotelCard
	if err := json.Unmarshal([]byte(jsonMatch), &hotels); err != nil {
		log.Printf("[travel-hotels] JSON parse error: %v, response len=%d", err, len(jsonMatch))
		// The reply may be truncated or malformed as a whole while still
		// containing intact hotel objects.
		hotels = tryPartialHotelParse(jsonMatch)
		if len(hotels) == 0 {
			return fallbackHotelsFromSearch(searchResults, brief)
		}
	}

	applyHotelStayDefaults(hotels, brief, nightsCount, rooms, travelers)

	log.Printf("[travel-hotels] extracted %d hotels from LLM", len(hotels))
	return hotels
}

// buildHotelLLMContext renders the text block embedded in the extraction
// prompt: up to 10 search results (content passed through truncateStr with a
// limit of 300) followed, when available, by up to 3 crawled pages (limit
// 2000).
func buildHotelLLMContext(searchResults []hotelSearchResult, crawled []crawledPage) string {
	var b strings.Builder
	b.WriteString("Результаты поиска отелей:\n\n")

	maxSearch := 10
	if len(searchResults) < maxSearch {
		maxSearch = len(searchResults)
	}
	for _, r := range searchResults[:maxSearch] {
		b.WriteString(fmt.Sprintf("### %s\nURL: %s\n%s\n\n", r.Title, r.URL, truncateStr(r.Content, 300)))
	}

	if len(crawled) > 0 {
		b.WriteString("\nПодробное содержание:\n\n")
		maxCrawled := 3
		if len(crawled) < maxCrawled {
			maxCrawled = len(crawled)
		}
		for _, p := range crawled[:maxCrawled] {
			b.WriteString(fmt.Sprintf("### %s (%s)\n%s\n\n", p.Title, p.URL, truncateStr(p.Content, 2000)))
		}
	}
	return b.String()
}

// applyHotelStayDefaults fills in, in place, the fields the LLM may have left
// empty (ID, check-in/check-out), overwrites nights/rooms/guests with the
// values computed from the brief, and derives whichever of price-per-night
// and total price is missing. Hotels with no price at all get an estimate
// from estimatePriceByStars.
func applyHotelStayDefaults(hotels []HotelCard, brief *TripBrief, nights, rooms, guests int) {
	for i := range hotels {
		h := &hotels[i]

		if h.ID == "" {
			h.ID = uuid.New().String()
		}
		if h.CheckIn == "" {
			h.CheckIn = brief.StartDate
		}
		if h.CheckOut == "" {
			h.CheckOut = brief.EndDate
		}
		h.Nights = nights
		h.Rooms = rooms
		h.Guests = guests

		if h.PricePerNight > 0 && h.TotalPrice == 0 {
			h.TotalPrice = h.PricePerNight * float64(nights) * float64(rooms)
		}
		// Guard the divisors so a zero nights/rooms value can never produce Inf/NaN.
		if h.TotalPrice > 0 && h.PricePerNight == 0 && nights > 0 && rooms > 0 {
			h.PricePerNight = h.TotalPrice / float64(nights) / float64(rooms)
		}
		if h.PricePerNight == 0 {
			h.PricePerNight = estimatePriceByStars(h.Stars)
			h.TotalPrice = h.PricePerNight * float64(nights) * float64(rooms)
		}
	}
}
// partialHotelObjectRe matches a flat JSON object (no nested braces) that
// contains a non-empty "name" string field. It is compiled once at package
// scope rather than on every call.
var partialHotelObjectRe = regexp.MustCompile(`\{[^{}]*"name"\s*:\s*"[^"]+[^{}]*\}`)

// tryPartialHotelParse salvages hotels from a JSON array that failed to
// decode as a whole. It scans jsonStr for individual flat objects with a
// "name" field, decodes each one separately and keeps those that decode
// cleanly and have a non-empty name. It returns nil when nothing could be
// recovered.
func tryPartialHotelParse(jsonStr string) []HotelCard {
	var hotels []HotelCard
	for _, m := range partialHotelObjectRe.FindAllString(jsonStr, -1) {
		var h HotelCard
		// A candidate that does not decode is simply skipped: this is a
		// best-effort recovery path.
		if err := json.Unmarshal([]byte(m), &h); err == nil && h.Name != "" {
			hotels = append(hotels, h)
		}
	}
	if len(hotels) > 0 {
		log.Printf("[travel-hotels] partial parse recovered %d hotels", len(hotels))
	}
	return hotels
}
// estimatePriceByStars returns a rough nightly room price in rubles for a
// hotel with the given star rating. The table mirrors the one given to the
// LLM in the extraction prompt (1★=1500, 2★=2500, 3★=3500, 4★=5000,
// 5★=8000); ratings of five and above share the top price, and an unknown
// rating (zero or negative) gets a neutral 2000.
func estimatePriceByStars(stars int) float64 {
	switch {
	case stars >= 5:
		return 8000
	case stars == 4:
		return 5000
	case stars == 3:
		return 3500
	case stars == 2:
		return 2500
	case stars == 1:
		// Keep in sync with the prompt's price table.
		return 1500
	default:
		return 2000
	}
}
// calculateNights returns the number of nights between two dates given in
// YYYY-MM-DD form. It never returns less than 1: a missing or unparsable
// date, equal dates, or an end date before the start date all yield 1.
func calculateNights(startDate, endDate string) int {
	const layout = "2006-01-02"

	if startDate == "" || endDate == "" {
		return 1
	}
	checkIn, err := time.Parse(layout, startDate)
	if err != nil {
		return 1
	}
	checkOut, err := time.Parse(layout, endDate)
	if err != nil {
		return 1
	}

	if n := int(checkOut.Sub(checkIn).Hours() / 24); n >= 1 {
		return n
	}
	return 1
}
// calculateRooms returns how many rooms a party needs at two guests per
// room. Parties of two or fewer (including non-positive counts) need one
// room; larger parties need half the head count, rounded up.
func calculateRooms(travelers int) int {
	if travelers > 2 {
		// Integer ceiling of travelers / 2.
		return (travelers + 1) / 2
	}
	return 1
}
// fallbackHotelsFromSearch builds placeholder hotel cards directly from raw
// search results, for use when LLM extraction yields nothing.
//
// At most five cards are produced, one per search result in order. Each card
// uses the result title (limited to 80 characters) as the name, the result
// URL as the booking URL, a fixed 3-star rating, and a nightly price taken
// from the snippet via extractPriceFromSnippet or 3000 RUB when the snippet
// has no usable price. Nights, rooms and guests are derived from brief.
func fallbackHotelsFromSearch(results []hotelSearchResult, brief *TripBrief) []HotelCard {
	const (
		maxFallbackHotels = 5
		maxNameRunes      = 80
		defaultPrice      = 3000
	)

	capacity := len(results)
	if capacity > maxFallbackHotels {
		capacity = maxFallbackHotels
	}
	hotels := make([]HotelCard, 0, capacity)

	nights := calculateNights(brief.StartDate, brief.EndDate)
	travelers := brief.Travelers
	if travelers < 1 {
		travelers = 1
	}
	rooms := calculateRooms(travelers)

	for _, r := range results {
		if len(hotels) >= maxFallbackHotels {
			break
		}

		// Truncate by runes, not bytes: titles are often Cyrillic, and a byte
		// slice could cut a multi-byte character in half and leave invalid
		// UTF-8 in the name.
		name := r.Title
		if runes := []rune(name); len(runes) > maxNameRunes {
			name = string(runes[:maxNameRunes])
		}

		price := extractPriceFromSnippet(r.Content)
		if price == 0 {
			price = defaultPrice
		}

		hotels = append(hotels, HotelCard{
			ID:            uuid.New().String(),
			Name:          name,
			Stars:         3,
			PricePerNight: price,
			TotalPrice:    price * float64(nights) * float64(rooms),
			Rooms:         rooms,
			Nights:        nights,
			Guests:        travelers,
			Currency:      "RUB",
			CheckIn:       brief.StartDate,
			CheckOut:      brief.EndDate,
			BookingURL:    r.URL,
		})
	}

	log.Printf("[travel-hotels] fallback: %d hotels from search results", len(hotels))
	return hotels
}
// hotelPriceRe matches a number followed by a ruble marker (₽, руб, RUB or
// "р."). Digit groups may be separated by ASCII whitespace or by the
// no-break space (U+00A0), thin space (U+2009) or narrow no-break space
// (U+202F) commonly used as thousands separators; Go's \s is ASCII-only, so
// those are listed explicitly. Compiled once at package scope.
var hotelPriceRe = regexp.MustCompile(`(\d[\d\s\x{00A0}\x{2009}\x{202F}]*\d)[\s\x{00A0}\x{2009}\x{202F}]*(?:₽|руб|RUB|р\.)`)

// extractPriceFromSnippet returns the first ruble price found in text, or 0
// when there is none or when the value is outside the plausible range
// (must be greater than 100 and less than 500000).
func extractPriceFromSnippet(text string) float64 {
	match := hotelPriceRe.FindStringSubmatch(text)
	if len(match) < 2 {
		return 0
	}

	// Drop every separator the pattern allows between digits, leaving a plain
	// integer literal.
	digits := strings.Map(func(r rune) rune {
		if r >= '0' && r <= '9' {
			return r
		}
		return -1
	}, match[1])

	var price float64
	if _, err := fmt.Sscanf(digits, "%f", &price); err != nil {
		return 0
	}
	if price > 100 && price < 500000 {
		return price
	}
	return 0
}
// geocodeHotels resolves coordinates for hotels that have an address but do
// not yet have both latitude and longitude set. The slice is updated in
// place and returned.
//
// Lookups run sequentially through cfg.TravelData, each with a 5-second
// timeout derived from ctx. A failed lookup is logged and leaves that hotel's
// coordinates untouched.
func geocodeHotels(ctx context.Context, cfg TravelOrchestratorConfig, hotels []HotelCard) []HotelCard {
	for idx := range hotels {
		hotel := &hotels[idx]

		hasCoords := hotel.Lat != 0 && hotel.Lng != 0
		if hasCoords || hotel.Address == "" {
			continue
		}

		lookupCtx, done := context.WithTimeout(ctx, 5*time.Second)
		location, err := cfg.TravelData.Geocode(lookupCtx, hotel.Address)
		done()
		if err != nil {
			log.Printf("[travel-hotels] geocode failed for '%s': %v", hotel.Address, err)
			continue
		}

		hotel.Lat, hotel.Lng = location.Lat, location.Lng
	}
	return hotels
}
// filterHotelsNearDestinations keeps the hotels that lie within maxKm
// kilometres of at least one destination.
//
// Destinations whose latitude and longitude are both zero are treated as
// having no coordinates and are ignored. If no destination has coordinates
// (including an empty destinations slice) there is nothing to measure
// against, so hotels is returned unchanged. Otherwise hotels without
// coordinates are dropped, and hotels farther than maxKm from every
// destination are dropped and logged.
func filterHotelsNearDestinations(hotels []HotelCard, destinations []destGeoEntry, maxKm float64) []HotelCard {
	// Only destinations with known coordinates can anchor the distance check.
	anchors := make([]destGeoEntry, 0, len(destinations))
	for _, d := range destinations {
		if d.Lat != 0 || d.Lng != 0 {
			anchors = append(anchors, d)
		}
	}
	// Without any anchor every hotel would be "infinitely far" and the whole
	// list would be discarded; skip filtering instead.
	if len(anchors) == 0 {
		return hotels
	}

	filtered := make([]HotelCard, 0, len(hotels))
	for _, h := range hotels {
		if h.Lat == 0 && h.Lng == 0 {
			continue
		}

		nearest := distanceKm(h.Lat, h.Lng, anchors[0].Lat, anchors[0].Lng)
		for _, a := range anchors[1:] {
			if d := distanceKm(h.Lat, h.Lng, a.Lat, a.Lng); d < nearest {
				nearest = d
			}
		}

		if nearest <= maxKm {
			filtered = append(filtered, h)
		} else {
			log.Printf("[travel-hotels] dropped far hotel '%s' (%.0fkm from destinations)", h.Name, nearest)
		}
	}
	return filtered
}
// deduplicateHotels removes hotels whose names collide, keeping the first
// occurrence and preserving order. Names are compared case-insensitively on
// their first 50 bytes after lowercasing. The result is nil when hotels is
// empty.
func deduplicateHotels(hotels []HotelCard) []HotelCard {
	var kept []HotelCard
	taken := make(map[string]bool)

	for i := range hotels {
		nameKey := strings.ToLower(hotels[i].Name)
		if len(nameKey) > 50 {
			nameKey = nameKey[:50]
		}
		if !taken[nameKey] {
			taken[nameKey] = true
			kept = append(kept, hotels[i])
		}
	}
	return kept
}