feat: Go backend, enhanced search, new widgets, Docker deploy
Major changes:
- Add Go backend (backend/) with microservices architecture
- Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler, proxy-manager, media-search, fastClassifier, language detection
- New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard, UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions
- Improved discover-svc with discover-db integration
- Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md)
- Library-svc: project_id schema migration
- Remove deprecated finance-svc and travel-svc
- Localization improvements across services

Made-with: Cursor
This commit is contained in:
229
backend/internal/llm/registry.go
Normal file
229
backend/internal/llm/registry.go
Normal file
@@ -0,0 +1,229 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"sort"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// ModelCapability is a string tag naming something a model is declared to be
// good at. Tags are attached to a ModelSpec via its Capabilities field.
type ModelCapability string

// The capability tags known to this package.
const (
	CapReasoning   = ModelCapability("reasoning")
	CapCoding      = ModelCapability("coding")
	CapSearch      = ModelCapability("search")
	CapCreative    = ModelCapability("creative")
	CapFast        = ModelCapability("fast")
	CapLongContext = ModelCapability("long_context")
	CapVision      = ModelCapability("vision")
	CapMath        = ModelCapability("math")
	CapVideo       = ModelCapability("video")
	CapImage       = ModelCapability("image")
)
|
||||
|
||||
type ModelSpec struct {
|
||||
ID string
|
||||
Provider string
|
||||
Model string
|
||||
Capabilities []ModelCapability
|
||||
CostPer1K float64
|
||||
MaxContext int
|
||||
Priority int
|
||||
MaxTokens int
|
||||
Description string
|
||||
}
|
||||
|
||||
func (m ModelSpec) HasCapability(cap ModelCapability) bool {
|
||||
for _, c := range m.Capabilities {
|
||||
if c == cap {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
type ModelRegistry struct {
|
||||
models map[string]ModelSpec
|
||||
clients map[string]Client
|
||||
mu sync.RWMutex
|
||||
}
|
||||
|
||||
func NewModelRegistry() *ModelRegistry {
|
||||
return &ModelRegistry{
|
||||
models: make(map[string]ModelSpec),
|
||||
clients: make(map[string]Client),
|
||||
}
|
||||
}
|
||||
|
||||
func (r *ModelRegistry) Register(spec ModelSpec, client Client) {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
r.models[spec.ID] = spec
|
||||
r.clients[spec.ID] = client
|
||||
}
|
||||
|
||||
func (r *ModelRegistry) Unregister(id string) {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
delete(r.models, id)
|
||||
delete(r.clients, id)
|
||||
}
|
||||
|
||||
func (r *ModelRegistry) GetByID(id string) (Client, ModelSpec, error) {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
|
||||
spec, ok := r.models[id]
|
||||
if !ok {
|
||||
return nil, ModelSpec{}, errors.New("model not found: " + id)
|
||||
}
|
||||
|
||||
client, ok := r.clients[id]
|
||||
if !ok {
|
||||
return nil, ModelSpec{}, errors.New("client not found: " + id)
|
||||
}
|
||||
|
||||
return client, spec, nil
|
||||
}
|
||||
|
||||
func (r *ModelRegistry) GetBest(cap ModelCapability) (Client, ModelSpec, error) {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
|
||||
var candidates []ModelSpec
|
||||
for _, spec := range r.models {
|
||||
if spec.HasCapability(cap) {
|
||||
candidates = append(candidates, spec)
|
||||
}
|
||||
}
|
||||
|
||||
if len(candidates) == 0 {
|
||||
return nil, ModelSpec{}, errors.New("no model found with capability: " + string(cap))
|
||||
}
|
||||
|
||||
sort.Slice(candidates, func(i, j int) bool {
|
||||
if candidates[i].Priority != candidates[j].Priority {
|
||||
return candidates[i].Priority < candidates[j].Priority
|
||||
}
|
||||
return candidates[i].CostPer1K < candidates[j].CostPer1K
|
||||
})
|
||||
|
||||
best := candidates[0]
|
||||
client := r.clients[best.ID]
|
||||
return client, best, nil
|
||||
}
|
||||
|
||||
func (r *ModelRegistry) GetAllWithCapability(cap ModelCapability) []ModelSpec {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
|
||||
var result []ModelSpec
|
||||
for _, spec := range r.models {
|
||||
if spec.HasCapability(cap) {
|
||||
result = append(result, spec)
|
||||
}
|
||||
}
|
||||
|
||||
sort.Slice(result, func(i, j int) bool {
|
||||
return result[i].Priority < result[j].Priority
|
||||
})
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func (r *ModelRegistry) GetAll() []ModelSpec {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
|
||||
result := make([]ModelSpec, 0, len(r.models))
|
||||
for _, spec := range r.models {
|
||||
result = append(result, spec)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func (r *ModelRegistry) GetClient(id string) (Client, error) {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
|
||||
client, ok := r.clients[id]
|
||||
if !ok {
|
||||
return nil, errors.New("client not found: " + id)
|
||||
}
|
||||
return client, nil
|
||||
}
|
||||
|
||||
func (r *ModelRegistry) Count() int {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
return len(r.models)
|
||||
}
|
||||
|
||||
var DefaultModels = []ModelSpec{
|
||||
{
|
||||
ID: "gpt-4o",
|
||||
Provider: "openai",
|
||||
Model: "gpt-4o",
|
||||
Capabilities: []ModelCapability{CapSearch, CapFast, CapVision, CapCoding, CapCreative},
|
||||
CostPer1K: 0.005,
|
||||
MaxContext: 128000,
|
||||
MaxTokens: 16384,
|
||||
Priority: 1,
|
||||
Description: "GPT-4o: fast multimodal model with search",
|
||||
},
|
||||
{
|
||||
ID: "gpt-4o-mini",
|
||||
Provider: "openai",
|
||||
Model: "gpt-4o-mini",
|
||||
Capabilities: []ModelCapability{CapFast, CapCoding},
|
||||
CostPer1K: 0.00015,
|
||||
MaxContext: 128000,
|
||||
MaxTokens: 16384,
|
||||
Priority: 2,
|
||||
Description: "GPT-4o Mini: cost-effective for simple tasks",
|
||||
},
|
||||
{
|
||||
ID: "claude-3-opus",
|
||||
Provider: "anthropic",
|
||||
Model: "claude-3-opus-20240229",
|
||||
Capabilities: []ModelCapability{CapReasoning, CapCoding, CapCreative, CapLongContext},
|
||||
CostPer1K: 0.015,
|
||||
MaxContext: 200000,
|
||||
MaxTokens: 4096,
|
||||
Priority: 1,
|
||||
Description: "Claude 3 Opus: best for complex reasoning and coding",
|
||||
},
|
||||
{
|
||||
ID: "claude-3-sonnet",
|
||||
Provider: "anthropic",
|
||||
Model: "claude-3-5-sonnet-20241022",
|
||||
Capabilities: []ModelCapability{CapCoding, CapCreative, CapFast},
|
||||
CostPer1K: 0.003,
|
||||
MaxContext: 200000,
|
||||
MaxTokens: 8192,
|
||||
Priority: 1,
|
||||
Description: "Claude 3.5 Sonnet: balanced speed and quality",
|
||||
},
|
||||
{
|
||||
ID: "gemini-1.5-pro",
|
||||
Provider: "gemini",
|
||||
Model: "gemini-1.5-pro",
|
||||
Capabilities: []ModelCapability{CapLongContext, CapSearch, CapVision, CapMath},
|
||||
CostPer1K: 0.00125,
|
||||
MaxContext: 2000000,
|
||||
MaxTokens: 8192,
|
||||
Priority: 1,
|
||||
Description: "Gemini 1.5 Pro: best for long context and research",
|
||||
},
|
||||
{
|
||||
ID: "gemini-1.5-flash",
|
||||
Provider: "gemini",
|
||||
Model: "gemini-1.5-flash",
|
||||
Capabilities: []ModelCapability{CapFast, CapVision},
|
||||
CostPer1K: 0.000075,
|
||||
MaxContext: 1000000,
|
||||
MaxTokens: 8192,
|
||||
Priority: 2,
|
||||
Description: "Gemini 1.5 Flash: fastest for lightweight tasks",
|
||||
},
|
||||
}
|
||||
Reference in New Issue
Block a user