Major changes:
- Add Go backend (backend/) with microservices architecture
- Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler, proxy-manager, media-search, fastClassifier, language detection
- New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard, UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions
- Improved discover-svc with discover-db integration
- Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md)
- Library-svc: project_id schema migration
- Remove deprecated finance-svc and travel-svc
- Localization improvements across services

Made-with: Cursor
230 lines
5.3 KiB
Go
230 lines
5.3 KiB
Go
package llm
|
|
|
|
import (
|
|
"errors"
|
|
"sort"
|
|
"sync"
|
|
)
|
|
|
|
// ModelCapability is a string tag naming one strength or modality that a
// model can be labelled with; specs are filtered by these tags.
type ModelCapability string
|
|
|
|
const (
|
|
CapReasoning ModelCapability = "reasoning"
|
|
CapCoding ModelCapability = "coding"
|
|
CapSearch ModelCapability = "search"
|
|
CapCreative ModelCapability = "creative"
|
|
CapFast ModelCapability = "fast"
|
|
CapLongContext ModelCapability = "long_context"
|
|
CapVision ModelCapability = "vision"
|
|
CapMath ModelCapability = "math"
|
|
CapVideo ModelCapability = "video"
|
|
CapImage ModelCapability = "image"
|
|
)
|
|
|
|
type ModelSpec struct {
|
|
ID string
|
|
Provider string
|
|
Model string
|
|
Capabilities []ModelCapability
|
|
CostPer1K float64
|
|
MaxContext int
|
|
Priority int
|
|
MaxTokens int
|
|
Description string
|
|
}
|
|
|
|
func (m ModelSpec) HasCapability(cap ModelCapability) bool {
|
|
for _, c := range m.Capabilities {
|
|
if c == cap {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
type ModelRegistry struct {
|
|
models map[string]ModelSpec
|
|
clients map[string]Client
|
|
mu sync.RWMutex
|
|
}
|
|
|
|
func NewModelRegistry() *ModelRegistry {
|
|
return &ModelRegistry{
|
|
models: make(map[string]ModelSpec),
|
|
clients: make(map[string]Client),
|
|
}
|
|
}
|
|
|
|
func (r *ModelRegistry) Register(spec ModelSpec, client Client) {
|
|
r.mu.Lock()
|
|
defer r.mu.Unlock()
|
|
r.models[spec.ID] = spec
|
|
r.clients[spec.ID] = client
|
|
}
|
|
|
|
func (r *ModelRegistry) Unregister(id string) {
|
|
r.mu.Lock()
|
|
defer r.mu.Unlock()
|
|
delete(r.models, id)
|
|
delete(r.clients, id)
|
|
}
|
|
|
|
func (r *ModelRegistry) GetByID(id string) (Client, ModelSpec, error) {
|
|
r.mu.RLock()
|
|
defer r.mu.RUnlock()
|
|
|
|
spec, ok := r.models[id]
|
|
if !ok {
|
|
return nil, ModelSpec{}, errors.New("model not found: " + id)
|
|
}
|
|
|
|
client, ok := r.clients[id]
|
|
if !ok {
|
|
return nil, ModelSpec{}, errors.New("client not found: " + id)
|
|
}
|
|
|
|
return client, spec, nil
|
|
}
|
|
|
|
func (r *ModelRegistry) GetBest(cap ModelCapability) (Client, ModelSpec, error) {
|
|
r.mu.RLock()
|
|
defer r.mu.RUnlock()
|
|
|
|
var candidates []ModelSpec
|
|
for _, spec := range r.models {
|
|
if spec.HasCapability(cap) {
|
|
candidates = append(candidates, spec)
|
|
}
|
|
}
|
|
|
|
if len(candidates) == 0 {
|
|
return nil, ModelSpec{}, errors.New("no model found with capability: " + string(cap))
|
|
}
|
|
|
|
sort.Slice(candidates, func(i, j int) bool {
|
|
if candidates[i].Priority != candidates[j].Priority {
|
|
return candidates[i].Priority < candidates[j].Priority
|
|
}
|
|
return candidates[i].CostPer1K < candidates[j].CostPer1K
|
|
})
|
|
|
|
best := candidates[0]
|
|
client := r.clients[best.ID]
|
|
return client, best, nil
|
|
}
|
|
|
|
func (r *ModelRegistry) GetAllWithCapability(cap ModelCapability) []ModelSpec {
|
|
r.mu.RLock()
|
|
defer r.mu.RUnlock()
|
|
|
|
var result []ModelSpec
|
|
for _, spec := range r.models {
|
|
if spec.HasCapability(cap) {
|
|
result = append(result, spec)
|
|
}
|
|
}
|
|
|
|
sort.Slice(result, func(i, j int) bool {
|
|
return result[i].Priority < result[j].Priority
|
|
})
|
|
|
|
return result
|
|
}
|
|
|
|
func (r *ModelRegistry) GetAll() []ModelSpec {
|
|
r.mu.RLock()
|
|
defer r.mu.RUnlock()
|
|
|
|
result := make([]ModelSpec, 0, len(r.models))
|
|
for _, spec := range r.models {
|
|
result = append(result, spec)
|
|
}
|
|
return result
|
|
}
|
|
|
|
func (r *ModelRegistry) GetClient(id string) (Client, error) {
|
|
r.mu.RLock()
|
|
defer r.mu.RUnlock()
|
|
|
|
client, ok := r.clients[id]
|
|
if !ok {
|
|
return nil, errors.New("client not found: " + id)
|
|
}
|
|
return client, nil
|
|
}
|
|
|
|
func (r *ModelRegistry) Count() int {
|
|
r.mu.RLock()
|
|
defer r.mu.RUnlock()
|
|
return len(r.models)
|
|
}
|
|
|
|
var DefaultModels = []ModelSpec{
|
|
{
|
|
ID: "gpt-4o",
|
|
Provider: "openai",
|
|
Model: "gpt-4o",
|
|
Capabilities: []ModelCapability{CapSearch, CapFast, CapVision, CapCoding, CapCreative},
|
|
CostPer1K: 0.005,
|
|
MaxContext: 128000,
|
|
MaxTokens: 16384,
|
|
Priority: 1,
|
|
Description: "GPT-4o: fast multimodal model with search",
|
|
},
|
|
{
|
|
ID: "gpt-4o-mini",
|
|
Provider: "openai",
|
|
Model: "gpt-4o-mini",
|
|
Capabilities: []ModelCapability{CapFast, CapCoding},
|
|
CostPer1K: 0.00015,
|
|
MaxContext: 128000,
|
|
MaxTokens: 16384,
|
|
Priority: 2,
|
|
Description: "GPT-4o Mini: cost-effective for simple tasks",
|
|
},
|
|
{
|
|
ID: "claude-3-opus",
|
|
Provider: "anthropic",
|
|
Model: "claude-3-opus-20240229",
|
|
Capabilities: []ModelCapability{CapReasoning, CapCoding, CapCreative, CapLongContext},
|
|
CostPer1K: 0.015,
|
|
MaxContext: 200000,
|
|
MaxTokens: 4096,
|
|
Priority: 1,
|
|
Description: "Claude 3 Opus: best for complex reasoning and coding",
|
|
},
|
|
{
|
|
ID: "claude-3-sonnet",
|
|
Provider: "anthropic",
|
|
Model: "claude-3-5-sonnet-20241022",
|
|
Capabilities: []ModelCapability{CapCoding, CapCreative, CapFast},
|
|
CostPer1K: 0.003,
|
|
MaxContext: 200000,
|
|
MaxTokens: 8192,
|
|
Priority: 1,
|
|
Description: "Claude 3.5 Sonnet: balanced speed and quality",
|
|
},
|
|
{
|
|
ID: "gemini-1.5-pro",
|
|
Provider: "gemini",
|
|
Model: "gemini-1.5-pro",
|
|
Capabilities: []ModelCapability{CapLongContext, CapSearch, CapVision, CapMath},
|
|
CostPer1K: 0.00125,
|
|
MaxContext: 2000000,
|
|
MaxTokens: 8192,
|
|
Priority: 1,
|
|
Description: "Gemini 1.5 Pro: best for long context and research",
|
|
},
|
|
{
|
|
ID: "gemini-1.5-flash",
|
|
Provider: "gemini",
|
|
Model: "gemini-1.5-flash",
|
|
Capabilities: []ModelCapability{CapFast, CapVision},
|
|
CostPer1K: 0.000075,
|
|
MaxContext: 1000000,
|
|
MaxTokens: 8192,
|
|
Priority: 2,
|
|
Description: "Gemini 1.5 Flash: fastest for lightweight tasks",
|
|
},
|
|
}
|