package llm import ( "errors" "sort" "sync" ) type ModelCapability string const ( CapReasoning ModelCapability = "reasoning" CapCoding ModelCapability = "coding" CapSearch ModelCapability = "search" CapCreative ModelCapability = "creative" CapFast ModelCapability = "fast" CapLongContext ModelCapability = "long_context" CapVision ModelCapability = "vision" CapMath ModelCapability = "math" CapVideo ModelCapability = "video" CapImage ModelCapability = "image" ) type ModelSpec struct { ID string Provider string Model string Capabilities []ModelCapability CostPer1K float64 MaxContext int Priority int MaxTokens int Description string } func (m ModelSpec) HasCapability(cap ModelCapability) bool { for _, c := range m.Capabilities { if c == cap { return true } } return false } type ModelRegistry struct { models map[string]ModelSpec clients map[string]Client mu sync.RWMutex } func NewModelRegistry() *ModelRegistry { return &ModelRegistry{ models: make(map[string]ModelSpec), clients: make(map[string]Client), } } func (r *ModelRegistry) Register(spec ModelSpec, client Client) { r.mu.Lock() defer r.mu.Unlock() r.models[spec.ID] = spec r.clients[spec.ID] = client } func (r *ModelRegistry) Unregister(id string) { r.mu.Lock() defer r.mu.Unlock() delete(r.models, id) delete(r.clients, id) } func (r *ModelRegistry) GetByID(id string) (Client, ModelSpec, error) { r.mu.RLock() defer r.mu.RUnlock() spec, ok := r.models[id] if !ok { return nil, ModelSpec{}, errors.New("model not found: " + id) } client, ok := r.clients[id] if !ok { return nil, ModelSpec{}, errors.New("client not found: " + id) } return client, spec, nil } func (r *ModelRegistry) GetBest(cap ModelCapability) (Client, ModelSpec, error) { r.mu.RLock() defer r.mu.RUnlock() var candidates []ModelSpec for _, spec := range r.models { if spec.HasCapability(cap) { candidates = append(candidates, spec) } } if len(candidates) == 0 { return nil, ModelSpec{}, errors.New("no model found with capability: " + string(cap)) } sort.Slice(candidates, func(i, j int) bool { if candidates[i].Priority != candidates[j].Priority { return candidates[i].Priority < candidates[j].Priority } return candidates[i].CostPer1K < candidates[j].CostPer1K }) best := candidates[0] client := r.clients[best.ID] return client, best, nil } func (r *ModelRegistry) GetAllWithCapability(cap ModelCapability) []ModelSpec { r.mu.RLock() defer r.mu.RUnlock() var result []ModelSpec for _, spec := range r.models { if spec.HasCapability(cap) { result = append(result, spec) } } sort.Slice(result, func(i, j int) bool { return result[i].Priority < result[j].Priority }) return result } func (r *ModelRegistry) GetAll() []ModelSpec { r.mu.RLock() defer r.mu.RUnlock() result := make([]ModelSpec, 0, len(r.models)) for _, spec := range r.models { result = append(result, spec) } return result } func (r *ModelRegistry) GetClient(id string) (Client, error) { r.mu.RLock() defer r.mu.RUnlock() client, ok := r.clients[id] if !ok { return nil, errors.New("client not found: " + id) } return client, nil } func (r *ModelRegistry) Count() int { r.mu.RLock() defer r.mu.RUnlock() return len(r.models) } var DefaultModels = []ModelSpec{ { ID: "gpt-4o", Provider: "openai", Model: "gpt-4o", Capabilities: []ModelCapability{CapSearch, CapFast, CapVision, CapCoding, CapCreative}, CostPer1K: 0.005, MaxContext: 128000, MaxTokens: 16384, Priority: 1, Description: "GPT-4o: fast multimodal model with search", }, { ID: "gpt-4o-mini", Provider: "openai", Model: "gpt-4o-mini", Capabilities: []ModelCapability{CapFast, CapCoding}, CostPer1K: 0.00015, MaxContext: 128000, MaxTokens: 16384, Priority: 2, Description: "GPT-4o Mini: cost-effective for simple tasks", }, { ID: "claude-3-opus", Provider: "anthropic", Model: "claude-3-opus-20240229", Capabilities: []ModelCapability{CapReasoning, CapCoding, CapCreative, CapLongContext}, CostPer1K: 0.015, MaxContext: 200000, MaxTokens: 4096, Priority: 1, Description: "Claude 3 Opus: best for complex reasoning and coding", }, { ID: "claude-3-sonnet", Provider: "anthropic", Model: "claude-3-5-sonnet-20241022", Capabilities: []ModelCapability{CapCoding, CapCreative, CapFast}, CostPer1K: 0.003, MaxContext: 200000, MaxTokens: 8192, Priority: 1, Description: "Claude 3.5 Sonnet: balanced speed and quality", }, { ID: "gemini-1.5-pro", Provider: "gemini", Model: "gemini-1.5-pro", Capabilities: []ModelCapability{CapLongContext, CapSearch, CapVision, CapMath}, CostPer1K: 0.00125, MaxContext: 2000000, MaxTokens: 8192, Priority: 1, Description: "Gemini 1.5 Pro: best for long context and research", }, { ID: "gemini-1.5-flash", Provider: "gemini", Model: "gemini-1.5-flash", Capabilities: []ModelCapability{CapFast, CapVision}, CostPer1K: 0.000075, MaxContext: 1000000, MaxTokens: 8192, Priority: 2, Description: "Gemini 1.5 Flash: fastest for lightweight tasks", }, }