feat: LLM routing by tier (free→Ollama, pro→Timeweb)
Some checks failed
Build and Deploy GooSeek / build-and-deploy (push) Failing after 8m25s
Some checks failed
Build and Deploy GooSeek / build-and-deploy (push) Failing after 8m25s
- Add tier-based provider routing in llm-svc: free tier → Ollama (local qwen3.5:9b); pro/business → Timeweb Cloud AI
- Add /api/v1/embed endpoint for embeddings via Ollama
- Update Ollama client: qwen3.5:9b default, remove auth
- Add GenerateEmbedding() function for qwen3-embedding:0.6b
- Add Ollama K8s deployment with GPU support (RTX 4060 Ti)
- Add monitoring stack (Prometheus, Grafana, Alertmanager)
- Add Grafana dashboards for LLM and security metrics
- Update deploy.sh with monitoring and Ollama deployment

Made-with: Cursor
This commit is contained in:
@@ -32,7 +32,7 @@ func NewOllamaClient(cfg OllamaConfig) (*OllamaClient, error) {
|
||||
|
||||
modelKey := cfg.ModelKey
|
||||
if modelKey == "" {
|
||||
modelKey = "llama3.2"
|
||||
modelKey = "qwen3.5:9b"
|
||||
}
|
||||
|
||||
return &OllamaClient{
|
||||
@@ -231,3 +231,57 @@ func (c *OllamaClient) GenerateText(ctx context.Context, req StreamRequest) (str
|
||||
|
||||
return chatResp.Message.Content, nil
|
||||
}
|
||||
|
||||
// ollamaEmbedRequest is the JSON body for Ollama's /api/embed endpoint.
type ollamaEmbedRequest struct {
	Model string `json:"model"`
	Input string `json:"input"`
}

// ollamaEmbedResponse mirrors the JSON reply from /api/embed. Embeddings
// holds one vector per input; this client sends a single input and reads
// only the first vector.
type ollamaEmbedResponse struct {
	Model      string      `json:"model"`
	Embeddings [][]float64 `json:"embeddings"`
}

// embedHTTPClient is shared across calls so the transport can reuse
// TCP connections instead of allocating a new client per request.
var embedHTTPClient = &http.Client{Timeout: 30 * time.Second}

// GenerateEmbedding requests an embedding vector for input from the Ollama
// server at baseURL using the given model. An empty baseURL falls back to
// "http://ollama:11434" and an empty model to "qwen3-embedding:0.6b".
// It returns the first embedding from the response, or an error when the
// request fails, the server answers with a non-200 status, or the response
// contains no embedding data.
func GenerateEmbedding(baseURL, model, input string) ([]float64, error) {
	if baseURL == "" {
		baseURL = "http://ollama:11434"
	}
	if model == "" {
		model = "qwen3-embedding:0.6b"
	}

	body, err := json.Marshal(ollamaEmbedRequest{Model: model, Input: input})
	if err != nil {
		return nil, fmt.Errorf("marshal embed request: %w", err)
	}

	url := fmt.Sprintf("%s/api/embed", baseURL)
	resp, err := embedHTTPClient.Post(url, "application/json", bytes.NewReader(body))
	if err != nil {
		return nil, fmt.Errorf("embed request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: surface the server's message in the error; the read
		// error itself is intentionally ignored.
		respBody, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("ollama embed API: status %d, body: %s", resp.StatusCode, respBody)
	}

	var embedResp ollamaEmbedResponse
	if err := json.NewDecoder(resp.Body).Decode(&embedResp); err != nil {
		return nil, fmt.Errorf("decode embed response: %w", err)
	}

	if len(embedResp.Embeddings) == 0 || len(embedResp.Embeddings[0]) == 0 {
		return nil, errors.New("empty embeddings from Ollama")
	}

	return embedResp.Embeddings[0], nil
}
|
||||
|
||||
Reference in New Issue
Block a user