feat: LLM routing by tier (free→Ollama, pro→Timeweb)
Some checks failed
Build and Deploy GooSeek / build-and-deploy (push) Failing after 8m25s

- Add tier-based provider routing in llm-svc
  - free tier → Ollama (local qwen3.5:9b)
  - pro/business → Timeweb Cloud AI
- Add /api/v1/embed endpoint for embeddings via Ollama
- Update Ollama client: qwen3.5:9b default, remove auth
- Add GenerateEmbedding() function for qwen3-embedding:0.6b
- Add Ollama K8s deployment with GPU support (RTX 4060 Ti)
- Add monitoring stack (Prometheus, Grafana, Alertmanager)
- Add Grafana dashboards for LLM and security metrics
- Update deploy.sh with monitoring and Ollama deployment

Made-with: Cursor
This commit is contained in:
home
2026-03-03 02:25:22 +03:00
parent 5ac082a7c6
commit 7a40ff629e
19 changed files with 1759 additions and 35 deletions

View File

@@ -32,7 +32,7 @@ func NewOllamaClient(cfg OllamaConfig) (*OllamaClient, error) {
modelKey := cfg.ModelKey
if modelKey == "" {
modelKey = "llama3.2"
modelKey = "qwen3.5:9b"
}
return &OllamaClient{
@@ -231,3 +231,57 @@ func (c *OllamaClient) GenerateText(ctx context.Context, req StreamRequest) (str
return chatResp.Message.Content, nil
}
// ollamaEmbedRequest is the JSON payload for Ollama's /api/embed endpoint.
type ollamaEmbedRequest struct {
	Model string `json:"model"` // embedding model name, e.g. "qwen3-embedding:0.6b"
	Input string `json:"input"` // text to embed
}

// ollamaEmbedResponse mirrors the /api/embed response body. Ollama returns
// one vector per input, so a single-input request yields Embeddings[0].
type ollamaEmbedResponse struct {
	Model      string      `json:"model"`
	Embeddings [][]float64 `json:"embeddings"`
}

// embedHTTPClient is shared across GenerateEmbedding calls so TCP
// connections to the Ollama server are reused instead of being
// re-dialed on every request.
var embedHTTPClient = &http.Client{Timeout: 30 * time.Second}

// GenerateEmbedding requests an embedding vector for input from an Ollama
// server. An empty baseURL defaults to the in-cluster service
// "http://ollama:11434"; an empty model defaults to "qwen3-embedding:0.6b".
// It returns the first embedding vector from the response, or an error if
// the request fails, the server responds with a non-200 status, or the
// response contains no embeddings.
func GenerateEmbedding(baseURL, model, input string) ([]float64, error) {
	if baseURL == "" {
		baseURL = "http://ollama:11434"
	}
	if model == "" {
		model = "qwen3-embedding:0.6b"
	}

	body, err := json.Marshal(ollamaEmbedRequest{Model: model, Input: input})
	if err != nil {
		return nil, fmt.Errorf("marshaling embed request: %w", err)
	}

	// "endpoint" rather than "url": a local named url would shadow net/url.
	endpoint := fmt.Sprintf("%s/api/embed", baseURL)
	resp, err := embedHTTPClient.Post(endpoint, "application/json", bytes.NewReader(body))
	if err != nil {
		return nil, fmt.Errorf("embed request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: include the server's message in the error for debugging.
		respBody, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("ollama embed API error: status %d, body: %s", resp.StatusCode, string(respBody))
	}

	var embedResp ollamaEmbedResponse
	if err := json.NewDecoder(resp.Body).Decode(&embedResp); err != nil {
		return nil, fmt.Errorf("decoding embed response: %w", err)
	}
	if len(embedResp.Embeddings) == 0 || len(embedResp.Embeddings[0]) == 0 {
		return nil, errors.New("empty embeddings from Ollama")
	}
	return embedResp.Embeddings[0], nil
}