diff --git a/.env.example b/.env.example index 9647c8f..363b424 100644 --- a/.env.example +++ b/.env.example @@ -42,6 +42,17 @@ ARCHON_DOCS_PORT=3838 # If not set, defaults to localhost, 127.0.0.1, ::1, and the HOST value above VITE_ALLOWED_HOSTS= +# MCP Public URL Configuration +# This is the publicly accessible URL for the MCP server +# Used to generate client configuration JSON for Claude Code, Cursor, etc. +# Format: "domain.com:8051" or "localhost:8051" +# Examples: +# - Development: localhost:8051 (default) +# - Production: archon.automatizase.com.br:8051 +# - Custom domain: mcp.mycompany.com:8051 +# If not set, defaults to "localhost:8051" +MCP_PUBLIC_URL=localhost:8051 + # Development Tools # VITE_SHOW_DEVTOOLS: Show TanStack Query DevTools (for developers only) # Set to "true" to enable the DevTools panel in bottom right corner diff --git a/CLAUDE.md b/CLAUDE.md index 77673db..61670f9 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -134,6 +134,71 @@ make test-fe # Frontend tests only make test-be # Backend tests only ``` +### Kubernetes Deployment + +**Build and push images to registry:** + +```bash +# Navigate to project root +cd /home/lperl/Archon + +# Build and push Server image (includes Playwright and MCP_PUBLIC_URL fixes) +docker build -f python/Dockerfile.k8s.server \ + -t git.automatizase.com.br/luis.erlacher/archon/server:k8s-latest \ + python/ +docker push git.automatizase.com.br/luis.erlacher/archon/server:k8s-latest + +# Build and push MCP image +docker build -f python/Dockerfile.k8s.mcp \ + -t git.automatizase.com.br/luis.erlacher/archon/mcp:k8s-latest \ + python/ +docker push git.automatizase.com.br/luis.erlacher/archon/mcp:k8s-latest + +# Build and push Frontend image (if needed) +docker build -f archon-ui-main/Dockerfile.k8s.production \ + -t git.automatizase.com.br/luis.erlacher/archon/frontend:k8s-latest \ + archon-ui-main/ +docker push git.automatizase.com.br/luis.erlacher/archon/frontend:k8s-latest + +# Optional: Build and push Agents image 
+docker build -f python/Dockerfile.k8s.agents \ + -t git.automatizase.com.br/luis.erlacher/archon/agents:k8s-latest \ + python/ +docker push git.automatizase.com.br/luis.erlacher/archon/agents:k8s-latest +``` + +**Deploy to Kubernetes:** + +```bash +# IMPORTANT: First, update ConfigMap with your domain! +# Edit k8s-manifests-complete.yaml line 61: +# MCP_PUBLIC_URL: "your-domain.com:8051" # ← CHANGE THIS! + +# Apply manifests +kubectl apply -f k8s-manifests-complete.yaml + +# Restart deployments to use new images +kubectl rollout restart deployment/archon-server -n archon +kubectl rollout restart deployment/archon-mcp -n archon +kubectl rollout restart deployment/archon-frontend -n archon + +# Monitor deployment status +kubectl rollout status deployment/archon-server -n archon +kubectl get pods -n archon -w + +# View logs +kubectl logs -f deployment/archon-server -n archon +kubectl logs -f deployment/archon-mcp -n archon + +# Verify configuration +kubectl get configmap archon-config -n archon -o yaml | grep -A 2 MCP_PUBLIC_URL +``` + +**Complete K8s documentation:** +- Full deployment guide: `K8S_COMPLETE_ADJUSTMENTS.md` +- MCP Public URL configuration: `MCP_PUBLIC_URL_GUIDE.md` +- Image naming convention: `service:k8s-latest` (e.g., `server:k8s-latest`, `mcp:k8s-latest`) + ## Architecture Overview @PRPs/ai_docs/ARCHITECTURE.md diff --git a/K8S_COMPLETE_ADJUSTMENTS.md b/K8S_COMPLETE_ADJUSTMENTS.md new file mode 100644 index 0000000..a8c4493 --- /dev/null +++ b/K8S_COMPLETE_ADJUSTMENTS.md @@ -0,0 +1,826 @@ +# Kubernetes Complete Adjustments Guide + +## Executive Summary + +Este documento descreve **todas as mudanças necessárias** para executar o Archon em produção no Kubernetes, não apenas o Playwright. 
As mudanças cobrem: + +- ✅ Playwright browser binaries (JÁ CORRIGIDO) +- ⚠️ Variáveis de ambiente em K8s manifests +- ⚠️ Resource limits para crawling +- ⚠️ Nginx permissions e configuration +- ⚠️ Security contexts avançados +- ⚠️ Health checks otimizados +- ⚠️ Init containers para warm-up + +--- + +## 1. Playwright Browser Binaries (✅ JÁ CORRIGIDO) + +### Problema Identificado +Playwright instalava binários em `/root/.cache/ms-playwright` (root), mas container roda como `appuser` (UID 1001) e não tinha acesso. + +### Solução Aplicada + +**Dockerfile.k8s.server:** +```dockerfile +# Install Playwright browsers in a location accessible to appuser +ENV PATH=/venv/bin:$PATH +ENV PLAYWRIGHT_BROWSERS_PATH=/app/ms-playwright +RUN mkdir -p /app/ms-playwright && \ + playwright install chromium && \ + chown -R appuser:appuser /app/ms-playwright + +# Runtime environment +ENV PLAYWRIGHT_BROWSERS_PATH=/app/ms-playwright +``` + +**Dockerfile.server (Docker Compose):** +```dockerfile +ENV PLAYWRIGHT_BROWSERS_PATH=/tmp/ms-playwright +RUN mkdir -p /tmp/ms-playwright && \ + playwright install chromium && \ + chmod -R 777 /tmp/ms-playwright + +ENV PLAYWRIGHT_BROWSERS_PATH=/tmp/ms-playwright +``` + +### ⚠️ AÇÃO NECESSÁRIA: Adicionar em K8s Manifests + +**Adicionar em `k8s-manifests-complete.yaml` - archon-server deployment:** + +```yaml +spec: + template: + spec: + containers: + - name: server + env: + # ... outras variáveis ... + + # ADICIONAR ESTA LINHA: + - name: PLAYWRIGHT_BROWSERS_PATH + value: "/app/ms-playwright" +``` + +--- + +## 2. Resource Limits para Crawling com Chromium + +### Problema +Chromium consome significativa memória e CPU durante crawling. 
Os limites atuais podem ser insuficientes: + +**Atual:** +```yaml +resources: + requests: + memory: "512Mi" + cpu: "500m" + limits: + memory: "1Gi" + cpu: "1000m" +``` + +### Solução Recomendada + +**Atualizar em `k8s-manifests-complete.yaml` - archon-server:** + +```yaml +resources: + requests: + memory: "768Mi" # Aumentado de 512Mi + cpu: "500m" + limits: + memory: "2Gi" # Aumentado de 1Gi (Chromium pode usar 1.5Gi em picos) + cpu: "2000m" # Aumentado de 1000m (crawling paralelo) + + # ADICIONAR: Limitar uso de ephemeral storage + ephemeral-storage: "5Gi" +``` + +### Justificativa +- Chromium headless consome ~300-600MB por instância +- Crawling paralelo pode executar múltiplas instâncias +- Processamento de documentos grandes precisa de memória +- Margem de segurança para evitar OOMKilled + +--- + +## 3. Nginx Configuration e Permissions + +### Status Atual +✅ Nginx já configurado para rodar como non-root (user `nginx`, UID 101) + +**Dockerfile.k8s.production:** +```dockerfile +RUN chown -R nginx:nginx /usr/share/nginx/html /var/cache/nginx /var/log/nginx /etc/nginx/conf.d && \ + touch /var/run/nginx.pid && \ + chown -R nginx:nginx /var/run/nginx.pid + +USER nginx +``` + +### ⚠️ Melhorias Recomendadas + +**Adicionar em `k8s-manifests-complete.yaml` - archon-frontend:** + +```yaml +spec: + template: + spec: + securityContext: + runAsNonRoot: true + runAsUser: 101 # nginx user + runAsGroup: 101 + fsGroup: 101 + # ADICIONAR: + seccompProfile: + type: RuntimeDefault + + containers: + - name: frontend + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + # Nginx não precisa de capabilities especiais na porta 3737 + readOnlyRootFilesystem: true # MUDAR para true + + # ADICIONAR volumes para diretórios que nginx precisa escrever: + volumeMounts: + - name: nginx-cache + mountPath: /var/cache/nginx + - name: nginx-run + mountPath: /var/run + - name: nginx-logs + mountPath: /var/log/nginx + + volumes: + - name: nginx-cache + emptyDir: {} + 
- name: nginx-run + emptyDir: {} + - name: nginx-logs + emptyDir: {} +``` + +--- + +## 4. Advanced Security Contexts + +### Problema +Security contexts estão básicos. Podem ser fortalecidos para melhor segurança. + +### Solução: Pod Security Standards + +**Adicionar em TODOS os deployments:** + +```yaml +spec: + template: + metadata: + labels: + app: archon-server # ou mcp, frontend, etc + # ATENÇÃO: labels pod-security.kubernetes.io/* só são avaliados quando aplicados ao Namespace `archon` (ex.: kubectl label namespace archon ...); no pod template não têm efeito: + pod-security.kubernetes.io/enforce: baseline + pod-security.kubernetes.io/audit: restricted + pod-security.kubernetes.io/warn: restricted + + spec: + # Security context do pod + securityContext: + runAsNonRoot: true + runAsUser: 1001 + runAsGroup: 1001 + fsGroup: 1001 + # ADICIONAR: + seccompProfile: + type: RuntimeDefault + supplementalGroups: [] + + # Security context do container + containers: + - name: server + securityContext: + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1001 + capabilities: + drop: + - ALL + # ADICIONAR (se possível - testar primeiro): + readOnlyRootFilesystem: false # true após configurar volumes + seccompProfile: + type: RuntimeDefault +``` + +### Arquivos que Precisam Escrever + +**archon-server:** +- `/app/ms-playwright` - Playwright browser cache (já configurado com ownership correto) +- `/tmp` - Temporary files (já acessível para appuser) +- Nenhum volume persistente necessário (tudo vai para Supabase) + +**archon-mcp e archon-agents:** +- Nenhum arquivo local necessário +- Podem usar `readOnlyRootFilesystem: true` + +--- + +## 5. Health Checks Otimizados + +### Problema Atual +Health checks podem ser muito agressivos durante operações pesadas (crawling). 
+ +### Solução + +**Atualizar em `k8s-manifests-complete.yaml` - archon-server:** + +```yaml +livenessProbe: + httpGet: + path: /health + port: 8181 + initialDelaySeconds: 60 # Aumentado de 40 (tempo para Playwright inicializar) + periodSeconds: 30 # OK + timeoutSeconds: 15 # Aumentado de 10 (crawling pode deixar servidor lento) + failureThreshold: 5 # Aumentado de 3 (mais tolerante) + successThreshold: 1 + +readinessProbe: + httpGet: + path: /health + port: 8181 + initialDelaySeconds: 15 # Aumentado de 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + successThreshold: 1 + +# ADICIONAR startup probe para não matar pod durante startup lento: +startupProbe: + httpGet: + path: /health + port: 8181 + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 12 # 12 x 10s = 2 minutos para startup + successThreshold: 1 +``` + +--- + +## 6. Init Container para Playwright Warm-up (Opcional mas Recomendado) + +### Problema +Primeira requisição de crawling é lenta porque Playwright precisa inicializar. + +### Solução + +**Adicionar em `k8s-manifests-complete.yaml` - archon-server:** + +```yaml +spec: + template: + spec: + # ADICIONAR antes de containers: + initContainers: + - name: playwright-warmup + image: git.automatizase.com.br/luis.erlacher/archon/server:k8s-latest + imagePullPolicy: Always + command: + - sh + - -c + - | + echo "Verificando instalação do Playwright..." + python -c "from playwright.sync_api import sync_playwright; print('Playwright OK')" || exit 1 + echo "Playwright inicializado com sucesso" + env: + - name: PLAYWRIGHT_BROWSERS_PATH + value: "/app/ms-playwright" + resources: + requests: + memory: "256Mi" + cpu: "200m" + limits: + memory: "512Mi" + cpu: "500m" + securityContext: + runAsNonRoot: true + runAsUser: 1001 + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + + containers: + - name: server + # ... resto da configuração ... +``` + +--- + +## 7. 
ConfigMap Updates + +### Adicionar Playwright e outras configurações + +**Atualizar em `k8s-manifests-complete.yaml` - ConfigMap:** + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: archon-config + namespace: archon +data: + # Existing configs... + SERVICE_DISCOVERY_MODE: "kubernetes" + LOG_LEVEL: "INFO" + ARCHON_SERVER_PORT: "8181" + ARCHON_MCP_PORT: "8051" + ARCHON_UI_PORT: "3737" + ARCHON_HOST: "localhost" + TRANSPORT: "sse" + AGENTS_ENABLED: "false" + + # ADICIONAR: + PLAYWRIGHT_BROWSERS_PATH: "/app/ms-playwright" + + # MCP Public URL - IMPORTANTE: Configure com seu domínio! + # Format: "domain.com:8051" or "localhost:8051" + # Examples: + # - Development: localhost:8051 + # - Production: archon.automatizase.com.br:8051 + # - Custom: mcp.mycompany.com:8051 + # This is used to generate MCP client configuration JSON + MCP_PUBLIC_URL: "archon.automatizase.com.br:8051" # ← CHANGE THIS! + + # Chromium optimization flags (já configurados no código, mas podem ser sobrescritos): + CHROMIUM_DISABLE_DEV_SHM: "true" + CHROMIUM_HEADLESS: "true" +``` + +--- + +## 8. 
Network Policies (Segurança Adicional) + +### Criar Network Policy para isolar pods + +**Criar arquivo `k8s-network-policies.yaml`:** + +```yaml +--- +# Network Policy - Archon Server +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: archon-server-netpol + namespace: archon +spec: + podSelector: + matchLabels: + app: archon-server + policyTypes: + - Ingress + - Egress + ingress: + # Permite tráfego do frontend + - from: + - podSelector: + matchLabels: + app: archon-frontend + ports: + - protocol: TCP + port: 8181 + # Permite tráfego do MCP + - from: + - podSelector: + matchLabels: + app: archon-mcp + ports: + - protocol: TCP + port: 8181 + egress: + # Permite DNS + - to: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: kube-system + ports: + - protocol: UDP + port: 53 + # Permite Supabase (internet) - destinos externos ao cluster exigem ipBlock + - to: + - ipBlock: {cidr: 0.0.0.0/0} + ports: + - protocol: TCP + port: 443 + # Permite comunicação com MCP + - to: + - podSelector: + matchLabels: + app: archon-mcp + ports: + - protocol: TCP + port: 8051 + +--- +# Network Policy - Archon MCP +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: archon-mcp-netpol + namespace: archon +spec: + podSelector: + matchLabels: + app: archon-mcp + policyTypes: + - Ingress + - Egress + ingress: + # Permite tráfego do server (NOTA: clientes MCP externos/Ingress ficam bloqueados; adicione uma regra se o MCP for exposto via MCP_PUBLIC_URL) + - from: + - podSelector: + matchLabels: + app: archon-server + ports: + - protocol: TCP + port: 8051 + egress: + # Permite DNS + - to: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: kube-system + ports: + - protocol: UDP + port: 53 + # Permite Supabase - destinos externos ao cluster exigem ipBlock + - to: + - ipBlock: {cidr: 0.0.0.0/0} + ports: + - protocol: TCP + port: 443 + # Permite comunicação com server + - to: + - podSelector: + matchLabels: + app: archon-server + ports: + - protocol: TCP + port: 8181 + +--- +# Network Policy - Archon Frontend +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: archon-frontend-netpol + namespace: archon +spec: + podSelector: + matchLabels: + app: 
archon-frontend + policyTypes: + - Ingress + - Egress + ingress: + # Permite tráfego de qualquer lugar (public-facing) + - {} + egress: + # Permite DNS + - to: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: kube-system + ports: + - protocol: UDP + port: 53 + # Permite comunicação com server (para API calls) + - to: + - podSelector: + matchLabels: + app: archon-server + ports: + - protocol: TCP + port: 8181 +``` + +--- + +## 9. Horizontal Pod Autoscaling (HPA) + +### Configurar autoscaling para server + +**Criar arquivo `k8s-hpa.yaml`:** + +```yaml +--- +# HPA - Archon Server (crawling pode ter spikes de carga) +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: archon-server-hpa + namespace: archon +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: archon-server + minReplicas: 2 + maxReplicas: 5 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: 80 + behavior: + scaleDown: + stabilizationWindowSeconds: 300 # Espera 5min antes de scale down + policies: + - type: Percent + value: 50 + periodSeconds: 60 + scaleUp: + stabilizationWindowSeconds: 30 # Scale up rápido + policies: + - type: Percent + value: 100 + periodSeconds: 30 + +--- +# HPA - Frontend (menos crítico, pode ser fixo em 2 réplicas) +# Opcional se houver muito tráfego +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: archon-frontend-hpa + namespace: archon +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: archon-frontend + minReplicas: 2 + maxReplicas: 4 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 75 +``` + +--- + +## 10. 
PodDisruptionBudget (Alta Disponibilidade) + +### Garantir disponibilidade durante interrupções voluntárias (drain de nodes, upgrades do cluster) + +**Criar arquivo `k8s-pdb.yaml`:** + +```yaml +--- +# PDB - Archon Server +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: archon-server-pdb + namespace: archon +spec: + minAvailable: 1 + selector: + matchLabels: + app: archon-server + unhealthyPodEvictionPolicy: AlwaysAllow + +--- +# PDB - Archon Frontend +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: archon-frontend-pdb + namespace: archon +spec: + minAvailable: 1 + selector: + matchLabels: + app: archon-frontend + unhealthyPodEvictionPolicy: AlwaysAllow +``` + +--- + +## 11. Persistent Volumes - NÃO NECESSÁRIO + +### Análise de Necessidade + +**✅ Archon NÃO precisa de volumes persistentes porque:** + +1. **Uploads de documentos**: Processados em memória e salvos no Supabase +2. **Crawling results**: Salvos diretamente no Supabase +3. **Playwright browsers**: Incluídos na imagem durante o build (`/app/ms-playwright`), sem estado no pod (stateless) +4. **Logs**: Enviados para stdout/stderr (capturados pelo K8s) +5. **Credenciais**: Armazenadas no Supabase (encrypted) +6. **Session data**: Gerenciado por Socket.IO em memória + +**📊 Arquitetura Stateless:** +``` +Pod → Processa dados → Salva no Supabase → Pod morre → Novo pod funciona igual +``` + +**⚠️ Exceção:** Se precisar de cache local para performance: +```yaml +# Opcional: Volume efêmero para cache de embeddings (não persiste entre restarts) +volumes: +- name: embedding-cache + emptyDir: + sizeLimit: 1Gi +``` + +--- + +## 12. 
Monitoring e Observability + +### Prometheus Metrics (Recomendado) + +**Adicionar annotations nos deployments:** + +```yaml +spec: + template: + metadata: + annotations: + # ADICIONAR: + prometheus.io/scrape: "true" + prometheus.io/port: "8181" # ou 8051 para MCP + prometheus.io/path: "/metrics" # Se implementar endpoint +``` + +### Logfire Integration + +**Verificar em `k8s-manifests-complete.yaml` - Secrets:** + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: archon-secrets + namespace: archon +type: Opaque +stringData: + SUPABASE_URL: "https://seu-projeto.supabase.co" + SUPABASE_SERVICE_KEY: "sua-service-role-key-aqui" + OPENAI_API_KEY: "sua-openai-key-aqui" + LOGFIRE_TOKEN: "seu-logfire-token-aqui" # CONFIGURAR se usar Logfire +``` + +--- + +## Checklist de Implementação + +### 🔴 PRIORIDADE CRÍTICA (Impede funcionamento) +- [x] ✅ Corrigir Playwright browser path nos Dockerfiles +- [x] ✅ Adicionar `PLAYWRIGHT_BROWSERS_PATH` env var no deployment K8s +- [x] ✅ Adicionar `MCP_PUBLIC_URL` no ConfigMap e deployment K8s +- [x] ✅ Aumentar resource limits (memory: 2Gi, cpu: 2000m) +- [ ] ⚠️ Configurar `MCP_PUBLIC_URL` com o domínio correto no ConfigMap +- [ ] ⚠️ Rebuild e push das imagens K8s + +### 🟡 PRIORIDADE ALTA (Segurança e estabilidade) +- [x] ✅ Atualizar health checks (startup probe, failureThreshold) +- [ ] ⚠️ Adicionar security contexts avançados (seccompProfile, readOnlyRootFilesystem) +- [ ] ⚠️ Configurar volumes para nginx (cache, run, logs) +- [ ] ⚠️ Implementar Network Policies + +### 🟢 PRIORIDADE MÉDIA (Performance e observabilidade) +- [ ] 🔄 Adicionar init container para Playwright warm-up +- [ ] 🔄 Configurar HPA para server +- [ ] 🔄 Configurar PodDisruptionBudget +- [ ] 🔄 Adicionar Prometheus annotations + +### 🔵 PRIORIDADE BAIXA (Melhoria contínua) +- [ ] 📝 Implementar /metrics endpoint para Prometheus +- [ ] 📝 Configurar Logfire token +- [ ] 📝 Testar readOnlyRootFilesystem: true no server +- [ ] 📝 Considerar resource quotas por namespace + 
+--- + +## Comandos para Deploy + +### 1. Rebuild e Push das Imagens + +```bash +# Server +cd /home/lperl/Archon +docker build -f python/Dockerfile.k8s.server -t git.automatizase.com.br/luis.erlacher/archon/server:k8s-latest python/ +docker push git.automatizase.com.br/luis.erlacher/archon/server:k8s-latest + +# MCP (não mudou, mas rebuild para garantir) +docker build -f python/Dockerfile.k8s.mcp -t git.automatizase.com.br/luis.erlacher/archon/mcp:k8s-latest python/ +docker push git.automatizase.com.br/luis.erlacher/archon/mcp:k8s-latest + +# Frontend (não mudou, mas rebuild para garantir) +docker build -f archon-ui-main/Dockerfile.k8s.production -t git.automatizase.com.br/luis.erlacher/archon/frontend:k8s-latest archon-ui-main/ +docker push git.automatizase.com.br/luis.erlacher/archon/frontend:k8s-latest + +# Agents (se usado) +docker build -f python/Dockerfile.k8s.agents -t git.automatizase.com.br/luis.erlacher/archon/agents:k8s-latest python/ +docker push git.automatizase.com.br/luis.erlacher/archon/agents:k8s-latest +``` + +### 2. Aplicar K8s Manifests + +```bash +# Namespace e secrets (se ainda não existir) +kubectl apply -f k8s-manifests-complete.yaml + +# Network policies (criar arquivo primeiro) +kubectl apply -f k8s-network-policies.yaml + +# HPA (criar arquivo primeiro) +kubectl apply -f k8s-hpa.yaml + +# PDB (criar arquivo primeiro) +kubectl apply -f k8s-pdb.yaml +``` + +### 3. Rolling Restart + +```bash +# Restart server (vai pegar nova imagem) +kubectl rollout restart deployment/archon-server -n archon +kubectl rollout status deployment/archon-server -n archon + +# Restart MCP +kubectl rollout restart deployment/archon-mcp -n archon +kubectl rollout status deployment/archon-mcp -n archon + +# Restart frontend +kubectl rollout restart deployment/archon-frontend -n archon +kubectl rollout status deployment/archon-frontend -n archon +``` + +### 4. 
Verificar Status + +```bash +# Ver pods +kubectl get pods -n archon -w + +# Ver logs do server +kubectl logs -f deployment/archon-server -n archon + +# Ver eventos +kubectl get events -n archon --sort-by='.lastTimestamp' + +# Testar crawling +kubectl port-forward -n archon svc/archon-server-service 8181:8181 +# Em outro terminal: +curl -X POST http://localhost:8181/api/knowledge/crawl \ + -H "Content-Type: application/json" \ + -d '{"url": "https://example.com"}' +``` + +--- + +## Troubleshooting + +### Problema: Pod crashando com OOMKilled +**Solução:** Aumentar memory limits para 2Gi ou mais + +### Problema: Playwright ainda não encontra browser +**Verificar:** +```bash +kubectl exec -it deployment/archon-server -n archon -- bash +echo $PLAYWRIGHT_BROWSERS_PATH +ls -la /app/ms-playwright +``` + +### Problema: Health check falhando +**Solução:** Aumentar `initialDelaySeconds` e `failureThreshold` + +### Problema: Rolling update com downtime +**Solução:** Verificar PodDisruptionBudget e garantir minAvailable: 1 + +--- + +## Referências + +- [Kubernetes Best Practices](https://kubernetes.io/docs/concepts/configuration/overview/) +- [Pod Security Standards](https://kubernetes.io/docs/concepts/security/pod-security-standards/) +- [Playwright in Docker](https://playwright.dev/docs/docker) +- [Nginx Non-Root](https://hub.docker.com/_/nginx) +- [FastAPI Deployment](https://fastapi.tiangolo.com/deployment/docker/) diff --git a/MCP_PUBLIC_URL_GUIDE.md b/MCP_PUBLIC_URL_GUIDE.md new file mode 100644 index 0000000..d6367aa --- /dev/null +++ b/MCP_PUBLIC_URL_GUIDE.md @@ -0,0 +1,452 @@ +# MCP Public URL Configuration Guide + +## Overview + +The `MCP_PUBLIC_URL` environment variable allows you to configure the publicly accessible URL for the MCP server. This is used to generate the correct client configuration JSON for MCP-compatible IDEs (Claude Code, Cursor, Windsurf, etc.). 
+ +## Why This Feature Exists + +### The Problem + +When Archon is deployed on Kubernetes or behind a reverse proxy, the MCP server needs to provide client configuration with the **publicly accessible domain**, not `localhost`. + +**Before this feature:** +```json +{ + "mcpServers": { + "archon": { + "url": "http://localhost:8051/mcp" ← ❌ Doesn't work from external machines! + } + } +} +``` + +**After this feature:** +```json +{ + "mcpServers": { + "archon": { + "url": "http://archon.automatizase.com.br:8051/mcp" ← ✅ Works from anywhere! + } + } +} +``` + +## Configuration + +### Format + +The `MCP_PUBLIC_URL` variable accepts the following formats: + +```bash +# With explicit port +MCP_PUBLIC_URL="archon.automatizase.com.br:8051" + +# Domain only (port will be inferred from ARCHON_MCP_PORT) +MCP_PUBLIC_URL="archon.automatizase.com.br" + +# Development (default) +MCP_PUBLIC_URL="localhost:8051" +``` + +### Docker Compose + +Edit your `.env` file: + +```bash +# MCP Public URL Configuration +# Used to generate client configuration JSON for Claude Code, Cursor, etc. +MCP_PUBLIC_URL=localhost:8051 # Change to your domain for production +``` + +Or export in your shell: + +```bash +export MCP_PUBLIC_URL="archon.yourdomain.com:8051" +docker compose up -d +``` + +### Kubernetes + +Edit `k8s-manifests-complete.yaml` - **ConfigMap section:** + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: archon-config + namespace: archon +data: + # ... other configs ... + + # MCP Public URL - CHANGE THIS TO YOUR DOMAIN! 
+ MCP_PUBLIC_URL: "archon.automatizase.com.br:8051" # ← UPDATE THIS +``` + +Then apply: + +```bash +kubectl apply -f k8s-manifests-complete.yaml +kubectl rollout restart deployment/archon-server -n archon +``` + +## How It Works + +### Backend (Python) + +**File:** `python/src/server/api_routes/mcp_api.py` + +The `/api/mcp/config` endpoint reads `MCP_PUBLIC_URL` and uses it to generate the configuration: + +```python +# Get MCP public URL from environment +mcp_public_url = os.getenv("MCP_PUBLIC_URL") + +if mcp_public_url: + # Parse to extract host and port + if ":" in mcp_public_url: + host, port_str = mcp_public_url.rsplit(":", 1) + port = int(port_str) + else: + host = mcp_public_url + port = int(os.getenv("ARCHON_MCP_PORT", "8051")) +else: + # Fallback to localhost + host = "localhost" + port = 8051 + +config = { + "host": host, + "port": port, + "transport": "streamable-http" +} +``` + +### Frontend (React) + +**File:** `archon-ui-main/src/features/mcp/components/McpConfigSection.tsx` + +The frontend fetches the config and generates IDE-specific JSON: + +```typescript +// For Claude Code +{ + "name": "archon", + "transport": "http", + "url": `http://${config.host}:${config.port}/mcp` +} + +// For Cursor +{ + "mcpServers": { + "archon": { + "url": `http://${config.host}:${config.port}/mcp` + } + } +} + +// For Windsurf +{ + "mcpServers": { + "archon": { + "serverUrl": `http://${config.host}:${config.port}/mcp` + } + } +} + +// And so on for all supported IDEs... 
+``` + +## Deployment Scenarios + +### Local Development + +```bash +# .env +MCP_PUBLIC_URL=localhost:8051 +``` + +Generated URL: `http://localhost:8051/mcp` + +### Production - Direct Access + +If MCP is directly accessible on the public internet: + +```bash +# Kubernetes ConfigMap +MCP_PUBLIC_URL: "archon.mycompany.com:8051" +``` + +Generated URL: `http://archon.mycompany.com:8051/mcp` + +### Production - Behind Reverse Proxy + +If MCP is behind Nginx/Traefik on standard HTTP port: + +```bash +# Kubernetes ConfigMap +MCP_PUBLIC_URL: "mcp.mycompany.com:80" +# Or if reverse proxy handles port mapping: +MCP_PUBLIC_URL: "mcp.mycompany.com" # Port inferred from ARCHON_MCP_PORT +``` + +Generated URL: `http://mcp.mycompany.com:80/mcp` (or `http://mcp.mycompany.com:8051/mcp` when the port is omitted and inferred from `ARCHON_MCP_PORT`) + +### Production - HTTPS with Custom Port + +```bash +# Note: Frontend still generates http:// URLs +# Your reverse proxy should handle HTTPS termination +MCP_PUBLIC_URL: "archon.mycompany.com:443" +``` + +**Important:** The generated client configuration always uses the `http://` scheme (it is hard-coded in the frontend templates), so with this setting clients receive `http://archon.mycompany.com:443/mcp`. If your reverse proxy terminates TLS on that port, change the scheme to `https://` by hand in the copied configuration. + +## Verification + +### 1. Check Backend Config Endpoint + +```bash +curl http://localhost:8181/api/mcp/config | jq +``` + +Expected output: + +```json +{ + "host": "archon.automatizase.com.br", + "port": 8051, + "transport": "streamable-http", + "model_choice": "gpt-4o-mini" +} +``` + +### 2. Check Frontend MCP Page + +1. Open Archon UI: `http://localhost:3737` +2. Navigate to MCP page +3. Select an IDE (e.g., Claude Code) +4. Verify the generated command/JSON contains your domain: + +```bash +# Should show: +claude mcp add --transport http archon http://archon.automatizase.com.br:8051/mcp +``` + +### 3. Test from External Machine + +From another machine, try the MCP connection: + +```bash +curl http://archon.automatizase.com.br:8051/health +``` + +Should return: + +```json +{ + "status": "ok", + "version": "..." +} +``` + +## Troubleshooting + +### Problem: Still showing localhost + +**Check:** +1. 
Is `MCP_PUBLIC_URL` set in ConfigMap? + ```bash + kubectl get configmap archon-config -n archon -o yaml | grep MCP_PUBLIC_URL + ``` + +2. Did you restart the server deployment? + ```bash + kubectl rollout restart deployment/archon-server -n archon + ``` + +3. Check server logs: + ```bash + kubectl logs -f deployment/archon-server -n archon | grep MCP_PUBLIC_URL + ``` + + Should see: + ``` + Using MCP_PUBLIC_URL - host=archon.automatizase.com.br, port=8051 + ``` + +### Problem: Port not included in URL + +**Solution:** Explicitly include the port in `MCP_PUBLIC_URL`: + +```bash +# Instead of: +MCP_PUBLIC_URL="archon.mycompany.com" + +# Use: +MCP_PUBLIC_URL="archon.mycompany.com:8051" +``` + +### Problem: Can't connect from IDE + +**Check:** +1. **Firewall:** Is port 8051 open? + ```bash + telnet archon.automatizase.com.br 8051 + ``` + +2. **MCP Service:** Is it running? + ```bash + kubectl get pods -n archon | grep mcp + ``` + +3. **Network Policy:** Do you have network policies blocking ingress? + ```bash + kubectl get networkpolicies -n archon + ``` + +## Security Considerations + +### 1. MCP Exposes Read/Write Access + +MCP tools can: +- Search and read your knowledge base +- Create/update/delete projects and tasks +- Execute searches +- Modify data in Supabase + +**Recommendation:** +- Use authentication (future feature) +- Restrict access via firewall/network policies +- Don't expose MCP publicly without authentication + +### 2. No Built-in Authentication (Yet) + +Currently, anyone who can reach `http://your-domain:8051/mcp` can use your MCP server. + +**Mitigation strategies:** +- Use Kubernetes Network Policies to restrict access +- Use VPN or private networking +- Put MCP behind a reverse proxy with authentication +- Use IP allowlisting on your firewall + +### 3. HTTPS Considerations + +The generated client configuration always uses `http://` URLs; the scheme is hard-coded and is not derived from `MCP_PUBLIC_URL`. If your reverse proxy terminates TLS, clients must connect with `https://`, so edit the scheme in the copied configuration by hand. 
+ +**Example setup:** +``` +User → HTTPS (443) → Reverse Proxy → HTTP (8051) → MCP Pod +``` + +## Advanced Configuration + +### Multiple Environments + +Use different ConfigMaps per environment: + +```bash +# dev-config.yaml +MCP_PUBLIC_URL: "localhost:8051" + +# staging-config.yaml +MCP_PUBLIC_URL: "staging.archon.mycompany.com:8051" + +# prod-config.yaml +MCP_PUBLIC_URL: "archon.mycompany.com:8051" +``` + +### Custom Domain with Ingress + +If using Kubernetes Ingress: + +```yaml +# ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: archon-mcp-ingress + namespace: archon +spec: + rules: + - host: mcp.mycompany.com + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: archon-mcp-service + port: + number: 8051 +``` + +Then set: +```yaml +# ConfigMap +MCP_PUBLIC_URL: "mcp.mycompany.com:80" # or just "mcp.mycompany.com" +``` + +## Related Configuration + +### ARCHON_HOST (Legacy) + +The old `ARCHON_HOST` variable is still used as a fallback if `MCP_PUBLIC_URL` is not set: + +```bash +# Legacy mode (still works) +ARCHON_HOST=localhost +ARCHON_MCP_PORT=8051 + +# New mode (preferred) +MCP_PUBLIC_URL=localhost:8051 +``` + +**Migration path:** +1. Add `MCP_PUBLIC_URL` with your production domain +2. Keep `ARCHON_HOST` for backwards compatibility +3. Eventually, `ARCHON_HOST` may be removed + +### MCP_SERVICE_URL (Internal) + +**Do not confuse** `MCP_PUBLIC_URL` with `MCP_SERVICE_URL`: + +- `MCP_PUBLIC_URL`: **External** URL for client configuration (e.g., `archon.mycompany.com:8051`) +- `MCP_SERVICE_URL`: **Internal** K8s DNS for server-to-MCP communication (e.g., `http://archon-mcp-service.archon.svc.cluster.local:8051`) + +## Summary + +**Quick Setup:** + +1. **Edit ConfigMap:** + ```yaml + MCP_PUBLIC_URL: "your-domain.com:8051" + ``` + +2. **Apply and restart:** + ```bash + kubectl apply -f k8s-manifests-complete.yaml + kubectl rollout restart deployment/archon-server -n archon + ``` + +3. 
**Verify:** + ```bash + curl http://localhost:8181/api/mcp/config | jq .host + # Should return: "your-domain.com" + ``` + +4. **Test from IDE:** + - Open Archon UI → MCP page + - Copy configuration for your IDE + - Verify URL contains your domain + +**Files Modified:** +- ✅ `python/src/server/api_routes/mcp_api.py` - Backend logic +- ✅ `k8s-manifests-complete.yaml` - K8s ConfigMap and deployment +- ✅ `docker-compose.yml` - Docker Compose environment +- ✅ `.env.example` - Environment variable documentation +- ✅ `K8S_COMPLETE_ADJUSTMENTS.md` - Deployment guide + +**Support:** +- GitHub Issues: https://github.com/your-repo/archon/issues +- Documentation: https://docs.archon.yourdomain.com diff --git a/docker-compose.yml b/docker-compose.yml index 9d1e588..84ac15c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -29,6 +29,8 @@ services: - ARCHON_AGENTS_PORT=${ARCHON_AGENTS_PORT:-8052} - AGENTS_ENABLED=${AGENTS_ENABLED:-false} - ARCHON_HOST=${HOST:-localhost} + - PLAYWRIGHT_BROWSERS_PATH=/tmp/ms-playwright + - MCP_PUBLIC_URL=${MCP_PUBLIC_URL:-localhost:8051} networks: - app-network volumes: diff --git a/k8s-manifests-complete.yaml b/k8s-manifests-complete.yaml index ea53a25..0e1ee0e 100644 --- a/k8s-manifests-complete.yaml +++ b/k8s-manifests-complete.yaml @@ -49,6 +49,17 @@ data: TRANSPORT: "sse" AGENTS_ENABLED: "false" + # Playwright Configuration + PLAYWRIGHT_BROWSERS_PATH: "/app/ms-playwright" + + # MCP Public URL Configuration + # Format: "domain.com" or "domain.com:8051" or "localhost:8051" + # This is used to generate the MCP client configuration JSON + # IMPORTANT: Configure this with your actual domain! + # Example: "archon.automatizase.com.br" (port will be inferred from ARCHON_MCP_PORT) + # Example: "archon.automatizase.com.br:8051" (explicit port) + MCP_PUBLIC_URL: "localhost:8051" # CHANGE THIS TO YOUR DOMAIN! 
+ --- # ============================================================================= # DEPLOYMENT - ARCHON SERVER (Backend Principal) @@ -151,28 +162,49 @@ spec: - name: MCP_SERVICE_URL value: "http://archon-mcp-service.archon.svc.cluster.local:8051" + # Playwright configuration + - name: PLAYWRIGHT_BROWSERS_PATH + value: "/app/ms-playwright" + + # MCP Public URL (for client configuration) + - name: MCP_PUBLIC_URL + valueFrom: + configMapKeyRef: + name: archon-config + key: MCP_PUBLIC_URL + resources: requests: - memory: "512Mi" + memory: "768Mi" cpu: "500m" limits: - memory: "1Gi" - cpu: "1000m" + memory: "2Gi" + cpu: "2000m" + ephemeral-storage: "5Gi" + + startupProbe: + httpGet: + path: /health + port: 8181 + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 12 livenessProbe: httpGet: path: /health port: 8181 - initialDelaySeconds: 40 + initialDelaySeconds: 60 periodSeconds: 30 - timeoutSeconds: 10 - failureThreshold: 3 + timeoutSeconds: 15 + failureThreshold: 5 readinessProbe: httpGet: path: /health port: 8181 - initialDelaySeconds: 10 + initialDelaySeconds: 15 periodSeconds: 10 timeoutSeconds: 5 failureThreshold: 3 diff --git a/python/Dockerfile.k8s.server b/python/Dockerfile.k8s.server index 8aafec0..5c25f4a 100644 --- a/python/Dockerfile.k8s.server +++ b/python/Dockerfile.k8s.server @@ -64,10 +64,12 @@ RUN groupadd -r appuser -g 1001 && \ # Copy the virtual environment from builder COPY --from=builder --chown=appuser:appuser /venv /venv -# Install Playwright browsers as root (needs permissions) +# Install Playwright browsers in a location accessible to appuser ENV PATH=/venv/bin:$PATH -RUN playwright install chromium && \ - chown -R appuser:appuser /root/.cache/ms-playwright 2>/dev/null || true +ENV PLAYWRIGHT_BROWSERS_PATH=/app/ms-playwright +RUN mkdir -p /app/ms-playwright && \ + playwright install chromium && \ + chown -R appuser:appuser /app/ms-playwright # Copy server code (production only - no tests) COPY 
--chown=appuser:appuser src/server/ src/server/ @@ -77,6 +79,7 @@ COPY --chown=appuser:appuser src/__init__.py src/ ENV PYTHONPATH="/app:$PYTHONPATH" ENV PYTHONUNBUFFERED=1 ENV PATH="/venv/bin:$PATH" +ENV PLAYWRIGHT_BROWSERS_PATH=/app/ms-playwright # Expose Server port ARG ARCHON_SERVER_PORT=8181 diff --git a/python/Dockerfile.server b/python/Dockerfile.server index 6e0ea55..9bb525b 100644 --- a/python/Dockerfile.server +++ b/python/Dockerfile.server @@ -51,9 +51,12 @@ RUN apt-get update && apt-get install -y \ # Copy the virtual environment from builder COPY --from=builder /venv /venv -# Install Playwright browsers +# Install Playwright browsers in accessible location ENV PATH=/venv/bin:$PATH -RUN playwright install chromium +ENV PLAYWRIGHT_BROWSERS_PATH=/tmp/ms-playwright +RUN mkdir -p /tmp/ms-playwright && \ + playwright install chromium && \ + chmod -R 777 /tmp/ms-playwright # Copy server code and tests COPY src/server/ src/server/ @@ -64,6 +67,7 @@ COPY tests/ tests/ ENV PYTHONPATH="/app:$PYTHONPATH" ENV PYTHONUNBUFFERED=1 ENV PATH="/venv/bin:$PATH" +ENV PLAYWRIGHT_BROWSERS_PATH=/tmp/ms-playwright # Expose Server port ARG ARCHON_SERVER_PORT=8181 diff --git a/python/src/mcp_server/mcp_server.py b/python/src/mcp_server/mcp_server.py index b0aa9b7..31b6494 100644 --- a/python/src/mcp_server/mcp_server.py +++ b/python/src/mcp_server/mcp_server.py @@ -31,6 +31,8 @@ from typing import Any from dotenv import load_dotenv from mcp.server.fastmcp import Context, FastMCP +from starlette.requests import Request +from starlette.responses import JSONResponse # Add the project root to Python path for imports sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) @@ -332,7 +334,31 @@ except Exception as e: raise -# Health check endpoint +# HTTP health check endpoint for K8s and frontend polling +@mcp.custom_route("/health", methods=["GET"]) +async def http_health_check(request: Request) -> JSONResponse: + """ + HTTP health check endpoint for K8s probes and frontend 
monitoring. + + This is a lightweight endpoint that returns 200 OK to indicate the server is running. + For detailed health checks including dependent services, use the health_check tool. + """ + try: + # Quick health check without heavy dependencies + return JSONResponse({ + "status": "healthy", + "service": "mcp", + "timestamp": datetime.now().isoformat(), + }) + except Exception as e: + logger.error(f"HTTP health check failed: {e}") + return JSONResponse( + {"status": "unhealthy", "error": str(e)}, + status_code=503 + ) + + +# MCP tool health check endpoint (detailed health status) @mcp.tool() async def health_check(ctx: Context) -> str: """ diff --git a/python/src/server/api_routes/mcp_api.py b/python/src/server/api_routes/mcp_api.py index 7ea7b47..7e34506 100644 --- a/python/src/server/api_routes/mcp_api.py +++ b/python/src/server/api_routes/mcp_api.py @@ -172,13 +172,36 @@ async def get_mcp_config(): try: api_logger.info("Getting MCP server configuration") - # Get actual MCP port from environment or use default - mcp_port = int(os.getenv("ARCHON_MCP_PORT", "8051")) + # Get MCP public URL from environment (for production/Kubernetes) + # Format: "domain.com:8051" or "localhost:8051" + mcp_public_url = os.getenv("MCP_PUBLIC_URL") - # Configuration for streamable-http mode with actual port + if mcp_public_url: + # Parse public URL to extract host and port + if ":" in mcp_public_url: + host, port_str = mcp_public_url.rsplit(":", 1) + try: + port = int(port_str) + except ValueError: + # If port is not a number, use default + host = mcp_public_url + port = int(os.getenv("ARCHON_MCP_PORT", "8051")) + else: + # No port in URL, use default + host = mcp_public_url + port = int(os.getenv("ARCHON_MCP_PORT", "8051")) + + api_logger.info(f"Using MCP_PUBLIC_URL - host={host}, port={port}") + else: + # Fallback to legacy behavior (localhost) + host = os.getenv("ARCHON_HOST", "localhost") + port = int(os.getenv("ARCHON_MCP_PORT", "8051")) + api_logger.info(f"Using legacy 
ARCHON_HOST - host={host}, port={port}") + + # Configuration for streamable-http mode config = { - "host": os.getenv("ARCHON_HOST", "localhost"), - "port": mcp_port, + "host": host, + "port": port, "transport": "streamable-http", }