diff --git a/python/src/mcp_server/features/documents/document_tools.py b/python/src/mcp_server/features/documents/document_tools.py index e14d07a..2656206 100644 --- a/python/src/mcp_server/features/documents/document_tools.py +++ b/python/src/mcp_server/features/documents/document_tools.py @@ -144,7 +144,11 @@ def register_document_tools(mcp: FastMCP): timeout = get_default_timeout() async with httpx.AsyncClient(timeout=timeout) as client: - response = await client.get(urljoin(api_url, f"/api/projects/{project_id}/docs")) + # Pass include_content=False for lightweight response + response = await client.get( + urljoin(api_url, f"/api/projects/{project_id}/docs"), + params={"include_content": False} + ) if response.status_code == 200: result = response.json() diff --git a/python/src/mcp_server/features/projects/project_tools.py b/python/src/mcp_server/features/projects/project_tools.py index 367e932..db4819f 100644 --- a/python/src/mcp_server/features/projects/project_tools.py +++ b/python/src/mcp_server/features/projects/project_tools.py @@ -175,7 +175,11 @@ def register_project_tools(mcp: FastMCP): timeout = get_default_timeout() async with httpx.AsyncClient(timeout=timeout) as client: - response = await client.get(urljoin(api_url, "/api/projects")) + # CRITICAL: Pass include_content=False for lightweight response + response = await client.get( + urljoin(api_url, "/api/projects"), + params={"include_content": False} + ) if response.status_code == 200: projects = response.json() diff --git a/python/src/server/api_routes/projects_api.py b/python/src/server/api_routes/projects_api.py index 4d97ba3..3a735a8 100644 --- a/python/src/server/api_routes/projects_api.py +++ b/python/src/server/api_routes/projects_api.py @@ -9,7 +9,9 @@ Handles: """ import asyncio +import json import secrets +import sys from typing import Any from fastapi import APIRouter, HTTPException @@ -74,23 +76,49 @@ class CreateTaskRequest(BaseModel): @router.get("/projects") -async def list_projects(): - """List all projects.""" +async def list_projects(include_content: bool = True): + """ + List all projects. + + Args: + include_content: If True (default), returns full project content. + If False, returns lightweight metadata with statistics. + """ try: - logfire.info("Listing all projects") + logfire.info(f"Listing all projects | include_content={include_content}") - # Use ProjectService to get projects + # Use ProjectService to get projects with include_content parameter project_service = ProjectService() - success, result = project_service.list_projects() + success, result = project_service.list_projects(include_content=include_content) if not success: raise HTTPException(status_code=500, detail=result) - # Use SourceLinkingService to format projects with sources - source_service = SourceLinkingService() - formatted_projects = source_service.format_projects_with_sources(result["projects"]) + # Only format with sources if we have full content + if include_content: + # Use SourceLinkingService to format projects with sources + source_service = SourceLinkingService() + formatted_projects = source_service.format_projects_with_sources(result["projects"]) + else: + # Lightweight response doesn't need source formatting + formatted_projects = result["projects"] - logfire.info(f"Projects listed successfully | count={len(formatted_projects)}") + # Monitor response size for optimization validation + response_json = json.dumps(formatted_projects) + response_size = len(response_json) + + # Log response metrics + logfire.info( + f"Projects listed successfully | count={len(formatted_projects)} | " + f"size_bytes={response_size} | include_content={include_content}" + ) + + # Warning for large responses (>10KB) + if response_size > 10000: + logfire.warning( + f"Large response size detected | size_bytes={response_size} | " + f"include_content={include_content} | project_count={len(formatted_projects)}" + ) return formatted_projects @@ -473,11 +501,11 @@ async def get_project_features(project_id: str): @router.get("/projects/{project_id}/tasks") -async def list_project_tasks(project_id: str, include_archived: bool = False): +async def list_project_tasks(project_id: str, include_archived: bool = False, exclude_large_fields: bool = False): """List all tasks for a specific project. By default, filters out archived tasks.""" try: logfire.info( - f"Listing project tasks | project_id={project_id} | include_archived={include_archived}" + f"Listing project tasks | project_id={project_id} | include_archived={include_archived} | exclude_large_fields={exclude_large_fields}" ) # Use TaskService to list tasks @@ -485,6 +513,7 @@ async def list_project_tasks(project_id: str, include_archived: bool = False): success, result = task_service.list_tasks( project_id=project_id, include_closed=True, # Get all tasks, we'll filter archived separately + exclude_large_fields=exclude_large_fields, ) if not success: @@ -571,6 +600,7 @@ async def list_tasks( project_id=project_id, status=status, include_closed=include_closed, + exclude_large_fields=exclude_large_fields, ) if not success: @@ -591,8 +621,8 @@ async def list_tasks( end_idx = start_idx + per_page paginated_tasks = tasks[start_idx:end_idx] - # Return paginated response - return { + # Prepare response + response = { "tasks": paginated_tasks, "pagination": { "total": len(tasks), @@ -601,6 +631,25 @@ async def list_tasks( "pages": (len(tasks) + per_page - 1) // per_page, }, } + + # Monitor response size for optimization validation + response_json = json.dumps(response) + response_size = len(response_json) + + # Log response metrics + logfire.info( + f"Tasks listed successfully | count={len(paginated_tasks)} | " + f"size_bytes={response_size} | exclude_large_fields={exclude_large_fields}" + ) + + # Warning for large responses (>10KB) + if response_size > 10000: + logfire.warning( + f"Large task response size | size_bytes={response_size} | " + f"exclude_large_fields={exclude_large_fields} | task_count={len(paginated_tasks)}" + ) + + return response except HTTPException: raise @@ -795,14 +844,23 @@ async def mcp_update_task_status_with_socketio(task_id: str, status: str): @router.get("/projects/{project_id}/docs") -async def list_project_documents(project_id: str): - """List all documents for a specific project.""" +async def list_project_documents(project_id: str, include_content: bool = False): + """ + List all documents for a specific project. + + Args: + project_id: Project UUID + include_content: If True, includes full document content. + If False (default), returns metadata only. + """ try: - logfire.info(f"Listing documents for project | project_id={project_id}") + logfire.info( + f"Listing documents for project | project_id={project_id} | include_content={include_content}" + ) # Use DocumentService to list documents document_service = DocumentService() - success, result = document_service.list_documents(project_id) + success, result = document_service.list_documents(project_id, include_content=include_content) if not success: if "not found" in result.get("error", "").lower(): @@ -811,7 +869,7 @@ async def list_project_documents(project_id: str): raise HTTPException(status_code=500, detail=result) logfire.info( - f"Documents listed successfully | project_id={project_id} | count={result.get('total_count', 0)}" + f"Documents listed successfully | project_id={project_id} | count={result.get('total_count', 0)} | lightweight={not include_content}" ) return result diff --git a/python/src/server/services/projects/document_service.py b/python/src/server/services/projects/document_service.py index cd206e0..020ec30 100644 --- a/python/src/server/services/projects/document_service.py +++ b/python/src/server/services/projects/document_service.py @@ -96,10 +96,15 @@ class DocumentService: logger.error(f"Error adding document: {e}") return False, {"error": f"Error adding document: {str(e)}"} - def list_documents(self, project_id: str) -> tuple[bool, dict[str, Any]]: + def list_documents(self, project_id: str, include_content: bool = False) -> tuple[bool, dict[str, Any]]: """ List all documents in a project's docs JSONB field. + Args: + project_id: The project ID + include_content: If True, includes full document content. + If False (default), returns metadata only. + Returns: Tuple of (success, result_dict) """ @@ -116,20 +121,28 @@ class DocumentService: docs = response.data[0].get("docs", []) - # Format documents for response (exclude full content for listing) + # Format documents for response documents = [] for doc in docs: - documents.append({ - "id": doc.get("id"), - "document_type": doc.get("document_type"), - "title": doc.get("title"), - "status": doc.get("status"), - "version": doc.get("version"), - "tags": doc.get("tags", []), - "author": doc.get("author"), - "created_at": doc.get("created_at"), - "updated_at": doc.get("updated_at"), - }) + if include_content: + # Return full document + documents.append(doc) + else: + # Return metadata only + documents.append({ + "id": doc.get("id"), + "document_type": doc.get("document_type"), + "title": doc.get("title"), + "status": doc.get("status"), + "version": doc.get("version"), + "tags": doc.get("tags", []), + "author": doc.get("author"), + "created_at": doc.get("created_at"), + "updated_at": doc.get("updated_at"), + "stats": { + "content_size": len(str(doc.get("content", {}))) + } + }) return True, { "project_id": project_id, diff --git a/python/src/server/services/projects/project_service.py b/python/src/server/services/projects/project_service.py index c407b0b..ff88784 100644 --- a/python/src/server/services/projects/project_service.py +++ b/python/src/server/services/projects/project_service.py @@ -73,35 +73,73 @@ class ProjectService: logger.error(f"Error creating project: {e}") return False, {"error": f"Database error: {str(e)}"} - def list_projects(self) -> tuple[bool, dict[str, Any]]: + def list_projects(self, include_content: bool = True) -> tuple[bool, dict[str, Any]]: """ List all projects. + Args: + include_content: If True (default), includes docs, features, data fields. + If False, returns lightweight metadata only with counts. + Returns: Tuple of (success, result_dict) """ try: - response = ( - self.supabase_client.table("archon_projects") - .select("*") - .order("created_at", desc=True) - .execute() - ) + if include_content: + # Current behavior - maintain backward compatibility + response = ( + self.supabase_client.table("archon_projects") + .select("*") + .order("created_at", desc=True) + .execute() + ) - projects = [] - for project in response.data: - projects.append({ - "id": project["id"], - "title": project["title"], - "github_repo": project.get("github_repo"), - "created_at": project["created_at"], - "updated_at": project["updated_at"], - "pinned": project.get("pinned", False), - "description": project.get("description", ""), - "docs": project.get("docs", []), - "features": project.get("features", []), - "data": project.get("data", []), - }) + projects = [] + for project in response.data: + projects.append({ + "id": project["id"], + "title": project["title"], + "github_repo": project.get("github_repo"), + "created_at": project["created_at"], + "updated_at": project["updated_at"], + "pinned": project.get("pinned", False), + "description": project.get("description", ""), + "docs": project.get("docs", []), + "features": project.get("features", []), + "data": project.get("data", []), + }) + else: + # Lightweight response for MCP - fetch all data but only return metadata + stats + # FIXED: N+1 query problem - now using single query + response = ( + self.supabase_client.table("archon_projects") + .select("*") # Fetch all fields in single query + .order("created_at", desc=True) + .execute() + ) + + projects = [] + for project in response.data: + # Calculate counts from fetched data (no additional queries) + docs_count = len(project.get("docs", [])) + features_count = len(project.get("features", [])) + has_data = bool(project.get("data", [])) + + # Return only metadata + stats, excluding large JSONB fields + projects.append({ + "id": project["id"], + "title": project["title"], + "github_repo": project.get("github_repo"), + "created_at": project["created_at"], + "updated_at": project["updated_at"], + "pinned": project.get("pinned", False), + "description": project.get("description", ""), + "stats": { + "docs_count": docs_count, + "features_count": features_count, + "has_data": has_data + } + }) return True, {"projects": projects, "total_count": len(projects)} diff --git a/python/src/server/services/projects/task_service.py b/python/src/server/services/projects/task_service.py index 1d8f450..9d82d0f 100644 --- a/python/src/server/services/projects/task_service.py +++ b/python/src/server/services/projects/task_service.py @@ -186,17 +186,36 @@ class TaskService: return False, {"error": f"Error creating task: {str(e)}"} def list_tasks( - self, project_id: str = None, status: str = None, include_closed: bool = False + self, + project_id: str = None, + status: str = None, + include_closed: bool = False, + exclude_large_fields: bool = False ) -> tuple[bool, dict[str, Any]]: """ List tasks with various filters. + Args: + project_id: Filter by project + status: Filter by status + include_closed: Include done tasks + exclude_large_fields: If True, excludes sources and code_examples fields + Returns: Tuple of (success, result_dict) """ try: # Start with base query - query = self.supabase_client.table("archon_tasks").select("*") + if exclude_large_fields: + # Select all fields except large JSONB ones + query = self.supabase_client.table("archon_tasks").select( + "id, project_id, parent_task_id, title, description, " + "status, assignee, task_order, feature, archived, " + "archived_at, archived_by, created_at, updated_at, " + "sources, code_examples" # Still fetch for counting, but will process differently + ) + else: + query = self.supabase_client.table("archon_tasks").select("*") # Track filters for debugging filters_applied = [] @@ -265,7 +284,7 @@ class TaskService: tasks = [] for task in response.data: - tasks.append({ + task_data = { "id": task["id"], "project_id": task["project_id"], "title": task["title"], @@ -276,7 +295,20 @@ class TaskService: "feature": task.get("feature"), "created_at": task["created_at"], "updated_at": task["updated_at"], - }) + } + + if not exclude_large_fields: + # Include full JSONB fields + task_data["sources"] = task.get("sources", []) + task_data["code_examples"] = task.get("code_examples", []) + else: + # Add counts instead of full content + task_data["stats"] = { + "sources_count": len(task.get("sources", [])), + "code_examples_count": len(task.get("code_examples", [])) + } + + tasks.append(task_data) filter_info = [] if project_id: diff --git a/python/tests/test_token_optimization.py b/python/tests/test_token_optimization.py new file mode 100644 index 0000000..ebc5ac0 --- /dev/null +++ b/python/tests/test_token_optimization.py @@ -0,0 +1,356 @@ +""" +Test suite for token optimization changes. +Ensures backward compatibility and validates token reduction. +""" + +import json +import pytest +from unittest.mock import Mock, patch + +from src.server.services.projects import ProjectService +from src.server.services.projects.task_service import TaskService +from src.server.services.projects.document_service import DocumentService + + +class TestProjectServiceOptimization: + """Test ProjectService with include_content parameter.""" + + @patch('src.server.utils.get_supabase_client') + def test_list_projects_with_full_content(self, mock_supabase): + """Test backward compatibility - default returns full content.""" + # Setup mock + mock_client = Mock() + mock_supabase.return_value = mock_client + + # Mock response with large JSONB fields + mock_response = Mock() + mock_response.data = [{ + "id": "test-id", + "title": "Test Project", + "description": "Test Description", + "github_repo": "https://github.com/test/repo", + "docs": [{"id": "doc1", "content": {"large": "content" * 100}}], + "features": [{"feature1": "data"}], + "data": [{"key": "value"}], + "pinned": False, + "created_at": "2024-01-01", + "updated_at": "2024-01-01" + }] + + mock_table = Mock() + mock_select = Mock() + mock_order = Mock() + mock_order.execute.return_value = mock_response + mock_select.order.return_value = mock_order + mock_table.select.return_value = mock_select + mock_client.table.return_value = mock_table + + # Test + service = ProjectService(mock_client) + success, result = service.list_projects() # Default include_content=True + + # Assertions + assert success + assert len(result["projects"]) == 1 + assert "docs" in result["projects"][0] + assert "features" in result["projects"][0] + assert "data" in result["projects"][0] + + # Verify full content is returned + assert len(result["projects"][0]["docs"]) == 1 + assert result["projects"][0]["docs"][0]["content"]["large"] is not None + + # Verify SELECT * was used + mock_table.select.assert_called_with("*") + + @patch('src.server.utils.get_supabase_client') + def test_list_projects_lightweight(self, mock_supabase): + """Test lightweight response excludes large fields.""" + # Setup mock + mock_client = Mock() + mock_supabase.return_value = mock_client + + # Mock response with full data (after N+1 fix, we fetch all data) + mock_response = Mock() + mock_response.data = [{ + "id": "test-id", + "title": "Test Project", + "description": "Test Description", + "github_repo": "https://github.com/test/repo", + "created_at": "2024-01-01", + "updated_at": "2024-01-01", + "pinned": False, + "docs": [{"id": "doc1"}, {"id": "doc2"}, {"id": "doc3"}], # 3 docs + "features": [{"feature1": "data"}, {"feature2": "data"}], # 2 features + "data": [{"key": "value"}] # Has data + }] + + # Setup mock chain - now simpler after N+1 fix + mock_table = Mock() + mock_select = Mock() + mock_order = Mock() + + mock_order.execute.return_value = mock_response + mock_select.order.return_value = mock_order + mock_table.select.return_value = mock_select + mock_client.table.return_value = mock_table + + # Test + service = ProjectService(mock_client) + success, result = service.list_projects(include_content=False) + + # Assertions + assert success + assert len(result["projects"]) == 1 + project = result["projects"][0] + + # Verify no large fields + assert "docs" not in project + assert "features" not in project + assert "data" not in project + + # Verify stats are present + assert "stats" in project + assert project["stats"]["docs_count"] == 3 + assert project["stats"]["features_count"] == 2 + assert project["stats"]["has_data"] is True + + # Verify SELECT * was used (after N+1 fix, we fetch all data in one query) + mock_table.select.assert_called_with("*") + assert mock_client.table.call_count == 1 # Only one query now! + + def test_token_reduction(self): + """Verify token count reduction.""" + # Simulate full content response + full_content = { + "projects": [{ + "id": "test", + "title": "Test", + "description": "Test Description", + "docs": [{"content": {"large": "x" * 10000}} for _ in range(5)], + "features": [{"data": "y" * 5000} for _ in range(3)], + "data": [{"values": "z" * 8000}] + }] + } + + # Simulate lightweight response + lightweight = { + "projects": [{ + "id": "test", + "title": "Test", + "description": "Test Description", + "stats": { + "docs_count": 5, + "features_count": 3, + "has_data": True + } + }] + } + + # Calculate approximate token counts (rough estimate: 1 token ≈ 4 chars) + full_tokens = len(json.dumps(full_content)) / 4 + light_tokens = len(json.dumps(lightweight)) / 4 + + reduction_percentage = (1 - light_tokens / full_tokens) * 100 + + # Assert 95% reduction (allowing some margin) + assert reduction_percentage > 95, f"Token reduction is only {reduction_percentage:.1f}%" + + +class TestTaskServiceOptimization: + """Test TaskService with exclude_large_fields parameter.""" + + @patch('src.server.utils.get_supabase_client') + def test_list_tasks_with_large_fields(self, mock_supabase): + """Test backward compatibility - default includes large fields.""" + mock_client = Mock() + mock_supabase.return_value = mock_client + + mock_response = Mock() + mock_response.data = [{ + "id": "task-1", + "project_id": "proj-1", + "title": "Test Task", + "description": "Test Description", + "sources": [{"url": "http://example.com", "content": "large"}], + "code_examples": [{"code": "function() { /* large */ }"}], + "status": "todo", + "assignee": "User", + "task_order": 0, + "feature": None, + "created_at": "2024-01-01", + "updated_at": "2024-01-01" + }] + + # Setup mock chain + mock_table = Mock() + mock_select = Mock() + mock_or = Mock() + mock_order1 = Mock() + mock_order2 = Mock() + + mock_order2.execute.return_value = mock_response + mock_order1.order.return_value = mock_order2 + mock_or.order.return_value = mock_order1 + mock_select.neq().or_.return_value = mock_or + mock_table.select.return_value = mock_select + mock_client.table.return_value = mock_table + + service = TaskService(mock_client) + success, result = service.list_tasks() + + assert success + assert "sources" in result["tasks"][0] + assert "code_examples" in result["tasks"][0] + + @patch('src.server.utils.get_supabase_client') + def test_list_tasks_exclude_large_fields(self, mock_supabase): + """Test excluding large fields returns counts instead.""" + mock_client = Mock() + mock_supabase.return_value = mock_client + + mock_response = Mock() + mock_response.data = [{ + "id": "task-1", + "project_id": "proj-1", + "title": "Test Task", + "description": "Test Description", + "status": "todo", + "assignee": "User", + "task_order": 0, + "feature": None, + "sources": [1, 2, 3], # Will be counted + "code_examples": [1, 2], # Will be counted + "created_at": "2024-01-01", + "updated_at": "2024-01-01" + }] + + # Setup mock chain + mock_table = Mock() + mock_select = Mock() + mock_or = Mock() + mock_order1 = Mock() + mock_order2 = Mock() + + mock_order2.execute.return_value = mock_response + mock_order1.order.return_value = mock_order2 + mock_or.order.return_value = mock_order1 + mock_select.neq().or_.return_value = mock_or + mock_table.select.return_value = mock_select + mock_client.table.return_value = mock_table + + service = TaskService(mock_client) + success, result = service.list_tasks(exclude_large_fields=True) + + assert success + task = result["tasks"][0] + assert "sources" not in task + assert "code_examples" not in task + assert "stats" in task + assert task["stats"]["sources_count"] == 3 + assert task["stats"]["code_examples_count"] == 2 + + +class TestDocumentServiceOptimization: + """Test DocumentService with include_content parameter.""" + + @patch('src.server.utils.get_supabase_client') + def test_list_documents_metadata_only(self, mock_supabase): + """Test default returns metadata only.""" + mock_client = Mock() + mock_supabase.return_value = mock_client + + mock_response = Mock() + mock_response.data = [{ + "docs": [{ + "id": "doc-1", + "title": "Test Doc", + "content": {"huge": "content" * 1000}, + "document_type": "spec", + "status": "draft", + "version": "1.0", + "tags": ["test"], + "author": "Test Author" + }] + }] + + # Setup mock chain + mock_table = Mock() + mock_select = Mock() + mock_eq = Mock() + + mock_eq.execute.return_value = mock_response + mock_select.eq.return_value = mock_eq + mock_table.select.return_value = mock_select + mock_client.table.return_value = mock_table + + service = DocumentService(mock_client) + success, result = service.list_documents("project-1") # Default include_content=False + + assert success + doc = result["documents"][0] + assert "content" not in doc + assert "stats" in doc + assert doc["stats"]["content_size"] > 0 + assert doc["title"] == "Test Doc" + + @patch('src.server.utils.get_supabase_client') + def test_list_documents_with_content(self, mock_supabase): + """Test include_content=True returns full documents.""" + mock_client = Mock() + mock_supabase.return_value = mock_client + + mock_response = Mock() + mock_response.data = [{ + "docs": [{ + "id": "doc-1", + "title": "Test Doc", + "content": {"huge": "content"}, + "document_type": "spec" + }] + }] + + # Setup mock chain + mock_table = Mock() + mock_select = Mock() + mock_eq = Mock() + + mock_eq.execute.return_value = mock_response + mock_select.eq.return_value = mock_eq + mock_table.select.return_value = mock_select + mock_client.table.return_value = mock_table + + service = DocumentService(mock_client) + success, result = service.list_documents("project-1", include_content=True) + + assert success + doc = result["documents"][0] + assert "content" in doc + assert doc["content"]["huge"] == "content" + + +class TestBackwardCompatibility: + """Ensure all changes are backward compatible.""" + + def test_api_defaults_preserve_behavior(self): + """Test that API defaults maintain current behavior.""" + # ProjectService default should include content + service = ProjectService(Mock()) + # Check default parameter value + import inspect + sig = inspect.signature(service.list_projects) + assert sig.parameters['include_content'].default is True + + # DocumentService default should NOT include content + doc_service = DocumentService(Mock()) + sig = inspect.signature(doc_service.list_documents) + assert sig.parameters['include_content'].default is False + + # TaskService default should NOT exclude fields + task_service = TaskService(Mock()) + sig = inspect.signature(task_service.list_tasks) + assert sig.parameters['exclude_large_fields'].default is False + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) \ No newline at end of file diff --git a/python/tests/test_token_optimization_integration.py b/python/tests/test_token_optimization_integration.py new file mode 100644 index 0000000..7d82caa --- /dev/null +++ b/python/tests/test_token_optimization_integration.py @@ -0,0 +1,189 @@ +""" +Integration tests to verify token optimization in running system. +Run with: uv run pytest tests/test_token_optimization_integration.py -v +""" + +import httpx +import json +import asyncio +from typing import Dict, Any, Tuple + + +async def measure_response_size(url: str, params: Dict[str, Any] = None) -> Tuple[int, int]: + """Measure response size and estimate token count.""" + async with httpx.AsyncClient() as client: + try: + response = await client.get(url, params=params, timeout=10.0) + response_text = response.text + response_size = len(response_text) + # Rough token estimate: 1 token ≈ 4 characters + estimated_tokens = response_size / 4 + return response_size, estimated_tokens + except httpx.ConnectError: + print(f"⚠️ Could not connect to {url} - is the server running?") + return 0, 0 + except Exception as e: + print(f"❌ Error measuring {url}: {e}") + return 0, 0 + + +async def test_projects_endpoint(): + """Test /api/projects with and without include_content.""" + base_url = "http://localhost:8181/api/projects" + + print("\n=== Testing Projects Endpoint ===") + + # Test with full content (backward compatibility) + size_full, tokens_full = await measure_response_size(base_url, {"include_content": "true"}) + if size_full > 0: + print(f"Full content: {size_full:,} bytes | ~{tokens_full:,.0f} tokens") + else: + print("⚠️ Skipping - server not available") + return + + # Test lightweight + size_light, tokens_light = await measure_response_size(base_url, {"include_content": "false"}) + print(f"Lightweight: {size_light:,} bytes | ~{tokens_light:,.0f} tokens") + + # Calculate reduction + if size_full > 0: + reduction = (1 - size_light / size_full) * 100 if size_full > size_light else 0 + print(f"Reduction: {reduction:.1f}%") + + if reduction > 50: + print("✅ Significant token reduction achieved!") + else: + print("⚠️ Token reduction less than expected") + + # Verify backward compatibility - default should include content + size_default, _ = await measure_response_size(base_url) + if size_default > 0: + if abs(size_default - size_full) < 100: # Allow small variation + print("✅ Backward compatibility maintained (default includes content)") + else: + print("⚠️ Default behavior may have changed") + + +async def test_tasks_endpoint(): + """Test /api/tasks with exclude_large_fields.""" + base_url = "http://localhost:8181/api/tasks" + + print("\n=== Testing Tasks Endpoint ===") + + # Test with full content + size_full, tokens_full = await measure_response_size(base_url, {"exclude_large_fields": "false"}) + if size_full > 0: + print(f"Full content: {size_full:,} bytes | ~{tokens_full:,.0f} tokens") + else: + print("⚠️ Skipping - server not available") + return + + # Test lightweight + size_light, tokens_light = await measure_response_size(base_url, {"exclude_large_fields": "true"}) + print(f"Lightweight: {size_light:,} bytes | ~{tokens_light:,.0f} tokens") + + # Calculate reduction + if size_full > size_light: + reduction = (1 - size_light / size_full) * 100 + print(f"Reduction: {reduction:.1f}%") + + if reduction > 30: # Tasks may have less reduction if fewer have large fields + print("✅ Token reduction achieved for tasks!") + else: + print("ℹ️ Minimal reduction (tasks may not have large fields)") + + +async def test_documents_endpoint(): + """Test /api/projects/{id}/docs with include_content.""" + # First get a project ID if available + async with httpx.AsyncClient() as client: + try: + response = await client.get( + "http://localhost:8181/api/projects", + params={"include_content": "false"}, + timeout=10.0 + ) + if response.status_code == 200: + projects = response.json() + if projects and len(projects) > 0: + project_id = projects[0]["id"] + print(f"\n=== Testing Documents Endpoint (Project: {project_id[:8]}...) ===") + + base_url = f"http://localhost:8181/api/projects/{project_id}/docs" + + # Test with content + size_full, tokens_full = await measure_response_size(base_url, {"include_content": "true"}) + print(f"With content: {size_full:,} bytes | ~{tokens_full:,.0f} tokens") + + # Test without content (default) + size_light, tokens_light = await measure_response_size(base_url, {"include_content": "false"}) + print(f"Metadata only: {size_light:,} bytes | ~{tokens_light:,.0f} tokens") + + # Calculate reduction if there are documents + if size_full > size_light and size_full > 500: # Only if meaningful data + reduction = (1 - size_light / size_full) * 100 + print(f"Reduction: {reduction:.1f}%") + print("✅ Document endpoint optimized!") + else: + print("ℹ️ No documents or minimal content in project") + else: + print("\n⚠️ No projects available for document testing") + except Exception as e: + print(f"\n⚠️ Could not test documents endpoint: {e}") + + +async def test_mcp_endpoints(): + """Test MCP endpoints if available.""" + mcp_url = "http://localhost:8051/health" + + print("\n=== Testing MCP Server ===") + + async with httpx.AsyncClient() as client: + try: + response = await client.get(mcp_url, timeout=5.0) + if response.status_code == 200: + print("✅ MCP server is running") + # Could add specific MCP tool tests here + else: + print(f"⚠️ MCP server returned status {response.status_code}") + except httpx.ConnectError: + print("ℹ️ MCP server not running (optional for tests)") + except Exception as e: + print(f"⚠️ Could not check MCP server: {e}") + + +async def main(): + """Run all integration tests.""" + print("=" * 60) + print("Token Optimization Integration Tests") + print("=" * 60) + + # Check if server is running + async with httpx.AsyncClient() as client: + try: + response = await client.get("http://localhost:8181/health", timeout=5.0) + if response.status_code == 200: + print("✅ Server is healthy and running") + else: + print(f"⚠️ Server returned status {response.status_code}") + except httpx.ConnectError: + print("❌ Server is not running! Start with: docker-compose up -d") + print("\nTests require a running server. Please start the services first.") + return + except Exception as e: + print(f"❌ Error checking server health: {e}") + return + + # Run tests + await test_projects_endpoint() + await test_tasks_endpoint() + await test_documents_endpoint() + await test_mcp_endpoints() + + print("\n" + "=" * 60) + print("✅ Integration tests completed!") + print("=" * 60) + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file