Archon/original_archon/archon/agent_tools.py

from typing import Dict, Any, List, Optional
from openai import AsyncOpenAI
from supabase import Client
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.utils import get_env_var

embedding_model = get_env_var('EMBEDDING_MODEL') or 'text-embedding-3-small'


async def get_embedding(text: str, embedding_client: AsyncOpenAI) -> List[float]:
    """Get embedding vector from OpenAI."""
    try:
        response = await embedding_client.embeddings.create(
            model=embedding_model,
            input=text
        )
        return response.data[0].embedding
    except Exception as e:
        print(f"Error getting embedding: {e}")
        return [0] * 1536  # Return zero vector on error


async def retrieve_relevant_documentation_tool(supabase: Client, embedding_client: AsyncOpenAI, user_query: str) -> str:
    """
    Retrieve documentation chunks relevant to the query using RAG over Supabase.

    Args:
        supabase: The Supabase client
        embedding_client: The OpenAI client used to embed the query
        user_query: The user's question to match against the documentation

    Returns:
        str: The top matching documentation chunks separated by '---',
             or a message if nothing was found or an error occurred
    """
    try:
        # Get the embedding for the query
        query_embedding = await get_embedding(user_query, embedding_client)

        # Query Supabase for relevant documents
        result = supabase.rpc(
            'match_site_pages',
            {
                'query_embedding': query_embedding,
                'match_count': 4,
                'filter': {'source': 'pydantic_ai_docs'}
            }
        ).execute()

        if not result.data:
            return "No relevant documentation found."

        # Format the results
        formatted_chunks = []
        for doc in result.data:
            chunk_text = f"""
# {doc['title']}
{doc['content']}
"""
            formatted_chunks.append(chunk_text)

        # Join all chunks with a separator
        return "\n\n---\n\n".join(formatted_chunks)

    except Exception as e:
        print(f"Error retrieving documentation: {e}")
        return f"Error retrieving documentation: {str(e)}"


async def list_documentation_pages_tool(supabase: Client) -> List[str]:
    """
    Function to retrieve a list of all available Pydantic AI documentation pages.
    This is called by the list_documentation_pages tool and also externally
    to fetch documentation pages for the reasoner LLM.

    Returns:
        List[str]: List of unique URLs for all documentation pages
    """
    try:
        # Query Supabase for unique URLs where source is pydantic_ai_docs
        result = supabase.from_('site_pages') \
            .select('url') \
            .eq('metadata->>source', 'pydantic_ai_docs') \
            .execute()

        if not result.data:
            return []

        # Extract unique URLs
        urls = sorted(set(doc['url'] for doc in result.data))
        return urls

    except Exception as e:
        print(f"Error retrieving documentation pages: {e}")
        return []


async def get_page_content_tool(supabase: Client, url: str) -> str:
    """
    Retrieve the full content of a specific documentation page by combining all its chunks.

    Args:
        supabase: The Supabase client
        url: The URL of the page to retrieve

    Returns:
        str: The complete page content with all chunks combined in order
    """
    try:
        # Query Supabase for all chunks of this URL, ordered by chunk_number
        result = supabase.from_('site_pages') \
            .select('title, content, chunk_number') \
            .eq('url', url) \
            .eq('metadata->>source', 'pydantic_ai_docs') \
            .order('chunk_number') \
            .execute()

        if not result.data:
            return f"No content found for URL: {url}"

        # Format the page with its title and all chunks
        page_title = result.data[0]['title'].split(' - ')[0]  # Get the main title
        formatted_content = [f"# {page_title}\n"]

        # Add each chunk's content
        for chunk in result.data:
            formatted_content.append(chunk['content'])

        # Join everything together, but cap the length in case the page is massive (there are a couple of big ones).
        # This will be improved later so that RAG is performed on the page itself if it is too big.
        return "\n\n".join(formatted_content)[:20000]

    except Exception as e:
        print(f"Error retrieving page content: {e}")
        return f"Error retrieving page content: {str(e)}"


def get_file_content_tool(file_path: str) -> str:
    """
    Retrieve the content of a specific file. Use this to get the contents of an
    example, tool, or config file for an MCP server.

    Args:
        file_path: The path to the file

    Returns:
        The raw contents of the file
    """
    try:
        with open(file_path, "r") as file:
            file_contents = file.read()
        return file_contents
    except Exception as e:
        print(f"Error retrieving file contents: {e}")
        return f"Error retrieving file contents: {str(e)}"