import os
import sys
from typing import List

from openai import AsyncOpenAI
from supabase import Client

# Make the project root importable so utils can be found when this module
# is loaded directly.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from utils.utils import get_env_var

embedding_model = get_env_var('EMBEDDING_MODEL') or 'text-embedding-3-small'


async def get_embedding(text: str, embedding_client: AsyncOpenAI) -> List[float]:
    """Get an embedding vector for the given text from OpenAI."""
    try:
        response = await embedding_client.embeddings.create(
            model=embedding_model,
            input=text
        )
        return response.data[0].embedding
    except Exception as e:
        print(f"Error getting embedding: {e}")
        # Return a zero vector on error (1536 dims matches text-embedding-3-small)
        return [0.0] * 1536


async def retrieve_relevant_documentation_tool(supabase: Client, embedding_client: AsyncOpenAI, user_query: str) -> str:
    """
    Retrieve documentation chunks relevant to the query using vector similarity search.

    Args:
        supabase: The Supabase client
        embedding_client: The OpenAI client used to embed the query
        user_query: The user's question or request

    Returns:
        str: The most relevant documentation chunks, separated by --- markers
    """
    try:
        # Get the embedding for the query
        query_embedding = await get_embedding(user_query, embedding_client)

        # Query Supabase for relevant documents
        result = supabase.rpc(
            'match_site_pages',
            {
                'query_embedding': query_embedding,
                'match_count': 4,
                'filter': {'source': 'pydantic_ai_docs'}
            }
        ).execute()

        if not result.data:
            return "No relevant documentation found."

        # Format the results
        formatted_chunks = []
        for doc in result.data:
            chunk_text = f"""
# {doc['title']}

{doc['content']}
"""
            formatted_chunks.append(chunk_text)

        # Join all chunks with a separator
        return "\n\n---\n\n".join(formatted_chunks)

    except Exception as e:
        print(f"Error retrieving documentation: {e}")
        return f"Error retrieving documentation: {str(e)}"


async def list_documentation_pages_tool(supabase: Client) -> List[str]:
    """
    Function to retrieve a list of all available Pydantic AI documentation pages.
    This is called by the list_documentation_pages tool and also externally
    to fetch documentation pages for the reasoner LLM.

    Returns:
        List[str]: List of unique URLs for all documentation pages
    """
    try:
        # Query Supabase for unique URLs where source is pydantic_ai_docs
        result = supabase.from_('site_pages') \
            .select('url') \
            .eq('metadata->>source', 'pydantic_ai_docs') \
            .execute()

        if not result.data:
            return []

        # Extract unique URLs
        urls = sorted(set(doc['url'] for doc in result.data))
        return urls

    except Exception as e:
        print(f"Error retrieving documentation pages: {e}")
        return []


async def get_page_content_tool(supabase: Client, url: str) -> str:
    """
    Retrieve the full content of a specific documentation page by combining all its chunks.

    Args:
        supabase: The Supabase client
        url: The URL of the page to retrieve

    Returns:
        str: The complete page content with all chunks combined in order
    """
    try:
        # Query Supabase for all chunks of this URL, ordered by chunk_number
        result = supabase.from_('site_pages') \
            .select('title, content, chunk_number') \
            .eq('url', url) \
            .eq('metadata->>source', 'pydantic_ai_docs') \
            .order('chunk_number') \
            .execute()

        if not result.data:
            return f"No content found for URL: {url}"

        # Format the page with its title and all chunks
        page_title = result.data[0]['title'].split(' - ')[0]  # Get the main title
        formatted_content = [f"# {page_title}\n"]

        # Add each chunk's content
        for chunk in result.data:
            formatted_content.append(chunk['content'])

        # Join everything together, but cap the length in case the page is
        # massive (there are a couple of big ones). This will be improved later
        # so that if the page is too big, RAG will be performed on the page itself.
        return "\n\n".join(formatted_content)[:20000]

    except Exception as e:
        print(f"Error retrieving page content: {e}")
        return f"Error retrieving page content: {str(e)}"


def get_file_content_tool(file_path: str) -> str:
    """
    Retrieve the content of a specific file. Use this to get the contents of
    an example, tool, or config for an MCP server.

    Args:
        file_path: The path to the file

    Returns:
        The raw contents of the file
    """
    try:
        with open(file_path, "r") as file:
            file_contents = file.read()
        return file_contents
    except Exception as e:
        print(f"Error retrieving file contents: {e}")
        return f"Error retrieving file contents: {str(e)}"
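

# A minimal usage sketch showing how the tools above wire together. The
# SUPABASE_URL and SUPABASE_SERVICE_KEY variable names are assumptions for
# illustration and may not match this project's actual configuration helpers.
if __name__ == "__main__":
    import asyncio

    from supabase import create_client

    async def _demo():
        supabase = create_client(
            os.environ["SUPABASE_URL"], os.environ["SUPABASE_SERVICE_KEY"]
        )
        embedding_client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment

        # List the indexed documentation pages, then run a similarity search
        # against the same index with an example query.
        pages = await list_documentation_pages_tool(supabase)
        print(f"{len(pages)} documentation pages indexed")
        print(await retrieve_relevant_documentation_tool(
            supabase, embedding_client, "How do I define a Pydantic AI agent?"
        ))

    asyncio.run(_demo())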