Archon/migration/complete_setup.sql

795 lines
33 KiB
PL/PgSQL
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- =====================================================
-- Archon Complete Database Setup
-- =====================================================
-- This script combines all migrations into a single file
-- for easy one-time database initialization
--
-- Run this script in your Supabase SQL Editor to set up
-- the complete Archon database schema and initial data
-- =====================================================
-- =====================================================
-- SECTION 1: EXTENSIONS
-- =====================================================
-- Enable required PostgreSQL extensions
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS pgcrypto;
-- =====================================================
-- SECTION 2: CREDENTIALS AND SETTINGS
-- =====================================================
-- Credentials and Configuration Management Table
-- This table stores both encrypted sensitive data and plain configuration settings
CREATE TABLE IF NOT EXISTS archon_settings (
id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
key VARCHAR(255) UNIQUE NOT NULL,
value TEXT, -- For plain text config values
encrypted_value TEXT, -- For encrypted sensitive data (bcrypt hashed)
is_encrypted BOOLEAN DEFAULT FALSE,
category VARCHAR(100), -- Group related settings (e.g., 'rag_strategy', 'api_keys', 'server_config')
description TEXT,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
-- Create indexes for faster lookups
CREATE INDEX IF NOT EXISTS idx_archon_settings_key ON archon_settings(key);
CREATE INDEX IF NOT EXISTS idx_archon_settings_category ON archon_settings(category);
-- Create trigger to automatically update updated_at timestamp
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ language 'plpgsql';
CREATE TRIGGER update_archon_settings_updated_at
BEFORE UPDATE ON archon_settings
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
-- Create RLS (Row Level Security) policies for settings
ALTER TABLE archon_settings ENABLE ROW LEVEL SECURITY;
CREATE POLICY "Allow service role full access" ON archon_settings
FOR ALL USING (auth.role() = 'service_role');
CREATE POLICY "Allow authenticated users to read and update" ON archon_settings
FOR ALL TO authenticated
USING (true);
-- =====================================================
-- SECTION 3: INITIAL SETTINGS DATA
-- =====================================================
-- Server Configuration
INSERT INTO archon_settings (key, value, is_encrypted, category, description) VALUES
('MCP_TRANSPORT', 'dual', false, 'server_config', 'MCP server transport mode - sse (web clients), stdio (IDE clients), or dual (both)'),
('HOST', 'localhost', false, 'server_config', 'Host to bind to if using sse as the transport (leave empty if using stdio)'),
('PORT', '8051', false, 'server_config', 'Port to listen on if using sse as the transport (leave empty if using stdio)'),
('MODEL_CHOICE', 'gpt-4.1-nano', false, 'rag_strategy', 'The LLM you want to use for summaries and contextual embeddings. Generally this is a very cheap and fast LLM like gpt-4.1-nano');
-- RAG Strategy Configuration (all default to true)
INSERT INTO archon_settings (key, value, is_encrypted, category, description) VALUES
('USE_CONTEXTUAL_EMBEDDINGS', 'true', true, 'rag_strategy', 'Enhances embeddings with contextual information for better retrieval'),
('CONTEXTUAL_EMBEDDINGS_MAX_WORKERS', '3', false, 'rag_strategy', 'Maximum parallel workers for contextual embedding generation (1-10)'),
('USE_HYBRID_SEARCH', 'true', true, 'rag_strategy', 'Combines vector similarity search with keyword search for better results'),
('USE_AGENTIC_RAG', 'true', true, 'rag_strategy', 'Enables code example extraction, storage, and specialized code search functionality'),
('USE_RERANKING', 'true', true, 'rag_strategy', 'Applies cross-encoder reranking to improve search result relevance');
-- Monitoring Configuration
INSERT INTO archon_settings (key, value, is_encrypted, category, description) VALUES
('LOGFIRE_ENABLED', 'true', false, 'monitoring', 'Enable or disable Pydantic Logfire logging and observability platform'),
('PROJECTS_ENABLED', 'true', false, 'features', 'Enable or disable Projects and Tasks functionality');
-- Placeholder for sensitive credentials (to be added via Settings UI)
INSERT INTO archon_settings (key, encrypted_value, is_encrypted, category, description) VALUES
('OPENAI_API_KEY', NULL, true, 'api_keys', 'OpenAI API Key for embedding model (text-embedding-3-small). Get from: https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key');
-- LLM Provider configuration settings
INSERT INTO archon_settings (key, value, is_encrypted, category, description) VALUES
('LLM_PROVIDER', 'openai', false, 'rag_strategy', 'LLM provider to use: openai, ollama, or google'),
('LLM_BASE_URL', NULL, false, 'rag_strategy', 'Custom base URL for LLM provider (mainly for Ollama, e.g., http://localhost:11434/v1)'),
('EMBEDDING_MODEL', 'text-embedding-3-small', false, 'rag_strategy', 'Embedding model for vector search and similarity matching (required for all embedding operations)')
ON CONFLICT (key) DO NOTHING;
-- Add provider API key placeholders
INSERT INTO archon_settings (key, encrypted_value, is_encrypted, category, description) VALUES
('GOOGLE_API_KEY', NULL, true, 'api_keys', 'Google API Key for Gemini models. Get from: https://aistudio.google.com/apikey')
ON CONFLICT (key) DO NOTHING;
-- Code Extraction Settings Migration
-- Adds configurable settings for the code extraction service
-- Insert Code Extraction Configuration Settings
INSERT INTO archon_settings (key, value, is_encrypted, category, description) VALUES
-- Length Settings
('MIN_CODE_BLOCK_LENGTH', '250', false, 'code_extraction', 'Base minimum length for code blocks in characters'),
('MAX_CODE_BLOCK_LENGTH', '5000', false, 'code_extraction', 'Maximum length before stopping code block extension in characters'),
('CONTEXT_WINDOW_SIZE', '1000', false, 'code_extraction', 'Number of characters of context to preserve before and after code blocks'),
-- Detection Features
('ENABLE_COMPLETE_BLOCK_DETECTION', 'true', false, 'code_extraction', 'Extend code blocks to natural boundaries like closing braces'),
('ENABLE_LANGUAGE_SPECIFIC_PATTERNS', 'true', false, 'code_extraction', 'Use specialized patterns for different programming languages'),
('ENABLE_CONTEXTUAL_LENGTH', 'true', false, 'code_extraction', 'Adjust minimum length based on surrounding context (example, snippet, implementation)'),
-- Content Filtering
('ENABLE_PROSE_FILTERING', 'true', false, 'code_extraction', 'Filter out documentation text mistakenly wrapped in code blocks'),
('MAX_PROSE_RATIO', '0.15', false, 'code_extraction', 'Maximum allowed ratio of prose indicators (0-1) in code blocks'),
('MIN_CODE_INDICATORS', '3', false, 'code_extraction', 'Minimum number of code patterns required (brackets, operators, keywords)'),
('ENABLE_DIAGRAM_FILTERING', 'true', false, 'code_extraction', 'Exclude diagram languages like Mermaid, PlantUML from code extraction'),
-- Processing Settings
('CODE_EXTRACTION_MAX_WORKERS', '3', false, 'code_extraction', 'Number of parallel workers for generating code summaries'),
('ENABLE_CODE_SUMMARIES', 'true', false, 'code_extraction', 'Generate AI-powered summaries and names for extracted code examples')
-- Only insert if they don't already exist
ON CONFLICT (key) DO NOTHING;
-- Crawling Performance Settings (from add_performance_settings.sql)
INSERT INTO archon_settings (key, value, is_encrypted, category, description) VALUES
('CRAWL_BATCH_SIZE', '50', false, 'rag_strategy', 'Number of URLs to crawl in parallel per batch (10-100)'),
('CRAWL_MAX_CONCURRENT', '10', false, 'rag_strategy', 'Maximum concurrent browser sessions for crawling (1-20)'),
('CRAWL_WAIT_STRATEGY', 'domcontentloaded', false, 'rag_strategy', 'When to consider page loaded: domcontentloaded, networkidle, or load'),
('CRAWL_PAGE_TIMEOUT', '30000', false, 'rag_strategy', 'Maximum time to wait for page load in milliseconds'),
('CRAWL_DELAY_BEFORE_HTML', '0.5', false, 'rag_strategy', 'Time to wait for JavaScript rendering in seconds (0.1-5.0)')
ON CONFLICT (key) DO NOTHING;
-- Document Storage Performance Settings (from add_performance_settings.sql and optimize_batch_sizes.sql)
INSERT INTO archon_settings (key, value, is_encrypted, category, description) VALUES
('DOCUMENT_STORAGE_BATCH_SIZE', '100', false, 'rag_strategy', 'Number of document chunks to process per batch (50-200) - increased for better performance'),
('EMBEDDING_BATCH_SIZE', '200', false, 'rag_strategy', 'Number of embeddings to create per API call (100-500) - increased for better throughput'),
('DELETE_BATCH_SIZE', '100', false, 'rag_strategy', 'Number of URLs to delete in one database operation (50-200) - increased for better performance'),
('ENABLE_PARALLEL_BATCHES', 'true', false, 'rag_strategy', 'Enable parallel processing of document batches')
ON CONFLICT (key) DO UPDATE SET
value = EXCLUDED.value,
description = EXCLUDED.description;
-- Advanced Performance Settings (from add_performance_settings.sql and optimize_batch_sizes.sql)
INSERT INTO archon_settings (key, value, is_encrypted, category, description) VALUES
('MEMORY_THRESHOLD_PERCENT', '80', false, 'rag_strategy', 'Memory usage threshold for crawler dispatcher (50-90)'),
('DISPATCHER_CHECK_INTERVAL', '0.5', false, 'rag_strategy', 'How often to check memory usage in seconds (0.1-2.0)'),
('CODE_EXTRACTION_BATCH_SIZE', '40', false, 'rag_strategy', 'Number of code blocks to extract per batch (20-100) - increased for better performance'),
('CODE_SUMMARY_MAX_WORKERS', '3', false, 'rag_strategy', 'Maximum parallel workers for code summarization (1-10)'),
('CONTEXTUAL_EMBEDDING_BATCH_SIZE', '50', false, 'rag_strategy', 'Number of chunks to process in contextual embedding batch API calls (20-100)')
ON CONFLICT (key) DO UPDATE SET
value = EXCLUDED.value,
description = EXCLUDED.description;
-- Add a comment to document when this migration was added
COMMENT ON TABLE archon_settings IS 'Stores application configuration including API keys, RAG settings, and code extraction parameters';
-- =====================================================
-- SECTION 4: KNOWLEDGE BASE TABLES
-- =====================================================
-- Create the sources table
CREATE TABLE IF NOT EXISTS archon_sources (
source_id TEXT PRIMARY KEY,
summary TEXT,
total_word_count INTEGER DEFAULT 0,
title TEXT,
metadata JSONB DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE DEFAULT timezone('utc'::text, now()) NOT NULL,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT timezone('utc'::text, now()) NOT NULL
);
-- Create indexes for better query performance
CREATE INDEX IF NOT EXISTS idx_archon_sources_title ON archon_sources(title);
CREATE INDEX IF NOT EXISTS idx_archon_sources_metadata ON archon_sources USING GIN(metadata);
CREATE INDEX IF NOT EXISTS idx_archon_sources_knowledge_type ON archon_sources((metadata->>'knowledge_type'));
-- Add comments to document the new columns
COMMENT ON COLUMN archon_sources.title IS 'Descriptive title for the source (e.g., "Pydantic AI API Reference")';
COMMENT ON COLUMN archon_sources.metadata IS 'JSONB field storing knowledge_type, tags, and other metadata';
-- Create the documentation chunks table
CREATE TABLE IF NOT EXISTS archon_crawled_pages (
id BIGSERIAL PRIMARY KEY,
url VARCHAR NOT NULL,
chunk_number INTEGER NOT NULL,
content TEXT NOT NULL,
metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
source_id TEXT NOT NULL,
embedding VECTOR(1536), -- OpenAI embeddings are 1536 dimensions
created_at TIMESTAMP WITH TIME ZONE DEFAULT timezone('utc'::text, now()) NOT NULL,
-- Add a unique constraint to prevent duplicate chunks for the same URL
UNIQUE(url, chunk_number),
-- Add foreign key constraint to sources table
FOREIGN KEY (source_id) REFERENCES archon_sources(source_id)
);
-- Create indexes for better performance
CREATE INDEX ON archon_crawled_pages USING ivfflat (embedding vector_cosine_ops);
CREATE INDEX idx_archon_crawled_pages_metadata ON archon_crawled_pages USING GIN (metadata);
CREATE INDEX idx_archon_crawled_pages_source_id ON archon_crawled_pages (source_id);
-- Create the code_examples table
CREATE TABLE IF NOT EXISTS archon_code_examples (
id BIGSERIAL PRIMARY KEY,
url VARCHAR NOT NULL,
chunk_number INTEGER NOT NULL,
content TEXT NOT NULL, -- The code example content
summary TEXT NOT NULL, -- Summary of the code example
metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
source_id TEXT NOT NULL,
embedding VECTOR(1536), -- OpenAI embeddings are 1536 dimensions
created_at TIMESTAMP WITH TIME ZONE DEFAULT timezone('utc'::text, now()) NOT NULL,
-- Add a unique constraint to prevent duplicate chunks for the same URL
UNIQUE(url, chunk_number),
-- Add foreign key constraint to sources table
FOREIGN KEY (source_id) REFERENCES archon_sources(source_id)
);
-- Create indexes for better performance
CREATE INDEX ON archon_code_examples USING ivfflat (embedding vector_cosine_ops);
CREATE INDEX idx_archon_code_examples_metadata ON archon_code_examples USING GIN (metadata);
CREATE INDEX idx_archon_code_examples_source_id ON archon_code_examples (source_id);
-- =====================================================
-- SECTION 5: SEARCH FUNCTIONS
-- =====================================================
-- Create a function to search for documentation chunks
CREATE OR REPLACE FUNCTION match_archon_crawled_pages (
query_embedding VECTOR(1536),
match_count INT DEFAULT 10,
filter JSONB DEFAULT '{}'::jsonb,
source_filter TEXT DEFAULT NULL
) RETURNS TABLE (
id BIGINT,
url VARCHAR,
chunk_number INTEGER,
content TEXT,
metadata JSONB,
source_id TEXT,
similarity FLOAT
)
LANGUAGE plpgsql
AS $$
#variable_conflict use_column
BEGIN
RETURN QUERY
SELECT
id,
url,
chunk_number,
content,
metadata,
source_id,
1 - (archon_crawled_pages.embedding <=> query_embedding) AS similarity
FROM archon_crawled_pages
WHERE metadata @> filter
AND (source_filter IS NULL OR source_id = source_filter)
ORDER BY archon_crawled_pages.embedding <=> query_embedding
LIMIT match_count;
END;
$$;
-- Create a function to search for code examples
CREATE OR REPLACE FUNCTION match_archon_code_examples (
query_embedding VECTOR(1536),
match_count INT DEFAULT 10,
filter JSONB DEFAULT '{}'::jsonb,
source_filter TEXT DEFAULT NULL
) RETURNS TABLE (
id BIGINT,
url VARCHAR,
chunk_number INTEGER,
content TEXT,
summary TEXT,
metadata JSONB,
source_id TEXT,
similarity FLOAT
)
LANGUAGE plpgsql
AS $$
#variable_conflict use_column
BEGIN
RETURN QUERY
SELECT
id,
url,
chunk_number,
content,
summary,
metadata,
source_id,
1 - (archon_code_examples.embedding <=> query_embedding) AS similarity
FROM archon_code_examples
WHERE metadata @> filter
AND (source_filter IS NULL OR source_id = source_filter)
ORDER BY archon_code_examples.embedding <=> query_embedding
LIMIT match_count;
END;
$$;
-- =====================================================
-- SECTION 6: RLS POLICIES FOR KNOWLEDGE BASE
-- =====================================================
-- Enable RLS on the knowledge base tables
ALTER TABLE archon_crawled_pages ENABLE ROW LEVEL SECURITY;
ALTER TABLE archon_sources ENABLE ROW LEVEL SECURITY;
ALTER TABLE archon_code_examples ENABLE ROW LEVEL SECURITY;
-- Create policies that allow anyone to read
CREATE POLICY "Allow public read access to archon_crawled_pages"
ON archon_crawled_pages
FOR SELECT
TO public
USING (true);
CREATE POLICY "Allow public read access to archon_sources"
ON archon_sources
FOR SELECT
TO public
USING (true);
CREATE POLICY "Allow public read access to archon_code_examples"
ON archon_code_examples
FOR SELECT
TO public
USING (true);
-- =====================================================
-- SECTION 7: PROJECTS AND TASKS MODULE
-- =====================================================
-- Task status enumeration
-- Create task_status enum if it doesn't exist
DO $$ BEGIN
CREATE TYPE task_status AS ENUM ('todo','doing','review','done');
EXCEPTION
WHEN duplicate_object THEN null;
END $$;
-- Assignee is now a text field to allow any agent name
-- No longer using enum to support flexible agent assignments
-- Projects table
CREATE TABLE IF NOT EXISTS archon_projects (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
title TEXT NOT NULL,
description TEXT DEFAULT '',
docs JSONB DEFAULT '[]'::jsonb,
features JSONB DEFAULT '[]'::jsonb,
data JSONB DEFAULT '[]'::jsonb,
github_repo TEXT,
pinned BOOLEAN DEFAULT false,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- Tasks table
CREATE TABLE IF NOT EXISTS archon_tasks (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
project_id UUID REFERENCES archon_projects(id) ON DELETE CASCADE,
parent_task_id UUID REFERENCES archon_tasks(id) ON DELETE CASCADE,
title TEXT NOT NULL,
description TEXT DEFAULT '',
status task_status DEFAULT 'todo',
assignee TEXT DEFAULT 'User' CHECK (assignee IS NOT NULL AND assignee != ''),
task_order INTEGER DEFAULT 0,
feature TEXT,
sources JSONB DEFAULT '[]'::jsonb,
code_examples JSONB DEFAULT '[]'::jsonb,
archived BOOLEAN DEFAULT false,
archived_at TIMESTAMPTZ NULL,
archived_by TEXT NULL,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- Project Sources junction table for many-to-many relationship
CREATE TABLE IF NOT EXISTS archon_project_sources (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
project_id UUID REFERENCES archon_projects(id) ON DELETE CASCADE,
source_id TEXT NOT NULL, -- References sources in the knowledge base
linked_at TIMESTAMPTZ DEFAULT NOW(),
created_by TEXT DEFAULT 'system',
notes TEXT,
-- Unique constraint to prevent duplicate links
UNIQUE(project_id, source_id)
);
-- Document Versions table for version control of project JSONB fields only
CREATE TABLE IF NOT EXISTS archon_document_versions (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
project_id UUID REFERENCES archon_projects(id) ON DELETE CASCADE,
task_id UUID REFERENCES archon_tasks(id) ON DELETE CASCADE, -- DEPRECATED: No longer used, kept for historical data
field_name TEXT NOT NULL, -- 'docs', 'features', 'data', 'prd' (task fields no longer versioned)
version_number INTEGER NOT NULL,
content JSONB NOT NULL, -- Full snapshot of the field content
change_summary TEXT, -- Human-readable description of changes
change_type TEXT DEFAULT 'update', -- 'create', 'update', 'delete', 'restore', 'backup'
document_id TEXT, -- For docs array, store the specific document ID
created_by TEXT DEFAULT 'system',
created_at TIMESTAMPTZ DEFAULT NOW(),
-- Ensure we have either project_id OR task_id, not both
CONSTRAINT chk_project_or_task CHECK (
(project_id IS NOT NULL AND task_id IS NULL) OR
(project_id IS NULL AND task_id IS NOT NULL)
),
-- Unique constraint to prevent duplicate version numbers per field
UNIQUE(project_id, task_id, field_name, version_number)
);
-- Create indexes for better performance
CREATE INDEX IF NOT EXISTS idx_archon_tasks_project_id ON archon_tasks(project_id);
CREATE INDEX IF NOT EXISTS idx_archon_tasks_status ON archon_tasks(status);
CREATE INDEX IF NOT EXISTS idx_archon_tasks_assignee ON archon_tasks(assignee);
CREATE INDEX IF NOT EXISTS idx_archon_tasks_order ON archon_tasks(task_order);
CREATE INDEX IF NOT EXISTS idx_archon_tasks_archived ON archon_tasks(archived);
CREATE INDEX IF NOT EXISTS idx_archon_tasks_archived_at ON archon_tasks(archived_at);
CREATE INDEX IF NOT EXISTS idx_archon_project_sources_project_id ON archon_project_sources(project_id);
CREATE INDEX IF NOT EXISTS idx_archon_project_sources_source_id ON archon_project_sources(source_id);
CREATE INDEX IF NOT EXISTS idx_archon_document_versions_project_id ON archon_document_versions(project_id);
CREATE INDEX IF NOT EXISTS idx_archon_document_versions_task_id ON archon_document_versions(task_id);
CREATE INDEX IF NOT EXISTS idx_archon_document_versions_field_name ON archon_document_versions(field_name);
CREATE INDEX IF NOT EXISTS idx_archon_document_versions_version_number ON archon_document_versions(version_number);
CREATE INDEX IF NOT EXISTS idx_archon_document_versions_created_at ON archon_document_versions(created_at);
-- Apply triggers to tables
CREATE OR REPLACE TRIGGER update_archon_projects_updated_at
BEFORE UPDATE ON archon_projects
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
CREATE OR REPLACE TRIGGER update_archon_tasks_updated_at
BEFORE UPDATE ON archon_tasks
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
-- Soft delete function for tasks
CREATE OR REPLACE FUNCTION archive_task(
task_id_param UUID,
archived_by_param TEXT DEFAULT 'system'
)
RETURNS BOOLEAN AS $$
DECLARE
task_exists BOOLEAN;
BEGIN
-- Check if task exists and is not already archived
SELECT EXISTS(
SELECT 1 FROM archon_tasks
WHERE id = task_id_param AND archived = FALSE
) INTO task_exists;
IF NOT task_exists THEN
RETURN FALSE;
END IF;
-- Archive the task
UPDATE archon_tasks
SET
archived = TRUE,
archived_at = NOW(),
archived_by = archived_by_param,
updated_at = NOW()
WHERE id = task_id_param;
-- Also archive all subtasks
UPDATE archon_tasks
SET
archived = TRUE,
archived_at = NOW(),
archived_by = archived_by_param,
updated_at = NOW()
WHERE parent_task_id = task_id_param AND archived = FALSE;
RETURN TRUE;
END;
$$ LANGUAGE plpgsql;
-- Add comments to document the soft delete fields
COMMENT ON COLUMN archon_tasks.assignee IS 'The agent or user assigned to this task. Can be any valid agent name or "User"';
COMMENT ON COLUMN archon_tasks.archived IS 'Soft delete flag - TRUE if task is archived/deleted';
COMMENT ON COLUMN archon_tasks.archived_at IS 'Timestamp when task was archived';
COMMENT ON COLUMN archon_tasks.archived_by IS 'User/system that archived the task';
-- Add comments for versioning table
COMMENT ON TABLE archon_document_versions IS 'Version control for JSONB fields in projects only - task versioning has been removed to simplify MCP operations';
COMMENT ON COLUMN archon_document_versions.field_name IS 'Name of JSONB field being versioned (docs, features, data) - task fields and prd removed as unused';
COMMENT ON COLUMN archon_document_versions.content IS 'Full snapshot of field content at this version';
COMMENT ON COLUMN archon_document_versions.change_type IS 'Type of change: create, update, delete, restore, backup';
COMMENT ON COLUMN archon_document_versions.document_id IS 'For docs arrays, the specific document ID that was changed';
COMMENT ON COLUMN archon_document_versions.task_id IS 'DEPRECATED: No longer used for new versions, kept for historical task version data';
-- =====================================================
-- SECTION 8: PROMPTS TABLE
-- =====================================================
-- Prompts table for managing agent system prompts
CREATE TABLE IF NOT EXISTS archon_prompts (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
prompt_name TEXT UNIQUE NOT NULL,
prompt TEXT NOT NULL,
description TEXT,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- Create index for faster lookups
CREATE INDEX IF NOT EXISTS idx_archon_prompts_name ON archon_prompts(prompt_name);
-- Add trigger to automatically update updated_at timestamp
CREATE OR REPLACE TRIGGER update_archon_prompts_updated_at
BEFORE UPDATE ON archon_prompts
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
-- =====================================================
-- SECTION 9: RLS POLICIES FOR PROJECTS MODULE
-- =====================================================
-- Enable Row Level Security (RLS) for all tables
ALTER TABLE archon_projects ENABLE ROW LEVEL SECURITY;
ALTER TABLE archon_tasks ENABLE ROW LEVEL SECURITY;
ALTER TABLE archon_project_sources ENABLE ROW LEVEL SECURITY;
ALTER TABLE archon_document_versions ENABLE ROW LEVEL SECURITY;
ALTER TABLE archon_prompts ENABLE ROW LEVEL SECURITY;
-- Create RLS policies for service role (full access)
CREATE POLICY "Allow service role full access to archon_projects" ON archon_projects
FOR ALL USING (auth.role() = 'service_role');
CREATE POLICY "Allow service role full access to archon_tasks" ON archon_tasks
FOR ALL USING (auth.role() = 'service_role');
CREATE POLICY "Allow service role full access to archon_project_sources" ON archon_project_sources
FOR ALL USING (auth.role() = 'service_role');
CREATE POLICY "Allow service role full access to archon_document_versions" ON archon_document_versions
FOR ALL USING (auth.role() = 'service_role');
CREATE POLICY "Allow service role full access to archon_prompts" ON archon_prompts
FOR ALL USING (auth.role() = 'service_role');
-- Create RLS policies for authenticated users
CREATE POLICY "Allow authenticated users to read and update archon_projects" ON archon_projects
FOR ALL TO authenticated
USING (true);
CREATE POLICY "Allow authenticated users to read and update archon_tasks" ON archon_tasks
FOR ALL TO authenticated
USING (true);
CREATE POLICY "Allow authenticated users to read and update archon_project_sources" ON archon_project_sources
FOR ALL TO authenticated
USING (true);
CREATE POLICY "Allow authenticated users to read archon_document_versions" ON archon_document_versions
FOR SELECT TO authenticated
USING (true);
CREATE POLICY "Allow authenticated users to read archon_prompts" ON archon_prompts
FOR SELECT TO authenticated
USING (true);
-- =====================================================
-- SECTION 10: DEFAULT PROMPTS DATA
-- =====================================================
-- Seed with default prompts for each content type
INSERT INTO archon_prompts (prompt_name, prompt, description) VALUES
('document_builder', 'SYSTEM PROMPT Document-Builder Agent
1. Mission
You are the Document-Builder Agent. Your sole purpose is to transform a user''s natural-language description of work (a project, feature, or refactor) into a structured JSON record stored in the docs table. Produce documentation that is concise yet thorough—clear enough for an engineer to act after a single read-through.
2. Workflow
1. Classify request → Decide which document type fits best:
• PRD net-new product or major initiative.
• FEATURE_SPEC incremental feature expressed in user-story form.
• REFACTOR_PLAN internal code quality improvement.
2. Clarify (if needed) → If the description is ambiguous, ask exactly one clarifying question, then continue.
3. Generate JSON → Build an object that follows the schema below and insert (or return) it for the docs table.
3. docs JSON Schema
{
"id": "uuid|string", // generate using uuid
"doc_type": "PRD | FEATURE_SPEC | REFACTOR_PLAN",
"title": "string", // short, descriptive
"author": "string", // requestor name
"body": { /* see templates below */ },
"created_at": "ISO-8601",
"updated_at": "ISO-8601"
}
4. Section Templates
PRD → body must include
• Background_and_Context
• Problem_Statement
• Goals_and_Success_Metrics
• Non_Goals
• Assumptions
• Stakeholders
• User_Personas
• Functional_Requirements // bullet list or user stories
• Technical_Requirements // tech stack, APIs, data
• UX_UI_and_Style_Guidelines
• Architecture_Overview // diagram link or text
• Milestones_and_Timeline
• Risks_and_Mitigations
• Open_Questions
FEATURE_SPEC → body must include
• Epic
• User_Stories // list of { id, as_a, i_want, so_that }
• Acceptance_Criteria // Given / When / Then
• Edge_Cases
• Dependencies
• Technical_Notes
• Design_References
• Metrics
• Risks
REFACTOR_PLAN → body must include
• Current_State_Summary
• Refactor_Goals
• Design_Principles_and_Best_Practices
• Proposed_Approach // step-by-step plan
• Impacted_Areas
• Test_Strategy
• Roll_Back_and_Recovery
• Timeline
• Risks
5. Writing Guidelines
• Brevity with substance: no fluff, no filler, no passive voice.
• Markdown inside strings: use headings, lists, and code fences for clarity.
• Consistent conventions: ISO dates, 24-hour times, SI units.
• Insert "TBD" where information is genuinely unknown.
• Produce valid JSON only—no comments or trailing commas.
6. Example Output (truncated)
{
"id": "01HQ2VPZ62KSF185Y54MQ93VD2",
"doc_type": "PRD",
"title": "Real-time Collaboration for Docs",
"author": "Sean",
"body": {
"Background_and_Context": "Customers need to co-edit documents ...",
"Problem_Statement": "Current single-editor flow slows teams ...",
"Goals_and_Success_Metrics": "Reduce hand-off time by 50% ..."
/* remaining sections */
},
"created_at": "2025-06-17T00:10:00-04:00",
"updated_at": "2025-06-17T00:10:00-04:00"
}
Remember: Your output is the JSON itself—no explanatory prose before or after. Stay sharp, write once, write right.', 'System prompt for DocumentAgent to create structured documentation following the Document-Builder pattern'),
('feature_builder', 'SYSTEM PROMPT Feature-Builder Agent
1. Mission
You are the Feature-Builder Agent. Your purpose is to transform user descriptions of features into structured feature plans stored in the features array. Create feature documentation that developers can implement directly.
2. Feature JSON Schema
{
"id": "uuid|string", // generate using uuid
"feature_type": "feature_plan", // always "feature_plan"
"name": "string", // short feature name
"title": "string", // descriptive title
"content": {
"feature_overview": {
"name": "string",
"description": "string",
"priority": "high|medium|low",
"estimated_effort": "string"
},
"user_stories": ["string"], // list of user stories
"react_flow_diagram": { // optional visual flow
"nodes": [...],
"edges": [...],
"viewport": {...}
},
"acceptance_criteria": ["string"], // testable criteria
"technical_notes": {
"frontend_components": ["string"],
"backend_endpoints": ["string"],
"database_changes": "string"
}
},
"created_by": "string" // author
}
3. Writing Guidelines
• Focus on implementation clarity
• Include specific technical details
• Define clear acceptance criteria
• Consider edge cases
• Keep descriptions actionable
Remember: Create structured, implementable feature plans.', 'System prompt for creating feature plans in the features array'),
('data_builder', 'SYSTEM PROMPT Data-Builder Agent
1. Mission
You are the Data-Builder Agent. Your purpose is to transform descriptions of data models into structured ERDs and schemas stored in the data array. Create clear data models that can guide database implementation.
2. Data JSON Schema
{
"id": "uuid|string", // generate using uuid
"data_type": "erd", // always "erd" for now
"name": "string", // system name
"title": "string", // descriptive title
"content": {
"entities": [...], // entity definitions
"relationships": [...], // entity relationships
"sql_schema": "string", // Generated SQL
"mermaid_diagram": "string", // Optional diagram
"notes": {
"indexes": ["string"],
"constraints": ["string"],
"diagram_tool": "string",
"normalization_level": "string",
"scalability_notes": "string"
}
},
"created_by": "string" // author
}
3. Writing Guidelines
• Follow database normalization principles
• Include proper indexes and constraints
• Consider scalability from the start
• Provide clear relationship definitions
• Generate valid, executable SQL
Remember: Create production-ready data models.', 'System prompt for creating data models in the data array');
-- =====================================================
-- SETUP COMPLETE
-- =====================================================
-- Your Archon database is now fully configured!
--
-- Next steps:
-- 1. Add your OpenAI API key via the Settings UI
-- 2. Enable Projects feature if needed
-- 3. Start crawling websites or uploading documents
-- =====================================================