feat: Document Browser with Domain Filtering (Updated Architecture) (#564)

* feat: Add DocumentBrowser with domain filtering (updated for latest architecture)

- Add DocumentBrowser component with two-column layout
- Add domain filtering and search functionality
- Add chunks API endpoint for browsing document content
- Add clickable page count badge to open browser
- Integrate with latest HTTP polling architecture
- Add service method for fetching chunks with domain filtering
- Compatible with new modular component structure

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* fix: Apply CodeRabbit suggestions for domain filtering and API reliability

- Preserve subdomains in domain extraction (docs.anthropic.com vs anthropic.com)
- Add deterministic ordering to API queries for stable chunk lists
- Use case-insensitive domain filtering with ilike
- Add explicit Supabase error handling to prevent silent failures

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* Update document browser branch for main branch compatibility

- Add TanStack Query package dependencies
- Add getKnowledgeItemChunks service method for DocumentBrowser
- Add minimal feature components for build compatibility
- Ensure document browser functionality works with latest architecture
- Maintain clickable page count badges and document browsing modal

Document browser is now ready for use with modernized Archon codebase.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
DIY Smart Code 2025-09-06 12:27:17 +02:00 committed by GitHub
parent e74d6134e7
commit cadda22d22
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 487 additions and 34 deletions

View File

@ -3735,9 +3735,9 @@
"license": "MIT"
},
"node_modules/@tanstack/query-core": {
"version": "5.85.7",
"resolved": "https://registry.npmjs.org/@tanstack/query-core/-/query-core-5.85.7.tgz",
"integrity": "sha512-FLT3EtuTbXBmOrDku4bI80Eivmjn/o/Zc1lVEd/6yzR8UAUSnDwYiwghCZvLqHyGSN5mO35ux1yPGMFYBFRSwA==",
"version": "5.87.0",
"resolved": "https://registry.npmjs.org/@tanstack/query-core/-/query-core-5.87.0.tgz",
"integrity": "sha512-gRZig2csRl71i/HEAHlE9TOmMqKKs9WkMAqIUlzagH+sNtgjvqxwaVo2HmfNGe+iDWUak0ratSkiRv0m/Y8ijg==",
"license": "MIT",
"funding": {
"type": "github",
@ -3745,9 +3745,9 @@
}
},
"node_modules/@tanstack/query-devtools": {
"version": "5.84.0",
"resolved": "https://registry.npmjs.org/@tanstack/query-devtools/-/query-devtools-5.84.0.tgz",
"integrity": "sha512-fbF3n+z1rqhvd9EoGp5knHkv3p5B2Zml1yNRjh7sNXklngYI5RVIWUrUjZ1RIcEoscarUb0+bOvIs5x9dwzOXQ==",
"version": "5.86.0",
"resolved": "https://registry.npmjs.org/@tanstack/query-devtools/-/query-devtools-5.86.0.tgz",
"integrity": "sha512-/JDw9BP80eambEK/EsDMGAcsL2VFT+8F5KCOwierjPU7QP8Wt1GT32yJpn3qOinBM8/zS3Jy36+F0GiyJp411A==",
"license": "MIT",
"funding": {
"type": "github",
@ -3755,12 +3755,12 @@
}
},
"node_modules/@tanstack/react-query": {
"version": "5.85.8",
"resolved": "https://registry.npmjs.org/@tanstack/react-query/-/react-query-5.85.8.tgz",
"integrity": "sha512-r3rW55STAO03EJg5mrCVIJvaEK3oeHme5u7QovuRFIKRbEgTzTv2DPdenX46X+x56LsU3ree1N4rzI/+gJ7KEA==",
"version": "5.87.0",
"resolved": "https://registry.npmjs.org/@tanstack/react-query/-/react-query-5.87.0.tgz",
"integrity": "sha512-3uRCGHo7KWHl6h7ptzLd5CbrjTQP5Q/37aC1cueClkSN4t/OaNFmfGolgs1AoA0kFjP/OZxTY2ytQoifyJzpWQ==",
"license": "MIT",
"dependencies": {
"@tanstack/query-core": "5.85.7"
"@tanstack/query-core": "5.87.0"
},
"funding": {
"type": "github",
@ -3771,19 +3771,19 @@
}
},
"node_modules/@tanstack/react-query-devtools": {
"version": "5.85.8",
"resolved": "https://registry.npmjs.org/@tanstack/react-query-devtools/-/react-query-devtools-5.85.8.tgz",
"integrity": "sha512-83SXqRpmVlRMpaj32veez/8ohjY7O4VQIYDqW91b4i9AQjiYgE24FbBfR/SOL8b5MfKhHMZkD+BQSpCh9jY06w==",
"version": "5.87.0",
"resolved": "https://registry.npmjs.org/@tanstack/react-query-devtools/-/react-query-devtools-5.87.0.tgz",
"integrity": "sha512-OeOSKsPyLcTVLdn391iNeRqYFEmpYJrY9t+FjKpaC6ql0SyRu2XT3mKYJIfYczhMMlwOIlbJkNaifBveertV8Q==",
"license": "MIT",
"dependencies": {
"@tanstack/query-devtools": "5.84.0"
"@tanstack/query-devtools": "5.86.0"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/tannerlinsley"
},
"peerDependencies": {
"@tanstack/react-query": "^5.85.8",
"@tanstack/react-query": "^5.87.0",
"react": "^18 || ^19"
}
},
@ -4572,9 +4572,9 @@
"license": "MIT"
},
"node_modules/acorn": {
"version": "8.15.0",
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
"version": "8.14.1",
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.14.1.tgz",
"integrity": "sha512-OvQ/2pUDKmgfCg++xsTX1wGxfTaszcHVcTctW4UJB4hibJx2HXxxO5UmVgyjMa+ZDsiaf5wWLXYpRWMmBI0QHg==",
"license": "MIT",
"bin": {
"acorn": "bin/acorn"
@ -6004,7 +6004,20 @@
"eslint": ">=8.40"
}
},
"node_modules/eslint-scope": {
"node_modules/eslint-visitor-keys": {
"version": "3.4.3",
"resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz",
"integrity": "sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag==",
"dev": true,
"license": "Apache-2.0",
"engines": {
"node": "^12.22.0 || ^14.17.0 || >=16.0.0"
},
"funding": {
"url": "https://opencollective.com/eslint"
}
},
"node_modules/eslint/node_modules/eslint-scope": {
"version": "7.2.2",
"resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-7.2.2.tgz",
"integrity": "sha512-dOt21O7lTMhDM+X9mB4GX+DZrZtCUJPL/wlcTqxyrx5IvO0IYtILdtrQGQp+8n5S0gwSVmOf9NQrjMOgfQZlIg==",
@ -6021,17 +6034,14 @@
"url": "https://opencollective.com/eslint"
}
},
"node_modules/eslint-visitor-keys": {
"version": "3.4.3",
"resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz",
"integrity": "sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag==",
"node_modules/eslint/node_modules/estraverse": {
"version": "5.3.0",
"resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz",
"integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==",
"dev": true,
"license": "Apache-2.0",
"license": "BSD-2-Clause",
"engines": {
"node": "^12.22.0 || ^14.17.0 || >=16.0.0"
},
"funding": {
"url": "https://opencollective.com/eslint"
"node": ">=4.0"
}
},
"node_modules/eslint/node_modules/globals": {
@ -6096,6 +6106,16 @@
"node": ">=0.10"
}
},
"node_modules/esquery/node_modules/estraverse": {
"version": "5.3.0",
"resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz",
"integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==",
"dev": true,
"license": "BSD-2-Clause",
"engines": {
"node": ">=4.0"
}
},
"node_modules/esrecurse": {
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz",
@ -6109,7 +6129,7 @@
"node": ">=4.0"
}
},
"node_modules/estraverse": {
"node_modules/esrecurse/node_modules/estraverse": {
"version": "5.3.0",
"resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz",
"integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==",

View File

@ -0,0 +1,319 @@
import React, { useState, useEffect, useMemo } from 'react';
import { createPortal } from 'react-dom';
import { Search, Filter, FileText, Globe, X } from 'lucide-react';
import { motion, AnimatePresence } from 'framer-motion';
import { Badge } from '../ui/Badge';
import { Button } from '../ui/Button';
import { knowledgeBaseService } from '../../services/knowledgeBaseService';
interface DocumentChunk {
id: string;
source_id: string;
content: string;
metadata?: any;
url?: string;
}
interface DocumentBrowserProps {
sourceId: string;
isOpen: boolean;
onClose: () => void;
}
const extractDomain = (url: string): string => {
try {
const urlObj = new URL(url);
const hostname = urlObj.hostname;
// Remove 'www.' prefix if present
const withoutWww = hostname.startsWith('www.') ? hostname.slice(4) : hostname;
// Keep full hostname (minus 'www.') to preserve subdomain-level filtering
return withoutWww;
} catch {
return url; // Return original if URL parsing fails
}
};
export const DocumentBrowser: React.FC<DocumentBrowserProps> = ({
sourceId,
isOpen,
onClose,
}) => {
const [chunks, setChunks] = useState<DocumentChunk[]>([]);
const [loading, setLoading] = useState(true);
const [searchQuery, setSearchQuery] = useState('');
const [selectedDomain, setSelectedDomain] = useState<string>('all');
const [selectedChunkId, setSelectedChunkId] = useState<string | null>(null);
const [error, setError] = useState<string | null>(null);
// Extract unique domains from chunks
const domains = useMemo(() => {
const domainSet = new Set<string>();
chunks.forEach(chunk => {
if (chunk.url) {
domainSet.add(extractDomain(chunk.url));
}
});
return Array.from(domainSet).sort();
}, [chunks]);
// Filter chunks based on search and domain
const filteredChunks = useMemo(() => {
return chunks.filter(chunk => {
// Search filter
const searchLower = searchQuery.toLowerCase();
const searchMatch = !searchQuery ||
chunk.content.toLowerCase().includes(searchLower) ||
chunk.url?.toLowerCase().includes(searchLower);
// Domain filter
const domainMatch = selectedDomain === 'all' ||
(chunk.url && extractDomain(chunk.url) === selectedDomain);
return searchMatch && domainMatch;
});
}, [chunks, searchQuery, selectedDomain]);
// Get selected chunk
const selectedChunk = useMemo(() => {
return filteredChunks.find(chunk => chunk.id === selectedChunkId) || filteredChunks[0];
}, [filteredChunks, selectedChunkId]);
// Load chunks when component opens
useEffect(() => {
if (isOpen && sourceId) {
loadChunks();
}
}, [isOpen, sourceId]);
const loadChunks = async () => {
try {
setLoading(true);
setError(null);
const response = await knowledgeBaseService.getKnowledgeItemChunks(sourceId);
if (response.success) {
setChunks(response.chunks);
// Auto-select first chunk if none selected
if (response.chunks.length > 0 && !selectedChunkId) {
setSelectedChunkId(response.chunks[0].id);
}
} else {
setError('Failed to load document chunks');
}
} catch (error) {
console.error('Failed to load chunks:', error);
setError(error instanceof Error ? error.message : 'Failed to load document chunks');
} finally {
setLoading(false);
}
};
const loadChunksWithDomainFilter = async (domain: string) => {
try {
setLoading(true);
setError(null);
const domainFilter = domain === 'all' ? undefined : domain;
const response = await knowledgeBaseService.getKnowledgeItemChunks(sourceId, domainFilter);
if (response.success) {
setChunks(response.chunks);
} else {
setError('Failed to load document chunks');
}
} catch (error) {
console.error('Failed to load chunks with domain filter:', error);
setError(error instanceof Error ? error.message : 'Failed to load document chunks');
} finally {
setLoading(false);
}
};
const handleDomainChange = (domain: string) => {
setSelectedDomain(domain);
// Note: We could reload with server-side filtering, but for now we'll do client-side filtering
// loadChunksWithDomainFilter(domain);
};
if (!isOpen) return null;
return createPortal(
<motion.div
initial={{ opacity: 0 }}
animate={{ opacity: 1 }}
exit={{ opacity: 0 }}
className="fixed inset-0 flex items-center justify-center z-50 bg-black/60 backdrop-blur-sm"
onClick={onClose}
>
<motion.div
initial={{ scale: 0.9, opacity: 0 }}
animate={{ scale: 1, opacity: 1 }}
exit={{ scale: 0.9, opacity: 0 }}
className="relative bg-gray-900/95 border border-gray-800 rounded-xl w-full max-w-7xl h-[85vh] flex overflow-hidden shadow-2xl"
onClick={(e) => e.stopPropagation()}
>
{/* Blue accent line at the top */}
<div className="absolute top-0 left-0 right-0 h-[2px] bg-gradient-to-r from-blue-500 to-cyan-500 shadow-[0_0_20px_5px_rgba(59,130,246,0.5)]"></div>
{/* Sidebar */}
<div className="w-80 bg-gray-950/50 border-r border-gray-800 flex flex-col overflow-hidden">
{/* Sidebar Header */}
<div className="p-4 border-b border-gray-800">
<div className="flex items-center justify-between mb-3">
<h3 className="text-sm font-semibold text-blue-400">
Document Chunks ({(filteredChunks || []).length})
</h3>
</div>
{/* Search */}
<div className="relative mb-3">
<Search className="absolute left-3 top-1/2 transform -translate-y-1/2 w-4 h-4 text-gray-500" />
<input
type="text"
placeholder="Search documents..."
value={searchQuery}
onChange={(e) => setSearchQuery(e.target.value)}
className="w-full pl-10 pr-3 py-2 bg-gray-900/70 border border-gray-800 rounded-lg text-sm text-gray-300 placeholder-gray-600 focus:outline-none focus:border-blue-500/50 focus:ring-1 focus:ring-blue-500/20 transition-all"
/>
</div>
{/* Domain Filter */}
<div className="flex items-center gap-2">
<Globe className="w-4 h-4 text-gray-500" />
<select
value={selectedDomain}
onChange={(e) => handleDomainChange(e.target.value)}
className="flex-1 bg-gray-900/70 border border-gray-800 rounded-lg text-sm text-gray-300 px-3 py-2 focus:outline-none focus:border-blue-500/50"
>
<option value="all">All Domains</option>
{domains?.map(domain => (
<option key={domain} value={domain}>{domain}</option>
)) || []}
</select>
</div>
</div>
{/* Document List */}
<div className="flex-1 overflow-y-auto p-2">
{filteredChunks.length === 0 ? (
<div className="text-gray-500 text-sm text-center py-8">
No documents found
</div>
) : (
filteredChunks.map((chunk, index) => (
<button
key={chunk.id}
onClick={() => setSelectedChunkId(chunk.id)}
className={`w-full text-left p-3 mb-1 rounded-lg transition-all duration-200 ${
selectedChunk?.id === chunk.id
? 'bg-blue-500/20 border border-blue-500/40 shadow-[0_0_15px_rgba(59,130,246,0.2)]'
: 'hover:bg-gray-800/50 border border-transparent'
}`}
>
<div className="flex items-start gap-2">
<FileText className={`w-4 h-4 mt-0.5 flex-shrink-0 ${
selectedChunk?.id === chunk.id ? 'text-blue-400' : 'text-gray-500'
}`} />
<div className="flex-1 min-w-0">
<div className={`text-sm font-medium ${
selectedChunk?.id === chunk.id ? 'text-blue-300' : 'text-gray-300'
} line-clamp-1`}>
Chunk {index + 1}
</div>
<div className="text-xs text-gray-500 line-clamp-2 mt-0.5">
{chunk.content?.substring(0, 100) || 'No content'}...
</div>
{chunk.url && (
<div className="text-xs text-blue-400 mt-1 truncate">
{extractDomain(chunk.url)}
</div>
)}
</div>
</div>
</button>
))
)}
</div>
</div>
{/* Main Content Area */}
<div className="flex-1 flex flex-col">
{/* Header */}
<div className="p-4 border-b border-gray-800 flex items-center justify-between">
<div className="flex items-center gap-3">
<h2 className="text-xl font-semibold text-blue-400">
{selectedChunk ? `Document Chunk` : 'Document Browser'}
</h2>
{selectedChunk?.url && (
<Badge color="blue" className="flex items-center gap-1">
<Globe className="w-3 h-3" />
{extractDomain(selectedChunk.url)}
</Badge>
)}
</div>
<button
onClick={onClose}
className="text-gray-500 hover:text-white p-1 rounded transition-colors"
>
<X className="w-5 h-5" />
</button>
</div>
{/* Content */}
<div className="flex-1 overflow-auto">
{loading ? (
<div className="h-full flex items-center justify-center">
<div className="text-center">
<div className="animate-spin rounded-full h-12 w-12 border-b-2 border-blue-400 mx-auto mb-4"></div>
<p className="text-gray-400">Loading document chunks...</p>
</div>
</div>
) : !selectedChunk || filteredChunks.length === 0 ? (
<div className="h-full flex items-center justify-center">
<div className="text-center">
<FileText className="w-12 h-12 text-gray-600 mx-auto mb-4" />
<p className="text-gray-400">Select a document chunk to view content</p>
</div>
</div>
) : (
<div className="h-full p-4">
<div className="bg-gray-900/70 rounded-lg border border-gray-800 h-full overflow-auto">
<div className="p-6">
{selectedChunk.url && (
<div className="text-sm text-blue-400 mb-4 font-mono">
{selectedChunk.url}
</div>
)}
<div className="prose prose-sm prose-invert max-w-none">
<div className="text-gray-300 whitespace-pre-wrap leading-relaxed">
{selectedChunk.content || 'No content available'}
</div>
</div>
{selectedChunk.metadata && (
<div className="mt-6 pt-4 border-t border-gray-700">
<details className="text-sm text-gray-400">
<summary className="cursor-pointer hover:text-gray-300 font-medium">
View Metadata
</summary>
<pre className="mt-3 bg-gray-800 p-3 rounded text-xs overflow-x-auto text-gray-300">
{JSON.stringify(selectedChunk.metadata, null, 2)}
</pre>
</details>
</div>
)}
</div>
</div>
</div>
)}
</div>
</div>
</motion.div>
</motion.div>,
document.body
);
};

View File

@ -129,6 +129,7 @@ interface KnowledgeItemCardProps {
onDelete: (sourceId: string) => void;
onUpdate?: () => void;
onRefresh?: (sourceId: string) => void;
onBrowseDocuments?: (sourceId: string) => void;
isSelectionMode?: boolean;
isSelected?: boolean;
onToggleSelection?: (event: React.MouseEvent) => void;
@ -139,6 +140,7 @@ export const KnowledgeItemCard = ({
onDelete,
onUpdate,
onRefresh,
onBrowseDocuments,
isSelectionMode = false,
isSelected = false,
onToggleSelection
@ -454,13 +456,20 @@ export const KnowledgeItemCard = ({
</div>
)}
{/* Page count - orange neon container */}
{/* Page count - orange neon container (clickable for document browser) */}
<div
className="relative card-3d-layer-3"
className="relative card-3d-layer-3 cursor-pointer"
onClick={(e) => {
e.stopPropagation();
if (onBrowseDocuments) {
onBrowseDocuments(item.source_id);
}
}}
onMouseEnter={() => setShowPageTooltip(true)}
onMouseLeave={() => setShowPageTooltip(false)}
title="Click to browse document chunks"
>
<div className="flex items-center gap-1 px-2 py-1 bg-orange-500/20 border border-orange-500/40 rounded-full backdrop-blur-sm shadow-[0_0_15px_rgba(251,146,60,0.3)] transition-all duration-300">
<div className="flex items-center gap-1 px-2 py-1 bg-orange-500/20 border border-orange-500/40 rounded-full backdrop-blur-sm shadow-[0_0_15px_rgba(251,146,60,0.3)] hover:shadow-[0_0_20px_rgba(251,146,60,0.5)] transition-all duration-300">
<FileText className="w-3 h-3 text-orange-400" />
<span className="text-xs text-orange-400 font-medium">
{Math.ceil(
@ -471,10 +480,13 @@ export const KnowledgeItemCard = ({
{/* Page count tooltip - positioned relative to the badge */}
{showPageTooltip && (
<div className="absolute bottom-full left-1/2 transform -translate-x-1/2 mb-2 bg-black dark:bg-zinc-800 text-white text-xs px-3 py-2 rounded-lg shadow-lg z-50 whitespace-nowrap">
<div className="font-medium mb-1">
{(item.metadata.word_count || 0).toLocaleString()} words
<div className="font-medium mb-1 text-orange-300">
Click to Browse Documents
</div>
<div className="text-gray-300 space-y-0.5">
<div>
{(item.metadata.word_count || 0).toLocaleString()} words
</div>
<div>
= {Math.ceil((item.metadata.word_count || 0) / 250).toLocaleString()} pages
</div>

View File

@ -16,6 +16,7 @@ import { KnowledgeGridSkeleton, KnowledgeTableSkeleton } from '../components/kno
import { GroupCreationModal } from '../components/knowledge-base/GroupCreationModal';
import { AddKnowledgeModal } from '../components/knowledge-base/AddKnowledgeModal';
import { CrawlingTab } from '../components/knowledge-base/CrawlingTab';
import { DocumentBrowser } from '../components/knowledge-base/DocumentBrowser';
interface GroupedKnowledgeItem {
id: string;
@ -53,6 +54,10 @@ export const KnowledgeBasePage = () => {
const [isSelectionMode, setIsSelectionMode] = useState(false);
const [lastSelectedIndex, setLastSelectedIndex] = useState<number | null>(null);
// Document browser state
const [documentBrowserSourceId, setDocumentBrowserSourceId] = useState<string | null>(null);
const [isDocumentBrowserOpen, setIsDocumentBrowserOpen] = useState(false);
const { showToast } = useToast();
// Load knowledge items
@ -267,6 +272,11 @@ export const KnowledgeBasePage = () => {
const handleAddKnowledge = () => {
setIsAddModalOpen(true);
};
const handleBrowseDocuments = (sourceId: string) => {
setDocumentBrowserSourceId(sourceId);
setIsDocumentBrowserOpen(true);
};
const toggleSelectionMode = () => {
setIsSelectionMode(!isSelectionMode);
@ -749,6 +759,7 @@ export const KnowledgeBasePage = () => {
onDelete={handleDeleteItem}
onUpdate={loadKnowledgeItems}
onRefresh={handleRefreshItem}
onBrowseDocuments={handleBrowseDocuments}
isSelectionMode={isSelectionMode}
isSelected={selectedItems.has(item.id)}
onToggleSelection={(e) => toggleItemSelection(item.id, index, e)}
@ -790,6 +801,18 @@ export const KnowledgeBasePage = () => {
}}
/>
)}
{/* Document Browser Modal */}
{isDocumentBrowserOpen && documentBrowserSourceId && (
<DocumentBrowser
sourceId={documentBrowserSourceId}
isOpen={isDocumentBrowserOpen}
onClose={() => {
setIsDocumentBrowserOpen(false);
setDocumentBrowserSourceId(null);
}}
/>
)}
</div>
);
};

View File

@ -205,6 +205,35 @@ class KnowledgeBaseService {
})
}
/**
* Get document chunks for a knowledge item with optional domain filtering
*/
async getKnowledgeItemChunks(sourceId: string, domainFilter?: string) {
console.log('📄 [KnowledgeBase] Getting chunks for:', sourceId, 'domainFilter:', domainFilter);
const params = new URLSearchParams();
if (domainFilter) {
params.append('domain_filter', domainFilter);
}
const queryString = params.toString();
const endpoint = `/knowledge-items/${sourceId}/chunks${queryString ? `?${queryString}` : ''}`;
return apiRequest<{
success: boolean;
source_id: string;
domain_filter?: string;
chunks: Array<{
id: string;
source_id: string;
content: string;
metadata?: any;
url?: string;
}>;
count: number;
}>(endpoint);
}
/**
* Upload a document to the knowledge base with progress tracking
*/
@ -295,6 +324,7 @@ class KnowledgeBaseService {
count: number
}>(`/knowledge-items/${sourceId}/code-examples`);
}
}
// Export singleton instance

View File

@ -237,6 +237,55 @@ async def delete_knowledge_item(source_id: str):
raise HTTPException(status_code=500, detail={"error": str(e)})
@router.get("/knowledge-items/{source_id}/chunks")
async def get_knowledge_item_chunks(source_id: str, domain_filter: str | None = None):
"""Get all document chunks for a specific knowledge item with optional domain filtering."""
try:
safe_logfire_info(f"Fetching chunks for source_id: {source_id}, domain_filter: {domain_filter}")
# Query document chunks with content for this specific source
supabase = get_supabase_client()
# Build the query
query = supabase.from_("archon_crawled_pages").select(
"id, source_id, content, metadata, url"
)
query = query.eq("source_id", source_id)
# Apply domain filtering if provided
if domain_filter:
# Case-insensitive URL match
query = query.ilike("url", f"%{domain_filter}%")
# Deterministic ordering (URL then id)
query = query.order("url", desc=False).order("id", desc=False)
result = query.execute()
if getattr(result, "error", None):
safe_logfire_error(
f"Supabase query error | source_id={source_id} | error={result.error}"
)
raise HTTPException(status_code=500, detail={"error": str(result.error)})
chunks = result.data if result.data else []
safe_logfire_info(f"Found {len(chunks)} chunks for {source_id}")
return {
"success": True,
"source_id": source_id,
"domain_filter": domain_filter,
"chunks": chunks,
"count": len(chunks),
}
except Exception as e:
safe_logfire_error(
f"Failed to fetch chunks | error={str(e)} | source_id={source_id}"
)
raise HTTPException(status_code=500, detail={"error": str(e)})
@router.get("/knowledge-items/{source_id}/code-examples")
async def get_knowledge_item_code_examples(source_id: str):
"""Get all code examples for a specific knowledge item."""