Crawl4AI RAG MCP Server

mcpragcrawl4ai
migrations

v001_add_kg_support.sql•6.66 KiB

-- Migration: Add Knowledge Graph Support -- Version: 001 -- Description: Adds tables and columns for Knowledge Graph integration -- Date: 2025-10-15 -- ============================================================================ -- 1. Add KG tracking columns to crawled_content -- ============================================================================ -- Check if columns exist before adding (SQLite doesn't have ALTER COLUMN IF NOT EXISTS) -- These will be added by Python migration script with existence checks -- kg_processed: Flag indicating if KG extraction completed -- kg_entity_count: Number of entities extracted -- kg_relationship_count: Number of relationships extracted -- kg_document_id: Neo4j Document node ID reference -- kg_processed_at: Timestamp of KG processing completion -- ============================================================================ -- 2. Create content_chunks table -- ============================================================================ CREATE TABLE IF NOT EXISTS content_chunks ( rowid INTEGER PRIMARY KEY, -- Same as content_vectors rowid content_id INTEGER NOT NULL, -- FK to crawled_content chunk_index INTEGER NOT NULL, -- Sequential: 0, 1, 2, ... chunk_text TEXT NOT NULL, -- Full chunk text char_start INTEGER NOT NULL, -- Position in original markdown char_end INTEGER NOT NULL, -- End position in original markdown word_count INTEGER, -- Words in chunk kg_processed BOOLEAN DEFAULT 0, -- Flag: KG extraction attempted for this chunk created_at DATETIME DEFAULT CURRENT_TIMESTAMP, FOREIGN KEY (content_id) REFERENCES crawled_content(id) ON DELETE CASCADE ); CREATE INDEX IF NOT EXISTS idx_content_chunks_content_id ON content_chunks(content_id); CREATE INDEX IF NOT EXISTS idx_content_chunks_position ON content_chunks(char_start, char_end); CREATE INDEX IF NOT EXISTS idx_content_chunks_kg_processed ON content_chunks(kg_processed); -- ============================================================================ -- 3. Create chunk_entities table -- ============================================================================ CREATE TABLE IF NOT EXISTS chunk_entities ( id INTEGER PRIMARY KEY AUTOINCREMENT, chunk_rowid INTEGER NOT NULL, -- Which chunk (FK to content_chunks) content_id INTEGER NOT NULL, -- Which document (FK to crawled_content) entity_text TEXT NOT NULL, -- "FastAPI", "Python", etc. entity_normalized TEXT, -- lowercase for deduplication entity_type_primary TEXT, -- "Framework", "Language", etc. entity_type_sub1 TEXT, -- "Backend", "Interpreted", etc. entity_type_sub2 TEXT, -- "Python", "DynamicTyped", etc. entity_type_sub3 TEXT, -- Optional 3rd level confidence REAL, -- 0.0-1.0 from GLiNER offset_start INTEGER, -- Position within chunk offset_end INTEGER, -- End position within chunk neo4j_node_id TEXT, -- Reference to Neo4j Entity node spans_multiple_chunks BOOLEAN DEFAULT 0, -- True if entity in overlap region created_at DATETIME DEFAULT CURRENT_TIMESTAMP, FOREIGN KEY (chunk_rowid) REFERENCES content_chunks(rowid) ON DELETE CASCADE, FOREIGN KEY (content_id) REFERENCES crawled_content(id) ON DELETE CASCADE ); CREATE INDEX IF NOT EXISTS idx_chunk_entities_chunk ON chunk_entities(chunk_rowid); CREATE INDEX IF NOT EXISTS idx_chunk_entities_entity ON chunk_entities(entity_text); CREATE INDEX IF NOT EXISTS idx_chunk_entities_type ON chunk_entities(entity_type_primary, entity_type_sub1); CREATE INDEX IF NOT EXISTS idx_chunk_entities_content ON chunk_entities(content_id); CREATE INDEX IF NOT EXISTS idx_chunk_entities_neo4j ON chunk_entities(neo4j_node_id); CREATE INDEX IF NOT EXISTS idx_chunk_entities_normalized ON chunk_entities(entity_normalized); -- ============================================================================ -- 4. Create chunk_relationships table -- ============================================================================ CREATE TABLE IF NOT EXISTS chunk_relationships ( id INTEGER PRIMARY KEY AUTOINCREMENT, content_id INTEGER NOT NULL, -- Which document subject_entity TEXT NOT NULL, -- "FastAPI" predicate TEXT NOT NULL, -- "uses", "competes_with", etc. object_entity TEXT NOT NULL, -- "Pydantic" confidence REAL, -- 0.0-1.0 from vLLM context TEXT, -- Sentence where relationship found neo4j_relationship_id TEXT, -- Reference to Neo4j relationship spans_chunks BOOLEAN DEFAULT 0, -- True if entities in different chunks chunk_rowids TEXT, -- JSON array: [45001] or [45001, 45015] created_at DATETIME DEFAULT CURRENT_TIMESTAMP, FOREIGN KEY (content_id) REFERENCES crawled_content(id) ON DELETE CASCADE ); CREATE INDEX IF NOT EXISTS idx_chunk_relationships_content ON chunk_relationships(content_id); CREATE INDEX IF NOT EXISTS idx_chunk_relationships_subject ON chunk_relationships(subject_entity); CREATE INDEX IF NOT EXISTS idx_chunk_relationships_object ON chunk_relationships(object_entity); CREATE INDEX IF NOT EXISTS idx_chunk_relationships_predicate ON chunk_relationships(predicate); -- ============================================================================ -- 5. Create kg_processing_queue table -- ============================================================================ CREATE TABLE IF NOT EXISTS kg_processing_queue ( id INTEGER PRIMARY KEY AUTOINCREMENT, content_id INTEGER NOT NULL, status TEXT DEFAULT 'pending', -- pending, processing, completed, failed, skipped priority INTEGER DEFAULT 1, -- Higher = process first queued_at DATETIME DEFAULT CURRENT_TIMESTAMP, processing_started_at DATETIME, processed_at DATETIME, retry_count INTEGER DEFAULT 0, error_message TEXT, result_summary TEXT, -- JSON with statistics skipped_reason TEXT, -- Reason if status='skipped' (e.g., 'kg_service_unavailable') FOREIGN KEY (content_id) REFERENCES crawled_content(id) ON DELETE CASCADE ); CREATE INDEX IF NOT EXISTS idx_kg_queue_status ON kg_processing_queue(status, priority, queued_at); CREATE INDEX IF NOT EXISTS idx_kg_queue_content ON kg_processing_queue(content_id); -- ============================================================================ -- Migration Complete -- ============================================================================

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Rob-P-Smith/mcpragcrawl4ai'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

v001_add_kg_support.sql•6.66 KiB