Skip to main content
Glama
009-add-pages-table.sql4.57 kB
-- Migration: Add pages table to normalize page-level metadata and support Etag tracking -- This migration introduces a pages table to store page-level metadata once per URL -- and links document chunks to their parent pages via page_id foreign key -- 1. Create pages table to store unique page-level metadata CREATE TABLE IF NOT EXISTS pages ( id INTEGER PRIMARY KEY AUTOINCREMENT, version_id INTEGER NOT NULL REFERENCES versions(id), url TEXT NOT NULL, title TEXT, etag TEXT, last_modified TEXT, content_type TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP, UNIQUE(version_id, url) ); -- 2. Add indexes for efficient querying CREATE INDEX IF NOT EXISTS idx_pages_version_id ON pages(version_id); CREATE INDEX IF NOT EXISTS idx_pages_url ON pages(url); CREATE INDEX IF NOT EXISTS idx_pages_etag ON pages(etag); -- 3. Create new documents table with page_id foreign key CREATE TABLE documents_new ( id INTEGER PRIMARY KEY AUTOINCREMENT, page_id INTEGER NOT NULL REFERENCES pages(id), content TEXT, metadata JSON, -- Now contains only chunk-specific metadata (level, path) sort_order INTEGER NOT NULL, embedding BLOB, -- Store embeddings directly in documents table created_at DATETIME DEFAULT CURRENT_TIMESTAMP ); -- 4. Create indexes for the new documents table CREATE INDEX IF NOT EXISTS idx_documents_page_id ON documents_new(page_id); CREATE INDEX IF NOT EXISTS idx_documents_sort_order ON documents_new(page_id, sort_order); -- 5. Migrate data from old documents table to new structure -- First, populate pages table with unique page data from existing documents -- Group by version_id and url to ensure uniqueness, using MAX() to handle any duplicates INSERT INTO pages (version_id, url, title, created_at, updated_at) SELECT version_id, url, MAX(json_extract(metadata, '$.title')) as title, MAX(COALESCE(indexed_at, CURRENT_TIMESTAMP)) as created_at, MAX(COALESCE(indexed_at, CURRENT_TIMESTAMP)) as updated_at FROM documents GROUP BY version_id, url; -- 6. Migrate document chunks to new table structure -- Preserve all existing metadata except page-level fields (url, title, library, version) -- that are now stored in pages and versions tables INSERT INTO documents_new (id, page_id, content, metadata, sort_order, created_at) SELECT d.id, p.id as page_id, d.content, json_remove( json_remove( json_remove( json_remove(d.metadata, '$.url'), '$.title' ), '$.library' ), '$.version' ) as metadata, d.sort_order, COALESCE(d.indexed_at, CURRENT_TIMESTAMP) FROM documents d JOIN pages p ON d.version_id = p.version_id AND d.url = p.url; -- 7. Drop the old documents table DROP TABLE documents; -- 8. Rename the new table to documents ALTER TABLE documents_new RENAME TO documents; -- 9. Recreate FTS5 virtual table to work with new structure -- Drop existing FTS table and triggers DROP TRIGGER IF EXISTS documents_fts_after_delete; DROP TRIGGER IF EXISTS documents_fts_after_update; DROP TRIGGER IF EXISTS documents_fts_after_insert; DROP TABLE IF EXISTS documents_fts; -- Create new FTS table CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5( content, title, url, path, tokenize='porter unicode61' ); -- 10. Create new FTS triggers that join with pages table CREATE TRIGGER IF NOT EXISTS documents_fts_after_delete AFTER DELETE ON documents BEGIN DELETE FROM documents_fts WHERE rowid = old.id; END; CREATE TRIGGER IF NOT EXISTS documents_fts_after_update AFTER UPDATE ON documents BEGIN DELETE FROM documents_fts WHERE rowid = old.id; INSERT INTO documents_fts(rowid, content, title, url, path) SELECT new.id, new.content, p.title, p.url, json_extract(new.metadata, '$.path') FROM pages p WHERE p.id = new.page_id; END; CREATE TRIGGER IF NOT EXISTS documents_fts_after_insert AFTER INSERT ON documents BEGIN INSERT INTO documents_fts(rowid, content, title, url, path) SELECT new.id, new.content, p.title, p.url, json_extract(new.metadata, '$.path') FROM pages p WHERE p.id = new.page_id; END; -- 11. Create trigger to update pages.updated_at when page title changes CREATE TRIGGER IF NOT EXISTS pages_updated_at_trigger AFTER UPDATE ON pages BEGIN UPDATE pages SET updated_at = CURRENT_TIMESTAMP WHERE id = new.id; END; -- 12. Rebuild FTS index from migrated data INSERT INTO documents_fts(rowid, content, title, url, path) SELECT d.id, d.content, p.title, p.url, json_extract(d.metadata, '$.path') FROM documents d JOIN pages p ON d.page_id = p.id;

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/arabold/docs-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server