Context Mode

store.ts•16.5 KiB

/**
 * ContentStore — FTS5 BM25-based knowledge base for context-mode.
 *
 * Chunks markdown content by headings (keeping code blocks intact),
 * stores in SQLite FTS5, and retrieves via BM25-ranked search.
 *
 * Use for documentation, API references, and any content where
 * you need EXACT text later — not summaries.
 */

import type DatabaseConstructor from "better-sqlite3";
import type { Database as DatabaseInstance } from "better-sqlite3";
import { createRequire } from "node:module";
import { readFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";

// Lazy-load better-sqlite3 — only when ContentStore is first used.
// This lets the MCP server start instantly even if the native module
// isn't installed yet (marketplace first-run scenario).
let _Database: typeof DatabaseConstructor | null = null;

/**
 * Resolve the better-sqlite3 constructor on first use and cache it for
 * every later call.
 */
function loadDatabase(): typeof DatabaseConstructor {
  if (!_Database) {
    const require = createRequire(import.meta.url);
    _Database = require("better-sqlite3") as typeof DatabaseConstructor;
  }
  return _Database;
}

// ─────────────────────────────────────────────────────────
// Types
// ─────────────────────────────────────────────────────────

/** One markdown section produced by the heading-based chunker. */
interface Chunk {
  title: string;
  content: string;
  hasCode: boolean;
}

/** One plain-text section produced by the line-based chunker. */
interface PlainChunk {
  title: string;
  content: string;
}

/** Row shape written to the `chunks` table. */
interface StoredChunk {
  title: string;
  content: string;
  contentType: "code" | "prose";
}

/** One entry of the heading breadcrumb kept while walking a document. */
interface HeadingEntry {
  level: number;
  text: string;
}

export interface IndexResult {
  sourceId: number;
  label: string;
  totalChunks: number;
  codeChunks: number;
}

export interface SearchResult {
  title: string;
  content: string;
  source: string;
  rank: number;
  contentType: "code" | "prose";
}

export interface StoreStats {
  sources: number;
  chunks: number;
  codeChunks: number;
}

// ─────────────────────────────────────────────────────────
// Constants
// ─────────────────────────────────────────────────────────

const STOPWORDS = new Set([
  "the", "and", "for", "are", "but", "not", "you", "all", "can", "had",
  "her", "was", "one", "our", "out", "has", "his", "how", "its", "may",
  "new", "now", "old", "see", "way", "who", "did", "get", "got", "let",
  "say", "she", "too", "use", "will", "with", "this", "that", "from",
  "they", "been", "have", "many", "some", "them", "than", "each", "make",
  "like", "just", "over", "such", "take", "into", "year", "your", "good",
  "could", "would", "about", "which", "their", "there", "other", "after",
  "should", "through", "also", "more", "most", "only", "very", "when",
  "what", "then", "these", "those", "being", "does", "done", "both",
  "same", "still", "while", "where", "here", "were", "much",
  // Common in code/changelogs
  "update", "updates", "updated", "deps", "dev", "tests", "test",
  "add", "added", "fix", "fixed", "run", "running", "using",
]);

/** FTS5 query operators that must not reach MATCH as bare words. */
const FTS_OPERATORS = ["AND", "OR", "NOT", "NEAR"];

/** Horizontal rule separator (Context7 uses long dashes). */
const HORIZONTAL_RULE = /^[-_*]{3,}\s*$/;

/** Markdown heading, H1 through H4. */
const HEADING = /^(#{1,4})\s+(.+)$/;

/** Opening code fence at column 0, optionally followed by an info string. */
const FENCE_OPEN = /^(`{3,})(.*)?$/;

/** Candidate closing fence: only backticks plus optional trailing whitespace. */
const FENCE_CLOSE = /^(`{3,})\s*$/;

/** Any line that starts a fence; used to flag chunks that contain code. */
const FENCE_ANY = /^`{3,}/;

/** Lines shared between consecutive fixed-size plain-text chunks. */
const PLAIN_TEXT_OVERLAP = 2;

// ─────────────────────────────────────────────────────────
// Helpers
// ─────────────────────────────────────────────────────────

/**
 * Turn free-form user input into an FTS5 MATCH expression.
 *
 * Characters with FTS5 syntax meaning are replaced by spaces, bare operator
 * keywords are dropped, and every remaining word is double-quoted and
 * joined with OR.
 *
 * @returns The MATCH expression, or `""` (an empty quoted string) when no
 *   usable word remains.
 */
function sanitizeQuery(query: string): string {
  const words = query
    .replace(/['"(){}[\]*:^~]/g, " ")
    .split(/\s+/)
    .filter((w) => w.length > 0 && !FTS_OPERATORS.includes(w.toUpperCase()));

  if (words.length === 0) return '""';

  return words.map((w) => `"${w}"`).join(" OR ");
}

/**
 * Build a chunk title from the heading breadcrumb.
 *
 * @returns The stack joined with " > ", or `currentHeading` (falling back to
 *   "Untitled") when the stack is empty.
 */
function buildTitle(
  headingStack: ReadonlyArray<HeadingEntry>,
  currentHeading: string,
): string {
  if (headingStack.length === 0) {
    return currentHeading || "Untitled";
  }
  return headingStack.map((h) => h.text).join(" > ");
}

/**
 * Split markdown into chunks at H1–H4 headings and horizontal rules.
 *
 * Fenced code blocks are consumed as a unit, so heading-like or rule-like
 * lines inside a fence never split a chunk. Both LF and CRLF line endings
 * are accepted.
 *
 * @returns Chunks in document order; sections that are empty after trimming
 *   are omitted.
 */
function chunkMarkdown(text: string): Chunk[] {
  const chunks: Chunk[] = [];
  // Strip "\r" together with "\n": a trailing "\r" would stop the heading and
  // fence patterns from matching, because "." does not match "\r".
  const lines = text.split(/\r?\n/);
  const headingStack: HeadingEntry[] = [];
  let currentContent: string[] = [];
  let currentHeading = "";

  const flush = (): void => {
    const joined = currentContent.join("\n").trim();
    // Whitespace-only buffers are kept, not reset; trim() removes them later.
    if (joined.length === 0) return;

    chunks.push({
      title: buildTitle(headingStack, currentHeading),
      content: joined,
      hasCode: currentContent.some((l) => FENCE_ANY.test(l)),
    });
    currentContent = [];
  };

  let i = 0;
  while (i < lines.length) {
    const line = lines[i] ?? "";

    if (HORIZONTAL_RULE.test(line)) {
      flush();
      i++;
      continue;
    }

    const headingMatch = HEADING.exec(line);
    if (headingMatch) {
      flush();

      const [, hashes = "", rawHeading = ""] = headingMatch;
      const level = hashes.length;
      const heading = rawHeading.trim();

      // Pop headings at the same or a deeper level so the stack stays a
      // breadcrumb from the outermost heading to this one.
      while (
        headingStack.length > 0 &&
        (headingStack[headingStack.length - 1]?.level ?? 0) >= level
      ) {
        headingStack.pop();
      }
      headingStack.push({ level, text: heading });
      currentHeading = heading;

      currentContent.push(line);
      i++;
      continue;
    }

    // Code block — collect the entire block as a unit.
    const fenceMatch = FENCE_OPEN.exec(line);
    if (fenceMatch) {
      const fence = fenceMatch[1] ?? "";
      const codeLines: string[] = [line];
      i++;

      // An unterminated fence runs to the end of the document.
      while (i < lines.length) {
        const codeLine = lines[i] ?? "";
        codeLines.push(codeLine);
        i++;

        // CommonMark: the closing fence has at least as many backticks as
        // the opening one.
        const closer = FENCE_CLOSE.exec(codeLine);
        if (closer && (closer[1] ?? "").length >= fence.length) break;
      }

      currentContent.push(...codeLines);
      continue;
    }

    currentContent.push(line);
    i++;
  }

  flush();
  return chunks;
}

/**
 * Split plain-text output into chunks.
 *
 * Blank-line splitting is used when it yields 3–200 sections that are each
 * under 5000 bytes. Otherwise the text is cut into windows of
 * `linesPerChunk` lines that overlap by two lines.
 *
 * @param linesPerChunk Window size for the fixed-size path. Values below 1
 *   (or NaN) are treated as 1 so non-empty text is never dropped.
 */
function chunkPlainText(text: string, linesPerChunk: number): PlainChunk[] {
  // Try blank-line splitting first for naturally-sectioned output.
  const sections = text.split(/\n\s*\n/);
  if (
    sections.length >= 3 &&
    sections.length <= 200 &&
    sections.every((s) => Buffer.byteLength(s) < 5000)
  ) {
    return sections
      .map((section, idx) => {
        const trimmed = section.trim();
        const firstLine = (trimmed.split("\n")[0] ?? "").slice(0, 80);
        return {
          title: firstLine || `Section ${idx + 1}`,
          content: trimmed,
        };
      })
      .filter((s) => s.content.length > 0);
  }

  const lines = text.split("\n");
  const size = linesPerChunk >= 1 ? Math.floor(linesPerChunk) : 1;

  // Small enough for a single chunk.
  if (lines.length <= size) {
    return [{ title: "Output", content: text }];
  }

  const chunks: PlainChunk[] = [];
  const step = Math.max(size - PLAIN_TEXT_OVERLAP, 1);

  for (let start = 0; start < lines.length; start += step) {
    const slice = lines.slice(start, start + size);
    const firstLine = slice[0]?.trim().slice(0, 80);
    chunks.push({
      title: firstLine || `Lines ${start + 1}-${start + slice.length}`,
      content: slice.join("\n"),
    });

    // This window reached the last line; a further window would hold only
    // overlap lines that were already stored.
    if (start + size >= lines.length) break;
  }

  return chunks;
}

// ─────────────────────────────────────────────────────────
// ContentStore
// ─────────────────────────────────────────────────────────

/**
 * SQLite-backed store that indexes chunked text into an FTS5 table and
 * serves BM25-ranked search over it.
 */
export class ContentStore {
  #db: DatabaseInstance;

  /**
   * Open the database and create the schema if it is missing.
   *
   * @param dbPath Database file path. Defaults to
   *   `context-mode-<pid>.db` in the OS temp directory.
   */
  constructor(dbPath?: string) {
    const Database = loadDatabase();
    const path = dbPath ?? join(tmpdir(), `context-mode-${process.pid}.db`);
    this.#db = new Database(path, { timeout: 5000 });
    this.#db.pragma("journal_mode = WAL");
    this.#db.pragma("synchronous = NORMAL");
    this.#initSchema();
  }

  // ── Schema ──

  /** Create the `sources` table and the `chunks` FTS5 table when absent. */
  #initSchema(): void {
    this.#db.exec(`
      CREATE TABLE IF NOT EXISTS sources (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        label TEXT NOT NULL,
        chunk_count INTEGER NOT NULL DEFAULT 0,
        code_chunk_count INTEGER NOT NULL DEFAULT 0,
        indexed_at TEXT NOT NULL DEFAULT (datetime('now'))
      );

      CREATE VIRTUAL TABLE IF NOT EXISTS chunks USING fts5(
        title,
        content,
        source_id UNINDEXED,
        content_type UNINDEXED,
        tokenize='porter unicode61'
      );
    `);
  }

  /**
   * Insert one source row and all of its chunk rows in a single
   * transaction.
   *
   * @returns The new source id together with total and code chunk counts.
   */
  #persist(label: string, rows: ReadonlyArray<StoredChunk>): IndexResult {
    const codeChunks = rows.filter((r) => r.contentType === "code").length;

    const insertSource = this.#db.prepare(
      "INSERT INTO sources (label, chunk_count, code_chunk_count) VALUES (?, ?, ?)",
    );
    const insertChunk = this.#db.prepare(
      "INSERT INTO chunks (title, content, source_id, content_type) VALUES (?, ?, ?, ?)",
    );

    const transaction = this.#db.transaction(() => {
      const info = insertSource.run(label, rows.length, codeChunks);
      const id = Number(info.lastInsertRowid);
      for (const row of rows) {
        insertChunk.run(row.title, row.content, id, row.contentType);
      }
      return id;
    });

    const sourceId = transaction();
    return { sourceId, label, totalChunks: rows.length, codeChunks };
  }

  // ── Index ──

  /**
   * Index markdown, chunked by headings.
   *
   * @param options.content Markdown text; takes precedence over `path`.
   * @param options.path File read as UTF-8 when `content` is not given.
   * @param options.source Label stored for the source. Defaults to `path`,
   *   then "untitled".
   * @returns The new source id and chunk counts. A source row is written
   *   even when the text yields no chunks.
   * @throws Error when both `content` and `path` are missing or empty.
   */
  index(options: {
    content?: string;
    path?: string;
    source?: string;
  }): IndexResult {
    const { content, path, source } = options;

    if (!content && !path) {
      throw new Error("Either content or path must be provided");
    }

    // The guard above ensures `path` is set whenever `content` is undefined.
    const text = content ?? readFileSync(path!, "utf-8");
    const label = source ?? path ?? "untitled";

    const rows = chunkMarkdown(text).map(
      (chunk): StoredChunk => ({
        title: chunk.title,
        content: chunk.content,
        contentType: chunk.hasCode ? "code" : "prose",
      }),
    );

    return this.#persist(label, rows);
  }

  // ── Index Plain Text ──

  /**
   * Index plain-text output (logs, build output, test results) by splitting
   * into blank-line sections or fixed-size line groups. Unlike markdown
   * indexing, this does not look for headings, and every chunk is stored
   * as prose.
   *
   * @param content Text to index. Empty or whitespace-only text is stored
   *   as a source with zero chunks.
   * @param source Label stored for the source.
   * @param linesPerChunk Window size for the fixed-size fallback.
   */
  indexPlainText(
    content: string,
    source: string,
    linesPerChunk: number = 20,
  ): IndexResult {
    const isBlank = !content || content.trim().length === 0;
    const chunks = isBlank ? [] : chunkPlainText(content, linesPerChunk);

    const rows = chunks.map(
      (chunk): StoredChunk => ({
        title: chunk.title,
        content: chunk.content,
        contentType: "prose",
      }),
    );

    return this.#persist(source, rows);
  }

  // ── Search ──

  /**
   * Run a BM25-ranked full-text search.
   *
   * The query is sanitized into an OR of quoted terms. Results are ordered
   * by ascending `bm25()` value, with the title column weighted 2.0 and the
   * content column 1.0 (FTS5 reports better matches as smaller values).
   *
   * @param query Free-form search text.
   * @param limit Maximum number of rows to return.
   * @param source When given, only sources whose label contains this
   *   substring (SQL LIKE) are searched.
   */
  search(query: string, limit: number = 3, source?: string): SearchResult[] {
    const sanitized = sanitizeQuery(query);

    const sourceFilter = source ? "AND sources.label LIKE ?" : "";

    const stmt = this.#db.prepare(`
      SELECT
        chunks.title,
        chunks.content,
        chunks.content_type,
        sources.label,
        bm25(chunks, 2.0, 1.0) AS rank
      FROM chunks
      JOIN sources ON sources.id = chunks.source_id
      WHERE chunks MATCH ? ${sourceFilter}
      ORDER BY rank
      LIMIT ?
    `);

    const params = source
      ? [sanitized, `%${source}%`, limit]
      : [sanitized, limit];

    const rows = stmt.all(...params) as Array<{
      title: string;
      content: string;
      content_type: string;
      label: string;
      rank: number;
    }>;

    return rows.map((r) => ({
      title: r.title,
      content: r.content,
      source: r.label,
      rank: r.rank,
      contentType: r.content_type as "code" | "prose",
    }));
  }

  // ── Sources ──

  /** List every indexed source with its chunk count, newest id first. */
  listSources(): Array<{ label: string; chunkCount: number }> {
    return this.#db
      .prepare(
        "SELECT label, chunk_count as chunkCount FROM sources ORDER BY id DESC",
      )
      .all() as Array<{ label: string; chunkCount: number }>;
  }

  /**
   * Get all chunks for a given source by ID — bypasses FTS5 MATCH entirely.
   * Use this for inventory/listing where you need all sections, not search.
   *
   * @returns Chunks in insertion (rowid) order, each with `rank` set to 0.
   */
  getChunksBySource(sourceId: number): SearchResult[] {
    const rows = this.#db
      .prepare(
        `SELECT c.title, c.content, c.content_type, s.label
         FROM chunks c
         JOIN sources s ON s.id = c.source_id
         WHERE c.source_id = ?
         ORDER BY c.rowid`,
      )
      .all(sourceId) as Array<{
      title: string;
      content: string;
      content_type: string;
      label: string;
    }>;

    return rows.map((r) => ({
      title: r.title,
      content: r.content,
      source: r.label,
      rank: 0,
      contentType: r.content_type as "code" | "prose",
    }));
  }

  // ── Vocabulary ──

  /**
   * Pick words that are distinctive for one source.
   *
   * Words are lowercased, must be at least three characters long and must
   * not be stopwords. A word qualifies when it appears in at least 2 chunks
   * and in at most max(3, ceil(40% of the source's chunks)) chunks.
   * Qualifying words are ranked by IDF plus a length bonus and an
   * identifier bonus.
   *
   * @returns Up to `maxTerms` words, best first. Empty when the source is
   *   unknown or has fewer than 3 chunks.
   */
  getDistinctiveTerms(sourceId: number, maxTerms: number = 40): string[] {
    const stats = this.#db
      .prepare("SELECT chunk_count FROM sources WHERE id = ?")
      .get(sourceId) as { chunk_count: number } | undefined;

    if (!stats || stats.chunk_count < 3) return [];

    const totalChunks = stats.chunk_count;
    const minAppearances = 2;
    const maxAppearances = Math.max(3, Math.ceil(totalChunks * 0.4));

    const rows = this.#db
      .prepare("SELECT content FROM chunks WHERE source_id = ?")
      .all(sourceId) as Array<{ content: string }>;

    // Count document frequency (how many sections contain each word).
    const docFreq = new Map<string, number>();

    for (const row of rows) {
      const words = new Set(
        row.content
          .toLowerCase()
          .split(/[^\p{L}\p{N}_-]+/u)
          .filter((w) => w.length >= 3 && !STOPWORDS.has(w)),
      );
      for (const word of words) {
        docFreq.set(word, (docFreq.get(word) ?? 0) + 1);
      }
    }

    // Score: IDF (rarity) + length bonus + identifier bonus
    // (underscore, or 12+ characters).
    return Array.from(docFreq.entries())
      .filter(
        ([, count]) => count >= minAppearances && count <= maxAppearances,
      )
      .map(([word, count]) => {
        const idf = Math.log(totalChunks / count);
        const lenBonus = Math.min(word.length / 20, 0.5);
        const hasUnderscore = /[_]/.test(word);
        const isLong = word.length >= 12;
        const identifierBonus = hasUnderscore ? 1.5 : isLong ? 0.8 : 0;
        return { word, score: idf + lenBonus + identifierBonus };
      })
      .sort((a, b) => b.score - a.score)
      .slice(0, maxTerms)
      .map((s) => s.word);
  }

  // ── Stats ──

  /** Run a `SELECT COUNT(*) as c …` query and return the count, or 0. */
  #count(sql: string): number {
    const row = this.#db.prepare(sql).get() as { c: number } | undefined;
    return row?.c ?? 0;
  }

  /** Count sources, chunks, and chunks whose content type is code. */
  getStats(): StoreStats {
    const sources = this.#count("SELECT COUNT(*) as c FROM sources");
    const chunks = this.#count("SELECT COUNT(*) as c FROM chunks");
    const codeChunks = this.#count(
      "SELECT COUNT(*) as c FROM chunks WHERE content_type = 'code'",
    );

    return { sources, chunks, codeChunks };
  }

  // ── Cleanup ──

  /** Close the underlying database connection. */
  close(): void {
    this.#db.close();
  }
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mksglu/claude-context-mode'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

store.ts•16.5 KiB