verify_index
Checks the local SQLite index for structural integrity, foreign-key violations, and embedding consistency. Returns a report with status and suggested repairs. Use as a preflight before reindexing.
Instructions
Read-only structural check of the local SQLite index: SQLite integrity_check, foreign-key violations, required-table presence, FTS5 integrity-check, embedding dimension consistency, and orphan embedding detection. Returns a check-by-check report with status (ok/warn/error) and a suggested repair mode for any non-ok finding. Never writes. Use as a preflight before reindex/embed_repo or when search is misbehaving. Returns JSON: { ok, status, checks: [{ name, status, detail, count?, suggested_repair? }] }.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| No arguments | | | |
Implementation Reference
- src/db/verify.ts:96-275 (handler) The core handler function `verifyIndex` that executes the read-only structural checks: SQLite integrity_check, foreign-key violations, required-table presence, FTS5 integrity-check, embedding dimension consistency, and orphan embedding detection. Returns a VerifyReport.
/**
 * Runs the structural checks against the index database and collects one
 * `VerifyCheck` entry per probe.
 *
 * Probes, in order: SQLite `integrity_check`, `foreign_key_check`, presence of
 * every table in `REQUIRED_TABLES`, the FTS5 `integrity-check` command on
 * `symbols_fts`, embedding byte-length consistency against
 * `embedding_meta.dim`, and embeddings whose symbol row no longer exists.
 * Probes 4–6 are skipped (no entry is emitted) when the tables they need are
 * absent.
 *
 * A probe that throws is converted into a check entry instead of aborting the
 * run, so callers always receive a full report.
 *
 * @param db - Open database handle to inspect.
 * @returns The per-check results plus the highest severity seen; `ok` is true
 *   only when every check reported `ok`.
 */
export function verifyIndex(db: Database.Database): VerifyReport {
  const checks: VerifyCheck[] = [];

  // Shared formatter for caught values, which are not guaranteed to be Errors.
  const describeError = (e: unknown): string => (e instanceof Error ? e.message : String(e));

  // ── 1. SQLite integrity_check ─────────────────────────────────────────
  try {
    const rows = db.prepare('PRAGMA integrity_check').all() as IntegrityRow[];
    if (rows.length === 1 && rows[0].integrity_check === 'ok') {
      checks.push({ name: 'sqlite_integrity', status: 'ok', detail: 'PRAGMA integrity_check: ok' });
    } else {
      // Only the first three messages are surfaced to keep the detail short.
      checks.push({
        name: 'sqlite_integrity',
        status: 'error',
        detail: `PRAGMA integrity_check returned ${rows.length} issues: ${rows
          .slice(0, 3)
          .map((r) => r.integrity_check)
          .join('; ')}${rows.length > 3 ? '; …' : ''}`,
        suggested_repair: 'reindex --force',
      });
    }
  } catch (e) {
    checks.push({
      name: 'sqlite_integrity',
      status: 'error',
      detail: `PRAGMA integrity_check failed: ${describeError(e)}`,
      suggested_repair: 'reindex --force',
    });
  }

  // ── 2. Foreign-key check ──────────────────────────────────────────────
  try {
    // NOTE(review): this changes a connection-level setting on the handle that
    // was passed in. foreign_key_check appears to work without enforcement
    // being enabled — confirm whether this pragma is still needed.
    db.pragma('foreign_keys = ON');
    const violations = db.prepare('PRAGMA foreign_key_check').all() as FkViolationRow[];
    if (violations.length === 0) {
      checks.push({ name: 'foreign_keys', status: 'ok', detail: 'No foreign-key violations' });
    } else {
      checks.push({
        name: 'foreign_keys',
        status: 'error',
        detail: `${violations.length} foreign-key violation(s): ${violations
          .slice(0, 3)
          .map((v) => `${v.table}#${v.rowid} → ${v.parent}`)
          .join(', ')}${violations.length > 3 ? ', …' : ''}`,
        count: violations.length,
        suggested_repair: 'drop-orphans',
      });
    }
  } catch (e) {
    checks.push({
      name: 'foreign_keys',
      status: 'warn',
      detail: `foreign_key_check probe failed: ${describeError(e)}`,
    });
  }

  // ── 3. Required tables ────────────────────────────────────────────────
  const missing: string[] = [];
  for (const t of REQUIRED_TABLES) {
    if (!tableExists(db, t)) missing.push(t);
  }
  if (missing.length === 0) {
    checks.push({
      name: 'required_tables',
      status: 'ok',
      detail: `All ${REQUIRED_TABLES.length} required tables present`,
    });
  } else {
    checks.push({
      name: 'required_tables',
      status: 'error',
      detail: `Missing tables: ${missing.join(', ')}`,
      suggested_repair: 'reindex --force',
    });
  }

  // ── 4. FTS5 integrity probe ───────────────────────────────────────────
  // symbols_fts uses external-content (content='symbols'), so COUNT(*) on it
  // always matches the source table — it isn't a useful drift signal. The
  // FTS5 'integrity-check' command, on the other hand, walks the inverted
  // index and reports physical corruption.
  if (tableExists(db, 'symbols_fts')) {
    try {
      db.prepare(`INSERT INTO symbols_fts(symbols_fts) VALUES ('integrity-check')`).run();
      checks.push({
        name: 'fts_integrity',
        status: 'ok',
        detail: 'symbols_fts integrity-check passed',
      });
    } catch (e) {
      checks.push({
        name: 'fts_integrity',
        status: 'warn',
        detail: `symbols_fts integrity-check failed: ${describeError(e)}`,
        suggested_repair: 'rebuild-fts',
      });
    }
  }

  // ── 5. Embedding dimension consistency ────────────────────────────────
  if (tableExists(db, 'symbol_embeddings') && tableExists(db, 'embedding_meta')) {
    // The whole probe is guarded: a failing byte-length query must surface as
    // a check entry, like every other probe, rather than abort the report.
    try {
      let expectedDim: number | null = null;
      try {
        const meta = db.prepare('SELECT dim FROM embedding_meta WHERE id = 1').get() as
          | { dim: number }
          | undefined;
        expectedDim = meta?.dim ?? null;
      } catch {
        // An unreadable meta row is treated the same as a missing one.
        expectedDim = null;
      }

      const total = tableRowCount(db, 'symbol_embeddings');
      if (total === 0) {
        checks.push({ name: 'embedding_dim', status: 'ok', detail: 'No embeddings yet' });
      } else if (expectedDim === null) {
        checks.push({
          name: 'embedding_dim',
          status: 'warn',
          detail: `${total} embeddings but no embedding_meta.dim — dimension cannot be verified`,
          count: total,
          suggested_repair: 'drop-vec',
        });
      } else {
        const expectedBytes = expectedDim * 4; // Float32
        const stmt = db.prepare(
          'SELECT COUNT(*) AS c FROM symbol_embeddings WHERE LENGTH(embedding) != ?',
        );
        const r = stmt.get(expectedBytes) as { c: number };
        const wrong = r?.c ?? 0;
        if (wrong === 0) {
          checks.push({
            name: 'embedding_dim',
            status: 'ok',
            detail: `${total} embeddings × ${expectedDim}d match`,
          });
        } else {
          checks.push({
            name: 'embedding_dim',
            status: 'error',
            detail: `${wrong} of ${total} embeddings have a wrong byte length (expected ${expectedBytes} for ${expectedDim}d)`,
            count: wrong,
            suggested_repair: 'drop-vec',
          });
        }
      }
    } catch (e) {
      checks.push({
        name: 'embedding_dim',
        status: 'warn',
        detail: `Embedding dimension probe failed: ${describeError(e)}`,
      });
    }
  }

  // ── 6. Orphan embeddings ──────────────────────────────────────────────
  if (tableExists(db, 'symbol_embeddings') && tableExists(db, 'symbols')) {
    try {
      const r = db
        .prepare(
          'SELECT COUNT(*) AS c FROM symbol_embeddings e LEFT JOIN symbols s ON s.id = e.symbol_id WHERE s.id IS NULL',
        )
        .get() as { c: number };
      const orphans = r?.c ?? 0;
      if (orphans === 0) {
        checks.push({ name: 'orphan_embeddings', status: 'ok', detail: 'No orphan embeddings' });
      } else {
        checks.push({
          name: 'orphan_embeddings',
          status: 'warn',
          detail: `${orphans} embedding row(s) reference deleted symbols`,
          count: orphans,
          suggested_repair: 'drop-orphans',
        });
      }
    } catch (e) {
      checks.push({
        name: 'orphan_embeddings',
        status: 'warn',
        detail: `Orphan probe failed: ${describeError(e)}`,
      });
    }
  }

  // Overall status is the worst severity among the collected checks.
  const status: VerifyCheckStatus = checks.some((c) => c.status === 'error')
    ? 'error'
    : checks.some((c) => c.status === 'warn')
      ? 'warn'
      : 'ok';
  return { ok: status === 'ok', status, checks };
}
// - src/db/verify.ts:26-43 (schema) Type definitions for `VerifyCheck` (individual check result with name, status, detail, count, suggested_repair) and `VerifyReport` (aggregate ok/status/checks). Also `VerifyCheckStatus` union type.
/** Severity of a single check, and of the report as a whole. */
export type VerifyCheckStatus = 'ok' | 'warn' | 'error';

/** Outcome of one probe run by the verifier. */
export interface VerifyCheck {
  /** Identifier of the probe that produced this entry. */
  name: string;
  /** Severity assigned to the finding. */
  status: VerifyCheckStatus;
  /** Human-readable explanation of the finding. */
  detail: string;
  /** Optional row-count or delta surfaced for the human reader. */
  count?: number;
  /** Suggested repair mode to clear this check (when applicable). */
  suggested_repair?: string;
}

/** Aggregate result returned by the verifier. */
export interface VerifyReport {
  /** Convenience flag for callers that only need pass/fail. */
  ok: boolean;
  /** Highest severity surfaced. */
  status: VerifyCheckStatus;
  /** Individual probe results. */
  checks: Array<VerifyCheck>;
}
// - src/tools/register/core.ts:202-213 (registration) MCP tool registration for 'verify_index' using `server.tool()`. Describes the tool as a read-only structural check of the SQLite index. Calls `verifyIndex(store.db)` and returns JSON report.
// Registers the argument-less `verify_index` tool. The handler runs
// `verifyIndex` against `store.db`, serialises the report with `j`, and flags
// the MCP response as an error only when the report's overall status is 'error'.
server.tool(
  'verify_index',
  'Read-only structural check of the local SQLite index: SQLite integrity_check, foreign-key violations, required-table presence, FTS5 integrity-check, embedding dimension consistency, and orphan embedding detection. Returns a check-by-check report with status (ok/warn/error) and a suggested repair mode for any non-ok finding. Never writes. Use as a preflight before reindex/embed_repo or when search is misbehaving. Returns JSON: { ok, status, checks: [{ name, status, detail, count?, suggested_repair? }] }.',
  {},
  async () => {
    const verification = verifyIndex(store.db);
    const failed = verification.status === 'error';
    return {
      content: [{ type: 'text', text: j(verification) }],
      isError: failed,
    };
  },
);
// - src/tools/register/core.ts:1-25 (registration) Imports: `verifyIndex` is imported from `../../db/verify.js` at line 7. The registration function `registerCoreTools` is exported at line 19 and called from the server setup.
import path from 'node:path'; import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { z } from 'zod'; import { optionalNonEmptyString } from './_zod-helpers.js'; import { EmbeddingPipeline } from '../../ai/embedding-pipeline.js'; import { repairIndex, type RepairMode } from '../../db/repair.js'; import { verifyIndex } from '../../db/verify.js'; import { LOCKS_DIR, projectHash } from '../../global.js'; import { IndexingPipeline } from '../../indexer/pipeline.js'; import { buildProjectContext } from '../../indexer/project-context.js'; import { shouldSkipRecentReindex } from '../../indexer/recent-reindex-cache.js'; import { logger } from '../../logger.js'; import type { ServerContext } from '../../server/types.js'; import { LockError, withLock } from '../../utils/pid-lock.js'; import { checkFileForDuplicates } from '../analysis/duplication.js'; import { getMinimalContext } from '../project/minimal-context.js'; import { getIndexHealth, getProjectMap } from '../project/project.js'; export function registerCoreTools(server: McpServer, ctx: ServerContext): void { const { store, registry, config, projectRoot, guardPath, - src/db/verify.ts:65-88 (helper)Helper utilities used by `verifyIndex`: `rowExists`, `tableExists`, and `tableRowCount` for database introspection.
/**
 * Reports whether `sql` yields at least one row.
 *
 * Any error raised while preparing or running the statement is treated as
 * "no row" and results in `false`.
 */
function rowExists(db: Database.Database, sql: string): boolean {
  try {
    return db.prepare(sql).get() !== undefined;
  } catch {
    return false;
  }
}

/**
 * Reports whether `sqlite_master` lists a table or view called `name`.
 *
 * Single quotes in `name` are doubled before it is embedded in the SQL
 * string literal.
 */
function tableExists(db: Database.Database, name: string): boolean {
  const escapedName = name.replace(/'/g, "''");
  const lookupSql = `SELECT name FROM sqlite_master WHERE type IN ('table','view') AND name = '${escapedName}'`;
  return rowExists(db, lookupSql);
}

/**
 * Returns `COUNT(*)` for the table `name`, or 0 when the query fails or
 * produces no usable row. `name` is placed into the SQL text as-is.
 */
function tableRowCount(db: Database.Database, name: string): number {
  try {
    const counted = db.prepare(`SELECT COUNT(*) AS c FROM ${name}`).get() as
      | { c: number }
      | undefined;
    return counted?.c ?? 0;
  } catch {
    return 0;
  }
}