read_smart
Reads files with diff-only caching: the first read returns full content, and re-reads return only changed chunks or an 'unchanged' status, saving tokens.
Instructions
Read a file with diff-only caching. Returns: (1) full content + chunk metadata on first read, (2) "unchanged" + cached chunk list (~50 tokens) if mtime matches, (3) "unchanged_content" if mtime changed but sha256 matches (touched but not modified), (4) changed chunks with content + unchanged chunks as metadata-only if the file was truly modified. Use INSTEAD of Read for files you have read before — saves 50%+ tokens on re-reads.
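For orientation, here is a sketch of the response lifecycle across repeated reads of the same file. The payloads are illustrative, not verbatim tool output; field values are invented for the example.

```typescript
// 1. First read: full content plus chunk metadata, everything cached.
const first = { ok: true, status: 'first_read', content: '<full file>', chunks: ['...'], tokens_saved: 0 };

// 2. Re-read, file untouched (mtime matches): metadata only, ~50 tokens.
const second = { ok: true, status: 'unchanged', chunk_count: 12, tokens_saved: 2300 };

// 3. Re-read after touch(1) but no edit (mtime changed, sha256 identical).
const third = { ok: true, status: 'unchanged_content', tokens_saved: 2300 };

// 4. Re-read after a real edit: changed chunks carry content, the rest is metadata.
const fourth = { ok: true, status: 'modified', changed_chunks: ['...'], unchanged_chunks: ['...'] };
```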
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| path | Yes | Absolute file path | |
| force | No | If true, return full content regardless of cache state | false |
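A typical call needs only path; force bypasses the cache when the full text is required. Example arguments, with an invented path for illustration:

```typescript
// Normal cached read:
const args = { path: '/home/user/project/src/index.ts' };

// Bypass the cache and force full content:
const forced = { path: '/home/user/project/src/index.ts', force: true };
```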
Implementation Reference
- src/mcp/read-smart.ts:38-177 (handler): Main handler function for the read_smart tool. Implements diff-only caching: returns full content on first read, 'unchanged' metadata on re-read if the file is unchanged, or only changed chunks plus an unchanged summary when the file was modified. Saves ~50%+ tokens on re-reads.
```typescript
export function handleReadSmart(
  db: Database.Database,
  args: { path: string; force?: boolean }
): string {
  const { path, force = false } = args;

  let stat;
  try {
    stat = statSync(path);
  } catch (e: any) {
    return JSON.stringify({ ok: false, error: `File not found: ${path}` });
  }
  const mtime = Math.floor(stat.mtimeMs / 1000);
  const size = stat.size;

  const prior = db.prepare('SELECT * FROM file_snapshots WHERE path = ?').get(path) as
    | SnapshotRow
    | undefined;

  // --- CASE A: first read or force ---
  if (!prior || force) {
    const content = readFileSync(path, 'utf8');
    const fileHash = hashFile(content);
    const chunks = chunkFile(path, content);
    const chunkMeta = chunks.map(toMeta);
    db.prepare(
      `INSERT INTO file_snapshots (path, content_hash, mtime, size_bytes, chunks, last_read_at, read_count)
       VALUES (?, ?, ?, ?, ?, unixepoch(), 1)
       ON CONFLICT(path) DO UPDATE SET
         content_hash = excluded.content_hash,
         mtime = excluded.mtime,
         size_bytes = excluded.size_bytes,
         chunks = excluded.chunks,
         last_read_at = unixepoch(),
         read_count = read_count + 1`
    ).run(path, fileHash, mtime, size, JSON.stringify(chunkMeta));
    return JSON.stringify({
      ok: true,
      status: force ? 'forced_full' : 'first_read',
      path,
      content,
      chunks: chunkMeta,
      bytes: size,
      tokens_approx: estimateTokens(content),
      tokens_saved: 0,
    });
  }

  // --- CASE B: mtime unchanged → content guaranteed unchanged (fast path) ---
  if (prior.mtime === mtime) {
    db.prepare('UPDATE file_snapshots SET last_read_at = unixepoch(), read_count = read_count + 1 WHERE path = ?').run(path);
    const storedChunks = JSON.parse(prior.chunks) as StoredChunkMeta[];
    const factRows = db.prepare('SELECT fact, layer, chunk_hash FROM file_facts WHERE file_path = ?').all(path);
    // Token savings = what a full read would have cost
    const savedTokens = Math.round(size * TOKENS_PER_CHAR);
    return JSON.stringify({
      ok: true,
      status: 'unchanged',
      path,
      last_read_at: new Date(prior.last_read_at * 1000).toISOString(),
      chunk_count: storedChunks.length,
      chunks: storedChunks,
      file_facts: factRows,
      tokens_saved: savedTokens,
      note: 'File unchanged since last read. Call with force:true if full content is needed.',
    });
  }

  // --- CASE C: mtime changed → compute hash, maybe false alarm ---
  const content = readFileSync(path, 'utf8');
  const fileHash = hashFile(content);
  if (fileHash === prior.content_hash) {
    db.prepare('UPDATE file_snapshots SET mtime = ?, last_read_at = unixepoch(), read_count = read_count + 1 WHERE path = ?').run(mtime, path);
    return JSON.stringify({
      ok: true,
      status: 'unchanged_content',
      path,
      note: 'mtime changed but sha256 identical (file was touched but not modified).',
      tokens_saved: Math.round(size * TOKENS_PER_CHAR),
    });
  }

  // --- CASE D: real diff ---
  const newChunks = chunkFile(path, content);
  const oldChunks = JSON.parse(prior.chunks) as StoredChunkMeta[];
  const oldById = new Map(oldChunks.map((c) => [c.id, c]));

  const changedChunks: Array<{
    id: string;
    kind: string;
    status: 'added' | 'modified';
    start_line: number;
    end_line: number;
    content: string;
  }> = [];
  const unchangedChunks: StoredChunkMeta[] = [];
  const seenIds = new Set<string>();

  for (const c of newChunks) {
    seenIds.add(c.id);
    const prev = oldById.get(c.id);
    if (!prev) {
      changedChunks.push({ id: c.id, kind: c.kind, status: 'added', start_line: c.start_line, end_line: c.end_line, content: c.content });
    } else if (prev.hash !== c.hash) {
      changedChunks.push({ id: c.id, kind: c.kind, status: 'modified', start_line: c.start_line, end_line: c.end_line, content: c.content });
    } else {
      unchangedChunks.push({ id: c.id, kind: c.kind, start_line: c.start_line, end_line: c.end_line, hash: c.hash });
    }
  }

  const removedChunks = oldChunks
    .filter((c) => !seenIds.has(c.id))
    .map((c) => ({ id: c.id, kind: c.kind, prev_lines: `${c.start_line}-${c.end_line}` }));

  const newChunkMeta = newChunks.map(toMeta);
  db.prepare(
    `UPDATE file_snapshots SET content_hash = ?, mtime = ?, size_bytes = ?, chunks = ?,
       last_read_at = unixepoch(), read_count = read_count + 1 WHERE path = ?`
  ).run(fileHash, mtime, size, JSON.stringify(newChunkMeta), path);

  const fullTokens = estimateTokens(content);
  const returnedTokens = changedChunks.reduce((s, c) => s + estimateTokens(c.content), 0) + 80; // ~80 for the envelope
  const savedTokens = Math.max(0, fullTokens - returnedTokens);
  const pctSaved = fullTokens > 0 ? Math.round((savedTokens / fullTokens) * 100) : 0;

  return JSON.stringify({
    ok: true,
    status: 'modified',
    path,
    changed_chunks: changedChunks,
    unchanged_chunks: unchangedChunks,
    removed_chunks: removedChunks,
    summary: {
      changed: changedChunks.length,
      unchanged: unchangedChunks.length,
      removed: removedChunks.length,
      tokens_full: fullTokens,
      tokens_returned: returnedTokens,
      tokens_saved: savedTokens,
      pct_saved: pctSaved,
    },
  });
}
```
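The handler leans on a few helpers that are not excerpted here: hashFile, estimateTokens, toMeta, and the TOKENS_PER_CHAR constant. Below is a minimal sketch of plausible implementations, assuming SHA-256 content hashes (the tool's own notes mention sha256) and a rough 4-characters-per-token estimate; the real bodies live elsewhere in the repo and may differ.

```typescript
import { createHash } from 'node:crypto';

interface StoredChunkMeta { id: string; kind: string; start_line: number; end_line: number; hash: string; }

const TOKENS_PER_CHAR = 0.25; // assumption: ~4 characters per token

// Hash the full file content; the docs compare sha256 digests in CASE C.
function hashFile(content: string): string {
  return createHash('sha256').update(content).digest('hex');
}

// Rough token estimate from character count.
function estimateTokens(text: string): number {
  return Math.round(text.length * TOKENS_PER_CHAR);
}

// Drop chunk content, keeping only the metadata stored in file_snapshots.
function toMeta(c: StoredChunkMeta & { content: string }): StoredChunkMeta {
  return { id: c.id, kind: c.kind, start_line: c.start_line, end_line: c.end_line, hash: c.hash };
}
```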
- src/mcp/server.ts:167-177 (schema): Input schema and tool registration definition for read_smart. Defines path (required string) and force (optional boolean) inputs with descriptions.

```typescript
{
  name: 'read_smart',
  description:
    'Read a file with diff-only caching. Returns: (1) full content + chunk metadata on first read, (2) "unchanged" + cached chunk list (~50 tokens) if mtime matches, (3) "unchanged_content" if mtime changed but sha256 matches (touched but not modified), (4) changed chunks with content + unchanged chunks as metadata-only if the file was truly modified. Use INSTEAD of Read for files you have read before — saves 50%+ tokens on re-reads.',
  inputSchema: {
    type: 'object',
    properties: {
      path: { type: 'string', description: 'Absolute file path' },
      force: { type: 'boolean', description: 'If true, return full content regardless of cache state', default: false },
    },
    required: ['path'],
  },
},
```
- src/mcp/server.ts:811-811 (registration): Tool dispatch that routes the 'read_smart' tool name from the MCP CallToolRequest to the handleReadSmart wrapper function.

```typescript
case 'read_smart': text = handleReadSmart(args); break;
```

- src/lib/file-chunker.ts:44-273 (helper): File chunker used by read_smart to split files into semantic chunks (AST-based for TS/JS, indent-based for Python, heading-based for Markdown, fixed 100-line windows for others). Enables diff-only re-reads by computing chunk hashes.
```typescript
export function chunkFile(path: string, content: string): Chunk[] {
  const ext = extname(path).toLowerCase();
  try {
    if (['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs'].includes(ext)) {
      return chunkTsJs(content);
    }
    if (ext === '.py') return chunkPython(content);
    if (ext === '.md' || ext === '.markdown') return chunkMarkdown(content);
  } catch {
    // parse failures fall back to fixed
  }
  return chunkFixed(content, 100);
}

// ============================================================
// TypeScript / JavaScript — AST-based
// ============================================================

function chunkTsJs(content: string): Chunk[] {
  const ast = babelParse(content, {
    sourceType: 'unambiguous',
    plugins: ['typescript', 'jsx', 'decorators-legacy'],
    errorRecovery: true,
    allowImportExportEverywhere: true,
    allowReturnOutsideFunction: true,
  });
  const chunks: Chunk[] = [];
  const importNodes: { start: number; end: number }[] = [];
  for (const node of (ast as any).program.body) {
    if (!node.loc) continue;
    const start = node.loc.start.line;
    const end = node.loc.end.line;
    if (node.type === 'ImportDeclaration') {
      importNodes.push({ start, end });
      continue;
    }
    const name = extractDeclName(node);
    const kind = mapNodeKind(node);
    const text = extractLines(content, start, end);
    chunks.push({
      id: `${kind}:${name}`,
      kind,
      start_line: start,
      end_line: end,
      hash: shortHash(text),
      content: text,
    });
  }
  if (importNodes.length > 0) {
    const first = importNodes[0].start;
    const last = importNodes[importNodes.length - 1].end;
    const text = extractLines(content, first, last);
    chunks.unshift({
      id: 'import:_block',
      kind: 'import',
      start_line: first,
      end_line: last,
      hash: shortHash(text),
      content: text,
    });
  }
  if (chunks.length === 0) return chunkFixed(content, 100);
  return chunks;
}

function extractDeclName(node: any): string {
  if (node.type === 'FunctionDeclaration') return node.id?.name ?? 'anonymous';
  if (node.type === 'ClassDeclaration') return node.id?.name ?? 'anonymous';
  if (node.type === 'VariableDeclaration') {
    const d = node.declarations?.[0];
    return d?.id?.name ?? 'anonymous';
  }
  if (node.type === 'ExportNamedDeclaration') {
    if (node.declaration) return extractDeclName(node.declaration);
    const first = node.specifiers?.[0];
    return first?.exported?.name ?? 'named_export';
  }
  if (node.type === 'ExportDefaultDeclaration') {
    if (node.declaration?.id?.name) return node.declaration.id.name;
    return 'default';
  }
  if (node.type === 'TSInterfaceDeclaration') return node.id?.name ?? 'interface';
  if (node.type === 'TSTypeAliasDeclaration') return node.id?.name ?? 'type';
  if (node.type === 'TSEnumDeclaration') return node.id?.name ?? 'enum';
  if (node.type === 'TSModuleDeclaration') return node.id?.name ?? 'module';
  return node.type;
}

function mapNodeKind(node: any): ChunkKind {
  const t = node.type;
  if (t === 'FunctionDeclaration') return 'function';
  if (t === 'ClassDeclaration') return 'class';
  if (['VariableDeclaration', 'TSInterfaceDeclaration', 'TSTypeAliasDeclaration', 'TSEnumDeclaration', 'TSModuleDeclaration'].includes(t)) return 'variable';
  if (t === 'ExportNamedDeclaration' || t === 'ExportDefaultDeclaration') {
    if (node.declaration) return mapNodeKind(node.declaration);
    return 'variable';
  }
  return 'variable';
}

// ============================================================
// Python — indent-based
// ============================================================

function chunkPython(content: string): Chunk[] {
  const lines = content.split('\n');
  const chunks: Chunk[] = [];
  let current: { start: number; name: string; kind: 'python_def' | 'python_class' } | null = null;

  const pushCurrent = (end: number) => {
    if (!current) return;
    const text = extractLines(content, current.start, end);
    chunks.push({
      id: `${current.kind}:${current.name}`,
      kind: current.kind,
      start_line: current.start,
      end_line: end,
      hash: shortHash(text),
      content: text,
    });
    current = null;
  };

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    if (line.startsWith(' ') || line.startsWith('\t')) continue;
    const m = line.match(/^(async\s+def|def|class)\s+(\w+)/);
    if (m) {
      pushCurrent(i); // previous ends at the line before current
      const kind: 'python_def' | 'python_class' = m[1] === 'class' ? 'python_class' : 'python_def';
      current = { start: i + 1, name: m[2], kind };
    }
  }
  pushCurrent(lines.length);

  if (chunks.length === 0) return chunkFixed(content, 100);
  if (chunks[0].start_line > 1) {
    const preambleText = extractLines(content, 1, chunks[0].start_line - 1);
    if (preambleText.trim()) {
      chunks.unshift({
        id: 'python_preamble',
        kind: 'python_preamble',
        start_line: 1,
        end_line: chunks[0].start_line - 1,
        hash: shortHash(preambleText),
        content: preambleText,
      });
    }
  }
  return chunks;
}

// ============================================================
// Markdown — h2/h3 boundaries
// ============================================================

function chunkMarkdown(content: string): Chunk[] {
  const lines = content.split('\n');
  const chunks: Chunk[] = [];
  let current: { start: number; heading: string } | null = null;

  const pushCurrent = (end: number) => {
    if (!current) return;
    const text = extractLines(content, current.start, end);
    chunks.push({
      id: `heading:${current.heading}`,
      kind: 'heading',
      start_line: current.start,
      end_line: end,
      hash: shortHash(text),
      content: text,
    });
    current = null;
  };

  for (let i = 0; i < lines.length; i++) {
    const m = lines[i].match(/^(#{2,3})\s+(.+?)\s*$/);
    if (m) {
      pushCurrent(i);
      current = { start: i + 1, heading: m[2] };
    }
  }
  pushCurrent(lines.length);

  if (chunks.length === 0) return chunkFixed(content, 100);
  if (chunks[0].start_line > 1) {
    const preamble = extractLines(content, 1, chunks[0].start_line - 1);
    if (preamble.trim()) {
      chunks.unshift({
        id: 'heading:_preamble',
        kind: 'heading',
        start_line: 1,
        end_line: chunks[0].start_line - 1,
        hash: shortHash(preamble),
        content: preamble,
      });
    }
  }
  return chunks;
}

// ============================================================
// Fixed — 100-line windows (fallback)
// ============================================================

function chunkFixed(content: string, size: number): Chunk[] {
  const lines = content.split('\n');
  const chunks: Chunk[] = [];
  if (lines.length === 0) return chunks;
  for (let i = 0; i < lines.length; i += size) {
    const slice = lines.slice(i, i + size);
    const text = slice.join('\n');
    const end = Math.min(i + size, lines.length);
    chunks.push({
      id: `lines:${i + 1}_${end}`,
      kind: 'fixed',
      start_line: i + 1,
      end_line: end,
      hash: shortHash(text),
      content: text,
    });
  }
  return chunks;
}
```
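The chunker assumes a Chunk shape plus two helpers, extractLines and shortHash, that sit outside this excerpt. A sketch under those assumptions follows; the exact ChunkKind members and hash length are guesses inferred from the calls above.

```typescript
import { createHash } from 'node:crypto';

// Inferred from the chunkers above; the repo's actual union may differ.
type ChunkKind =
  | 'function' | 'class' | 'variable' | 'import'
  | 'python_def' | 'python_class' | 'python_preamble'
  | 'heading' | 'fixed';

interface Chunk {
  id: string;         // stable identity, e.g. 'function:handleReadSmart'
  kind: ChunkKind;
  start_line: number; // 1-based, inclusive
  end_line: number;
  hash: string;       // short content hash used for diffing
  content: string;
}

// Extract a 1-based, inclusive line range from the file content.
function extractLines(content: string, start: number, end: number): string {
  return content.split('\n').slice(start - 1, end).join('\n');
}

// Short per-chunk content hash; the truncation length is an assumption.
function shortHash(text: string): string {
  return createHash('sha256').update(text).digest('hex').slice(0, 16);
}
```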
- src/mcp/server.ts:789-791 (registration): Thin wrapper that calls the imported handleReadSmart implementation (handleReadSmartImpl) from read-smart.ts, passing the database instance and parsed arguments.

```typescript
function handleReadSmart(args: any): string {
  return handleReadSmartImpl(db, { path: args.path, force: args.force });
}
```
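Putting it together: if one function in a previously read file is edited, a subsequent read_smart call returns a 'modified' response shaped like the sketch below. All values here are invented for illustration.

```typescript
const response = {
  ok: true,
  status: 'modified',
  path: '/home/user/project/src/index.ts',
  changed_chunks: [
    { id: 'function:parseConfig', kind: 'function', status: 'modified', start_line: 40, end_line: 72, content: '...' },
  ],
  unchanged_chunks: [
    { id: 'import:_block', kind: 'import', start_line: 1, end_line: 8, hash: 'a1b2c3d4e5f6' },
  ],
  removed_chunks: [],
  summary: { changed: 1, unchanged: 9, removed: 0, tokens_full: 2400, tokens_returned: 380, tokens_saved: 2020, pct_saved: 84 },
};
```

Because chunk ids are derived from declaration names, renaming a function surfaces as one removed chunk plus one added chunk rather than a single modification.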