claude-recall

Overview Schema Related Servers Score Discussions

claude-recall
scripts

analyze-transformations-smart.js•14.3 KiB

#!/usr/bin/env bun
// NOTE: this script imports `bun:sqlite`, which only exists in the Bun runtime,
// so the shebang targets `bun` rather than `node`.
import fs from 'fs';
import { Database } from 'bun:sqlite';
import readline from 'readline';
import path from 'path';
import { homedir } from 'os';
import { globSync } from 'glob';

// =============================================================================
// TOOL REPLACEMENT DECISION TABLE
// =============================================================================
//
// KEY INSIGHT: Observations are the SEMANTIC SYNTHESIS of tool results.
// They contain what Claude LEARNED, which is what future Claude needs.
//
// Tool              | Replace OUTPUT? | Reason
// ------------------|-----------------|----------------------------------------
// Read              | ✅ YES          | Observation = what was learned from file
// Bash              | ✅ YES          | Observation = what command revealed
// Grep              | ✅ YES          | Observation = what search found
// Task              | ✅ YES          | Observation = what agent discovered
// WebFetch          | ✅ YES          | Observation = what page contained
// Glob              | ⚠️ MAYBE        | File lists are often small already
// WebSearch         | ⚠️ MAYBE        | Results are moderate size
// Edit              | ❌ NO           | OUTPUT is tiny ("success"), INPUT is ground truth
// Write             | ❌ NO           | OUTPUT is tiny, INPUT is the file content
// NotebookEdit      | ❌ NO           | OUTPUT is tiny, INPUT is the code
// TodoWrite         | ❌ NO           | Both tiny
// AskUserQuestion   | ❌ NO           | Both small, user input matters
// mcp__*            | ⚠️ MAYBE        | Varies by tool
//
// NEVER REPLACE INPUT - it contains the action (diff, command, query, path)
// ONLY REPLACE OUTPUT - swap raw results for semantic synthesis (observation)
//
// REPLACEMENT FORMAT:
// Original output gets replaced with:
// "[Strategically Omitted by claude-recall to save tokens]
//
// [Observation: Title here]
// Facts: ...
// Concepts: ..."
// =============================================================================

// -----------------------------------------------------------------------------
// Configuration
// -----------------------------------------------------------------------------

const DB_PATH = path.join(homedir(), '.claude-recall', 'claude-recall.db');

const DEFAULT_MAX_TRANSCRIPTS = 500;

/**
 * Parse the MAX_TRANSCRIPTS environment value.
 *
 * Falls back to the default when the value is missing, non-numeric, or not a
 * positive integer. Without this guard a bad value becomes NaN and
 * `slice(0, NaN)` silently selects zero transcripts.
 *
 * @param {string | undefined} raw - Raw environment variable value.
 * @returns {number} A positive integer transcript limit.
 */
function parseMaxTranscripts(raw) {
  const parsed = Number.parseInt(raw ?? '', 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : DEFAULT_MAX_TRANSCRIPTS;
}

const MAX_TRANSCRIPTS = parseMaxTranscripts(process.env.MAX_TRANSCRIPTS);

// Directory holding the session transcripts. The TRANSCRIPT_DIR environment
// variable overrides the built-in default project path.
const TRANSCRIPT_DIR = process.env.TRANSCRIPT_DIR ||
  path.join(homedir(), '.claude/projects/-Users-alexnewman-Scripts-claude-recall');

// Number of tool use IDs matched per SQL statement. Each ID binds two
// parameters, so this keeps a statement well under SQLite's bound-parameter
// and expression-depth limits.
const QUERY_BATCH_SIZE = 200;

// Find transcript files
const allTranscriptFiles = globSync(path.join(TRANSCRIPT_DIR, '*.jsonl'));

// Sort by modification time (most recent first), take MAX_TRANSCRIPTS
const transcriptFiles = allTranscriptFiles
  .map(f => ({ path: f, mtime: fs.statSync(f).mtime }))
  .sort((a, b) => b.mtime - a.mtime)
  .slice(0, MAX_TRANSCRIPTS)
  .map(f => f.path);

console.log(`Config: MAX_TRANSCRIPTS=${MAX_TRANSCRIPTS}`);
console.log(`Using ${transcriptFiles.length} most recent transcript files (of ${allTranscriptFiles.length} total)\n`);

// Map of tool_use_id -> { input, output, name } captured from transcripts
const originalContent = new Map();

// Count of contaminated (already transformed) transcripts
let skippedTranscripts = 0;

// Marker for already-transformed content (endless mode replacement format)
const TRANSFORMATION_MARKER = '**Key Facts:**';

/**
 * Auto-discover agent transcripts linked to a main session transcript.
 *
 * Scans the transcript for `toolUseResult.agentId` values and maps each one
 * to `agent-<id>.jsonl` in the same directory, keeping only files that exist.
 *
 * @param {string} mainTranscriptPath - Path to the main session `.jsonl` file.
 * @returns {Promise<string[]>} Paths of the agent transcript files found on disk.
 */
async function discoverAgentFiles(mainTranscriptPath) {
  console.log('Discovering linked agent transcripts...');

  const agentIds = new Set();
  const fileStream = fs.createReadStream(mainTranscriptPath);
  const rl = readline.createInterface({ input: fileStream, crlfDelay: Infinity });

  for await (const line of rl) {
    // Cheap substring pre-filter so most lines are never JSON-parsed
    if (!line.includes('agentId')) continue;

    try {
      const obj = JSON.parse(line);
      if (obj.toolUseResult?.agentId) {
        agentIds.add(obj.toolUseResult.agentId);
      }
    } catch (e) {
      // Deliberate best-effort: skip malformed lines
    }
  }

  // Build agent file paths next to the main transcript
  const directory = path.dirname(mainTranscriptPath);
  const agentFiles = Array.from(agentIds)
    .map(id => path.join(directory, `agent-${id}.jsonl`))
    .filter(filePath => fs.existsSync(filePath));

  console.log(` → Found ${agentIds.size} agent IDs`);
  console.log(` → ${agentFiles.length} agent files exist on disk\n`);

  return agentFiles;
}

/**
 * Parse one transcript and record BOTH tool_use (inputs) and tool_result
 * (outputs) into `originalContent`.
 *
 * If any tool_result contains TRANSFORMATION_MARKER the transcript is treated
 * as contaminated: every ID touched by this file is removed again.
 *
 * @param {string} filePath - Transcript file to read.
 * @param {string} fileLabel - Human-readable label used in log output.
 * @returns {Promise<boolean>} true if the transcript is clean, false if contaminated.
 */
async function loadOriginalContentFromFile(filePath, fileLabel) {
  const fileStream = fs.createReadStream(filePath);
  const rl = readline.createInterface({ input: fileStream, crlfDelay: Infinity });

  let count = 0;
  let isContaminated = false;
  const toolUseIdsFromThisFile = new Set();

  for await (const line of rl) {
    // Cheap substring pre-filter: only lines mentioning a tool use ID matter
    if (!line.includes('toolu_')) continue;

    try {
      const obj = JSON.parse(line);
      const items = obj.message?.content;
      // Message content can also be a plain string; only arrays hold tool items
      if (!Array.isArray(items)) continue;

      for (const item of items) {
        if (!item) continue;

        // Capture tool_use (inputs)
        if (item.type === 'tool_use' && item.id) {
          const existing = originalContent.get(item.id) || { input: '', output: '', name: '' };
          existing.input = JSON.stringify(item.input || {});
          existing.name = item.name;
          originalContent.set(item.id, existing);
          toolUseIdsFromThisFile.add(item.id);
          count++;
        }

        // Capture tool_result (outputs)
        if (item.type === 'tool_result' && item.tool_use_id) {
          // JSON.stringify(undefined) returns undefined, so fall back to ''
          // instead of throwing on `.includes` and dropping the rest of the line
          const content = typeof item.content === 'string'
            ? item.content
            : (JSON.stringify(item.content) ?? '');

          // Transformation marker present => transcript is contaminated
          if (content.includes(TRANSFORMATION_MARKER)) {
            isContaminated = true;
          }

          const existing = originalContent.get(item.tool_use_id) || { input: '', output: '', name: '' };
          existing.output = content;
          originalContent.set(item.tool_use_id, existing);
          toolUseIdsFromThisFile.add(item.tool_use_id);
        }
      }
    } catch (e) {
      // Deliberate best-effort: skip malformed lines
    }
  }

  // If contaminated, remove all data from this file and report
  if (isContaminated) {
    for (const id of toolUseIdsFromThisFile) {
      originalContent.delete(id);
    }
    console.log(` ⚠️ Skipped ${fileLabel} (already transformed)`);
    return false;
  }

  if (count > 0) {
    console.log(` → Found ${count} tool uses in ${fileLabel}`);
  }
  return true;
}

/**
 * Load original tool inputs/outputs from every selected transcript, then from
 * any linked agent transcripts that were not already in the selection.
 *
 * @returns {Promise<void>}
 */
async function loadOriginalContent() {
  console.log('Loading original content from transcripts...');
  console.log(` → Scanning ${transcriptFiles.length} transcript files...\n`);

  let cleanTranscripts = 0;
  // Tracks every file already loaded so each one is read and counted only once
  const processedFiles = new Set(transcriptFiles);

  // Load from all transcript files
  for (const transcriptFile of transcriptFiles) {
    const filename = path.basename(transcriptFile);
    const isClean = await loadOriginalContentFromFile(transcriptFile, filename);
    if (isClean) {
      cleanTranscripts++;
    } else {
      skippedTranscripts++;
    }
  }

  // Also check for any agent files not already included
  for (const transcriptFile of transcriptFiles) {
    // Test the file name, not the whole path: a directory containing
    // "agent-" must not cause every transcript to be skipped
    if (path.basename(transcriptFile).startsWith('agent-')) continue;

    const agentFiles = await discoverAgentFiles(transcriptFile);
    for (const agentFile of agentFiles) {
      if (processedFiles.has(agentFile)) continue; // Already processed
      processedFiles.add(agentFile);

      const filename = path.basename(agentFile);
      const isClean = await loadOriginalContentFromFile(agentFile, `agent transcript (${filename})`);
      if (!isClean) {
        skippedTranscripts++;
      }
    }
  }

  console.log(`\nTotal: Loaded original content for ${originalContent.size} tool uses (inputs + outputs)`);
  if (skippedTranscripts > 0) {
    console.log(`⚠️ Skipped ${skippedTranscripts} transcripts (already transformed with endless mode)`);
  }
  console.log();
}

/**
 * Strip a trailing `__N` suffix from a tool_use_id to get the base ID.
 *
 * @param {string | null | undefined} id - Possibly suffixed tool use ID.
 * @returns {string | null | undefined} The base ID; falsy input is returned unchanged.
 */
function getBaseToolUseId(id) {
  return id ? id.replace(/__\d+$/, '') : id;
}

/**
 * Escape LIKE metacharacters so a value matches literally when the pattern is
 * used with `ESCAPE '\'`.
 *
 * @param {string} value - Raw text to embed in a LIKE pattern.
 * @returns {string} Text with `\`, `%` and `_` backslash-escaped.
 */
function escapeLikePattern(value) {
  return value.replace(/[\\%_]/g, '\\$&');
}

const OBSERVATION_COLUMNS = `
    id, tool_use_id, type, narrative, title, facts, concepts,
    LENGTH(COALESCE(facts,'')) as facts_len,
    LENGTH(COALESCE(title,'')) + LENGTH(COALESCE(facts,'')) as title_facts_len,
    LENGTH(COALESCE(title,'')) + LENGTH(COALESCE(facts,'')) + LENGTH(COALESCE(concepts,'')) as compact_len,
    LENGTH(COALESCE(narrative,'')) as narrative_len,
    LENGTH(COALESCE(title,'')) + LENGTH(COALESCE(narrative,'')) + LENGTH(COALESCE(facts,'')) + LENGTH(COALESCE(concepts,'')) as full_obs_len`;

/**
 * Query observations from the database for the tool_use_ids found in transcripts.
 *
 * Matches both exact IDs and suffixed variants (toolu_abc, toolu_abc__1, ...).
 * IDs are queried in batches so the statement stays within SQLite's
 * bound-parameter and expression-depth limits. The `_` characters in the IDs
 * are escaped so LIKE treats them literally, and rows are kept only when their
 * base ID is a known transcript ID.
 *
 * Rows are ordered newest-first within each batch; the downstream analysis
 * only sums lengths per tool use, so it does not depend on a global order.
 *
 * @returns {object[]} Observation rows including the computed `*_len` columns.
 */
function queryObservations() {
  // Get tool_use_ids from the loaded transcript content
  const toolUseIds = Array.from(originalContent.keys());

  if (toolUseIds.length === 0) {
    console.log('No tool use IDs found in transcripts\n');
    return [];
  }

  console.log(`Querying observations for ${toolUseIds.length} tool use IDs from transcripts...`);

  const db = new Database(DB_PATH, { readonly: true });
  const observations = [];

  try {
    for (let start = 0; start < toolUseIds.length; start += QUERY_BATCH_SIZE) {
      const batch = toolUseIds.slice(start, start + QUERY_BATCH_SIZE);

      // Exact ID, or the ID followed by a literal "__" suffix
      const conditions = batch
        .map(() => `(tool_use_id = ? OR tool_use_id LIKE ? ESCAPE '\\')`)
        .join(' OR ');
      const params = batch.flatMap(id => [id, `${escapeLikePattern(id)}\\_\\_%`]);

      const query = `
        SELECT ${OBSERVATION_COLUMNS}
        FROM observations
        WHERE ${conditions}
        ORDER BY created_at DESC
      `;

      for (const row of db.prepare(query).all(...params)) {
        // LIKE cannot require a numeric suffix; enforce the same rule as getBaseToolUseId
        if (originalContent.has(getBaseToolUseId(row.tool_use_id))) {
          observations.push(row);
        }
      }
    }
  } finally {
    // Always release the database handle, even when a query throws
    db.close();
  }

  console.log(`Found ${observations.length} observations matching tool use IDs (including suffixed variants)\n`);

  return observations;
}

// Tools eligible for OUTPUT replacement (observation = semantic synthesis of result)
const REPLACEABLE_TOOLS = new Set(['Read', 'Bash', 'Grep', 'Task', 'WebFetch', 'Glob', 'WebSearch']);

/**
 * Analyze OUTPUT-only replacement for eligible tools and print a report.
 *
 * For each strategy (a choice of observation fields) this compares the summed
 * observation length for a tool use against the original output length, and
 * counts a "transform" only when the observation is non-empty and shorter.
 *
 * @param {object[]} observations - Rows returned by queryObservations().
 * @returns {void}
 */
function analyzeTransformations(observations) {
  console.log('='.repeat(110));
  console.log('OUTPUT REPLACEMENT ANALYSIS (Eligible Tools Only)');
  console.log('='.repeat(110));
  console.log();
  console.log('Eligible tools:', Array.from(REPLACEABLE_TOOLS).join(', '));
  console.log();

  // Group observations by BASE tool_use_id (strip __N suffix)
  // This groups toolu_abc, toolu_abc__1, toolu_abc__2 together
  const obsByToolId = new Map();
  observations.forEach(obs => {
    const baseId = getBaseToolUseId(obs.tool_use_id);
    if (!obsByToolId.has(baseId)) {
      obsByToolId.set(baseId, []);
    }
    obsByToolId.get(baseId).push(obs);
  });

  // Define strategies to test
  const strategies = [
    { name: 'facts_only', field: 'facts_len', desc: 'Facts only (~400 chars)' },
    { name: 'title_facts', field: 'title_facts_len', desc: 'Title + Facts (~450 chars)' },
    { name: 'compact', field: 'compact_len', desc: 'Title + Facts + Concepts (~500 chars)' },
    { name: 'narrative', field: 'narrative_len', desc: 'Narrative only (~700 chars)' },
    { name: 'full', field: 'full_obs_len', desc: 'Full observation (~1200 chars)' }
  ];

  // Track results per strategy
  const results = {};
  strategies.forEach(s => {
    results[s.name] = { transforms: 0, noTransform: 0, saved: 0, totalOriginal: 0 };
  });

  // Track stats
  let eligible = 0;
  let ineligible = 0;
  let noTranscript = 0;
  const toolCounts = {};

  // Analyze each tool use
  obsByToolId.forEach((obsArray, toolUseId) => {
    const original = originalContent.get(toolUseId);
    const toolName = original?.name || 'unknown';
    const outputLen = original?.output?.length || 0;

    // Skip if no transcript data
    if (!original || outputLen === 0) {
      noTranscript++;
      return;
    }

    // Skip if tool not eligible for replacement
    if (!REPLACEABLE_TOOLS.has(toolName)) {
      ineligible++;
      return;
    }

    eligible++;
    toolCounts[toolName] = (toolCounts[toolName] || 0) + 1;

    // Test each strategy - OUTPUT replacement only.
    // Lengths are summed across ALL observations for this tool use.
    strategies.forEach(strategy => {
      const obsLen = obsArray.reduce((sum, obs) => sum + (obs[strategy.field] || 0), 0);
      const r = results[strategy.name];
      r.totalOriginal += outputLen;

      if (obsLen > 0 && obsLen < outputLen) {
        r.transforms++;
        r.saved += (outputLen - obsLen);
      } else {
        r.noTransform++;
      }
    });
  });

  // Print results
  console.log('TOOL BREAKDOWN:');
  Object.entries(toolCounts)
    .sort((a, b) => b[1] - a[1])
    .forEach(([tool, count]) => {
      console.log(` ${tool}: ${count}`);
    });
  console.log();

  console.log('-'.repeat(100));
  console.log(`Eligible tool uses: ${eligible}`);
  console.log(`Ineligible (Edit/Write/etc): ${ineligible}`);
  console.log(`No transcript data: ${noTranscript}`);
  console.log('-'.repeat(100));
  console.log();

  // Header and rows share the same column widths so the table stays aligned.
  // The first column grows to fit the longest strategy description.
  const descWidth = Math.max(35, ...strategies.map(s => s.desc.length));
  console.log([
    'Strategy'.padEnd(descWidth),
    'Transforms'.padStart(10),
    'No Transform'.padStart(12),
    'Chars Saved'.padStart(13),
    'Original Size'.padStart(15),
    'Savings %'.padStart(9)
  ].join(' '));
  console.log('-'.repeat(100));

  strategies.forEach(strategy => {
    const r = results[strategy.name];
    const pct = r.totalOriginal > 0 ? ((r.saved / r.totalOriginal) * 100).toFixed(1) : '0.0';
    console.log([
      strategy.desc.padEnd(descWidth),
      String(r.transforms).padStart(10),
      String(r.noTransform).padStart(12),
      r.saved.toLocaleString().padStart(13),
      r.totalOriginal.toLocaleString().padStart(15),
      `${pct.padStart(8)}%`
    ].join(' '));
  });

  console.log('-'.repeat(100));
  console.log();

  // Find best strategy (the first one with the strictly largest savings)
  let bestStrategy = null;
  let bestSavings = 0;
  strategies.forEach(strategy => {
    if (results[strategy.name].saved > bestSavings) {
      bestSavings = results[strategy.name].saved;
      bestStrategy = strategy;
    }
  });

  // bestStrategy is only set when saved > 0, which implies eligible > 0 and
  // totalOriginal > 0, so the divisions below cannot divide by zero
  if (bestStrategy) {
    const r = results[bestStrategy.name];
    const pct = ((r.saved / r.totalOriginal) * 100).toFixed(1);
    console.log(`BEST STRATEGY: ${bestStrategy.desc}`);
    console.log(` - Transforms ${r.transforms} of ${eligible} eligible tool uses (${((r.transforms / eligible) * 100).toFixed(1)}%)`);
    console.log(` - Saves ${r.saved.toLocaleString()} of ${r.totalOriginal.toLocaleString()} chars (${pct}% reduction)`);
  }

  console.log();
}

/**
 * Main execution: load transcripts, query matching observations, print analysis.
 *
 * @returns {Promise<void>}
 */
async function main() {
  await loadOriginalContent();
  const observations = queryObservations();
  analyzeTransformations(observations);
}

main().catch(error => {
  console.error('Fatal error:', error);
  process.exit(1);
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/nhevers/claude-recall'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

analyze-transformations-smart.js•14.3 KiB