Claude Historian

parser.ts•25 KiB

import { createReadStream } from 'fs';
import { createInterface } from 'readline';
import { join } from 'path';
import { ClaudeMessage, CompactMessage, ConversationSession } from './types.js';
import {
  getClaudeProjectsPath,
  decodeProjectPath,
  extractContentFromMessage,
  calculateRelevanceScore,
  formatTimestamp,
} from './utils.js';

/**
 * Reads Claude conversation logs stored as JSONL files and turns each line
 * into a `CompactMessage`: content shortened to an adaptive limit plus a
 * `context` object holding files, tools, errors, insights, code snippets and
 * action items found in the message.
 *
 * The parser also keeps per-session bookkeeping (start/end time, message
 * count) for every message it has processed; see `getSession` and
 * `getAllSessions`.
 */
export class ConversationParser {
  private sessions: Map<string, ConversationSession> = new Map();

  /**
   * Parses one JSONL file line by line.
   *
   * @param projectDir - Project directory name, joined under `getClaudeProjectsPath()`.
   * @param filename - JSONL file name inside `projectDir`.
   * @param query - When given, each message gets a relevance score via
   *   `calculateRelevanceScore`; otherwise the score is 0.
   * @param timeFilter - When given, messages whose raw timestamp makes it
   *   return `false` are skipped.
   * @returns The compact messages that were parsed. Failures are logged and
   *   never thrown: a line that fails during processing is skipped, and a file
   *   that cannot be read yields whatever was collected so far.
   */
  async parseJsonlFile(
    projectDir: string,
    filename: string,
    query?: string,
    timeFilter?: (timestamp: string) => boolean
  ): Promise<CompactMessage[]> {
    const messages: CompactMessage[] = [];
    const filePath = join(getClaudeProjectsPath(), projectDir, filename);

    try {
      const fileStream = createReadStream(filePath, { encoding: 'utf8' });
      const rl = createInterface({
        input: fileStream,
        crlfDelay: Infinity,
      });

      for await (const line of rl) {
        if (!line.trim()) continue;

        try {
          const claudeMessage: ClaudeMessage = JSON.parse(line);

          // Apply time filter if provided
          if (timeFilter && !timeFilter(claudeMessage.timestamp)) {
            continue;
          }

          const content = extractContentFromMessage(claudeMessage.message || {});
          if (!content) continue;

          const compactMessage: CompactMessage = {
            uuid: claudeMessage.uuid,
            timestamp: formatTimestamp(claudeMessage.timestamp),
            type: claudeMessage.type,
            // Adaptive limit based on content type
            content: this.smartContentPreservation(content, this.getContentLimit(content)),
            sessionId: claudeMessage.sessionId,
            projectPath: decodeProjectPath(projectDir),
            relevanceScore: query ? calculateRelevanceScore(claudeMessage, query, projectDir) : 0,
            context: this.extractContext(claudeMessage, content),
          };

          messages.push(compactMessage);

          // Track session info
          this.updateSessionInfo(claudeMessage, projectDir);
        } catch (parseError) {
          // Gracefully handle corrupted JSONL lines. Any exception thrown while
          // processing the line (not only JSON.parse) lands here.
          console.warn(`Skipping malformed line in ${filename}:`, parseError);
          continue;
        }
      }
    } catch (error) {
      console.error(`Error reading file ${filename}:`, error);
    }

    return messages;
  }

  /**
   * Builds the `context` object for a message from its text and from any
   * `tool_use` items in its structured content.
   *
   * @param message - The raw message; only `type` and `message.content` are read.
   * @param content - The extracted plain-text content of the message.
   * @returns The populated context, or `undefined` when nothing was found.
   */
  private extractContext(message: ClaudeMessage, content: string): CompactMessage['context'] {
    const context: NonNullable<CompactMessage['context']> = {};

    // File reference patterns. `captureGroup` marks patterns where only a
    // capture group (not the whole match) is the file path.
    const filePatterns: Array<{ pattern: RegExp; captureGroup?: number }> = [
      // Standard file extensions
      {
        pattern:
          /[\w\-/\\.]+\.(ts|tsx|js|jsx|json|md|py|java|cpp|c|h|css|html|yml|yaml|toml|rs|go|txt|log|env|config|gitignore|lock|sql|sh|bat|php|rb|swift|kt|scala|fs|clj|ex|elm|vue|svelte|astro)(?:\b|$)/gi,
      },
      // File paths in git status output: keep the path, not the "modified:" label
      {
        pattern: /(?:modified|added|deleted|new file|renamed):\s+([^\n\r\t]+)/gi,
        captureGroup: 1,
      },
      // File paths with common prefixes
      {
        pattern:
          /(?:src\/|\.\/|\.\.\/|~\/|\/)[^\s]+\.(ts|tsx|js|jsx|json|md|py|java|cpp|c|h|css|html|yml|yaml|toml|rs|go|txt|log|env|config|gitignore|lock|sql|sh|bat|php|rb|swift|kt|scala|fs|clj|ex|elm|vue|svelte|astro)/gi,
      },
      // Standalone common files like CLAUDE.md, README.md, package.json
      {
        pattern:
          /\b(CLAUDE\.md|README\.md|package\.json|tsconfig\.json|next\.config\.js|tailwind\.config\.js|vite\.config\.js|webpack\.config\.js|babel\.config\.js|eslint\.config\.js|prettier\.config\.js|\.env|\.gitignore|Dockerfile|docker-compose\.yml)\b/gi,
      },
      { pattern: /src\/[\w\-/\\.]+/gi },
      { pattern: /\.\/[\w\-/\\.]+/gi },
    ];

    const files = new Set<string>();
    for (const { pattern, captureGroup } of filePatterns) {
      if (captureGroup === undefined) {
        const matches = content.match(pattern);
        if (matches) {
          matches.forEach((found) => files.add(found));
        }
        continue;
      }

      // String.match with the `g` flag discards capture groups, so walk the
      // matches with exec to reach the captured path.
      pattern.lastIndex = 0;
      let groupMatch: RegExpExecArray | null;
      while ((groupMatch = pattern.exec(content)) !== null) {
        const captured = groupMatch[captureGroup]?.trim();
        if (captured) files.add(captured);
      }
    }

    // Extract tool usage from multiple sources
    const tools = new Set<string>();
    const bashCommands: string[] = [];

    // Source 1: tool_use items in the structured message content. This runs
    // once for every message type; running it again for assistant messages
    // would only duplicate the bash command entries.
    if (message.message?.content) {
      const toolContent = Array.isArray(message.message.content)
        ? message.message.content
        : [message.message.content];

      toolContent
        .filter((item) => item && item.type === 'tool_use' && item.name)
        .forEach((item) => {
          // Strip the MCP server prefix and separators from the tool name
          const cleanName = item.name.replace(/^mcp__.*?__/, '').replace(/[_-]/g, '');
          if (cleanName) tools.add(cleanName);

          if (!item.input) return;
          const input = item.input as Record<string, unknown>;

          // Check common file path parameter names
          const filePath = input.file_path || input.filepath || input.path || input.notebook_path;
          if (filePath && typeof filePath === 'string') {
            files.add(filePath);
          }

          // For tools that work with patterns or globs
          const globPattern = input.pattern;
          if (typeof globPattern === 'string' && globPattern.includes('/')) {
            files.add(globPattern);
          }

          // Extract bash commands for tool pattern analysis (first 100 chars)
          const command = input.command;
          if (command && typeof command === 'string') {
            bashCommands.push(command.substring(0, 100));
          }
        });
    }

    // Publish files only after tool parameters have been scanned, so paths
    // found in tool inputs are part of the result.
    if (files.size > 0) {
      context.filesReferenced = Array.from(files);
    }
    if (bashCommands.length > 0) {
      context.bashCommands = bashCommands;
    }

    // Source 2: tool usage patterns in the content text
    const toolPatterns = [
      /\[Tool:\s*(\w+)\]/gi, // Matches [Tool: Read], [Tool: Edit], etc.
      /Called the (\w+) tool/gi, // Matches "Called the Read tool"
      /\bmcp__[\w-]+__([\w-]+)/gi, // MCP tool calls
      /Result of calling the (\w+) tool/gi, // Tool results
      /tool_use.*?"name":\s*"([^"]+)"/gi, // JSON tool_use name extraction
    ];

    toolPatterns.forEach((pattern) => {
      // Reset the regex to ensure we start from the beginning
      pattern.lastIndex = 0;
      let match: RegExpExecArray | null;
      while ((match = pattern.exec(content)) !== null) {
        if (match[1]) {
          const cleanName = match[1].replace(/^mcp__.*?__/, '').replace(/[_-]/g, '');
          if (cleanName) tools.add(cleanName);
        }
        // Prevent infinite loop on zero-length matches
        if (match.index === pattern.lastIndex) {
          pattern.lastIndex++;
        }
      }
    });

    if (tools.size > 0) {
      context.toolsUsed = Array.from(tools);
    }

    // Extract error patterns - broadened to capture common Unix/Node/JS errors
    const errorPatterns = [
      /error[:\s]+([^\n]+)/gi,
      /failed[:\s]+([^\n]+)/gi,
      /exception[:\s]+([^\n]+)/gi,
      /cannot[:\s]+([^\n]+)/gi,
      /unable to[:\s]+([^\n]+)/gi,
      // Unix/Node system errors (without prefix requirement)
      /(ENOENT|EACCES|ETIMEDOUT|ECONNREFUSED|EPERM|EEXIST|ENOTDIR|EISDIR)[:\s]+([^\n]+)/gi,
      // JavaScript error types
      /(TypeError|ReferenceError|SyntaxError|RangeError|URIError)[:\s]+([^\n]+)/gi,
      // Common error phrases without prefix
      /permission denied[:\s]*([^\n]*)/gi,
      /connection refused[:\s]*([^\n]*)/gi,
      /module not found[:\s]*([^\n]*)/gi,
      /command not found[:\s]*([^\n]*)/gi,
      /no such file[:\s]*([^\n]*)/gi,
      /not found[:\s]*([^\n]*)/gi,
    ];

    // The whole match (keyword included) is kept, capped at 100 characters.
    const errors = new Set<string>();
    errorPatterns.forEach((pattern) => {
      const matches = content.match(pattern);
      if (matches) {
        matches.forEach((found) => errors.add(found.substring(0, 100)));
      }
    });

    if (errors.size > 0) {
      context.errorPatterns = Array.from(errors);
    }

    // Extract insights (solutions, explanations) from assistant messages only
    if (message.type === 'assistant') {
      const insights = this.extractClaudeInsights(content);
      if (insights.length > 0) {
        context.claudeInsights = insights;
      }
    }

    // Extract code snippets and technical solutions
    const codeSnippets = this.extractCodeSnippets(content);
    if (codeSnippets.length > 0) {
      context.codeSnippets = codeSnippets;
    }

    // Extract action items and next steps
    const actionItems = this.extractActionItems(content);
    if (actionItems.length > 0) {
      context.actionItems = actionItems;
    }

    return Object.keys(context).length > 0 ? context : undefined;
  }

  /**
   * Picks the character budget for a message based on its detected content
   * type: code gets the most room, conversational text the least.
   */
  private getContentLimit(content: string): number {
    const contentType = this.detectContentType(content);
    switch (contentType) {
      case 'code':
        return 4000; // More space for code blocks
      case 'error':
        return 3500; // Errors need full context
      case 'technical':
        return 3500;
      default:
        return 3000;
    }
  }

  /**
   * Shortens `content` to at most `maxLength` characters while trying to keep
   * its most useful parts.
   *
   * Content that already fits is returned unchanged. Otherwise the
   * value-based extraction is tried first; if it produces nothing (for
   * example a message with a code fence but no function, error or solution
   * section), the strategy matching the detected content type is used instead
   * so the message is never reduced to an empty string.
   *
   * @param content - Text to shorten.
   * @param maxLength - Character budget for the result.
   */
  public smartContentPreservation(content: string, maxLength: number): string {
    if (content.length <= maxLength) return content;

    // First, extract the most valuable sentences/paragraphs
    const valuableContent = this.extractMostValuableContent(content, maxLength);
    if (valuableContent.length > 0 && valuableContent.length <= maxLength) {
      return valuableContent;
    }

    // Detect content type and apply appropriate strategy
    const contentType = this.detectContentType(content);

    switch (contentType) {
      case 'code':
        return this.preserveCodeBlocks(content, maxLength);
      case 'error':
        return this.preserveErrorMessages(content, maxLength);
      case 'technical':
        return this.preserveTechnicalContent(content, maxLength);
      default:
        return this.intelligentTruncation(content, maxLength);
    }
  }

  /**
   * Classifies content with plain substring and regex heuristics. Checks run
   * in order code, error, technical; the first that matches wins.
   */
  private detectContentType(content: string): 'code' | 'error' | 'technical' | 'conversational' {
    // Code block detection
    if (
      content.includes('```') ||
      content.includes('function ') ||
      content.includes('const ') ||
      content.includes('import ') ||
      content.includes('export ') ||
      content.match(/\{\s*\n.*\}\s*$/s)
    ) {
      return 'code';
    }

    // Error message detection: an error keyword plus something location-like
    if (
      content.match(/(error|exception|failed|cannot|unable to|stack trace)/i) &&
      content.match(/at \w+|line \d+|:\d+:\d+/)
    ) {
      return 'error';
    }

    // Technical content detection
    if (
      content.match(/\.(ts|js|json|md|py|java|cpp|rs|go|yml|yaml)\b/) ||
      content.includes('src/') ||
      content.includes('./') ||
      content.match(/\w+:\d+/) ||
      content.includes('tool_use')
    ) {
      return 'technical';
    }

    return 'conversational';
  }

  /**
   * Keeps complete fenced code blocks, in order, while they fit. The first
   * block that does not fit is cut short and prefixed with up to 100
   * characters of the text preceding it. Without any complete fenced block
   * the work is delegated to `preserveTechnicalContent`.
   */
  private preserveCodeBlocks(content: string, maxLength: number): string {
    const codeBlockRegex = /```[\s\S]*?```/g;
    const codeBlocks = content.match(codeBlockRegex) || [];

    if (codeBlocks.length === 0) {
      // No code blocks, preserve function definitions and imports
      return this.preserveTechnicalContent(content, maxLength);
    }

    let preserved = '';
    let remainingLength = maxLength;

    for (const block of codeBlocks) {
      if (block.length <= remainingLength) {
        preserved += block + '\n';
        remainingLength -= block.length + 1;
        continue;
      }

      // The whole block does not fit: include leading context and truncate,
      // but only when there is room left for part of the block itself.
      const contextBefore = content.substring(0, content.indexOf(block)).slice(-100);
      const room = remainingLength - contextBefore.length - 3;
      if (room > 0) {
        preserved += contextBefore + block.substring(0, room) + '...';
      }
      break;
    }

    // Nothing could be kept within the budget: fall back to plain truncation
    // instead of returning an empty string.
    return preserved.trim() || this.intelligentTruncation(content, maxLength);
  }

  /**
   * Keeps the first error section when it fits, otherwise the start of the
   * content plus the trailing stack trace, otherwise a plain truncation. The
   * result is kept within `maxLength`.
   */
  private preserveErrorMessages(content: string, maxLength: number): string {
    const errorRegex = /(error|exception|failed)[\s\S]*?(\n\n|\n(?=[A-Z])|$)/gi;
    const errors = content.match(errorRegex) || [];

    const mainError = errors[0];
    if (mainError && mainError.length <= maxLength) {
      const suffix = errors.length > 1 ? '\n... (additional errors truncated)' : '';
      // Only append the note when it does not push the result over the budget
      return mainError.length + suffix.length <= maxLength ? mainError + suffix : mainError;
    }

    // If error is too long, preserve the beginning and any stack trace
    const stackTrace = content.match(/at [\s\S]*$/);
    if (stackTrace) {
      const headLength = maxLength - stackTrace[0].length - 10;
      // A non-positive head means the trace alone would blow the budget
      if (headLength > 0) {
        return content.substring(0, headLength) + '\n...\n' + stackTrace[0];
      }
    }

    return this.intelligentTruncation(content, maxLength);
  }

  /**
   * Appends a " | "-joined summary of file paths, definitions, tool names and
   * inline code to the start of the content, when the summary and its
   * separator fit in `maxLength`. Otherwise falls back to plain truncation.
   */
  private preserveTechnicalContent(content: string, maxLength: number): string {
    const technicalElements: string[] = [];

    // File paths and line numbers
    const filePaths =
      content.match(/[\w\-/\\.]+\.(ts|js|json|md|py|java|cpp|rs|go|yml|yaml)(?::\d+)?/g) || [];
    technicalElements.push(...filePaths);

    // Function definitions
    const functions = content.match(/(function \w+|const \w+ =|export \w+|class \w+)/g) || [];
    technicalElements.push(...functions);

    // Tool usage
    const tools = content.match(/tool_use.*?"name":\s*"([^"]+)"/g) || [];
    technicalElements.push(...tools);

    // Commands
    const commands = content.match(/`[^`]+`/g) || [];
    technicalElements.push(...commands);

    if (technicalElements.length > 0) {
      const preserved = technicalElements.join(' | ');
      // 20 characters are reserved for the separator line
      const contextLength = maxLength - preserved.length - 20;
      if (contextLength >= 0) {
        // Add some context around the technical elements
        const leadingContext = content.substring(0, contextLength);
        return leadingContext + '\n--- Key elements: ' + preserved;
      }
    }

    return this.intelligentTruncation(content, maxLength);
  }

  /**
   * Cuts content at the last natural boundary (paragraph, sentence, line,
   * comma, space) that lies in the final 30% of the budget, and appends
   * "...". Falls back to a hard cut when no such boundary exists.
   */
  private intelligentTruncation(content: string, maxLength: number): string {
    if (content.length <= maxLength) return content;

    // Try to truncate at natural boundaries, most preferred first
    const boundaries = ['\n\n', '. ', '! ', '? ', '\n', ', ', ' '];

    for (const boundary of boundaries) {
      const lastBoundary = content.lastIndexOf(boundary, maxLength - 3);
      if (lastBoundary > maxLength * 0.7) {
        // Don't truncate too early
        return content.substring(0, lastBoundary) + '...';
      }
    }

    // Fallback to character limit with ellipsis
    return content.substring(0, maxLength - 3) + '...';
  }

  /**
   * Collects up to three "Solution: ..." / "Explanation: ..." snippets from
   * text that follows solution- and explanation-style phrases. Solutions are
   * collected first, so they take precedence in the cap.
   */
  private extractClaudeInsights(content: string): string[] {
    const insights: string[] = [];

    // Solution patterns
    const solutionPatterns = [
      /(?:solution|fix|resolve|answer)[:\s]*([^\n.]{20,200})/gi,
      /(?:here's how|to fix this|you can)[:\s]*([^\n.]{20,200})/gi,
      /(?:the issue is|problem is|cause is)[:\s]*([^\n.]{20,200})/gi,
      /(?:✅|✓|fixed|solved|resolved)[:\s]*([^\n.]{15,150})/gi,
    ];

    solutionPatterns.forEach((pattern) => {
      let match: RegExpExecArray | null;
      while ((match = pattern.exec(content)) !== null) {
        if (match[1] && match[1].trim().length > 15) {
          insights.push(`Solution: ${match[1].trim()}`);
        }
      }
    });

    // Explanation patterns
    const explanationPatterns = [
      /(?:this means|this is because|the reason)[:\s]*([^\n.]{25,250})/gi,
      /(?:explanation|basically|in other words)[:\s]*([^\n.]{25,200})/gi,
    ];

    explanationPatterns.forEach((pattern) => {
      let match: RegExpExecArray | null;
      while ((match = pattern.exec(content)) !== null) {
        if (match[1] && match[1].trim().length > 20) {
          insights.push(`Explanation: ${match[1].trim()}`);
        }
      }
    });

    return insights.slice(0, 3);
  }

  /**
   * Collects up to five code snippets: fenced blocks first (each capped at
   * 400 characters), then inline code spans of 10-120 characters that are not
   * already contained in a collected snippet.
   */
  private extractCodeSnippets(content: string): string[] {
    const snippets: string[] = [];

    // Fenced code blocks
    const codeBlockRegex = /```[\w]*\n([\s\S]*?)\n```/g;
    let match: RegExpExecArray | null;
    while ((match = codeBlockRegex.exec(content)) !== null) {
      if (match[1] && match[1].trim().length > 10) {
        const snippet = match[1].trim();
        snippets.push(snippet.length > 400 ? snippet.substring(0, 400) + '...' : snippet);
      }
    }

    // Inline code
    const inlineCodeRegex = /`([^`]{10,120})`/g;
    let inlineMatch: RegExpExecArray | null;
    while ((inlineMatch = inlineCodeRegex.exec(content)) !== null) {
      const inlineCode = inlineMatch[1];
      if (inlineCode && !snippets.some((existing) => existing.includes(inlineCode))) {
        snippets.push(inlineCode);
      }
    }

    return snippets.slice(0, 5);
  }

  /**
   * Collects up to four action items from text following action-style
   * phrases, numbered list items and bullet points. An item is skipped when
   * its first 20 characters already occur in a collected item.
   */
  private extractActionItems(content: string): string[] {
    const actions: string[] = [];

    const actionPatterns = [
      /(?:next step|now|then|first|finally|to do)[:\s]*([^\n.]{15,150})/gi,
      /(?:run|execute|install|update|create|add|remove)[:\s]*([^\n.]{10,100})/gi,
      /(?:you should|you need to|you can)[:\s]*([^\n.]{15,150})/gi,
      /\d+\.\s+([^\n.]{15,150})/g, // Numbered lists
      /[-*]\s+([^\n.]{15,150})/g, // Bullet points
    ];

    actionPatterns.forEach((pattern) => {
      let match: RegExpExecArray | null;
      while ((match = pattern.exec(content)) !== null) {
        if (match[1] && match[1].trim().length > 10) {
          const action = match[1].trim();
          if (!actions.some((existing) => existing.includes(action.substring(0, 20)))) {
            actions.push(action);
          }
        }
      }
    });

    return actions.slice(0, 4);
  }

  /**
   * Produces a shortened version of `content` that fits in `maxLength`.
   *
   * Structured content (see `hasStructuredContent`) is delegated to
   * `preserveStructuredContent`, which may return an empty string. Other
   * content is split into sentences, scored by keyword heuristics, and the
   * highest-scoring sentences are concatenated until the budget is used up.
   * When no sentence scores above zero, a hard-truncated prefix is returned.
   */
  private extractMostValuableContent(content: string, maxLength: number): string {
    // For structured content (code, errors), preserve original order and structure
    if (this.hasStructuredContent(content)) {
      return this.preserveStructuredContent(content, maxLength);
    }

    // High value keywords
    const highValueTerms = [
      'solution',
      'fix',
      'error',
      'problem',
      'resolved',
      'working',
      'success',
      'function',
      'class',
      'import',
      'export',
      'const',
      'let',
      'var',
      'install',
      'update',
      'create',
      'build',
      'test',
      'deploy',
      'file',
      'path',
      'directory',
      'config',
      'settings',
    ];

    // Markers of system/tooling noise that should never be kept
    const noiseMarkers = [
      'this session is being continued',
      'caveat:',
      'command-name>',
      'generated by the user while running',
      'local-command-stdout',
      'analysis:',
      'command-message>',
      'system-reminder',
    ];

    // NOTE(review): this tests the whole content, not the sentence, so it
    // applies to every sentence alike - confirm that is intended.
    const contentIsTiny = content.length < 50;

    // For conversational content, use sentence-based extraction
    const sentences = content.split(/[.!?]+/).filter((s) => s.trim().length > 20);

    // Score sentences based on value indicators
    const scoredSentences = sentences.map((sentence) => {
      let score = 0;
      const lowerSentence = sentence.toLowerCase();

      highValueTerms.forEach((term) => {
        if (lowerSentence.includes(term)) score += 2;
      });

      // Boost sentences with code or technical references
      if (
        sentence.includes('`') ||
        sentence.includes('/') ||
        sentence.includes('.ts') ||
        sentence.includes('.js')
      ) {
        score += 3;
      }

      // Boost sentences that explain outcomes or provide answers
      if (
        lowerSentence.includes('now') ||
        lowerSentence.includes('result') ||
        lowerSentence.includes('this will')
      ) {
        score += 2;
      }

      // Penalize very short or generic sentences
      if (sentence.length < 40) score -= 1;

      if (noiseMarkers.some((marker) => lowerSentence.includes(marker)) || contentIsTiny) {
        score -= 50; // Aggressively eliminate noise and short content
      }

      return { sentence: sentence.trim(), score };
    });

    // Sort by score and build result
    const sortedSentences = scoredSentences
      .filter((s) => s.score > 0)
      .sort((a, b) => b.score - a.score);

    let result = '';
    for (const { sentence } of sortedSentences) {
      if (result.length + sentence.length + 2 <= maxLength) {
        result += sentence + '. ';
      } else {
        break;
      }
    }

    return result.trim() || content.substring(0, maxLength - 3) + '...';
  }

  /**
   * Reports whether content contains markers of code, errors, stack frames or
   * an explicit "Solution:" section.
   */
  private hasStructuredContent(content: string): boolean {
    return (
      content.includes('function ') ||
      content.includes('Error:') ||
      content.includes('Exception:') ||
      content.includes('```') ||
      content.match(/at \w+.*:\d+:\d+/) !== null ||
      content.includes('Solution:') ||
      content.includes('TypeError:')
    );
  }

  /**
   * Keeps the first function definition, first error section and first
   * "Solution:" section found in the content, highest priority first, within
   * `maxLength`. Returns an empty string when none of them is present.
   */
  private preserveStructuredContent(content: string, maxLength: number): string {
    const sections: Array<{ content: string; priority: number; type: string }> = [];

    // Extract function definitions
    const functionMatch = content.match(/function\s+\w+[^}]*\}/);
    if (functionMatch) {
      sections.push({ content: functionMatch[0], priority: 3, type: 'function' });
    }

    // Extract error messages
    const errorMatch = content.match(
      /(Error|Exception|TypeError):[^\n]*(\n[^\n]*)*?(?=\n\n|\n[A-Z]|$)/
    );
    if (errorMatch) {
      sections.push({ content: errorMatch[0], priority: 3, type: 'error' });
    }

    // Extract solutions
    const solutionMatch = content.match(/Solution:[^\n]*(\n[^\n]*)*?(?=\n\n|\n[A-Z]|$)/);
    if (solutionMatch) {
      sections.push({ content: solutionMatch[0], priority: 2, type: 'solution' });
    }

    // Sort by priority and fit within limit
    sections.sort((a, b) => b.priority - a.priority);

    let result = '';
    for (const section of sections) {
      if (result.length + section.content.length + 2 <= maxLength) {
        result += section.content + '\n\n';
      } else {
        // Try to fit a truncated version
        const remaining = maxLength - result.length - 5;
        if (remaining > 50) {
          result += section.content.substring(0, remaining) + '...';
        }
        break;
      }
    }

    return result.trim();
  }

  /**
   * Records a message against its session, creating the session entry on
   * first sight. `endTime` follows the most recently processed valid
   * timestamp; `startTime` moves back when an earlier valid timestamp is
   * seen. An invalid first timestamp falls back to the current time.
   */
  private updateSessionInfo(message: ClaudeMessage, projectDir: string): void {
    const sessionId = message.sessionId;
    const hasValidTimestamp = this.isValidTimestamp(message.timestamp);

    if (!this.sessions.has(sessionId)) {
      const initialTime = hasValidTimestamp ? message.timestamp : new Date().toISOString();
      this.sessions.set(sessionId, {
        sessionId,
        projectPath: decodeProjectPath(projectDir),
        startTime: initialTime,
        endTime: initialTime,
        messageCount: 0,
      });
    }

    const session = this.sessions.get(sessionId)!;
    if (hasValidTimestamp) {
      session.endTime = message.timestamp;
    }
    session.messageCount++;

    // Update start time if this message is earlier (with timestamp validation)
    if (
      hasValidTimestamp &&
      this.isValidTimestamp(session.startTime) &&
      new Date(message.timestamp) < new Date(session.startTime)
    ) {
      session.startTime = message.timestamp;
    }
  }

  /** Returns the tracked session with the given id, if any message of it was parsed. */
  getSession(sessionId: string): ConversationSession | undefined {
    return this.sessions.get(sessionId);
  }

  /** Returns all tracked sessions, most recently ended first. */
  getAllSessions(): ConversationSession[] {
    return Array.from(this.sessions.values()).sort(
      (a, b) => new Date(b.endTime).getTime() - new Date(a.endTime).getTime()
    );
  }

  /**
   * A timestamp is valid when it is a non-empty string that parses to a date
   * whose year is after 2020.
   */
  private isValidTimestamp(timestamp: string): boolean {
    if (!timestamp || typeof timestamp !== 'string') return false;
    const date = new Date(timestamp);
    return !isNaN(date.getTime()) && date.getFullYear() > 2020;
  }
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Vvkmnn/claude-historian'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

parser.ts•25 KiB