Context Sync

file-skimmer.ts•9.95 KiB

import * as fs from 'fs';
import * as path from 'path';

/**
 * Limits that decide when a file is skimmed instead of read whole, and how
 * much of it is sampled when it is skimmed.
 */
export interface FileSkimmingConfig {
  maxFileSize: number; // When to start skimming (bytes)
  skimChunkSize: number; // Size of each chunk to read (bytes)
  headerSize: number; // Always read from start (bytes)
  footerSize: number; // Always read from end (bytes)
  searchPatterns?: string[]; // Patterns to look for while skimming
  maxChunks: number; // Maximum chunks to read when skimming
  encoding: BufferEncoding; // File encoding
}

/** Result returned by {@link FileSkimmer.readFile}. */
export interface SkimmedContent {
  content: string; // Text assembled from the parts of the file that were read
  skimmed: boolean; // true when only parts of the file were read
  originalSize: number; // Size of the file on disk (bytes)
  actualSize: number; // Size of `content` in the configured encoding (bytes)
  chunks: SkimChunk[];
  patterns?: PatternMatch[];
  error?: string; // Set only when the file could not be read
}

/** One contiguous region of the file that was read. */
export interface SkimChunk {
  start: number; // Byte offset in original file
  end: number; // Byte offset end
  type: 'header' | 'footer' | 'middle' | 'pattern';
  content: string;
}

/** One search-pattern hit found while skimming. */
export interface PatternMatch {
  pattern: string; // The pattern exactly as supplied by the caller
  line: number; // 1-based line number of the matching line
  context: string; // Matching line plus up to two lines either side (same chunk only)
  byteOffset: number; // Byte offset of the scanned chunk that contains the match
}

/**
 * Reads files for context gathering: small files are returned whole, large
 * files are reduced to a header, a footer and either pattern-match excerpts
 * or evenly spaced middle samples.
 */
export class FileSkimmer {
  private static readonly DEFAULT_CONFIG: FileSkimmingConfig = {
    maxFileSize: 1 * 1024 * 1024, // Start skimming at 1MB
    skimChunkSize: 64 * 1024, // 64KB chunks
    headerSize: 32 * 1024, // Read first 32KB
    footerSize: 16 * 1024, // Read last 16KB
    maxChunks: 10, // Max 10 chunks (640KB total)
    encoding: 'utf8',
  };

  private readonly config: FileSkimmingConfig;

  /**
   * @param config Overrides merged on top of the built-in defaults.
   */
  constructor(config?: Partial<FileSkimmingConfig>) {
    this.config = { ...FileSkimmer.DEFAULT_CONFIG, ...config };
  }

  /**
   * Smart file reading with skimming for large files.
   *
   * Files no larger than `maxFileSize` are read completely (search patterns
   * are not applied to them). Larger files are skimmed.
   *
   * @param filePath Path of the file to read.
   * @param searchPatterns Case-insensitive substrings to look for; falls back
   *   to the configured `searchPatterns` when omitted.
   * @returns The content that was read. This method does not throw: on any
   *   file-system error it returns an empty, non-skimmed result whose `error`
   *   field carries the failure message.
   */
  readFile(filePath: string, searchPatterns?: string[]): SkimmedContent {
    try {
      const fileSize = fs.statSync(filePath).size;

      // For small files, read normally
      if (fileSize <= this.config.maxFileSize) {
        const content = fs.readFileSync(filePath, this.config.encoding);
        return {
          content,
          skimmed: false,
          originalSize: fileSize,
          // Bytes, not UTF-16 code units, so it is comparable to originalSize.
          actualSize: Buffer.byteLength(content, this.config.encoding),
          chunks: [{ start: 0, end: fileSize, type: 'header', content }],
        };
      }

      // For large files, use intelligent skimming
      return this.skimFile(filePath, fileSize, searchPatterns ?? this.config.searchPatterns);
    } catch (error: unknown) {
      // Best-effort contract: report the failure in the result instead of throwing.
      return {
        content: '',
        skimmed: false,
        originalSize: 0,
        actualSize: 0,
        chunks: [],
        error: error instanceof Error ? error.message : String(error),
      };
    }
  }

  /**
   * Read `length` bytes starting at byte `start` and decode them with the
   * configured encoding. Only the bytes actually returned by the read are
   * decoded, so a short read never yields padding from the zero-filled buffer.
   */
  private readRange(fd: number, start: number, length: number): { text: string; bytesRead: number } {
    if (length <= 0) {
      return { text: '', bytesRead: 0 };
    }
    const buffer = Buffer.alloc(length);
    const bytesRead = fs.readSync(fd, buffer, 0, length, start);
    return { text: buffer.toString(this.config.encoding, 0, bytesRead), bytesRead };
  }

  /**
   * Skim a large file: header, footer, then either pattern excerpts (when
   * patterns are given) or evenly spaced middle samples (when they are not).
   *
   * `content` and `chunks` keep that order, so the footer precedes the
   * middle samples / pattern excerpts in both.
   */
  private skimFile(filePath: string, fileSize: number, searchPatterns?: string[]): SkimmedContent {
    const fd = fs.openSync(filePath, 'r');
    const chunks: SkimChunk[] = [];
    let totalContent = '';

    try {
      // 1. Always read header (beginning of file)
      const headerSize = Math.min(this.config.headerSize, fileSize);
      const header = this.readRange(fd, 0, headerSize);
      chunks.push({ start: 0, end: header.bytesRead, type: 'header', content: header.text });
      totalContent += header.text;

      // 2. Always read footer (end of file), never overlapping the header
      const footerSize = Math.min(this.config.footerSize, fileSize - headerSize);
      if (footerSize > 0) {
        const footerStart = fileSize - footerSize;
        const footer = this.readRange(fd, footerStart, footerSize);
        chunks.push({
          start: footerStart,
          end: footerStart + footer.bytesRead,
          type: 'footer',
          content: footer.text,
        });
        totalContent += '\n\n[... file content skipped ...]\n\n' + footer.text;
      }

      let patterns: PatternMatch[] = [];
      if (searchPatterns && searchPatterns.length > 0) {
        // 3. Pattern-based skimming if patterns provided
        const found = this.searchPatterns(filePath, fileSize, searchPatterns, fd);
        patterns = found.matches;
        chunks.push(...found.chunks);

        // A line that matches several patterns yields several matches with the
        // same context; emit that excerpt only once.
        const reportedLines = new Set<number>();
        for (const match of patterns) {
          if (reportedLines.has(match.line)) continue;
          reportedLines.add(match.line);
          totalContent += `\n\n[... pattern match at line ${match.line} ...]\n${match.context}`;
        }
      } else {
        // 4. Strategic middle sampling (for files without specific patterns)
        const middleChunks = this.sampleMiddleSections(fd, fileSize, headerSize, footerSize);
        chunks.push(...middleChunks);
        for (const chunk of middleChunks) {
          totalContent += '\n\n[... skipped content ...]\n' + chunk.content;
        }
      }

      return {
        content: totalContent,
        skimmed: true,
        originalSize: fileSize,
        actualSize: Buffer.byteLength(totalContent, this.config.encoding),
        chunks,
        patterns: patterns.length > 0 ? patterns : undefined,
      };
    } finally {
      fs.closeSync(fd);
    }
  }

  /**
   * Search for specific patterns in large files.
   *
   * Scans from the start of the file in `skimChunkSize` chunks, stopping
   * after `maxChunks` chunks, so only the first `maxChunks * skimChunkSize`
   * bytes are searched. Matching is a case-insensitive substring test per
   * line. Each chunk is split into lines on its own, so a pattern that
   * straddles a chunk boundary is not detected and context never crosses a
   * chunk boundary.
   *
   * @param filePath Unused; all reads go through `fd`.
   * @param fileSize Size of the file in bytes.
   * @param patterns Substrings to look for.
   * @param fd Open file descriptor to read from.
   * @returns One match per (line, pattern) pair and one chunk per matching line.
   */
  private searchPatterns(
    filePath: string,
    fileSize: number,
    patterns: string[],
    fd: number
  ): { matches: PatternMatch[]; chunks: SkimChunk[] } {
    const matches: PatternMatch[] = [];
    const chunks: SkimChunk[] = [];
    const chunkSize = this.config.skimChunkSize;
    // Lower-case each pattern once instead of once per line.
    const needles = patterns.map((pattern) => ({ pattern, lowered: pattern.toLowerCase() }));

    let currentOffset = 0;
    let processedChunks = 0;
    let newlinesBefore = 0; // Newlines seen in earlier chunks; gives exact line numbers

    while (currentOffset < fileSize && processedChunks < this.config.maxChunks) {
      const readSize = Math.min(chunkSize, fileSize - currentOffset);
      const { text, bytesRead } = this.readRange(fd, currentOffset, readSize);
      if (bytesRead === 0) break; // File shrank or chunk size is not positive

      const lines = text.split('\n');
      for (const [index, line] of lines.entries()) {
        const lowered = line.toLowerCase();
        const hits = needles.filter((needle) => lowered.includes(needle.lowered));
        if (hits.length === 0) continue;

        // Found a match! Include two lines of context on either side.
        const context = lines
          .slice(Math.max(0, index - 2), Math.min(lines.length, index + 3))
          .join('\n');
        const lineNumber = newlinesBefore + index + 1;

        for (const hit of hits) {
          matches.push({
            pattern: hit.pattern,
            line: lineNumber,
            context,
            byteOffset: currentOffset,
          });
        }
        chunks.push({
          start: currentOffset,
          end: currentOffset + bytesRead,
          type: 'pattern',
          content: context,
        });
      }

      // split() yields one more element than there are newlines.
      newlinesBefore += lines.length - 1;
      currentOffset += bytesRead;
      processedChunks++;
    }

    return { matches, chunks };
  }

  /**
   * Sample middle sections of large files strategically.
   *
   * Takes up to five evenly spaced samples from the region between header
   * and footer. Each sample is at most `skimChunkSize` bytes and never longer
   * than the spacing between sample points, so samples cannot overlap each
   * other or run into the footer.
   */
  private sampleMiddleSections(
    fd: number,
    fileSize: number,
    headerSize: number,
    footerSize: number
  ): SkimChunk[] {
    const chunks: SkimChunk[] = [];
    const availableMiddle = fileSize - headerSize - footerSize;
    const samplePoints = Math.min(5, this.config.maxChunks - 2); // -2 for header/footer
    if (availableMiddle <= 0 || samplePoints <= 0) return chunks;

    const interval = Math.floor(availableMiddle / (samplePoints + 1));
    const sampleSize = Math.min(this.config.skimChunkSize, interval);
    if (sampleSize <= 0) return chunks;

    for (let i = 1; i <= samplePoints; i++) {
      const sampleStart = headerSize + interval * i;
      const sample = this.readRange(fd, sampleStart, sampleSize);
      if (sample.bytesRead === 0) continue;
      chunks.push({
        start: sampleStart,
        end: sampleStart + sample.bytesRead,
        type: 'middle',
        content: sample.text,
      });
    }

    return chunks;
  }

  /**
   * Get skimming statistics.
   *
   * @param result A result previously returned by {@link FileSkimmer.readFile}.
   * @returns A human-readable summary of sizes, chunk types and match count.
   */
  getSkimmingInfo(result: SkimmedContent): string {
    if (!result.skimmed) {
      return 'File read completely (no skimming needed)';
    }

    // Guard the division so an empty original size reports 0% instead of NaN.
    const reduction =
      result.originalSize > 0
        ? ((result.originalSize - result.actualSize) / result.originalSize) * 100
        : 0;
    const compressionRatio = reduction.toFixed(1);
    const chunksInfo = result.chunks.map((c) => c.type).join(', ');

    let info = `Skimmed large file: ${this.formatSize(result.originalSize)} → ${this.formatSize(result.actualSize)} (${compressionRatio}% reduction)\n`;
    info += `Chunks read: ${chunksInfo}`;

    if (result.patterns && result.patterns.length > 0) {
      info += `\nPattern matches: ${result.patterns.length}`;
    }

    return info;
  }

  /** Format a byte count with one decimal place in B, KB, MB or GB (1024-based). */
  private formatSize(bytes: number): string {
    const units = ['B', 'KB', 'MB', 'GB'];
    let size = bytes;
    let unitIndex = 0;

    while (size >= 1024 && unitIndex < units.length - 1) {
      size /= 1024;
      unitIndex++;
    }

    return `${size.toFixed(1)}${units[unitIndex]}`;
  }
}

/** Patterns used by {@link skimForDependencies}. */
const DEPENDENCY_PATTERNS: readonly string[] = ['import', 'require', 'export', 'from'];

/** Patterns used by {@link skimForFunctions}. */
const FUNCTION_PATTERNS: readonly string[] = ['function', 'class', 'const ', 'let ', 'var ', '=>'];

/**
 * Convenience functions for different use cases
 */

/**
 * Read a file looking for import/export style lines. Skimming starts at
 * 512KB, with a 64KB header and an 8KB footer.
 */
export function skimForDependencies(filePath: string): SkimmedContent {
  const skimmer = new FileSkimmer({
    maxFileSize: 512 * 1024, // 512KB threshold for dependency files
    searchPatterns: [...DEPENDENCY_PATTERNS],
    headerSize: 64 * 1024, // Dependencies usually at top
    footerSize: 8 * 1024, // Small footer
  });

  // readFile falls back to the configured searchPatterns.
  return skimmer.readFile(filePath);
}

/**
 * Read a file looking for function, class and variable declaration keywords.
 * Skimming starts at 1MB.
 */
export function skimForFunctions(filePath: string): SkimmedContent {
  const skimmer = new FileSkimmer({
    maxFileSize: 1 * 1024 * 1024, // 1MB threshold
    searchPatterns: [...FUNCTION_PATTERNS],
  });

  // readFile falls back to the configured searchPatterns.
  return skimmer.readFile(filePath);
}

/**
 * Read a file with the default configuration, looking for caller-supplied terms.
 */
export function skimForContent(filePath: string, searchTerms: string[]): SkimmedContent {
  const skimmer = new FileSkimmer();
  return skimmer.readFile(filePath, searchTerms);
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Intina47/context-sync'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

file-skimmer.ts•9.95 KiB