Skip to main content
Glama
utils.ts (12.5 kB)
/**
 * IndexFoundry-MCP Utility Functions
 *
 * Core utilities for hashing, file operations, and deterministic processing.
 */

import { createHash } from "crypto";
import { promises as fs } from "fs";
import { join, dirname, basename, extname } from "path";
import { v7 as uuidv7 } from "uuid";
import type { LogEvent, LogLevel, RunManifest, PhaseManifest } from "./types.js";

// =============================================================================
// Hashing Utilities
// =============================================================================

/**
 * Compute the SHA-256 digest of a string.
 *
 * @param text - Input text; it is fed to the hash as UTF-8.
 * @returns Lowercase hexadecimal digest.
 */
export function hashString(text: string): string {
  return createHash("sha256").update(text, "utf8").digest("hex");
}

/**
 * Compute the SHA-256 digest of a buffer.
 *
 * @param buffer - Raw bytes to hash.
 * @returns Lowercase hexadecimal digest.
 */
export function hashBuffer(buffer: Buffer): string {
  return createHash("sha256").update(buffer).digest("hex");
}

/**
 * Compute the SHA-256 digest of a file's contents.
 *
 * The whole file is read into memory before hashing.
 *
 * @param filePath - Path of the file to hash.
 * @returns Lowercase hexadecimal digest.
 * @throws Rejects with the underlying `fs.readFile` error if the file cannot be read.
 */
export async function hashFile(filePath: string): Promise<string> {
  const buffer = await fs.readFile(filePath);
  return hashBuffer(buffer);
}

/**
 * Generate a deterministic chunk ID from a document ID and a position.
 *
 * The ID is the SHA-256 of `docId::byteStart::byteEnd`, so the same inputs
 * always produce the same ID.
 *
 * @param docId - Identifier of the source document.
 * @param byteStart - Start position of the chunk.
 * @param byteEnd - End position of the chunk.
 * @returns Hexadecimal chunk ID.
 */
export function generateChunkId(docId: string, byteStart: number, byteEnd: number): string {
  const input = `${docId}::${byteStart}::${byteEnd}`;
  return hashString(input);
}

/**
 * Normalize text for consistent hashing: trim both ends, collapse every run
 * of whitespace to a single space, and lowercase the result.
 *
 * @param text - Text to normalize.
 * @returns The normalized text.
 */
export function normalizeText(text: string): string {
  return text
    .trim()
    .replace(/\s+/g, " ")
    .toLowerCase();
}

// =============================================================================
// Run Directory Management
// =============================================================================

/** Location of a run on disk together with the paths of its well-known entries. */
export interface RunDirectory {
  runId: string;
  basePath: string;
  paths: {
    manifest: string;
    config: string;
    raw: string;
    extracted: string;
    normalized: string;
    indexed: string;
    served: string;
    logs: string;
  };
}

/**
 * Build the RunDirectory description for a run without touching the disk.
 * Single source of truth for the layout shared by create/get below.
 */
function buildRunDirectory(runsDir: string, runId: string): RunDirectory {
  const basePath = join(runsDir, runId);
  return {
    runId,
    basePath,
    paths: {
      manifest: join(basePath, "manifest.json"),
      config: join(basePath, "config.json"),
      raw: join(basePath, "raw"),
      extracted: join(basePath, "extracted"),
      normalized: join(basePath, "normalized"),
      indexed: join(basePath, "indexed"),
      served: join(basePath, "served"),
      logs: join(basePath, "logs"),
    },
  };
}

/**
 * Create a new run directory with all required subdirectories.
 *
 * @param runsDir - Parent directory that holds all runs.
 * @param runId - Optional run ID; a UUID v7 is generated when omitted.
 * @returns The run ID, its base path, and the paths of its entries.
 * @throws Rejects with the underlying `fs.mkdir` error if a directory cannot be created.
 */
export async function createRunDirectory(
  runsDir: string,
  runId?: string
): Promise<RunDirectory> {
  const runDir = buildRunDirectory(runsDir, runId ?? uuidv7());
  const { raw, extracted, normalized, indexed, served, logs } = runDir.paths;

  // Create the base directory first, then the independent subdirectories in
  // parallel. `recursive: true` makes every call a no-op if the directory
  // already exists.
  await fs.mkdir(runDir.basePath, { recursive: true });
  await Promise.all(
    [raw, extracted, normalized, indexed, served, logs].map((dir) =>
      fs.mkdir(dir, { recursive: true })
    )
  );

  return runDir;
}

/**
 * Get an existing run directory.
 *
 * Only the base path is probed (via `fs.access`); the subdirectories and
 * files listed in `paths` are not checked.
 *
 * @param runsDir - Parent directory that holds all runs.
 * @param runId - ID of the run to look up.
 * @returns The run directory description, or `null` if the base path is not accessible.
 */
export async function getRunDirectory(
  runsDir: string,
  runId: string
): Promise<RunDirectory | null> {
  const runDir = buildRunDirectory(runsDir, runId);

  try {
    await fs.access(runDir.basePath);
  } catch {
    return null;
  }

  return runDir;
}

// =============================================================================
// Manifest Management
// =============================================================================

/**
 * Create the initial run manifest: status "running", no phases, and all
 * totals and timings zeroed.
 *
 * @param runId - ID of the run.
 * @param configHash - Hash of the run configuration, stored as `config_hash`.
 * @returns A fresh manifest stamped with the current time (ISO 8601).
 */
export function createInitialManifest(runId: string, configHash: string): RunManifest {
  return {
    run_id: runId,
    created_at: new Date().toISOString(),
    status: "running",
    config_hash: configHash,
    phases: {},
    totals: {
      sources_fetched: 0,
      documents_extracted: 0,
      chunks_created: 0,
      vectors_indexed: 0,
      errors_encountered: 0,
    },
    timing: {
      total_duration_ms: 0,
      phase_durations: {},
    },
  };
}

/**
 * Create the initial phase manifest: status "running", empty inputs/outputs,
 * and no errors.
 *
 * @param toolVersion - Version string recorded as `tool_version`.
 * @returns A fresh phase manifest stamped with the current time (ISO 8601).
 */
export function createPhaseManifest(toolVersion: string): PhaseManifest {
  return {
    started_at: new Date().toISOString(),
    status: "running",
    inputs: { count: 0, hashes: [] },
    outputs: { count: 0, hashes: [] },
    tool_version: toolVersion,
    errors: [],
  };
}

/**
 * Save a manifest to disk as JSON pretty-printed with 2-space indentation.
 *
 * @param manifestPath - Destination file; it is overwritten if it exists.
 * @param manifest - Manifest to serialize.
 */
export async function saveManifest(manifestPath: string, manifest: RunManifest): Promise<void> {
  await fs.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
}

/**
 * Load a manifest from disk.
 *
 * The parsed JSON is returned as-is; its shape is not validated.
 *
 * @param manifestPath - File to read.
 * @returns The parsed manifest.
 * @throws Rejects if the file cannot be read or does not contain valid JSON.
 */
export async function loadManifest(manifestPath: string): Promise<RunManifest> {
  const content = await fs.readFile(manifestPath, "utf8");
  return JSON.parse(content) as RunManifest;
}

// =============================================================================
// JSONL Operations
// =============================================================================

/**
 * Parse the text of a JSONL file, yielding one record per non-blank line.
 * Malformed lines are reported with the file path and 1-based line number.
 */
function* parseJsonlContent<T>(content: string, filePath: string): Generator<T> {
  const lines = content.split("\n");
  for (let index = 0; index < lines.length; index++) {
    const line = lines[index];
    // Blank (or whitespace-only) lines are tolerated and skipped.
    if (line === undefined || !line.trim()) {
      continue;
    }
    try {
      yield JSON.parse(line) as T;
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err);
      // Still a SyntaxError, as JSON.parse would throw, but with location context.
      throw new SyntaxError(`Invalid JSON at ${filePath}:${index + 1}: ${reason}`);
    }
  }
}

/**
 * Append a record to a JSONL file as a single JSON line.
 *
 * @param filePath - File to append to.
 * @param record - Value to serialize with `JSON.stringify`.
 */
export async function appendJsonl<T>(filePath: string, record: T): Promise<void> {
  const line = JSON.stringify(record) + "\n";
  await fs.appendFile(filePath, line);
}

/**
 * Read all records from a JSONL file.
 *
 * Blank lines are skipped. Records are cast to `T` without validation.
 *
 * @param filePath - File to read.
 * @returns Every record, in file order.
 * @throws Rejects if the file cannot be read, or with a `SyntaxError` naming
 *   the file and line if a line is not valid JSON.
 */
export async function readJsonl<T>(filePath: string): Promise<T[]> {
  const content = await fs.readFile(filePath, "utf8");
  return Array.from(parseJsonlContent<T>(content, filePath));
}

/**
 * Iterate over the records of a JSONL file (async generator).
 *
 * Note: the file is read into memory in full on the first iteration; only the
 * parsing of individual lines is lazy.
 *
 * @param filePath - File to read.
 * @throws Rejects if the file cannot be read, or with a `SyntaxError` naming
 *   the file and line when a malformed line is reached.
 */
export async function* streamJsonl<T>(filePath: string): AsyncGenerator<T> {
  const content = await fs.readFile(filePath, "utf8");
  yield* parseJsonlContent<T>(content, filePath);
}

/**
 * Write records to a JSONL file, replacing any existing content.
 *
 * The output always ends with a newline; an empty `records` array therefore
 * produces a file containing a single newline.
 *
 * @param filePath - Destination file.
 * @param records - Values to serialize, one per line.
 */
export async function writeJsonl<T>(filePath: string, records: T[]): Promise<void> {
  const content = records.map(r => JSON.stringify(r)).join("\n") + "\n";
  await fs.writeFile(filePath, content);
}
// =============================================================================
// Logging
// =============================================================================

/**
 * Writes structured log events as NDJSON into a logs directory and mirrors a
 * one-line summary of every event to stderr.
 *
 * All events go to `events.ndjson`; events at level "error" are additionally
 * written to `errors.ndjson`.
 */
export class RunLogger {
  private readonly eventsPath: string;
  private readonly errorsPath: string;
  private currentPhase: string = "init";
  private currentTool: string = "system";

  /**
   * @param logsDir - Directory that receives `events.ndjson` and `errors.ndjson`.
   */
  constructor(logsDir: string) {
    this.eventsPath = join(logsDir, "events.ndjson");
    this.errorsPath = join(logsDir, "errors.ndjson");
  }

  /**
   * Set the phase and tool names stamped on subsequent events.
   */
  setContext(phase: string, tool: string): void {
    this.currentPhase = phase;
    this.currentTool = tool;
  }

  /**
   * Record one event.
   *
   * @param level - Severity of the event.
   * @param message - Human-readable message.
   * @param details - Optional extra data stored with the event (not printed to stderr).
   * @throws Rejects if appending to a log file fails; the stderr line is still printed.
   */
  async log(level: LogLevel, message: string, details?: unknown): Promise<void> {
    const event: LogEvent = {
      timestamp: new Date().toISOString(),
      level,
      phase: this.currentPhase,
      tool: this.currentTool,
      message,
      details,
    };

    try {
      await appendJsonl(this.eventsPath, event);

      if (level === "error") {
        await appendJsonl(this.errorsPath, event);
      }
    } finally {
      // Also log to stderr for debugging. Done in `finally` so the message is
      // not lost when the log files cannot be written.
      console.error(`[${level.toUpperCase()}] [${event.phase}/${event.tool}] ${message}`);
    }
  }

  /** Record an event at level "debug". */
  debug(message: string, details?: unknown): Promise<void> {
    return this.log("debug", message, details);
  }

  /** Record an event at level "info". */
  info(message: string, details?: unknown): Promise<void> {
    return this.log("info", message, details);
  }

  /** Record an event at level "warn". */
  warn(message: string, details?: unknown): Promise<void> {
    return this.log("warn", message, details);
  }

  /** Record an event at level "error" (also written to `errors.ndjson`). */
  error(message: string, details?: unknown): Promise<void> {
    return this.log("error", message, details);
  }
}

// =============================================================================
// File Utilities
// =============================================================================

/** File extension for each content type this module recognizes. */
const CONTENT_TYPE_EXTENSIONS: ReadonlyMap<string, string> = new Map([
  ["application/pdf", ".pdf"],
  ["text/html", ".html"],
  ["text/plain", ".txt"],
  ["text/markdown", ".md"],
  ["text/csv", ".csv"],
  ["application/json", ".json"],
  ["application/xml", ".xml"],
  ["application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".docx"],
]);

/**
 * Matches `type/subtype` where `type` is a registered top-level media type.
 * Used to tell an unrecognized content type apart from a path containing "/".
 */
const MEDIA_TYPE_PATTERN =
  /^(?:application|audio|font|image|message|model|multipart|text|video)\/[^\s\/]+$/;

/**
 * Get a file extension from a path or a content type.
 *
 * Resolution order:
 * 1. A recognized content type (parameters such as `; charset=utf-8` and
 *    letter case are ignored) maps to its extension.
 * 2. Any other `type/subtype` string with a registered top-level media type
 *    yields ".bin".
 * 3. Everything else is treated as a path and its own extension is returned,
 *    or ".bin" if it has none.
 *
 * A two-segment relative path whose directory is named like a top-level media
 * type (e.g. `text/notes.md`) is indistinguishable from a content type and
 * falls under rule 2.
 *
 * @param pathOrContentType - File path or content type string.
 * @returns An extension including the leading dot.
 */
export function getExtension(pathOrContentType: string): string {
  // Drop content-type parameters and normalize case before the lookup.
  const [head = ""] = pathOrContentType.split(";");
  const mediaType = head.trim().toLowerCase();

  const known = CONTENT_TYPE_EXTENSIONS.get(mediaType);
  if (known !== undefined) {
    return known;
  }

  if (MEDIA_TYPE_PATTERN.test(mediaType)) {
    return ".bin";
  }

  return extname(pathOrContentType) || ".bin";
}

/**
 * Ensure a directory exists, creating missing parents as needed.
 *
 * @param dirPath - Directory to create.
 */
export async function ensureDir(dirPath: string): Promise<void> {
  await fs.mkdir(dirPath, { recursive: true });
}

/**
 * Check whether a path is accessible.
 *
 * @param filePath - Path to probe with `fs.access`.
 * @returns `true` if the probe succeeds, `false` on any error.
 */
export async function fileExists(filePath: string): Promise<boolean> {
  try {
    await fs.access(filePath);
    return true;
  } catch {
    return false;
  }
}

/**
 * Copy a file and report the hash and size of the copied bytes.
 *
 * The destination's parent directory is created if needed. The hash is
 * computed from the bytes read from `source`; the destination is not read
 * back.
 *
 * @param source - File to copy.
 * @param dest - Destination path; overwritten if it exists.
 * @returns SHA-256 hex digest and byte length of the source content.
 */
export async function copyFileWithHash(
  source: string,
  dest: string
): Promise<{ hash: string; size: number }> {
  await ensureDir(dirname(dest));
  const buffer = await fs.readFile(source);
  await fs.writeFile(dest, buffer);
  return {
    hash: hashBuffer(buffer),
    size: buffer.length,
  };
}

/**
 * Get a sorted list of files matching a glob pattern.
 *
 * @param dir - Directory the pattern is resolved against.
 * @param pattern - Glob pattern; defaults to "*".
 * @returns Absolute paths in default `Array.prototype.sort` order.
 */
export async function getSortedFiles(
  dir: string,
  pattern: string = "*"
): Promise<string[]> {
  // Loaded on demand so that `glob` is only required when this function is used.
  const { glob } = await import("glob");
  const files = await glob(pattern, { cwd: dir, absolute: true });
  return files.sort();
}

// =============================================================================
// Text Processing
// =============================================================================

/**
 * Estimate token count (rough approximation: chars / 4, rounded up).
 *
 * @param text - Text to measure.
 * @returns Estimated number of tokens.
 */
export function estimateTokens(text: string): number {
  return Math.ceil(text.length / 4);
}

/**
 * Split text by a hierarchy of separators (for recursive chunking).
 *
 * The text is split on the first separator and the parts are greedily merged
 * back (re-inserting the separator) into chunks of at most `maxChars`
 * characters. A single part that is still too long is split again with the
 * remaining separators. If no separators remain, an oversized piece is
 * returned as-is, so chunks may exceed `maxChars`. The separator between two
 * adjacent chunks is not included in either chunk.
 *
 * @param text - Text to split.
 * @param separators - Separators ordered from coarsest to finest.
 * @param maxChars - Target maximum chunk length in characters.
 * @returns The chunks in their original order.
 */
export function splitByHierarchy(
  text: string,
  separators: string[],
  maxChars: number
): string[] {
  const sep = separators[0];
  if (text.length <= maxChars || sep === undefined) {
    return [text];
  }

  const restSeps = separators.slice(1);
  const chunks: string[] = [];

  // Emit a finished piece, descending to finer separators if it is too long.
  const flush = (piece: string): void => {
    if (piece.length > maxChars && restSeps.length > 0) {
      for (const sub of splitByHierarchy(piece, restSeps, maxChars)) {
        chunks.push(sub);
      }
    } else {
      chunks.push(piece);
    }
  };

  let current = "";
  for (const part of text.split(sep)) {
    const addition = current ? sep + part : part;
    if (current.length + addition.length <= maxChars) {
      current += addition;
    } else {
      if (current) {
        flush(current);
      }
      // Start a new chunk with this part; it may itself be oversized and is
      // handled when it is flushed.
      current = part;
    }
  }

  if (current) {
    flush(current);
  }

  return chunks;
}

// =============================================================================
// Error Formatting
// =============================================================================

/** Structured error description returned by `formatError`. */
export interface FormattedError {
  code: string;
  message: string;
  details?: unknown;
  recoverable: boolean;
  suggestion?: string;
}

/**
 * Build a structured error object.
 *
 * @param code - Error code.
 * @param message - Human-readable message.
 * @param options - Optional details, recoverability flag (defaults to `false`),
 *   and suggestion.
 * @returns The assembled error description.
 */
export function formatError(
  code: string,
  message: string,
  options?: {
    details?: unknown;
    recoverable?: boolean;
    suggestion?: string;
  }
): FormattedError {
  return {
    code,
    message,
    details: options?.details,
    recoverable: options?.recoverable ?? false,
    suggestion: options?.suggestion,
  };
}

// =============================================================================
// Timing Utilities
// =============================================================================

/**
 * Stopwatch based on `process.hrtime.bigint()`. Starts when constructed.
 */
export class Timer {
  private start: bigint;

  constructor() {
    this.start = process.hrtime.bigint();
  }

  /**
   * @returns Milliseconds (fractional) since construction or the last `reset()`.
   */
  elapsedMs(): number {
    const end = process.hrtime.bigint();
    // hrtime is in nanoseconds; convert to milliseconds.
    return Number(end - this.start) / 1_000_000;
  }

  /** Restart the stopwatch from now. */
  reset(): void {
    this.start = process.hrtime.bigint();
  }
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Mnehmos/mnehmos.index-foundry.mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.