Skip to main content
Glama
utils.ts (12.5 kB)
/**
 * IndexFoundry-MCP: Core Utilities
 *
 * Deterministic utilities for hashing, file operations, and ID generation.
 *
 * Copyright (c) 2024 vario.automation
 * Proprietary and confidential. All rights reserved.
 */

import { createHash } from "crypto";
import { v7 as uuidv7 } from "uuid";
import * as fs from "fs/promises";
import * as path from "path";
import type { EventLogEntry, ErrorCode, ToolError, RunManifest, PhaseManifest, RawArtifact } from "./types.js";

// ============================================================================
// Hashing Utilities (Deterministic)
// ============================================================================

/**
 * Generate the SHA256 hash of a string or buffer.
 *
 * @param content - Data to hash
 * @returns Lowercase hex digest
 */
export function sha256(content: string | Buffer): string {
  return createHash("sha256").update(content).digest("hex");
}

/**
 * Generate a deterministic chunk ID from a document ID and byte offsets.
 *
 * @param docId - Identifier of the source document
 * @param byteStart - Start offset of the chunk
 * @param byteEnd - End offset of the chunk
 * @returns SHA256 hex digest of `docId|byteStart|byteEnd`
 */
export function generateChunkId(docId: string, byteStart: number, byteEnd: number): string {
  return sha256(`${docId}|${byteStart}|${byteEnd}`);
}

/**
 * Generate the SHA256 hash of a file's contents.
 * The whole file is read into memory before hashing.
 *
 * @param filePath - Path of the file to hash
 * @returns Lowercase hex digest of the file bytes
 */
export async function hashFile(filePath: string): Promise<string> {
  const content = await fs.readFile(filePath);
  return sha256(content);
}

/**
 * Serialize a value to JSON with object keys sorted at every nesting level.
 *
 * Mirrors JSON.stringify semantics: `toJSON` is honoured, `undefined`/function
 * members of objects are omitted, and unserializable array items become `null`.
 * Cyclic structures are not supported.
 *
 * @returns The JSON text, or undefined when the value itself is not serializable
 */
function stableStringify(value: unknown): string | undefined {
  if (value === null || typeof value !== "object") {
    return JSON.stringify(value);
  }

  const toJson = (value as { toJSON?: unknown }).toJSON;
  if (typeof toJson === "function") {
    return stableStringify((toJson as (key: string) => unknown).call(value, ""));
  }

  if (Array.isArray(value)) {
    // Array.from visits holes in sparse arrays as undefined, like JSON.stringify does.
    const items = Array.from(value, item => stableStringify(item) ?? "null");
    return `[${items.join(",")}]`;
  }

  const record = value as Record<string, unknown>;
  const members: string[] = [];
  for (const key of Object.keys(record).sort()) {
    const encoded = stableStringify(record[key]);
    if (encoded !== undefined) {
      members.push(`${JSON.stringify(key)}:${encoded}`);
    }
  }
  return `{${members.join(",")}}`;
}

/**
 * Generate a config hash for the manifest.
 *
 * Keys are sorted recursively, so the hash does not depend on property order
 * and nested values contribute to the hash. (Passing a sorted top-level key
 * list as the JSON.stringify replacer would act as a whitelist at every depth
 * and silently drop nested keys.) For flat objects the serialized form is the
 * same as JSON.stringify with a sorted key list.
 *
 * @param config - Any JSON-serializable value; null/undefined hash as "null"
 * @returns SHA256 hex digest of the canonical JSON text
 */
export function hashConfig(config: unknown): string {
  return sha256(stableStringify(config) ?? "null");
}

// ============================================================================
// ID Generation
// ============================================================================

/**
 * Generate a time-ordered UUID v7 for run IDs.
 */
export function generateRunId(): string {
  return uuidv7();
}

// ============================================================================
// File Operations
// ============================================================================

/** Media type -> file extension. A Map avoids hits on inherited Object members. */
const EXTENSION_BY_CONTENT_TYPE: ReadonlyMap<string, string> = new Map<string, string>([
  ["application/pdf", ".pdf"],
  ["text/html", ".html"],
  ["text/plain", ".txt"],
  ["text/markdown", ".md"],
  ["text/csv", ".csv"],
  ["application/json", ".json"],
  ["application/xml", ".xml"],
  ["application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".docx"],
  ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ".xlsx"],
]);

/** File extension (lowercase, with leading dot) -> media type. */
const CONTENT_TYPE_BY_EXTENSION: ReadonlyMap<string, string> = new Map<string, string>([
  [".pdf", "application/pdf"],
  [".html", "text/html"],
  [".htm", "text/html"],
  [".txt", "text/plain"],
  [".md", "text/markdown"],
  [".csv", "text/csv"],
  [".json", "application/json"],
  [".xml", "application/xml"],
  [".docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
  [".xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"],
]);

/**
 * Ensure a directory exists, creating missing parents as needed.
 *
 * @param dirPath - Directory to create
 */
export async function ensureDir(dirPath: string): Promise<void> {
  await fs.mkdir(dirPath, { recursive: true });
}

/**
 * Check if a path exists.
 *
 * @param filePath - Path to probe
 * @returns true when `fs.access` succeeds, false on any failure
 */
export async function pathExists(filePath: string): Promise<boolean> {
  try {
    await fs.access(filePath);
    return true;
  } catch {
    return false;
  }
}

/**
 * Get the file extension for a content type.
 *
 * Parameters after ";" (e.g. charset) are ignored and the media type is
 * matched case-insensitively.
 *
 * @param contentType - Content-Type value, e.g. "text/html; charset=utf-8"
 * @returns Extension including the leading dot, or ".bin" when unknown
 */
export function extensionFromContentType(contentType: string): string {
  const base = (contentType.split(";")[0] ?? "").trim().toLowerCase();
  return EXTENSION_BY_CONTENT_TYPE.get(base) ?? ".bin";
}

/**
 * Detect the content type for a file extension.
 *
 * @param ext - Extension in any letter case, with or without the leading dot
 * @returns Media type, or "application/octet-stream" when unknown
 */
export function contentTypeFromExtension(ext: string): string {
  const lowered = ext.toLowerCase();
  const dotted = lowered.startsWith(".") ? lowered : `.${lowered}`;
  return CONTENT_TYPE_BY_EXTENSION.get(dotted) ?? "application/octet-stream";
}

/**
 * Write JSONL file (append mode).
 *
 * An empty `records` array appends nothing (no stray blank line), but the
 * file is still created if it does not exist.
 *
 * @param filePath - Target file
 * @param records - Values to serialize, one JSON document per line
 */
export async function appendJsonl(filePath: string, records: unknown[]): Promise<void> {
  const payload = records.length > 0
    ? records.map(record => JSON.stringify(record)).join("\n") + "\n"
    : "";
  await fs.appendFile(filePath, payload, "utf-8");
}

/**
 * Write JSONL file (overwrite mode).
 *
 * @param filePath - Target file
 * @param records - Values to serialize, one JSON document per line
 */
export async function writeJsonl(filePath: string, records: unknown[]): Promise<void> {
  const lines = records.map(record => JSON.stringify(record)).join("\n");
  await fs.writeFile(filePath, lines ? lines + "\n" : "", "utf-8");
}

/**
 * Read JSONL file. Blank lines are skipped.
 *
 * @param filePath - File to read
 * @returns Parsed records in file order (cast to T, not validated)
 * @throws SyntaxError naming the file and 1-based line number of invalid JSON
 */
export async function readJsonl<T>(filePath: string): Promise<T[]> {
  const content = await fs.readFile(filePath, "utf-8");
  const records: T[] = [];

  for (const [index, line] of content.split("\n").entries()) {
    if (!line.trim()) {
      continue;
    }
    try {
      records.push(JSON.parse(line) as T);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new SyntaxError(`Invalid JSON on line ${index + 1} of ${filePath}: ${reason}`);
    }
  }

  return records;
}

/**
 * Write JSON file, pretty-printed with 2-space indentation.
 * Keys are written in the order JSON.stringify emits them; they are not sorted.
 *
 * @param filePath - Target file (overwritten)
 * @param data - Value to serialize
 */
export async function writeJson(filePath: string, data: unknown): Promise<void> {
  await fs.writeFile(filePath, JSON.stringify(data, null, 2), "utf-8");
}

/**
 * Read JSON file.
 *
 * @param filePath - File to read
 * @returns Parsed content (cast to T, not validated)
 */
export async function readJson<T>(filePath: string): Promise<T> {
  const content = await fs.readFile(filePath, "utf-8");
  return JSON.parse(content) as T;
}

/**
 * List files in directory (sorted for determinism).
 * Only direct children that are regular files are returned.
 *
 * @param dirPath - Directory to list
 * @returns File names (not full paths) in default string sort order
 */
export async function listFiles(dirPath: string): Promise<string[]> {
  const entries = await fs.readdir(dirPath, { withFileTypes: true });
  return entries
    .filter(entry => entry.isFile())
    .map(entry => entry.name)
    .sort();
}

// ============================================================================
// Text Utilities
// ============================================================================

/**
 * Normalize text for consistent hashing.
 *
 * Converts CRLF to LF, replaces each tab with a single space, and applies
 * Unicode NFC normalization. A lone "\r" is left untouched.
 */
export function normalizeText(text: string): string {
  return text
    .replace(/\r\n/g, "\n") // Normalize line endings
    .replace(/\t/g, " ") // Tabs to spaces
    .normalize("NFC"); // Unicode normalization
}

/**
 * Estimate token count (rough approximation: 4 characters per token, rounded up).
 */
export function estimateTokens(text: string): number {
  return Math.ceil(text.length / 4);
}

// ============================================================================
// Heading Utilities (Hierarchical Chunking)
// ============================================================================

/**
 * Parse heading level from a markdown line.
 * Detects ATX-style headings (# through ######).
 *
 * @param line - The line to parse
 * @returns Heading level (1-6) or null if not a valid heading
 *
 * @example
 * parseHeadingLevel("# Title") // returns 1
 * parseHeadingLevel("## Section") // returns 2
 * parseHeadingLevel("#NoSpace") // returns null (invalid - no space after #)
 * parseHeadingLevel("Regular text") // returns null
 */
export function parseHeadingLevel(line: string): number | null {
  const match = /^(#{1,6})\s+/.exec(line);
  return match?.[1]?.length ?? null;
}

/**
 * Get truncated parent context from a parent chunk's text.
 * Used for hierarchical chunking to provide context from parent sections.
 *
 * @param parentText - The parent chunk's text content
 * @param maxChars - Maximum characters to include (0 or less disables context)
 * @returns The first maxChars characters, or undefined if maxChars is <= 0
 *
 * @example
 * getParentContext("# Long Title\nWith content...", 50)
 * // returns "# Long Title\nWith content..." (truncated to 50 chars)
 *
 * getParentContext("Short", 100)
 * // returns "Short" (no truncation needed)
 *
 * getParentContext("Any text", 0)
 * // returns undefined (context disabled)
 */
export function getParentContext(parentText: string, maxChars: number): string | undefined {
  if (maxChars <= 0) {
    return undefined;
  }
  return parentText.slice(0, maxChars);
}

// ============================================================================
// Error Handling
// ============================================================================

/**
 * Create a standardized tool error.
 *
 * @param code - Error code
 * @param message - Human-readable message
 * @param options - Optional details, recoverable flag (defaults to false) and suggestion
 */
export function createToolError(
  code: ErrorCode,
  message: string,
  options?: {
    details?: unknown;
    recoverable?: boolean;
    suggestion?: string;
  }
): ToolError {
  return {
    isError: true,
    code,
    message,
    details: options?.details,
    recoverable: options?.recoverable ?? false,
    suggestion: options?.suggestion,
  };
}

/**
 * Render error details as text without ever throwing.
 * Falls back to a plain string form when JSON serialization is impossible
 * (for example circular references or BigInt values).
 */
function stringifyDetails(details: unknown): string {
  try {
    const encoded = JSON.stringify(details);
    if (encoded !== undefined) {
      return encoded;
    }
  } catch {
    // Fall through to the plain string form below.
  }
  return typeof details === "object"
    ? Object.prototype.toString.call(details)
    : String(details);
}

/**
 * Format error for MCP response.
 *
 * The text contains the code, the message, then the suggestion (when
 * non-empty) and the details (when not null/undefined), one per line.
 */
export function formatErrorResponse(error: ToolError): { isError: true; content: Array<{ type: "text"; text: string }> } {
  const text = [
    `Error: ${error.code}`,
    error.message,
    error.suggestion ? `Suggestion: ${error.suggestion}` : "",
    // Nullish check rather than truthiness so details such as 0 or false are kept.
    error.details != null ? `Details: ${stringifyDetails(error.details)}` : "",
  ].filter(Boolean).join("\n");

  return {
    isError: true,
    content: [{ type: "text", text }],
  };
}

// ============================================================================
// Logging
// ============================================================================

/**
 * Create an event log entry stamped with the current time.
 */
export function createLogEntry(
  level: EventLogEntry["level"],
  phase: string,
  tool: string,
  message: string,
  data?: unknown
): EventLogEntry {
  return {
    timestamp: now(),
    level,
    phase,
    tool,
    message,
    data,
  };
}

/**
 * Logger class for run operations.
 *
 * Entries are appended as JSON lines under `<runDir>/logs`: error-level
 * entries go to `errors.ndjson`, everything else to `events.ndjson`.
 * Call `init()` first; logging is expected to fail if the logs directory
 * does not exist yet.
 */
export class RunLogger {
  private readonly logsDir: string;

  constructor(runDir: string) {
    this.logsDir = path.join(runDir, "logs");
  }

  /** Create the logs directory if it is missing. */
  async init(): Promise<void> {
    await ensureDir(this.logsDir);
  }

  /** Append one entry to the file matching its level. */
  async log(entry: EventLogEntry): Promise<void> {
    const file = entry.level === "error" ? "errors.ndjson" : "events.ndjson";
    await appendJsonl(path.join(this.logsDir, file), [entry]);
  }

  async info(phase: string, tool: string, message: string, data?: unknown): Promise<void> {
    await this.log(createLogEntry("info", phase, tool, message, data));
  }

  async warn(phase: string, tool: string, message: string, data?: unknown): Promise<void> {
    await this.log(createLogEntry("warn", phase, tool, message, data));
  }

  async error(phase: string, tool: string, message: string, data?: unknown): Promise<void> {
    await this.log(createLogEntry("error", phase, tool, message, data));
  }
}

// ============================================================================
// Time Utilities
// ============================================================================

/**
 * Get current ISO8601 timestamp.
 */
export function now(): string {
  return new Date().toISOString();
}

/**
 * Measure execution time of an async function.
 *
 * @param fn - Operation to run; a rejection propagates to the caller
 * @returns The operation's result and its duration rounded to whole milliseconds
 */
export async function timed<T>(fn: () => Promise<T>): Promise<{ result: T; duration_ms: number }> {
  const start = performance.now();
  const result = await fn();
  const duration_ms = Math.round(performance.now() - start);
  return { result, duration_ms };
}

// ============================================================================
// Vector Utilities (Embeddings & Similarity)
// ============================================================================

/**
 * Generate a deterministic mock embedding from text content.
 * Uses SHA256 hash as a seed for reproducible pseudo-random generation.
 * The resulting vector is L2 normalized (unit length).
 *
 * @param text - Input text to generate embedding from
 * @param dimension - Vector dimension (default: 1536 for OpenAI compatibility)
 * @returns Normalized embedding vector of specified dimension
 *
 * @example
 * const embedding = generateMockEmbedding("Hello, world!", 1536);
 * // Returns reproducible 1536-dimensional unit vector
 */
export function generateMockEmbedding(text: string, dimension: number = 1536): number[] {
  // The first 32 bits of the digest seed the generator.
  const seed = parseInt(sha256(text).slice(0, 8), 16);
  const modulus = 2 ** 31;

  const embedding: number[] = [];
  let x = seed;
  for (let i = 0; i < dimension; i++) {
    // The product can exceed 2^53, so this is rounded double arithmetic rather
    // than an exact integer LCG. It is kept as-is so existing vectors stay stable.
    x = (x * 1103515245 + 12345) % modulus;
    embedding.push((x / modulus) * 2 - 1); // Normalize to [-1, 1]
  }

  // L2 normalize to unit vector
  const norm = Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0));
  if (norm === 0) {
    // Nothing to scale (empty or all-zero vector); avoid dividing by zero into NaN.
    return embedding;
  }
  return embedding.map(v => v / norm);
}

/**
 * Calculate cosine similarity between two vectors.
 * Returns a value between -1 (opposite) and 1 (identical).
 * Returns 0 if vectors have different lengths or zero magnitude.
 *
 * @param a - First vector
 * @param b - Second vector
 * @returns Cosine similarity score [-1, 1]
 *
 * @example
 * const sim = cosineSimilarity([1, 0, 0], [1, 0, 0]); // 1.0
 * const sim2 = cosineSimilarity([1, 0, 0], [0, 1, 0]); // 0.0
 */
export function cosineSimilarity(a: number[], b: number[]): number {
  if (a.length !== b.length) return 0;

  let dotProduct = 0;
  let normA = 0;
  let normB = 0;

  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  return denominator === 0 ? 0 : dotProduct / denominator;
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Mnehmos/mnehmos.index-foundry.mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.