Skip to main content
Glama
run-manager.ts12.2 kB
/** * IndexFoundry-MCP: Run Manager * * Manages run directories, manifests, and phase coordination. * Ensures isolated, immutable workspaces for each pipeline run. * * Copyright (c) 2024 vario.automation * Proprietary and confidential. All rights reserved. */ import * as path from "path"; import * as fs from "fs/promises"; import type { RunManifest, PhaseManifest, IndexFoundryConfig } from "./types.js"; import { generateRunId, ensureDir, pathExists, writeJson, readJson, hashConfig, now, RunLogger, } from "./utils.js"; // ============================================================================ // Default Configuration // ============================================================================ const DEFAULT_CONFIG: IndexFoundryConfig = { version: "1.0.0", storage: { runs_dir: "./runs", max_runs: 100, cleanup_policy: "fifo", }, defaults: { connect: { timeout_ms: 30000, max_file_size_mb: 50, user_agent: "IndexFoundry/1.0", }, extract: { pdf_extractor: "pdf-parse", pdf_mode: "layout", ocr_engine: "tesseract", }, normalize: { chunk_strategy: "recursive", max_chars: 1500, overlap_chars: 150, }, index: { embedding_provider: "openai", embedding_model: "text-embedding-3-small", batch_size: 100, }, }, pinned_versions: { "pdf-parse": "1.1.1", "cheerio": "1.0.0", }, security: { allowed_domains: [], blocked_domains: ["localhost", "127.0.0.1"], max_concurrent_fetches: 5, }, }; // ============================================================================ // Run Manager Class // ============================================================================ export class RunManager { private baseDir: string; private config: IndexFoundryConfig; constructor(baseDir: string, config?: Partial<IndexFoundryConfig>) { this.baseDir = baseDir; this.config = { ...DEFAULT_CONFIG, ...config }; } // -------------------------------------------------------------------------- // Run Lifecycle // -------------------------------------------------------------------------- /** * Create a new run directory with all subdirectories */ async createRun(runId?: string): Promise<{ runId: string; runDir: string; logger: RunLogger }> { const id = runId || generateRunId(); const runDir = this.getRunDir(id); // Create directory structure await ensureDir(path.join(runDir, "raw")); await ensureDir(path.join(runDir, "extracted")); await ensureDir(path.join(runDir, "normalized")); await ensureDir(path.join(runDir, "indexed")); await ensureDir(path.join(runDir, "served")); await ensureDir(path.join(runDir, "logs")); // Initialize manifest const manifest: RunManifest = { run_id: id, created_at: now(), status: "running", config_hash: hashConfig(this.config), phases: {}, totals: { sources_fetched: 0, documents_extracted: 0, chunks_created: 0, vectors_indexed: 0, errors_encountered: 0, }, timing: { total_duration_ms: 0, phase_durations: {}, }, }; await writeJson(path.join(runDir, "manifest.json"), manifest); await writeJson(path.join(runDir, "config.json"), this.config); const logger = new RunLogger(runDir); await logger.init(); return { runId: id, runDir, logger }; } /** * Ensure a run exists, creating it if necessary. * This is the method tools should call to guarantee run infrastructure exists. */ async ensureRun(runId: string): Promise<{ runId: string; runDir: string; isNew: boolean }> { const runDir = this.getRunDir(runId); const manifestPath = path.join(runDir, "manifest.json"); // Check if run already exists if (await pathExists(manifestPath)) { return { runId, runDir, isNew: false }; } // Create the run await this.createRun(runId); return { runId, runDir, isNew: true }; } /** * Get an existing run's context */ async getRun(runId: string): Promise<{ runDir: string; manifest: RunManifest; logger: RunLogger }> { const runDir = this.getRunDir(runId); if (!await pathExists(runDir)) { throw new Error(`Run not found: ${runId}`); } const manifest = await readJson<RunManifest>(path.join(runDir, "manifest.json")); const logger = new RunLogger(runDir); return { runDir, manifest, logger }; } /** * Update run manifest */ async updateManifest(runId: string, updates: Partial<RunManifest>): Promise<RunManifest> { const runDir = this.getRunDir(runId); const manifest = await readJson<RunManifest>(path.join(runDir, "manifest.json")); const updated: RunManifest = { ...manifest, ...updates, totals: { ...manifest.totals, ...updates.totals }, timing: { ...manifest.timing, ...updates.timing }, phases: { ...manifest.phases, ...updates.phases }, }; await writeJson(path.join(runDir, "manifest.json"), updated); return updated; } /** * Mark a phase as started */ async startPhase( runId: string, phaseName: keyof RunManifest["phases"] ): Promise<PhaseManifest> { const phase: PhaseManifest = { started_at: now(), status: "running", inputs: { count: 0, hashes: [] }, outputs: { count: 0, hashes: [] }, tool_version: this.config.version, errors: [], }; await this.updateManifest(runId, { phases: { [phaseName]: phase }, }); return phase; } /** * Mark a phase as completed */ async completePhase( runId: string, phaseName: keyof RunManifest["phases"], result: Partial<PhaseManifest> ): Promise<void> { const { manifest } = await this.getRun(runId); const phase = manifest.phases[phaseName]!; const completed: PhaseManifest = { ...phase, ...result, completed_at: now(), status: result.errors?.length ? "failed" : "completed", }; // Calculate phase duration const startTime = new Date(phase.started_at).getTime(); const endTime = new Date(completed.completed_at!).getTime(); const duration = endTime - startTime; await this.updateManifest(runId, { phases: { [phaseName]: completed }, timing: { total_duration_ms: manifest.timing.total_duration_ms, phase_durations: { ...manifest.timing.phase_durations, [phaseName]: duration, }, }, }); } /** * Complete the entire run */ async completeRun(runId: string, status: RunManifest["status"]): Promise<RunManifest> { const { manifest } = await this.getRun(runId); const completedAt = now(); const startTime = new Date(manifest.created_at).getTime(); const endTime = new Date(completedAt).getTime(); const totalDuration = endTime - startTime; return this.updateManifest(runId, { completed_at: completedAt, status, timing: { ...manifest.timing, total_duration_ms: totalDuration, }, }); } // -------------------------------------------------------------------------- // Path Helpers // -------------------------------------------------------------------------- getRunDir(runId: string): string { return path.join(this.baseDir, this.config.storage.runs_dir, runId); } getRawDir(runId: string): string { return path.join(this.getRunDir(runId), "raw"); } getExtractedDir(runId: string): string { return path.join(this.getRunDir(runId), "extracted"); } getNormalizedDir(runId: string): string { return path.join(this.getRunDir(runId), "normalized"); } getIndexedDir(runId: string): string { return path.join(this.getRunDir(runId), "indexed"); } getServedDir(runId: string): string { return path.join(this.getRunDir(runId), "served"); } // -------------------------------------------------------------------------- // Run Queries // -------------------------------------------------------------------------- /** * List all runs */ async listRuns(options?: { status?: "all" | "completed" | "running" | "failed"; limit?: number; before?: string; after?: string; }): Promise<Array<{ run_id: string; status: string; created_at: string }>> { const runsDir = path.join(this.baseDir, this.config.storage.runs_dir); if (!await pathExists(runsDir)) { return []; } const entries = await fs.readdir(runsDir, { withFileTypes: true }); const runs: Array<{ run_id: string; status: string; created_at: string }> = []; for (const entry of entries) { if (!entry.isDirectory()) continue; const manifestPath = path.join(runsDir, entry.name, "manifest.json"); if (!await pathExists(manifestPath)) continue; try { const manifest = await readJson<RunManifest>(manifestPath); // Apply filters if (options?.status && options.status !== "all" && manifest.status !== options.status) { continue; } if (options?.before && manifest.created_at >= options.before) { continue; } if (options?.after && manifest.created_at <= options.after) { continue; } runs.push({ run_id: manifest.run_id, status: manifest.status, created_at: manifest.created_at, }); } catch { // Skip invalid manifests } } // Sort by created_at descending (newest first) runs.sort((a, b) => b.created_at.localeCompare(a.created_at)); // Apply limit if (options?.limit) { return runs.slice(0, options.limit); } return runs; } /** * Cleanup old runs */ async cleanup(options: { older_than_days: number; keep_manifests?: boolean; dry_run?: boolean; }): Promise<{ deleted: string[]; errors: string[] }> { const cutoff = new Date(); cutoff.setDate(cutoff.getDate() - options.older_than_days); const cutoffIso = cutoff.toISOString(); const runs = await this.listRuns({ before: cutoffIso }); const deleted: string[] = []; const errors: string[] = []; for (const run of runs) { const runDir = this.getRunDir(run.run_id); if (options.dry_run) { deleted.push(run.run_id); continue; } try { if (options.keep_manifests) { // Delete everything except manifest.json const entries = await fs.readdir(runDir, { withFileTypes: true }); for (const entry of entries) { if (entry.name === "manifest.json") continue; const fullPath = path.join(runDir, entry.name); await fs.rm(fullPath, { recursive: true }); } } else { await fs.rm(runDir, { recursive: true }); } deleted.push(run.run_id); } catch (e) { errors.push(`Failed to delete ${run.run_id}: ${e}`); } } return { deleted, errors }; } // -------------------------------------------------------------------------- // Configuration Access // -------------------------------------------------------------------------- getConfig(): IndexFoundryConfig { return this.config; } getDefaults(): IndexFoundryConfig["defaults"] { return this.config.defaults; } } // ============================================================================ // Singleton Instance // ============================================================================ let globalManager: RunManager | null = null; export function initRunManager(baseDir: string, config?: Partial<IndexFoundryConfig>): RunManager { globalManager = new RunManager(baseDir, config); return globalManager; } export function getRunManager(): RunManager { if (!globalManager) { throw new Error("RunManager not initialized. Call initRunManager first."); } return globalManager; }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Mnehmos/mnehmos.index-foundry.mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server