Skip to main content
Glama
types.ts (9.17 kB)
/**
 * IndexFoundry-MCP Core Type Definitions
 *
 * These types define the canonical data structures used throughout the pipeline.
 * All types are designed for determinism and auditability.
 */

// =============================================================================
// Document & Chunk Types
// =============================================================================

/** Origin of a document: its kind, location, retrieval time and content hash. */
export interface DocumentSource {
  type: "pdf" | "html" | "csv" | "markdown" | "docx" | "url" | "repo";
  uri: string;
  retrieved_at: string; // ISO8601
  content_hash: string; // SHA256 of raw bytes
}

/** Text payload of a chunk together with its hash and size figures. */
export interface ChunkContent {
  text: string;
  text_hash: string; // SHA256 of normalized text
  char_count: number;
  token_count_approx: number;
}

/**
 * Location of a chunk inside its source document.
 *
 * Byte offsets are always present; page, section and line information are
 * optional.
 */
export interface ChunkPosition {
  byte_start: number;
  byte_end: number;
  page?: number;
  section?: string;
  line_start?: number;
  line_end?: number;
}

/** Descriptive metadata attached to a chunk; only `content_type` is required. */
export interface ChunkMetadata {
  content_type: string;
  language?: string;
  title?: string;
  tags?: string[];
  /** Free-form extension point; values are untyped and must be narrowed before use. */
  custom?: Record<string, unknown>;
}

/** A single chunk of a document, bundling identity, source, content, position and metadata. */
export interface DocumentChunk {
  doc_id: string;
  chunk_id: string;
  chunk_index: number;
  source: DocumentSource;
  content: ChunkContent;
  position: ChunkPosition;
  metadata: ChunkMetadata;
}

// =============================================================================
// Manifest Types
// =============================================================================

/** One error entry as stored in a phase manifest. */
export interface ErrorRecord {
  timestamp: string;
  code: string;
  message: string;
  details?: unknown;
  recoverable: boolean;
}

/**
 * Record of one pipeline phase: timing, status, counts and hashes of its
 * inputs and outputs, the tool version, and the errors collected.
 */
export interface PhaseManifest {
  started_at: string;
  completed_at?: string;
  status: "pending" | "running" | "completed" | "failed";
  inputs: {
    count: number;
    hashes: string[];
  };
  outputs: {
    count: number;
    hashes: string[];
  };
  tool_version: string;
  errors: ErrorRecord[];
}

/**
 * Manifest for a whole run: identity, status, the per-phase manifests (each
 * optional), aggregate totals and timing.
 */
export interface RunManifest {
  run_id: string;
  created_at: string;
  completed_at?: string;
  status: "running" | "completed" | "failed" | "partial";
  config_hash: string;
  phases: {
    connect?: PhaseManifest;
    extract?: PhaseManifest;
    normalize?: PhaseManifest;
    index?: PhaseManifest;
    serve?: PhaseManifest;
  };
  totals: {
    sources_fetched: number;
    documents_extracted: number;
    chunks_created: number;
    vectors_indexed: number;
    errors_encountered: number;
  };
  timing: {
    total_duration_ms: number;
    /** Durations keyed by string; presumably the phase name — confirm against the writer. */
    phase_durations: Record<string, number>;
  };
}

// =============================================================================
// Artifact Types
// =============================================================================

/** A fetched artifact: where it is stored, its hash, size, content type and origin. */
export interface RawArtifact {
  path: string;
  sha256: string;
  size_bytes: number;
  content_type: string;
  fetched_at: string;
  source_uri: string;
}

/** A {@link RawArtifact} extended with PDF-specific facts and document metadata. */
export interface PdfArtifact extends RawArtifact {
  page_count: number;
  pdf_version: string;
  has_ocr_layer: boolean;
  pdf_metadata: {
    title?: string;
    author?: string;
    created?: string;
    modified?: string;
  };
}

/** Extraction result for a single page. */
export interface PageExtraction {
  page: number;
  text: string;
  char_count: number;
  is_empty: boolean;
  ocr_used: boolean;
  /** Optional confidence value; scale is not defined here — TODO confirm with the producer. */
  confidence?: number;
}

/** Summary of an extraction: extractor version, mode, warnings and page/char statistics. */
export interface ExtractionReport {
  extractor_version: string;
  mode_used: string;
  warnings: string[];
  stats: {
    pages_processed: number;
    pages_empty: number;
    pages_ocr_fallback: number;
    chars_extracted: number;
  };
}

// =============================================================================
// Embedding & Index Types
// =============================================================================

/** An embedding vector for one chunk, with the model and dimensionality recorded alongside. */
export interface EmbeddingRecord {
  chunk_id: string;
  vector: number[];
  model: string;
  dimensions: number;
  embedded_at: string;
}

/** Counters and duration reported for a vector upsert. */
export interface UpsertStats {
  vectors_sent: number;
  vectors_inserted: number;
  vectors_updated: number;
  vectors_failed: number;
  duration_ms: number;
}

/** Description of a vector collection: name, optional namespace, model, dimensions and size. */
export interface VectorManifest {
  collection: string;
  namespace?: string;
  model_used: string;
  dimensions: number;
  metadata_schema: string[];
  created_at: string;
  total_vectors: number;
}

// =============================================================================
// Retrieval Profile Types
// =============================================================================

/** Settings for hybrid search. */
export interface HybridConfig {
  alpha: number; // 0-1, weight for semantic vs keyword
  fusion_method: "rrf" | "weighted_sum";
}

/** Settings for the optional reranking step. */
export interface RerankerConfig {
  enabled: boolean;
  model?: string;
  top_k_to_rerank: number;
}

/**
 * Retrieval settings: default result count, the search modes offered,
 * optional hybrid/reranker configuration, and the filters that may be used
 * (each a field name with its permitted operators).
 */
export interface RetrievalProfile {
  default_top_k: number;
  search_modes: Array<"semantic" | "keyword" | "hybrid">;
  hybrid_config?: HybridConfig;
  reranker?: RerankerConfig;
  allowed_filters: Array<{
    field: string;
    operators: Array<"eq" | "neq" | "gt" | "gte" | "lt" | "lte" | "in" | "contains">;
  }>;
}

// =============================================================================
// Configuration Types
// =============================================================================

/** Default settings for the connect phase. */
export interface ConnectDefaults {
  timeout_ms: number;
  max_file_size_mb: number;
  user_agent: string;
}

/** Default settings for the extract phase. */
export interface ExtractDefaults {
  pdf_extractor: string;
  pdf_mode: "layout" | "plain" | "ocr";
  ocr_engine: string;
}

/** Default settings for the normalize phase. */
export interface NormalizeDefaults {
  chunk_strategy:
    | "recursive"
    | "fixed_chars"
    | "by_paragraph"
    | "by_heading"
    | "by_page"
    | "by_sentence";
  max_chars: number;
  overlap_chars: number;
}

/** Default settings for the index phase. */
export interface IndexDefaults {
  embedding_provider: "openai" | "cohere" | "sentence-transformers" | "local";
  embedding_model: string;
  batch_size: number;
}

/** Server-wide configuration: storage, per-phase defaults, pinned versions and security limits. */
export interface ServerConfig {
  version: string;
  storage: {
    runs_dir: string;
    max_runs: number;
    cleanup_policy: "fifo" | "lru" | "manual";
  };
  defaults: {
    connect: ConnectDefaults;
    extract: ExtractDefaults;
    normalize: NormalizeDefaults;
    index: IndexDefaults;
  };
  pinned_versions: Record<string, string>;
  security: {
    allowed_domains: string[];
    blocked_domains: string[];
    max_concurrent_fetches: number;
  };
}

// =============================================================================
// Tool Response Types
// =============================================================================

/** Successful branch of {@link ToolResult}; discriminated by `success: true`. */
export interface ToolSuccess<T> {
  success: true;
  data: T;
}

/** Failed branch of {@link ToolResult}; discriminated by `success: false`. */
export interface ToolFailure {
  success: false;
  error: {
    code: string;
    message: string;
    details?: unknown;
    recoverable: boolean;
    suggestion?: string;
  };
}

/**
 * Result of a tool call. Check `success` to narrow to either
 * {@link ToolSuccess} (carrying `data`) or {@link ToolFailure} (carrying `error`).
 */
export type ToolResult<T> = ToolSuccess<T> | ToolFailure;

// =============================================================================
// Pipeline Types
// =============================================================================

/**
 * One input source for a pipeline run.
 *
 * Every locator field is optional at the type level; which ones are expected
 * for each `type` is not enforced here.
 */
export interface PipelineSource {
  type: "url" | "sitemap" | "folder" | "pdf";
  url?: string;
  path?: string;
  glob?: string;
  max_pages?: number;
}

/**
 * Configuration for a pipeline run. `connect` and `index` are required; the
 * `extract`, `normalize` and `serve` sections and the top-level flags are optional.
 */
export interface PipelineConfig {
  run_id?: string;
  connect: {
    sources: PipelineSource[];
    allowed_domains?: string[];
  };
  extract?: {
    pdf_mode?: "layout" | "plain" | "ocr";
    preserve_headings?: boolean;
  };
  normalize?: {
    // NOTE(review): narrower than NormalizeDefaults["chunk_strategy"] — confirm this is intentional.
    chunk_strategy?: "recursive" | "by_paragraph" | "by_page";
    max_chars?: number;
    overlap_chars?: number;
    dedupe?: boolean;
    detect_language?: boolean;
  };
  index: {
    embedding_model: string;
    vector_db: {
      provider: "milvus" | "pinecone" | "weaviate" | "qdrant" | "chroma" | "local";
      collection: string;
      connection?: Record<string, unknown>;
    };
  };
  serve?: {
    auto_start?: boolean;
    port?: number;
  };
  force?: boolean;
  stop_on_error?: boolean;
}

/** Outcome of one phase within a pipeline run. */
export interface PhaseResult {
  status: "completed" | "skipped" | "failed";
  duration_ms: number;
  artifacts_created: number;
  errors: string[];
}

/**
 * Outcome of a pipeline run: overall status, manifest location, per-phase
 * results (`serve` is optional), a numeric summary and an optional retrieval endpoint.
 */
export interface PipelineResult {
  run_id: string;
  status: "completed" | "partial" | "failed";
  manifest_path: string;
  phases: {
    connect: PhaseResult;
    extract: PhaseResult;
    normalize: PhaseResult;
    index: PhaseResult;
    serve?: PhaseResult;
  };
  summary: {
    sources_fetched: number;
    chunks_indexed: number;
    duration_ms: number;
    errors: number;
  };
  retrieval_endpoint?: string;
}

// =============================================================================
// Event Log Types
// =============================================================================

/** Severity levels available to {@link LogEvent}. */
export type LogLevel = "debug" | "info" | "warn" | "error";

/** A single log entry, tagged with the phase and tool it refers to. */
export interface LogEvent {
  timestamp: string;
  level: LogLevel;
  phase: string;
  tool: string;
  message: string;
  details?: unknown;
}

// =============================================================================
// Utility Types
// =============================================================================

/**
 * Makes every property of `T` optional, recursing into any property whose
 * type extends `object`.
 *
 * NOTE(review): array and function types also extend `object`, so they are
 * mapped as well — confirm that is acceptable wherever this is applied.
 */
export type DeepPartial<T> = {
  [P in keyof T]?: T[P] extends object ? DeepPartial<T[P]> : T[P];
};

/**
 * Requires at least one of `Keys` to be present on `T`.
 *
 * Properties outside `Keys` are kept as declared. For each key `K` in `Keys`
 * a variant is built in which `K` is required and the remaining `Keys` are
 * optional; the result is the union of those variants.
 */
export type RequireAtLeastOne<T, Keys extends keyof T = keyof T> = Pick<
  T,
  Exclude<keyof T, Keys>
> &
  {
    [K in Keys]-?: Required<Pick<T, K>> & Partial<Pick<T, Exclude<Keys, K>>>;
  }[Keys];

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Mnehmos/mnehmos.index-foundry.mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.