de en es ja ko ru zh

docs-mcp-server

by arabold

TypeScript

MIT License

542

676

Overview InspectNew Endpoints Schema Related Servers Reviews Score

Need Help?View Source Code Report Issue

JsonPipeline.ts•5.84 kB

import { JsonDocumentSplitter } from "../../splitter/JsonDocumentSplitter"; import type { DocumentSplitter } from "../../splitter/types"; import { SPLITTER_PREFERRED_CHUNK_SIZE } from "../../utils/config"; import { MimeTypeUtils } from "../../utils/mimeTypeUtils"; import type { ContentFetcher, RawContent } from "../fetcher/types"; import type { ContentProcessorMiddleware, MiddlewareContext } from "../middleware/types"; import type { ScraperOptions } from "../types"; import { convertToString } from "../utils/buffer"; import { BasePipeline } from "./BasePipeline"; import type { ProcessedContent } from "./types"; /** * Pipeline for processing JSON content with semantic, hierarchical splitting. * Uses JsonDocumentSplitter to produce structurally faithful chunks (preserving {level, path}) * without greedy size-based merging. Greedy merging is intentionally omitted to avoid collapsing * distinct structural nodes that are required for precise hierarchical reassembly. */ export class JsonPipeline extends BasePipeline { private readonly middleware: ContentProcessorMiddleware[]; private readonly splitter: DocumentSplitter; constructor(_chunkSize = SPLITTER_PREFERRED_CHUNK_SIZE) { super(); this.middleware = []; // Structure-preserving splitter only (no greedy size merging) this.splitter = new JsonDocumentSplitter({ preserveFormatting: true, }); } canProcess(rawContent: RawContent): boolean { if (!rawContent.mimeType) return false; return MimeTypeUtils.isJson(rawContent.mimeType); } async process( rawContent: RawContent, options: ScraperOptions, fetcher?: ContentFetcher, ): Promise<ProcessedContent> { const contentString = convertToString(rawContent.content, rawContent.charset); // Validate JSON structure let parsedJson: unknown; let isValidJson = true; try { parsedJson = JSON.parse(contentString); } catch (_error) { isValidJson = false; } // For invalid JSON, return as-is for fallback text processing if (!isValidJson) { // Still split invalid JSON content for consistency const fallbackChunks = await this.splitter.splitText(contentString); return { textContent: contentString, metadata: { isValidJson: false, }, links: [], errors: [], chunks: fallbackChunks, }; } const context: MiddlewareContext = { content: contentString, source: rawContent.source, metadata: { ...this.extractMetadata(parsedJson), isValidJson, jsonStructure: this.analyzeJsonStructure(parsedJson), }, links: [], // JSON files typically don't contain links errors: [], options, fetcher, }; // Execute the middleware stack (minimal for JSON) await this.executeMiddlewareStack(this.middleware, context); // Split the content using JsonContentSplitter const chunks = await this.splitter.splitText(context.content); return { textContent: context.content, metadata: context.metadata, links: context.links, errors: context.errors, chunks, }; } /** * Extracts metadata from JSON content only when meaningful values exist */ private extractMetadata(parsedJson: unknown): { title?: string; description?: string } { const metadata: { title?: string; description?: string } = {}; if (typeof parsedJson === "object" && parsedJson !== null) { const obj = parsedJson as Record<string, unknown>; // Look for common title fields - only use if they exist and are strings const titleFields = ["title", "name", "displayName", "label"]; for (const field of titleFields) { if (field in obj && typeof obj[field] === "string" && obj[field]) { metadata.title = obj[field] as string; break; } } // Look for common description fields - only use if they exist and are strings const descFields = ["description", "summary", "about", "info"]; for (const field of descFields) { if (field in obj && typeof obj[field] === "string" && obj[field]) { metadata.description = obj[field] as string; break; } } } return metadata; } /** * Analyzes the structure of valid JSON for metadata */ private analyzeJsonStructure(parsedJson: unknown): { type: string; depth: number; itemCount?: number; propertyCount?: number; } { if (Array.isArray(parsedJson)) { return { type: "array", depth: this.calculateDepth(parsedJson), itemCount: parsedJson.length, }; } else if (typeof parsedJson === "object" && parsedJson !== null) { const obj = parsedJson as Record<string, unknown>; return { type: "object", depth: this.calculateDepth(parsedJson), propertyCount: Object.keys(obj).length, }; } else { return { type: typeof parsedJson, depth: 1, }; } } /** * Calculates the maximum nesting depth of a JSON structure */ private calculateDepth(obj: unknown, currentDepth = 1): number { if (Array.isArray(obj)) { let maxDepth = currentDepth; for (const item of obj) { if (typeof item === "object" && item !== null) { maxDepth = Math.max(maxDepth, this.calculateDepth(item, currentDepth + 1)); } } return maxDepth; } else if (typeof obj === "object" && obj !== null) { let maxDepth = currentDepth; for (const value of Object.values(obj)) { if (typeof value === "object" && value !== null) { maxDepth = Math.max(maxDepth, this.calculateDepth(value, currentDepth + 1)); } } return maxDepth; } return currentDepth; } }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/arabold/docs-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server