docs-mcp-server

Overview Schema Related Servers Score Discussions

JsonPipeline.ts•5.13 KiB

import { JsonDocumentSplitter } from "../../splitter/JsonDocumentSplitter";
import type { DocumentSplitter } from "../../splitter/types";
import { SPLITTER_PREFERRED_CHUNK_SIZE } from "../../utils/config";
import { MimeTypeUtils } from "../../utils/mimeTypeUtils";
import type { ContentFetcher, RawContent } from "../fetcher/types";
import type { ContentProcessorMiddleware, MiddlewareContext } from "../middleware/types";
import type { ScraperOptions } from "../types";
import { convertToString } from "../utils/buffer";
import { BasePipeline } from "./BasePipeline";
import type { PipelineResult } from "./types";

/**
 * Pipeline for JSON content that splits documents along their structure.
 *
 * Chunking is delegated to a JsonDocumentSplitter created with
 * `preserveFormatting: true`. No greedy size-based merging is layered on top,
 * so distinct structural nodes are not collapsed together and can be
 * reassembled hierarchically later.
 */
export class JsonPipeline extends BasePipeline {
  /** Middleware run by `process`; this pipeline registers none. */
  private readonly middleware: ContentProcessorMiddleware[] = [];

  /** Structure-preserving splitter (no greedy size merging). */
  private readonly splitter: DocumentSplitter = new JsonDocumentSplitter({
    preserveFormatting: true,
  });

  /**
   * @param _chunkSize Accepted with a default of SPLITTER_PREFERRED_CHUNK_SIZE
   *   but not read by this constructor.
   */
  constructor(_chunkSize = SPLITTER_PREFERRED_CHUNK_SIZE) {
    super();
  }

  /**
   * Reports whether this pipeline handles the given MIME type.
   *
   * @param mimeType MIME type to test; a falsy value is rejected outright.
   * @returns The result of `MimeTypeUtils.isJson` for a non-empty MIME type,
   *   otherwise `false`.
   */
  canProcess(mimeType: string): boolean {
    return mimeType ? MimeTypeUtils.isJson(mimeType) : false;
  }

  /**
   * Converts the raw content to a string, checks that it parses as JSON, and
   * splits it into chunks.
   *
   * Content that fails `JSON.parse` is still split, but the result then carries
   * only `textContent`, `links`, `errors` and `chunks`. Parseable content is
   * additionally passed through the middleware stack and the result includes
   * `title` and `contentType`.
   *
   * @param rawContent Fetched content together with its charset, source and MIME type.
   * @param options Scraper options forwarded to the middleware context.
   * @param fetcher Optional fetcher forwarded to the middleware context.
   * @returns The processed text and its chunks.
   */
  async process(
    rawContent: RawContent,
    options: ScraperOptions,
    fetcher?: ContentFetcher,
  ): Promise<PipelineResult> {
    const text = convertToString(rawContent.content, rawContent.charset);

    let parsed: unknown;
    try {
      parsed = JSON.parse(text);
    } catch (_error) {
      // Not parseable as JSON: hand back the raw text, still run through the
      // splitter so callers always receive chunks.
      const chunks = await this.splitter.splitText(text);
      return {
        textContent: text,
        links: [],
        errors: [],
        chunks,
      };
    }

    const { title } = this.extractMetadata(parsed);
    const context: MiddlewareContext = {
      content: text,
      source: rawContent.source,
      title,
      contentType: rawContent.mimeType || "application/json",
      links: [], // no links are collected for JSON content
      errors: [],
      options,
      fetcher,
    };

    // The middleware list is empty for this pipeline, but the stack is still executed.
    await this.executeMiddlewareStack(this.middleware, context);

    // Chunk whatever content the context holds after the middleware ran.
    const chunks = await this.splitter.splitText(context.content);

    return {
      title: context.title,
      contentType: context.contentType,
      textContent: context.content,
      links: context.links,
      errors: context.errors,
      chunks,
    };
  }

  /**
   * Pulls a title and description from the top level of a parsed JSON value.
   *
   * Only non-null objects are inspected. For each of the two properties the
   * first candidate key holding a non-empty string wins; a property with no
   * match is left off the returned object entirely.
   *
   * @param parsedJson The value produced by `JSON.parse`.
   * @returns An object with `title` and/or `description` when found.
   */
  private extractMetadata(parsedJson: unknown): { title?: string; description?: string } {
    const metadata: { title?: string; description?: string } = {};
    if (typeof parsedJson !== "object" || parsedJson === null) {
      return metadata;
    }

    const record = parsedJson as Record<string, unknown>;

    // Returns the value of the first candidate key that holds a non-empty string.
    const firstNonEmptyString = (candidates: readonly string[]): string | undefined => {
      const key = candidates.find(
        (name) => name in record && typeof record[name] === "string" && record[name] !== "",
      );
      return key === undefined ? undefined : (record[key] as string);
    };

    const title = firstNonEmptyString(["title", "name", "displayName", "label"]);
    if (title !== undefined) {
      metadata.title = title;
    }

    const description = firstNonEmptyString(["description", "summary", "about", "info"]);
    if (description !== undefined) {
      metadata.description = description;
    }

    return metadata;
  }

  /**
   * Computes the maximum nesting depth of a JSON value.
   *
   * A primitive or `null` reports `currentDepth`. An array or object reports
   * the deepest level reached by any of its object/array children, each of
   * which is measured at `currentDepth + 1`.
   *
   * @param obj Value to measure.
   * @param currentDepth Depth assigned to `obj` itself (defaults to 1).
   * @returns The deepest level found at or below `obj`.
   */
  private calculateDepth(obj: unknown, currentDepth = 1): number {
    if (typeof obj !== "object" || obj === null) {
      return currentDepth;
    }

    const children: unknown[] = Array.isArray(obj) ? obj : Object.values(obj);
    return children.reduce<number>(
      (deepest, child) =>
        typeof child === "object" && child !== null
          ? Math.max(deepest, this.calculateDepth(child, currentDepth + 1))
          : deepest,
      currentDepth,
    );
  }
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/arabold/docs-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

JsonPipeline.ts•5.13 KiB