Skip to main content
Glama

docs-mcp-server

JsonDocumentSplitter.test.ts (13.3 kB)
import { describe, expect, it } from "vitest";
import { JsonDocumentSplitter } from "./JsonDocumentSplitter";

/** Smallest chunk shape the content-based helpers rely on. */
interface WithContent {
  content: string;
}

/** Smallest chunk shape the level/path helper relies on. */
interface WithSection {
  section: { level: number; path: readonly string[] };
}

/** Joins every chunk's content with newlines, the way the tests reassemble a document. */
function joinChunks(chunks: readonly WithContent[]): string {
  return chunks.map((chunk) => chunk.content).join("\n");
}

/** Asserts that the newline-joined chunk contents parse as JSON without throwing. */
function expectJoinedChunksToParse(chunks: readonly WithContent[]): void {
  const joined = joinChunks(chunks);
  expect(() => JSON.parse(joined)).not.toThrow();
}

/** True when at least one chunk's content contains `needle`. */
function anyChunkIncludes(chunks: readonly WithContent[], needle: string): boolean {
  return chunks.some((chunk) => chunk.content.includes(needle));
}

/** True when at least one chunk's trimmed content equals one of `candidates`. */
function anyChunkTrimsTo(chunks: readonly WithContent[], ...candidates: string[]): boolean {
  return chunks.some((chunk) => candidates.includes(chunk.content.trim()));
}

/** Asserts the invariant checked throughout this suite: section.level === section.path.length. */
function expectLevelMatchesPath(chunk: WithSection): void {
  expect(chunk.section.level).toBe(chunk.section.path.length);
}

/** Finds the first chunk satisfying `matches`, asserting that it exists before returning it. */
function findRequired<T>(chunks: readonly T[], matches: (chunk: T) => boolean): T {
  const found = chunks.find(matches);
  expect(found).toBeDefined();
  return found as T;
}

describe("JsonDocumentSplitter", () => {
  // One default-configured splitter is shared by every test that does not need options.
  const splitter = new JsonDocumentSplitter();

  describe("concatenation-friendly chunking", () => {
    it("should create building-block chunks that concatenate to valid JSON", async () => {
      const chunks = await splitter.splitText('{"name": "test", "version": "1.0.0"}');

      // Re-joining the pieces must yield parseable JSON.
      expectJoinedChunksToParse(chunks);

      // Expected pieces: an opening brace, both properties, and a closing brace.
      expect(anyChunkTrimsTo(chunks, "{")).toBe(true);
      expect(anyChunkIncludes(chunks, '"name": "test"')).toBe(true);
      expect(anyChunkIncludes(chunks, '"version": "1.0.0"')).toBe(true);
      expect(anyChunkTrimsTo(chunks, "}", "},")).toBe(true);
    });

    it("should handle comma placement correctly", async () => {
      const chunks = await splitter.splitText(
        '{"first": "value1", "second": "value2", "third": "value3"}',
      );

      // Narrow down to the chunks that carry one of the three property keys.
      const propertyKeys = ['"first"', '"second"', '"third"'];
      const propertyChunks = chunks.filter((chunk) =>
        propertyKeys.some((key) => chunk.content.includes(key)),
      );

      // A non-final property carries a trailing comma; the final one does not.
      const leading = propertyChunks.find((chunk) => chunk.content.includes('"first"'));
      const trailing = propertyChunks.find((chunk) => chunk.content.includes('"third"'));
      expect(leading?.content).toContain(",");
      expect(trailing?.content).not.toContain(",");
    });
  });

  describe("nested structure handling", () => {
    it("should create concatenable chunks for nested objects", async () => {
      const chunks = await splitter.splitText('{"config": {"debug": true, "port": 8080}}');

      expectJoinedChunksToParse(chunks);

      // Nested properties are expected to show up with leading indentation.
      // NOTE(review): the indentation inside these literals is reproduced exactly as
      // visible here; whitespace may have been collapsed upstream - confirm.
      expect(anyChunkIncludes(chunks, '"config": ')).toBe(true);
      expect(anyChunkIncludes(chunks, ' "debug": true')).toBe(true);
      expect(anyChunkIncludes(chunks, ' "port": 8080')).toBe(true);

      // The parent and the nested child both obey level === path.length,
      // and the child sits deeper than the parent.
      const parent = findRequired(chunks, (chunk) => chunk.content.includes('"config":'));
      expectLevelMatchesPath(parent);

      const child = findRequired(chunks, (chunk) => chunk.content.includes('"debug": true'));
      expectLevelMatchesPath(child);
      expect(child.section.level).toBeGreaterThan(parent.section.level);
    });

    it("should handle nested arrays correctly", async () => {
      const chunks = await splitter.splitText('{"items": [1, 2, 3]}');

      // Matches the final array element: contains "3" but carries no trailing comma.
      const isFinalItem = (chunk: WithContent): boolean =>
        chunk.content.includes("3") && !chunk.content.includes("3,");

      expectJoinedChunksToParse(chunks);

      // Array scaffolding: key, opening bracket, items, closing bracket.
      expect(anyChunkIncludes(chunks, '"items": ')).toBe(true);
      expect(anyChunkTrimsTo(chunks, "[")).toBe(true);
      expect(anyChunkIncludes(chunks, "1,")).toBe(true);
      expect(chunks.some(isFinalItem)).toBe(true); // Last item no comma
      expect(anyChunkTrimsTo(chunks, "]", "],")).toBe(true);

      // Every chunk obeys level === path.length.
      for (const chunk of chunks) {
        expectLevelMatchesPath(chunk);
      }

      // The array's own key lives directly under the root.
      const arrayKey = findRequired(chunks, (chunk) => chunk.content.includes('"items":'));
      expect(arrayKey.section.path).toEqual(["root", "items"]);
      expect(arrayKey.section.level).toBe(2);

      // Each element is addressed by a bracketed index one level below the key.
      const itemMatchers: Array<(chunk: WithContent) => boolean> = [
        (chunk) => chunk.content.includes("1,"),
        (chunk) => chunk.content.includes("2,"),
        isFinalItem,
      ];
      itemMatchers.forEach((matches, index) => {
        const item = findRequired(chunks, matches);
        expect(item.section.path).toEqual(["root", "items", `[${index}]`]);
        expect(item.section.level).toBe(3);
      });
    });

    it("should handle complex arrays with nested objects correctly", async () => {
      const chunks = await splitter.splitText(
        '{"users": [{"name": "Alice", "age": 30}, {"name": "Bob"}]}',
      );

      expectJoinedChunksToParse(chunks);

      // Every chunk obeys level === path.length.
      for (const chunk of chunks) {
        expectLevelMatchesPath(chunk);
      }

      // Properties of objects inside an array include the element index in their path.
      const expectedPaths: Array<[needle: string, path: string[]]> = [
        ['"name": "Alice"', ["root", "users", "[0]", "name"]],
        ['"age": 30', ["root", "users", "[0]", "age"]],
        ['"name": "Bob"', ["root", "users", "[1]", "name"]],
      ];
      for (const [needle, path] of expectedPaths) {
        const property = findRequired(chunks, (chunk) => chunk.content.includes(needle));
        expect(property.section.path).toEqual(path);
        expect(property.section.level).toBe(4);
      }
    });
  });

  describe("path and structure information", () => {
    it("should maintain hierarchical path information", async () => {
      const chunks = await splitter.splitText('{"a": {"b": {"c": "value"}}}');

      // Each key appears in the path of at least one chunk.
      for (const key of ["a", "b", "c"]) {
        expect(chunks.some((chunk) => chunk.section.path.includes(key))).toBe(true);
      }

      // Every chunk obeys level === path.length.
      for (const chunk of chunks) {
        expectLevelMatchesPath(chunk);
      }

      // Outermost key: directly under the root.
      const outermost = findRequired(
        chunks,
        (chunk) => chunk.section.path.includes("a") && chunk.content.includes('"a":'),
      );
      expect(outermost.section.path).toEqual(["root", "a"]);
      expect(outermost.section.level).toBe(2);

      // Innermost key: the full chain of ancestors precedes it.
      const innermost = findRequired(
        chunks,
        (chunk) => chunk.section.path.includes("c") && chunk.content.includes('"c": "value"'),
      );
      expect(innermost.section.path).toEqual(["root", "a", "b", "c"]);
      expect(innermost.section.level).toBe(4);
    });

    it("should provide appropriate level numbers", async () => {
      const chunks = await splitter.splitText('{"level1": {"level2": "value"}}');

      const underLevel1 = chunks.filter((chunk) => chunk.section.path.includes("level1"));
      const underLevel2 = chunks.filter((chunk) => chunk.section.path.includes("level2"));

      expect(underLevel1.some((chunk) => chunk.section.level >= 2)).toBe(true);
      expect(underLevel2.some((chunk) => chunk.section.level >= 3)).toBe(true);

      // Both groups obey level === path.length.
      for (const chunk of [...underLevel1, ...underLevel2]) {
        expectLevelMatchesPath(chunk);
      }
    });
  });

  describe("edge cases", () => {
    it("should handle invalid JSON gracefully", async () => {
      const malformed = '{"invalid": json}';
      const chunks = await splitter.splitText(malformed);

      // Unparseable input comes back untouched as a single "invalid-json" chunk.
      expect(chunks).toHaveLength(1);
      const [only] = chunks;
      expect(only.section.path).toEqual(["invalid-json"]);
      expect(only.content).toBe(malformed);
    });

    it("should handle empty objects", async () => {
      const chunks = await splitter.splitText("{}");

      expect(chunks.length).toBeGreaterThan(0);
      expectJoinedChunksToParse(chunks);
    });

    it("should handle empty arrays", async () => {
      const chunks = await splitter.splitText("[]");

      expect(chunks.length).toBeGreaterThan(0);
      expectJoinedChunksToParse(chunks);
    });

    it("should handle null values correctly", async () => {
      const chunks = await splitter.splitText('{"nullable": null}');

      expectJoinedChunksToParse(chunks);
      expect(anyChunkIncludes(chunks, "null")).toBe(true);
    });
  });

  describe("indentation preservation", () => {
    it("should maintain proper indentation in nested structures", async () => {
      const chunks = await splitter.splitText('{"outer": {"inner": "value"}}');

      // NOTE(review): the original comment calls this a 2-space indent, but the literal
      // as visible here has a single leading space - confirm against the repository.
      expect(anyChunkIncludes(chunks, ' "inner": "value"')).toBe(true);
    });

    it("should respect preserveFormatting option", async () => {
      const unformattedSplitter = new JsonDocumentSplitter({ preserveFormatting: false });
      const chunks = await unformattedSplitter.splitText('{"test": "value"}');

      // With formatting turned off, no chunk may begin with indentation.
      const startsIndented = chunks.some((chunk) => chunk.content.startsWith(" "));
      expect(startsIndented).toBe(false);
    });
  });

  describe("integration with GreedySplitter", () => {
    it("should create chunks that work well with GreedySplitter optimization", async () => {
      // Loaded lazily so that only this test pulls in the GreedySplitter module.
      const { GreedySplitter } = await import("./GreedySplitter");
      const jsonSplitter = new JsonDocumentSplitter();
      const greedySplitter = new GreedySplitter(jsonSplitter, 500, 1500);

      const complexJson = {
        application: {
          name: "Complex Application Configuration",
          version: "2.1.0",
          services: {
            database: {
              primary: {
                host: "primary-db.example.com",
                port: 5432,
                ssl: true,
                poolSize: 20,
              },
              replica: {
                host: "replica-db.example.com",
                port: 5432,
                ssl: true,
                poolSize: 10,
              },
            },
            cache: {
              redis: {
                host: "cache.example.com",
                port: 6379,
                database: 0,
              },
            },
          },
          features: {
            authentication: true,
            authorization: true,
            monitoring: true,
            logging: {
              level: "info",
              format: "json",
            },
          },
        },
      };
      const serialized = JSON.stringify(complexJson, null, 2);

      // The JSON splitter on its own yields many small chunks.
      const fineGrained = await jsonSplitter.splitText(serialized);
      expect(fineGrained.length).toBeGreaterThan(5);

      // Wrapping it in the greedy splitter must never increase the chunk count.
      const consolidated = await greedySplitter.splitText(serialized);
      expect(consolidated.length).toBeLessThanOrEqual(fineGrained.length);

      // Re-joining the consolidated chunks must round-trip to the original object.
      const roundTripped = JSON.parse(joinChunks(consolidated));
      expect(roundTripped).toEqual(complexJson);

      // No consolidated chunk may be empty.
      for (const chunk of consolidated) {
        expect(chunk.content.length).toBeGreaterThan(0);
      }
    });
  });
});

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/arabold/docs-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.