Skip to main content
Glama
MarkdownAssemblyStrategy.test.ts (22.4 kB)
import { beforeEach, describe, expect, it, vi } from "vitest";
import type { DocumentStore } from "../../DocumentStore";
import type { DbPageChunk } from "../../types";
import { MarkdownAssemblyStrategy } from "./MarkdownAssemblyStrategy";

/**
 * Builds a mock DocumentStore exposing only the lookup methods these tests
 * use. Every method is a `vi.fn()` that resolves to "nothing found" (`null`
 * or `[]`) until a test overrides it.
 */
const createMockDocumentStore = () =>
  ({
    findParentChunk: vi.fn().mockResolvedValue(null),
    findPrecedingSiblingChunks: vi.fn().mockResolvedValue([]),
    findSubsequentSiblingChunks: vi.fn().mockResolvedValue([]),
    findChildChunks: vi.fn().mockResolvedValue([]),
    findChunksByIds: vi.fn().mockResolvedValue([]),
  }) as Partial<DocumentStore> as DocumentStore;

/**
 * Test fixtures - creating a "document universe".
 *
 * Returns a fresh set of chunks on every call: a target chunk, its parent,
 * four children, two preceding siblings, three subsequent siblings, and an
 * unrelated orphan chunk on a different URL.
 */
const createDocumentUniverse = () => {
  return {
    // Target chunk (the one we're finding relations for)
    target: {
      id: "target",
      content: "Target content",
      url: "https://example.com",
      metadata: { path: ["Chapter 1", "Section 1.1"] },
    } as DbPageChunk,

    // Parent
    parent: {
      id: "parent",
      content: "Parent section content",
      url: "https://example.com",
      metadata: { path: ["Chapter 1"] },
    } as DbPageChunk,

    // Children (limit = 3, so child4 should be excluded)
    child1: {
      id: "child1",
      content: "First child content",
      url: "https://example.com",
      metadata: {
        path: ["Chapter 1", "Section 1.1", "Subsection A"],
      },
    } as DbPageChunk,
    child2: {
      id: "child2",
      content: "Second child content",
      url: "https://example.com",
      metadata: {
        path: ["Chapter 1", "Section 1.1", "Subsection B"],
      },
    } as DbPageChunk,
    child3: {
      id: "child3",
      content: "Third child content",
      url: "https://example.com",
      metadata: {
        path: ["Chapter 1", "Section 1.1", "Subsection C"],
      },
    } as DbPageChunk,
    child4: {
      id: "child4",
      content: "Fourth child content (should be excluded)",
      url: "https://example.com",
      metadata: {
        path: ["Chapter 1", "Section 1.1", "Subsection D"],
      },
    } as DbPageChunk,

    // Preceding siblings (limit = 1, so only prev1 should be included)
    prev1: {
      id: "prev1",
      content: "Previous sibling 1",
      url: "https://example.com",
      metadata: { path: ["Chapter 1", "Section 1.0"] },
    } as DbPageChunk,
    prev2: {
      id: "prev2",
      content: "Previous sibling 2 (should be excluded)",
      url: "https://example.com",
      metadata: { path: ["Chapter 1", "Section 0.9"] },
    } as DbPageChunk,

    // Subsequent siblings (limit = 2)
    next1: {
      id: "next1",
      content: "Next sibling 1",
      url: "https://example.com",
      metadata: { path: ["Chapter 1", "Section 1.2"] },
    } as DbPageChunk,
    next2: {
      id: "next2",
      content: "Next sibling 2",
      url: "https://example.com",
      metadata: { path: ["Chapter 1", "Section 1.3"] },
    } as DbPageChunk,
    next3: {
      id: "next3",
      content: "Next sibling 3 (should be excluded)",
      url: "https://example.com",
      metadata: { path: ["Chapter 1", "Section 1.4"] },
    } as DbPageChunk,

    // Orphan chunk (no relations)
    orphan: {
      id: "orphan",
      content: "Orphan content",
      url: "https://example.com/other",
      metadata: { path: ["Standalone"] },
    } as DbPageChunk,
  };
};

describe("MarkdownAssemblyStrategy", () => {
  let strategy: MarkdownAssemblyStrategy;
  let mockStore: DocumentStore;
  let universe: ReturnType<typeof createDocumentUniverse>;

  // Each test gets a fresh strategy, mock store, and fixture set.
  beforeEach(() => {
    strategy = new MarkdownAssemblyStrategy();
    mockStore = createMockDocumentStore();
    universe = createDocumentUniverse();
  });

  // Helper function for setting up comprehensive mock store responses.
  // Relations are only returned when the queried id is "target"; every
  // other id resolves to "nothing found".
  const setupFullMockStore = () => {
    vi.mocked(mockStore.findParentChunk).mockImplementation(async (_lib, _ver, id) => {
      if (id === "target") return universe.parent;
      return null;
    });

    vi.mocked(mockStore.findPrecedingSiblingChunks).mockImplementation(
      async (_lib, _ver, id, limit) => {
        // Only prev1, respecting limit of 1
        if (id === "target") return [universe.prev1].slice(0, limit);
        return [];
      },
    );

    vi.mocked(mockStore.findSubsequentSiblingChunks).mockImplementation(
      async (_lib, _ver, id, limit) => {
        // Only next1 & next2, respecting limit of 2
        if (id === "target") return [universe.next1, universe.next2].slice(0, limit);
        return [];
      },
    );

    vi.mocked(mockStore.findChildChunks).mockImplementation(
      async (_lib, _ver, id, limit) => {
        // Only first 3 children, respecting limit of 3
        if (id === "target")
          return [universe.child1, universe.child2, universe.child3].slice(0, limit);
        return [];
      },
    );

    // Resolve requested ids against every chunk in the fixture universe.
    vi.mocked(mockStore.findChunksByIds).mockImplementation(async (_lib, _ver, ids) => {
      const idSet = new Set(ids);
      return Object.values(universe).filter((doc) => idSet.has(doc.id as string));
    });
  };

  describe("canHandle", () => {
    it("handles markdown content types", () => {
      expect(strategy.canHandle("text/markdown")).toBe(true);
      expect(strategy.canHandle("text/x-markdown")).toBe(true);
    });

    it("handles HTML content types", () => {
      expect(strategy.canHandle("text/html")).toBe(true);
      expect(strategy.canHandle("application/xhtml+xml")).toBe(true);
    });

    it("handles plain text content types", () => {
      expect(strategy.canHandle("text/plain")).toBe(true);
      expect(strategy.canHandle("text/css")).toBe(true);
    });

    it("serves as fallback for unknown types", () => {
      expect(strategy.canHandle(undefined)).toBe(true);
      expect(strategy.canHandle("application/unknown")).toBe(true);
    });

    it("rejects structured content types (delegated to HierarchicalAssemblyStrategy)", () => {
      expect(strategy.canHandle("text/x-typescript")).toBe(false);
      expect(strategy.canHandle("application/json")).toBe(false);
      expect(strategy.canHandle("text/x-python")).toBe(false);
    });
  });

  describe("assembleContent", () => {
    it("joins chunks with double newlines", () => {
      const chunks = [universe.target, universe.child1, universe.child2];
      const result = strategy.assembleContent(chunks);

      expect(result).toBe(
        "Target content\n\nFirst child content\n\nSecond child content",
      );
    });

    it("handles empty chunk array", () => {
      const result = strategy.assembleContent([]);
      expect(result).toBe("");
    });

    it("handles single chunk", () => {
      const result = strategy.assembleContent([universe.target]);
      expect(result).toBe("Target content");
    });

    it("preserves chunk order", () => {
      const chunks = [universe.child2, universe.target, universe.child1];
      const result = strategy.assembleContent(chunks);

      expect(result).toBe(
        "Second child content\n\nTarget content\n\nFirst child content",
      );
    });

    it("handles chunks with existing newlines", () => {
      const chunkWithNewlines = {
        id: "newlines",
        content: "Line 1\nLine 2\n\nLine 4",
        metadata: {},
      } as DbPageChunk;

      const result = strategy.assembleContent([universe.target, chunkWithNewlines]);
      expect(result).toBe("Target content\n\nLine 1\nLine 2\n\nLine 4");
    });
  });

  describe("selectChunks", () => {
    describe("single chunk scenarios", () => {
      it("chunk with no relations (orphan)", async () => {
        // Setup: orphan has no relations
        vi.mocked(mockStore.findParentChunk).mockResolvedValue(null);
        vi.mocked(mockStore.findPrecedingSiblingChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findSubsequentSiblingChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findChildChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findChunksByIds).mockResolvedValue([universe.orphan]);

        const result = await strategy.selectChunks(
          "lib",
          "1.0.0",
          [universe.orphan],
          mockStore,
        );

        expect(result).toEqual([universe.orphan]);
        expect(mockStore.findChunksByIds).toHaveBeenCalledWith("lib", "1.0.0", [
          "orphan",
        ]);
      });

      it("chunk with only parent", async () => {
        vi.mocked(mockStore.findParentChunk).mockResolvedValue(universe.parent);
        vi.mocked(mockStore.findPrecedingSiblingChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findSubsequentSiblingChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findChildChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findChunksByIds).mockResolvedValue([
          universe.target,
          universe.parent,
        ]);

        const result = await strategy.selectChunks(
          "lib",
          "1.0.0",
          [universe.target],
          mockStore,
        );

        // Verify behavior: includes target and its parent
        expect(result).toHaveLength(2);
        expect(result).toContain(universe.target);
        expect(result).toContain(universe.parent);
      });

      it("chunk with only children", async () => {
        vi.mocked(mockStore.findParentChunk).mockResolvedValue(null);
        vi.mocked(mockStore.findPrecedingSiblingChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findSubsequentSiblingChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findChildChunks).mockResolvedValue([
          universe.child1,
          universe.child2,
        ]);
        vi.mocked(mockStore.findChunksByIds).mockResolvedValue([
          universe.target,
          universe.child1,
          universe.child2,
        ]);

        const result = await strategy.selectChunks(
          "lib",
          "1.0.0",
          [universe.target],
          mockStore,
        );

        // Verify behavior: includes target and its children
        expect(result).toHaveLength(3);
        expect(result).toContain(universe.target);
        expect(result).toContain(universe.child1);
        expect(result).toContain(universe.child2);
      });

      it("chunk with only siblings", async () => {
        vi.mocked(mockStore.findParentChunk).mockResolvedValue(null);
        vi.mocked(mockStore.findPrecedingSiblingChunks).mockResolvedValue([
          universe.prev1,
        ]);
        vi.mocked(mockStore.findSubsequentSiblingChunks).mockResolvedValue([
          universe.next1,
          universe.next2,
        ]);
        vi.mocked(mockStore.findChildChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findChunksByIds).mockResolvedValue([
          universe.target,
          universe.prev1,
          universe.next1,
          universe.next2,
        ]);

        const result = await strategy.selectChunks(
          "lib",
          "1.0.0",
          [universe.target],
          mockStore,
        );

        // Verify behavior: includes target and its siblings
        expect(result).toHaveLength(4);
        expect(result).toContain(universe.target);
        expect(result).toContain(universe.prev1);
        expect(result).toContain(universe.next1);
        expect(result).toContain(universe.next2);
      });

      it("chunk with full family (parent + siblings + children)", async () => {
        setupFullMockStore();

        const result = await strategy.selectChunks(
          "lib",
          "1.0.0",
          [universe.target],
          mockStore,
        );

        // Should include target + parent + 1 preceding + 2 subsequent + 3 children = 8 chunks
        expect(result).toHaveLength(8);
        expect(result).toContain(universe.target);
        expect(result).toContain(universe.parent);
        expect(result).toContain(universe.prev1);
        expect(result).toContain(universe.next1);
        expect(result).toContain(universe.next2);
        expect(result).toContain(universe.child1);
        expect(result).toContain(universe.child2);
        expect(result).toContain(universe.child3);

        // Should NOT include prev2 (exceeds preceding limit of 1)
        expect(result).not.toContain(universe.prev2);
        // Should NOT include next3 (exceeds subsequent limit of 2)
        expect(result).not.toContain(universe.next3);
        // Should NOT include child4 (exceeds child limit of 3)
        expect(result).not.toContain(universe.child4);
      });
    });

    describe("multiple initial chunks", () => {
      it("chunks from same document", async () => {
        // Setup: both child1 and child2 relate to target
        vi.mocked(mockStore.findParentChunk).mockImplementation(
          async (_lib, _ver, id) => {
            if (id === "child1" || id === "child2") return universe.target;
            return null;
          },
        );
        vi.mocked(mockStore.findPrecedingSiblingChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findSubsequentSiblingChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findChildChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findChunksByIds).mockResolvedValue([
          universe.child1,
          universe.child2,
          universe.target,
        ]);

        const result = await strategy.selectChunks(
          "lib",
          "1.0.0",
          [universe.child1, universe.child2],
          mockStore,
        );

        // Verify behavior: includes both children plus their shared parent (target), deduplicated
        expect(result).toHaveLength(3);
        expect(result).toContain(universe.child1);
        expect(result).toContain(universe.child2);
        expect(result).toContain(universe.target);
      });

      it("chunks with overlapping relations (deduplication)", async () => {
        // Setup: target and parent both relate to each other
        vi.mocked(mockStore.findParentChunk).mockImplementation(
          async (_lib, _ver, id) => {
            if (id === "target") return universe.parent;
            return null;
          },
        );
        vi.mocked(mockStore.findChildChunks).mockImplementation(
          async (_lib, _ver, id, _limit) => {
            if (id === "parent") return [universe.target];
            return [];
          },
        );
        vi.mocked(mockStore.findPrecedingSiblingChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findSubsequentSiblingChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findChunksByIds).mockResolvedValue([
          universe.target,
          universe.parent,
        ]);

        const result = await strategy.selectChunks(
          "lib",
          "1.0.0",
          [universe.target, universe.parent],
          mockStore,
        );

        // Verify behavior: deduplicates overlapping relationships
        expect(result).toHaveLength(2); // No duplicates
        expect(result).toContain(universe.target);
        expect(result).toContain(universe.parent);
      });

      it("chunks from different documents", async () => {
        vi.mocked(mockStore.findParentChunk).mockResolvedValue(null);
        vi.mocked(mockStore.findPrecedingSiblingChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findSubsequentSiblingChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findChildChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findChunksByIds).mockResolvedValue([
          universe.target,
          universe.orphan,
        ]);

        const result = await strategy.selectChunks(
          "lib",
          "1.0.0",
          [universe.target, universe.orphan],
          mockStore,
        );

        // Verify behavior: includes both unrelated chunks
        expect(result).toHaveLength(2);
        expect(result).toContain(universe.target);
        expect(result).toContain(universe.orphan);
      });
    });

    describe("limit adherence", () => {
      it("respects PRECEDING_SIBLINGS_LIMIT (1)", async () => {
        // Setup: Mock store has 2 preceding siblings available, but strategy should only request 1
        vi.mocked(mockStore.findParentChunk).mockResolvedValue(null);
        vi.mocked(mockStore.findChildChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findSubsequentSiblingChunks).mockResolvedValue([]);
        // Mock returns only 1 sibling (simulating limit enforcement in DocumentStore)
        vi.mocked(mockStore.findPrecedingSiblingChunks).mockResolvedValue([
          universe.prev1,
        ]);
        vi.mocked(mockStore.findChunksByIds).mockResolvedValue([
          universe.target,
          universe.prev1,
        ]);

        const result = await strategy.selectChunks(
          "lib",
          "1.0.0",
          [universe.target],
          mockStore,
        );

        // Verify behavior: strategy requests limit of 1 and gets at most 1 preceding sibling
        expect(mockStore.findPrecedingSiblingChunks).toHaveBeenCalledWith(
          "lib",
          "1.0.0",
          "target",
          1,
        );
        const precedingSiblings = result.filter(
          (chunk) => chunk.id === "prev1" || chunk.id === "prev2",
        );
        expect(precedingSiblings).toHaveLength(1);
        expect(result).toContain(universe.prev1);
        expect(result).not.toContain(universe.prev2);
      });

      it("respects SUBSEQUENT_SIBLINGS_LIMIT (2)", async () => {
        // Setup: Mock store has 3 subsequent siblings available, but strategy should only request 2
        vi.mocked(mockStore.findParentChunk).mockResolvedValue(null);
        vi.mocked(mockStore.findChildChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findPrecedingSiblingChunks).mockResolvedValue([]);
        // Mock returns only 2 siblings (simulating limit enforcement in DocumentStore)
        vi.mocked(mockStore.findSubsequentSiblingChunks).mockResolvedValue([
          universe.next1,
          universe.next2,
        ]);
        vi.mocked(mockStore.findChunksByIds).mockResolvedValue([
          universe.target,
          universe.next1,
          universe.next2,
        ]);

        const result = await strategy.selectChunks(
          "lib",
          "1.0.0",
          [universe.target],
          mockStore,
        );

        // Verify behavior: strategy requests limit of 2 and gets at most 2 subsequent siblings
        expect(mockStore.findSubsequentSiblingChunks).toHaveBeenCalledWith(
          "lib",
          "1.0.0",
          "target",
          2,
        );
        const subsequentSiblings = result.filter((chunk) =>
          ["next1", "next2", "next3"].includes(chunk.id as string),
        );
        expect(subsequentSiblings).toHaveLength(2);
        expect(result).toContain(universe.next1);
        expect(result).toContain(universe.next2);
        expect(result).not.toContain(universe.next3);
      });

      it("respects CHILD_LIMIT (3)", async () => {
        // Setup: Mock store has 4 children available, but strategy should only request 3
        vi.mocked(mockStore.findParentChunk).mockResolvedValue(null);
        vi.mocked(mockStore.findPrecedingSiblingChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findSubsequentSiblingChunks).mockResolvedValue([]);
        // Mock returns only 3 children (simulating limit enforcement in DocumentStore)
        vi.mocked(mockStore.findChildChunks).mockResolvedValue([
          universe.child1,
          universe.child2,
          universe.child3,
        ]);
        vi.mocked(mockStore.findChunksByIds).mockResolvedValue([
          universe.target,
          universe.child1,
          universe.child2,
          universe.child3,
        ]);

        const result = await strategy.selectChunks(
          "lib",
          "1.0.0",
          [universe.target],
          mockStore,
        );

        // Verify behavior: strategy requests limit of 3 and gets at most 3 children
        expect(mockStore.findChildChunks).toHaveBeenCalledWith(
          "lib",
          "1.0.0",
          "target",
          3,
        );
        const children = result.filter((chunk) =>
          ["child1", "child2", "child3", "child4"].includes(chunk.id as string),
        );
        expect(children).toHaveLength(3);
        expect(result).toContain(universe.child1);
        expect(result).toContain(universe.child2);
        expect(result).toContain(universe.child3);
        expect(result).not.toContain(universe.child4);
      });
    });

    describe("edge cases", () => {
      it("handles empty initial chunks", async () => {
        const result = await strategy.selectChunks("lib", "1.0.0", [], mockStore);

        expect(result).toEqual([]);
        // Should not call store methods with empty array
        expect(mockStore.findParentChunk).not.toHaveBeenCalled();
      });

      it("handles chunks without IDs gracefully", async () => {
        const invalidChunk = {
          content: "No ID chunk",
          metadata: {},
        } as DbPageChunk;

        // Mock all store methods to return empty arrays for undefined IDs
        vi.mocked(mockStore.findParentChunk).mockResolvedValue(null);
        vi.mocked(mockStore.findPrecedingSiblingChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findSubsequentSiblingChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findChildChunks).mockResolvedValue([]);
        vi.mocked(mockStore.findChunksByIds).mockResolvedValue([]);

        // This should not throw, even though the chunk has no ID
        const result = await strategy.selectChunks(
          "lib",
          "1.0.0",
          [invalidChunk],
          mockStore,
        );

        expect(result).toEqual([]);
      });
    });
  });

  describe("integration scenarios", () => {
    it("end-to-end: full document hierarchy assembly", async () => {
      setupFullMockStore();

      // Simulate finding chunks, then assembling them
      const selectedChunks = await strategy.selectChunks(
        "lib",
        "1.0.0",
        [universe.target],
        mockStore,
      );
      const assembledContent = strategy.assembleContent(selectedChunks);

      // Should include all related content joined with \n\n
      expect(assembledContent).toContain("Target content");
      expect(assembledContent).toContain("Parent section content");
      expect(assembledContent).toContain("First child content");
      expect(assembledContent).toMatch(/\n\n/); // Should have double newline separators
    });
  });
});

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/arabold/docs-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.