docs-mcp-server

Overview Schema Related Servers Score Discussions

DocumentRetrieverService.test.ts•25.4 KiB

import { Document } from "@langchain/core/documents";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { DocumentRetrieverService } from "./DocumentRetrieverService";
import { DocumentStore } from "./DocumentStore";

// Replace the store and logger modules with Vitest mocks so that no test
// touches a real database or emits log output.
vi.mock("./DocumentStore");
vi.mock("../utils/logger");

/**
 * Tests for `DocumentRetrieverService.search`.
 *
 * Every test follows the same pattern:
 *   1. stub `findByContent` with the initial search hits,
 *   2. stub the context lookups (`findParentChunk`, `findPrecedingSiblingChunks`,
 *      `findSubsequentSiblingChunks`, `findChildChunks`),
 *   3. stub `findChunksByIds` with the chunks in the order they should be assembled,
 *   4. assert on the consolidated `{ url, content, score, mimeType }` results.
 */
describe("DocumentRetrieverService (consolidated logic)", () => {
  let retrieverService: DocumentRetrieverService;
  let mockDocumentStore: DocumentStore;

  beforeEach(() => {
    // Fresh mock store and service per test so spies never leak between cases.
    vi.clearAllMocks();
    mockDocumentStore = new DocumentStore("mock_connection_string");
    retrieverService = new DocumentRetrieverService(mockDocumentStore);
  });

  it("should return an empty array when no documents are found", async () => {
    vi.spyOn(mockDocumentStore, "findByContent").mockResolvedValue([]);

    const results = await retrieverService.search("lib", "1.0.0", "query");

    expect(results).toEqual([]);
  });

  it("should consolidate multiple hits from the same URL into a single ordered result", async () => {
    const library = "lib";
    const version = "1.0.0";
    const query = "test";

    // Two initial hits from the same URL, with overlapping context
    const initialResult1 = new Document({
      id: "doc1",
      pageContent: "Chunk A",
      metadata: { url: "url", score: 0.9 },
    });
    const initialResult2 = new Document({
      id: "doc3",
      pageContent: "Chunk C",
      metadata: { url: "url", score: 0.8 },
    });
    const doc2 = new Document({
      id: "doc2",
      pageContent: "Chunk B",
      metadata: { url: "url" },
    });

    vi.spyOn(mockDocumentStore, "findByContent").mockResolvedValue([
      initialResult1,
      initialResult2,
    ]);
    vi.spyOn(mockDocumentStore, "findParentChunk").mockImplementation(async () => null);
    vi.spyOn(mockDocumentStore, "findPrecedingSiblingChunks").mockImplementation(
      async () => [],
    );
    // doc2 is reported both as a child and as a subsequent sibling of doc1,
    // so the same chunk id is reachable through two different lookups.
    vi.spyOn(mockDocumentStore, "findChildChunks").mockImplementation(
      async (_lib, _ver, id) => (id === "doc1" ? [doc2] : []),
    );
    vi.spyOn(mockDocumentStore, "findSubsequentSiblingChunks").mockImplementation(
      async (_lib, _ver, id) => (id === "doc1" ? [doc2] : []),
    );
    const findChunksByIdsSpy = vi
      .spyOn(mockDocumentStore, "findChunksByIds")
      .mockResolvedValue([
        initialResult1, // doc1 (Chunk A)
        doc2, // doc2 (Chunk B)
        initialResult2, // doc3 (Chunk C)
      ]);

    const results = await retrieverService.search(library, version, query);

    expect(findChunksByIdsSpy).toHaveBeenCalledWith(
      library,
      version,
      expect.arrayContaining(["doc1", "doc2", "doc3"]),
    );
    expect(results).toEqual([
      {
        content: "Chunk A\n\nChunk B\n\nChunk C",
        url: "url",
        score: 0.9,
      },
    ]);
  });

  it("should return a single result for a single hit with context", async () => {
    const library = "lib";
    const version = "1.0.0";
    const query = "test";

    const initialResult = new Document({
      id: "doc1",
      pageContent: "Main chunk",
      metadata: { url: "url", score: 0.7 },
    });
    const parent = new Document({
      id: "parent1",
      pageContent: "Parent",
      metadata: { url: "url" },
    });
    const child = new Document({
      id: "child1",
      pageContent: "Child",
      metadata: { url: "url" },
    });

    vi.spyOn(mockDocumentStore, "findByContent").mockResolvedValue([initialResult]);
    vi.spyOn(mockDocumentStore, "findParentChunk").mockResolvedValue(parent);
    vi.spyOn(mockDocumentStore, "findPrecedingSiblingChunks").mockResolvedValue([]);
    vi.spyOn(mockDocumentStore, "findChildChunks").mockResolvedValue([child]);
    vi.spyOn(mockDocumentStore, "findSubsequentSiblingChunks").mockResolvedValue([]);
    const findChunksByIdsSpy = vi
      .spyOn(mockDocumentStore, "findChunksByIds")
      .mockResolvedValue([parent, initialResult, child]);

    const results = await retrieverService.search(library, version, query);

    expect(findChunksByIdsSpy).toHaveBeenCalledWith(
      library,
      version,
      expect.arrayContaining(["parent1", "doc1", "child1"]),
    );
    expect(results).toEqual([
      {
        content: "Parent\n\nMain chunk\n\nChild",
        url: "url",
        score: 0.7,
      },
    ]);
  });

  it("should return multiple results for hits from different URLs", async () => {
    const library = "lib";
    const version = "1.0.0";
    const query = "test";

    const docA = new Document({
      id: "a1",
      pageContent: "A1",
      metadata: { url: "urlA", score: 0.8 },
    });
    const docB = new Document({
      id: "b1",
      pageContent: "B1",
      metadata: { url: "urlB", score: 0.9 },
    });

    vi.spyOn(mockDocumentStore, "findByContent").mockResolvedValue([docA, docB]);
    vi.spyOn(mockDocumentStore, "findParentChunk").mockResolvedValue(null);
    vi.spyOn(mockDocumentStore, "findPrecedingSiblingChunks").mockResolvedValue([]);
    vi.spyOn(mockDocumentStore, "findChildChunks").mockResolvedValue([]);
    vi.spyOn(mockDocumentStore, "findSubsequentSiblingChunks").mockResolvedValue([]);
    // Answer per requested id set so each URL group gets only its own chunk.
    vi.spyOn(mockDocumentStore, "findChunksByIds").mockImplementation(
      async (_lib, _ver, ids) => {
        if (ids.includes("a1")) return [docA];
        if (ids.includes("b1")) return [docB];
        return [];
      },
    );

    const results = await retrieverService.search(library, version, query);

    expect(results).toEqual([
      {
        content: "A1",
        url: "urlA",
        score: 0.8,
      },
      {
        content: "B1",
        url: "urlB",
        score: 0.9,
      },
    ]);
  });

  it("should handle all context lookups returning empty", async () => {
    const library = "lib";
    const version = "1.0.0";
    const query = "test";

    const initialResult = new Document({
      id: "doc1",
      pageContent: "Main chunk",
      metadata: { url: "url", score: 0.5 },
    });

    vi.spyOn(mockDocumentStore, "findByContent").mockResolvedValue([initialResult]);
    vi.spyOn(mockDocumentStore, "findParentChunk").mockResolvedValue(null);
    vi.spyOn(mockDocumentStore, "findPrecedingSiblingChunks").mockResolvedValue([]);
    vi.spyOn(mockDocumentStore, "findChildChunks").mockResolvedValue([]);
    vi.spyOn(mockDocumentStore, "findSubsequentSiblingChunks").mockResolvedValue([]);
    const findChunksByIdsSpy = vi
      .spyOn(mockDocumentStore, "findChunksByIds")
      .mockResolvedValue([initialResult]);

    const results = await retrieverService.search(library, version, query);

    expect(findChunksByIdsSpy).toHaveBeenCalledWith(
      library,
      version,
      expect.arrayContaining(["doc1"]),
    );
    expect(results).toEqual([
      {
        content: "Main chunk",
        url: "url",
        score: 0.5,
      },
    ]);
  });

  it("should use the provided limit", async () => {
    const library = "lib";
    const version = "1.0.0";
    const query = "test";
    const limit = 3;

    const initialResult = new Document({
      id: "doc1",
      pageContent: "Main chunk",
      metadata: { url: "url", score: 0.5 },
    });

    vi.spyOn(mockDocumentStore, "findByContent").mockResolvedValue([initialResult]);
    vi.spyOn(mockDocumentStore, "findParentChunk").mockResolvedValue(null);
    vi.spyOn(mockDocumentStore, "findPrecedingSiblingChunks").mockResolvedValue([]);
    vi.spyOn(mockDocumentStore, "findChildChunks").mockResolvedValue([]);
    vi.spyOn(mockDocumentStore, "findSubsequentSiblingChunks").mockResolvedValue([]);
    vi.spyOn(mockDocumentStore, "findChunksByIds").mockResolvedValue([initialResult]);

    const results = await retrieverService.search(library, version, query, limit);

    // The caller-supplied limit must be forwarded unchanged to the store search.
    expect(mockDocumentStore.findByContent).toHaveBeenCalledWith(
      library,
      version,
      query,
      limit,
    );
    expect(results).toEqual([
      {
        content: "Main chunk",
        url: "url",
        score: 0.5,
      },
    ]);
  });

  it("should extract mimeType from document metadata and include it in search result", async () => {
    const library = "lib";
    const version = "1.0.0";
    const query = "test";
    const mimeType = "text/html";

    // Create a document with mimeType in metadata
    const initialResult = new Document({
      id: "doc1",
      pageContent: "HTML content",
      metadata: { url: "https://example.com", score: 0.9, mimeType },
    });

    vi.spyOn(mockDocumentStore, "findByContent").mockResolvedValue([initialResult]);
    vi.spyOn(mockDocumentStore, "findParentChunk").mockResolvedValue(null);
    vi.spyOn(mockDocumentStore, "findPrecedingSiblingChunks").mockResolvedValue([]);
    vi.spyOn(mockDocumentStore, "findSubsequentSiblingChunks").mockResolvedValue([]);
    vi.spyOn(mockDocumentStore, "findChildChunks").mockResolvedValue([]);
    vi.spyOn(mockDocumentStore, "findChunksByIds").mockResolvedValue([initialResult]);

    const results = await retrieverService.search(library, version, query);

    expect(results).toHaveLength(1);
    expect(results[0]).toEqual({
      url: "https://example.com",
      content: "HTML content",
      score: 0.9,
      mimeType: "text/html",
    });
  });

  it("should handle missing mimeType gracefully", async () => {
    const library = "lib";
    const version = "1.0.0";
    const query = "test";

    // Create a document without mimeType in metadata
    const initialResult = new Document({
      id: "doc1",
      pageContent: "Plain content",
      metadata: { url: "https://example.com", score: 0.9 },
    });

    vi.spyOn(mockDocumentStore, "findByContent").mockResolvedValue([initialResult]);
    vi.spyOn(mockDocumentStore, "findParentChunk").mockResolvedValue(null);
    vi.spyOn(mockDocumentStore, "findPrecedingSiblingChunks").mockResolvedValue([]);
    vi.spyOn(mockDocumentStore, "findSubsequentSiblingChunks").mockResolvedValue([]);
    vi.spyOn(mockDocumentStore, "findChildChunks").mockResolvedValue([]);
    vi.spyOn(mockDocumentStore, "findChunksByIds").mockResolvedValue([initialResult]);

    const results = await retrieverService.search(library, version, query);

    expect(results).toHaveLength(1);
    expect(results[0]).toEqual({
      url: "https://example.com",
      content: "Plain content",
      score: 0.9,
      mimeType: undefined,
    });
  });

  // Cases that exercise parent / sibling / child lookups around a hit and the
  // ordering of the reassembled content.
  describe("Context Retrieval and Hierarchical Reassembly", () => {
    it("should find parent chunks based on path hierarchy", async () => {
      const library = "lib";
      const version = "1.0.0";
      const query = "test";

      // Child chunk with path ["Chapter 1", "Section 1.1"]
      const childResult = new Document({
        id: "child1",
        pageContent: "Child content",
        metadata: {
          url: "https://example.com",
          score: 0.8,
          path: ["Chapter 1", "Section 1.1"],
          level: 2,
        },
      });

      // Parent chunk with path ["Chapter 1"]
      const parentChunk = new Document({
        id: "parent1",
        pageContent: "Parent content",
        metadata: {
          url: "https://example.com",
          path: ["Chapter 1"],
          level: 1,
        },
      });

      vi.spyOn(mockDocumentStore, "findByContent").mockResolvedValue([childResult]);
      vi.spyOn(mockDocumentStore, "findParentChunk").mockResolvedValue(parentChunk);
      vi.spyOn(mockDocumentStore, "findPrecedingSiblingChunks").mockResolvedValue([]);
      vi.spyOn(mockDocumentStore, "findSubsequentSiblingChunks").mockResolvedValue([]);
      vi.spyOn(mockDocumentStore, "findChildChunks").mockResolvedValue([]);
      vi.spyOn(mockDocumentStore, "findChunksByIds").mockResolvedValue([
        parentChunk,
        childResult,
      ]);

      const results = await retrieverService.search(library, version, query);

      expect(mockDocumentStore.findParentChunk).toHaveBeenCalledWith(
        library,
        version,
        "child1",
      );
      expect(results).toEqual([
        {
          url: "https://example.com",
          content: "Parent content\n\nChild content",
          score: 0.8,
          mimeType: undefined,
        },
      ]);
    });

    it("should find sibling chunks at the same hierarchical level", async () => {
      const library = "lib";
      const version = "1.0.0";
      const query = "test";

      // Main result chunk
      const mainResult = new Document({
        id: "main1",
        pageContent: "Main content",
        metadata: {
          url: "https://example.com",
          score: 0.9,
          path: ["Chapter 1", "Section 1.2"],
          level: 2,
        },
      });

      // Preceding sibling with same path level
      const precedingSibling = new Document({
        id: "preceding1",
        pageContent: "Preceding content",
        metadata: {
          url: "https://example.com",
          path: ["Chapter 1", "Section 1.1"],
          level: 2,
        },
      });

      // Subsequent sibling with same path level
      const subsequentSibling = new Document({
        id: "subsequent1",
        pageContent: "Subsequent content",
        metadata: {
          url: "https://example.com",
          path: ["Chapter 1", "Section 1.3"],
          level: 2,
        },
      });

      vi.spyOn(mockDocumentStore, "findByContent").mockResolvedValue([mainResult]);
      vi.spyOn(mockDocumentStore, "findParentChunk").mockResolvedValue(null);
      vi.spyOn(mockDocumentStore, "findPrecedingSiblingChunks").mockResolvedValue([
        precedingSibling,
      ]);
      vi.spyOn(mockDocumentStore, "findSubsequentSiblingChunks").mockResolvedValue([
        subsequentSibling,
      ]);
      vi.spyOn(mockDocumentStore, "findChildChunks").mockResolvedValue([]);
      vi.spyOn(mockDocumentStore, "findChunksByIds").mockResolvedValue([
        precedingSibling,
        mainResult,
        subsequentSibling,
      ]);

      const results = await retrieverService.search(library, version, query);

      // The trailing numeric argument is the count the service passes to each
      // lookup (1 preceding, 2 subsequent).
      expect(mockDocumentStore.findPrecedingSiblingChunks).toHaveBeenCalledWith(
        library,
        version,
        "main1",
        1,
      );
      expect(mockDocumentStore.findSubsequentSiblingChunks).toHaveBeenCalledWith(
        library,
        version,
        "main1",
        2,
      );
      expect(results).toEqual([
        {
          url: "https://example.com",
          content: "Preceding content\n\nMain content\n\nSubsequent content",
          score: 0.9,
          mimeType: undefined,
        },
      ]);
    });

    it("should find child chunks at deeper hierarchical levels", async () => {
      const library = "lib";
      const version = "1.0.0";
      const query = "test";

      // Parent result chunk
      const parentResult = new Document({
        id: "parent1",
        pageContent: "Parent section",
        metadata: {
          url: "https://example.com",
          score: 0.7,
          path: ["Chapter 1"],
          level: 1,
        },
      });

      // Child chunks at deeper level
      const child1 = new Document({
        id: "child1",
        pageContent: "First subsection",
        metadata: {
          url: "https://example.com",
          path: ["Chapter 1", "Section 1.1"],
          level: 2,
        },
      });
      const child2 = new Document({
        id: "child2",
        pageContent: "Second subsection",
        metadata: {
          url: "https://example.com",
          path: ["Chapter 1", "Section 1.2"],
          level: 2,
        },
      });

      vi.spyOn(mockDocumentStore, "findByContent").mockResolvedValue([parentResult]);
      vi.spyOn(mockDocumentStore, "findParentChunk").mockResolvedValue(null);
      vi.spyOn(mockDocumentStore, "findPrecedingSiblingChunks").mockResolvedValue([]);
      vi.spyOn(mockDocumentStore, "findSubsequentSiblingChunks").mockResolvedValue([]);
      vi.spyOn(mockDocumentStore, "findChildChunks").mockResolvedValue([child1, child2]);
      vi.spyOn(mockDocumentStore, "findChunksByIds").mockResolvedValue([
        parentResult,
        child1,
        child2,
      ]);

      const results = await retrieverService.search(library, version, query);

      expect(mockDocumentStore.findChildChunks).toHaveBeenCalledWith(
        library,
        version,
        "parent1",
        3,
      );
      expect(results).toEqual([
        {
          url: "https://example.com",
          content: "Parent section\n\nFirst subsection\n\nSecond subsection",
          score: 0.7,
          mimeType: undefined,
        },
      ]);
    });

    it("should demonstrate sort_order-based reassembly within same URL", async () => {
      const library = "lib";
      const version = "1.0.0";
      const query = "test";

      // Multiple chunks from same document/URL, returned out of sort_order
      const chunk3 = new Document({
        id: "chunk3",
        pageContent: "Third chunk",
        metadata: {
          url: "https://example.com",
          score: 0.6,
          path: ["Section C"],
          level: 1,
        },
      });
      const chunk1 = new Document({
        id: "chunk1",
        pageContent: "First chunk",
        metadata: {
          url: "https://example.com",
          score: 0.8,
          path: ["Section A"],
          level: 1,
        },
      });

      vi.spyOn(mockDocumentStore, "findByContent").mockResolvedValue([chunk3, chunk1]);
      vi.spyOn(mockDocumentStore, "findParentChunk").mockResolvedValue(null);
      vi.spyOn(mockDocumentStore, "findPrecedingSiblingChunks").mockResolvedValue([]);
      vi.spyOn(mockDocumentStore, "findSubsequentSiblingChunks").mockResolvedValue([]);
      vi.spyOn(mockDocumentStore, "findChildChunks").mockResolvedValue([]);
      // findChunksByIds returns chunks in sort_order (simulating database ORDER BY)
      vi.spyOn(mockDocumentStore, "findChunksByIds").mockResolvedValue([chunk1, chunk3]);

      const results = await retrieverService.search(library, version, query);

      // Should be reassembled in sort_order, not in initial search result order
      expect(results).toEqual([
        {
          url: "https://example.com",
          content: "First chunk\n\nThird chunk",
          score: 0.8, // Highest score from the chunks
          mimeType: undefined,
        },
      ]);
    });

    it("should demonstrate complex hierarchical context expansion", async () => {
      const library = "lib";
      const version = "1.0.0";
      const query = "test";

      // Main search result - a subsection
      const mainResult = new Document({
        id: "main1",
        pageContent: "Key subsection content",
        metadata: {
          url: "https://example.com",
          score: 0.9,
          path: ["Guide", "Installation", "Setup"],
          level: 3,
        },
      });

      // Parent at level 2
      const parent = new Document({
        id: "parent1",
        pageContent: "Installation overview",
        metadata: {
          url: "https://example.com",
          path: ["Guide", "Installation"],
          level: 2,
        },
      });

      // Preceding sibling at same level
      const precedingSibling = new Document({
        id: "preceding1",
        pageContent: "Prerequisites section",
        metadata: {
          url: "https://example.com",
          path: ["Guide", "Installation", "Prerequisites"],
          level: 3,
        },
      });

      // Child at deeper level
      const child = new Document({
        id: "child1",
        pageContent: "Detailed setup steps",
        metadata: {
          url: "https://example.com",
          path: ["Guide", "Installation", "Setup", "Steps"],
          level: 4,
        },
      });

      // Subsequent sibling
      const subsequentSibling = new Document({
        id: "subsequent1",
        pageContent: "Configuration section",
        metadata: {
          url: "https://example.com",
          path: ["Guide", "Installation", "Configuration"],
          level: 3,
        },
      });

      vi.spyOn(mockDocumentStore, "findByContent").mockResolvedValue([mainResult]);
      vi.spyOn(mockDocumentStore, "findParentChunk").mockResolvedValue(parent);
      vi.spyOn(mockDocumentStore, "findPrecedingSiblingChunks").mockResolvedValue([
        precedingSibling,
      ]);
      vi.spyOn(mockDocumentStore, "findSubsequentSiblingChunks").mockResolvedValue([
        subsequentSibling,
      ]);
      vi.spyOn(mockDocumentStore, "findChildChunks").mockResolvedValue([child]);
      // Database returns in sort_order
      vi.spyOn(mockDocumentStore, "findChunksByIds").mockResolvedValue([
        parent,
        precedingSibling,
        mainResult,
        child,
        subsequentSibling,
      ]);

      const results = await retrieverService.search(library, version, query);

      expect(results).toEqual([
        {
          url: "https://example.com",
          content:
            "Installation overview\n\nPrerequisites section\n\nKey subsection content\n\nDetailed setup steps\n\nConfiguration section",
          score: 0.9,
          mimeType: undefined,
        },
      ]);
    });
  });

  // Cases that vary only the chunk's `mimeType` metadata and check the
  // assembled result for each content type.
  describe("Content-Type-Aware Assembly Strategy", () => {
    it("should use MarkdownAssemblyStrategy for markdown content", async () => {
      const library = "lib";
      const version = "1.0.0";
      const query = "test";

      const markdownChunk = new Document({
        id: "md1",
        pageContent: "# Heading\n\nSome content",
        metadata: {
          url: "https://example.com/doc.md",
          score: 0.9,
          mimeType: "text/markdown",
        },
      });

      vi.spyOn(mockDocumentStore, "findByContent").mockResolvedValue([markdownChunk]);
      vi.spyOn(mockDocumentStore, "findParentChunk").mockResolvedValue(null);
      vi.spyOn(mockDocumentStore, "findPrecedingSiblingChunks").mockResolvedValue([]);
      vi.spyOn(mockDocumentStore, "findChildChunks").mockResolvedValue([]);
      vi.spyOn(mockDocumentStore, "findSubsequentSiblingChunks").mockResolvedValue([]);
      vi.spyOn(mockDocumentStore, "findChunksByIds").mockResolvedValue([markdownChunk]);

      const results = await retrieverService.search(library, version, query);

      expect(results).toHaveLength(1);
      expect(results[0]).toEqual({
        url: "https://example.com/doc.md",
        content: "# Heading\n\nSome content", // Should use "\n\n" joining for markdown
        score: 0.9,
        mimeType: "text/markdown",
      });
    });

    it("should use HierarchicalAssemblyStrategy for source code content", async () => {
      const library = "lib";
      const version = "1.0.0";
      const query = "test";

      const codeChunk = new Document({
        id: "ts1",
        pageContent: "function test() {\n return 'hello';\n}",
        metadata: {
          url: "https://example.com/code.ts",
          score: 0.9,
          mimeType: "text/x-typescript",
        },
      });

      vi.spyOn(mockDocumentStore, "findByContent").mockResolvedValue([codeChunk]);
      // Mock the hierarchical strategy's fallback behavior since we don't have full hierarchy implementation
      vi.spyOn(mockDocumentStore, "findParentChunk").mockResolvedValue(null);
      vi.spyOn(mockDocumentStore, "findChildChunks").mockResolvedValue([]);
      vi.spyOn(mockDocumentStore, "findChunksByIds").mockResolvedValue([codeChunk]);

      const results = await retrieverService.search(library, version, query);

      expect(results).toHaveLength(1);
      expect(results[0]).toEqual({
        url: "https://example.com/code.ts",
        content: "function test() {\n return 'hello';\n}", // Should use simple concatenation for code
        score: 0.9,
        mimeType: "text/x-typescript",
      });
    });

    it("should use HierarchicalAssemblyStrategy for JSON content", async () => {
      const library = "lib";
      const version = "1.0.0";
      const query = "test";

      const jsonChunk = new Document({
        id: "json1",
        pageContent: '{"key": "value"}',
        metadata: {
          url: "https://example.com/config.json",
          score: 0.9,
          mimeType: "application/json",
        },
      });

      vi.spyOn(mockDocumentStore, "findByContent").mockResolvedValue([jsonChunk]);
      vi.spyOn(mockDocumentStore, "findParentChunk").mockResolvedValue(null);
      vi.spyOn(mockDocumentStore, "findChildChunks").mockResolvedValue([]);
      vi.spyOn(mockDocumentStore, "findChunksByIds").mockResolvedValue([jsonChunk]);

      const results = await retrieverService.search(library, version, query);

      expect(results).toHaveLength(1);
      expect(results[0]).toEqual({
        url: "https://example.com/config.json",
        content: '{"key": "value"}', // Should use simple concatenation for JSON
        score: 0.9,
        mimeType: "application/json",
      });
    });

    it("should handle missing MIME type with default MarkdownAssemblyStrategy", async () => {
      const library = "lib";
      const version = "1.0.0";
      const query = "test";

      const unknownChunk = new Document({
        id: "unknown1",
        pageContent: "Some content",
        metadata: {
          url: "https://example.com/unknown",
          score: 0.9,
          // No mimeType specified
        },
      });

      vi.spyOn(mockDocumentStore, "findByContent").mockResolvedValue([unknownChunk]);
      vi.spyOn(mockDocumentStore, "findParentChunk").mockResolvedValue(null);
      vi.spyOn(mockDocumentStore, "findPrecedingSiblingChunks").mockResolvedValue([]);
      vi.spyOn(mockDocumentStore, "findChildChunks").mockResolvedValue([]);
      vi.spyOn(mockDocumentStore, "findSubsequentSiblingChunks").mockResolvedValue([]);
      vi.spyOn(mockDocumentStore, "findChunksByIds").mockResolvedValue([unknownChunk]);

      const results = await retrieverService.search(library, version, query);

      expect(results).toHaveLength(1);
      expect(results[0]).toEqual({
        url: "https://example.com/unknown",
        content: "Some content", // Should default to markdown strategy
        score: 0.9,
        mimeType: undefined,
      });
    });
  });
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/arabold/docs-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

DocumentRetrieverService.test.ts•25.4 KiB