Skip to main content
Glama

docs-mcp-server

vector-search-e2e.test.ts8.67 kB
/** * End-to-end vector search tests that verify the complete pipeline: * scraping -> splitting -> embedding -> indexing -> searching * * This test uses the actual services (not mocks) with an in-memory database * to validate that vector search works correctly from end to end. */ import { beforeAll, afterAll, describe, expect, it } from "vitest"; import path from "path"; import { mkdtempSync, rmSync } from "fs"; import { tmpdir } from "os"; import { config } from "dotenv"; import { ScrapeTool } from "../src/tools/ScrapeTool"; import { SearchTool } from "../src/tools/SearchTool"; import { createLocalDocumentManagement } from "../src/store"; import { PipelineFactory } from "../src/pipeline/PipelineFactory"; import { EmbeddingConfig, type EmbeddingModelConfig } from "../src/store/embeddings/EmbeddingConfig"; // Load environment variables from .env file config(); describe("Vector Search End-to-End Tests", () => { let docService: any; let scrapeTool: ScrapeTool; let searchTool: SearchTool; let pipeline: any; let tempDir: string; beforeAll(async () => { // Skip this test suite if no embedding configuration is available if (!process.env.OPENAI_API_KEY && !process.env.GOOGLE_API_KEY) { console.log("⚠️ Skipping vector search tests - no embedding API key found"); return; } // Create temporary directory for test database tempDir = mkdtempSync(path.join(tmpdir(), "vector-search-e2e-test-")); // Create explicit embedding configuration let embeddingConfig: EmbeddingModelConfig; if (process.env.OPENAI_API_KEY) { embeddingConfig = EmbeddingConfig.parseEmbeddingConfig("openai:text-embedding-3-small"); } else if (process.env.GOOGLE_API_KEY) { embeddingConfig = EmbeddingConfig.parseEmbeddingConfig("gemini:embedding-001"); } else { // Fallback (shouldn't reach here due to check above) embeddingConfig = EmbeddingConfig.parseEmbeddingConfig("text-embedding-3-small"); } // Initialize DocumentManagementService with temporary directory and embedding config docService = await createLocalDocumentManagement(tempDir, embeddingConfig); // Create pipeline for ScrapeTool pipeline = await PipelineFactory.createPipeline(docService); await pipeline.start(); // Initialize tools scrapeTool = new ScrapeTool(pipeline); searchTool = new SearchTool(docService); }, 30000); afterAll(async () => { if (pipeline) { await pipeline.stop(); } if (docService) { await docService.shutdown(); } // Clean up temporary directory if (tempDir) { try { rmSync(tempDir, { recursive: true, force: true }); } catch (error) { // Ignore cleanup errors } } }); it("should scrape local README.md and make it searchable", async () => { // Skip if no embedding configuration if (!process.env.OPENAI_API_KEY && !process.env.GOOGLE_API_KEY) { console.log("⚠️ Skipping test - no embedding API key found"); return; } // Get the path to the README.md file const readmePath = path.resolve(process.cwd(), "README.md"); const fileUrl = `file://${readmePath}`; // Step 1: Scrape the README.md file console.log("📄 Scraping README.md file..."); const scrapeResult = await scrapeTool.execute({ library: "test-library", version: "1.0.0", url: fileUrl, waitForCompletion: true, }); // Verify scraping completed successfully expect(scrapeResult).toHaveProperty("pagesScraped"); expect((scrapeResult as any).pagesScraped).toBeGreaterThan(0); // Step 2: Verify documents were added to the store const exists = await docService.exists("test-library", "1.0.0"); expect(exists).toBe(true); // Step 3: Search for a specific string from the README console.log("🔍 Searching for specific content..."); const searchQuery = "preserves logical boundaries, keeps code and tables intact"; const searchResult = await searchTool.execute({ library: "test-library", version: "1.0.0", query: searchQuery, limit: 5, }); // Verify search results expect(searchResult.results).toBeDefined(); expect(Array.isArray(searchResult.results)).toBe(true); expect(searchResult.results.length).toBeGreaterThan(0); // Verify that at least one result contains the expected content const hasExpectedContent = searchResult.results.some(result => result.content.toLowerCase().includes("preserves logical boundaries") ); expect(hasExpectedContent).toBe(true); console.log(`✅ Found ${searchResult.results.length} search results`); const firstScore = searchResult.results[0].score; console.log(`📊 First result score: ${firstScore !== null ? firstScore : 'N/A'}`); // Step 4: Test with a broader search to verify improved vector search console.log("🔍 Testing broader search capabilities..."); const broadSearchResult = await searchTool.execute({ library: "test-library", version: "1.0.0", query: "documentation MCP server", limit: 10, }); console.log(`🎯 Broader search found ${broadSearchResult.results.length} results`); expect(broadSearchResult.results.length).toBeGreaterThan(0); // With the vector search multiplier, we should find more diverse results broadSearchResult.results.forEach((result, index) => { const score = result.score !== null ? result.score.toFixed(4) : 'N/A'; console.log(` ${index + 1}. Score: ${score} - ${result.content.substring(0, 80)}...`); }); }, 60000); it("should handle version-specific searches", async () => { // Skip if no embedding configuration if (!process.env.OPENAI_API_KEY && !process.env.GOOGLE_API_KEY) { console.log("⚠️ Skipping test - no embedding API key found"); return; } // Search with exact version match const searchResult = await searchTool.execute({ library: "test-library", version: "1.0.0", query: "MCP server", exactMatch: true, limit: 3, }); expect(searchResult.results).toBeDefined(); expect(searchResult.results.length).toBeGreaterThan(0); // Verify results contain MCP-related content const hasMcpContent = searchResult.results.some(result => result.content.toLowerCase().includes("mcp") ); expect(hasMcpContent).toBe(true); }, 30000); it("should find semantic similarities beyond exact text matches", async () => { // Skip if no embedding configuration if (!process.env.OPENAI_API_KEY && !process.env.GOOGLE_API_KEY) { console.log("⚠️ Skipping test - no embedding API key found"); return; } // Search for a concept that should match semantically const searchResult = await searchTool.execute({ library: "test-library", version: "1.0.0", query: "artificial intelligence documentation helper", limit: 5, }); expect(searchResult.results).toBeDefined(); expect(searchResult.results.length).toBeGreaterThan(0); // Log results for manual inspection of semantic matching console.log("🧠 Semantic search results:"); searchResult.results.forEach((result, index) => { const score = result.score !== null ? result.score.toFixed(3) : 'N/A'; console.log(` ${index + 1}. Score: ${score} - ${result.content.substring(0, 100)}...`); }); }, 30000); it("should handle non-existent library searches gracefully", async () => { // Skip if no embedding configuration if (!process.env.OPENAI_API_KEY && !process.env.GOOGLE_API_KEY) { console.log("⚠️ Skipping test - no embedding API key found"); return; } await expect(searchTool.execute({ library: "non-existent-library", version: "1.0.0", query: "test query", })).rejects.toThrow("Library 'non-existent-library' not found"); }, 10000); it("should handle non-existent version searches gracefully", async () => { // Skip if no embedding configuration if (!process.env.OPENAI_API_KEY && !process.env.GOOGLE_API_KEY) { console.log("⚠️ Skipping test - no embedding API key found"); return; } // Search for a non-existent version should return empty results const searchResult = await searchTool.execute({ library: "test-library", version: "999.999.999", query: "test query", exactMatch: true, }); expect(searchResult.results).toBeDefined(); expect(Array.isArray(searchResult.results)).toBe(true); expect(searchResult.results.length).toBe(0); }, 10000); });

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/arabold/docs-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server