docmcp

docmcp
src
__tests__
integration

documentation-pipeline.test.ts

documentation-pipeline.test.ts•6.12 KiB

import { PrismaClient } from '../../generated/prisma';
import { startCrawlingProcess } from '../../services/mcp-tools/add-documentation.tool';
import { CrawlerService } from '../../services/crawler.service';
import { DocumentService } from '../../services/document.service';
import { DocumentProcessorService } from '../../services/document-processor.service';
import { JobService } from '../../services/job.service';
import { setupTestDatabase, teardownTestDatabase, getTestPrismaClient } from '../utils/testDb';
import axios from 'axios';

// Mock axios to avoid making actual HTTP requests
jest.mock('axios');
const mockedAxios = axios as jest.Mocked<typeof axios>;

/**
 * Integration test for the documentation pipeline: a job is created, the
 * crawl is started against mocked HTTP responses, and the resulting job,
 * documents, chunks and stored embeddings are checked in the test database.
 */
describe('Documentation Processing Pipeline Integration', () => {
  // Length of the mocked embedding vector; the stored vectors must match it.
  const EMBEDDING_DIMENSION = 1536;

  let prisma: PrismaClient;
  let jobId: string;

  beforeAll(async () => {
    prisma = await setupTestDatabase();
  });

  afterAll(async () => {
    await teardownTestDatabase();
  });

  beforeEach(async () => {
    // Clear database tables (chunks first, then documents, then jobs)
    await prisma.chunk.deleteMany();
    await prisma.document.deleteMany();
    await prisma.job.deleteMany();

    // Create a new job to use in tests
    const jobService = new JobService(prisma);
    const job = await jobService.createJob({
      url: 'https://example.com/docs',
      status: 'pending',
      startDate: new Date(),
      progress: 0,
      error: null,
      stats: { pagesProcessed: 0, pagesSkipped: 0, totalChunks: 0 },
    });
    jobId = job.id;

    // Setup axios mock to return HTML content
    mockedAxios.get.mockImplementation(async (url) => {
      if (url === 'https://example.com/robots.txt') {
        return {
          status: 200,
          data: 'User-agent: *\nAllow: /',
        };
      }

      // First page with link to second
      if (url === 'https://example.com/docs') {
        return {
          status: 200,
          data: `
            <html>
              <head>
                <title>Documentation Home</title>
                <meta name="package" content="example-docs">
                <meta name="version" content="1.0.0">
              </head>
              <body>
                <h1>Example Documentation</h1>
                <p>This is the main documentation page.</p>
                <a href="https://example.com/docs/page1">Page 1</a>
              </body>
            </html>
          `,
        };
      }

      // Second page
      if (url === 'https://example.com/docs/page1') {
        return {
          status: 200,
          data: `
            <html>
              <head>
                <title>Page 1</title>
              </head>
              <body>
                <h1>Page 1</h1>
                <p>This is page 1 of the documentation.</p>
                <pre><code class="language-javascript">
                  function example() {
                    return "hello world";
                  }
                </code></pre>
              </body>
            </html>
          `,
        };
      }

      // Any other URL is answered with a 404 payload
      return {
        status: 404,
        data: '404 Not Found',
      };
    });

    // Setup axios mock for embeddings
    mockedAxios.post.mockResolvedValue({
      data: {
        embedding: Array(EMBEDDING_DIMENSION).fill(0.1),
      },
    });
  });

  it('should process documents end-to-end', async () => {
    // Start the crawling process.
    // NOTE(review): `_bypassAsync` presumably makes the crawl finish before
    // this promise resolves — confirm against startCrawlingProcess.
    await startCrawlingProcess(jobId, {
      url: 'https://example.com/docs',
      maxDepth: 1,
      _prisma: prisma,
      _bypassAsync: true,
    });

    // Check that job was completed.
    // toBeTruthy rejects both null and undefined; toBeDefined would let a
    // null result (job not found) slip through.
    const jobService = new JobService(prisma);
    const updatedJob = await jobService.findJobById(jobId);
    expect(updatedJob).toBeTruthy();
    expect(updatedJob?.status).toBe('completed');
    expect(updatedJob?.progress).toBe(1.0);

    // Check that documents were created
    const documents = await prisma.document.findMany({
      where: { jobId: jobId },
      orderBy: { url: 'asc' },
    });
    expect(documents.length).toBe(2);
    expect(documents[0].title).toBe('Documentation Home');
    expect(documents[1].title).toBe('Page 1');

    // Check that chunks were created; only ids are selected here, the
    // embeddings are read separately through a raw query below.
    const chunksData = await prisma.chunk.findMany({
      where: { documentId: { in: documents.map((d) => d.id) } },
      select: { id: true },
    });
    expect(chunksData.length).toBeGreaterThan(0);

    // Fetch embeddings using a raw query. The column is cast to text in SQL,
    // so every row carries a string (or null when nothing is stored).
    const chunkIds = chunksData.map((c) => c.id);
    type ChunkEmbeddingRow = { id: string; embedding: string | null };
    const embeddingRows = await prisma.$queryRaw<ChunkEmbeddingRow[]>`
      SELECT id, embedding::text
      FROM chunks
      WHERE id = ANY(${chunkIds})
    `;

    // Convert the text representation (e.g. "[0.1,0.1,...]") back to arrays.
    // A row that cannot be parsed fails the test right here with the chunk id
    // instead of being logged and replaced by an empty array.
    const embeddingsMap = new Map<string, number[]>(
      embeddingRows.map((row): [string, number[]] => {
        if (row.embedding === null) {
          throw new Error(`Chunk ${row.id} has no stored embedding`);
        }
        const parsed: unknown = JSON.parse(row.embedding);
        if (!Array.isArray(parsed)) {
          throw new Error(
            `Embedding for chunk ${row.id} is not an array: ${row.embedding}`
          );
        }
        return [row.id, parsed as number[]];
      })
    );

    // Every chunk must have an embedding of the mocked dimension
    for (const chunk of chunksData) {
      const embedding = embeddingsMap.get(chunk.id);
      expect(embedding).toBeDefined();
      expect(Array.isArray(embedding)).toBe(true);
      expect(embedding?.length).toBe(EMBEDDING_DIMENSION);
    }

    // Check that job stats were updated
    expect(updatedJob?.stats).toEqual(
      expect.objectContaining({
        pagesProcessed: 2,
        totalChunks: expect.any(Number),
      })
    );
  });
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/visheshd/docmcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

documentation-pipeline.test.ts•6.12 KiB