folder-mcp

Overview Schema Related Servers Score Discussions

indexing.test.ts•11.3 KiB

/**
 * Integration Tests - Indexing Workflow
 *
 * Tests the complete indexing workflow across all architectural layers.
 *
 * Every workflow used here is an in-file mock (`Partial<IndexingWorkflow>`);
 * the assertions therefore pin the shape of the workflow contract rather than
 * the behavior of a real indexer.
 */

import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { TestUtils } from '../../helpers/test-utils.js';
import { MockFactory } from '../../helpers/mock-factories.js';
import { AssertionHelpers } from '../../helpers/assertions.js';
import type {
  IndexingWorkflow,
  IndexingResult,
  IndexingProgress,
  IndexingOptions,
  IndexingError,
  IndexingStatus
} from '../../../src/application/indexing/index.js';

/**
 * Builds an `IndexingOptions` value from the defaults shared by every test in
 * this file (no exclude patterns, 500/50 chunking that preserves paragraphs
 * and sentences, the `nomic-embed-text` model).
 *
 * @param overrides - Fields that differ from the shared defaults. Each call
 *   site passes `includeFileTypes`; any other field replaces the default
 *   wholesale (objects such as `chunkingOptions` are not deep-merged).
 * @returns A fresh options object; nothing is shared between calls.
 */
function buildOptions(
  overrides: Pick<IndexingOptions, 'includeFileTypes'> & Partial<IndexingOptions>
): IndexingOptions {
  return {
    excludePatterns: [],
    chunkingOptions: {
      maxChunkSize: 500,
      overlapSize: 50,
      preserveParagraphs: true,
      preserveSentences: true
    },
    embeddingModel: 'nomic-embed-text',
    ...overrides
  };
}

describe('Integration - Indexing Workflow', () => {
  let tempDir: string;
  let testFiles: Record<string, string>;
  let mockIndexingWorkflow: Partial<IndexingWorkflow>;

  beforeEach(async () => {
    tempDir = await TestUtils.createTempDir('indexing-workflow-test-');

    // Create test files with different content types
    testFiles = {
      'document.md': '# Sample Document\n\nThis is a test document with markdown content.',
      'code.ts': 'interface TestInterface {\n property: string;\n}',
      'data.json': '{"name": "test", "version": "1.0.0"}',
      'readme.txt': 'This is a plain text file for testing purposes.'
    };
    await TestUtils.createTestFiles(tempDir, testFiles);

    // Mock indexing workflow
    mockIndexingWorkflow = {
      // The mock never reads the folder: it counts the keys of `testFiles`
      // that match `options.includeFileTypes` and returns canned statistics.
      async indexFolder(_path: string, options: IndexingOptions): Promise<IndexingResult> {
        // Apply file filtering if specified
        let filesToProcess = Object.keys(testFiles);
        if (options.includeFileTypes && options.includeFileTypes.length > 0) {
          filesToProcess = filesToProcess.filter(filename =>
            options.includeFileTypes!.some(ext => filename.endsWith(ext))
          );
        }

        // Chunk and embedding counts are floored at 8 regardless of how many
        // files matched.
        const generated = Math.max(filesToProcess.length * 2, 8);

        return {
          success: true,
          filesProcessed: filesToProcess.length,
          chunksGenerated: generated,
          embeddingsCreated: generated,
          processingTime: 150,
          errors: [],
          statistics: {
            totalBytes: 1000,
            totalWords: 100,
            averageChunkSize: 500,
            processingRate: 10,
            embeddingRate: 20
          }
        };
      },

      // Always reports a run that is half finished (2 of 4 files).
      async getIndexingStatus(_path: string): Promise<IndexingStatus> {
        return {
          isRunning: true,
          currentFile: 'document.md',
          progress: {
            totalFiles: 4,
            processedFiles: 2,
            totalChunks: 8,
            processedChunks: 4,
            percentage: 50
          },
          startedAt: new Date(),
          estimatedCompletion: new Date(Date.now() + 75000)
        };
      }
    };
  });

  afterEach(async () => {
    await TestUtils.cleanupTempDir(tempDir);
  });

  describe('Complete Indexing Pipeline', () => {
    it('should index files end-to-end', async () => {
      const options = buildOptions({
        includeFileTypes: ['.md', '.ts', '.json', '.txt'],
        batchSize: 32,
        parallelWorkers: 4
      });

      const result = await mockIndexingWorkflow.indexFolder!(tempDir, options);

      expect(result.success).toBe(true);
      expect(result.filesProcessed).toBe(4);
      expect(result.chunksGenerated).toBeGreaterThan(0);
      expect(result.processingTime).toBeGreaterThan(0);
      expect(result.errors).toHaveLength(0);
      expect(result.statistics).toBeDefined();
    });

    it('should handle progress tracking', async () => {
      const status = await mockIndexingWorkflow.getIndexingStatus!(tempDir);

      expect(status.isRunning).toBe(true);
      expect(status.currentFile).toBeTruthy();
      expect(status.progress.processedFiles).toBeGreaterThanOrEqual(0);
      expect(status.progress.totalFiles).toBeGreaterThan(0);
      expect(status.progress.percentage).toBeGreaterThanOrEqual(0);
      expect(status.progress.percentage).toBeLessThanOrEqual(100);
    });

    it('should handle errors gracefully', async () => {
      // Workflow stub that always reports a single unrecoverable parsing error.
      const errorWorkflow: Partial<IndexingWorkflow> = {
        async indexFolder(_path: string, _options: IndexingOptions): Promise<IndexingResult> {
          return {
            success: false,
            filesProcessed: 2,
            chunksGenerated: 0,
            embeddingsCreated: 0,
            processingTime: 50,
            errors: [
              {
                filePath: 'corrupted.txt',
                error: 'File corrupted or unreadable',
                stage: 'parsing',
                timestamp: new Date(),
                recoverable: false
              }
            ],
            statistics: {
              totalBytes: 0,
              totalWords: 0,
              averageChunkSize: 0,
              processingRate: 0,
              embeddingRate: 0
            }
          };
        }
      };

      const options = buildOptions({ includeFileTypes: ['.txt'] });

      const result = await errorWorkflow.indexFolder!(tempDir, options);

      expect(result.success).toBe(false);
      expect(result.errors).toHaveLength(1);
      expect(result.errors[0]!.filePath).toBe('corrupted.txt');
    });
  });

  describe('File Processing Integration', () => {
    it('should process different file types', async () => {
      const fileTypes = ['.md', '.ts', '.json', '.txt'];

      // Only checks that the fixture set contains at least one file per type.
      for (const fileType of fileTypes) {
        const files = Object.keys(testFiles).filter(f => f.endsWith(fileType));
        expect(files.length).toBeGreaterThan(0);
      }
    });

    it('should handle large files efficiently', async () => {
      const largeContent = TestUtils.generateLargeContent(5000); // 5KB
      const largeFile = { 'large.txt': largeContent };
      await TestUtils.createTestFiles(tempDir, largeFile);

      const options = buildOptions({
        includeFileTypes: ['.txt'],
        chunkingOptions: {
          maxChunkSize: 1000,
          overlapSize: 100,
          preserveParagraphs: true,
          preserveSentences: true
        }
      });

      // NOTE(review): the mock counts keys of `testFiles`, and `large.txt` is
      // never added there, so the result below does not reflect the large file.
      const startTime = Date.now();
      const result = await mockIndexingWorkflow.indexFolder!(tempDir, options);
      const duration = Date.now() - startTime;

      expect(result.success).toBe(true);
      expect(duration).toBeLessThan(5000); // Should complete within 5 seconds
    });
  });

  describe('Configuration Integration', () => {
    it('should respect chunking configuration', async () => {
      const options = buildOptions({
        includeFileTypes: ['.md'],
        chunkingOptions: {
          maxChunkSize: 200,
          overlapSize: 20,
          preserveParagraphs: true,
          preserveSentences: true
        }
      });

      const result = await mockIndexingWorkflow.indexFolder!(tempDir, options);

      expect(result.success).toBe(true);
      // With smaller chunk size, we should get more chunks
      // NOTE(review): the mock ignores `chunkingOptions` and always reports at
      // least 8 chunks, so this assertion passes for any chunk size.
      expect(result.chunksGenerated).toBeGreaterThan(4);
    });

    it('should filter files by type', async () => {
      const options = buildOptions({
        includeFileTypes: ['.md'] // Only markdown files
      });

      const result = await mockIndexingWorkflow.indexFolder!(tempDir, options);

      expect(result.success).toBe(true);
      expect(result.filesProcessed).toBe(1); // Only document.md
    });
  });

  describe('Error Recovery Integration', () => {
    it('should continue processing after file errors', async () => {
      // Workflow stub that reports overall success alongside one recoverable
      // per-file error.
      const partialErrorWorkflow: Partial<IndexingWorkflow> = {
        async indexFolder(_path: string, _options: IndexingOptions): Promise<IndexingResult> {
          return {
            success: true, // Overall success despite some errors
            filesProcessed: 3,
            chunksGenerated: 6,
            embeddingsCreated: 6,
            processingTime: 120,
            errors: [
              {
                filePath: 'problematic.txt',
                error: 'Permission denied',
                stage: 'parsing',
                timestamp: new Date(),
                recoverable: true
              }
            ],
            statistics: {
              totalBytes: 900,
              totalWords: 90,
              averageChunkSize: 450,
              processingRate: 9,
              embeddingRate: 18
            }
          };
        }
      };

      const options = buildOptions({
        includeFileTypes: ['.md', '.ts', '.json', '.txt']
      });

      const result = await partialErrorWorkflow.indexFolder!(tempDir, options);

      expect(result.success).toBe(true);
      expect(result.filesProcessed).toBe(3); // 3 out of 4 files processed
      expect(result.errors).toHaveLength(1);
      expect(result.chunksGenerated).toBeGreaterThan(0); // Still got chunks from successful files
    });
  });

  describe('Performance Integration', () => {
    it('should meet performance benchmarks', async () => {
      const startTime = Date.now();

      const options = buildOptions({
        includeFileTypes: ['.md', '.ts', '.json', '.txt'],
        batchSize: 32,
        parallelWorkers: 4
      });

      const result = await mockIndexingWorkflow.indexFolder!(tempDir, options);
      const duration = Date.now() - startTime;

      expect(result.success).toBe(true);
      expect(duration).toBeLessThan(1000); // Should complete within 1 second for small files
      expect(result.processingTime).toBeLessThan(500); // Internal timing should be even faster
    });

    it('should handle concurrent indexing requests', async () => {
      const options = buildOptions({ includeFileTypes: ['.md'] });

      const startTime = Date.now();
      // Start three indexing calls before awaiting any of them.
      const results = await Promise.all(
        Array.from({ length: 3 }, () => mockIndexingWorkflow.indexFolder!(tempDir, options))
      );
      const duration = Date.now() - startTime;

      expect(results).toHaveLength(3);
      for (const result of results) {
        expect(result.success).toBe(true);
      }
      expect(duration).toBeLessThan(3000); // Concurrent execution should be efficient
    });
  });
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/okets/folder-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

indexing.test.ts•11.3 KiB