/**
* Integration Tests - Indexing Workflow
*
* Tests the complete indexing workflow across all architectural layers
*/
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { TestUtils } from '../../helpers/test-utils.js';
import { MockFactory } from '../../helpers/mock-factories.js';
import { AssertionHelpers } from '../../helpers/assertions.js';
import type {
IndexingWorkflow,
IndexingResult,
IndexingProgress,
IndexingOptions,
IndexingError,
IndexingStatus
} from '../../../src/application/indexing/index.js';
describe('Integration - Indexing Workflow', () => {
let tempDir: string;
let testFiles: Record<string, string>;
let mockIndexingWorkflow: Partial<IndexingWorkflow>;
beforeEach(async () => {
tempDir = await TestUtils.createTempDir('indexing-workflow-test-');
// Create test files with different content types
testFiles = {
'document.md': '# Sample Document\n\nThis is a test document with markdown content.',
'code.ts': 'interface TestInterface {\n property: string;\n}',
'data.json': '{"name": "test", "version": "1.0.0"}',
'readme.txt': 'This is a plain text file for testing purposes.'
};
await TestUtils.createTestFiles(tempDir, testFiles);
// Mock indexing workflow
mockIndexingWorkflow = {
async indexFolder(path: string, options: IndexingOptions): Promise<IndexingResult> {
// Apply file filtering if specified
let filesToProcess = Object.keys(testFiles);
if (options.includeFileTypes && options.includeFileTypes.length > 0) {
filesToProcess = filesToProcess.filter(filename =>
options.includeFileTypes!.some(ext => filename.endsWith(ext))
);
}
return {
success: true,
filesProcessed: filesToProcess.length,
chunksGenerated: Math.max(filesToProcess.length * 2, 8),
embeddingsCreated: Math.max(filesToProcess.length * 2, 8),
processingTime: 150,
errors: [],
statistics: {
totalBytes: 1000,
totalWords: 100,
averageChunkSize: 500,
processingRate: 10,
embeddingRate: 20
}
};
},
async getIndexingStatus(path: string): Promise<IndexingStatus> {
return {
isRunning: true,
currentFile: 'document.md',
progress: {
totalFiles: 4,
processedFiles: 2,
totalChunks: 8,
processedChunks: 4,
percentage: 50
},
startedAt: new Date(),
estimatedCompletion: new Date(Date.now() + 75000)
};
}
};
});
afterEach(async () => {
await TestUtils.cleanupTempDir(tempDir);
});
describe('Complete Indexing Pipeline', () => {
it('should index files end-to-end', async () => {
const options: IndexingOptions = {
includeFileTypes: ['.md', '.ts', '.json', '.txt'],
excludePatterns: [],
chunkingOptions: {
maxChunkSize: 500,
overlapSize: 50,
preserveParagraphs: true,
preserveSentences: true
},
embeddingModel: 'nomic-embed-text',
batchSize: 32,
parallelWorkers: 4
};
const result = await mockIndexingWorkflow.indexFolder!(tempDir, options);
expect(result.success).toBe(true);
expect(result.filesProcessed).toBe(4);
expect(result.chunksGenerated).toBeGreaterThan(0);
expect(result.processingTime).toBeGreaterThan(0);
expect(result.errors).toHaveLength(0);
expect(result.statistics).toBeDefined();
});
it('should handle progress tracking', async () => {
const status = await mockIndexingWorkflow.getIndexingStatus!(tempDir);
expect(status.isRunning).toBe(true);
expect(status.currentFile).toBeTruthy();
expect(status.progress.processedFiles).toBeGreaterThanOrEqual(0);
expect(status.progress.totalFiles).toBeGreaterThan(0);
expect(status.progress.percentage).toBeGreaterThanOrEqual(0);
expect(status.progress.percentage).toBeLessThanOrEqual(100);
});
it('should handle errors gracefully', async () => {
const errorWorkflow: Partial<IndexingWorkflow> = {
async indexFolder(path: string, options: IndexingOptions): Promise<IndexingResult> {
return {
success: false,
filesProcessed: 2,
chunksGenerated: 0,
embeddingsCreated: 0,
processingTime: 50,
errors: [
{
filePath: 'corrupted.txt',
error: 'File corrupted or unreadable',
stage: 'parsing',
timestamp: new Date(),
recoverable: false
}
],
statistics: {
totalBytes: 0,
totalWords: 0,
averageChunkSize: 0,
processingRate: 0,
embeddingRate: 0
}
};
}
};
const options: IndexingOptions = {
includeFileTypes: ['.txt'],
excludePatterns: [],
chunkingOptions: {
maxChunkSize: 500,
overlapSize: 50,
preserveParagraphs: true,
preserveSentences: true
},
embeddingModel: 'nomic-embed-text'
};
const result = await errorWorkflow.indexFolder!(tempDir, options);
expect(result.success).toBe(false);
expect(result.errors).toHaveLength(1);
expect(result.errors[0]!.filePath).toBe('corrupted.txt');
});
});
describe('File Processing Integration', () => {
it('should process different file types', async () => {
const fileTypes = ['.md', '.ts', '.json', '.txt'];
for (const fileType of fileTypes) {
const files = Object.keys(testFiles).filter(f => f.endsWith(fileType));
expect(files.length).toBeGreaterThan(0);
}
});
it('should handle large files efficiently', async () => {
const largeContent = TestUtils.generateLargeContent(5000); // 5KB
const largeFile = { 'large.txt': largeContent };
await TestUtils.createTestFiles(tempDir, largeFile);
const options: IndexingOptions = {
includeFileTypes: ['.txt'],
excludePatterns: [],
chunkingOptions: {
maxChunkSize: 1000,
overlapSize: 100,
preserveParagraphs: true,
preserveSentences: true
},
embeddingModel: 'nomic-embed-text'
};
const startTime = Date.now();
const result = await mockIndexingWorkflow.indexFolder!(tempDir, options);
const duration = Date.now() - startTime;
expect(result.success).toBe(true);
expect(duration).toBeLessThan(5000); // Should complete within 5 seconds
});
});
describe('Configuration Integration', () => {
it('should respect chunking configuration', async () => {
const options: IndexingOptions = {
includeFileTypes: ['.md'],
excludePatterns: [],
chunkingOptions: {
maxChunkSize: 200,
overlapSize: 20,
preserveParagraphs: true,
preserveSentences: true
},
embeddingModel: 'nomic-embed-text'
};
const result = await mockIndexingWorkflow.indexFolder!(tempDir, options);
expect(result.success).toBe(true);
// With smaller chunk size, we should get more chunks
expect(result.chunksGenerated).toBeGreaterThan(4);
});
it('should filter files by type', async () => {
const options: IndexingOptions = {
includeFileTypes: ['.md'], // Only markdown files
excludePatterns: [],
chunkingOptions: {
maxChunkSize: 500,
overlapSize: 50,
preserveParagraphs: true,
preserveSentences: true
},
embeddingModel: 'nomic-embed-text'
};
const result = await mockIndexingWorkflow.indexFolder!(tempDir, options);
expect(result.success).toBe(true);
expect(result.filesProcessed).toBe(1); // Only document.md
});
});
describe('Error Recovery Integration', () => {
it('should continue processing after file errors', async () => {
const partialErrorWorkflow: Partial<IndexingWorkflow> = {
async indexFolder(path: string, options: IndexingOptions): Promise<IndexingResult> {
return {
success: true, // Overall success despite some errors
filesProcessed: 3,
chunksGenerated: 6,
embeddingsCreated: 6,
processingTime: 120,
errors: [
{
filePath: 'problematic.txt',
error: 'Permission denied',
stage: 'parsing',
timestamp: new Date(),
recoverable: true
}
],
statistics: {
totalBytes: 900,
totalWords: 90,
averageChunkSize: 450,
processingRate: 9,
embeddingRate: 18
}
};
}
};
const options: IndexingOptions = {
includeFileTypes: ['.md', '.ts', '.json', '.txt'],
excludePatterns: [],
chunkingOptions: {
maxChunkSize: 500,
overlapSize: 50,
preserveParagraphs: true,
preserveSentences: true
},
embeddingModel: 'nomic-embed-text'
};
const result = await partialErrorWorkflow.indexFolder!(tempDir, options);
expect(result.success).toBe(true);
expect(result.filesProcessed).toBe(3); // 3 out of 4 files processed
expect(result.errors).toHaveLength(1);
expect(result.chunksGenerated).toBeGreaterThan(0); // Still got chunks from successful files
});
});
describe('Performance Integration', () => {
it('should meet performance benchmarks', async () => {
const startTime = Date.now();
const options: IndexingOptions = {
includeFileTypes: ['.md', '.ts', '.json', '.txt'],
excludePatterns: [],
chunkingOptions: {
maxChunkSize: 500,
overlapSize: 50,
preserveParagraphs: true,
preserveSentences: true
},
embeddingModel: 'nomic-embed-text',
batchSize: 32,
parallelWorkers: 4
};
const result = await mockIndexingWorkflow.indexFolder!(tempDir, options);
const duration = Date.now() - startTime;
expect(result.success).toBe(true);
expect(duration).toBeLessThan(1000); // Should complete within 1 second for small files
expect(result.processingTime).toBeLessThan(500); // Internal timing should be even faster
});
it('should handle concurrent indexing requests', async () => {
const options: IndexingOptions = {
includeFileTypes: ['.md'],
excludePatterns: [],
chunkingOptions: {
maxChunkSize: 500,
overlapSize: 50,
preserveParagraphs: true,
preserveSentences: true
},
embeddingModel: 'nomic-embed-text'
};
const startTime = Date.now();
const results = await Promise.all(
Array(3).fill(null).map(() => mockIndexingWorkflow.indexFolder!(tempDir, options))
);
const duration = Date.now() - startTime;
expect(results).toHaveLength(3);
results.forEach(result => {
expect(result.success).toBe(true);
});
expect(duration).toBeLessThan(3000); // Concurrent execution should be efficient
});
});
});