M.I.M.I.R - Multi-agent Intelligent Memory & Insight Repository

by orneryd
chunk-aggregation.test.ts (20.3 kB)
/**
 * @file testing/chunk-aggregation.test.ts
 * @description Test chunk aggregation, IDs, and sequential relationships
 */
import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
import neo4j, { Driver } from 'neo4j-driver';
import { FileIndexer } from '../src/indexing/FileIndexer.js';
import { UnifiedSearchService } from '../src/managers/UnifiedSearchService.js';
import { GraphManager } from '../src/managers/GraphManager.js';
import { promises as fs } from 'fs';
import path from 'path';
import os from 'os';

/**
 * SKIPPED: Entire test suite - Uses real Neo4j database connection
 *
 * ⚠️ CRITICAL: This test suite uses real database connections and WILL MODIFY DATA
 *
 * DATABASE CONNECTION: Lines 52-56 create real Neo4j driver and GraphManager
 * - Connects to bolt://localhost:7687 with neo4j/password
 * - Creates/deletes nodes and edges in the database
 * - Can corrupt production data if run against wrong database
 *
 * TESTS COVERED (13 total):
 * - File indexing creates proper file/chunk relationships
 * - Sequential chunk relationships (NEXT_CHUNK edges)
 * - Chunk ID uniqueness and consistency
 * - Full file content reconstruction from chunks
 * - Chunk metadata (startLine, endLine, chunkIndex)
 * - Multiple file indexing and isolation
 * - Chunk aggregation queries and performance
 *
 * UNSKIP CONDITIONS - MUST COMPLETE ALL:
 * 1. Refactor to use MockGraphManager from testing/helpers/mockGraphManager.ts, AND
 * 2. Remove all real Neo4j driver connections (lines 47-58), AND
 * 3. Replace FileIndexer instantiation with mocked version or dependency injection, AND
 * 4. Ensure no database modification occurs (all operations in-memory), AND
 * 5. Verify FileIndexer chunk creation logic matches test expectations, AND
 * 6. Update test assertions if chunk metadata schema has changed
 *
 * ADDITIONAL CONFLICTS:
 * - Tests depend on legacy chunk aggregation logic that may have changed
 * - FileIndexer may need refactoring to support dependency injection for testing
 */
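// A minimal sketch (not part of the original suite) of the setup the unskip
// refactor above calls for. MockGraphManager's constructor and the
// dependency-injection signatures are assumptions; the real interface lives
// in testing/helpers/mockGraphManager.ts and may differ:
//
//   import { MockGraphManager } from './helpers/mockGraphManager.js';
//
//   beforeAll(async () => {
//     const mockGraph = new MockGraphManager();            // in-memory, no bolt:// connection
//     fileIndexer = new FileIndexer(mockGraph);            // assumes DI support (condition 3)
//     searchService = new UnifiedSearchService(mockGraph); // no database modification (condition 4)
//   });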
describe.skip('Chunk Aggregation and Sequential Relationships', () => {
  let driver: Driver;
  let searchService: UnifiedSearchService;
  let graphManager: GraphManager;
  let fileIndexer: FileIndexer;
  let tempDir: string;
  let testFile1: string;
  let testFile2: string;

  beforeAll(async () => {
    // Connect to test database
    const uri = process.env.NEO4J_URI || 'bolt://localhost:7687';
    const user = process.env.NEO4J_USER || 'neo4j';
    const password = process.env.NEO4J_PASSWORD || 'password';
    driver = neo4j.driver(uri, neo4j.auth.basic(user, password));

    // Initialize GraphManager for schema
    graphManager = new GraphManager(uri, user, password);
    await graphManager.initialize();

    fileIndexer = new FileIndexer(driver);
    searchService = new UnifiedSearchService(driver);
    await searchService.initialize();

    // Create temp directory for test files
    tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'mimir-chunk-test-'));

    // Create test files with enough content to trigger chunking
    testFile1 = path.join(tempDir, 'test-file-1.ts');
    const content1 = `
// Test file 1 for chunk aggregation testing
// This file contains information about authentication systems

export class AuthenticationService {
  private tokenManager: TokenManager;
  private userRepository: UserRepository;

  // User authentication with JWT tokens
  async authenticateUser(username: string, password: string) {
    const user = await this.userRepository.findByUsername(username);
    if (!user || !await this.validatePassword(password, user.passwordHash)) {
      throw new Error('Invalid credentials');
    }
    return this.tokenManager.generateToken(user);
  }

  // Token validation for secure endpoints
  async validateToken(token: string) {
    const payload = this.tokenManager.verifyToken(token);
    return await this.userRepository.findById(payload.userId);
  }

  // Password reset functionality
  async resetPassword(userId: string, newPassword: string) {
    const hashedPassword = await this.hashPassword(newPassword);
    await this.userRepository.updatePassword(userId, hashedPassword);
  }
}

// Additional authentication utilities
export class TokenManager {
  private secretKey: string;

  generateToken(user: User): string {
    return jwt.sign({ userId: user.id, email: user.email }, this.secretKey);
  }

  verifyToken(token: string): TokenPayload {
    return jwt.verify(token, this.secretKey) as TokenPayload;
  }
}

// User repository for database operations
export class UserRepository {
  async findByUsername(username: string): Promise<User | null> {
    return await db.users.findOne({ username });
  }

  async findById(id: string): Promise<User | null> {
    return await db.users.findOne({ id });
  }

  async updatePassword(userId: string, hashedPassword: string): Promise<void> {
    await db.users.update({ id: userId }, { passwordHash: hashedPassword });
  }
}
`.repeat(3); // Repeat to ensure chunking (>1000 chars)

    testFile2 = path.join(tempDir, 'test-file-2.ts');
    const content2 = `
// Test file 2 for chunk aggregation testing
// This file contains database configuration and authentication setup

export class DatabaseConfig {
  private connectionString: string;

  // Database authentication settings
  async configureAuth() {
    const authConfig = {
      username: process.env.DB_USER,
      password: process.env.DB_PASSWORD,
      authSource: 'admin'
    };
    return authConfig;
  }

  // Connection pool configuration
  async setupConnection() {
    return {
      poolSize: 10,
      authentication: await this.configureAuth(),
      ssl: true
    };
  }
}

// Additional database utilities
export class QueryBuilder {
  buildAuthQuery(userId: string) {
    return { user_id: userId, active: true };
  }
}
`.repeat(2); // Smaller file, fewer chunks

    await fs.writeFile(testFile1, content1, 'utf-8');
    await fs.writeFile(testFile2, content2, 'utf-8');
  });

  afterAll(async () => {
    // Cleanup test files
    await fs.unlink(testFile1).catch(() => {});
    await fs.unlink(testFile2).catch(() => {});
    await fs.rmdir(tempDir).catch(() => {});
    await driver.close();
  });

  beforeEach(async () => {
    // Clean test data
    const session = driver.session();
    try {
      await session.run(`
        MATCH (n)
        WHERE n.path STARTS WITH $tempDir OR n.filePath STARTS WITH $tempDir
        DETACH DELETE n
      `, { tempDir });
    } finally {
      await session.close();
    }
  });
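  // For reference, a sketch of the chunk ID scheme the assertions below rely
  // on. The real generator lives in FileIndexer; this exact format is an
  // assumption inferred from the `chunk-` and `-${chunkIndex}` checks:
  //
  //   const chunkIdFor = (filePath: string, index: number) =>
  //     `chunk-${filePath}-${index}`;
  //   chunkIdFor('test-file-1.ts', 0); // 'chunk-test-file-1.ts-0'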
  describe('Chunk ID Generation', () => {
    it('should generate unique IDs for each chunk', async () => {
      // Index file with embeddings (triggers chunking)
      await fileIndexer.indexFile(testFile1, tempDir, true);

      const session = driver.session();
      try {
        // FileIndexer stores relative paths, not absolute
        const relativePath = path.basename(testFile1);
        const result = await session.run(`
          MATCH (c:FileChunk)
          WHERE c.filePath = $filePath
          RETURN c.id AS chunk_id, c.chunk_index AS chunk_index
          ORDER BY c.chunk_index
        `, { filePath: relativePath });

        expect(result.records.length).toBeGreaterThan(0);

        const chunkIds = result.records.map(r => r.get('chunk_id'));

        // All IDs should be unique
        const uniqueIds = new Set(chunkIds);
        expect(uniqueIds.size).toBe(chunkIds.length);

        // IDs should follow the pattern: chunk-{filePath}-{index}
        for (const record of result.records) {
          const chunkId = record.get('chunk_id');
          const chunkIndex = record.get('chunk_index');
          expect(chunkId).toContain('chunk-');
          expect(chunkId).toContain(`-${chunkIndex}`);
        }
      } finally {
        await session.close();
      }
    });

    it('should not have null IDs for chunks', async () => {
      await fileIndexer.indexFile(testFile1, tempDir, true);

      const session = driver.session();
      try {
        const result = await session.run(`
          MATCH (c:FileChunk)
          WHERE c.filePath STARTS WITH $tempDir
          RETURN c.id AS chunk_id
        `, { tempDir });

        for (const record of result.records) {
          const chunkId = record.get('chunk_id');
          expect(chunkId).not.toBeNull();
          expect(chunkId).toBeTruthy();
        }
      } finally {
        await session.close();
      }
    });
  });

  describe('NEXT_CHUNK Relationships', () => {
    it('should create NEXT_CHUNK relationships between sequential chunks', async () => {
      await fileIndexer.indexFile(testFile1, tempDir, true);

      const session = driver.session();
      try {
        // Get all chunks ordered by index (use relative path)
        const relativePath = path.basename(testFile1);
        const chunksResult = await session.run(`
          MATCH (c:FileChunk)
          WHERE c.filePath = $filePath
          RETURN c.id AS chunk_id, c.chunk_index AS chunk_index
          ORDER BY c.chunk_index
        `, { filePath: relativePath });

        const chunks = chunksResult.records.map(r => ({
          id: r.get('chunk_id'),
          index: r.get('chunk_index')
        }));

        expect(chunks.length).toBeGreaterThan(1);

        // Check that NEXT_CHUNK relationships exist
        for (let i = 0; i < chunks.length - 1; i++) {
          const currentChunk = chunks[i];
          const nextChunk = chunks[i + 1];

          const relationshipResult = await session.run(`
            MATCH (curr:FileChunk {id: $currId})-[r:NEXT_CHUNK]->(next:FileChunk {id: $nextId})
            RETURN count(r) AS relationship_count
          `, { currId: currentChunk.id, nextId: nextChunk.id });

          const count = relationshipResult.records[0].get('relationship_count').toNumber();
          expect(count).toBe(1);
        }
      } finally {
        await session.close();
      }
    });

    it('should allow traversal of entire chunk chain', async () => {
      await fileIndexer.indexFile(testFile1, tempDir, true);

      const session = driver.session();
      try {
        // Get first chunk (use relative path)
        const relativePath = path.basename(testFile1);
        const firstChunkResult = await session.run(`
          MATCH (c:FileChunk)
          WHERE c.filePath = $filePath AND c.chunk_index = 0
          RETURN c.id AS chunk_id
        `, { filePath: relativePath });

        expect(firstChunkResult.records.length).toBe(1);
        const firstChunkId = firstChunkResult.records[0].get('chunk_id');

        // Traverse the entire chain
        const chainResult = await session.run(`
          MATCH path = (start:FileChunk {id: $startId})-[:NEXT_CHUNK*]->(end:FileChunk)
          RETURN length(path) AS chain_length,
                 [node IN nodes(path) | node.chunk_index] AS chunk_indices
          ORDER BY chain_length DESC
          LIMIT 1
        `, { startId: firstChunkId });

        if (chainResult.records.length > 0) {
          const chunkIndices = chainResult.records[0].get('chunk_indices');

          // Indices should be sequential
          for (let i = 0; i < chunkIndices.length - 1; i++) {
            expect(chunkIndices[i + 1]).toBe(chunkIndices[i] + 1);
          }
        }
      } finally {
        await session.close();
      }
    });

    it('should not create NEXT_CHUNK relationships to chunks from different files', async () => {
      await fileIndexer.indexFile(testFile1, tempDir, true);
      await fileIndexer.indexFile(testFile2, tempDir, true);

      const session = driver.session();
      try {
        // Check that no NEXT_CHUNK relationships exist between different files
        const result = await session.run(`
          MATCH (c1:FileChunk)-[:NEXT_CHUNK]->(c2:FileChunk)
          WHERE c1.filePath <> c2.filePath
          RETURN count(*) AS cross_file_links
        `);

        const crossFileLinks = result.records[0].get('cross_file_links').toNumber();
        expect(crossFileLinks).toBe(0);
      } finally {
        await session.close();
      }
    });
  });
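  // A sketch of the kind of Cypher that would build the chain verified above.
  // The actual linking statement is internal to FileIndexer, so this shape is
  // an assumption based on the properties the tests query:
  //
  //   MATCH (a:FileChunk {filePath: $filePath}), (b:FileChunk {filePath: $filePath})
  //   WHERE b.chunk_index = a.chunk_index + 1
  //   MERGE (a)-[:NEXT_CHUNK]->(b)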
  describe('Chunk Aggregation by File', () => {
    it('should count only chunks from the same file', async () => {
      // Index both files
      await fileIndexer.indexFile(testFile1, tempDir, true);
      await fileIndexer.indexFile(testFile2, tempDir, true);

      // Search for "authentication" which appears in both files
      const result = await searchService.search('authentication', {
        types: ['file_chunk'],
        limit: 10,
        minSimilarity: 0.3
      });

      expect(result.results.length).toBeGreaterThan(0);

      // Each result should represent one file
      const filePaths = result.results.map(r => r.parent_file?.path || r.path);
      const uniqueFiles = new Set(filePaths);

      // Should have at most 2 unique files (test-file-1.ts and test-file-2.ts)
      expect(uniqueFiles.size).toBeLessThanOrEqual(2);

      // Verify chunks_matched for each result
      for (const resultItem of result.results) {
        if (resultItem.chunks_matched) {
          // chunks_matched should be a positive number
          expect(resultItem.chunks_matched).toBeGreaterThan(0);

          // Verify the count is accurate by querying the database
          const session = driver.session();
          try {
            const filePath = resultItem.parent_file?.path || resultItem.path;
            if (!filePath) continue;

            // File paths are stored as just the filename (e.g., 'test-file-1.ts')
            const fileName = path.basename(filePath);
            const countResult = await session.run(`
              MATCH (f:File {path: $fileName})-[:HAS_CHUNK]->(c:FileChunk)
              WHERE c.embedding IS NOT NULL
              RETURN count(c) AS total_chunks
            `, { fileName });

            const totalChunks = countResult.records[0]?.get('total_chunks').toNumber() || 0;

            // chunks_matched should be <= total chunks in the file
            expect(resultItem.chunks_matched).toBeLessThanOrEqual(totalChunks);
          } finally {
            await session.close();
          }
        }
      }
    });

    it('should return the best matching chunk for each file', async () => {
      await fileIndexer.indexFile(testFile1, tempDir, true);

      const result = await searchService.search('authentication token', {
        types: ['file_chunk'],
        limit: 5,
        minSimilarity: 0.3
      });

      expect(result.results.length).toBeGreaterThan(0);

      for (const resultItem of result.results) {
        // Should have similarity score
        expect(resultItem.similarity).toBeDefined();
        expect(resultItem.similarity).toBeGreaterThan(0);

        // If there are multiple chunks, should have avg_similarity
        if (resultItem.chunks_matched && resultItem.chunks_matched > 1) {
          expect(resultItem.avg_similarity).toBeDefined();
          // Average similarity should be <= best similarity
          expect(resultItem.avg_similarity).toBeLessThanOrEqual(resultItem.similarity!);
        }

        // Should have content preview from the best chunk
        expect(resultItem.content_preview).toBeTruthy();
      }
    });

    it('should aggregate multiple matching chunks from the same file', async () => {
      await fileIndexer.indexFile(testFile1, tempDir, true);

      // Search for a term that appears multiple times in the file
      const result = await searchService.search('authentication', {
        types: ['file_chunk'],
        limit: 5,
        minSimilarity: 0.3
      });

      // Find results with multiple chunks matched
      const multiChunkResults = result.results.filter(r =>
        r.chunks_matched && r.chunks_matched > 1
      );

      if (multiChunkResults.length > 0) {
        for (const resultItem of multiChunkResults) {
          // Should have chunks_matched > 1
          expect(resultItem.chunks_matched).toBeGreaterThan(1);

          // Should have avg_similarity
          expect(resultItem.avg_similarity).toBeDefined();

          // Should have only one result per file (aggregated)
          expect(resultItem.parent_file).toBeDefined();
          expect(resultItem.parent_file!.path).toBeTruthy();

          // Check that we're not returning duplicate entries for this file
          const sameFileResults = result.results.filter(r =>
            r.parent_file?.path === resultItem.parent_file!.path
          );
          expect(sameFileResults.length).toBe(1);
        }
      }
    });
  });
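  // A sketch of the per-file aggregation the searches above expect from
  // UnifiedSearchService. The real query is internal to that service, and the
  // vector index name and exact shape here are assumptions:
  //
  //   CALL db.index.vector.queryNodes('chunk_embeddings', $k, $queryEmbedding)
  //   YIELD node AS c, score
  //   MATCH (f:File)-[:HAS_CHUNK]->(c)
  //   WHERE score >= $minSimilarity
  //   RETURN f.path AS path,
  //          count(c) AS chunks_matched,
  //          max(score) AS similarity,
  //          avg(score) AS avg_similarity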
  describe('Search Result Format', () => {
    it('should include chunk_index for the best matching chunk', async () => {
      await fileIndexer.indexFile(testFile1, tempDir, true);

      const result = await searchService.search('authentication', {
        types: ['file_chunk'],
        limit: 5,
        minSimilarity: 0.3
      });

      expect(result.results.length).toBeGreaterThan(0);

      for (const resultItem of result.results) {
        if (resultItem.type === 'file_chunk') {
          // Should have chunk_index
          expect(resultItem.chunk_index).toBeDefined();
          expect(typeof resultItem.chunk_index).toBe('number');
          expect(resultItem.chunk_index).toBeGreaterThanOrEqual(0);
        }
      }
    });

    it('should include parent_file information for file_chunk results', async () => {
      await fileIndexer.indexFile(testFile1, tempDir, true);

      const result = await searchService.search('authentication', {
        types: ['file_chunk'],
        limit: 5,
        minSimilarity: 0.3
      });

      expect(result.results.length).toBeGreaterThan(0);

      for (const resultItem of result.results) {
        if (resultItem.type === 'file_chunk') {
          expect(resultItem.parent_file).toBeDefined();
          expect(resultItem.parent_file!.path).toBeTruthy();
          expect(resultItem.parent_file!.name).toBeTruthy();
          expect(resultItem.parent_file!.language).toBe('typescript');
        }
      }
    });

    it('should not have duplicate file entries in results', async () => {
      await fileIndexer.indexFile(testFile1, tempDir, true);
      await fileIndexer.indexFile(testFile2, tempDir, true);

      const result = await searchService.search('authentication', {
        types: ['file_chunk'],
        limit: 10,
        minSimilarity: 0.3
      });

      // Get all file paths from results
      const filePaths = result.results.map(r =>
        r.parent_file?.path || r.path
      );

      // Check for duplicates
      const uniqueFiles = new Set(filePaths);
      expect(uniqueFiles.size).toBe(filePaths.length);
    });
  });

  describe('Edge Cases', () => {
    it('should handle files with only one chunk', async () => {
      // Create a small file that won't be chunked
      const smallFile = path.join(tempDir, 'small-file.ts');
      const smallContent = 'export const config = { auth: true };';
      await fs.writeFile(smallFile, smallContent, 'utf-8');

      await fileIndexer.indexFile(smallFile, tempDir, true);

      const session = driver.session();
      try {
        const result = await session.run(`
          MATCH (f:File)
          WHERE f.path = $relativePath
          RETURN f.has_chunks AS has_chunks
        `, { relativePath: path.relative(tempDir, smallFile) });

        if (result.records.length > 0) {
          const hasChunks = result.records[0].get('has_chunks');
          // Small file should not have separate chunks
          expect(hasChunks).toBe(false);
        }
      } finally {
        await session.close();
      }

      await fs.unlink(smallFile);
    });

    it('should handle files with no matches', async () => {
      await fileIndexer.indexFile(testFile1, tempDir, true);

      const result = await searchService.search('xyznonexistentterm12345', {
        types: ['file_chunk'],
        limit: 5,
        minSimilarity: 0.9
      });

      expect(result.status).toBe('success');
      expect(result.results.length).toBe(0);
    });
  });
});
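// For context on the single-chunk edge case: a sketch of the chunking
// decision, assuming the >1000-character threshold noted in beforeAll
// ("Repeat to ensure chunking (>1000 chars)"). The real logic lives in
// FileIndexer and may differ:
//
//   const CHUNK_THRESHOLD = 1000; // characters
//   const shouldChunk = (content: string) => content.length > CHUNK_THRESHOLD;
//   shouldChunk('export const config = { auth: true };'); // false -> File gets has_chunks = false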
