M.I.M.I.R - Multi-agent Intelligent Memory & Insight Repository

Overview Schema Related Servers Score Discussions

Mimir
testing

chunk-aggregation.test.ts•19.8 KiB

/**
 * @file testing/chunk-aggregation.test.ts
 * @description Test chunk aggregation, IDs, and sequential relationships
 */

import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
import neo4j, { Driver } from 'neo4j-driver';
import { FileIndexer } from '../src/indexing/FileIndexer.js';
import { UnifiedSearchService } from '../src/managers/UnifiedSearchService.js';
import { GraphManager } from '../src/managers/GraphManager.js';
import { promises as fs } from 'fs';
import path from 'path';
import os from 'os';

/**
 * SKIPPED: Entire test suite - Uses real Neo4j database connection
 *
 * ⚠️ CRITICAL: This test suite uses real database connections and WILL MODIFY DATA
 *
 * DATABASE CONNECTION: The `beforeAll` hook creates a real Neo4j driver and GraphManager
 * - Connects to bolt://localhost:7687 with neo4j/password by default
 *   (overridable via NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD)
 * - Creates/deletes nodes and edges in the database
 * - Can corrupt production data if run against wrong database
 *
 * TESTS COVERED (13 total):
 * - File indexing creates proper file/chunk relationships
 * - Sequential chunk relationships (NEXT_CHUNK edges)
 * - Chunk ID uniqueness and consistency
 * - Full file content reconstruction from chunks
 * - Chunk metadata (startLine, endLine, chunkIndex)
 * - Multiple file indexing and isolation
 * - Chunk aggregation queries and performance
 *
 * UNSKIP CONDITIONS - MUST COMPLETE ALL:
 * 1. Refactor to use MockGraphManager from testing/helpers/mockGraphManager.ts, AND
 * 2. Remove all real Neo4j driver connections (the `beforeAll` setup), AND
 * 3. Replace FileIndexer instantiation with mocked version or dependency injection, AND
 * 4. Ensure no database modification occurs (all operations in-memory), AND
 * 5. Verify FileIndexer chunk creation logic matches test expectations, AND
 * 6. Update test assertions if chunk metadata schema has changed
 *
 * ADDITIONAL CONFLICTS:
 * - Tests depend on legacy chunk aggregation logic that may have changed
 * - FileIndexer may need refactoring to support dependency injection for testing
 */
describe.skip('Chunk Aggregation and Sequential Relationships', () => {
  // File name of the single-chunk fixture created by the "Edge Cases" suite.
  const SMALL_FILE_NAME = 'small-file.ts';

  let driver: Driver;
  let searchService: UnifiedSearchService;
  let graphManager: GraphManager;
  let fileIndexer: FileIndexer;
  let tempDir: string;
  let testFile1: string;
  let testFile2: string;

  // Opens the database connection, initializes the services under test and
  // writes the two multi-chunk fixture files into a fresh temp directory.
  beforeAll(async () => {
    // Connect to test database
    const uri = process.env.NEO4J_URI || 'bolt://localhost:7687';
    const user = process.env.NEO4J_USER || 'neo4j';
    const password = process.env.NEO4J_PASSWORD || 'password';

    driver = neo4j.driver(uri, neo4j.auth.basic(user, password));

    // Initialize GraphManager for schema
    graphManager = new GraphManager(uri, user, password);
    await graphManager.initialize();

    fileIndexer = new FileIndexer(driver);
    searchService = new UnifiedSearchService(driver);
    await searchService.initialize();

    // Create temp directory for test files
    tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'mimir-chunk-test-'));

    // Create test files with enough content to trigger chunking
    testFile1 = path.join(tempDir, 'test-file-1.ts');
    const content1 = `
// Test file 1 for chunk aggregation testing
// This file contains information about authentication systems

export class AuthenticationService {
  private tokenManager: TokenManager;
  private userRepository: UserRepository;

  // User authentication with JWT tokens
  async authenticateUser(username: string, password: string) {
    const user = await this.userRepository.findByUsername(username);
    if (!user || !await this.validatePassword(password, user.passwordHash)) {
      throw new Error('Invalid credentials');
    }
    return this.tokenManager.generateToken(user);
  }

  // Token validation for secure endpoints
  async validateToken(token: string) {
    const payload = this.tokenManager.verifyToken(token);
    return await this.userRepository.findById(payload.userId);
  }

  // Password reset functionality
  async resetPassword(userId: string, newPassword: string) {
    const hashedPassword = await this.hashPassword(newPassword);
    await this.userRepository.updatePassword(userId, hashedPassword);
  }
}

// Additional authentication utilities
export class TokenManager {
  private secretKey: string;

  generateToken(user: User): string {
    return jwt.sign({ userId: user.id, email: user.email }, this.secretKey);
  }

  verifyToken(token: string): TokenPayload {
    return jwt.verify(token, this.secretKey) as TokenPayload;
  }
}

// User repository for database operations
export class UserRepository {
  async findByUsername(username: string): Promise<User | null> {
    return await db.users.findOne({ username });
  }

  async findById(id: string): Promise<User | null> {
    return await db.users.findOne({ id });
  }

  async updatePassword(userId: string, hashedPassword: string): Promise<void> {
    await db.users.update({ id: userId }, { passwordHash: hashedPassword });
  }
}
`.repeat(3); // Repeat to ensure chunking (>1000 chars)

    testFile2 = path.join(tempDir, 'test-file-2.ts');
    const content2 = `
// Test file 2 for chunk aggregation testing
// This file contains database configuration and authentication setup

export class DatabaseConfig {
  private connectionString: string;

  // Database authentication settings
  async configureAuth() {
    const authConfig = {
      username: process.env.DB_USER,
      password: process.env.DB_PASSWORD,
      authSource: 'admin'
    };
    return authConfig;
  }

  // Connection pool configuration
  async setupConnection() {
    return {
      poolSize: 10,
      authentication: await this.configureAuth(),
      ssl: true
    };
  }
}

// Additional database utilities
export class QueryBuilder {
  buildAuthQuery(userId: string) {
    return { user_id: userId, active: true };
  }
}
`.repeat(2); // Smaller file, fewer chunks

    await fs.writeFile(testFile1, content1, 'utf-8');
    await fs.writeFile(testFile2, content2, 'utf-8');
  });

  // Removes the fixture directory and closes the driver.
  afterAll(async () => {
    // Recursive removal also deletes fixtures a failed test left behind
    // (e.g. small-file.ts); a plain rmdir would fail on the non-empty
    // directory. Cleanup stays best-effort, as before.
    await fs.rm(tempDir, { recursive: true, force: true }).catch(() => {});
    await driver.close();
  });

  // Deletes every node produced by this suite so each test starts clean.
  beforeEach(async () => {
    // The tests below rely on FileIndexer storing paths relative to the
    // indexed root, so matching on the absolute temp dir alone never removes
    // anything. Match the fixtures' relative paths as well.
    const relativePaths = [
      path.basename(testFile1),
      path.basename(testFile2),
      SMALL_FILE_NAME
    ];

    const session = driver.session();
    try {
      await session.run(
        `
        MATCH (n)
        WHERE n.path STARTS WITH $tempDir
           OR n.filePath STARTS WITH $tempDir
           OR n.path IN $relativePaths
           OR n.filePath IN $relativePaths
        DETACH DELETE n
        `,
        { tempDir, relativePaths }
      );
    } finally {
      await session.close();
    }
  });

  describe('Chunk ID Generation', () => {
    it('should generate unique IDs for each chunk', async () => {
      // Index file with embeddings (triggers chunking)
      await fileIndexer.indexFile(testFile1, tempDir, true);

      const session = driver.session();
      try {
        // FileIndexer stores relative paths, not absolute
        const relativePath = path.basename(testFile1);
        const result = await session.run(
          `
          MATCH (c:FileChunk)
          WHERE c.filePath = $filePath
          RETURN c.id AS chunk_id, c.chunk_index AS chunk_index
          ORDER BY c.chunk_index
          `,
          { filePath: relativePath }
        );

        expect(result.records.length).toBeGreaterThan(0);

        const chunkIds = result.records.map(r => r.get('chunk_id'));

        // All IDs should be unique
        const uniqueIds = new Set(chunkIds);
        expect(uniqueIds.size).toBe(chunkIds.length);

        // IDs should follow the pattern: chunk-{filePath}-{index}
        for (const record of result.records) {
          const chunkId = record.get('chunk_id');
          const chunkIndex = record.get('chunk_index');
          expect(chunkId).toContain('chunk-');
          expect(chunkId).toContain(`-${chunkIndex}`);
        }
      } finally {
        await session.close();
      }
    });

    it('should not have null IDs for chunks', async () => {
      await fileIndexer.indexFile(testFile1, tempDir, true);

      const session = driver.session();
      try {
        // Query by the stored (relative) path; filtering on the absolute
        // temp dir matched no chunks and made this test pass vacuously.
        const relativePath = path.basename(testFile1);
        const result = await session.run(
          `
          MATCH (c:FileChunk)
          WHERE c.filePath = $filePath
          RETURN c.id AS chunk_id
          `,
          { filePath: relativePath }
        );

        // Guard against an empty result silently skipping the checks below.
        expect(result.records.length).toBeGreaterThan(0);

        for (const record of result.records) {
          const chunkId = record.get('chunk_id');
          expect(chunkId).not.toBeNull();
          expect(chunkId).toBeTruthy();
        }
      } finally {
        await session.close();
      }
    });
  });

  describe('NEXT_CHUNK Relationships', () => {
    it('should create NEXT_CHUNK relationships between sequential chunks', async () => {
      await fileIndexer.indexFile(testFile1, tempDir, true);

      const session = driver.session();
      try {
        // Get all chunks ordered by index (use relative path)
        const relativePath = path.basename(testFile1);
        const chunksResult = await session.run(
          `
          MATCH (c:FileChunk)
          WHERE c.filePath = $filePath
          RETURN c.id AS chunk_id, c.chunk_index AS chunk_index
          ORDER BY c.chunk_index
          `,
          { filePath: relativePath }
        );

        const chunks = chunksResult.records.map(r => ({
          id: r.get('chunk_id'),
          index: r.get('chunk_index')
        }));

        expect(chunks.length).toBeGreaterThan(1);

        // Check that NEXT_CHUNK relationships exist
        for (let i = 0; i < chunks.length - 1; i++) {
          const currentChunk = chunks[i];
          const nextChunk = chunks[i + 1];

          const relationshipResult = await session.run(
            `
            MATCH (curr:FileChunk {id: $currId})-[r:NEXT_CHUNK]->(next:FileChunk {id: $nextId})
            RETURN count(r) AS relationship_count
            `,
            { currId: currentChunk.id, nextId: nextChunk.id }
          );

          const count = relationshipResult.records[0].get('relationship_count').toNumber();
          expect(count).toBe(1);
        }
      } finally {
        await session.close();
      }
    });

    it('should allow traversal of entire chunk chain', async () => {
      await fileIndexer.indexFile(testFile1, tempDir, true);

      const session = driver.session();
      try {
        // Get first chunk (use relative path)
        const relativePath = path.basename(testFile1);
        const firstChunkResult = await session.run(
          `
          MATCH (c:FileChunk)
          WHERE c.filePath = $filePath AND c.chunk_index = 0
          RETURN c.id AS chunk_id
          `,
          { filePath: relativePath }
        );

        expect(firstChunkResult.records.length).toBe(1);
        const firstChunkId = firstChunkResult.records[0].get('chunk_id');

        // Traverse the entire chain
        const chainResult = await session.run(
          `
          MATCH path = (start:FileChunk {id: $startId})-[:NEXT_CHUNK*]->(end:FileChunk)
          RETURN length(path) AS chain_length,
                 [node IN nodes(path) | node.chunk_index] AS chunk_indices
          ORDER BY chain_length DESC
          LIMIT 1
          `,
          { startId: firstChunkId }
        );

        // The fixture is large enough to produce several chunks (asserted by
        // the previous test), so a chain must exist. Assert it rather than
        // skipping the sequence check when nothing is returned.
        expect(chainResult.records.length).toBe(1);

        const chunkIndices = chainResult.records[0].get('chunk_indices');

        // Indices should be sequential
        for (let i = 0; i < chunkIndices.length - 1; i++) {
          expect(chunkIndices[i + 1]).toBe(chunkIndices[i] + 1);
        }
      } finally {
        await session.close();
      }
    });

    it('should not create NEXT_CHUNK relationships to chunks from different files', async () => {
      await fileIndexer.indexFile(testFile1, tempDir, true);
      await fileIndexer.indexFile(testFile2, tempDir, true);

      const session = driver.session();
      try {
        // Check that no NEXT_CHUNK relationships exist between different files
        const result = await session.run(`
          MATCH (c1:FileChunk)-[:NEXT_CHUNK]->(c2:FileChunk)
          WHERE c1.filePath <> c2.filePath
          RETURN count(*) AS cross_file_links
        `);

        const crossFileLinks = result.records[0].get('cross_file_links').toNumber();
        expect(crossFileLinks).toBe(0);
      } finally {
        await session.close();
      }
    });
  });

  describe('Chunk Aggregation by File', () => {
    it('should count only chunks from the same file', async () => {
      // Index both files
      await fileIndexer.indexFile(testFile1, tempDir, true);
      await fileIndexer.indexFile(testFile2, tempDir, true);

      // Search for "authentication" which appears in both files
      const result = await searchService.search('authentication', {
        types: ['file_chunk'],
        limit: 10,
        minSimilarity: 0.3
      });

      expect(result.results.length).toBeGreaterThan(0);

      // Each result should represent one file
      const filePaths = result.results.map(r => r.parent_file?.path || r.path);
      const uniqueFiles = new Set(filePaths);

      // Should have at most 2 unique files (test-file-1.ts and test-file-2.ts)
      expect(uniqueFiles.size).toBeLessThanOrEqual(2);

      // Verify chunks_matched for each result
      for (const resultItem of result.results) {
        if (!resultItem.chunks_matched) continue;

        // chunks_matched should be a positive number
        expect(resultItem.chunks_matched).toBeGreaterThan(0);

        const filePath = resultItem.parent_file?.path || resultItem.path;
        if (!filePath) continue;

        // Verify the count is accurate by querying the database
        const session = driver.session();
        try {
          // File paths are stored as just the filename (e.g., 'test-file-1.ts')
          const fileName = path.basename(filePath);

          const countResult = await session.run(
            `
            MATCH (f:File {path: $fileName})-[:HAS_CHUNK]->(c:FileChunk)
            WHERE c.embedding IS NOT NULL
            RETURN count(c) AS total_chunks
            `,
            { fileName }
          );

          const totalChunks = countResult.records[0]?.get('total_chunks').toNumber() ?? 0;

          // chunks_matched should be <= total chunks in the file
          expect(resultItem.chunks_matched).toBeLessThanOrEqual(totalChunks);
        } finally {
          await session.close();
        }
      }
    });

    it('should return the best matching chunk for each file', async () => {
      await fileIndexer.indexFile(testFile1, tempDir, true);

      const result = await searchService.search('authentication token', {
        types: ['file_chunk'],
        limit: 5,
        minSimilarity: 0.3
      });

      expect(result.results.length).toBeGreaterThan(0);

      for (const resultItem of result.results) {
        // Should have similarity score
        expect(resultItem.similarity).toBeDefined();
        expect(resultItem.similarity).toBeGreaterThan(0);

        // If there are multiple chunks, should have avg_similarity
        if (resultItem.chunks_matched && resultItem.chunks_matched > 1) {
          expect(resultItem.avg_similarity).toBeDefined();
          // Average similarity should be <= best similarity
          expect(resultItem.avg_similarity).toBeLessThanOrEqual(resultItem.similarity!);
        }

        // Should have content preview from the best chunk
        expect(resultItem.content_preview).toBeTruthy();
      }
    });

    it('should aggregate multiple matching chunks from the same file', async () => {
      await fileIndexer.indexFile(testFile1, tempDir, true);

      // Search for a term that appears multiple times in the file
      const result = await searchService.search('authentication', {
        types: ['file_chunk'],
        limit: 5,
        minSimilarity: 0.3
      });

      // Find results with multiple chunks matched
      const multiChunkResults = result.results.filter(
        r => r.chunks_matched && r.chunks_matched > 1
      );

      // NOTE(review): passes without asserting anything when no result
      // aggregates more than one chunk; left as-is because that depends on
      // the embedding backend in use.
      for (const resultItem of multiChunkResults) {
        // Should have chunks_matched > 1
        expect(resultItem.chunks_matched).toBeGreaterThan(1);

        // Should have avg_similarity
        expect(resultItem.avg_similarity).toBeDefined();

        // Should have only one result per file (aggregated)
        expect(resultItem.parent_file).toBeDefined();
        expect(resultItem.parent_file!.path).toBeTruthy();

        // Check that we're not returning duplicate entries for this file
        const sameFileResults = result.results.filter(
          r => r.parent_file?.path === resultItem.parent_file!.path
        );
        expect(sameFileResults.length).toBe(1);
      }
    });
  });

  describe('Search Result Format', () => {
    it('should include chunk_index for the best matching chunk', async () => {
      await fileIndexer.indexFile(testFile1, tempDir, true);

      const result = await searchService.search('authentication', {
        types: ['file_chunk'],
        limit: 5,
        minSimilarity: 0.3
      });

      expect(result.results.length).toBeGreaterThan(0);

      for (const resultItem of result.results) {
        if (resultItem.type === 'file_chunk') {
          // Should have chunk_index
          expect(resultItem.chunk_index).toBeDefined();
          expect(typeof resultItem.chunk_index).toBe('number');
          expect(resultItem.chunk_index).toBeGreaterThanOrEqual(0);
        }
      }
    });

    it('should include parent_file information for file_chunk results', async () => {
      await fileIndexer.indexFile(testFile1, tempDir, true);

      const result = await searchService.search('authentication', {
        types: ['file_chunk'],
        limit: 5,
        minSimilarity: 0.3
      });

      expect(result.results.length).toBeGreaterThan(0);

      for (const resultItem of result.results) {
        if (resultItem.type === 'file_chunk') {
          expect(resultItem.parent_file).toBeDefined();
          expect(resultItem.parent_file!.path).toBeTruthy();
          expect(resultItem.parent_file!.name).toBeTruthy();
          expect(resultItem.parent_file!.language).toBe('typescript');
        }
      }
    });

    it('should not have duplicate file entries in results', async () => {
      await fileIndexer.indexFile(testFile1, tempDir, true);
      await fileIndexer.indexFile(testFile2, tempDir, true);

      const result = await searchService.search('authentication', {
        types: ['file_chunk'],
        limit: 10,
        minSimilarity: 0.3
      });

      // Get all file paths from results
      const filePaths = result.results.map(r => r.parent_file?.path || r.path);

      // Check for duplicates
      const uniqueFiles = new Set(filePaths);
      expect(uniqueFiles.size).toBe(filePaths.length);
    });
  });

  describe('Edge Cases', () => {
    it('should handle files with only one chunk', async () => {
      // Create a small file that won't be chunked
      const smallFile = path.join(tempDir, SMALL_FILE_NAME);
      const smallContent = 'export const config = { auth: true };';
      await fs.writeFile(smallFile, smallContent, 'utf-8');

      try {
        await fileIndexer.indexFile(smallFile, tempDir, true);

        const session = driver.session();
        try {
          const result = await session.run(
            `
            MATCH (f:File)
            WHERE f.path = $relativePath
            RETURN f.has_chunks AS has_chunks
            `,
            { relativePath: path.relative(tempDir, smallFile) }
          );

          // NOTE(review): nothing is asserted when no File node is found —
          // confirm whether a missing node should fail this test.
          if (result.records.length > 0) {
            const hasChunks = result.records[0].get('has_chunks');
            // Small file should not have separate chunks
            expect(hasChunks).toBe(false);
          }
        } finally {
          await session.close();
        }
      } finally {
        // Remove the fixture even when indexing or an assertion fails, so it
        // cannot leak into later tests.
        await fs.rm(smallFile, { force: true });
      }
    });

    it('should handle files with no matches', async () => {
      await fileIndexer.indexFile(testFile1, tempDir, true);

      const result = await searchService.search('xyznonexistentterm12345', {
        types: ['file_chunk'],
        limit: 5,
        minSimilarity: 0.9
      });

      expect(result.status).toBe('success');
      expect(result.results.length).toBe(0);
    });
  });
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/orneryd/Mimir'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

chunk-aggregation.test.ts•19.8 KiB