/**
* Tests for ParallelFileSynchronizer
*
* Coverage:
* - Bounded concurrency (parallelLimit function)
* - Hash caching and reuse
* - detectChanges with shard processing
* - updateSnapshot with precomputed hashes
* - Fast path (mtime+size) vs slow path (hash computation)
*/
import { afterEach, beforeEach, describe, expect, it } from "vitest";
import { promises as fs } from "node:fs";
import { dirname, join } from "node:path";
import { tmpdir } from "node:os";
import { ParallelFileSynchronizer } from "./parallel-synchronizer.js";
describe("ParallelFileSynchronizer", () => {
let testDir: string;
let codebaseDir: string;
let snapshotDir: string;
let synchronizer: ParallelFileSynchronizer;
beforeEach(async () => {
// Create unique temp directories for each test
const uniqueId = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
testDir = join(tmpdir(), `parallel-sync-test-${uniqueId}`);
codebaseDir = join(testDir, "codebase");
snapshotDir = join(testDir, "snapshots");
await fs.mkdir(codebaseDir, { recursive: true });
await fs.mkdir(snapshotDir, { recursive: true });
synchronizer = new ParallelFileSynchronizer(
codebaseDir,
"test-collection",
snapshotDir,
4 // concurrency (the Concurrency Configuration tests below read this back via getConcurrency())
);
});
afterEach(async () => {
try {
await fs.rm(testDir, { recursive: true, force: true });
} catch {
// Ignore cleanup errors
}
});
// Helper to create test files
async function createTestFile(relativePath: string, content: string): Promise<string> {
const fullPath = join(codebaseDir, relativePath);
await fs.mkdir(dirname(fullPath), { recursive: true });
await fs.writeFile(fullPath, content, "utf-8");
return fullPath;
}
// Helper to create multiple test files
async function createTestFiles(count: number): Promise<string[]> {
const files: string[] = [];
for (let i = 0; i < count; i++) {
const path = `src/file-${i.toString().padStart(4, "0")}.ts`;
await createTestFile(path, `// File ${i}\nexport const x${i} = ${i};`);
files.push(join(codebaseDir, path));
}
return files;
}
describe("Initialization", () => {
it("should return false when no snapshot exists", async () => {
const hasSnapshot = await synchronizer.initialize();
expect(hasSnapshot).toBe(false);
});
it("should return true after snapshot is created", async () => {
const files = await createTestFiles(5);
// First, detect changes to build snapshot
await synchronizer.initialize();
await synchronizer.detectChanges(files);
await synchronizer.updateSnapshot(files);
// Create new synchronizer and initialize
const newSync = new ParallelFileSynchronizer(
codebaseDir,
"test-collection",
snapshotDir,
4
);
const hasSnapshot = await newSync.initialize();
expect(hasSnapshot).toBe(true);
});
});
describe("detectChanges", () => {
it("should detect all files as added when no previous snapshot", async () => {
const files = await createTestFiles(10);
await synchronizer.initialize();
const changes = await synchronizer.detectChanges(files);
expect(changes.added.length).toBe(10);
expect(changes.modified.length).toBe(0);
expect(changes.deleted.length).toBe(0);
});
it("should detect modified files correctly", async () => {
const files = await createTestFiles(5);
// First snapshot
await synchronizer.initialize();
await synchronizer.detectChanges(files);
await synchronizer.updateSnapshot(files);
// Wait a bit first, so the rewrite lands on a different mtime tick than
// the one recorded in the snapshot
await new Promise((r) => setTimeout(r, 10));
// Modify a file
await fs.writeFile(files[2], "// Modified content\nexport const modified = true;");
// Create new synchronizer and detect changes
const newSync = new ParallelFileSynchronizer(
codebaseDir,
"test-collection",
snapshotDir,
4
);
await newSync.initialize();
const changes = await newSync.detectChanges(files);
expect(changes.added.length).toBe(0);
expect(changes.modified.length).toBe(1);
expect(changes.deleted.length).toBe(0);
});
it("should detect deleted files correctly", async () => {
const files = await createTestFiles(5);
// First snapshot
await synchronizer.initialize();
await synchronizer.detectChanges(files);
await synchronizer.updateSnapshot(files);
// Delete a file
await fs.unlink(files[3]);
const remainingFiles = files.filter((_, i) => i !== 3);
// Create new synchronizer and detect changes
const newSync = new ParallelFileSynchronizer(
codebaseDir,
"test-collection",
snapshotDir,
4
);
await newSync.initialize();
const changes = await newSync.detectChanges(remainingFiles);
expect(changes.added.length).toBe(0);
expect(changes.modified.length).toBe(0);
expect(changes.deleted.length).toBe(1);
});
it("should handle concurrent detection correctly", async () => {
const files = await createTestFiles(20);
await synchronizer.initialize();
// Run detectChanges concurrently
const results = await Promise.all([
synchronizer.detectChanges(files),
synchronizer.detectChanges(files),
synchronizer.detectChanges(files),
]);
// All results should be identical
for (const result of results) {
expect(result.added.length).toBe(20);
expect(result.modified.length).toBe(0);
expect(result.deleted.length).toBe(0);
}
});
});
describe("Hash Caching", () => {
it("should cache computed hashes for reuse in updateSnapshot", async () => {
const files = await createTestFiles(10);
await synchronizer.initialize();
// detectChanges computes hashes for the added files and caches them
await synchronizer.detectChanges(files);
// updateSnapshot should reuse the cached hashes (fast path)
const startTime = Date.now();
await synchronizer.updateSnapshot(files);
const duration = Date.now() - startTime;
// Should be relatively fast since hashes are cached
// (This is a sanity check, not a strict timing requirement)
expect(duration).toBeLessThan(5000);
});
it("should accept precomputed hashes in updateSnapshot", async () => {
const files = await createTestFiles(5);
await synchronizer.initialize();
// No prior detectChanges means no cached hashes, so updateSnapshot must
// compute them itself before persisting the snapshot
await synchronizer.updateSnapshot(files);
// Verify snapshot was created
const newSync = new ParallelFileSynchronizer(
codebaseDir,
"test-collection",
snapshotDir,
4
);
const hasSnapshot = await newSync.initialize();
expect(hasSnapshot).toBe(true);
});
});
describe("Shard Distribution", () => {
it("should distribute files across shards using consistent hashing", async () => {
const files = await createTestFiles(100);
await synchronizer.initialize();
const changes = await synchronizer.detectChanges(files);
// All 100 files should be detected as added
expect(changes.added.length).toBe(100);
// Update snapshot to verify distribution
await synchronizer.updateSnapshot(files);
// Verify snapshot was created correctly
const newSync = new ParallelFileSynchronizer(
codebaseDir,
"test-collection",
snapshotDir,
4
);
await newSync.initialize();
const newChanges = await newSync.detectChanges(files);
// No changes expected
expect(newChanges.added.length).toBe(0);
expect(newChanges.modified.length).toBe(0);
expect(newChanges.deleted.length).toBe(0);
});
it("should handle uneven shard distribution", async () => {
// Create files with similar paths that might cluster in same shard
const files: string[] = [];
for (let i = 0; i < 20; i++) {
const path = `src/utils/helper${i}.ts`;
await createTestFile(path, `// Helper ${i}`);
files.push(join(codebaseDir, path));
}
await synchronizer.initialize();
const changes = await synchronizer.detectChanges(files);
expect(changes.added.length).toBe(20);
});
});
describe("Fast Path (mtime+size)", () => {
it("should use fast path for unchanged files", async () => {
const files = await createTestFiles(10);
// First snapshot
await synchronizer.initialize();
await synchronizer.detectChanges(files);
await synchronizer.updateSnapshot(files);
// Create new synchronizer - should use fast path for all files
const newSync = new ParallelFileSynchronizer(
codebaseDir,
"test-collection",
snapshotDir,
4
);
await newSync.initialize();
const startTime = Date.now();
const changes = await newSync.detectChanges(files);
const duration = Date.now() - startTime;
// Fast path should be quick
expect(changes.added.length).toBe(0);
expect(changes.modified.length).toBe(0);
expect(changes.deleted.length).toBe(0);
// Relaxed timing check - just verify it completes
expect(duration).toBeLessThan(10000);
});
});
describe("Checkpoint Operations", () => {
it("should save and load checkpoint", async () => {
await synchronizer.saveCheckpoint(["file1.ts", "file2.ts"], 10, "indexing");
const checkpoint = await synchronizer.loadCheckpoint();
expect(checkpoint).not.toBeNull();
expect(checkpoint!.processedFiles).toEqual(["file1.ts", "file2.ts"]);
expect(checkpoint!.totalFiles).toBe(10);
expect(checkpoint!.phase).toBe("indexing");
});
it("should delete checkpoint", async () => {
await synchronizer.saveCheckpoint(["file1.ts"], 5, "indexing");
expect(await synchronizer.hasCheckpoint()).toBe(true);
await synchronizer.deleteCheckpoint();
expect(await synchronizer.hasCheckpoint()).toBe(false);
});
it("should filter processed files", async () => {
const allFiles = ["a.ts", "b.ts", "c.ts", "d.ts"];
const checkpoint = {
processedFiles: ["a.ts", "c.ts"],
totalFiles: 4,
timestamp: Date.now(),
phase: "indexing" as const,
};
const remaining = synchronizer.filterProcessedFiles(allFiles, checkpoint);
expect(remaining).toEqual(["b.ts", "d.ts"]);
});
});
describe("Edge Cases", () => {
it("should handle empty file list", async () => {
await synchronizer.initialize();
const changes = await synchronizer.detectChanges([]);
expect(changes.added.length).toBe(0);
expect(changes.modified.length).toBe(0);
expect(changes.deleted.length).toBe(0);
});
it("should handle files with special characters", async () => {
const specialPaths = [
"src/file with spaces.ts",
"src/file-with-dashes.ts",
"src/file_with_underscores.ts",
];
for (const path of specialPaths) {
await createTestFile(path, `// ${path}`);
}
const files = specialPaths.map((p) => join(codebaseDir, p));
await synchronizer.initialize();
const changes = await synchronizer.detectChanges(files);
expect(changes.added.length).toBe(3);
});
it("should handle deleted snapshot gracefully", async () => {
const files = await createTestFiles(5);
await synchronizer.initialize();
await synchronizer.detectChanges(files);
await synchronizer.updateSnapshot(files);
// Delete snapshot
await synchronizer.deleteSnapshot();
// Should work like fresh start
const newSync = new ParallelFileSynchronizer(
codebaseDir,
"test-collection",
snapshotDir,
4
);
const hasSnapshot = await newSync.initialize();
expect(hasSnapshot).toBe(false);
});
});
describe("Concurrency Configuration", () => {
it("should return configured concurrency level", () => {
const concurrency = synchronizer.getConcurrency();
expect(concurrency).toBe(4);
});
it("should use provided concurrency value", () => {
const customSync = new ParallelFileSynchronizer(
codebaseDir,
"test-collection",
snapshotDir,
8
);
expect(customSync.getConcurrency()).toBe(8);
});
});
});
describe("parallelLimit utility", () => {
// parallelLimit is internal to the module, so its bounded concurrency is
// exercised indirectly through the synchronizer here; a direct reference
// sketch of the pattern follows after this test.
it("should process large file lists without overwhelming the system", async () => {
const testDir = join(tmpdir(), `parallel-limit-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
const codebaseDir = join(testDir, "codebase");
const snapshotDir = join(testDir, "snapshots");
await fs.mkdir(codebaseDir, { recursive: true });
await fs.mkdir(snapshotDir, { recursive: true });
try {
// Create 200 files to test bounded concurrency
const files: string[] = [];
for (let i = 0; i < 200; i++) {
const path = join(codebaseDir, `file-${i}.ts`);
await fs.writeFile(path, `// File ${i}`);
files.push(path);
}
const sync = new ParallelFileSynchronizer(
codebaseDir,
"test-collection",
snapshotDir,
4
);
await sync.initialize();
const changes = await sync.detectChanges(files);
expect(changes.added.length).toBe(200);
} finally {
await fs.rm(testDir, { recursive: true, force: true });
}
});
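// The real parallelLimit is not exported, so as a reference the sketch below
// re-implements the bounded-concurrency pattern it is assumed to use
// (hypothetical helper; the property test exercises the sketch itself, not
// the production function).
async function boundedMap<T, R>(
items: readonly T[],
limit: number,
fn: (item: T) => Promise<R>
): Promise<R[]> {
const results: R[] = new Array(items.length);
let next = 0;
// Spawn at most `limit` workers; each claims the next unprocessed index.
// The claim (next++) happens synchronously, so workers never race on an index.
const workers = Array.from({ length: Math.min(limit, items.length) }, async () => {
while (next < items.length) {
const i = next++;
results[i] = await fn(items[i]);
}
});
await Promise.all(workers);
return results;
}
it("reference sketch: never runs more tasks than the limit", async () => {
let active = 0;
let peak = 0;
await boundedMap(Array.from({ length: 50 }, (_, i) => i), 4, async () => {
active += 1;
peak = Math.max(peak, active);
await new Promise((r) => setTimeout(r, 1));
active -= 1;
});
expect(peak).toBeLessThanOrEqual(4);
});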
});