/**
* Tests for crash-safe atomic swap semantics during full rebuild.
*
* These tests verify that:
* 1. Failed rebuilds do not mutate the active index
* 2. Successful rebuilds atomically replace the active index with consistent new build
* 3. The staging directory is cleaned up after successful swap
*/
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { promises as fs } from 'fs';
import path from 'path';
import { randomUUID } from 'crypto';
import { CodebaseIndexer } from '../src/core/indexer.js';
import { readIndexMeta, validateIndexArtifacts } from '../src/core/index-meta.js';
import { IndexCorruptedError } from '../src/errors/index.js';
import {
CODEBASE_CONTEXT_DIRNAME,
INDEX_META_FILENAME,
KEYWORD_INDEX_FILENAME,
INTELLIGENCE_FILENAME,
VECTOR_DB_DIRNAME,
INDEX_FORMAT_VERSION,
INDEX_META_VERSION
} from '../src/constants/codebase-context.js';
const STAGING_DIRNAME = '.staging';
async function createTempDir(): Promise<string> {
const baseDir = path.join(process.cwd(), '.test-temp');
await fs.mkdir(baseDir, { recursive: true });
const tempDir = path.join(baseDir, `atomic-swap-test-${randomUUID()}`);
await fs.mkdir(tempDir, { recursive: true });
return tempDir;
}
async function cleanupDir(dir: string): Promise<void> {
try {
await fs.rm(dir, { recursive: true, force: true });
} catch {
// Best-effort cleanup
}
}
async function createMinimalIndex(contextDir: string, buildId: string): Promise<void> {
await fs.mkdir(contextDir, { recursive: true });
await fs.mkdir(path.join(contextDir, VECTOR_DB_DIRNAME), { recursive: true });
// Create index-meta.json (authoritative)
const meta = {
metaVersion: INDEX_META_VERSION,
formatVersion: INDEX_FORMAT_VERSION,
buildId,
generatedAt: new Date().toISOString(),
toolVersion: 'test',
artifacts: {
keywordIndex: { path: KEYWORD_INDEX_FILENAME },
vectorDb: { path: VECTOR_DB_DIRNAME, provider: 'lancedb' },
intelligence: { path: INTELLIGENCE_FILENAME }
}
};
await fs.writeFile(path.join(contextDir, INDEX_META_FILENAME), JSON.stringify(meta, null, 2));
// Create index.json with matching buildId
const index = {
header: { buildId, formatVersion: INDEX_FORMAT_VERSION },
chunks: []
};
await fs.writeFile(path.join(contextDir, KEYWORD_INDEX_FILENAME), JSON.stringify(index));
// Create intelligence.json with matching buildId
const intelligence = {
header: { buildId, formatVersion: INDEX_FORMAT_VERSION },
libraryUsage: {},
patterns: {},
generatedAt: new Date().toISOString()
};
await fs.writeFile(path.join(contextDir, INTELLIGENCE_FILENAME), JSON.stringify(intelligence));
// Create vector DB build marker
await fs.writeFile(
path.join(contextDir, VECTOR_DB_DIRNAME, 'index-build.json'),
JSON.stringify({ buildId, formatVersion: INDEX_FORMAT_VERSION })
);
}
async function readBuildIdFromMeta(contextDir: string): Promise<string | null> {
try {
const metaPath = path.join(contextDir, INDEX_META_FILENAME);
const raw = await fs.readFile(metaPath, 'utf-8');
const meta = JSON.parse(raw);
return meta.buildId || null;
} catch {
return null;
}
}
async function stagingDirExists(contextDir: string): Promise<boolean> {
try {
const stagingPath = path.join(contextDir, STAGING_DIRNAME);
const stat = await fs.stat(stagingPath);
return stat.isDirectory();
} catch {
return false;
}
}
describe('Atomic Swap Semantics', () => {
let tempDir: string;
let contextDir: string;
beforeEach(async () => {
tempDir = await createTempDir();
contextDir = path.join(tempDir, CODEBASE_CONTEXT_DIRNAME);
});
afterEach(async () => {
await cleanupDir(tempDir);
});
it('should preserve active index when staging build fails before swap', async () => {
// Create an initial valid index with a known buildId
const originalBuildId = 'original-build-' + randomUUID();
await createMinimalIndex(contextDir, originalBuildId);
// Verify initial state
const initialBuildId = await readBuildIdFromMeta(contextDir);
expect(initialBuildId).toBe(originalBuildId);
// Create a source file for indexing
const srcDir = path.join(tempDir, 'src');
await fs.mkdir(srcDir, { recursive: true });
await fs.writeFile(path.join(srcDir, 'test.ts'), 'export function hello() { return "world"; }');
// Create a mock indexer that will fail mid-build
let buildFailed = false;
const indexer = new CodebaseIndexer({
rootPath: tempDir,
// Force a full rebuild (not incremental)
incrementalOnly: false
});
// Simulate a failure scenario by creating a staging directory manually
// and then verifying it doesn't affect active index
const stagingBase = path.join(contextDir, STAGING_DIRNAME);
const failedBuildId = 'failed-build-' + randomUUID();
const stagingPath = path.join(stagingBase, failedBuildId);
await fs.mkdir(stagingPath, { recursive: true });
// Write partial staging content (simulating mid-build failure)
await fs.writeFile(
path.join(stagingPath, INDEX_META_FILENAME),
JSON.stringify({ buildId: failedBuildId, formatVersion: 999 }) // Wrong format
);
// Clean up staging to simulate the indexer's error handling
await fs.rm(stagingBase, { recursive: true, force: true });
// Verify active index is still intact
const activeBuildId = await readBuildIdFromMeta(contextDir);
expect(activeBuildId).toBe(originalBuildId);
// Verify meta can still be read (index is valid)
const meta = await readIndexMeta(tempDir);
expect(meta.buildId).toBe(originalBuildId);
});
it('should atomically swap active index on successful rebuild', async () => {
// Create an initial valid index
const originalBuildId = 'original-build-' + randomUUID();
await createMinimalIndex(contextDir, originalBuildId);
// Create source files for indexing
const srcDir = path.join(tempDir, 'src');
await fs.mkdir(srcDir, { recursive: true });
await fs.writeFile(
path.join(srcDir, 'example.ts'),
`
export interface User {
id: string;
name: string;
}
export function greet(user: User): string {
return \`Hello, \${user.name}!\`;
}
`
);
// Run full indexer
const indexer = new CodebaseIndexer({
rootPath: tempDir,
incrementalOnly: false,
config: {
skipEmbedding: true // Skip embeddings for faster test
}
});
await indexer.index();
// Verify the active index has a NEW buildId (not the original)
const newBuildId = await readBuildIdFromMeta(contextDir);
expect(newBuildId).not.toBe(originalBuildId);
expect(newBuildId).toBeTruthy();
// Verify the new index is valid
const meta = await readIndexMeta(tempDir);
expect(meta.buildId).toBe(newBuildId);
expect(meta.formatVersion).toBe(INDEX_FORMAT_VERSION);
// Validate all artifacts match
await validateIndexArtifacts(tempDir, meta);
// Verify staging directory is cleaned up
const hasStaging = await stagingDirExists(contextDir);
expect(hasStaging).toBe(false);
});
it('should fail closed when meta points to missing artifacts', async () => {
// Create an index with meta pointing to non-existent files
const buildId = 'broken-build-' + randomUUID();
await fs.mkdir(contextDir, { recursive: true });
const meta = {
metaVersion: INDEX_META_VERSION,
formatVersion: INDEX_FORMAT_VERSION,
buildId,
generatedAt: new Date().toISOString(),
toolVersion: 'test',
artifacts: {
keywordIndex: { path: KEYWORD_INDEX_FILENAME },
vectorDb: { path: VECTOR_DB_DIRNAME, provider: 'lancedb' }
}
};
await fs.writeFile(path.join(contextDir, INDEX_META_FILENAME), JSON.stringify(meta));
// Do NOT create the keyword index or vector DB
// Validation should throw IndexCorruptedError
const loadedMeta = await readIndexMeta(tempDir);
await expect(validateIndexArtifacts(tempDir, loadedMeta)).rejects.toThrow(IndexCorruptedError);
});
it('should fail closed on buildId mismatch between meta and artifacts', async () => {
// Create an index with mismatched buildIds
const metaBuildId = 'meta-build-' + randomUUID();
const artifactBuildId = 'artifact-build-' + randomUUID();
await fs.mkdir(contextDir, { recursive: true });
await fs.mkdir(path.join(contextDir, VECTOR_DB_DIRNAME), { recursive: true });
// Meta with one buildId
const meta = {
metaVersion: INDEX_META_VERSION,
formatVersion: INDEX_FORMAT_VERSION,
buildId: metaBuildId,
generatedAt: new Date().toISOString(),
toolVersion: 'test',
artifacts: {
keywordIndex: { path: KEYWORD_INDEX_FILENAME },
vectorDb: { path: VECTOR_DB_DIRNAME, provider: 'lancedb' }
}
};
await fs.writeFile(path.join(contextDir, INDEX_META_FILENAME), JSON.stringify(meta));
// Artifacts with different buildId
const index = {
header: { buildId: artifactBuildId, formatVersion: INDEX_FORMAT_VERSION },
chunks: []
};
await fs.writeFile(path.join(contextDir, KEYWORD_INDEX_FILENAME), JSON.stringify(index));
await fs.writeFile(
path.join(contextDir, VECTOR_DB_DIRNAME, 'index-build.json'),
JSON.stringify({ buildId: artifactBuildId, formatVersion: INDEX_FORMAT_VERSION })
);
// Validation should throw IndexCorruptedError for buildId mismatch
const loadedMeta = await readIndexMeta(tempDir);
await expect(validateIndexArtifacts(tempDir, loadedMeta)).rejects.toThrow(IndexCorruptedError);
});
it('should fail closed on formatVersion mismatch', async () => {
// Create an index with wrong format version
const buildId = 'version-mismatch-' + randomUUID();
await fs.mkdir(contextDir, { recursive: true });
await fs.mkdir(path.join(contextDir, VECTOR_DB_DIRNAME), { recursive: true });
// Meta with current format version
const meta = {
metaVersion: INDEX_META_VERSION,
formatVersion: INDEX_FORMAT_VERSION,
buildId,
generatedAt: new Date().toISOString(),
toolVersion: 'test',
artifacts: {
keywordIndex: { path: KEYWORD_INDEX_FILENAME },
vectorDb: { path: VECTOR_DB_DIRNAME, provider: 'lancedb' }
}
};
await fs.writeFile(path.join(contextDir, INDEX_META_FILENAME), JSON.stringify(meta));
// Artifacts with OLD format version (simulating schema change)
const OLD_FORMAT_VERSION = 0; // Simulate pre-versioning
const index = {
header: { buildId, formatVersion: OLD_FORMAT_VERSION },
chunks: []
};
await fs.writeFile(path.join(contextDir, KEYWORD_INDEX_FILENAME), JSON.stringify(index));
await fs.writeFile(
path.join(contextDir, VECTOR_DB_DIRNAME, 'index-build.json'),
JSON.stringify({ buildId, formatVersion: OLD_FORMAT_VERSION })
);
// Validation should throw IndexCorruptedError for format version mismatch
const loadedMeta = await readIndexMeta(tempDir);
await expect(validateIndexArtifacts(tempDir, loadedMeta)).rejects.toThrow(IndexCorruptedError);
});
it('should never serve mixed-version index data', async () => {
// This test verifies the core invariant: readers never observe partial/mixed state
// Create initial index
const buildId1 = 'build-v1-' + randomUUID();
await createMinimalIndex(contextDir, buildId1);
// Simulate concurrent reader: load meta
const meta1 = await readIndexMeta(tempDir);
expect(meta1.buildId).toBe(buildId1);
// Simulate a new build starting (create staging)
const buildId2 = 'build-v2-' + randomUUID();
const stagingPath = path.join(contextDir, STAGING_DIRNAME, buildId2);
await fs.mkdir(stagingPath, { recursive: true });
// Write partial staging artifacts
const partialIndex = {
header: { buildId: buildId2, formatVersion: INDEX_FORMAT_VERSION },
chunks: [{ id: 'partial', content: 'partial' }]
};
await fs.writeFile(
path.join(stagingPath, KEYWORD_INDEX_FILENAME),
JSON.stringify(partialIndex)
);
// Active index should still be valid and consistent
const activeMeta = await readIndexMeta(tempDir);
expect(activeMeta.buildId).toBe(buildId1); // Not the staging build
// Validate should pass for active artifacts
await validateIndexArtifacts(tempDir, activeMeta);
// Clean up staging
await fs.rm(path.join(contextDir, STAGING_DIRNAME), { recursive: true, force: true });
});
});