test_registry_sync.py•21.7 kB
"""Integration tests for registry synchronization fixes.
Tests validate that auto-created projects are persisted to PostgreSQL registry
and that secondary project resolution works correctly after the registry sync fix.
Bug Fix Validation:
- Before fix: Auto-created projects existed only in-memory registry
- After fix: Auto-created projects persisted to PostgreSQL immediately
- After fix: Secondary resolution finds projects in PostgreSQL registry
- After fix: Projects survive server restart
Constitutional Compliance:
- Principle VII: TDD (validates bug fix with comprehensive regression tests)
- Principle V: Production Quality (comprehensive error scenarios)
- Principle VIII: Type Safety (type-annotated test functions)
- Principle II: Local-First (validates offline persistence)
"""
from __future__ import annotations
import asyncio
import json
from pathlib import Path
from unittest.mock import MagicMock
import pytest
from fastmcp import Context
@pytest.mark.integration
@pytest.mark.asyncio
async def test_auto_created_project_synced_to_postgresql(tmp_path: Path) -> None:
"""Verify auto-created projects are persisted to PostgreSQL registry.
This is the PRIMARY regression test for the registry sync bug fix.
Bug Fix Validation:
1. Create .codebase-mcp/config.json with project name
2. Set working directory for session
3. Index repository (triggers auto-create)
4. Verify project exists in BOTH in-memory AND PostgreSQL registry
5. Simulate server restart (clear in-memory registry)
6. Verify project still found in PostgreSQL
7. Secondary resolution works (no fallback to default)
Args:
tmp_path: Pytest temporary directory fixture
"""
# Setup: Create test repository with config file
test_repo = tmp_path / "test-repo"
test_repo.mkdir()
config_dir = test_repo / ".codebase-mcp"
config_dir.mkdir()
config_file = config_dir / "config.json"
# Initial config without project.id
config_data = {
"version": "1.0",
"project": {
"name": "test-registry-sync"
},
"auto_switch": True
}
config_file.write_text(json.dumps(config_data, indent=2))
# Create some Python files to index
(test_repo / "main.py").write_text("def main(): pass\n" * 10)
(test_repo / "utils.py").write_text("def helper(): pass\n" * 10)
# Import MCP tools
from src.mcp.tools.project import set_working_directory
from src.mcp.tools.indexing import index_repository
from src.database.session import _initialize_registry_pool
# Create mock Context with session_id
mock_ctx = MagicMock(spec=Context)
mock_ctx.session_id = "test-session-registry-sync"
# Step 1: Set working directory (enables session-based resolution)
session_result = await set_working_directory.fn(
directory=str(test_repo),
ctx=mock_ctx
)
assert session_result["config_found"] is True, "Config file should be found"
assert session_result["project_info"]["name"] == "test-registry-sync"
# Step 2: Index repository (should auto-create project and sync to PostgreSQL)
index_result = await index_repository.fn(
repo_path=str(test_repo),
ctx=mock_ctx
)
# Verify project was NOT defaulted (bug fix validation)
assert index_result["project_id"] != "default", (
f"BUG: Project defaulted to 'default' instead of auto-creating. "
f"Got project_id='{index_result['project_id']}'"
)
assert "cb_proj_test_registry_sync_" in index_result["database_name"], (
f"BUG: Wrong database name. Expected 'cb_proj_test_registry_sync_*', "
f"got '{index_result['database_name']}'"
)
# Verify indexing succeeded
assert index_result["status"] == "success"
assert index_result["files_indexed"] >= 2
assert index_result["chunks_created"] > 0
project_id = index_result["project_id"]
database_name = index_result["database_name"]
# Step 3: Verify project exists in PostgreSQL registry (FIX VALIDATION)
registry_pool = await _initialize_registry_pool()
async with registry_pool.acquire() as conn:
row = await conn.fetchrow(
"SELECT id, name, database_name FROM projects WHERE id = $1",
project_id
)
assert row is not None, (
f"CRITICAL BUG: Project {project_id} not found in PostgreSQL registry. "
f"Registry sync failed!"
)
assert row['id'] == project_id, "Project ID mismatch in registry"
assert row['name'] == "test-registry-sync", "Project name mismatch in registry"
assert row['database_name'] == database_name, "Database name mismatch in registry"
print(f"✅ Project found in PostgreSQL registry: {project_id}")
# Step 4: Simulate server restart (clear in-memory registry)
from src.database.auto_create import get_registry
registry = get_registry()
# Clear in-memory registry (simulates restart)
original_by_id = registry._projects_by_id.copy()
original_by_name = registry._projects_by_name.copy()
registry._projects_by_id.clear()
registry._projects_by_name.clear()
print("🔄 Simulated server restart (in-memory registry cleared)")
# Step 5: Verify secondary resolution still works (queries PostgreSQL)
from src.database.session import resolve_project_id
resolved_id, resolved_db = await resolve_project_id(
explicit_id=project_id,
ctx=None # No context - should use Tier 1 (PostgreSQL lookup)
)
# Verify secondary resolution found project in PostgreSQL
assert resolved_id == project_id, (
f"CRITICAL BUG: Secondary resolution failed. "
f"Expected {project_id}, got {resolved_id}. "
f"Project not found in PostgreSQL registry after restart!"
)
assert resolved_db == database_name, (
f"Database name mismatch after restart. "
f"Expected {database_name}, got {resolved_db}"
)
print(f"✅ Secondary resolution works: {resolved_id} -> {resolved_db}")
# Step 6: Verify config file was updated with project.id
updated_config = json.loads(config_file.read_text())
assert "id" in updated_config["project"], "Config should be updated with project.id"
assert updated_config["project"]["id"] == project_id
# Cleanup: Restore in-memory registry (for other tests)
registry._projects_by_id = original_by_id
registry._projects_by_name = original_by_name
@pytest.mark.integration
@pytest.mark.asyncio
async def test_concurrent_project_creation_no_duplicates(tmp_path: Path) -> None:
"""Verify concurrent project creation doesn't create duplicates.
Tests that ON CONFLICT clause in registry sync prevents duplicate projects
when multiple concurrent requests try to auto-create the same project.
Bug Fix Validation:
- PostgreSQL UNIQUE constraint on projects.id prevents duplicates
- ON CONFLICT DO UPDATE handles race conditions gracefully
- All concurrent requests succeed with same project_id
- Only ONE database created
Args:
tmp_path: Pytest temporary directory fixture
"""
# Setup: Create test repository
test_repo = tmp_path / "test-repo"
test_repo.mkdir()
config_dir = test_repo / ".codebase-mcp"
config_dir.mkdir()
config_file = config_dir / "config.json"
config_data = {
"version": "1.0",
"project": {
"name": "test-concurrent"
},
"auto_switch": True
}
config_file.write_text(json.dumps(config_data, indent=2))
(test_repo / "main.py").write_text("def main(): pass\n")
# Import MCP tools
from src.mcp.tools.project import set_working_directory
from src.mcp.tools.indexing import index_repository
from src.database.session import _initialize_registry_pool
# Create multiple contexts simulating concurrent clients
contexts = [
MagicMock(spec=Context, session_id=f"test-session-{i}")
for i in range(5)
]
# Step 1: Set working directory for all sessions
for ctx in contexts:
await set_working_directory.fn(directory=str(test_repo), ctx=ctx)
# Step 2: Trigger concurrent indexing (simulates race condition)
tasks = [
index_repository.fn(repo_path=str(test_repo), ctx=ctx)
for ctx in contexts
]
results = await asyncio.gather(*tasks, return_exceptions=True)
# Step 3: Verify all succeeded (no exceptions)
for i, result in enumerate(results):
assert not isinstance(result, Exception), (
f"Request {i} failed with exception: {result}"
)
assert result["status"] == "success", f"Request {i} failed: {result}"
# Step 4: Verify all used same project_id (no duplicates)
project_ids = [r["project_id"] for r in results]
unique_project_ids = set(project_ids)
assert len(unique_project_ids) == 1, (
f"CRITICAL BUG: Concurrent creation created {len(unique_project_ids)} different projects. "
f"Expected 1 project. Project IDs: {unique_project_ids}"
)
project_id = project_ids[0]
print(f"✅ All concurrent requests used same project_id: {project_id}")
# Step 5: Verify only ONE entry in PostgreSQL registry
registry_pool = await _initialize_registry_pool()
async with registry_pool.acquire() as conn:
rows = await conn.fetch(
"SELECT id, name FROM projects WHERE name = $1",
"test-concurrent"
)
assert len(rows) == 1, (
f"CRITICAL BUG: Found {len(rows)} projects with name 'test-concurrent'. "
f"Expected exactly 1. ON CONFLICT clause failed!"
)
assert rows[0]['id'] == project_id
print(f"✅ Only one project in registry: {project_id}")
@pytest.mark.integration
@pytest.mark.asyncio
async def test_registry_sync_survives_restart(tmp_path: Path) -> None:
"""Verify project state persists across simulated server restart.
Tests the critical bug fix: projects must survive server restarts by being
stored in PostgreSQL, not just in-memory registry.
Bug Fix Validation:
1. Create project via auto-create
2. Verify in PostgreSQL
3. Simulate restart (clear in-memory, dispose connections)
4. Resolve project via explicit ID (Tier 1)
5. Verify resolution succeeds (finds in PostgreSQL)
Args:
tmp_path: Pytest temporary directory fixture
"""
# Setup
test_repo = tmp_path / "test-repo"
test_repo.mkdir()
config_dir = test_repo / ".codebase-mcp"
config_dir.mkdir()
config_file = config_dir / "config.json"
config_data = {
"version": "1.0",
"project": {"name": "test-restart"},
"auto_switch": True
}
config_file.write_text(json.dumps(config_data, indent=2))
(test_repo / "main.py").write_text("def main(): pass\n")
from src.mcp.tools.project import set_working_directory
from src.mcp.tools.indexing import index_repository
from src.database.session import _initialize_registry_pool, resolve_project_id
from src.database.auto_create import get_registry
mock_ctx = MagicMock(spec=Context)
mock_ctx.session_id = "test-session-restart"
# Step 1: Create project
await set_working_directory.fn(directory=str(test_repo), ctx=mock_ctx)
index_result = await index_repository.fn(repo_path=str(test_repo), ctx=mock_ctx)
project_id = index_result["project_id"]
database_name = index_result["database_name"]
print(f"✅ Created project: {project_id} -> {database_name}")
# Step 2: Verify in PostgreSQL
registry_pool = await _initialize_registry_pool()
async with registry_pool.acquire() as conn:
row = await conn.fetchrow(
"SELECT id, database_name FROM projects WHERE id = $1",
project_id
)
assert row is not None, "Project not found in PostgreSQL before restart"
# Step 3: Simulate server restart
registry = get_registry()
registry._projects_by_id.clear()
registry._projects_by_name.clear()
# Dispose connection pools (simulates server shutdown)
await registry_pool.close()
# Re-initialize pool (simulates server startup)
from src.database import session as session_module
session_module._registry_pool = None
registry_pool = await _initialize_registry_pool()
print("🔄 Simulated server restart (in-memory cleared, connections disposed)")
# Step 4: Resolve project after restart (should query PostgreSQL)
resolved_id, resolved_db = await resolve_project_id(
explicit_id=project_id,
ctx=None # No context - Tier 1 PostgreSQL lookup
)
# Step 5: Verify resolution succeeded
assert resolved_id == project_id, (
f"CRITICAL BUG: Project lost after restart. "
f"Resolution failed for {project_id}. "
f"Projects must be persisted to PostgreSQL!"
)
assert resolved_db == database_name, "Database name mismatch after restart"
print(f"✅ Project survived restart: {resolved_id} -> {resolved_db}")
# Step 6: Verify can still query project details
async with registry_pool.acquire() as conn:
row = await conn.fetchrow(
"SELECT id, name, database_name FROM projects WHERE id = $1",
project_id
)
assert row is not None, "Project not found in PostgreSQL after restart"
assert row['name'] == "test-restart"
assert row['database_name'] == database_name
@pytest.mark.integration
@pytest.mark.asyncio
async def test_secondary_resolution_uses_postgresql(tmp_path: Path) -> None:
"""Verify secondary project resolution queries PostgreSQL registry.
Tests that when a project_id is passed explicitly, the resolution uses
Tier 1 (PostgreSQL lookup) and finds auto-created projects.
Bug Fix Validation:
- Before fix: Tier 1 lookup failed (project not in PostgreSQL)
- After fix: Tier 1 lookup succeeds (project synced to PostgreSQL)
- Secondary calls don't require ctx parameter
- No fallback to default database
Args:
tmp_path: Pytest temporary directory fixture
"""
# Setup
test_repo = tmp_path / "test-repo"
test_repo.mkdir()
config_dir = test_repo / ".codebase-mcp"
config_dir.mkdir()
config_file = config_dir / "config.json"
config_data = {
"version": "1.0",
"project": {"name": "test-secondary"},
"auto_switch": True
}
config_file.write_text(json.dumps(config_data, indent=2))
(test_repo / "main.py").write_text("def main(): pass\n")
from src.mcp.tools.project import set_working_directory
from src.mcp.tools.indexing import index_repository
from src.mcp.tools.search import search_code
mock_ctx = MagicMock(spec=Context)
mock_ctx.session_id = "test-session-secondary"
# Step 1: Auto-create project via session-based resolution
await set_working_directory.fn(directory=str(test_repo), ctx=mock_ctx)
index_result = await index_repository.fn(repo_path=str(test_repo), ctx=mock_ctx)
project_id = index_result["project_id"]
database_name = index_result["database_name"]
print(f"✅ Created project via session-based resolution: {project_id}")
# Step 2: Make secondary call with explicit project_id (no ctx)
# This simulates a different tool call where only project_id is available
search_result = await search_code.fn(
query="main function",
project_id=project_id,
# NOTE: No ctx parameter - should work via Tier 1 (PostgreSQL lookup)
)
# Step 3: Verify secondary resolution used correct project
assert search_result["project_id"] == project_id, (
f"CRITICAL BUG: Secondary resolution failed. "
f"Expected {project_id}, got {search_result['project_id']}. "
f"Auto-created project not found in PostgreSQL!"
)
assert search_result["database_name"] == database_name, (
f"Database name mismatch. Expected {database_name}, "
f"got {search_result['database_name']}"
)
# Step 4: Verify did NOT fall back to default
assert search_result["project_id"] != "default", (
"CRITICAL BUG: Secondary resolution fell back to default database! "
"This indicates registry sync failed."
)
print(f"✅ Secondary resolution worked without ctx: {project_id}")
@pytest.mark.integration
@pytest.mark.asyncio
async def test_registry_sync_performance_overhead(tmp_path: Path) -> None:
"""Verify registry sync adds <10ms overhead per auto-create.
Tests that the PostgreSQL INSERT operation doesn't significantly impact
project creation performance.
Performance Requirements:
- Registry sync should add <10ms per auto-create
- No blocking operations
- Async execution
Args:
tmp_path: Pytest temporary directory fixture
"""
import time
# Setup
test_repo = tmp_path / "test-repo"
test_repo.mkdir()
config_dir = test_repo / ".codebase-mcp"
config_dir.mkdir()
(test_repo / "main.py").write_text("def main(): pass\n")
from src.mcp.tools.project import set_working_directory
from src.mcp.tools.indexing import index_repository
# Create 5 different projects to measure average overhead
results = []
for i in range(5):
# Create unique project config
config_file = config_dir / "config.json"
config_data = {
"version": "1.0",
"project": {"name": f"test-perf-{i}"},
"auto_switch": True
}
config_file.write_text(json.dumps(config_data, indent=2))
mock_ctx = MagicMock(spec=Context)
mock_ctx.session_id = f"test-session-perf-{i}"
# Measure auto-create time
start = time.time()
await set_working_directory.fn(directory=str(test_repo), ctx=mock_ctx)
result = await index_repository.fn(repo_path=str(test_repo), ctx=mock_ctx)
duration_ms = (time.time() - start) * 1000
results.append(duration_ms)
assert result["status"] == "success"
# Calculate average time
avg_time = sum(results) / len(results)
max_time = max(results)
print(f"📊 Registry sync performance:")
print(f" Average: {avg_time:.2f}ms")
print(f" Maximum: {max_time:.2f}ms")
# Note: We measure total auto-create time (not just sync overhead)
# This includes config parsing, database creation, schema initialization, indexing
# Acceptable total time: <5000ms (5 seconds) for small repository
assert max_time < 5000, (
f"Performance regression detected: auto-create took {max_time:.2f}ms. "
f"Expected <5000ms for small repository."
)
print(f"✅ Performance acceptable: {avg_time:.2f}ms average")
@pytest.mark.integration
@pytest.mark.asyncio
async def test_idempotent_registry_sync(tmp_path: Path) -> None:
"""Verify registry sync is idempotent (can be called multiple times safely).
Tests that calling auto-create multiple times for the same project doesn't
create duplicates, using ON CONFLICT DO UPDATE clause.
Args:
tmp_path: Pytest temporary directory fixture
"""
# Setup
test_repo = tmp_path / "test-repo"
test_repo.mkdir()
config_dir = test_repo / ".codebase-mcp"
config_dir.mkdir()
config_file = config_dir / "config.json"
# Config WITH project.id (simulates re-indexing existing project)
import uuid
existing_project_id = str(uuid.uuid4())
config_data = {
"version": "1.0",
"project": {
"name": "test-idempotent",
"id": existing_project_id
},
"auto_switch": True
}
config_file.write_text(json.dumps(config_data, indent=2))
(test_repo / "main.py").write_text("def main(): pass\n")
from src.mcp.tools.project import set_working_directory
from src.mcp.tools.indexing import index_repository
from src.database.session import _initialize_registry_pool
mock_ctx = MagicMock(spec=Context)
mock_ctx.session_id = "test-session-idempotent"
await set_working_directory.fn(directory=str(test_repo), ctx=mock_ctx)
# Step 1: First indexing (creates project)
result1 = await index_repository.fn(repo_path=str(test_repo), ctx=mock_ctx)
assert result1["status"] == "success"
project_id1 = result1["project_id"]
# Step 2: Count projects in registry
registry_pool = await _initialize_registry_pool()
async with registry_pool.acquire() as conn:
count1 = await conn.fetchval(
"SELECT COUNT(*) FROM projects WHERE name = $1",
"test-idempotent"
)
assert count1 == 1, f"Expected 1 project, found {count1}"
# Step 3: Second indexing (should be idempotent)
result2 = await index_repository.fn(repo_path=str(test_repo), ctx=mock_ctx)
assert result2["status"] == "success"
project_id2 = result2["project_id"]
# Step 4: Verify same project_id used
assert project_id2 == project_id1, (
f"CRITICAL BUG: Second indexing created different project. "
f"First: {project_id1}, Second: {project_id2}"
)
# Step 5: Verify still only ONE project in registry
async with registry_pool.acquire() as conn:
count2 = await conn.fetchval(
"SELECT COUNT(*) FROM projects WHERE name = $1",
"test-idempotent"
)
assert count2 == 1, (
f"CRITICAL BUG: Multiple indexing created duplicate projects. "
f"Found {count2} projects. ON CONFLICT clause failed!"
)
print(f"✅ Idempotent registry sync works: {project_id1}")