"""Remove non-search tables and add project_id columns for multi-project support.
This migration refactors the database schema to:
1. Remove 9 unused tables from non-search features (work tracking, vendor management, deployments)
2. Add project_id columns to repositories and code_chunks tables
3. Establish foundation for multi-project semantic code search
Tables Dropped (9):
- vendor_extractors
- deployment_events
- project_configuration
- future_enhancements
- work_item_dependencies
- vendor_deployment_links
- work_item_deployment_links
- archived_work_items
- (extended columns removed from tasks table)
Schema Changes:
- repositories: Add project_id VARCHAR(50) NOT NULL DEFAULT 'default'
- code_chunks: Add project_id VARCHAR(50) NOT NULL (copied from parent repository)
- Both tables: Add CHECK constraint for pattern validation (^[a-z0-9-]{1,50}$)
- New index: idx_project_repository on repositories(project_id, id)
Performance:
- Migration target: < 5 minutes for 100 repos + 10K chunks
- Index created after data population for optimal performance
- Three-step approach for code_chunks project_id (nullable → UPDATE → NOT NULL)
Safety:
- Single atomic transaction (automatic rollback on error)
- Foreign key verification before dropping tables
- Idempotent DDL (IF EXISTS / IF NOT EXISTS clauses)
- Comprehensive logging at each major step
Rollback:
- downgrade() restores table structures (schema only, data NOT restored)
- Data restoration requires manual backup import
- Rollback target: < 30 seconds execution
Constitutional Compliance:
- Principle I: Simplicity Over Features (removes complexity, adds minimal foundation)
- Principle IV: Performance Guarantees (< 5 min migration, maintains < 500ms search)
- Principle V: Production Quality (error handling, validation, logging)
- Principle VII: Test-Driven Development (comprehensive test suite)
- Principle X: Git Micro-Commit Strategy (atomic migration commit)
Revision ID: 002
Revises: 005
Create Date: 2025-10-11 00:00:00.000000
"""

from __future__ import annotations

import logging
import time
from collections.abc import Sequence

from alembic import op
from sqlalchemy import text

# revision identifiers, used by Alembic.
revision: str = "002"
down_revision: str | None = "005"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
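
# Typical invocation, assuming a standard Alembic setup with this file on the
# configured version path (adjust revision identifiers to your environment):
#
#   alembic upgrade 002      # apply this migration
#   alembic downgrade 005    # roll back to the prior revision ("005")
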
# Configure logging
logger = logging.getLogger(__name__)


def upgrade() -> None:
    """Apply migration: remove non-search tables and add project_id columns.

    Executes 10 steps in a single atomic transaction:

    1. Prerequisites check
    2. Foreign key verification
    3. Add project_id to repositories
    4. Add project_id to code_chunks (three-step approach)
    5. Add CHECK constraints
    6. Create performance index
    7. Drop 9 unused tables
    8. Validation checks
    9. Log completion with duration
    10. COMMIT (automatic via Alembic)

    Raises:
        ValueError: If any step fails (foreign key violations, DDL errors, or
            validation failures); the transaction rolls back automatically
    """
start_time = time.time()
conn = op.get_bind()
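    # Alembic's default env.py runs this function inside a single transaction,
    # and PostgreSQL supports transactional DDL, so any exception raised below
    # rolls back every statement already executed.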
# Step 1: Check prerequisites
logger.info("Step 1/10: Checking prerequisites...")
try:
# Verify database connection
conn.execute(text("SELECT 1"))
logger.info("Step 1/10: Complete - Database connection verified")
except Exception as e:
logger.error(f"Step 1/10: FAILED - Database connection error: {e}")
raise ValueError(f"Migration failed at step 1: Database connection error: {e}") from e
# Step 2: Verify no foreign keys from dropped tables to core tables
logger.info("Step 2/10: Verifying foreign key constraints...")
try:
# Query for foreign keys from tables to be dropped that reference repositories or code_chunks
# This prevents accidental data corruption if FKs exist
result = conn.execute(text("""
SELECT
tc.table_name,
kcu.column_name,
ccu.table_name AS foreign_table_name,
ccu.column_name AS foreign_column_name
FROM information_schema.table_constraints AS tc
JOIN information_schema.key_column_usage AS kcu
ON tc.constraint_name = kcu.constraint_name
AND tc.table_schema = kcu.table_schema
JOIN information_schema.constraint_column_usage AS ccu
ON ccu.constraint_name = tc.constraint_name
AND ccu.table_schema = tc.table_schema
WHERE tc.constraint_type = 'FOREIGN KEY'
AND tc.table_name IN (
'vendor_extractors',
'deployment_events',
'project_configuration',
'future_enhancements',
'work_item_dependencies',
'vendor_deployment_links',
'work_item_deployment_links',
'archived_work_items',
'tasks'
)
AND ccu.table_name IN ('repositories', 'code_chunks')
"""))
fk_violations = result.fetchall()
if fk_violations:
fk_details = "\n".join([
f" - {row.table_name}.{row.column_name} -> {row.foreign_table_name}.{row.foreign_column_name}"
for row in fk_violations
])
error_msg = (
f"Found {len(fk_violations)} foreign key(s) from tables to be dropped that reference "
f"core tables (repositories/code_chunks). This would cause data corruption:\n{fk_details}\n"
f"These foreign keys must be removed before migration."
)
logger.error(f"Step 2/10: FAILED - {error_msg}")
raise ValueError(error_msg)
logger.info("Step 2/10: Complete - No blocking foreign keys found")
except ValueError:
# Re-raise validation errors
raise
except Exception as e:
logger.error(f"Step 2/10: FAILED - Foreign key verification error: {e}")
raise ValueError(f"Migration failed at step 2: Foreign key verification error: {e}") from e
# Step 3: Add project_id to repositories
logger.info("Step 3/10: Adding project_id to repositories table...")
try:
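        # On PostgreSQL 11+, ADD COLUMN with a constant DEFAULT is a
        # metadata-only change (no table rewrite), so this stays fast even for
        # a large repositories table.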
conn.execute(text("""
ALTER TABLE repositories
ADD COLUMN IF NOT EXISTS project_id VARCHAR(50) NOT NULL DEFAULT 'default'
"""))
logger.info("Step 3/10: Complete - project_id column added to repositories")
except Exception as e:
logger.error(f"Step 3/10: FAILED - Error adding project_id to repositories: {e}")
raise ValueError(f"Migration failed at step 3: Error adding project_id to repositories: {e}") from e
# Step 4: Add project_id to code_chunks (3-step approach for data integrity)
logger.info("Step 4/10: Adding project_id to code_chunks table...")
try:
# Step 4.1: Add nullable column
conn.execute(text("""
ALTER TABLE code_chunks
ADD COLUMN IF NOT EXISTS project_id VARCHAR(50)
"""))
logger.info("Step 4/10: Sub-step 1 - Nullable column added")
# Step 4.2: Populate from parent repository
result = conn.execute(text("""
UPDATE code_chunks
SET project_id = (
SELECT r.project_id
FROM repositories r
JOIN code_files cf ON cf.repository_id = r.id
WHERE cf.id = code_chunks.code_file_id
)
WHERE project_id IS NULL
"""))
rows_updated = result.rowcount
logger.info(f"Step 4/10: Sub-step 2 - Populated project_id for {rows_updated} code chunks from parent repositories")
# Step 4.3: Add NOT NULL constraint
conn.execute(text("""
ALTER TABLE code_chunks
ALTER COLUMN project_id SET NOT NULL
"""))
logger.info("Step 4/10: Complete - project_id column added to code_chunks with NOT NULL constraint")
except Exception as e:
logger.error(f"Step 4/10: FAILED - Error adding project_id to code_chunks: {e}")
raise ValueError(f"Migration failed at step 4: Error adding project_id to code_chunks: {e}") from e
# Step 5: Add CHECK constraints for pattern validation
logger.info("Step 5/10: Adding validation constraints...")
try:
# CHECK constraint for repositories.project_id
conn.execute(text("""
ALTER TABLE repositories
ADD CONSTRAINT check_repositories_project_id
CHECK (project_id ~ '^[a-z0-9-]{1,50}$')
"""))
logger.info("Step 5/10: Sub-step 1 - CHECK constraint added to repositories.project_id")
# CHECK constraint for code_chunks.project_id
conn.execute(text("""
ALTER TABLE code_chunks
ADD CONSTRAINT check_code_chunks_project_id
CHECK (project_id ~ '^[a-z0-9-]{1,50}$')
"""))
logger.info("Step 5/10: Complete - CHECK constraints added to both tables")
except Exception as e:
logger.error(f"Step 5/10: FAILED - Error adding CHECK constraints: {e}")
raise ValueError(f"Migration failed at step 5: Error adding CHECK constraints: {e}") from e
# Step 6: Create performance index
logger.info("Step 6/10: Creating performance index...")
try:
        # Create index on (project_id, id) to support multi-project queries.
        # Drop-then-create keeps this step idempotent: op.create_index() below
        # is issued without IF NOT EXISTS, so a leftover index from a partial
        # run would otherwise abort the migration.
conn.execute(text("DROP INDEX IF EXISTS idx_project_repository"))
op.create_index(
"idx_project_repository",
"repositories",
["project_id", "id"]
)
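        # With the composite (project_id, id) index in place, project-scoped
        # lookups such as SELECT id FROM repositories WHERE project_id = $1
        # can be served by an index-only scan.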
logger.info("Step 6/10: Complete - Index idx_project_repository created on repositories(project_id, id)")
except Exception as e:
logger.error(f"Step 6/10: FAILED - Error creating performance index: {e}")
raise ValueError(f"Migration failed at step 6: Error creating performance index: {e}") from e
# Step 7: Drop 9 unused tables
logger.info("Step 7/10: Dropping unused tables...")
try:
# Drop tables in order respecting dependencies
# CASCADE ensures dependent objects are dropped
tables_to_drop = [
"archived_work_items",
"work_item_deployment_links",
"vendor_deployment_links",
"work_item_dependencies",
"future_enhancements",
"project_configuration",
"deployment_events",
"vendor_extractors",
]
for table_name in tables_to_drop:
conn.execute(text(f"DROP TABLE IF EXISTS {table_name} CASCADE"))
logger.info(f"Step 7/10: Dropped table {table_name}")
        # Drop the tasks table as well. Migration 003 extended tasks into a
        # full work-item table; none of it is needed for search, so the table
        # is dropped entirely rather than having its extensions removed.
conn.execute(text("DROP TABLE IF EXISTS tasks CASCADE"))
logger.info("Step 7/10: Dropped table tasks")
logger.info("Step 7/10: Complete - All 9 unused tables dropped")
except Exception as e:
logger.error(f"Step 7/10: FAILED - Error dropping tables: {e}")
raise ValueError(f"Migration failed at step 7: Error dropping tables: {e}") from e
# Step 8: Run validation checks
logger.info("Step 8/10: Running validation checks...")
try:
# Validation 1: Verify project_id columns exist
result = conn.execute(text("""
SELECT COUNT(*) AS column_count
FROM information_schema.columns
WHERE table_name IN ('repositories', 'code_chunks')
AND column_name = 'project_id'
"""))
column_count = result.scalar()
if column_count != 2:
raise ValueError(f"Expected 2 project_id columns, found {column_count}")
logger.info("Step 8/10: Validation 1 - project_id columns exist in both tables")
# Validation 2: Verify CHECK constraints exist
result = conn.execute(text("""
SELECT COUNT(*) AS constraint_count
FROM information_schema.check_constraints
WHERE constraint_name IN (
'check_repositories_project_id',
'check_code_chunks_project_id'
)
"""))
constraint_count = result.scalar()
if constraint_count != 2:
raise ValueError(f"Expected 2 CHECK constraints, found {constraint_count}")
logger.info("Step 8/10: Validation 2 - CHECK constraints exist on both tables")
# Validation 3: Verify performance index exists
result = conn.execute(text("""
SELECT COUNT(*) AS index_count
FROM pg_indexes
WHERE indexname = 'idx_project_repository'
"""))
index_count = result.scalar()
if index_count != 1:
raise ValueError(f"Expected 1 performance index, found {index_count}")
logger.info("Step 8/10: Validation 3 - Performance index exists")
# Validation 4: Verify dropped tables no longer exist
result = conn.execute(text("""
SELECT COUNT(*) AS table_count
FROM information_schema.tables
WHERE table_name IN (
'vendor_extractors',
'deployment_events',
'project_configuration',
'future_enhancements',
'work_item_dependencies',
'vendor_deployment_links',
'work_item_deployment_links',
'archived_work_items',
'tasks'
)
"""))
table_count = result.scalar()
if table_count != 0:
raise ValueError(f"Expected 0 dropped tables to exist, found {table_count}")
logger.info("Step 8/10: Validation 4 - All 9 tables successfully dropped")
logger.info("Step 8/10: Complete - All validation checks passed")
except ValueError:
# Re-raise validation errors
raise
except Exception as e:
logger.error(f"Step 8/10: FAILED - Validation error: {e}")
raise ValueError(f"Migration failed at step 8: Validation error: {e}") from e
# Step 9: Log completion with duration
duration = time.time() - start_time
logger.info(f"Step 9/10: Migration completed successfully in {duration:.2f} seconds")
logger.info("Step 9/10: Summary - 9 tables dropped, 2 tables modified, 1 index created, 2 constraints added")
# Step 10: COMMIT (automatic via Alembic transaction)
logger.info("Step 10/10: Transaction committed (automatic via Alembic)")


def downgrade() -> None:
    """Rollback migration: restore dropped tables and remove project_id columns.

    Executes 6 rollback steps in a single atomic transaction:

    1. Drop performance index
    2. Drop CHECK constraints
    3. Drop project_id from code_chunks
    4. Drop project_id from repositories
    5. Restore 9 dropped tables (schema only, data NOT restored)
    6. Run validation, log completion

    WARNING: Data in the dropped tables is NOT restored by this function.
    Data restoration requires a manual import from backup.

    Raises:
        ValueError: If any rollback operation fails (transaction auto-rolled back)
    """
start_time = time.time()
conn = op.get_bind()
# Rollback Step 1: Drop performance index
logger.info("Rollback Step 1/6: Dropping performance index...")
try:
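        # if_exists requires a reasonably recent Alembic release; on older
        # versions, fall back to raw SQL:
        # DROP INDEX IF EXISTS idx_project_repository.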
op.drop_index("idx_project_repository", table_name="repositories", if_exists=True)
logger.info("Rollback Step 1/6: Complete - Performance index dropped")
except Exception as e:
logger.error(f"Rollback Step 1/6: FAILED - Error dropping performance index: {e}")
raise ValueError(f"Rollback failed at step 1: Error dropping performance index: {e}") from e
# Rollback Step 2: Drop CHECK constraints
logger.info("Rollback Step 2/6: Dropping validation constraints...")
try:
conn.execute(text("""
ALTER TABLE repositories
DROP CONSTRAINT IF EXISTS check_repositories_project_id
"""))
conn.execute(text("""
ALTER TABLE code_chunks
DROP CONSTRAINT IF EXISTS check_code_chunks_project_id
"""))
logger.info("Rollback Step 2/6: Complete - CHECK constraints dropped from both tables")
except Exception as e:
logger.error(f"Rollback Step 2/6: FAILED - Error dropping CHECK constraints: {e}")
raise ValueError(f"Rollback failed at step 2: Error dropping CHECK constraints: {e}") from e
# Rollback Step 3: Drop project_id from code_chunks
logger.info("Rollback Step 3/6: Dropping project_id from code_chunks...")
try:
conn.execute(text("""
ALTER TABLE code_chunks
DROP COLUMN IF EXISTS project_id
"""))
logger.info("Rollback Step 3/6: Complete - project_id column dropped from code_chunks")
except Exception as e:
logger.error(f"Rollback Step 3/6: FAILED - Error dropping project_id from code_chunks: {e}")
raise ValueError(f"Rollback failed at step 3: Error dropping project_id from code_chunks: {e}") from e
# Rollback Step 4: Drop project_id from repositories
logger.info("Rollback Step 4/6: Dropping project_id from repositories...")
try:
conn.execute(text("""
ALTER TABLE repositories
DROP COLUMN IF EXISTS project_id
"""))
logger.info("Rollback Step 4/6: Complete - project_id column dropped from repositories")
except Exception as e:
logger.error(f"Rollback Step 4/6: FAILED - Error dropping project_id from repositories: {e}")
raise ValueError(f"Rollback failed at step 4: Error dropping project_id from repositories: {e}") from e
# Rollback Step 5: Restore 9 dropped tables (schema only)
logger.info("Rollback Step 5/6: Restoring dropped tables (schema only)...")
try:
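        # Only column definitions and CHECK constraints are recreated below;
        # any foreign keys or secondary indexes the original tables carried
        # must be restored separately (e.g. by re-running their migrations).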
# 1. vendor_extractors
conn.execute(text("""
CREATE TABLE IF NOT EXISTS vendor_extractors (
id UUID PRIMARY KEY,
version INTEGER NOT NULL DEFAULT 1,
name VARCHAR(100) NOT NULL,
status VARCHAR(20) NOT NULL,
extractor_version VARCHAR(50) NOT NULL,
metadata JSONB NOT NULL,
created_at TIMESTAMP WITH TIME ZONE NOT NULL,
updated_at TIMESTAMP WITH TIME ZONE NOT NULL,
created_by VARCHAR(100) NOT NULL,
CONSTRAINT ck_vendor_status CHECK (status IN ('operational', 'broken'))
)
"""))
logger.info("Rollback Step 5/6: Restored table vendor_extractors")
# 2. deployment_events
conn.execute(text("""
CREATE TABLE IF NOT EXISTS deployment_events (
id UUID PRIMARY KEY,
deployed_at TIMESTAMP WITH TIME ZONE NOT NULL,
commit_hash VARCHAR(40) NOT NULL,
metadata JSONB NOT NULL,
created_at TIMESTAMP WITH TIME ZONE NOT NULL,
created_by VARCHAR(100) NOT NULL,
CONSTRAINT ck_commit_hash_format CHECK (commit_hash ~ '^[a-f0-9]{40}$')
)
"""))
logger.info("Rollback Step 5/6: Restored table deployment_events")
# 3. project_configuration
conn.execute(text("""
CREATE TABLE IF NOT EXISTS project_configuration (
id INTEGER PRIMARY KEY,
active_context_type VARCHAR(50) NOT NULL,
current_session_id UUID,
git_branch VARCHAR(100),
git_head_commit VARCHAR(40),
default_token_budget INTEGER NOT NULL DEFAULT 200000,
database_healthy BOOLEAN NOT NULL DEFAULT TRUE,
last_health_check_at TIMESTAMP WITH TIME ZONE,
updated_at TIMESTAMP WITH TIME ZONE NOT NULL,
updated_by VARCHAR(100) NOT NULL,
CONSTRAINT ck_singleton CHECK (id = 1)
)
"""))
logger.info("Rollback Step 5/6: Restored table project_configuration")
# 4. future_enhancements
conn.execute(text("""
CREATE TABLE IF NOT EXISTS future_enhancements (
id UUID PRIMARY KEY,
title VARCHAR(200) NOT NULL,
description TEXT NOT NULL,
priority INTEGER NOT NULL,
status VARCHAR(20) NOT NULL DEFAULT 'planned',
target_quarter VARCHAR(10),
requires_constitutional_principles JSONB NOT NULL DEFAULT '[]',
created_at TIMESTAMP WITH TIME ZONE NOT NULL,
updated_at TIMESTAMP WITH TIME ZONE NOT NULL,
created_by VARCHAR(100) NOT NULL,
CONSTRAINT ck_priority_range CHECK (priority >= 1 AND priority <= 5),
CONSTRAINT ck_enhancement_status CHECK (status IN ('planned', 'approved', 'implementing', 'completed'))
)
"""))
logger.info("Rollback Step 5/6: Restored table future_enhancements")
# 5. tasks (work_items)
conn.execute(text("""
CREATE TABLE IF NOT EXISTS tasks (
id UUID PRIMARY KEY,
version INTEGER NOT NULL DEFAULT 1,
item_type VARCHAR(20) NOT NULL DEFAULT 'task',
title VARCHAR(200) NOT NULL,
status VARCHAR(20) NOT NULL,
parent_id UUID,
path VARCHAR(500) NOT NULL DEFAULT '/',
depth INTEGER NOT NULL DEFAULT 0,
branch_name VARCHAR(100),
metadata JSONB NOT NULL,
deleted_at TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE NOT NULL,
updated_at TIMESTAMP WITH TIME ZONE NOT NULL,
created_by VARCHAR(100) NOT NULL,
CONSTRAINT ck_work_item_type CHECK (item_type IN ('project', 'session', 'task', 'research')),
CONSTRAINT ck_work_item_depth CHECK (depth >= 0 AND depth <= 5)
)
"""))
logger.info("Rollback Step 5/6: Restored table tasks")
# 6. work_item_dependencies
conn.execute(text("""
CREATE TABLE IF NOT EXISTS work_item_dependencies (
source_id UUID NOT NULL,
target_id UUID NOT NULL,
dependency_type VARCHAR(20) NOT NULL,
created_at TIMESTAMP WITH TIME ZONE NOT NULL,
created_by VARCHAR(100) NOT NULL,
PRIMARY KEY (source_id, target_id),
CONSTRAINT ck_dependency_type CHECK (dependency_type IN ('blocks', 'depends_on')),
CONSTRAINT ck_no_self_dependency CHECK (source_id != target_id)
)
"""))
logger.info("Rollback Step 5/6: Restored table work_item_dependencies")
# 7. vendor_deployment_links
conn.execute(text("""
CREATE TABLE IF NOT EXISTS vendor_deployment_links (
deployment_id UUID NOT NULL,
vendor_id UUID NOT NULL,
created_at TIMESTAMP WITH TIME ZONE NOT NULL,
PRIMARY KEY (deployment_id, vendor_id)
)
"""))
logger.info("Rollback Step 5/6: Restored table vendor_deployment_links")
# 8. work_item_deployment_links
conn.execute(text("""
CREATE TABLE IF NOT EXISTS work_item_deployment_links (
deployment_id UUID NOT NULL,
work_item_id UUID NOT NULL,
created_at TIMESTAMP WITH TIME ZONE NOT NULL,
PRIMARY KEY (deployment_id, work_item_id)
)
"""))
logger.info("Rollback Step 5/6: Restored table work_item_deployment_links")
# 9. archived_work_items
conn.execute(text("""
CREATE TABLE IF NOT EXISTS archived_work_items (
id UUID PRIMARY KEY,
item_type VARCHAR(20) NOT NULL,
title VARCHAR(200) NOT NULL,
status VARCHAR(20) NOT NULL,
parent_id UUID,
path VARCHAR(500) NOT NULL,
depth INTEGER NOT NULL,
branch_name VARCHAR(100),
commit_hash VARCHAR(40),
pr_number INTEGER,
metadata JSONB NOT NULL,
deleted_at TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE NOT NULL,
updated_at TIMESTAMP WITH TIME ZONE NOT NULL,
created_by VARCHAR(100) NOT NULL,
archived_at TIMESTAMP WITH TIME ZONE NOT NULL
)
"""))
logger.info("Rollback Step 5/6: Restored table archived_work_items")
logger.warning(
"Rollback Step 5/6: WARNING - Tables restored with schema only. "
"Data in dropped tables is NOT restored. Restore from backup if needed."
)
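        # One possible data-restoration path, assuming a custom-format pg_dump
        # backup was taken before the upgrade:
        #   pg_restore --data-only --table=vendor_extractors -d <dbname> backup.dump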
logger.info("Rollback Step 5/6: Complete - All 9 tables restored (schema only)")
except Exception as e:
logger.error(f"Rollback Step 5/6: FAILED - Error restoring tables: {e}")
raise ValueError(f"Rollback failed at step 5: Error restoring tables: {e}") from e
# Rollback Step 6: Run validation and log completion
logger.info("Rollback Step 6/6: Running validation...")
try:
# Validation 1: Verify project_id columns removed
result = conn.execute(text("""
SELECT COUNT(*) AS column_count
FROM information_schema.columns
WHERE table_name IN ('repositories', 'code_chunks')
AND column_name = 'project_id'
"""))
column_count = result.scalar()
if column_count != 0:
raise ValueError(f"Expected 0 project_id columns, found {column_count}")
logger.info("Rollback Step 6/6: Validation 1 - project_id columns removed from both tables")
# Validation 2: Verify dropped tables restored
result = conn.execute(text("""
SELECT COUNT(*) AS table_count
FROM information_schema.tables
WHERE table_name IN (
'vendor_extractors',
'deployment_events',
'project_configuration',
'future_enhancements',
'work_item_dependencies',
'vendor_deployment_links',
'work_item_deployment_links',
'archived_work_items',
'tasks'
)
"""))
table_count = result.scalar()
if table_count != 9:
raise ValueError(f"Expected 9 tables restored, found {table_count}")
logger.info("Rollback Step 6/6: Validation 2 - All 9 tables successfully restored")
duration = time.time() - start_time
logger.info(f"Rollback Step 6/6: Rollback completed successfully in {duration:.2f} seconds")
logger.info("Rollback Step 6/6: Summary - 9 tables restored (schema only), 2 columns removed, 1 index dropped, 2 constraints dropped")
except ValueError:
# Re-raise validation errors
raise
except Exception as e:
logger.error(f"Rollback Step 6/6: FAILED - Validation error: {e}")
raise ValueError(f"Rollback failed at step 6: Validation error: {e}") from e