Codebase MCP Server

codebase-mcp
scripts

init_db.py•11.4 kB

#!/usr/bin/env python3 """Database initialization script for Codebase MCP Server. This script initializes the PostgreSQL database by: 1. Validating database connection and pgvector extension availability 2. Running Alembic migrations to create schema 3. Verifying table and index creation 4. Providing detailed error messages for troubleshooting Constitutional Compliance: - Principle V: Production quality (comprehensive error handling, validation) - Principle VIII: Type safety (mypy --strict compliance) Usage: python scripts/init_db.py --database-url postgresql+asyncpg://localhost/codebase_mcp python scripts/init_db.py # Uses DATABASE_URL environment variable Exit Codes: 0 - Success (database initialized) 1 - Error (connection failed, migration failed, etc.) """ from __future__ import annotations import argparse import asyncio import os import sys from pathlib import Path from typing import Any, NoReturn from alembic import command from alembic.config import Config from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine def error_exit(message: str, details: str | None = None) -> NoReturn: """Print error message and exit with code 1. Args: message: Primary error message details: Optional detailed error information Exit Code: 1 (error) """ print(f"\n❌ ERROR: {message}", file=sys.stderr) if details: print(f"\nDetails: {details}", file=sys.stderr) sys.exit(1) def success_message(message: str) -> None: """Print success message to stdout. Args: message: Success message to display """ print(f"✅ {message}") async def check_database_connection(engine: AsyncEngine) -> None: """Verify database connection is working. Args: engine: Async SQLAlchemy engine Raises: Exception: If connection fails Checks: - Database connectivity - Basic query execution """ try: async with engine.connect() as conn: result = await conn.execute(text("SELECT 1")) result.fetchone() success_message("Database connection successful") except Exception as e: error_exit( "Failed to connect to database", f"Check that PostgreSQL is running and connection string is correct.\n{e}", ) async def check_pgvector_extension(engine: AsyncEngine) -> None: """Verify pgvector extension is available. Args: engine: Async SQLAlchemy engine Raises: Exception: If pgvector extension is not available Checks: - pgvector extension installation - Extension creation permission """ try: async with engine.connect() as conn: # Try to create extension (idempotent) await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector")) await conn.commit() # Verify extension exists result = await conn.execute( text( "SELECT EXISTS(SELECT 1 FROM pg_extension WHERE extname = 'vector')" ) ) exists = result.scalar() if not exists: error_exit( "pgvector extension not available", "Install pgvector: https://github.com/pgvector/pgvector#installation", ) success_message("pgvector extension available") except Exception as e: error_exit( "Failed to enable pgvector extension", f"Ensure you have permission to create extensions.\n{e}", ) def run_alembic_migrations(database_url: str) -> None: """Run Alembic migrations to create database schema. Args: database_url: PostgreSQL connection string Raises: Exception: If migration fails Operations: - Loads alembic.ini configuration - Sets database URL - Runs 'alembic upgrade head' """ try: # Get path to alembic.ini (should be in project root) project_root = Path(__file__).parent.parent alembic_ini_path = project_root / "alembic.ini" if not alembic_ini_path.exists(): error_exit( "alembic.ini not found", f"Expected location: {alembic_ini_path}", ) # Create Alembic config alembic_cfg = Config(str(alembic_ini_path)) alembic_cfg.set_main_option("sqlalchemy.url", database_url) # Run migrations success_message("Running Alembic migrations...") command.upgrade(alembic_cfg, "head") success_message("Alembic migrations completed") except Exception as e: error_exit( "Failed to run Alembic migrations", f"Check migration files for errors.\n{e}", ) async def verify_schema(engine: AsyncEngine) -> None: """Verify database schema was created correctly. Args: engine: Async SQLAlchemy engine Raises: Exception: If schema verification fails Verifies: - All 11 tables exist - HNSW index on code_chunks.embedding exists - Key indexes exist """ expected_tables = [ "repositories", "code_files", "code_chunks", "tasks", "task_planning_references", "task_branch_links", "task_commit_links", "task_status_history", "change_events", "embedding_metadata", "search_queries", ] try: async with engine.connect() as conn: # Check tables result = await conn.execute( text( """ SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' ORDER BY table_name """ ) ) tables = [row[0] for row in result.fetchall()] missing_tables = set(expected_tables) - set(tables) if missing_tables: error_exit( "Missing database tables", f"Tables not found: {', '.join(sorted(missing_tables))}", ) success_message(f"All {len(expected_tables)} tables created") # Check HNSW index exists result = await conn.execute( text( """ SELECT indexname FROM pg_indexes WHERE tablename = 'code_chunks' AND indexname = 'ix_chunks_embedding_cosine' """ ) ) hnsw_index = result.fetchone() if not hnsw_index: error_exit( "HNSW index not created", "Index 'ix_chunks_embedding_cosine' missing on code_chunks table", ) success_message("HNSW index created on code_chunks.embedding") # Count total indexes result = await conn.execute( text( """ SELECT COUNT(*) FROM pg_indexes WHERE schemaname = 'public' """ ) ) index_count = result.scalar() success_message(f"Total indexes created: {index_count}") except Exception as e: error_exit( "Failed to verify database schema", str(e), ) async def initialize_database(database_url: str) -> None: """Main database initialization workflow. Args: database_url: PostgreSQL connection string Workflow: 1. Create async engine 2. Check database connection 3. Verify pgvector extension 4. Run Alembic migrations 5. Verify schema creation 6. Dispose engine """ print("\n🚀 Initializing Codebase MCP Database\n") print(f"Database URL: {database_url.split('@')[-1]}\n") # Create async engine (no pooling for initialization) engine = create_async_engine( database_url, echo=False, pool_pre_ping=True, ) try: # Step 1: Check connection await check_database_connection(engine) # Step 2: Verify pgvector await check_pgvector_extension(engine) # Step 3: Run migrations (synchronous Alembic) run_alembic_migrations(database_url) # Step 4: Verify schema await verify_schema(engine) print("\n" + "=" * 60) print("✅ Database initialization complete!") print("=" * 60) print("\nYou can now:") print(" - Start the MCP server: uvicorn src.main:app") print(" - Run tests: pytest") print(" - Check migrations: alembic history") print() finally: # Clean up engine await engine.dispose() def parse_arguments() -> argparse.Namespace: """Parse command-line arguments. Returns: Parsed arguments with database_url Arguments: --database-url: PostgreSQL connection string (optional) """ parser = argparse.ArgumentParser( description="Initialize Codebase MCP Server database", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: # Use environment variable DATABASE_URL python scripts/init_db.py # Specify database URL directly python scripts/init_db.py --database-url postgresql+asyncpg://user:pass@localhost/codebase_mcp # Local development (default PostgreSQL) python scripts/init_db.py --database-url postgresql+asyncpg://localhost/codebase_mcp """, ) parser.add_argument( "--database-url", type=str, help="PostgreSQL connection string (default: DATABASE_URL env var)", ) return parser.parse_args() def main() -> None: """Entry point for database initialization script. Validates arguments, checks environment, and runs initialization. Exit Codes: 0 - Success 1 - Error (validation, connection, migration, etc.) """ args = parse_arguments() # Get database URL from argument or environment database_url = args.database_url or os.getenv("DATABASE_URL") if not database_url: error_exit( "No database URL provided", "Set DATABASE_URL environment variable or use --database-url argument", ) # Validate URL format if not database_url.startswith("postgresql"): error_exit( "Invalid database URL", "URL must start with 'postgresql://' or 'postgresql+asyncpg://'", ) # Ensure asyncpg driver is specified if "+asyncpg" not in database_url: # Convert postgresql:// to postgresql+asyncpg:// database_url = database_url.replace("postgresql://", "postgresql+asyncpg://") print(f"⚠️ Note: Using asyncpg driver (converted URL)") # Run initialization try: asyncio.run(initialize_database(database_url)) except KeyboardInterrupt: print("\n\n⚠️ Initialization cancelled by user", file=sys.stderr) sys.exit(1) except Exception as e: error_exit("Unexpected error during initialization", str(e)) if __name__ == "__main__": main()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Ravenight13/codebase-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server