Skip to main content
Glama

Codebase MCP Server

by Ravenight13
settings.py19.3 kB
""" Production-grade settings configuration for Codebase MCP Server. Constitutional Compliance: - Principle V: Production quality with fail-fast validation - Principle VIII: Type safety with Pydantic 2.0+, mypy --strict compliance All configuration values are loaded from environment variables with .env support. Validation errors halt server startup with actionable error messages. """ from __future__ import annotations from enum import Enum from typing import Annotated import warnings from pydantic import ( Field, HttpUrl, PostgresDsn, field_validator, model_validator, ) from pydantic_settings import BaseSettings, SettingsConfigDict # Import PoolConfig for connection pool management from src.connection_pool.config import PoolConfig # ============================================================================ # Constants # ============================================================================ # Performance warning threshold for embedding batch size MIN_RECOMMENDED_BATCH_SIZE = 10 class LogLevel(str, Enum): """Valid log levels for structured logging.""" DEBUG = "DEBUG" INFO = "INFO" WARNING = "WARNING" ERROR = "ERROR" CRITICAL = "CRITICAL" class Settings(BaseSettings): """ Application settings with environment variable parsing and validation. All settings are loaded from environment variables with .env file support. Required fields must be set or server startup will fail. 
Database-Per-Project Architecture: Codebase-MCP uses a database-per-project architecture where each project workspace has its own isolated PostgreSQL database: - **Registry Database** (REGISTRY_DATABASE_URL): Tracks project metadata, config mappings, and workspace provisioning - **Project Databases** (cb_proj_*): Isolated databases for each project's code repositories, files, and chunks - **Default Database** (DATABASE_URL): Fallback workspace for unassociated repositories (optional, can be same as registry) This architecture provides: - Complete isolation between projects (no cross-contamination) - Independent scaling and backup strategies per project - Config-driven project discovery (.codebase-mcp/config.json) - Automatic database provisioning on first use Connection Pool Integration: The Settings class automatically initializes a PoolConfig instance from DATABASE_URL and POOL_* environment variables. This ensures fail-fast behavior: invalid configuration will halt server startup with clear error messages. The pool_config field is automatically populated during validation and provides access to advanced connection pool settings. If no POOL_* environment variables are set, default values from PoolConfig are used. Example .env: # Required: Database connections DATABASE_URL=postgresql+asyncpg://user:password@localhost:5432/codebase_mcp REGISTRY_DATABASE_URL=postgresql+asyncpg://user:password@localhost:5432/codebase_mcp_registry # Ollama Configuration OLLAMA_BASE_URL=http://localhost:11434 OLLAMA_EMBEDDING_MODEL=nomic-embed-text # Performance Tuning EMBEDDING_BATCH_SIZE=50 MAX_CONCURRENT_REQUESTS=10 # Legacy Pool Configuration (Deprecated - use POOL_* variables instead) DB_POOL_SIZE=20 DB_MAX_OVERFLOW=10 # Logging LOG_LEVEL=INFO LOG_FILE=/tmp/codebase-mcp.log # Advanced Connection Pool Configuration (Optional - uses defaults if not set) # All POOL_* variables are optional. Defaults are production-ready. 
POOL_MIN_SIZE=2 # Min connections (default: 2) POOL_MAX_SIZE=10 # Max connections (default: 10) POOL_TIMEOUT=30.0 # Connection acquisition timeout (default: 30.0s) POOL_COMMAND_TIMEOUT=60.0 # Query execution timeout (default: 60.0s) POOL_MAX_IDLE_TIME=60.0 # Idle connection timeout (default: 60.0s) POOL_MAX_QUERIES=50000 # Queries before connection recycling (default: 50000) POOL_MAX_CONNECTION_LIFETIME=3600.0 # Max connection age (default: 3600.0s) POOL_LEAK_DETECTION_TIMEOUT=30.0 # Leak warning threshold (default: 30.0s) POOL_ENABLE_LEAK_DETECTION=true # Enable leak detection (default: true) Validation Rules: - DATABASE_URL must use postgresql+asyncpg:// scheme - POOL_MAX_SIZE must be >= POOL_MIN_SIZE - All timeout values must be positive - Configuration errors fail fast at startup with actionable messages Usage Example: >>> from src.config.settings import get_settings >>> settings = get_settings() >>> # Access database URL >>> db_url = settings.database_url >>> # Access pool configuration >>> pool_config = settings.pool_config >>> print(f"Pool size: {pool_config.min_size}-{pool_config.max_size}") Pool size: 2-10 """ model_config = SettingsConfigDict( env_file=".env", env_file_encoding="utf-8", case_sensitive=False, extra="ignore", # Ignore unknown env vars (allows coexistence with workflow-mcp) ) # ============================================================================ # Database Configuration # ============================================================================ database_url: Annotated[ PostgresDsn | None, Field( default=None, description=( "LEGACY: PostgreSQL connection URL with asyncpg driver. " "Format: postgresql+asyncpg://user:password@host:port/database. " "NOTE: This is OPTIONAL and only used for backward compatibility. " "Modern deployments use database-per-project architecture via REGISTRY_DATABASE_URL. " "Each project uses its own isolated database (cb_proj_*)." 
), ), ] = None registry_database_url: Annotated[ PostgresDsn, Field( default="postgresql+asyncpg://localhost/codebase_mcp_registry", description=( "Registry database URL for project tracking in database-per-project architecture. " "Format: postgresql+asyncpg://user:password@host:port/database. " "Environment variables: REGISTRY_DATABASE_URL or CODEBASE_MCP_REGISTRY_URL" ), ), ] db_pool_size: Annotated[ int, Field( default=20, ge=5, le=50, description=( "SQLAlchemy connection pool size. " "Should accommodate max concurrent AI assistants. " "Range: 5-50" ), ), ] db_max_overflow: Annotated[ int, Field( default=10, ge=0, le=20, description=( "Maximum overflow connections beyond pool_size. " "Handles traffic spikes. " "Range: 0-20" ), ), ] # ============================================================================ # Ollama Configuration # ============================================================================ ollama_base_url: Annotated[ HttpUrl, Field( default="http://localhost:11434", description=( "Ollama API base URL for embedding generation. " "Must be accessible from the server." ), ), ] ollama_embedding_model: Annotated[ str, Field( default="nomic-embed-text", min_length=1, description=( "Ollama embedding model name. " "Must be pulled locally: ollama pull nomic-embed-text" ), ), ] # ============================================================================ # Performance Tuning # ============================================================================ embedding_batch_size: Annotated[ int, Field( default=50, ge=1, le=1000, description=( "Number of text chunks to embed per Ollama API request. " "Larger batches improve throughput but increase latency. " "Range: 1-1000" ), ), ] max_concurrent_requests: Annotated[ int, Field( default=10, ge=1, le=100, description=( "Maximum concurrent AI assistant connections. " "Limits resource usage under load. 
" "Range: 1-100" ), ), ] # ============================================================================ # Logging Configuration # ============================================================================ log_level: Annotated[ LogLevel, Field( default=LogLevel.INFO, description=( "Logging verbosity level. " "Valid values: DEBUG, INFO, WARNING, ERROR, CRITICAL" ), ), ] log_file: Annotated[ str, Field( default="/tmp/codebase-mcp.log", # noqa: S108 - temporary log file is acceptable for MCP server min_length=1, description=( "File path for structured JSON logs. " "CRITICAL: Never log to stdout/stderr (MCP protocol violation)" ), ), ] # ============================================================================ # Multi-project Workspace Integration (Optional) # ============================================================================ workflow_mcp_url: Annotated[ HttpUrl | None, Field( default=None, description=( "Optional workflow-mcp server URL for automatic project detection. " "If not set, multi-project workspace features are disabled." ), ), ] = None workflow_mcp_timeout: Annotated[ float, Field( default=1.0, ge=0.1, le=5.0, description=( "Timeout for workflow-mcp queries (seconds). " "Should be low to avoid blocking indexing operations. " "Range: 0.1-5.0" ), ), ] = 1.0 workflow_mcp_cache_ttl: Annotated[ int, Field( default=60, ge=10, le=300, description=( "Cache TTL for workflow-mcp responses (seconds). " "Reduces query overhead for repeated repository checks. " "Range: 10-300" ), ), ] = 60 # ============================================================================ # Connection Pool Configuration # ============================================================================ pool_config: Annotated[ PoolConfig | None, Field( default=None, description=( "Advanced connection pool configuration. " "This field is automatically populated from DATABASE_URL and POOL_* environment variables. 
" "If not explicitly set, a PoolConfig instance will be created with DATABASE_URL " "and default pool settings during validation." ), ), ] = None # ============================================================================ # Validators # ============================================================================ @field_validator("database_url", "registry_database_url") @classmethod def validate_asyncpg_driver(cls, v: PostgresDsn | None) -> PostgresDsn | None: """ Ensure database URLs use asyncpg driver for async SQLAlchemy. Args: v: PostgreSQL DSN to validate (or None for optional database_url) Returns: Validated PostgreSQL DSN or None Raises: ValueError: If scheme is not postgresql+asyncpg """ # Allow None for optional database_url (legacy support) if v is None: return None if v.scheme != "postgresql+asyncpg": error_msg = ( "Database URL must use asyncpg driver for async operations.\n" f"Found: {v.scheme}\n" "Expected: postgresql+asyncpg\n\n" "Fix: Update .env file:\n" " DATABASE_URL=postgresql+asyncpg://user:password@localhost:5432/codebase_mcp (optional, legacy)\n" " REGISTRY_DATABASE_URL=postgresql+asyncpg://user:password@localhost:5432/codebase_mcp_registry (required)" ) raise ValueError(error_msg) return v @field_validator("ollama_base_url") @classmethod def validate_ollama_url(cls, v: HttpUrl) -> HttpUrl: """ Ensure Ollama URL is well-formed and uses HTTP/HTTPS. Args: v: Ollama base URL to validate Returns: Validated Ollama URL Raises: ValueError: If URL is malformed """ # Pydantic HttpUrl already validates format # Additional checks can be added here if needed return v @field_validator("db_pool_size", "db_max_overflow") @classmethod def validate_pool_configuration(cls, v: int) -> int: """ Validate database pool size configuration is reasonable. 
Args: v: Pool size value to validate Returns: Validated pool size Raises: ValueError: If pool configuration is invalid """ # Validation handled by Field constraints (ge/le) # This validator can add cross-field validation if needed return v @field_validator("embedding_batch_size") @classmethod def validate_batch_size(cls, v: int) -> int: """ Validate embedding batch size is optimized for performance. Args: v: Batch size to validate Returns: Validated batch size Raises: ValueError: If batch size is suboptimal """ # Warn about very small batch sizes (performance impact) if v < MIN_RECOMMENDED_BATCH_SIZE: warnings.warn( f"EMBEDDING_BATCH_SIZE={v} is very small and may impact indexing performance. " "Recommended: 50-100 for optimal throughput.", stacklevel=2, ) return v @model_validator(mode="after") def initialize_pool_config(self) -> "Settings": """ Initialize pool_config from DATABASE_URL and environment variables. This validator automatically creates a PoolConfig instance if one wasn't explicitly provided. It uses the DATABASE_URL from Settings and reads POOL_* environment variables for pool-specific configuration. This ensures fail-fast behavior: if DATABASE_URL is invalid or missing, server startup will fail with a clear error message. 
Returns: Settings instance with initialized pool_config Raises: ValueError: If PoolConfig creation fails due to invalid configuration Example: >>> # With only DATABASE_URL set >>> settings = Settings(database_url="postgresql+asyncpg://localhost/db") >>> settings.pool_config.min_size # Uses default: 2 2 >>> settings.pool_config.database_url 'postgresql+asyncpg://localhost/db' >>> # With POOL_* environment variables >>> # POOL_MIN_SIZE=5, POOL_MAX_SIZE=20 >>> settings = Settings(database_url="postgresql+asyncpg://localhost/db") >>> settings.pool_config.min_size 5 """ if self.pool_config is None and self.database_url is not None: # Convert PostgresDsn to string for PoolConfig # Only initialize if database_url is provided (legacy support) database_url_str = str(self.database_url) try: # Create PoolConfig with DATABASE_URL # PoolConfig will automatically read POOL_* environment variables self.pool_config = PoolConfig(database_url=database_url_str) except Exception as e: # Re-raise with context about where the error occurred error_msg = ( "Failed to initialize connection pool configuration.\n" f"Error: {e}\n\n" "This typically indicates:\n" " 1. Invalid POOL_* environment variable values\n" " 2. Conflicting pool size configuration (max_size < min_size)\n" " 3. Out-of-range timeout values\n\n" "Check your .env file and ensure POOL_* variables follow PoolConfig validation rules.\n" "See PoolConfig documentation for valid ranges and constraints." ) raise ValueError(error_msg) from e return self # ============================================================================ # Singleton Instance # ============================================================================ # Lazy-loaded singleton for testing flexibility _settings_instance: Settings | None = None def get_settings() -> Settings: """ Get singleton settings instance with lazy initialization. 
Returns: Singleton Settings instance Raises: ValidationError: If environment variables are invalid or missing Example: >>> settings = get_settings() >>> db_url = settings.database_url >>> batch_size = settings.embedding_batch_size """ global _settings_instance # noqa: PLW0603 - singleton pattern requires global state if _settings_instance is None: _settings_instance = Settings() return _settings_instance # ============================================================================ # Convenience Export # ============================================================================ # Primary export for application code # Note: This will fail if DATABASE_URL is not set. In that case, use get_settings() # or import Settings directly for testing/validation purposes. try: settings = get_settings() except Exception: # Allow module import even if settings validation fails # This enables testing and validation without requiring full config settings = None # type: ignore[assignment] # ============================================================================ # Type Exports for External Use # ============================================================================ __all__ = [ "LogLevel", "PoolConfig", "Settings", "get_settings", "settings", ]

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Ravenight13/codebase-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.