Codebase MCP Server

007_provision_default_workspace.py•11.1 KiB

"""Provision default workspace for backward compatibility. Creates project_default schema with core tables to support existing users who don't specify a project_id parameter. This ensures zero breaking changes for current MCP server deployments. Changes: - Create schema: project_default - Create table: repositories (with project_id defaulting to 'default') - Create table: code_files (file metadata within repositories) - Create table: code_chunks (semantic chunks with pgvector embeddings) - Add indexes: file_path, repository_id, embedding (HNSW) - Register workspace in project_registry.workspace_config Performance: - HNSW index on code_chunks.embedding for sub-500ms search (m=16, ef_construction=64) - B-tree indexes on foreign keys for O(log n) JOIN performance - GIN index on file_path for pattern matching queries Constitutional Compliance: - Principle V: Production Quality (proper indexes, constraints, downgrade support) - Principle IV: Performance Guarantees (<500ms p95 search latency) - Principle VIII: Type Safety (explicit column types, UUIDs for keys) Traces to: - FR-018: Backward compatibility (existing usage without project_id works unchanged) - FR-003: Default workspace fallback (when project_id=None) Revision ID: 007 Revises: 006 Create Date: 2025-10-12 00:00:00.000000 """ from __future__ import annotations from typing import Sequence, Union from alembic import op import sqlalchemy as sa from sqlalchemy import text from sqlalchemy.dialects import postgresql from pgvector.sqlalchemy import Vector # revision identifiers, used by Alembic. revision: str = '007' down_revision: Union[str, None] = '006' branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None def upgrade() -> None: """Apply migration: create project_default schema with core tables. Creates: 1. Schema: project_default (isolated namespace for default workspace) 2. Table: repositories (code repository metadata) 3. Table: code_files (source files with language detection) 4. Table: code_chunks (semantic chunks with pgvector embeddings) 5. Indexes: - Unique index on repositories.path (prevent duplicate indexing) - B-tree indexes on foreign keys (optimize JOINs) - HNSW index on code_chunks.embedding (semantic search) - B-tree index on code_chunks.project_id (isolation queries) 6. Registry entry: project_registry.workspace_config (default → project_default) Schema Design (matches post-002 migration structure): repositories: - id: UUID PRIMARY KEY (stable identifier) - path: VARCHAR NOT NULL UNIQUE (absolute filesystem path) - name: VARCHAR NOT NULL (repository name) - last_indexed_at: TIMESTAMP NULL (incremental indexing support) - is_active: BOOLEAN NOT NULL DEFAULT TRUE (soft delete flag) - project_id: VARCHAR(50) NOT NULL DEFAULT 'default' (workspace isolation) - created_at: TIMESTAMP NOT NULL DEFAULT NOW() code_files: - id: UUID PRIMARY KEY - repository_id: UUID NOT NULL FK -> repositories.id ON DELETE CASCADE - path: VARCHAR NOT NULL (absolute file path) - relative_path: VARCHAR NOT NULL (repo-relative path) - content_hash: VARCHAR(64) NOT NULL (SHA-256 for change detection) - size_bytes: INTEGER NOT NULL (file size validation) - language: VARCHAR NULL (tree-sitter detected language) - modified_at: TIMESTAMP NOT NULL (filesystem mtime) - indexed_at: TIMESTAMP NOT NULL DEFAULT NOW() - is_deleted: BOOLEAN NOT NULL DEFAULT FALSE (soft delete) - deleted_at: TIMESTAMP NULL (soft delete timestamp) - created_at: TIMESTAMP NOT NULL DEFAULT NOW() code_chunks: - id: UUID PRIMARY KEY - code_file_id: UUID NOT NULL FK -> code_files.id ON DELETE CASCADE - content: TEXT NOT NULL (chunk content for search results) - start_line: INTEGER NOT NULL (chunk location) - end_line: INTEGER NOT NULL (chunk location) - chunk_type: VARCHAR NOT NULL (function, class, import, etc.) - embedding: VECTOR(768) NULL (Ollama nomic-embed-text embeddings) - project_id: VARCHAR(50) NOT NULL (denormalized for query performance) - created_at: TIMESTAMP NOT NULL DEFAULT NOW() Note: - pgvector extension must exist (created in migration 001) - Registry schema must exist (created in migration 006) """ # Step 1: Create project_default schema op.execute("CREATE SCHEMA IF NOT EXISTS project_default") # Step 2: Create repositories table in project_default op.create_table( 'repositories', sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False), sa.Column('path', sa.String(), nullable=False), sa.Column('name', sa.String(), nullable=False), sa.Column('last_indexed_at', sa.DateTime(), nullable=True), sa.Column('is_active', sa.Boolean(), nullable=False, server_default='true'), sa.Column('project_id', sa.String(50), nullable=False, server_default="'default'"), sa.Column('created_at', sa.DateTime(), nullable=False, server_default=sa.text('CURRENT_TIMESTAMP')), sa.PrimaryKeyConstraint('id', name=op.f('pk_repositories')), sa.UniqueConstraint('path', name=op.f('uq_repositories_path')), schema='project_default' ) op.create_index( op.f('ix_repositories_path'), 'repositories', ['path'], unique=True, schema='project_default' ) op.create_index( 'ix_repositories_project_id', 'repositories', ['project_id'], schema='project_default' ) # Step 3: Create code_files table in project_default op.create_table( 'code_files', sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False), sa.Column('repository_id', postgresql.UUID(as_uuid=True), nullable=False), sa.Column('path', sa.String(), nullable=False), sa.Column('relative_path', sa.String(), nullable=False), sa.Column('content_hash', sa.String(length=64), nullable=False), sa.Column('size_bytes', sa.Integer(), nullable=False), sa.Column('language', sa.String(), nullable=True), sa.Column('modified_at', sa.DateTime(), nullable=False), sa.Column('indexed_at', sa.DateTime(), nullable=False, server_default=sa.text('CURRENT_TIMESTAMP')), sa.Column('is_deleted', sa.Boolean(), nullable=False, server_default='false'), sa.Column('deleted_at', sa.DateTime(), nullable=True), sa.Column('created_at', sa.DateTime(), nullable=False, server_default=sa.text('CURRENT_TIMESTAMP')), sa.ForeignKeyConstraint( ['repository_id'], ['project_default.repositories.id'], name=op.f('fk_code_files_repository_id_repositories'), ondelete='CASCADE' ), sa.PrimaryKeyConstraint('id', name=op.f('pk_code_files')), schema='project_default' ) op.create_index( op.f('ix_code_files_path'), 'code_files', ['path'], unique=False, schema='project_default' ) op.create_index( 'ix_code_files_repo_path', 'code_files', ['repository_id', 'relative_path'], unique=True, schema='project_default' ) # Step 4: Create code_chunks table in project_default op.create_table( 'code_chunks', sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False), sa.Column('code_file_id', postgresql.UUID(as_uuid=True), nullable=False), sa.Column('content', sa.Text(), nullable=False), sa.Column('start_line', sa.Integer(), nullable=False), sa.Column('end_line', sa.Integer(), nullable=False), sa.Column('chunk_type', sa.String(), nullable=False), sa.Column('embedding', Vector(768), nullable=True), # pgvector VECTOR(768) type sa.Column('project_id', sa.String(50), nullable=False), sa.Column('created_at', sa.DateTime(), nullable=False, server_default=sa.text('CURRENT_TIMESTAMP')), sa.ForeignKeyConstraint( ['code_file_id'], ['project_default.code_files.id'], name=op.f('fk_code_chunks_code_file_id_code_files'), ondelete='CASCADE' ), sa.PrimaryKeyConstraint('id', name=op.f('pk_code_chunks')), schema='project_default' ) # Step 5: Create HNSW index for vector similarity search # Constitutional Principle IV: <500ms p95 search latency op.execute( """ CREATE INDEX ix_chunks_embedding_cosine ON project_default.code_chunks USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64) """ ) # Step 6: Create performance indexes op.create_index( 'ix_code_chunks_project_id', 'code_chunks', ['project_id'], schema='project_default' ) # Step 7: Register default workspace in registry # This enables workspace resolution logic to find the default schema op.execute(text(""" INSERT INTO project_registry.workspace_config (project_id, schema_name, created_at, metadata) VALUES ( 'default', 'project_default', NOW(), '{"description": "Default workspace for backward compatibility", "auto_created": true}'::jsonb ) ON CONFLICT (project_id) DO NOTHING """)) def downgrade() -> None: """Revert migration: drop project_default schema and workspace registration. Drops: 1. Registry entry: project_registry.workspace_config (default) 2. Indexes: All indexes in project_default (CASCADE handles this) 3. Tables: code_chunks, code_files, repositories (CASCADE order) 4. Schema: project_default (CASCADE removes all contained objects) Warning: - This is a destructive operation that removes ALL data in default workspace - Downgrade should only be used in development/testing environments - Production downgrades require manual backup/restore procedures - Existing MCP clients using default workspace will fail after downgrade """ # Step 1: Remove registry entry op.execute("DELETE FROM project_registry.workspace_config WHERE project_id = 'default'") # Step 2: Drop HNSW index explicitly (best practice) op.execute("DROP INDEX IF EXISTS project_default.ix_chunks_embedding_cosine") # Step 3: Drop all indexes explicitly (best practice, even though CASCADE would handle it) op.drop_index('ix_code_chunks_project_id', table_name='code_chunks', schema='project_default') op.drop_index('ix_code_files_repo_path', table_name='code_files', schema='project_default') op.drop_index(op.f('ix_code_files_path'), table_name='code_files', schema='project_default') op.drop_index('ix_repositories_project_id', table_name='repositories', schema='project_default') op.drop_index(op.f('ix_repositories_path'), table_name='repositories', schema='project_default') # Step 4: Drop tables in dependency order (child → parent) op.drop_table('code_chunks', schema='project_default') op.drop_table('code_files', schema='project_default') op.drop_table('repositories', schema='project_default') # Step 5: Drop project_default schema (CASCADE ensures cleanup of any residual objects) op.execute("DROP SCHEMA IF EXISTS project_default CASCADE")

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Ravenight13/codebase-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

007_provision_default_workspace.py•11.1 KiB