Code-Index-MCP

Code-Index-MCP
scripts

migrate_current_repo_from_large_index.py•8.4 KiB

#!/usr/bin/env python3
"""
Quick migration script to extract current repository from large index.

Reads the rows that belong to the current workspace out of a shared SQLite
index (its ``bm25_content`` and ``symbols`` tables), writes them into a fresh
per-repository ``code_index.db``, and records the result in
``repository_registry.json`` inside the index storage directory.
"""

import os
import sys
import sqlite3
import hashlib
from pathlib import Path
from datetime import datetime

# Add project root to path
sys.path.insert(0, str(Path(__file__).parent.parent))

from mcp_server.core.path_utils import PathUtils

# Default location of the large shared index this script reads from.
DEFAULT_SOURCE_DB = Path("/workspaces/Code-Index-MCP/.indexes/f7b49f5d0ae0/new_index.db")

# Upper bound on bound parameters per IN (...) query. SQLite builds older
# than 3.32 reject statements with more than 999 parameters.
_SQL_PARAM_BATCH = 500


def _escape_like(text):
    """Escape LIKE wildcards in *text* for use with ``ESCAPE '\\'``."""
    return text.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


def migrate_current_repo(source_db=DEFAULT_SOURCE_DB):
    """Migrate current repository from large index.

    Args:
        source_db: Path of the SQLite index to read from. Defaults to
            ``DEFAULT_SOURCE_DB``.

    Raises:
        FileNotFoundError: If ``source_db`` does not exist. Checked before
            anything is deleted or created, so an existing target index is
            left intact in that case.
    """
    source_db = Path(source_db)
    # sqlite3.connect() would silently create an empty database at a missing
    # path, so verify the source up front.
    if not source_db.is_file():
        raise FileNotFoundError(f"Source index not found: {source_db}")

    # Paths
    current_repo_path = str(PathUtils.get_workspace_root())
    repo_hash = hashlib.sha256(current_repo_path.encode()).hexdigest()[:12]

    # Output path
    output_dir = PathUtils.get_index_storage_path() / repo_hash
    output_dir.mkdir(parents=True, exist_ok=True)
    output_db = output_dir / "code_index.db"

    print("Migrating current repository from large index...")
    print(f"Source: {source_db}")
    print(f"Target: {output_db}")
    print(f"Repository path: {current_repo_path}")
    print(f"Repository hash: {repo_hash}")

    # Remove existing if present
    if output_db.exists():
        output_db.unlink()
        print("Removed existing index")

    # Connect to databases; each connection is closed even if the other
    # connect() call or the migration itself fails.
    source_conn = sqlite3.connect(str(source_db))
    try:
        target_conn = sqlite3.connect(str(output_db))
        try:
            # Create schema in target
            create_schema(target_conn)

            # Migrate data
            file_count = migrate_data(source_conn, target_conn, current_repo_path)
            symbol_count = target_conn.execute(
                "SELECT COUNT(*) FROM symbols"
            ).fetchone()[0]
        finally:
            target_conn.close()
    finally:
        source_conn.close()

    print(f"\nMigrated {file_count} files successfully!")

    # Update registry
    update_registry(repo_hash, current_repo_path, output_db, file_count, symbol_count)


def create_schema(conn):
    """Create necessary tables in target database.

    Creates the ``repositories``, ``files`` and ``symbols`` tables, the
    ``bm25_content`` FTS5 virtual table and their indexes (all with
    ``IF NOT EXISTS``), then commits.

    Args:
        conn: Open ``sqlite3`` connection to the target database.
    """
    cursor = conn.cursor()

    # Create repositories table
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS repositories (
            id INTEGER PRIMARY KEY,
            path TEXT NOT NULL,
            name TEXT NOT NULL,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            metadata JSON
        )
    """)

    # Create files table
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS files (
            id INTEGER PRIMARY KEY,
            repository_id INTEGER NOT NULL,
            path TEXT NOT NULL,
            relative_path TEXT NOT NULL,
            language TEXT,
            size INTEGER,
            hash TEXT,
            last_modified TIMESTAMP,
            indexed_at TIMESTAMP,
            metadata JSON,
            FOREIGN KEY (repository_id) REFERENCES repositories(id),
            UNIQUE(repository_id, path)
        )
    """)

    # Create symbols table
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS symbols (
            id INTEGER PRIMARY KEY,
            file_id INTEGER NOT NULL,
            name TEXT NOT NULL,
            kind TEXT,
            line_start INTEGER,
            line_end INTEGER,
            column_start INTEGER,
            column_end INTEGER,
            signature TEXT,
            documentation TEXT,
            metadata JSON,
            FOREIGN KEY (file_id) REFERENCES files(id)
        )
    """)

    # Create indexes for symbols
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name)")
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_id)")

    # Create BM25 FTS5 table
    cursor.execute("""
        CREATE VIRTUAL TABLE IF NOT EXISTS bm25_content USING fts5(
            file_id UNINDEXED,
            filepath,
            filename,
            content,
            language,
            symbols,
            imports,
            comments,
            tokenize = 'porter unicode61',
            prefix = '2 3'
        )
    """)

    # Create indexes
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_files_repo ON files(repository_id)")
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_files_path ON files(path)")

    conn.commit()


def migrate_data(source_conn, target_conn, repo_path):
    """Migrate data for the current repository.

    Copies every ``bm25_content`` row whose ``filepath`` lies under
    ``repo_path`` (excluding anything below a ``test_repos`` directory) into
    the target ``files`` and ``bm25_content`` tables with freshly numbered
    file ids, then copies the matching ``symbols`` rows.

    Args:
        source_conn: Open ``sqlite3`` connection to the large source index.
        target_conn: Open ``sqlite3`` connection to the target database; its
            schema must already exist (see ``create_schema``).
        repo_path: Absolute path of the repository, as a string.

    Returns:
        Number of files written to the target database.
    """
    source_cursor = source_conn.cursor()
    target_cursor = target_conn.cursor()

    # Insert repository
    target_cursor.execute(
        """
        INSERT INTO repositories (id, path, name, metadata)
        VALUES (1, ?, 'Code-Index-MCP', '{}')
        """,
        (repo_path,),
    )

    # Match only paths strictly inside the repository: the trailing slash
    # keeps sibling directories such as "<repo>-backup/" out, and escaping
    # stops "_" or "%" in the path from acting as LIKE wildcards.
    repo_prefix = repo_path.rstrip('/') + '/'
    like_pattern = _escape_like(repo_prefix) + '%'

    # Get files from current repo (excluding test_repos)
    source_cursor.execute(
        """
        SELECT DISTINCT
            b.file_id, b.filepath, b.filename, b.content,
            b.language, b.symbols, b.imports, b.comments
        FROM bm25_content b
        WHERE b.filepath LIKE ? ESCAPE '\\'
          AND b.filepath NOT LIKE '%/test_repos/%'
        ORDER BY b.file_id
        """,
        (like_pattern,),
    )

    file_count = 0
    file_id_mapping = {}  # source file_id mapped to target file id
    path_to_new_id = {}   # filepath mapped to target file id, for de-duplication

    # Iterate the cursor instead of fetchall() so the full text of every
    # file is not held in memory at the same time.
    for row in source_cursor:
        old_file_id, filepath, filename, content, language, symbols, imports, comments = row

        # The FTS table has no uniqueness constraint, while the target
        # enforces UNIQUE(repository_id, path). Keep the first row for a path
        # and point any further source ids at it.
        already_migrated = path_to_new_id.get(filepath)
        if already_migrated is not None:
            file_id_mapping[old_file_id] = already_migrated
            continue

        new_file_id = file_count + 1

        # Store file ID mapping
        file_id_mapping[old_file_id] = new_file_id
        path_to_new_id[filepath] = new_file_id

        # Insert into files table
        relative_path = filepath[len(repo_prefix):].lstrip('/')
        target_cursor.execute(
            """
            INSERT INTO files (id, repository_id, path, relative_path, language,
                               size, hash, last_modified, indexed_at, metadata)
            VALUES (?, 1, ?, ?, ?, 0, '', datetime('now'), datetime('now'), '{}')
            """,
            (new_file_id, filepath, relative_path, language),
        )

        # Insert into bm25_content
        target_cursor.execute(
            """
            INSERT INTO bm25_content (file_id, filepath, filename, content,
                                      language, symbols, imports, comments)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (new_file_id, filepath, filename, content, language, symbols, imports, comments),
        )

        file_count += 1

        if file_count % 100 == 0:
            print(f" Migrated {file_count} files...")
            target_conn.commit()

    # Migrate symbols
    print("\n Migrating symbols...")
    symbol_count = 0

    # Source file ids we migrated; NULL ids can never match a symbol row.
    migrated_files = [fid for fid in file_id_mapping if fid is not None]

    # Query in batches so the number of bound parameters stays below
    # SQLite's per-statement limit regardless of repository size.
    for start in range(0, len(migrated_files), _SQL_PARAM_BATCH):
        batch = migrated_files[start:start + _SQL_PARAM_BATCH]
        placeholders = ','.join(['?'] * len(batch))
        source_cursor.execute(
            f"""
            SELECT s.file_id, s.name, s.kind, s.line_start, s.line_end,
                   s.column_start, s.column_end, s.signature,
                   s.documentation, s.metadata
            FROM symbols s
            WHERE s.file_id IN ({placeholders})
            """,
            batch,
        )

        for symbol_row in source_cursor:
            mapped_file_id = file_id_mapping.get(symbol_row[0])
            if mapped_file_id is None:
                continue
            target_cursor.execute(
                """
                INSERT INTO symbols (file_id, name, kind, line_start, line_end,
                                     column_start, column_end, signature,
                                     documentation, metadata)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (mapped_file_id,) + tuple(symbol_row[1:]),
            )
            symbol_count += 1

    print(f" Migrated {symbol_count} symbols")

    target_conn.commit()
    return file_count


def update_registry(repo_hash, repo_path, index_path, file_count, total_symbols=0):
    """Update repository registry.

    Loads ``repository_registry.json`` from the index storage directory (or
    starts from an empty registry if the file is absent), replaces the entry
    keyed by ``repo_hash`` and writes the registry back.

    Args:
        repo_hash: Registry key and ``repository_id`` of the entry.
        repo_path: Repository path stored in the entry.
        index_path: Path of the migrated index database; stored as a string.
        file_count: Number of migrated files.
        total_symbols: Number of migrated symbols. Defaults to 0, the value
            that was previously always written.
    """
    import json

    registry_path = PathUtils.get_index_storage_path() / "repository_registry.json"

    # Load existing registry
    if registry_path.exists():
        with open(registry_path, 'r', encoding='utf-8') as f:
            registry = json.load(f)
    else:
        registry = {}

    # Add/update current repo
    registry[repo_hash] = {
        "repository_id": repo_hash,
        "name": "Code-Index-MCP",
        "path": repo_path,
        "index_path": str(index_path),
        "language_stats": {"python": file_count},  # Simplified
        "total_files": file_count,
        "total_symbols": total_symbols,
        "indexed_at": datetime.now().isoformat(),
        "active": True,
        "priority": 10,  # Higher priority for current repo
    }

    # Write to a temporary file and swap it in, so a failure while dumping
    # cannot leave a truncated registry behind.
    tmp_path = registry_path.with_name(registry_path.name + ".tmp")
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(registry, f, indent=2)
    os.replace(tmp_path, registry_path)

    print(f"\nUpdated registry: {registry_path}")


if __name__ == "__main__":
    # An optional first command-line argument overrides the source index path.
    if len(sys.argv) > 1:
        migrate_current_repo(sys.argv[1])
    else:
        migrate_current_repo()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ViperJuice/Code-Index-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

migrate_current_repo_from_large_index.py•8.4 KiB