Code-Index-MCP

Code-Index-MCP
scripts

find_codebase_embeddings.py•5.67 KiB

#!/usr/bin/env python3
"""
Find which Qdrant collection contains embeddings for the current codebase.
"""

import sys
from pathlib import Path

# Add project root to Python path. This has to happen BEFORE the project-local
# import below; otherwise `mcp_server` is not importable when the script is
# launched directly from the scripts/ directory.
sys.path.insert(0, str(Path(__file__).parent.parent))

from qdrant_client import QdrantClient  # noqa: E402

from mcp_server.core.path_utils import PathUtils  # noqa: E402

# Substrings that identify a payload value as belonging to this codebase.
CODEBASE_PATTERNS = [
    "mcp_server",
    "code-index-mcp",
    "code_index_mcp",
    "/workspaces/code-index-mcp",
    "dispatcher",
    "semantic_indexer",
    "plugin_factory",
]

# Payload keys that may hold a point's file path, in lookup order.
_FILE_PATH_KEYS = ("file", "relative_path", "filepath", "path")

# Upper bound on the number of points sampled from each collection.
_MAX_SAMPLE_SIZE = 200

# Maximum number of example file paths remembered per collection.
_MAX_MATCHED_FILES = 5


def _payload_file_path(payload):
    """Return the lower-cased file path stored in *payload*, or "" if none."""
    for key in _FILE_PATH_KEYS:
        value = payload.get(key)
        if value:
            # Coerce to str so an unexpected non-string value cannot raise.
            return str(value).lower()
    return ""


def _payload_matches(payload, patterns):
    """Return True if any string value in *payload* contains a pattern."""
    return any(
        isinstance(value, str)
        and any(pattern in value.lower() for pattern in patterns)
        for value in payload.values()
    )


def _scan_collection(client, collection_name, patterns):
    """Sample one collection and summarise how many points match *patterns*.

    Returns a result dict (matches, sample_size, match_rate, total_points,
    matched_files), or None when the collection is empty or nothing matched.
    """
    info = client.get_collection(collection_name)
    if info.points_count == 0:
        return None

    # Sample more points to get better coverage
    points, _next_offset = client.scroll(
        collection_name=collection_name,
        limit=min(info.points_count, _MAX_SAMPLE_SIZE),
        with_payload=True,
    )
    if not points:
        return None

    matches = 0
    matched_files = []
    for point in points:
        # A point stored without a payload exposes `payload` as None.
        payload = point.payload or {}
        if not _payload_matches(payload, patterns):
            continue
        matches += 1
        file_path = _payload_file_path(payload)
        if file_path and len(matched_files) < _MAX_MATCHED_FILES:
            matched_files.append(file_path)

    if matches == 0:
        return None

    # Use the number of points actually returned as the denominator so the
    # rate stays correct even if scroll() returns fewer points than requested.
    sampled = len(points)
    return {
        "matches": matches,
        "sample_size": sampled,
        "match_rate": (matches / sampled) * 100,
        "total_points": info.points_count,
        "matched_files": matched_files,
    }


def _print_results(results):
    """Print per-collection match statistics and recommend the best one."""
    print("Collections containing current codebase files:\n")

    ranked = sorted(
        results.items(), key=lambda item: item[1]["match_rate"], reverse=True
    )

    for collection_name, data in ranked:
        print(f"Collection: {collection_name}")
        print(f" Total points: {data['total_points']}")
        print(
            f" Matches: {data['matches']}/{data['sample_size']} "
            f"({data['match_rate']:.1f}%)"
        )
        if data["matched_files"]:
            print(" Sample matched files:")
            for i, matched in enumerate(data["matched_files"][:3]):
                print(f" {i+1}. {matched}")
        print()

    # Recommend the best collection
    best_collection = ranked[0][0]
    print(f"🎯 RECOMMENDATION: Use collection '{best_collection}'")
    print(" This collection has the highest match rate for current codebase files.")


def _inspect_default_collection(client):
    """Print a few raw payloads from `code-embeddings` to aid debugging."""
    print("\nInspecting payload structure of code-embeddings collection...")
    try:
        sample = client.scroll(
            collection_name="code-embeddings",
            limit=3,
            with_payload=True,
        )
        if sample[0]:
            print("Sample payload keys:")
            for i, point in enumerate(sample[0][:3]):
                print(f"\nPoint {i+1}:")
                for key, value in (point.payload or {}).items():
                    text = str(value)
                    # Truncate long values so the output stays readable.
                    shown = value if len(text) < 100 else text[:100] + "..."
                    print(f" {key}: {shown}")
    except Exception as e:
        print(f"Could not inspect code-embeddings: {e}")


def find_codebase_embeddings():
    """Find which collection has current codebase embeddings.

    Opens the local Qdrant store under the workspace root, samples up to
    200 points from every non-empty collection, and counts the points whose
    payload contains a string matching one of CODEBASE_PATTERNS. Prints a
    ranked report and a recommendation; if nothing matches, prints sample
    payloads from the `code-embeddings` collection instead.

    Returns None. All errors are reported on stdout rather than raised.
    """
    print("=" * 60)
    print("SEARCHING FOR CURRENT CODEBASE EMBEDDINGS")
    print("=" * 60)

    # Resolve the index location from the real workspace root. Path() accepts
    # both str and Path, whichever get_workspace_root() returns.
    qdrant_path = (
        Path(PathUtils.get_workspace_root()) / ".indexes" / "qdrant" / "main.qdrant"
    )

    # Remove stale lock if it exists (best effort: a failure here is not fatal,
    # the connection attempt below will report the real problem).
    lock_file = qdrant_path / ".lock"
    try:
        lock_file.unlink()
    except FileNotFoundError:
        pass
    except OSError as e:
        print(f"Warning: could not remove stale lock {lock_file}: {e}")

    client = None
    try:
        client = QdrantClient(path=str(qdrant_path))
        collections = client.get_collections()

        print(f"Searching patterns: {CODEBASE_PATTERNS[:3]}...\n")

        results = {}
        for collection in collections.collections:
            # One unreadable collection must not abort the whole scan.
            try:
                summary = _scan_collection(
                    client, collection.name, CODEBASE_PATTERNS
                )
            except Exception as e:
                print(f"Error checking {collection.name}: {e}")
                continue
            if summary is not None:
                results[collection.name] = summary

        # Display results
        if results:
            _print_results(results)
        else:
            print("❌ No collections found containing current codebase files!")
            print(" All embeddings may be using different file paths or the index is empty.")

            # Try to inspect payload structure
            _inspect_default_collection(client)

    except Exception as e:
        print(f"❌ Failed to connect to Qdrant: {e}")
    finally:
        # Release the local storage lock so later runs are not blocked.
        if client is not None:
            client.close()

    print("\n" + "=" * 60)
    print("SEARCH COMPLETE")
    print("=" * 60)


if __name__ == "__main__":
    find_codebase_embeddings()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ViperJuice/Code-Index-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

find_codebase_embeddings.py•5.67 KiB