Skip to main content
Glama
brockwebb

Open Census MCP Server

by brockwebb
hunt_databases.py5.67 kB
#!/usr/bin/env python3 """Find ALL the databases and check their actual locations vs expected""" import time from pathlib import Path import json def find_all_databases(): """Find every database in the project""" print("🔍 HUNTING DOWN ALL DATABASES") print("=" * 50) project_root = Path("/Users/brock/Documents/GitHub/census-mcp-server") # Search for different database types database_indicators = { "FAISS": "*.faiss", "ChromaDB": "chroma.sqlite3", "SQLite": "*.db", "Metadata": "*metadata.json", "Build Info": "build_info.json" } found_databases = {} for db_type, pattern in database_indicators.items(): files = list(project_root.rglob(pattern)) if files: found_databases[db_type] = files # Show findings for db_type, files in found_databases.items(): print(f"\n📁 {db_type} Files Found:") print("-" * 30) current_time = time.time() for file_path in sorted(files): try: stat = file_path.stat() age_hours = (current_time - stat.st_mtime) / 3600 size_mb = stat.st_size / (1024 * 1024) if age_hours < 1: age_str = f"{age_hours * 60:.0f}m ago 🔥" elif age_hours < 24: age_str = f"{age_hours:.1f}h ago" else: age_str = f"{age_hours / 24:.1f}d ago" rel_path = file_path.relative_to(project_root) print(f" {rel_path}") print(f" Size: {size_mb:.1f}MB, Modified: {age_str}") # Special handling for build info if file_path.name == "build_info.json": try: with open(file_path) as f: build_info = json.load(f) model = build_info.get('embedding_model', 'unknown') dims = build_info.get('embedding_dimension', 'unknown') count = build_info.get('variable_count', build_info.get('chunk_count', 'unknown')) print(f" 📊 Model: {model}, Dims: {dims}, Count: {count}") except: pass print() except Exception as e: print(f" ❌ Error reading {file_path}: {e}") def check_expected_vs_actual(): """Check where kb_search expects files vs where they actually are""" print("🎯 EXPECTED vs ACTUAL LOCATIONS") print("=" * 50) project_root = Path("/Users/brock/Documents/GitHub/census-mcp-server") # What kb_search.py expects expected_locations = { "Variables DB": "knowledge-base/variables-db/", "Methodology DB": "knowledge-base/methodology-db/", "Geography DB": "knowledge-base/geo-db/", "Table Catalog": "knowledge-base/table-catalog/" } print("Expected locations:") for name, location in expected_locations.items(): full_path = project_root / location if full_path.exists(): print(f"✅ {name}: {location}") # List key files in directory key_files = list(full_path.glob("*.faiss")) + list(full_path.glob("*.db")) + list(full_path.glob("chroma.sqlite3")) for f in key_files[:3]: # Show first 3 stat = f.stat() size_mb = stat.st_size / (1024 * 1024) age_hours = (time.time() - stat.st_mtime) / 3600 age_str = f"{age_hours:.1f}h ago" if age_hours < 48 else f"{age_hours/24:.1f}d ago" print(f" 📄 {f.name} ({size_mb:.1f}MB, {age_str})") else: print(f"❌ {name}: {location} - NOT FOUND") def find_methodology_databases(): """Specifically hunt down methodology databases""" print("\n🔍 METHODOLOGY DATABASE HUNT") print("=" * 40) project_root = Path("/Users/brock/Documents/GitHub/census-mcp-server") # Look for ChromaDB collections (methodology uses ChromaDB) chroma_files = list(project_root.rglob("chroma.sqlite3")) print(f"Found {len(chroma_files)} ChromaDB files:") current_time = time.time() for chroma_file in sorted(chroma_files): try: stat = chroma_file.stat() age_hours = (current_time - stat.st_mtime) / 3600 size_mb = stat.st_size / (1024 * 1024) age_str = f"{age_hours:.1f}h ago" if age_hours < 48 else f"{age_hours/24:.1f}d ago" rel_path = chroma_file.relative_to(project_root) print(f"\n📄 {rel_path}") print(f" Size: {size_mb:.1f}MB, Modified: {age_str}") # Check if this directory has a collection parent_dir = chroma_file.parent collection_files = list(parent_dir.glob("*")) print(f" Directory contains {len(collection_files)} files") # Look for collection names for f in collection_files: if f.is_file() and f.name not in ['chroma.sqlite3']: print(f" - {f.name}") except Exception as e: print(f"❌ Error reading {chroma_file}: {e}") if __name__ == "__main__": find_all_databases() print("\n" + "="*70 + "\n") check_expected_vs_actual() find_methodology_databases()

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/brockwebb/open-census-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server