# verify_index.py • 6.37 kB
#!/usr/bin/env python3
"""
Verification script for SCS-MCP indexing
Checks what's indexed and provides detailed statistics
"""
import sys
import os
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
import sqlite3
from src.core.clean_search import CleanSmartCodeSearch
# NOTE(review): several glyphs in the report strings below look mis-encoded.
# Only the success marker was repaired (to "✅"), because its literal was split
# across two lines and therefore unterminated. Every other string is reproduced
# unchanged -- confirm the intended characters before normalising them.

# Documentation files whose presence in the index is reported individually.
_KEY_DOCS = (
    'README.md',
    'CONTRIBUTING.md',
    'CHANGELOG.md',
    'LICENSE',
    'SECURITY.md',
    'docs/API.md',
    'docs/INSTALLATION.md',
    'docs/ARCHITECTURE.md',
)

# (query, label) pairs used to smoke-test the search front end.
_TEST_QUERIES = (
    ("README", "Documentation"),
    ("installation", "Setup/Install"),
    ("search function", "Code"),
    ("voice assistant", "Feature"),
    ("security", "Security"),
    ("configuration", "Config"),
    ("TODO", "Comments"),
    ("class", "Code Structure"),
)


def _print_section(title):
    """Print a blank line, the section title, and a 40-character rule."""
    print(f"\n{title}")
    print("-" * 40)


def _fetch_scalar(cursor, sql, params=()):
    """Execute *sql* and return the first column of the first result row."""
    cursor.execute(sql, params)
    return cursor.fetchone()[0]


def _report_overall_statistics(cursor):
    """Print the number of distinct indexed files and the total symbol count."""
    _print_section("๐ OVERALL STATISTICS")
    total_files = _fetch_scalar(cursor, "SELECT COUNT(DISTINCT file_path) FROM symbols")
    total_symbols = _fetch_scalar(cursor, "SELECT COUNT(*) FROM symbols")
    print(f"Total files indexed: {total_files}")
    print(f"Total symbols indexed: {total_symbols}")


def _report_file_types(cursor):
    """Print file and symbol counts grouped by a type derived from the path."""
    _print_section("๐ FILE TYPE BREAKDOWN")
    cursor.execute("""
        SELECT
            CASE
                WHEN file_path LIKE '%.md' THEN 'Markdown'
                WHEN file_path LIKE '%.json' THEN 'JSON'
                WHEN file_path LIKE '%.yaml' OR file_path LIKE '%.yml' THEN 'YAML'
                WHEN file_path LIKE '%.py' THEN 'Python'
                WHEN file_path LIKE '%.js' OR file_path LIKE '%.jsx' THEN 'JavaScript'
                WHEN file_path LIKE '%.ts' OR file_path LIKE '%.tsx' THEN 'TypeScript'
                WHEN file_path LIKE '%.sh' THEN 'Shell'
                WHEN file_path LIKE '%LICENSE%' THEN 'License'
                WHEN file_path LIKE '%SECURITY%' THEN 'Security'
                ELSE 'Other'
            END as file_type,
            COUNT(DISTINCT file_path) as file_count,
            COUNT(*) as symbol_count
        FROM symbols
        GROUP BY file_type
        ORDER BY file_count DESC
    """)
    print(f"{'Type':<15} | {'Files':>6} | {'Symbols':>8}")
    print("-" * 40)
    for file_type, file_count, symbol_count in cursor.fetchall():
        print(f"{file_type:<15} | {file_count:>6} | {symbol_count:>8}")


def _report_documentation_coverage(cursor):
    """Print, for each key documentation file, how many symbols are indexed."""
    _print_section("๐ DOCUMENTATION COVERAGE")
    for doc in _KEY_DOCS:
        # Suffix match, so the stored path may carry any directory prefix.
        count = _fetch_scalar(
            cursor,
            "SELECT COUNT(*) FROM symbols WHERE file_path LIKE ?",
            (f'%{doc}',),
        )
        status = "✅" if count > 0 else "โ"
        print(f"{status} {doc:<30} ({count} symbols)")


def _report_symbol_types(cursor):
    """Print the 15 most frequent values of the ``type`` column."""
    _print_section("๐ท๏ธ SYMBOL TYPE DISTRIBUTION")
    cursor.execute("""
        SELECT type, COUNT(*) as count
        FROM symbols
        GROUP BY type
        ORDER BY count DESC
        LIMIT 15
    """)
    print(f"{'Symbol Type':<20} | {'Count':>8}")
    print("-" * 40)
    for symbol_type, count in cursor.fetchall():
        # str() keeps a NULL type from raising TypeError in the width format.
        print(f"{str(symbol_type):<20} | {count:>8}")


def _report_search_test(root):
    """Run each sample query through CleanSmartCodeSearch and print hit counts."""
    _print_section("๐งช SEARCH FUNCTIONALITY TEST")
    search = CleanSmartCodeSearch(project_root=str(root))
    print(f"{'Query':<20} | {'Type':<15} | {'Results'}")
    print("-" * 60)
    for query, query_type in _TEST_QUERIES:
        results = search.search(query, limit=5)
        hit_count = len(results)
        status = "✅" if hit_count > 0 else "โ"
        print(f"{query:<20} | {query_type:<15} | {status} {hit_count} found")
        if hit_count > 0:
            # Show top result
            top = results[0]
            print(f"{'':>38} โ {top['name']} in {Path(top['file_path']).name}")


def _report_recent_files(cursor):
    """Print up to 10 documentation/config paths with their symbol counts."""
    _print_section("๐ RECENTLY INDEXED FILES (Last 10)")
    # NOTE(review): rows are ordered by file_path descending, not by any
    # timestamp, so "recently" describes the heading rather than the query.
    cursor.execute("""
        SELECT DISTINCT file_path, COUNT(*) as symbol_count
        FROM symbols
        WHERE file_path LIKE '%.md'
            OR file_path LIKE '%.json'
            OR file_path LIKE '%.yaml'
            OR file_path LIKE '%.yml'
            OR file_path LIKE '%LICENSE%'
            OR file_path LIKE '%SECURITY%'
        GROUP BY file_path
        ORDER BY file_path DESC
        LIMIT 10
    """)
    for path_text, symbol_count in cursor.fetchall():
        has_separator = '/' in path_text or '\\' in path_text
        label = Path(path_text).name if has_separator else path_text
        print(f" โข {label:<35} ({symbol_count} symbols)")


def _report_recommendations(cursor, root):
    """Print advice based on Markdown, Python, and total file counts."""
    _print_section("๐ก RECOMMENDATIONS")

    # Check if docs are indexed.
    # NOTE(review): this counts symbol rows, not distinct files, although the
    # messages speak of "files" -- confirm which was intended.
    md_count = _fetch_scalar(
        cursor, "SELECT COUNT(*) FROM symbols WHERE file_path LIKE '%.md'"
    )
    if md_count == 0:
        print("โ ๏ธ No Markdown files indexed - run scripts/quick_reindex_docs.py")
    elif md_count < 10:
        print("โ ๏ธ Few Markdown files indexed - consider full reindex")
    else:
        print("✅ Documentation files are properly indexed")

    # Check Python coverage: warn when fewer than 80% of the *.py files found
    # anywhere under the root appear in the index.
    py_files = _fetch_scalar(
        cursor,
        "SELECT COUNT(DISTINCT file_path) FROM symbols WHERE file_path LIKE '%.py'",
    )
    actual_py_files = sum(1 for _ in root.rglob("*.py"))
    if py_files < actual_py_files * 0.8:
        print(f"โ ๏ธ Only {py_files}/{actual_py_files} Python files indexed")
    else:
        print(f"✅ Good Python file coverage ({py_files} files)")

    # Check for stale entries
    indexed_files = _fetch_scalar(cursor, "SELECT COUNT(DISTINCT file_path) FROM symbols")
    if indexed_files > 200:
        print("โน๏ธ Consider running cleanup to remove stale entries")


def main(project_root=None):
    """Print a verification report for the SCS-MCP symbol index.

    The report reads ``<project_root>/.claude-symbols/search.db`` and prints,
    in order: overall statistics, a file-type breakdown, documentation
    coverage, the symbol-type distribution, a search smoke test, a listing of
    documentation/config files, and recommendations.

    Args:
        project_root: Optional path of the project whose index is inspected.
            When omitted, the parent of this script's directory is used.

    Returns:
        None. If the database file does not exist, a message is printed and
        the function returns before opening any connection.
    """
    print("๐ SCS-MCP Index Verification Report")
    print("=" * 60)

    root = Path(__file__).parent.parent if project_root is None else Path(project_root)
    db_path = root / ".claude-symbols" / "search.db"
    if not db_path.exists():
        print("โ No index database found!")
        return

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        _report_overall_statistics(cursor)
        _report_file_types(cursor)
        _report_documentation_coverage(cursor)
        _report_symbol_types(cursor)
        _report_search_test(root)
        _report_recent_files(cursor)
        _report_recommendations(cursor, root)
    finally:
        # Release the database handle even when a query or the search raises.
        conn.close()

    print("\n" + "=" * 60)
    print("โจ Verification complete!")
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()