# reindex_all.py
#!/usr/bin/env python3
"""
Full reindexing script for SCS-MCP
Forces a complete reindex of all files including documentation
"""
import sys
import os
from pathlib import Path
# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from src.core.clean_search import CleanSmartCodeSearch
import time
def main():
    """Force a complete reindex of the project.

    Clears the existing symbol index, gathers every code, documentation,
    and specially named file under the project root (skipping common
    vendored/generated directories), reindexes each file, then prints
    summary statistics and runs a handful of smoke-test searches.
    """
    # NOTE(review): the original status messages contained mis-encoded
    # emoji bytes (mojibake), two of which broke string literals across
    # lines and made the file unparseable. They are replaced with plain
    # ASCII markers here.
    print("[*] Starting full project reindex...")
    print("=" * 60)

    # Project root is the parent of the scripts/ directory this file lives in.
    project_root = Path(__file__).parent.parent
    print(f"[*] Project root: {project_root}")

    print("[*] Initializing search engine...")
    search = CleanSmartCodeSearch(project_root=str(project_root))

    # Drop previously indexed symbols so the rebuild starts clean.
    print("[*] Clearing existing index...")
    try:
        search.db.execute("DELETE FROM symbols")
        print("  [OK] Existing index cleared")
    except Exception as e:
        # Best-effort: e.g. a missing table on first run is not fatal.
        print(f"  [WARN] Could not clear index: {e}")

    print("\n[*] Starting indexing process...")
    start_time = time.time()

    file_count = 0
    code_files = 0
    doc_files = 0

    # Collect candidate files: code extensions, documentation extensions,
    # and specially named documentation files (e.g. README).
    files_to_index = []
    for pattern_info in search.language_patterns.values():
        for ext in pattern_info['extensions']:
            files_to_index.extend(project_root.rglob(f'*{ext}'))
    for pattern_info in search.doc_patterns.values():
        for ext in pattern_info.get('extensions', []):
            files_to_index.extend(project_root.rglob(f'*{ext}'))
        for special_file in pattern_info.get('special_files', []):
            files_to_index.extend(project_root.rglob(special_file))

    # De-duplicate and drop anything inside an ignored directory.
    ignored_dirs = {'.git', 'node_modules', '__pycache__', '.venv', 'venv',
                    'dist', 'build', 'dev-archive'}
    files_to_index = [
        f for f in set(files_to_index)
        if not any(part in ignored_dirs for part in f.parts)
    ]

    print(f"[*] Found {len(files_to_index)} files to index")

    # Precompute extension / special-name sets once instead of rescanning
    # the pattern tables for every file (the original did O(patterns)
    # work per file).
    code_exts = {ext for p in search.language_patterns.values()
                 for ext in p['extensions']}
    doc_exts = {ext for p in search.doc_patterns.values()
                for ext in p.get('extensions', [])}
    doc_names = {name for p in search.doc_patterns.values()
                 for name in p.get('special_files', [])}

    for i, file_path in enumerate(files_to_index, 1):
        try:
            # Classify for the summary statistics only.
            if file_path.suffix in code_exts:
                code_files += 1
            elif (file_path.suffix in doc_exts
                  or file_path.name in doc_names):
                doc_files += 1

            # force=True reindexes even files the engine considers current.
            search.index_file(file_path, force=True)

            if i % 10 == 0:
                print(f"  [*] Indexed {i}/{len(files_to_index)} files...")
            file_count += 1
        except Exception as e:
            # Keep going: one unreadable file must not abort the rebuild.
            print(f"  [WARN] Error indexing {file_path}: {e}")

    elapsed_time = time.time() - start_time

    # Final symbol count straight from the index database.
    row = search.db.fetchone("SELECT COUNT(*) FROM symbols")
    symbol_count = row[0] if row else 0

    print("\n" + "=" * 60)
    print("Reindexing complete!")
    print("Statistics:")
    print(f"  - Total files indexed: {file_count}")
    print(f"  - Code files: {code_files}")
    print(f"  - Documentation files: {doc_files}")
    print(f"  - Total symbols: {symbol_count}")
    print(f"  - Time taken: {elapsed_time:.2f} seconds")
    # Guard against ZeroDivisionError on an empty or near-instant run
    # (the original divided unconditionally).
    if elapsed_time > 0:
        print(f"  - Speed: {file_count / elapsed_time:.1f} files/second")

    # Smoke-test the freshly built index with a few representative queries.
    print("\nTesting search functionality...")
    test_queries = [
        "README",
        "installation",
        "configuration",
        "search function",
        "TODO",
    ]
    for query in test_queries:
        results = search.search(query, limit=3)
        print(f"  - '{query}': {len(results)} results found")

    print("\nAll done! Your project is now fully indexed.")
    print("  Documentation and configuration files are now searchable.")
# Script entry point: run the full reindex when executed directly.
if __name__ == "__main__":
    main()