# quick_reindex_docs.py • 2.59 kB
#!/usr/bin/env python3
"""
Quick documentation reindexing script
Focuses only on documentation files for faster indexing
"""
import sys
import os
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from src.core.clean_search import CleanSmartCodeSearch
import time
# Directory names that are never descended into while collecting files.
_IGNORED_DIRS = frozenset({
    '.git', 'node_modules', '__pycache__', '.venv', 'venv',
    'dist', 'build', 'dev-archive', 'all-MiniLM-L6-v2',
})

# File-name patterns (Path.match syntax) treated as documentation.
_DOC_PATTERNS = ('*.md', '*.json', '*.yaml', '*.yml', 'LICENSE', 'SECURITY')


def _collect_doc_files(project_root: Path) -> list:
    """Return the sorted documentation files found under *project_root*.

    The tree is walked once. Directories named in ``_IGNORED_DIRS`` are
    pruned before descent, so only directories *below* the root are
    filtered; an ancestor of the root that happens to be called e.g.
    ``build`` no longer excludes every file. Only file entries are
    returned, never directories whose name matches a pattern.
    """
    doc_files = []
    for dirpath, dirnames, filenames in os.walk(project_root):
        # Prune in place so os.walk never enters ignored directories.
        dirnames[:] = [d for d in dirnames if d not in _IGNORED_DIRS]
        for filename in filenames:
            candidate = Path(dirpath, filename)
            if any(candidate.match(pattern) for pattern in _DOC_PATTERNS):
                doc_files.append(candidate)
    return sorted(doc_files)


def main() -> None:
    """Reindex the project's documentation files and smoke-test the search.

    The project root is taken to be the parent of this script's directory.
    Every matching file is passed to ``search.index_file(..., force=True)``;
    a failure on one file is reported and counted but does not stop the run.
    Afterwards a few fixed queries are run through ``search.search`` and the
    top hit for each is printed.
    """
    # NOTE(review): the status glyphs in these messages were mojibake in the
    # source this was recovered from; they are restored best-effort - confirm.
    print("📚 Quick Documentation Reindex")
    print("=" * 40)

    project_root = Path(__file__).parent.parent
    search = CleanSmartCodeSearch(project_root=str(project_root))

    doc_files = _collect_doc_files(project_root)
    total = len(doc_files)
    print(f"Found {total} documentation files to index")

    # Index each file, keeping going on individual failures.
    success_count = 0
    error_count = 0
    for i, file_path in enumerate(doc_files, 1):
        # Every collected path is built from project_root, so this is safe.
        rel_path = file_path.relative_to(project_root)
        print(f" [{i}/{total}] Indexing: {rel_path}")
        try:
            search.index_file(file_path, force=True)
        except Exception as e:  # best-effort: report and move on
            print(f" ❌ Error: {e}")
            error_count += 1
        else:
            success_count += 1

    print("\n" + "=" * 40)
    print(f"✅ Successfully indexed: {success_count} files")
    if error_count > 0:
        print(f"❌ Failed to index: {error_count} files")

    # Smoke-test the fresh index with a few representative queries.
    print("\n🧪 Testing documentation search...")
    test_queries = ["README", "installation", "contributing", "security"]
    for query in test_queries:
        results = search.search(query, limit=2)
        if results:
            print(f" ✓ '{query}': Found {len(results)} results")
            print(f" Top: {results[0]['name']} in {results[0]['file_path']}")
        else:
            print(f" ✗ '{query}': No results found")
# Script entry point: run the reindex only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()