shallow_index_manager.py•5.68 kB
"""
Shallow Index Manager - Manages a minimal file-list-only index.
This manager builds and loads a shallow index consisting of relative file
paths only. It is optimized for fast initialization and filename-based
search/browsing. Content parsing and symbol extraction are not performed.
"""
from __future__ import annotations
import hashlib
import json
import logging
import os
import tempfile
import threading
from typing import List, Optional
import re
from .json_index_builder import JSONIndexBuilder
from ..constants import SETTINGS_DIR, INDEX_FILE_SHALLOW
logger = logging.getLogger(__name__)
class ShallowIndexManager:
"""Manage shallow (file-list) index lifecycle and storage."""
def __init__(self) -> None:
self.project_path: Optional[str] = None
self.index_builder: Optional[JSONIndexBuilder] = None
self.temp_dir: Optional[str] = None
self.index_path: Optional[str] = None
self._file_list: Optional[List[str]] = None
self._lock = threading.RLock()
def set_project_path(self, project_path: str) -> bool:
with self._lock:
try:
if not isinstance(project_path, str) or not project_path.strip():
logger.error("Invalid project path for shallow index")
return False
project_path = project_path.strip()
if not os.path.isdir(project_path):
logger.error(f"Project path does not exist: {project_path}")
return False
self.project_path = project_path
self.index_builder = JSONIndexBuilder(project_path)
project_hash = hashlib.md5(project_path.encode()).hexdigest()[:12]
self.temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR, project_hash)
os.makedirs(self.temp_dir, exist_ok=True)
self.index_path = os.path.join(self.temp_dir, INDEX_FILE_SHALLOW)
return True
except Exception as e: # noqa: BLE001 - centralized logging
logger.error(f"Failed to set project path (shallow): {e}")
return False
def build_index(self) -> bool:
"""Build and persist the shallow file list index."""
with self._lock:
if not self.index_builder or not self.index_path:
logger.error("ShallowIndexManager not initialized")
return False
try:
file_list = self.index_builder.build_shallow_file_list()
with open(self.index_path, 'w', encoding='utf-8') as f:
json.dump(file_list, f, ensure_ascii=False)
self._file_list = file_list
logger.info(f"Built shallow index with {len(file_list)} files")
return True
except Exception as e: # noqa: BLE001
logger.error(f"Failed to build shallow index: {e}")
return False
def load_index(self) -> bool:
"""Load shallow index from disk to memory."""
with self._lock:
try:
if not self.index_path or not os.path.exists(self.index_path):
return False
with open(self.index_path, 'r', encoding='utf-8') as f:
data = json.load(f)
if isinstance(data, list):
# Normalize slashes/prefix
normalized: List[str] = []
for p in data:
if isinstance(p, str):
q = p.replace('\\\\', '/').replace('\\', '/')
if q.startswith('./'):
q = q[2:]
normalized.append(q)
self._file_list = normalized
return True
return False
except Exception as e: # noqa: BLE001
logger.error(f"Failed to load shallow index: {e}")
return False
def get_file_list(self) -> List[str]:
with self._lock:
return list(self._file_list or [])
def find_files(self, pattern: str = "*") -> List[str]:
with self._lock:
if not isinstance(pattern, str):
return []
norm = (pattern.strip() or "*").replace('\\\\','/').replace('\\','/')
regex = self._compile_glob_regex(norm)
files = self._file_list or []
if norm == "*":
return list(files)
return [f for f in files if regex.match(f) is not None]
@staticmethod
def _compile_glob_regex(pattern: str) -> re.Pattern:
i = 0
out = []
special = ".^$+{}[]|()"
while i < len(pattern):
c = pattern[i]
if c == '*':
if i + 1 < len(pattern) and pattern[i + 1] == '*':
out.append('.*')
i += 2
continue
else:
out.append('[^/]*')
elif c == '?':
out.append('[^/]')
elif c in special:
out.append('\\' + c)
else:
out.append(c)
i += 1
return re.compile('^' + ''.join(out) + '$')
def cleanup(self) -> None:
with self._lock:
self.project_path = None
self.index_builder = None
self.temp_dir = None
self.index_path = None
self._file_list = None
# Global singleton
_shallow_manager = ShallowIndexManager()
def get_shallow_index_manager() -> ShallowIndexManager:
return _shallow_manager