# file_scanner.py
"""File discovery and filtering for C++ projects."""
import os
import sys
from pathlib import Path
from typing import List, Set
class FileScanner:
    """Discover and filter C++ source files beneath a project root.

    Paths are compared against the project root only after both sides have
    been made absolute and lexically normalised, so ``..`` segments cannot
    make a file outside the root look like it is inside it, and a relative
    root can be compared with absolute file paths.
    """

    # File-name suffixes treated as C++ sources or headers.
    CPP_EXTENSIONS = {'.cpp', '.cc', '.cxx', '.c++', '.h', '.hpp', '.hxx', '.h++'}
    # Names of top-level directories to exclude (set by configuration).
    EXCLUDE_DIRS: Set[str] = set()
    # Names of directories that contain dependencies (set by configuration).
    DEPENDENCY_DIRS: Set[str] = set()

    def __init__(self, project_root: Path, include_dependencies: bool = False):
        """Store the scan configuration.

        Args:
            project_root: Directory that is walked and used as the reference
                point for every containment check.
            include_dependencies: When true, files located outside
                ``project_root`` are not rejected by ``should_skip_file``.
        """
        self.project_root = project_root
        self.include_dependencies = include_dependencies

    def _relative_to_root(self, path) -> Path:
        """Return ``path`` expressed relative to the project root.

        Both the path and the root go through ``os.path.abspath`` first.
        ``Path.relative_to`` compares components literally, so without this
        step ``<root>/../other`` would be accepted as being under the root.

        Raises:
            ValueError: If ``path`` does not lie under the project root.
        """
        root = os.path.abspath(self.project_root)
        return Path(os.path.abspath(path)).relative_to(root)

    def should_skip_directory(self, dir_path: str) -> bool:
        """Check if a directory should be skipped.

        Only directories that sit directly under the project root are
        compared with ``EXCLUDE_DIRS``; deeper directories with the same
        name, and directories outside the root, are never skipped here.
        """
        try:
            rel_path = self._relative_to_root(dir_path)
        except ValueError:
            # Directory is outside the project root.
            return False
        # Exactly one component means a top-level directory.
        if len(rel_path.parts) == 1:
            return rel_path.parts[0] in self.EXCLUDE_DIRS
        return False

    def should_skip_file(self, file_path: str) -> bool:
        """Check if a file should be skipped during indexing.

        Files outside the project root are skipped unless
        ``include_dependencies`` is set. Files inside the root are skipped
        when their first path component is listed in ``EXCLUDE_DIRS``.
        """
        try:
            rel_path = self._relative_to_root(file_path)
        except ValueError:
            # File is outside the project root.
            return not self.include_dependencies
        parts = rel_path.parts
        return bool(parts) and parts[0] in self.EXCLUDE_DIRS

    def find_cpp_files(self) -> List[str]:
        """Find all C++ files in the project.

        Walks ``project_root`` with ``os.walk``, pruning excluded top-level
        directories before descending into them.

        Returns:
            Paths as produced by joining the walked directory and the file
            name. If an exception interrupts the walk, it is reported on
            stderr and the files collected so far are returned.
        """
        files: List[str] = []
        try:
            # str.endswith accepts a tuple, so one call covers every suffix.
            suffixes = tuple(self.CPP_EXTENSIONS)
            for root, dirs, filenames in os.walk(self.project_root):
                # Filter in place so os.walk does not descend into them.
                dirs[:] = [
                    d for d in dirs
                    if not self.should_skip_directory(os.path.join(root, d))
                ]
                for filename in filenames:
                    if not filename.endswith(suffixes):
                        continue
                    file_path = os.path.join(root, filename)
                    if not self.should_skip_file(file_path):
                        files.append(file_path)
        except Exception as e:
            # Best effort: report the failure and return what was found.
            print(f"Error scanning directory: {e}", file=sys.stderr)
        return files

    def is_project_file(self, file_path: str) -> bool:
        """Check if a file is part of the project (not a dependency).

        Returns False for an empty path, for a path outside the project
        root, and for a path with any component listed in
        ``DEPENDENCY_DIRS``. Every component of the root-relative path is
        checked, including the last one.
        """
        if not file_path:
            return False
        try:
            rel_path = self._relative_to_root(file_path)
        except ValueError:
            # File is outside the project root - it's a dependency.
            return False
        return not any(part in self.DEPENDENCY_DIRS for part in rel_path.parts)