Skip to main content
Glama

llm-context

by cyberchitta
file_selector.py (9.84 kB)
import os
from dataclasses import dataclass
from logging import ERROR, WARNING
from pathlib import Path
from typing import Optional

from pathspec import GitIgnoreSpec  # type: ignore

from llm_context.context_spec import ContextSpec
from llm_context.rule import IGNORE_NOTHING, INCLUDE_ALL, Rule
from llm_context.state import FileSelection
from llm_context.utils import PathConverter, log, safe_read_file


@dataclass(frozen=True)
class PathspecIgnorer:
    """Wrap a ``GitIgnoreSpec`` and answer whether a path matches it."""

    # Pattern set built from the ignore patterns passed to ``create``.
    pathspec: GitIgnoreSpec

    @staticmethod
    def create(ignore_patterns: list[str]) -> "PathspecIgnorer":
        """Build an ignorer from a list of gitignore-style pattern lines."""
        return PathspecIgnorer(GitIgnoreSpec.from_lines(ignore_patterns))

    def ignore(self, path: str) -> bool:
        """Return whether ``path`` matches the wrapped pattern set.

        The root directory (``"/"`` or ``""``) is rejected by assertion.
        """
        assert path not in ("/", ""), "Root directory cannot be an input for ignore method"
        return self.pathspec.match_file(path)


@dataclass(frozen=True)
class GitIgnorer:
    """Apply every ``.gitignore`` found under a root, each scoped to its own directory."""

    # Pairs of (directory prefix such as "/" or "/src/pkg", ignorer for that directory).
    ignorer_data: list[tuple[str, PathspecIgnorer]]

    @staticmethod
    def from_git_root(
        root_dir: str, xtra_root_patterns: Optional[list[str]] = None
    ) -> "GitIgnorer":
        """Collect all ``.gitignore`` files below ``root_dir`` into a ``GitIgnorer``.

        Args:
            root_dir: Directory to walk for ``.gitignore`` files.
            xtra_root_patterns: Extra patterns applied from the root. When
                non-empty they form the first entry, ahead of any
                ``.gitignore``-derived entries. ``None`` means none.

        Returns:
            A ``GitIgnorer`` whose ``.gitignore`` entries are ordered deepest
            directory first, ties broken alphabetically by prefix.
        """
        root_entries: list[tuple[str, PathspecIgnorer]] = []
        if xtra_root_patterns:
            root_entries.append(("/", PathspecIgnorer.create(xtra_root_patterns)))
        gitignore_entries = [
            (relative_path, PathspecIgnorer.create(patterns))
            for relative_path, patterns in GitIgnorer._collect_gitignores(root_dir)
        ]
        # Deeper directories (more "/" in the prefix) sort first; the extra
        # root patterns, if any, always stay at the front.
        gitignore_entries.sort(key=lambda entry: (-entry[0].count("/"), entry[0]))
        return GitIgnorer(root_entries + gitignore_entries)

    @staticmethod
    def _collect_gitignores(top: str) -> list[tuple[str, list[str]]]:
        """Walk ``top`` and return ``(prefix, pattern_lines)`` per non-empty ``.gitignore``.

        The prefix is ``"/"`` for ``top`` itself and ``"/<relative dir>"`` for
        subdirectories. Files whose content reads as empty/falsy are skipped.
        """
        gitignores = []
        for root, _, files in os.walk(top):
            if ".gitignore" not in files:
                continue
            content = safe_read_file(os.path.join(root, ".gitignore"))
            if not content:
                continue
            relpath = os.path.relpath(root, top)
            fixpath = "/" if relpath == "." else f"/{relpath}"
            gitignores.append((fixpath, content.splitlines()))
        return gitignores

    def ignore(self, path: str) -> bool:
        """Return True if any applicable ignorer matches ``path``.

        ``path`` is a root-relative path with a leading ``"/"``. An ignorer is
        applicable only when ``path`` lies inside its directory; the path
        handed to it is made relative to that directory.
        """
        assert path not in ("/", ""), "Root directory cannot be an input for ignore method"
        separators = ("/", os.sep)
        for prefix, ignorer in self.ignorer_data:
            if not path.startswith(prefix):
                continue
            if prefix == "/":
                test_path = path[1:]
            else:
                remainder = path[len(prefix) :]
                # A bare string-prefix match is not enough: "/src" must not
                # capture "/srcfoo/x". Require a separator right after it.
                if remainder and remainder[0] not in separators:
                    continue
                test_path = remainder.lstrip("/")
            # An empty test_path means ``path`` is the directory itself.
            if test_path and ignorer.ignore(test_path):
                return True
        return False


@dataclass(frozen=True)
class IncludeFilter:
    """Wrap a ``GitIgnoreSpec`` used as a positive (inclusion) filter."""

    # Pattern set built from the include patterns passed to ``create``.
    pathspec: GitIgnoreSpec

    @staticmethod
    def create(include_patterns: list[str]) -> "IncludeFilter":
        """Build an include filter from a list of gitignore-style pattern lines."""
        return IncludeFilter(GitIgnoreSpec.from_lines(include_patterns))

    def include(self, path: str) -> bool:
        """Return whether ``path`` matches the wrapped pattern set.

        The root directory (``"/"`` or ``""``) is rejected by assertion.
        """
        assert path not in ("/", ""), "Root directory cannot be an input for include method"
        return self.pathspec.match_file(path)


@dataclass(frozen=True)
class FileSelector:
    """Select files under a root using ignore, limit-to and also-include patterns."""

    # Root directory of the traversal, as a string.
    root_path: str
    # Combined ignore rules (extra patterns plus discovered .gitignore files).
    ignorer: GitIgnorer
    # Converts the selected paths to their relative form (see get_relative_files).
    converter: PathConverter
    # A non-ignored file must also match this filter to be selected.
    limit_filter: IncludeFilter
    # Files matching this filter are selected regardless of ignore rules.
    also_include_filter: IncludeFilter
    # When truthy, only files with mtime strictly greater than this are kept.
    since: Optional[float]

    @staticmethod
    def create_universal(root_path: Path) -> "FileSelector":
        """Create a selector that passes ``IGNORE_NOTHING`` as its ignore patterns."""
        return FileSelector.create_ignorer(root_path, IGNORE_NOTHING)

    @staticmethod
    def create_ignorer(root_path: Path, pathspecs: list[str]) -> "FileSelector":
        """Create a selector with the given ignore patterns, ``INCLUDE_ALL`` as limit."""
        return FileSelector.create(root_path, pathspecs, INCLUDE_ALL, [])

    @staticmethod
    def create(
        root_path: Path,
        ignore_pathspecs: list[str],
        limit_to_pathspecs: list[str],
        also_include_pathspecs: list[str],
        since: Optional[float] = None,
    ) -> "FileSelector":
        """Assemble a selector from pattern lists.

        Args:
            root_path: Root directory to select from.
            ignore_pathspecs: Extra ignore patterns applied from the root, in
                addition to any ``.gitignore`` files found below it.
            limit_to_pathspecs: Patterns a non-ignored file must match.
            also_include_pathspecs: Patterns that select files regardless of
                the ignore rules.
            since: Optional mtime threshold; see ``get_files``.
        """
        return FileSelector(
            str(root_path),
            GitIgnorer.from_git_root(str(root_path), ignore_pathspecs),
            PathConverter.create(root_path),
            IncludeFilter.create(limit_to_pathspecs),
            IncludeFilter.create(also_include_pathspecs),
            since,
        )

    def filter_files(self, files: list[str]) -> list[str]:
        """Return the members of ``files`` that this selector would select, in order."""
        if not files:
            return []
        # Walk the tree once; doing this per candidate would re-traverse the
        # filesystem for every element of ``files``.
        selected = set(self.get_files())
        return [f for f in files if f in selected]

    def get_files(self) -> list[str]:
        """Return the de-duplicated union of regular and also-include selections.

        When ``since`` is truthy, only files whose modification time is
        strictly greater than it are returned. Order is unspecified.
        """
        files = list(set(self.traverse(self.root_path) + self.also_traverse(self.root_path)))
        if not self.since:
            return files
        return [f for f in files if Path(f).stat().st_mtime > self.since]

    def get_relative_files(self) -> list[str]:
        """Return the selected files converted to relative form, sorted."""
        return sorted(self.converter.to_relative(self.get_files()))

    def traverse(self, current_dir: str) -> list[str]:
        """Recursively list files under ``current_dir`` that pass ignore and limit filters.

        Ignored directories are not descended into. Files of ``current_dir``
        come first, followed by the files of each subdirectory in listing order.
        """
        relative_current_dir = os.path.relpath(current_dir, self.root_path)
        dirs: list[str] = []
        files: list[str] = []
        for entry in os.listdir(current_dir):
            entry_path = os.path.join(current_dir, entry)
            relative = self._relative_path(relative_current_dir, entry)
            if os.path.isdir(entry_path):
                if not self.ignorer.ignore(relative):
                    dirs.append(entry_path)
            elif self._should_include_file(relative):
                files.append(entry_path)
        subdir_files = [file for d in dirs for file in self.traverse(d)]
        return files + subdir_files

    def _should_include_file(self, path: str) -> bool:
        """Return True when ``path`` is not ignored and matches the limit filter."""
        assert path not in ("/", ""), "Root directory cannot be an input for filtering"
        if self.ignorer.ignore(path):
            return False
        return self.limit_filter.include(path)

    def also_traverse(self, current_dir: str) -> list[str]:
        """Recursively list files under ``current_dir`` matching the also-include filter.

        Ignore rules are not consulted here: every directory is descended
        into. Returns an empty list immediately when the filter has no patterns.
        """
        if not self.also_include_filter.pathspec.patterns:
            return []
        relative_current_dir = os.path.relpath(current_dir, self.root_path)
        dirs: list[str] = []
        files: list[str] = []
        for entry in os.listdir(current_dir):
            entry_path = os.path.join(current_dir, entry)
            if os.path.isdir(entry_path):
                dirs.append(entry_path)
            elif self.also_include_filter.include(
                self._relative_path(relative_current_dir, entry)
            ):
                files.append(entry_path)
        subdir_files = [file for d in dirs for file in self.also_traverse(d)]
        return files + subdir_files

    def _relative_path(self, dir: str, filename: str) -> str:
        """Join ``dir`` and ``filename``, normalize, and prefix with ``"/"``."""
        return f"/{os.path.normpath(os.path.join(dir, filename))}"


@dataclass(frozen=True)
class ContextSelector:
    """Pair of selectors (full and excerpted) built from one rule."""

    # Selector configured with the rule's "full" patterns.
    full_selector: FileSelector
    # Selector configured with the rule's "excerpted" patterns.
    excerpted_selector: FileSelector
    # The rule the selectors were built from; queried for excerpt modes.
    rule: Rule

    @staticmethod
    def create(spec: ContextSpec, since: Optional[float] = None) -> "ContextSelector":
        """Build both selectors from ``spec.rule`` rooted at ``spec.project_root_path``."""
        root_path = spec.project_root_path
        rule = spec.rule
        full_selector = FileSelector.create(
            root_path,
            rule.get_ignore_patterns("full"),
            rule.get_limit_to_patterns("full"),
            rule.get_also_include_patterns("full"),
            since,
        )
        excerpted_selector = FileSelector.create(
            root_path,
            rule.get_ignore_patterns("excerpted"),
            rule.get_limit_to_patterns("excerpted"),
            rule.get_also_include_patterns("excerpted"),
            since,
        )
        return ContextSelector(full_selector, excerpted_selector, rule)

    def select_full_files(self, file_selection: FileSelection) -> "FileSelection":
        """Recompute the full-file list and drop those files from the excerpted list.

        Logs a warning when at least one previously excerpted file is now
        covered by the full selection.
        """
        full_files = self.full_selector.get_relative_files()
        excerpted_files = file_selection.excerpted_files
        full_lookup = set(full_files)  # built once for O(1) membership tests
        updated_excerpted_files = [f for f in excerpted_files if f not in full_lookup]
        if len(excerpted_files) != len(updated_excerpted_files):
            log(
                WARNING,
                "Some files previously in excerpted selection have been moved to full selection.",
            )
        return FileSelection._create(
            file_selection.rule_name, full_files, updated_excerpted_files, file_selection.timestamp
        )

    def select_excerpted_files(self, file_selection: FileSelection) -> "FileSelection":
        """Recompute the excerpted list, excluding files already in the full list.

        Logs a warning when the incoming selection has no full files.
        """
        full_files = file_selection.full_files
        if not full_files:
            log(
                WARNING,
                "No full files have been selected. Consider running full file selection first.",
            )
        all_excerpted_files = self.excerpted_selector.get_relative_files()
        full_lookup = set(full_files)  # built once for O(1) membership tests
        excerpted_files = [f for f in all_excerpted_files if f not in full_lookup]
        return FileSelection._create(
            file_selection.rule_name, full_files, excerpted_files, file_selection.timestamp
        )

    def select_excerpted_only(self, file_selection: FileSelection) -> "FileSelection":
        """Return a selection with no full files and only excerpt-capable files.

        A file is kept when ``rule.get_excerpt_mode`` returns a truthy value for it.
        """
        all_excerpted_files = self.excerpted_selector.get_relative_files()
        supported_excerpted = [f for f in all_excerpted_files if self.rule.get_excerpt_mode(f)]
        return FileSelection._create(
            file_selection.rule_name, [], supported_excerpted, file_selection.timestamp
        )

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/cyberchitta/llm-context.py'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.