Skip to main content
Glama
file_loading.py2.82 kB
"""File loading utilities for code retrieval tools.""" import logging import os from pathlib import Path logger = logging.getLogger(__name__) LANGUAGE_EXTENSIONS = { ".java": "java", ".kt": "kotlin", ".kts": "kotlin", ".py": "python", ".js": "javascript", ".ts": "typescript", ".cpp": "cpp", ".c": "c", ".h": "c", ".hpp": "cpp", ".cs": "csharp", ".go": "go", ".rs": "rust", ".rb": "ruby", ".php": "php", } def get_language_from_extension(file_path: str) -> str: """Get the programming language from file extension.""" extension = Path(file_path).suffix.lower() return LANGUAGE_EXTENSIONS.get(extension, "unknown") def is_code_file(file_path: str) -> bool: """Check if a file is a supported code file.""" extension = Path(file_path).suffix.lower() return extension in LANGUAGE_EXTENSIONS def should_skip_directory(dir_name: str) -> bool: """Check if a directory should be skipped during traversal.""" skip_dirs = { ".git", ".svn", ".hg", "node_modules", "venv", ".venv", "__pycache__", ".idea", ".vscode", ".eclipse", "build", "dist", "target", "out", ".gradle", ".maven", "logs", "tmp", "temp", } return dir_name in skip_dirs def load_files(codebase_path: str) -> list[tuple[str, str]]: """Load all supported code files from a codebase directory. Args: codebase_path: Path to the codebase directory Returns: List of tuples (file_path, language) for each supported code file """ if not codebase_path or not os.path.exists(codebase_path): logger.warning(f"Codebase path does not exist: {codebase_path}") return [] files = [] try: for root, dirs, filenames in os.walk(codebase_path): dirs[:] = [d for d in dirs if not should_skip_directory(d)] for filename in filenames: file_path = os.path.join(root, filename) if is_code_file(file_path): language = get_language_from_extension(file_path) files.append((file_path, language)) except Exception as e: logger.error(f"Error loading files from {codebase_path}: {e}") return files def filter_files_by_language( files: list[tuple[str, str]], language: str ) -> list[tuple[str, str]]: """Filter files by programming language.""" return [(path, lang) for path, lang in files if lang == language] def get_file_stats(files: list[tuple[str, str]]) -> dict: """Get statistics about the loaded files.""" stats = {} for _, language in files: stats[language] = stats.get(language, 0) + 1 return stats

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/GetSherlog/Sherlog-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server