Scantool - File Scanner MCP

Overview Schema Related Servers Score Discussions

config.py•22.3 KiB

"""Config language support - analyzer for configuration files.

This module provides ConfigLanguage for analyzing configuration files
(.json, .yaml, .yml, .toml, .ini). Since config files don't have traditional
code structure, scan() returns an empty list.

Key functionality:
- extract_imports(): Extract file path references from config files
- find_entry_points(): Find project configs, scripts sections, etc.
- classify_file(): All config files go to "config" cluster
"""

import json
import logging
import posixpath
import re
from pathlib import Path
from typing import Optional

from .base import BaseLanguage
from .models import (
    StructureNode,
    ImportInfo,
    EntryPointInfo,
    DefinitionInfo,
    CallInfo,
)

logger = logging.getLogger(__name__)


class ConfigLanguage(BaseLanguage):
    """Language handler for configuration files (.json, .yaml, .yml, .toml, .ini).

    Config files don't have traditional code structure (classes, functions),
    so scan() returns an empty list. The primary value is in:
    - extract_imports(): Find file path references
    - find_entry_points(): Find project configs and scripts
    """

    # Exact (lower-cased) file names that are always skipped. Names ending in
    # ".lock" are also caught by the suffix check in should_skip(); they are
    # listed here anyway so the intent is explicit.
    _LOCK_FILENAMES = frozenset({
        'package-lock.json',
        'yarn.lock',
        'pnpm-lock.yaml',
        'poetry.lock',
        'cargo.lock',
    })

    # ===========================================================================
    # Metadata (REQUIRED)
    # ===========================================================================

    @classmethod
    def get_extensions(cls) -> list[str]:
        """Configuration file extensions."""
        return [".json", ".yaml", ".yml", ".toml", ".ini"]

    @classmethod
    def get_language_name(cls) -> str:
        """Language name."""
        return "Config"

    @classmethod
    def get_priority(cls) -> int:
        """Standard priority."""
        return 10

    # ===========================================================================
    # Skip Logic
    # ===========================================================================

    @classmethod
    def should_skip(cls, filename: str) -> bool:
        """Return True for config files that should not be scanned.

        Skipped: known lock files, anything ending in ``.lock``, and minified
        files (name contains ``.min.``). The comparison is case-insensitive.

        Args:
            filename: File name to test (compared as given, lower-cased).
        """
        name = filename.lower()
        return (
            name in cls._LOCK_FILENAMES
            or name.endswith('.lock')
            or '.min.' in name
        )

    def should_analyze(self, file_path: str) -> bool:
        """Return False for config files that should not be analyzed.

        Applies the same rules as should_skip() to the final path component
        of ``file_path`` so the two checks cannot drift apart.
        """
        return not self.should_skip(Path(file_path).name)

    # ===========================================================================
    # Structure Scanning
    # ===========================================================================

    def scan(self, source_code: bytes) -> Optional[list[StructureNode]]:
        """Scan config file - returns empty list as configs don't have code structure."""
        # Config files don't have traditional code structure (classes, functions).
        # Return empty list rather than None to indicate successful parse.
        return []

    # ===========================================================================
    # Semantic Analysis - Layer 1
    # ===========================================================================

    def extract_imports(self, file_path: str, content: str) -> list[ImportInfo]:
        """Extract file path references from config files.

        Handles:
        - JSON: tsconfig paths, package.json scripts with file refs
        - YAML: docker-compose volumes, env_file, Dockerfile paths
        - TOML: pyproject.toml paths, Cargo.toml paths
        - File path patterns in string values

        Does NOT extract:
        - Package names (npm, cargo, pip - these are registry names, not file imports)

        Args:
            file_path: Path of the config file; its lower-cased name selects
                the format-specific extractor.
            content: Full text of the config file.

        Returns:
            Format-specific references followed by generic path-pattern
            references. The same path may appear more than once with
            different ``import_type`` values.
        """
        imports = []
        filename = Path(file_path).name.lower()

        # Detect file type
        if filename.endswith('.json'):
            imports.extend(self._extract_json_imports(file_path, content))
        elif filename.endswith(('.yaml', '.yml')):
            imports.extend(self._extract_yaml_imports(file_path, content))
        elif filename.endswith('.toml'):
            imports.extend(self._extract_toml_imports(file_path, content))
        elif filename.endswith('.ini'):
            imports.extend(self._extract_ini_imports(file_path, content))

        # Generic file path pattern extraction (all config types)
        imports.extend(self._extract_path_patterns(file_path, content))

        return imports

    def _extract_json_imports(self, file_path: str, content: str) -> list[ImportInfo]:
        """Extract imports from JSON config files.

        Only ``tsconfig.json`` and ``package.json`` get special handling.
        Returns an empty list when the content is not valid JSON or its top
        level is not an object.
        """
        imports = []
        filename = Path(file_path).name.lower()

        try:
            data = json.loads(content)
        except json.JSONDecodeError:
            return imports

        # Valid JSON may be a list, string, number, ... - only objects carry
        # the keys inspected below.
        if not isinstance(data, dict):
            return imports

        # tsconfig.json specific
        if filename == 'tsconfig.json':
            # "extends": "./base.json" (or a list of such strings)
            extends = data.get('extends')
            extends_refs = [extends] if isinstance(extends, str) else extends
            if isinstance(extends_refs, list):
                for base_ref in extends_refs:
                    if isinstance(base_ref, str):
                        imports.append(ImportInfo(
                            source_file=file_path,
                            target_module=base_ref,
                            line=self._find_line(content, base_ref),
                            import_type="extends"
                        ))

            # "files": ["src/index.ts", ...]
            files = data.get('files')
            if isinstance(files, list):
                for file_ref in files:
                    if isinstance(file_ref, str):
                        imports.append(ImportInfo(
                            source_file=file_path,
                            target_module=file_ref,
                            line=self._find_line(content, file_ref),
                            import_type="file_reference"
                        ))

            # "include": ["src/**/*"]
            include = data.get('include')
            if isinstance(include, list):
                for pattern in include:
                    # Patterns starting with '*' carry no directory anchor.
                    if isinstance(pattern, str) and not pattern.startswith('*'):
                        imports.append(ImportInfo(
                            source_file=file_path,
                            target_module=pattern,
                            line=self._find_line(content, pattern),
                            import_type="include_pattern"
                        ))

            # "paths": {"@/*": ["./src/*"]}
            compiler_options = data.get('compilerOptions')
            paths = (
                compiler_options.get('paths')
                if isinstance(compiler_options, dict)
                else None
            )
            if isinstance(paths, dict):
                for path_list in paths.values():
                    if not isinstance(path_list, list):
                        continue
                    for path in path_list:
                        if isinstance(path, str):
                            imports.append(ImportInfo(
                                source_file=file_path,
                                target_module=path,
                                line=self._find_line(content, path),
                                import_type="path_mapping"
                            ))

        # package.json - only extract file paths from scripts, not dependencies
        elif filename == 'package.json':
            # Scripts might reference local files
            scripts = data.get('scripts')
            if isinstance(scripts, dict):
                for script_cmd in scripts.values():
                    if not isinstance(script_cmd, str):
                        continue
                    # Extract file paths from scripts
                    # (e.g., "node build.js", "node ./scripts/test.mjs").
                    # Match files with or without ./ prefix.
                    file_refs = re.findall(
                        r'\b(?:\./)?[\w/.-]+\.(?:js|ts|mjs|cjs|json|jsx|tsx)\b',
                        script_cmd,
                    )
                    for ref in file_refs:
                        imports.append(ImportInfo(
                            source_file=file_path,
                            target_module=ref,
                            line=self._find_line(content, ref),
                            import_type="script_file"
                        ))

        return imports

    def _extract_yaml_imports(self, file_path: str, content: str) -> list[ImportInfo]:
        """Extract imports from YAML config files.

        Only files whose name contains ``docker-compose`` are inspected. The
        content is matched with regular expressions, not parsed as YAML.
        """
        imports = []
        filename = Path(file_path).name.lower()

        # docker-compose.yml patterns
        if 'docker-compose' in filename:
            # env_file: .env.production or env_file: .env
            env_file_pattern = r'env_file:\s*["\']?(\.env[^\s"\']*)["\']?'
            for match in re.finditer(env_file_pattern, content):
                imports.append(ImportInfo(
                    source_file=file_path,
                    target_module=match.group(1),
                    line=self._line_of(content, match.start(1)),
                    import_type="env_file"
                ))

            # dockerfile: ./Dockerfile.prod or dockerfile: Dockerfile.dev
            # (the prefix before "Dockerfile" is optional so bare names match)
            dockerfile_pattern = r'dockerfile:\s*["\']?([^\s"\']*Dockerfile[^\s"\']*)["\']?'
            for match in re.finditer(dockerfile_pattern, content, re.IGNORECASE):
                imports.append(ImportInfo(
                    source_file=file_path,
                    target_module=match.group(1),
                    line=self._line_of(content, match.start(1)),
                    import_type="dockerfile"
                ))

            # volumes: - ./data:/app/data
            volume_pattern = r'[-\s]+["\']?(\.{1,2}/[^:\s"\']+):[^\s"\']+["\']?'
            for match in re.finditer(volume_pattern, content):
                imports.append(ImportInfo(
                    source_file=file_path,
                    target_module=match.group(1),
                    # The leading [-\s]+ can begin on the previous line, so
                    # the line is taken from the captured path itself.
                    line=self._line_of(content, match.start(1)),
                    import_type="volume_mount"
                ))

        return imports

    def _extract_toml_imports(self, file_path: str, content: str) -> list[ImportInfo]:
        """Extract imports from TOML config files.

        Handles ``config_file = "..."`` values in ``pyproject.toml`` and
        ``path = "..."`` values in ``Cargo.toml``. The content is matched with
        regular expressions, not parsed as TOML.
        """
        imports = []
        filename = Path(file_path).name.lower()

        # pyproject.toml - extract script paths, not package dependencies
        if filename == 'pyproject.toml':
            # [tool.mypy] config files
            config_pattern = r'config_file\s*=\s*["\']([^"\']+)["\']'
            for match in re.finditer(config_pattern, content):
                imports.append(ImportInfo(
                    source_file=file_path,
                    target_module=match.group(1),
                    line=self._line_of(content, match.start(1)),
                    import_type="config_file"
                ))

        # Cargo.toml - path dependencies (local crates)
        elif filename == 'cargo.toml':
            # my_crate = { path = "../my_crate" }
            path_dep_pattern = r'path\s*=\s*["\']([^"\']+)["\']'
            for match in re.finditer(path_dep_pattern, content):
                imports.append(ImportInfo(
                    source_file=file_path,
                    target_module=match.group(1),
                    line=self._line_of(content, match.start(1)),
                    import_type="path_dependency"
                ))

        return imports

    def _extract_ini_imports(self, file_path: str, content: str) -> list[ImportInfo]:
        """Extract imports from INI config files.

        Matches ``key = value`` lines whose value starts with ``.`` or ``/``
        and ends in a file extension, optionally wrapped in matching quotes.
        """
        imports = []

        # INI files may have file path values
        # key = /path/to/file or key = ./relative/path
        ini_path_pattern = r'^\s*[\w_-]+\s*=\s*(["\']?)([./][^\s"\']+\.[a-zA-Z0-9]+)\1'
        for match in re.finditer(ini_path_pattern, content, re.MULTILINE):
            imports.append(ImportInfo(
                source_file=file_path,
                target_module=match.group(2),
                # ^\s* can swallow preceding blank lines, so the line is
                # taken from the captured value rather than the match start.
                line=self._line_of(content, match.start(2)),
                import_type="config_value"
            ))

        return imports

    def _extract_path_patterns(self, file_path: str, content: str) -> list[ImportInfo]:
        """Extract generic file path patterns from config files.

        Conservative patterns:
        - Relative paths: ./file.ext, ../file.ext
        - Quoted paths with extensions
        - Avoid matching URLs, version numbers, or package names
        """
        imports = []

        # Pattern 1: Relative paths with common extensions (in quotes)
        # Matches: "./config.json", "../utils/helper.ts", "./templates/base.html", etc.
        quoted_path_pattern = r'["\'](\./(?:[^/"\s]+/)*[^/"\s]+\.[a-zA-Z0-9]+|\.\./(?:[^/"\s]+/)*[^/"\s]+\.[a-zA-Z0-9]+)["\']'
        for match in re.finditer(quoted_path_pattern, content):
            path = match.group(1)
            # Skip if looks like URL
            if '://' not in path:
                imports.append(ImportInfo(
                    source_file=file_path,
                    target_module=path,
                    line=self._line_of(content, match.start(1)),
                    import_type="path_reference"
                ))

        # Pattern 2: Unquoted relative paths on their own line (YAML-style)
        # Matches: " - ./file.ext" or "key: ./file.ext"
        unquoted_path_pattern = r'(?:^|\s)(\./[^\s:]+\.[a-zA-Z0-9]+|\.\./(?:[^/\s]+/)*[^/\s]+\.[a-zA-Z0-9]+)(?:\s|$)'
        for match in re.finditer(unquoted_path_pattern, content, re.MULTILINE):
            path = match.group(1)
            if '://' not in path:
                imports.append(ImportInfo(
                    source_file=file_path,
                    target_module=path,
                    # The leading \s may be the previous line's newline, so
                    # the line is taken from the captured path itself.
                    line=self._line_of(content, match.start(1)),
                    import_type="path_reference"
                ))

        return imports

    def find_entry_points(self, file_path: str, content: str) -> list[EntryPointInfo]:
        """Find entry points in config files.

        Entry points:
        - package.json: main, bin scripts
        - Cargo.toml: [[bin]] targets
        - pyproject.toml: [project.scripts]
        - docker-compose.yml: services
        - tsconfig.json: project config

        Detection is best-effort: an unexpected error in a format-specific
        helper is logged at debug level and yields an empty result instead of
        propagating to the caller.
        """
        entry_points = []
        filename = Path(file_path).name.lower()

        try:
            # JSON configs
            if filename.endswith('.json'):
                entry_points.extend(self._find_json_entry_points(file_path, content))
            # TOML configs
            elif filename.endswith('.toml'):
                entry_points.extend(self._find_toml_entry_points(file_path, content))
            # YAML configs
            elif filename.endswith(('.yaml', '.yml')):
                entry_points.extend(self._find_yaml_entry_points(file_path, content))
        except Exception:
            # Deliberate boundary: a malformed config must not abort the scan,
            # but leave a trace so the failure can be diagnosed.
            logger.debug(
                "Entry point detection failed for %s", file_path, exc_info=True
            )

        return entry_points

    def _find_json_entry_points(self, file_path: str, content: str) -> list[EntryPointInfo]:
        """Find entry points in JSON config files.

        Returns an empty list when the content is not valid JSON or its top
        level is not an object.
        """
        entry_points = []
        filename = Path(file_path).name.lower()

        try:
            data = json.loads(content)
        except json.JSONDecodeError:
            return entry_points

        if not isinstance(data, dict):
            return entry_points

        # package.json
        if filename == 'package.json':
            # Mark as npm/node project
            entry_points.append(EntryPointInfo(
                file=file_path,
                type="project_config",
                name="npm_project",
                line=1,
                framework="npm"
            ))

            # main entry point (ignored unless it is a string, since the
            # value is used both as the name and as a search key)
            main = data.get('main')
            if isinstance(main, str):
                entry_points.append(EntryPointInfo(
                    file=file_path,
                    type="main_entry",
                    name=main,
                    line=self._find_line(content, main),
                    framework="npm"
                ))

            # bin scripts
            bin_map = data.get('bin')
            if isinstance(bin_map, dict):
                for bin_name in bin_map:
                    entry_points.append(EntryPointInfo(
                        file=file_path,
                        type="bin_script",
                        name=bin_name,
                        line=self._find_line(content, bin_name),
                        framework="npm"
                    ))

        # tsconfig.json
        elif filename == 'tsconfig.json':
            entry_points.append(EntryPointInfo(
                file=file_path,
                type="project_config",
                name="typescript_project",
                line=1,
                framework="TypeScript"
            ))

        return entry_points

    def _find_toml_entry_points(self, file_path: str, content: str) -> list[EntryPointInfo]:
        """Find entry points in TOML config files.

        Recognizes ``pyproject.toml`` (project marker plus the first
        ``[project.scripts]`` header) and ``Cargo.toml`` (project marker plus
        every ``[[bin]]`` header).
        """
        entry_points = []
        filename = Path(file_path).name.lower()

        # pyproject.toml
        if filename == 'pyproject.toml':
            entry_points.append(EntryPointInfo(
                file=file_path,
                type="project_config",
                name="python_project",
                line=1,
                framework="Python"
            ))

            # [project.scripts]
            script_pattern = r'\[project\.scripts\]'
            match = re.search(script_pattern, content)
            if match:
                entry_points.append(EntryPointInfo(
                    file=file_path,
                    type="scripts_section",
                    name="project_scripts",
                    line=self._line_of(content, match.start()),
                    framework="Python"
                ))

        # Cargo.toml
        elif filename == 'cargo.toml':
            entry_points.append(EntryPointInfo(
                file=file_path,
                type="project_config",
                name="rust_project",
                line=1,
                framework="Rust"
            ))

            # [[bin]]
            bin_pattern = r'\[\[bin\]\]'
            for match in re.finditer(bin_pattern, content):
                entry_points.append(EntryPointInfo(
                    file=file_path,
                    type="bin_target",
                    name="bin",
                    line=self._line_of(content, match.start()),
                    framework="Rust"
                ))

        return entry_points

    def _find_yaml_entry_points(self, file_path: str, content: str) -> list[EntryPointInfo]:
        """Find entry points in YAML config files.

        Only files whose name contains ``docker-compose`` are recognized: a
        project marker plus one entry per top-level ``services:`` line.
        """
        entry_points = []
        filename = Path(file_path).name.lower()

        # docker-compose.yml
        if 'docker-compose' in filename:
            entry_points.append(EntryPointInfo(
                file=file_path,
                type="project_config",
                name="docker_compose_project",
                line=1,
                framework="Docker"
            ))

            # services:
            services_pattern = r'^services:\s*$'
            for match in re.finditer(services_pattern, content, re.MULTILINE):
                entry_points.append(EntryPointInfo(
                    file=file_path,
                    type="services_section",
                    name="services",
                    line=self._line_of(content, match.start()),
                    framework="Docker"
                ))

        return entry_points

    # ===========================================================================
    # Semantic Analysis - Layer 2
    # ===========================================================================

    def extract_definitions(self, file_path: str, content: str) -> list[DefinitionInfo]:
        """Config files don't have traditional definitions."""
        return []

    def extract_calls(
        self,
        file_path: str,
        content: str,
        definitions: list[DefinitionInfo]
    ) -> list[CallInfo]:
        """Config files don't have function calls."""
        return []

    # ===========================================================================
    # Classification
    # ===========================================================================

    def classify_file(self, file_path: str, content: str) -> str:
        """All config files go to config cluster."""
        return "config"

    # ===========================================================================
    # CodeMap Integration
    # ===========================================================================

    def resolve_import_to_file(
        self,
        module: str,
        source_file: str,
        all_files: list[str],
        definitions_map: dict[str, str],
    ) -> Optional[str]:
        """Resolve config file reference to file path.

        Config files reference other files directly by path, so resolution is
        path matching against ``all_files``. Candidates are tried in order:

        1. ``module`` exactly as written.
        2. ``module`` joined onto the directory of ``source_file``.
        3. Candidate 2 with ``.`` / ``..`` segments collapsed, so that
           ``./src/a.ts`` and ``../shared/b.json`` can match.
        4. ``module`` alone with ``.`` / ``..`` segments collapsed.

        Args:
            module: Path reference as it appears in the config file.
            source_file: Path of the config file containing the reference.
            all_files: Known file paths to match against.
            definitions_map: Unused here.

        Returns:
            The matching entry of ``all_files``, or None if nothing matches.
        """
        # Direct path match
        if module in all_files:
            return module

        # Try relative to source file directory
        source_dir = str(Path(source_file).parent)
        if source_dir != ".":
            candidate = f"{source_dir}/{module}"
            if candidate in all_files:
                return candidate

        if not module:
            return None

        # References extracted by this class usually start with "./" or "../";
        # without normalization those can never equal an entry of all_files.
        base_dir = Path(source_file).parent.as_posix()
        normalized_candidates = (
            posixpath.normpath(posixpath.join(base_dir, module)),
            posixpath.normpath(module),
        )
        for candidate in normalized_candidates:
            if candidate in all_files:
                return candidate

        return None

    def format_entry_point(self, ep: EntryPointInfo) -> str:
        """Format config entry point for display.

        Formats:
        - python_project: "pyproject.toml project"
        - npm_project: "package.json project"
        - docker_service: "docker-compose.yml service"

        The entry points produced by this class carry these labels in
        ``ep.name`` (with ``ep.type`` set to ``"project_config"`` or
        ``"scripts_section"``), so the label is read from the name for those
        types and from the type otherwise. Anything unrecognized is delegated
        to the base class.
        """
        label = ep.type
        if ep.type in ("project_config", "scripts_section"):
            label = ep.name

        if label == "python_project":
            return f" {ep.file}:python_project @{ep.line}"
        elif label == "npm_project":
            return f" {ep.file}:npm_project @{ep.line}"
        elif label == "docker_service":
            return f" {ep.file}:docker {ep.name} @{ep.line}"
        elif label == "project_scripts":
            return f" {ep.file}:project_scripts @{ep.line}"
        else:
            return super().format_entry_point(ep)

    # ===========================================================================
    # Helper methods
    # ===========================================================================

    @staticmethod
    def _line_of(content: str, offset: int) -> int:
        """Return the 1-based line number of character ``offset`` in ``content``."""
        # Counting within bounds avoids copying the prefix into a new string.
        return content.count('\n', 0, offset) + 1

    def _find_line(self, content: str, search_str: str) -> int:
        """Find line number of a string in content.

        Returns the 1-based line of the first occurrence of ``search_str``,
        or 0 when it does not occur.
        """
        index = content.find(search_str)
        if index < 0:
            return 0
        return self._line_of(content, index)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mariusei/file-scanner-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

config.py•22.3 KiB