Scantool - File Scanner MCP

Overview Schema Related Servers Score Discussions

base.py•15.9 KiB

"""Base language class that unifies scanner and analyzer functionality.

This module provides the BaseLanguage class that combines:
- Structure scanning (tree-sitter based AST extraction)
- Semantic analysis (imports, entry points, definitions, calls)

Each language implementation inherits from BaseLanguage and provides
a single file per language instead of separate scanner + analyzer files.
"""

from abc import ABC, abstractmethod
from typing import Optional

from .models import (
    StructureNode,
    ImportInfo,
    EntryPointInfo,
    DefinitionInfo,
    CallInfo,
)


class BaseLanguage(ABC):
    """Unified base class for language support.

    Combines the functionality of BaseScanner and BaseAnalyzer into a single
    interface. Each language provides one implementation file that handles
    both structure scanning and semantic analysis.

    Key methods:
    - scan(): Extract structure (classes, functions, methods) from source
    - extract_imports(): Find import statements
    - find_entry_points(): Find main functions, exports, etc.
    - extract_definitions(): Get function/class definitions (can reuse scan())
    - extract_calls(): Find function/method calls
    """

    def __init__(self, show_errors: bool = True, fallback_on_errors: bool = True):
        """Initialize language handler with error handling options.

        Args:
            show_errors: Include ERROR nodes in output
            fallback_on_errors: Use regex fallback if too many parse errors
        """
        self.show_errors = show_errors
        self.fallback_on_errors = fallback_on_errors

    # ===========================================================================
    # Metadata (REQUIRED - classmethod)
    # ===========================================================================

    @classmethod
    @abstractmethod
    def get_extensions(cls) -> list[str]:
        """Return list of file extensions this language handles.

        Examples:
            ['.py', '.pyw']   # Python
            ['.ts', '.tsx']   # TypeScript
            ['.swift']        # Swift
        """

    @classmethod
    @abstractmethod
    def get_language_name(cls) -> str:
        """Return the human-readable language name.

        Examples:
            'Python', 'TypeScript', 'Swift'
        """

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for this language (higher = preferred).

        Used when multiple languages claim the same extension.
        Default: 0
        """
        return 0

    # ===========================================================================
    # Skip/Filter Logic (OPTIONAL - combined from scanner + analyzer)
    # ===========================================================================

    @classmethod
    def should_skip(cls, filename: str) -> bool:
        """Check if file should be skipped for scanning.

        Override to skip files like:
        - __init__.py (Python empty init files)
        - *.min.js (JavaScript minified files)
        - *.d.ts (TypeScript declaration files)

        Args:
            filename: Just the filename (not full path)

        Returns:
            True if file should be skipped (not scanned)
        """
        return False

    def should_analyze(self, file_path: str) -> bool:
        """Check if file should be analyzed for semantic information.

        Override to skip certain files from import/entry point analysis.
        This is similar to should_skip but operates on full paths and is
        called during CodeMap analysis.

        Args:
            file_path: Relative path to the file

        Returns:
            True if file should be analyzed
        """
        return True

    def is_low_value_for_inventory(self, file_path: str, size: int = 0) -> bool:
        """Check if file is low-value for inventory listing.

        Unlike should_analyze (which skips analysis entirely), this identifies
        files that CAN be analyzed but are low-value for overview displays.
        Used by preview_directory to filter noise.

        NOTE: Central/hot files should NEVER be excluded, regardless of this
        method's return value. Caller must check centrality.

        Override for patterns like:
        - Empty __init__.py (Python)
        - Type declarations *.d.ts (TypeScript)
        - Re-export index files

        Args:
            file_path: Relative path to the file
            size: File size in bytes (0 = unknown)

        Returns:
            True if file is low-value for inventory (can be hidden)
        """
        # A size of 0 means "unknown", so only known sizes below 50 bytes
        # are treated as low-value by default.
        return 0 < size < 50

    # ===========================================================================
    # Structure Scanning (REQUIRED - from BaseScanner)
    # ===========================================================================

    @abstractmethod
    def scan(self, source_code: bytes) -> Optional[list[StructureNode]]:
        """Scan source code and extract structure.

        This is the primary scanning method that extracts classes, functions,
        methods, and other structural elements from source code.

        Args:
            source_code: Raw file content as bytes

        Returns:
            List of StructureNode objects representing the file structure,
            or None if the file couldn't be parsed
        """

    # ===========================================================================
    # Semantic Analysis - Layer 1 (REQUIRED - from BaseAnalyzer)
    # ===========================================================================

    @abstractmethod
    def extract_imports(self, file_path: str, content: str) -> list[ImportInfo]:
        """Extract import statements from file.

        Args:
            file_path: Relative path to the file
            content: File content as string

        Returns:
            List of ImportInfo objects
        """

    @abstractmethod
    def find_entry_points(self, file_path: str, content: str) -> list[EntryPointInfo]:
        """Find entry points in the file.

        Entry points include:
        - main() functions
        - if __name__ == "__main__" blocks
        - app/server instances (Flask, FastAPI, Express, etc.)
        - Module exports

        Args:
            file_path: Relative path to the file
            content: File content as string

        Returns:
            List of EntryPointInfo objects
        """

    # ===========================================================================
    # Semantic Analysis - Layer 2 (OPTIONAL - default implementations)
    # ===========================================================================

    def extract_definitions(self, file_path: str, content: str) -> list[DefinitionInfo]:
        """Extract function/class definitions from file.

        Default implementation converts scan() output to DefinitionInfo.
        Override for more precise control or when scan() isn't suitable.

        Args:
            file_path: Relative path to the file
            content: File content as string

        Returns:
            List of DefinitionInfo objects; empty if scan() yields nothing
            or raises.
        """
        try:
            structures = self.scan(content.encode("utf-8"))
            if not structures:
                return []
            return self._structures_to_definitions(file_path, structures)
        except Exception:
            # Deliberate best-effort: a failure while scanning one file is
            # reported as "no definitions" instead of propagating.
            return []

    def extract_calls(
        self, file_path: str, content: str, definitions: list[DefinitionInfo]
    ) -> list[CallInfo]:
        """Extract function/method calls from file.

        Default implementation returns empty list.
        Override to enable call graph analysis.

        Args:
            file_path: Relative path to the file
            content: File content as string
            definitions: List of known definitions from this file

        Returns:
            List of CallInfo objects
        """
        return []

    def _structures_to_definitions(
        self,
        file_path: str,
        structures: list[StructureNode],
        parent: Optional[str] = None,
    ) -> list[DefinitionInfo]:
        """Convert StructureNode list to DefinitionInfo list.

        Helper for default extract_definitions() implementation.

        Args:
            file_path: Path recorded on every produced DefinitionInfo
            structures: Nodes to convert (children are visited recursively)
            parent: Name of the enclosing class, if any

        Returns:
            Flat list of DefinitionInfo for every class/function/method node
        """
        definitions = []
        for node in structures:
            if node.type in ("class", "function", "method"):
                definitions.append(
                    DefinitionInfo(
                        file=file_path,
                        type=node.type,
                        name=node.name,
                        line=node.start_line,
                        signature=node.signature,
                        parent=parent,
                    )
                )

            # Recurse into children; only a class becomes the new parent,
            # any other node passes the current parent through unchanged.
            if node.children:
                child_parent = node.name if node.type == "class" else parent
                definitions.extend(
                    self._structures_to_definitions(file_path, node.children, child_parent)
                )
        return definitions

    # ===========================================================================
    # Classification (OPTIONAL)
    # ===========================================================================

    def classify_file(self, file_path: str, content: str) -> str:
        """Classify file into architectural cluster.

        Clusters:
        - "entry_points" (main.py, server.py, app.py)
        - "core_logic" (scanner, parser, analyzer)
        - "utilities" (helpers, formatters)
        - "plugins" (scanners/*, extensions/*)
        - "config" (settings, constants)
        - "tests" (test_*.py, *_test.py)
        - "other" (default)

        The checks run in a fixed order (entry points, tests, config,
        plugins, utilities, core logic) and the first match wins.

        Args:
            file_path: Relative path to the file
            content: File content as string (unused by this default)

        Returns:
            Cluster name
        """
        # Normalize separators, then anchor with a leading "/" so directory
        # markers such as "/tests/" also match a top-level directory in a
        # relative path ("tests/test_x.py"), not just nested ones.
        normalized = file_path.replace("\\", "/").lower()
        name = normalized.rsplit("/", 1)[-1]
        anchored = normalized if normalized.startswith("/") else "/" + normalized

        # Entry points
        entry_names = (
            "main.py", "server.py", "app.py", "__main__.py",
            "index.ts", "main.tsx", "app.tsx", "main.go",
        )
        if name in entry_names:
            return "entry_points"

        # Tests
        if name.startswith("test_") or "_test." in name or "/tests/" in anchored:
            return "tests"

        # Config
        config_names = (
            "config.py", "settings.py", "constants.py", "config.ts", "settings.ts",
        )
        if name in config_names:
            return "config"

        # Plugins
        plugin_dirs = ("/scanners/", "/plugins/", "/extensions/", "/languages/")
        if any(plugin_dir in anchored for plugin_dir in plugin_dirs):
            return "plugins"

        # Utilities
        if (
            "/utils/" in anchored
            or "/helpers/" in anchored
            or "utils." in name
            or "helper." in name
        ):
            return "utilities"

        # Core logic
        core_keywords = ("scanner", "parser", "formatter", "analyzer", "processor", "engine")
        if any(keyword in name for keyword in core_keywords):
            return "core_logic"

        return "other"

    # ===========================================================================
    # CodeMap Integration (OPTIONAL)
    # ===========================================================================

    def resolve_import_to_file(
        self,
        module: str,
        source_file: str,
        all_files: list[str],
        definitions_map: dict[str, str],
    ) -> Optional[str]:
        """Resolve import module to actual file path.

        Override for language-specific resolution:
        - Python: dot.separated.module -> path/to/module.py
        - Swift: Type references -> file defining Type
        - Go: github.com/pkg -> pkg/file.go
        - TypeScript: ./relative -> relative.ts or relative/index.ts

        Args:
            module: Module/type name to resolve
            source_file: Path of file doing the import
            all_files: List of all files in project
            definitions_map: Map of type/definition names to file paths

        Returns:
            Resolved file path, or None if external/unresolvable
            (this default always returns None)
        """
        return None

    def format_entry_point(self, ep: EntryPointInfo) -> str:
        """Format entry point for display.

        Override for language-specific formatting.

        Args:
            ep: EntryPointInfo object to format

        Returns:
            Formatted string for display (with leading 2-space indent)
        """
        # The line suffix is omitted when the entry point has no line.
        line_str = f" @{ep.line}" if ep.line else ""
        return f"  {ep.file}:{ep.name or ep.type}{line_str}"

    def get_file_extension(self) -> str:
        """Return primary file extension for this language.

        Returns:
            Primary extension (e.g., ".py", ".swift", ".go"), i.e. the first
            entry of get_extensions(), or "" when that list is empty
        """
        exts = self.get_extensions()
        return exts[0] if exts else ""

    # ===========================================================================
    # Helper methods (from BaseScanner)
    # ===========================================================================

    def _get_node_text(self, node, source_code: bytes) -> str:
        """Extract text from a tree-sitter node.

        Slices source_code by the node's start_byte/end_byte and decodes it
        as UTF-8. Undecodable bytes are replaced rather than raising, which
        yields the same text as a strict decode whenever the slice is valid.
        """
        return source_code[node.start_byte:node.end_byte].decode("utf-8", errors="replace")

    def _normalize_signature(self, signature: str) -> str:
        """Normalize a signature to single line for tree formatting."""
        if not signature:
            return signature
        # split() with no argument splits on every whitespace run (including
        # newlines and carriage returns) and drops leading/trailing blanks.
        return " ".join(signature.split())

    def _count_error_nodes(self, node) -> int:
        """Count ERROR nodes in tree (for fallback detection)."""
        # Iterative walk: deep trees must not hit Python's recursion limit.
        count = 0
        pending = [node]
        while pending:
            current = pending.pop()
            if current.type == "ERROR":
                count += 1
            pending.extend(current.children)
        return count

    def _count_nodes(self, node) -> int:
        """Count all nodes in tree."""
        # Iterative walk: deep trees must not hit Python's recursion limit.
        count = 0
        pending = [node]
        while pending:
            current = pending.pop()
            count += 1
            pending.extend(current.children)
        return count

    def _should_use_fallback(self, root_node) -> bool:
        """Determine if we should use regex fallback due to too many errors.

        Returns True only when fallback_on_errors is enabled and more than
        half of the nodes in the tree are ERROR nodes.
        """
        if not self.fallback_on_errors:
            return False
        total = self._count_nodes(root_node)
        errors = self._count_error_nodes(root_node)
        return total > 0 and (errors / total) > 0.5

    def _calculate_complexity(self, node) -> dict:
        """Calculate complexity metrics for a node.

        Returns:
            Dict with keys: lines, max_depth, branches
            - lines: rows spanned by the node (end row - start row + 1)
            - max_depth: deepest descendant level, with the node itself at 0
            - branches: number of branch-type nodes in the subtree
        """
        branch_types = (
            "if_statement",
            "for_statement",
            "while_statement",
            "switch_statement",
            "case_statement",
            "match_statement",
        )
        max_depth = 0
        branches = 0

        # Iterative walk carrying each node's depth; visiting order does not
        # affect either metric, and this avoids recursion limits.
        pending = [(node, 0)]
        while pending:
            current, depth = pending.pop()
            if depth > max_depth:
                max_depth = depth
            if current.type in branch_types:
                branches += 1
            pending.extend((child, depth + 1) for child in current.children)

        return {
            "lines": node.end_point[0] - node.start_point[0] + 1,
            "max_depth": max_depth,
            "branches": branches,
        }

    def _resolve_relative_import(
        self, current_file: str, relative_import: str
    ) -> Optional[str]:
        """Resolve relative import to absolute file path.

        Handles dotted relative imports: one leading dot refers to the
        directory of current_file, and each additional dot moves one
        directory up. The remainder is split on "." into path segments.
        No file extension is appended.

        Args:
            current_file: Path of file doing the import ("/"-separated)
            relative_import: Relative import string

        Returns:
            Resolved path or None (not relative, climbs above the path
            root, or resolves to an empty path)
        """
        if not relative_import.startswith("."):
            return None

        rest = relative_import.lstrip(".")
        levels_up = len(relative_import) - len(rest) - 1

        parts = current_file.split("/")[:-1]  # Remove filename
        if levels_up > len(parts):
            return None
        parts = parts[: len(parts) - levels_up]

        if rest:
            parts.extend(rest.split("."))

        return "/".join(parts) if parts else None

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mariusei/file-scanner-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

base.py•15.9 KiB