ast_analyzer.py (19.9 kB)
""" AST analyzer for extracting high-level structure and insights from parsed code. This module provides analysis capabilities that work with parsed AST structures to extract patterns, calculate metrics, and generate insights. """ import logging from typing import Dict, List, Any, Optional, Set, Tuple from dataclasses import dataclass, field from collections import defaultdict, Counter from .base_parser import ParseResult, ASTNode, NodeType logger = logging.getLogger(__name__) @dataclass class StructureAnalysis: """High-level structure analysis of parsed code.""" file_path: str language: str total_nodes: int complexity_score: int maintainability_score: float # Counts by type classes: int = 0 functions: int = 0 methods: int = 0 imports: int = 0 variables: int = 0 # Detailed information class_info: List[Dict[str, Any]] = field(default_factory=list) function_info: List[Dict[str, Any]] = field(default_factory=list) import_info: List[Dict[str, Any]] = field(default_factory=list) # Metrics average_function_length: float = 0.0 max_function_length: int = 0 average_class_size: float = 0.0 max_class_size: int = 0 nesting_depth: int = 0 # Dependencies and relationships internal_dependencies: Set[str] = field(default_factory=set) external_dependencies: Set[str] = field(default_factory=set) # Quality indicators has_docstrings: bool = False docstring_coverage: float = 0.0 has_type_hints: bool = False type_hint_coverage: float = 0.0 def to_dict(self) -> Dict[str, Any]: """Convert to dictionary representation.""" return { 'file_path': self.file_path, 'language': self.language, 'total_nodes': self.total_nodes, 'complexity_score': self.complexity_score, 'maintainability_score': self.maintainability_score, 'counts': { 'classes': self.classes, 'functions': self.functions, 'methods': self.methods, 'imports': self.imports, 'variables': self.variables }, 'metrics': { 'average_function_length': self.average_function_length, 'max_function_length': self.max_function_length, 'average_class_size': self.average_class_size, 'max_class_size': self.max_class_size, 'nesting_depth': self.nesting_depth }, 'quality': { 'has_docstrings': self.has_docstrings, 'docstring_coverage': self.docstring_coverage, 'has_type_hints': self.has_type_hints, 'type_hint_coverage': self.type_hint_coverage }, 'dependencies': { 'internal': list(self.internal_dependencies), 'external': list(self.external_dependencies) }, 'details': { 'classes': self.class_info, 'functions': self.function_info, 'imports': self.import_info } } class ASTAnalyzer: """ Analyzer for extracting insights from AST structures. Provides various analysis methods for code quality, structure, complexity, and maintainability assessment. """ def __init__(self): self.analysis_cache: Dict[str, StructureAnalysis] = {} def analyze_structure(self, parse_result: ParseResult) -> StructureAnalysis: """ Perform comprehensive structure analysis. 
Args: parse_result: Parsed AST structure Returns: StructureAnalysis with detailed insights """ if not parse_result.success or not parse_result.root_node: return StructureAnalysis( file_path=parse_result.file_path, language=parse_result.language, total_nodes=0, complexity_score=0, maintainability_score=0.0 ) # Get all nodes for analysis all_nodes = parse_result.all_nodes # Basic counts node_counts = self._count_nodes_by_type(all_nodes) # Analyze classes classes = parse_result.get_classes() class_info = [self._analyze_class(cls) for cls in classes] # Analyze functions functions = parse_result.get_functions() function_info = [self._analyze_function(func) for func in functions] # Analyze imports imports = parse_result.get_imports() import_info = [self._analyze_import(imp) for imp in imports] # Calculate metrics complexity_score = self._calculate_complexity(all_nodes) maintainability_score = self._calculate_maintainability( all_nodes, function_info, class_info ) # Function metrics function_lengths = [f['line_count'] for f in function_info] avg_function_length = sum(function_lengths) / len(function_lengths) if function_lengths else 0 max_function_length = max(function_lengths) if function_lengths else 0 # Class metrics class_sizes = [c['method_count'] for c in class_info] avg_class_size = sum(class_sizes) / len(class_sizes) if class_sizes else 0 max_class_size = max(class_sizes) if class_sizes else 0 # Nesting depth nesting_depth = self._calculate_max_nesting_depth(all_nodes) # Dependencies internal_deps, external_deps = self._extract_dependencies(import_info) # Quality indicators docstring_info = self._analyze_docstring_coverage(all_nodes) type_hint_info = self._analyze_type_hint_coverage(all_nodes, parse_result.language) return StructureAnalysis( file_path=parse_result.file_path, language=parse_result.language, total_nodes=len(all_nodes), complexity_score=complexity_score, maintainability_score=maintainability_score, classes=node_counts.get(NodeType.CLASS, 0), functions=node_counts.get(NodeType.FUNCTION, 0), methods=node_counts.get(NodeType.METHOD, 0), imports=node_counts.get(NodeType.IMPORT, 0), variables=node_counts.get(NodeType.VARIABLE, 0), class_info=class_info, function_info=function_info, import_info=import_info, average_function_length=avg_function_length, max_function_length=max_function_length, average_class_size=avg_class_size, max_class_size=max_class_size, nesting_depth=nesting_depth, internal_dependencies=internal_deps, external_dependencies=external_deps, **docstring_info, **type_hint_info ) def _count_nodes_by_type(self, nodes: List[ASTNode]) -> Dict[NodeType, int]: """Count nodes by type.""" return Counter(node.node_type for node in nodes) def _analyze_class(self, class_node: ASTNode) -> Dict[str, Any]: """Analyze a class node.""" methods = class_node.get_children_by_type(NodeType.METHOD) variables = class_node.get_children_by_type(NodeType.VARIABLE) return { 'name': class_node.name, 'line_start': class_node.line_start, 'line_end': class_node.line_end, 'line_count': class_node.line_count, 'method_count': len(methods), 'variable_count': len(variables), 'methods': [m.name for m in methods], 'base_classes': class_node.metadata.get('base_classes', []), 'decorators': class_node.metadata.get('decorators', []), 'has_docstring': bool(class_node.metadata.get('docstring')), 'is_exported': class_node.metadata.get('is_exported', False) } def _analyze_function(self, func_node: ASTNode) -> Dict[str, Any]: """Analyze a function/method node.""" return { 'name': func_node.name, 'full_name': 
func_node.full_name, 'line_start': func_node.line_start, 'line_end': func_node.line_end, 'line_count': func_node.line_count, 'is_method': func_node.node_type == NodeType.METHOD, 'is_async': func_node.metadata.get('is_async', False), 'is_generator': func_node.metadata.get('is_generator', False), 'parameters': func_node.metadata.get('parameters', []), 'parameter_count': len(func_node.metadata.get('parameters', [])), 'decorators': func_node.metadata.get('decorators', []), 'has_docstring': bool(func_node.metadata.get('docstring')), 'complexity': func_node.metadata.get('complexity', 1), 'return_annotation': func_node.metadata.get('return_annotation'), 'is_exported': func_node.metadata.get('is_exported', False) } def _analyze_import(self, import_node: ASTNode) -> Dict[str, Any]: """Analyze an import node.""" return { 'name': import_node.name, 'line': import_node.line_start, 'type': import_node.metadata.get('type', 'import'), 'module': import_node.metadata.get('module'), 'names': import_node.metadata.get('names', []), 'is_from_import': import_node.metadata.get('type') == 'from_import', 'is_relative': import_node.metadata.get('level', 0) > 0 } def _calculate_complexity(self, nodes: List[ASTNode]) -> int: """Calculate overall complexity score.""" complexity = 1 # Base complexity for node in nodes: # Add complexity for control flow if node.node_type == NodeType.CONTROL_FLOW: complexity += 1 # Add complexity from function metadata elif node.node_type in (NodeType.FUNCTION, NodeType.METHOD): func_complexity = node.metadata.get('complexity', 1) complexity += func_complexity - 1 # Subtract base complexity return complexity def _calculate_maintainability( self, nodes: List[ASTNode], function_info: List[Dict[str, Any]], class_info: List[Dict[str, Any]] ) -> float: """ Calculate maintainability score (0-100). Based on various factors including: - Function/method length - Complexity - Documentation coverage - Naming conventions """ if not nodes: return 100.0 score = 100.0 # Penalize long functions long_functions = [f for f in function_info if f['line_count'] > 50] score -= len(long_functions) * 5 # Penalize high complexity functions complex_functions = [f for f in function_info if f['complexity'] > 10] score -= len(complex_functions) * 10 # Reward documentation documented_functions = [f for f in function_info if f['has_docstring']] if function_info: doc_ratio = len(documented_functions) / len(function_info) score += doc_ratio * 10 # Penalize large classes large_classes = [c for c in class_info if c['method_count'] > 20] score -= len(large_classes) * 5 # Ensure score stays within bounds return max(0.0, min(100.0, score)) def _calculate_max_nesting_depth(self, nodes: List[ASTNode]) -> int: """Calculate maximum nesting depth.""" max_depth = 0 for node in nodes: depth = 0 current = node.parent while current and current.node_type != NodeType.MODULE: depth += 1 current = current.parent max_depth = max(max_depth, depth) return max_depth def _extract_dependencies(self, import_info: List[Dict[str, Any]]) -> Tuple[Set[str], Set[str]]: """Extract internal and external dependencies.""" internal_deps = set() external_deps = set() for imp in import_info: module = imp.get('module', '') # Heuristic: modules starting with '.' 
or containing project-specific terms are internal if imp.get('is_relative') or not module: continue # Common external packages external_patterns = [ 'numpy', 'pandas', 'matplotlib', 'requests', 'django', 'flask', 'react', 'vue', 'angular', 'lodash', 'axios', 'express' ] is_external = any(pattern in module.lower() for pattern in external_patterns) if is_external: external_deps.add(module.split('.')[0]) # Get root module else: internal_deps.add(module) return internal_deps, external_deps def _analyze_docstring_coverage(self, nodes: List[ASTNode]) -> Dict[str, Any]: """Analyze docstring coverage.""" documentable_nodes = [ node for node in nodes if node.node_type in (NodeType.CLASS, NodeType.FUNCTION, NodeType.METHOD) ] if not documentable_nodes: return { 'has_docstrings': False, 'docstring_coverage': 0.0 } documented_nodes = [ node for node in documentable_nodes if node.metadata.get('docstring') ] coverage = len(documented_nodes) / len(documentable_nodes) return { 'has_docstrings': len(documented_nodes) > 0, 'docstring_coverage': coverage } def _analyze_type_hint_coverage(self, nodes: List[ASTNode], language: str) -> Dict[str, Any]: """Analyze type hint coverage (primarily for Python and TypeScript).""" if language not in ['python', 'typescript']: return { 'has_type_hints': False, 'type_hint_coverage': 0.0 } functions = [ node for node in nodes if node.node_type in (NodeType.FUNCTION, NodeType.METHOD) ] if not functions: return { 'has_type_hints': False, 'type_hint_coverage': 0.0 } type_hinted_functions = [] for func in functions: params = func.metadata.get('parameters', []) has_return_annotation = bool(func.metadata.get('return_annotation')) has_param_annotations = any(p.get('annotation') for p in params if isinstance(p, dict)) if has_return_annotation or has_param_annotations: type_hinted_functions.append(func) coverage = len(type_hinted_functions) / len(functions) return { 'has_type_hints': len(type_hinted_functions) > 0, 'type_hint_coverage': coverage } def find_code_smells(self, analysis: StructureAnalysis) -> List[Dict[str, Any]]: """ Identify potential code smells and issues. 
Args: analysis: Structure analysis result Returns: List of identified code smells """ smells = [] # Long functions long_functions = [f for f in analysis.function_info if f['line_count'] > 50] for func in long_functions: smells.append({ 'type': 'long_function', 'severity': 'medium', 'message': f"Function '{func['name']}' is {func['line_count']} lines long", 'location': f"Line {func['line_start']}", 'suggestion': "Consider breaking this function into smaller, more focused functions" }) # High complexity functions complex_functions = [f for f in analysis.function_info if f['complexity'] > 10] for func in complex_functions: smells.append({ 'type': 'high_complexity', 'severity': 'high', 'message': f"Function '{func['name']}' has complexity {func['complexity']}", 'location': f"Line {func['line_start']}", 'suggestion': "Reduce complexity by extracting logic into separate functions" }) # Large classes large_classes = [c for c in analysis.class_info if c['method_count'] > 20] for cls in large_classes: smells.append({ 'type': 'large_class', 'severity': 'medium', 'message': f"Class '{cls['name']}' has {cls['method_count']} methods", 'location': f"Line {cls['line_start']}", 'suggestion': "Consider splitting this class following Single Responsibility Principle" }) # Functions with many parameters parameter_heavy = [f for f in analysis.function_info if f['parameter_count'] > 6] for func in parameter_heavy: smells.append({ 'type': 'too_many_parameters', 'severity': 'medium', 'message': f"Function '{func['name']}' has {func['parameter_count']} parameters", 'location': f"Line {func['line_start']}", 'suggestion': "Consider using objects or data structures to group related parameters" }) # Low documentation coverage if analysis.docstring_coverage < 0.5 and analysis.docstring_coverage > 0: smells.append({ 'type': 'poor_documentation', 'severity': 'low', 'message': f"Low documentation coverage: {analysis.docstring_coverage:.1%}", 'location': "File level", 'suggestion': "Add docstrings to classes and functions" }) return smells def generate_summary(self, analysis: StructureAnalysis) -> Dict[str, Any]: """Generate a summary of the analysis.""" return { 'overview': { 'file': analysis.file_path, 'language': analysis.language, 'total_lines': analysis.function_info[-1]['line_end'] if analysis.function_info else 0, 'total_nodes': analysis.total_nodes }, 'structure': { 'classes': analysis.classes, 'functions': analysis.functions, 'methods': analysis.methods, 'imports': analysis.imports }, 'quality_metrics': { 'complexity_score': analysis.complexity_score, 'maintainability_score': analysis.maintainability_score, 'docstring_coverage': f"{analysis.docstring_coverage:.1%}", 'type_hint_coverage': f"{analysis.type_hint_coverage:.1%}" }, 'size_metrics': { 'average_function_length': f"{analysis.average_function_length:.1f} lines", 'max_function_length': f"{analysis.max_function_length} lines", 'average_class_size': f"{analysis.average_class_size:.1f} methods", 'nesting_depth': analysis.nesting_depth }, 'dependencies': { 'internal_count': len(analysis.internal_dependencies), 'external_count': len(analysis.external_dependencies) } }
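
A minimal usage sketch follows. It is an illustration, not part of the module: the import path is a guess at this repository's package layout, and EmptyParseResult is a hypothetical stub standing in for the real ParseResult from base_parser. The stub mimics only the attributes and methods analyze_structure() actually reads, so the analyzer runs end to end on an empty AST.

# Usage sketch (illustrative; not part of the library).
from parsers.ast_analyzer import ASTAnalyzer  # package path is an assumption

class EmptyParseResult:
    """Hypothetical stand-in for base_parser.ParseResult with an empty AST."""
    file_path = "example.py"
    language = "python"
    success = True
    root_node = object()  # any truthy placeholder for the module node
    all_nodes = []        # no AST nodes, so every metric takes its default

    def get_classes(self): return []
    def get_functions(self): return []
    def get_imports(self): return []

analyzer = ASTAnalyzer()
analysis = analyzer.analyze_structure(EmptyParseResult())

print(analysis.maintainability_score)                       # 100.0 for an empty file
print(analyzer.find_code_smells(analysis))                  # []
print(analyzer.generate_summary(analysis)['quality_metrics'])

With real parser output in place of the stub, the same three calls yield populated metrics, a list of smell dictionaries, and a full summary.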
