# maintainability.py (17.2 kB)
"""
Maintainability Index and Halstead Metrics Calculator
"""
import ast
import math
import re
from pathlib import Path
from typing import Dict, Set, Any, List
from collections import Counter
from .base_metrics import BaseMetricsAnalyzer, FileMetrics
class MaintainabilityAnalyzer(BaseMetricsAnalyzer):
"""Calculates maintainability index and Halstead complexity metrics"""
def __init__(self, language: str = "python"):
    """Set up the analyzer and its per-language operator vocabularies.

    Args:
        language: Language name forwarded to the base analyzer
            (defaults to "python").
    """
    super().__init__(language)

    # Bracket and punctuation tokens are identical in both vocabularies.
    delimiters = ('[', ']', '(', ')', '{', '}', ',', ':', ';', '.')

    # Python operators and keywords, grouped by role.
    python_groups = (
        ('+', '-', '*', '/', '//', '%', '**'),                      # Arithmetic
        ('=', '+=', '-=', '*=', '/=', '//=', '%=', '**='),          # Assignment
        ('==', '!=', '<', '>', '<=', '>='),                         # Comparison
        ('and', 'or', 'not', '&', '|', '^', '~', '<<', '>>'),       # Logical/Bitwise
        ('in', 'is', 'is not', 'not in'),                           # Membership/Identity
        delimiters,                                                 # Delimiters
        ('if', 'else', 'elif', 'while', 'for', 'break', 'continue'),  # Control
        ('def', 'class', 'return', 'yield', 'lambda'),              # Definition
        ('try', 'except', 'finally', 'raise', 'assert'),            # Exceptions
        ('import', 'from', 'as', 'with'),                           # Import/Context
    )
    self.python_operators = {
        token for group in python_groups for token in group
    }

    # JavaScript/TypeScript operators and keywords, grouped by role.
    js_groups = (
        ('+', '-', '*', '/', '%', '**'),                            # Arithmetic
        ('=', '+=', '-=', '*=', '/=', '%='),                        # Assignment
        ('==', '!=', '===', '!==', '<', '>', '<=', '>='),           # Comparison
        ('&&', '||', '!', '&', '|', '^', '~', '<<', '>>', '>>>'),   # Logical/Bitwise
        ('typeof', 'instanceof', 'in', 'of'),                       # Type/Membership
        delimiters,                                                 # Delimiters
        ('if', 'else', 'switch', 'case', 'default', 'while', 'for', 'do'),  # Control
        ('break', 'continue', 'return', 'throw'),                   # Flow control
        ('function', 'class', 'const', 'let', 'var', 'new'),        # Definition
        ('try', 'catch', 'finally'),                                # Exceptions
        ('import', 'export', 'from', 'as', 'default'),              # Modules
        ('=>', '?', ':'),                                           # Arrow functions, ternary
    )
    self.js_operators = {
        token for group in js_groups for token in group
    }
def analyze_file(self, file_path: Path, content: str) -> FileMetrics:
    """Build the maintainability report for a single file.

    Line counts, Halstead figures and cyclomatic complexity are computed
    from ``content`` and then combined into the Maintainability Index.

    Args:
        file_path: Path to the file (stored on the result as a string).
        content: Full text of the file.

    Returns:
        A ``FileMetrics`` populated with line counts, Halstead volume,
        difficulty and effort, cyclomatic complexity and the
        maintainability index.
    """
    report = FileMetrics(file_path=str(file_path))

    # Line statistics; ``count_lines`` is not defined in this class and is
    # presumably inherited from the base analyzer.
    counts = self.count_lines(content)
    report.lines_of_code = counts['total']
    report.source_lines_of_code = counts['code']
    report.comment_lines = counts['comment']

    # Halstead figures
    halstead = self.calculate_halstead_metrics(content)
    report.halstead_volume = halstead['volume']
    report.halstead_difficulty = halstead['difficulty']
    report.halstead_effort = halstead['effort']

    # Simplified (keyword-counting) cyclomatic complexity
    report.cyclomatic_complexity = self.calculate_cyclomatic_complexity(content)

    # The index is derived from the three measurements gathered above.
    report.maintainability_index = self.calculate_maintainability_index(
        halstead['volume'],
        report.cyclomatic_complexity,
        report.source_lines_of_code,
    )
    return report
def calculate_halstead_metrics(self, code: str) -> Dict[str, float]:
    """Compute Halstead complexity metrics for ``code``.

    The result dictionary uses these keys:

    - ``n1`` / ``n2``: number of unique operators / operands
    - ``N1`` / ``N2``: total number of operators / operands
    - ``vocabulary``: n1 + n2
    - ``length``: N1 + N2
    - ``volume``: length * log2(vocabulary)
    - ``difficulty``: (n1 / 2) * (N2 / n2)
    - ``effort``: volume * difficulty
    - ``time``: effort / 18 (seconds)
    - ``bugs``: volume / 3000

    Args:
        code: Source code to analyze.

    Returns:
        Dictionary with the Halstead metrics. Languages other than
        Python, JavaScript and TypeScript get a fixed minimal result.
    """
    language = self.language

    if language == "python":
        return self._calculate_python_halstead(code)

    if language in ["javascript", "typescript"]:
        return self._calculate_js_halstead(code)

    # Unsupported language: default minimal metrics.
    return dict(
        n1=1, n2=1, N1=1, N2=1,
        vocabulary=2, length=2,
        volume=2.0, difficulty=1.0,
        effort=2.0, time=0.1, bugs=0.001,
    )
def _calculate_python_halstead(self, code: str) -> Dict[str, float]:
"""Calculate Halstead metrics for Python code"""
operators = []
operands = []
try:
tree = ast.parse(code)
# Walk the AST to collect operators and operands
for node in ast.walk(tree):
# Operators
if isinstance(node, ast.Add):
operators.append('+')
elif isinstance(node, ast.Sub):
operators.append('-')
elif isinstance(node, ast.Mult):
operators.append('*')
elif isinstance(node, ast.Div):
operators.append('/')
elif isinstance(node, ast.FloorDiv):
operators.append('//')
elif isinstance(node, ast.Mod):
operators.append('%')
elif isinstance(node, ast.Pow):
operators.append('**')
elif isinstance(node, ast.Eq):
operators.append('==')
elif isinstance(node, ast.NotEq):
operators.append('!=')
elif isinstance(node, ast.Lt):
operators.append('<')
elif isinstance(node, ast.LtE):
operators.append('<=')
elif isinstance(node, ast.Gt):
operators.append('>')
elif isinstance(node, ast.GtE):
operators.append('>=')
elif isinstance(node, ast.And):
operators.append('and')
elif isinstance(node, ast.Or):
operators.append('or')
elif isinstance(node, ast.Not):
operators.append('not')
elif isinstance(node, ast.If):
operators.append('if')
elif isinstance(node, ast.While):
operators.append('while')
elif isinstance(node, ast.For):
operators.append('for')
elif isinstance(node, ast.FunctionDef):
operators.append('def')
elif isinstance(node, ast.ClassDef):
operators.append('class')
elif isinstance(node, ast.Return):
operators.append('return')
elif isinstance(node, ast.Import):
operators.append('import')
elif isinstance(node, ast.ImportFrom):
operators.append('from')
elif isinstance(node, ast.Assign):
operators.append('=')
elif isinstance(node, ast.AugAssign):
operators.append('+=') # Simplified
elif isinstance(node, ast.Call):
operators.append('()')
elif isinstance(node, ast.Subscript):
operators.append('[]')
elif isinstance(node, ast.Attribute):
operators.append('.')
# Operands
elif isinstance(node, ast.Name):
operands.append(node.id)
elif isinstance(node, ast.Constant):
operands.append(str(node.value))
elif isinstance(node, ast.Str):
operands.append(node.s)
elif isinstance(node, ast.Num):
operands.append(str(node.n))
except SyntaxError:
# Fallback to regex-based analysis
return self._regex_halstead(code)
return self._compute_halstead_from_lists(operators, operands)
def _calculate_js_halstead(self, code: str) -> Dict[str, float]:
"""Calculate Halstead metrics for JavaScript/TypeScript"""
# Use regex-based analysis for JS/TS
return self._regex_halstead(code)
def _regex_halstead(self, code: str) -> Dict[str, float]:
"""Fallback regex-based Halstead calculation"""
# Remove comments and strings
code_cleaned = self._remove_comments_and_strings(code)
# Extract operators
operators = []
operator_patterns = self._get_operator_patterns()
for pattern in operator_patterns:
matches = re.findall(pattern, code_cleaned)
operators.extend(matches)
# Extract operands (identifiers and literals)
operand_pattern = r'\b[a-zA-Z_]\w*\b|\b\d+\.?\d*\b'
operands = re.findall(operand_pattern, code_cleaned)
# Filter out language keywords from operands
keywords = self._get_language_keywords()
operands = [op for op in operands if op not in keywords]
return self._compute_halstead_from_lists(operators, operands)
def _compute_halstead_from_lists(self, operators: List[str],
operands: List[str]) -> Dict[str, float]:
"""Compute Halstead metrics from operator and operand lists"""
# Count unique and total
unique_operators = set(operators)
unique_operands = set(operands)
n1 = len(unique_operators) if unique_operators else 1
n2 = len(unique_operands) if unique_operands else 1
N1 = len(operators) if operators else 1
N2 = len(operands) if operands else 1
# Calculate metrics
n = n1 + n2 # Vocabulary
N = N1 + N2 # Length
# Volume
volume = N * math.log2(n) if n > 0 else 0
# Difficulty
difficulty = (n1 / 2) * (N2 / n2) if n2 > 0 else 1
# Effort
effort = volume * difficulty
# Time (in seconds)
time = effort / 18
# Bugs estimate
bugs = volume / 3000
return {
'n1': n1,
'n2': n2,
'N1': N1,
'N2': N2,
'vocabulary': n,
'length': N,
'volume': volume,
'difficulty': difficulty,
'effort': effort,
'time': time,
'bugs': bugs
}
def _remove_comments_and_strings(self, code: str) -> str:
"""Remove comments and strings from code"""
# Remove single-line comments
if self.language == "python":
code = re.sub(r'#.*$', '', code, flags=re.MULTILINE)
# Remove docstrings
code = re.sub(r'""".*?"""', '', code, flags=re.DOTALL)
code = re.sub(r"'''.*?'''", '', code, flags=re.DOTALL)
else:
code = re.sub(r'//.*$', '', code, flags=re.MULTILINE)
code = re.sub(r'/\*.*?\*/', '', code, flags=re.DOTALL)
# Remove strings (simplified)
code = re.sub(r'"[^"]*"', '""', code)
code = re.sub(r"'[^']*'", "''", code)
return code
def _get_operator_patterns(self) -> List[str]:
"""Get regex patterns for operators"""
if self.language == "python":
return [
r'[+\-*/%]', # Arithmetic
r'[<>]=?|[!=]=', # Comparison
r'\band\b|\bor\b|\bnot\b', # Logical
r'\bif\b|\belse\b|\belif\b', # Conditional
r'\bfor\b|\bwhile\b', # Loops
r'\bdef\b|\bclass\b', # Definition
r'\breturn\b|\byield\b', # Return
r'[=]', # Assignment
r'[\[\]\(\)\{\}]', # Brackets
]
else:
return [
r'[+\-*/%]', # Arithmetic
r'===?|!==?|[<>]=?', # Comparison
r'&&|\|\||!', # Logical
r'\bif\b|\belse\b', # Conditional
r'\bfor\b|\bwhile\b|\bdo\b', # Loops
r'\bfunction\b|\bclass\b', # Definition
r'\breturn\b', # Return
r'=>', # Arrow functions
r'[=]', # Assignment
r'[\[\]\(\)\{\}]', # Brackets
]
def _get_language_keywords(self) -> Set[str]:
"""Get language keywords to filter from operands"""
if self.language == "python":
return {
'and', 'or', 'not', 'if', 'else', 'elif', 'while', 'for',
'break', 'continue', 'def', 'class', 'return', 'yield',
'import', 'from', 'as', 'try', 'except', 'finally',
'raise', 'assert', 'with', 'lambda', 'pass', 'del',
'global', 'nonlocal', 'True', 'False', 'None'
}
else:
return {
'if', 'else', 'switch', 'case', 'default', 'while', 'for',
'do', 'break', 'continue', 'function', 'class', 'return',
'const', 'let', 'var', 'new', 'this', 'typeof', 'instanceof',
'try', 'catch', 'finally', 'throw', 'import', 'export',
'from', 'as', 'default', 'true', 'false', 'null', 'undefined'
}
def calculate_maintainability_index(self,
                                    halstead_volume: float,
                                    cyclomatic_complexity: int,
                                    lines_of_code: int) -> float:
    """Calculate the Maintainability Index.

    Formula:
        MI = 171 - 5.2 * ln(HV) - 0.23 * CC - 16.2 * ln(LOC)

    Where:
        - HV: Halstead Volume
        - CC: Cyclomatic Complexity
        - LOC: Lines of Code

    Scale:
        - 85-100: Highly maintainable
        - 65-85: Moderately maintainable
        - <65: Difficult to maintain

    Args:
        halstead_volume: Halstead volume metric.
        cyclomatic_complexity: Cyclomatic complexity.
        lines_of_code: Number of lines.

    Returns:
        Maintainability index clamped to 0-100 and rounded to two
        decimals. Always a ``float``, including at the clamp bounds.
        A file with zero lines scores 100.0.
    """
    if lines_of_code == 0:
        return 100.0

    # Each input is floored at 1 so both logarithms are defined (>= 0).
    hv = max(1, halstead_volume)
    cc = max(1, cyclomatic_complexity)
    loc = max(1, lines_of_code)

    raw_index = 171 - 5.2 * math.log(hv) - 0.23 * cc - 16.2 * math.log(loc)

    # Float bounds keep the return type a float when the value is clamped.
    clamped = max(0.0, min(100.0, raw_index))
    return round(clamped, 2)
def calculate_cyclomatic_complexity(self, code: str) -> int:
    """Calculate a simplified cyclomatic complexity for ``code``.

    The value is 1 plus the number of decision-point tokens found by
    regex:

    - Python: ``if``, ``elif``, ``while``, ``for``, ``except``, ``and``,
      ``or``.
    - Any other language (JavaScript/TypeScript rules): ``if``,
      ``while``, ``for``, ``switch``, ``catch``, ``&&``, ``||`` and the
      ternary ``? ... :``. An ``else if`` counts once, through its
      ``if``.

    The text is scanned as-is, so these words are also counted when they
    appear inside comments or string literals.

    Args:
        code: Source code.

    Returns:
        Cyclomatic complexity (always >= 1).
    """
    if self.language == "python":
        decision_patterns = (
            r'\b(?:if|elif|while|for|except|and|or)\b',
        )
    else:
        # JavaScript/TypeScript. ``else if`` has no pattern of its own:
        # its ``if`` is already matched by the keyword pattern.
        decision_patterns = (
            r'\b(?:if|while|for|switch|catch)\b',
            r'&&|\|\|',
            r'\?[^:]*:',  # Ternary
        )

    return 1 + sum(
        len(re.findall(pattern, code)) for pattern in decision_patterns
    )
def calculate_cognitive_complexity(self, code: str) -> int:
"""
Calculate cognitive complexity for code
Args:
code: Source code
Returns:
Cognitive complexity (simplified for maintainability analyzer)
"""
# For maintainability analyzer, use simplified cognitive complexity
# (similar to cyclomatic but with nesting consideration)
return self.calculate_cyclomatic_complexity(code)