# usage_analyzer.py
"""
Usage Analyzer for tracking code usage patterns and test coverage
Identifies where symbols are used, test files, and common patterns
"""
import ast
import re
import json
from pathlib import Path
from typing import Dict, List, Set, Tuple, Optional, Any
from collections import defaultdict, Counter
class UsageAnalyzer:
"""Analyzes code usage patterns and test coverage"""
def __init__(self, project_root: Path):
self.project_root = Path(project_root)
self.usage_map = defaultdict(list) # symbol -> list of usage locations
self.test_map = defaultdict(set) # symbol -> set of test files
self.pattern_map = defaultdict(list) # symbol -> usage patterns
def analyze_project_usage(self) -> Dict[str, Any]:
"""Analyze usage patterns across the entire project"""
usage_data = {}
# Find all Python files
python_files = list(self.project_root.rglob('*.py'))
# Separate test files from source files
test_files = []
source_files = []
for file_path in python_files:
# Skip common directories to ignore
if any(part in file_path.parts for part in ['.git', '__pycache__', 'venv', '.venv']):
continue
# Identify test files
if self._is_test_file(file_path):
test_files.append(file_path)
else:
source_files.append(file_path)
# First pass: collect all symbol definitions
all_symbols = set()
for file_path in source_files:
symbols = self._extract_symbols(file_path)
all_symbols.update(symbols)
# Second pass: find usages in all files
for file_path in python_files:
usages = self._find_symbol_usages(file_path, all_symbols)
for symbol, contexts in usages.items():
self.usage_map[symbol].extend(contexts)
# Track if usage is in a test file
if self._is_test_file(file_path):
self.test_map[symbol].add(str(file_path.relative_to(self.project_root)))
# Calculate usage statistics
for symbol in all_symbols:
usage_data[symbol] = {
'usage_frequency': len(self.usage_map.get(symbol, [])),
'test_files': list(self.test_map.get(symbol, [])),
'patterns': self._extract_patterns(symbol),
'test_coverage': self._estimate_coverage(symbol)
}
return usage_data
def analyze_symbol_usage(self, symbol_name: str, file_path: Path) -> Dict[str, Any]:
"""Analyze usage for a specific symbol"""
usage_info = {
'usage_frequency': 0,
'test_files': [],
'example_usage': '',
'common_patterns': [],
'test_coverage': 0.0
}
# Search for usages across the project
python_files = list(self.project_root.rglob('*.py'))
usage_count = 0
test_files = []
usage_examples = []
for py_file in python_files:
if any(part in py_file.parts for part in ['.git', '__pycache__', 'venv', '.venv']):
continue
try:
with open(py_file, 'r', encoding='utf-8') as f:
content = f.read()
# Simple text search for symbol usage
if symbol_name in content:
usage_count += content.count(symbol_name)
# Extract usage example
lines = content.split('\n')
for i, line in enumerate(lines):
if symbol_name in line and not line.strip().startswith('#'):
# Get context around usage
start = max(0, i - 1)
end = min(len(lines), i + 2)
example = '\n'.join(lines[start:end])
usage_examples.append(example.strip())
# Check if it's a test file
if self._is_test_file(py_file):
test_files.append(str(py_file.relative_to(self.project_root)))
except Exception:
continue
usage_info['usage_frequency'] = usage_count
usage_info['test_files'] = list(set(test_files))
# Get best example
if usage_examples:
# Prefer examples from test files
test_examples = [ex for ex in usage_examples if 'test' in ex.lower() or 'assert' in ex.lower()]
usage_info['example_usage'] = (test_examples[0] if test_examples else usage_examples[0])[:500]
# Extract common patterns
patterns = self._analyze_patterns(symbol_name, usage_examples)
usage_info['common_patterns'] = patterns[:5] # Top 5 patterns
# Estimate test coverage
usage_info['test_coverage'] = self._calculate_coverage(symbol_name, test_files, usage_count)
return usage_info
def _is_test_file(self, file_path: Path) -> bool:
"""Check if a file is a test file"""
name = file_path.name.lower()
return (
name.startswith('test_') or
name.endswith('_test.py') or
'test' in file_path.parts or
'tests' in file_path.parts
)
def _extract_symbols(self, file_path: Path) -> Set[str]:
"""Extract all symbol definitions from a file"""
symbols = set()
try:
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
tree = ast.parse(content)
for node in ast.walk(tree):
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
symbols.add(node.name)
except Exception:
pass
return symbols
def _find_symbol_usages(self, file_path: Path, symbols: Set[str]) -> Dict[str, List[str]]:
"""Find usages of symbols in a file"""
usages = defaultdict(list)
try:
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
lines = content.split('\n')
for symbol in symbols:
for i, line in enumerate(lines):
if symbol in line and not line.strip().startswith('#'):
# Get usage context
context = line.strip()
usages[symbol].append(context)
except Exception:
pass
return usages
def _extract_patterns(self, symbol: str) -> List[str]:
"""Extract common usage patterns for a symbol"""
patterns = []
contexts = self.usage_map.get(symbol, [])
if contexts:
# Find common patterns using simple regex
pattern_counter = Counter()
for context in contexts:
# Extract pattern type (assignment, call, import, etc.)
if f'{symbol}(' in context:
pattern_counter['function_call'] += 1
elif f'= {symbol}' in context:
pattern_counter['assignment'] += 1
elif f'from' in context and f'import {symbol}' in context:
pattern_counter['import'] += 1
elif f'class' in context and f'({symbol})' in context:
pattern_counter['inheritance'] += 1
elif f'{symbol}.' in context:
pattern_counter['attribute_access'] += 1
# Get most common patterns
patterns = [pattern for pattern, _ in pattern_counter.most_common(5)]
return patterns
def _analyze_patterns(self, symbol: str, examples: List[str]) -> List[str]:
"""Analyze usage patterns from examples"""
patterns = Counter()
for example in examples:
# Identify pattern type
if f'{symbol}(' in example:
if '=' in example.split(f'{symbol}(')[0]:
patterns['result = ' + symbol + '(...)'] += 1
else:
patterns[symbol + '(...)'] += 1
elif f'= {symbol}' in example:
patterns['var = ' + symbol] += 1
elif f'import {symbol}' in example:
patterns['import ' + symbol] += 1
elif f'class' in example and symbol in example:
patterns['class Child(' + symbol + ')'] += 1
elif f'{symbol}.' in example:
patterns[symbol + '.method()'] += 1
elif f'isinstance' in example and symbol in example:
patterns['isinstance(obj, ' + symbol + ')'] += 1
return [pattern for pattern, _ in patterns.most_common()]
def _estimate_coverage(self, symbol: str) -> float:
"""Estimate test coverage for a symbol"""
total_usages = len(self.usage_map.get(symbol, []))
test_files = self.test_map.get(symbol, set())
if total_usages == 0:
return 0.0
# Simple heuristic: if used in test files, estimate coverage
if test_files:
# Higher coverage if used in multiple test files
coverage = min(1.0, len(test_files) * 0.3 + 0.4)
else:
coverage = 0.0
return coverage
def _calculate_coverage(self, symbol: str, test_files: List[str], usage_count: int) -> float:
"""Calculate estimated test coverage"""
if not test_files:
return 0.0
# Simple heuristic based on test file presence
base_coverage = 0.3 # Base coverage if any test exists
# Add coverage based on number of test files
test_file_bonus = min(0.4, len(test_files) * 0.1)
# Add coverage based on usage frequency in tests
usage_bonus = min(0.3, usage_count * 0.01)
return min(1.0, base_coverage + test_file_bonus + usage_bonus)