# python_enhanced.py (12.1 kB)
"""Enhanced Python parser combining AST and Tree-sitter"""
import ast
import json
from typing import List, Dict, Any, Optional
from pathlib import Path
from .base_parser import BaseParser
from .treesitter_unified import TreeSitterUnifiedParser
class PythonEnhancedParser(BaseParser):
    """Python parser that prefers the stdlib ``ast`` module, with a Tree-sitter fallback.

    Every ``extract_*`` method first tries to build a Python AST.  When the
    source cannot be parsed (``SyntaxError``, or ``ValueError`` for source
    containing null bytes), the call is delegated to the equivalent method of
    the Tree-sitter parser stored in ``self.ts_parser``.
    """

    def __init__(self):
        """Create the Tree-sitter parser used as the fallback backend."""
        self.ts_parser = TreeSitterUnifiedParser('python')

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _try_parse(code: str) -> Optional[ast.AST]:
        """Return the AST for *code*, or ``None`` when it is not valid Python.

        ``ast.parse`` raises ``SyntaxError`` for invalid source and, on some
        Python versions, ``ValueError`` when the source contains null bytes;
        both mean "use the fallback parser".
        """
        try:
            return ast.parse(code)
        except (SyntaxError, ValueError):
            return None

    @staticmethod
    def _unparse(node: ast.AST) -> str:
        """Render an AST node back to source text.

        ``ast.unparse`` only exists on Python 3.9+.  On older interpreters
        fall back to ``ast.dump`` so the result is at least deterministic
        (``str(node)`` would embed a memory address).
        """
        if hasattr(ast, 'unparse'):
            return ast.unparse(node)
        return ast.dump(node)

    @staticmethod
    def _unique(items) -> list:
        """De-duplicate *items* while keeping first-seen order.

        ``list(set(...))`` would reorder strings differently on every run
        because of hash randomisation.
        """
        return list(dict.fromkeys(items))

    def _base_names(self, node: ast.ClassDef) -> List[str]:
        """Return the textual names of the base classes of *node*."""
        return [
            base.id if isinstance(base, ast.Name) else self._unparse(base)
            for base in node.bases
        ]

    def _describe_parameters(self, arguments: ast.arguments) -> tuple:
        """Describe every parameter of a function.

        Args:
            arguments: The ``args`` attribute of a function definition node.

        Returns:
            A ``(params, rendered)`` pair.  ``params`` is a list of
            ``{'name': ..., 'type': ...}`` dicts (``type`` is ``None`` when
            unannotated) covering positional-only, regular, ``*args``,
            keyword-only and ``**kwargs`` parameters in declaration order.
            ``rendered`` holds the matching signature fragments, including
            the bare ``/`` and ``*`` separators.  Default values are not
            rendered.
        """
        params: List[Dict[str, Any]] = []
        rendered: List[str] = []

        def add(arg: ast.arg, prefix: str = '') -> None:
            annotation = self._unparse(arg.annotation) if arg.annotation else None
            params.append({'name': arg.arg, 'type': annotation})
            text = prefix + arg.arg
            if annotation:
                text += f": {annotation}"
            rendered.append(text)

        # ``posonlyargs`` was added in Python 3.8.
        positional_only = getattr(arguments, 'posonlyargs', [])
        for arg in positional_only:
            add(arg)
        if positional_only:
            rendered.append('/')
        for arg in arguments.args:
            add(arg)
        if arguments.vararg:
            add(arguments.vararg, '*')
        elif arguments.kwonlyargs:
            # Keyword-only parameters without *args need the bare marker.
            rendered.append('*')
        for arg in arguments.kwonlyargs:
            add(arg)
        if arguments.kwarg:
            add(arguments.kwarg, '**')
        return params, rendered

    @staticmethod
    def _raised_exception_names(func_node: ast.AST) -> List[str]:
        """Collect names raised inside *func_node* (nested scopes included).

        Only ``raise Name`` and ``raise Name(...)`` are recognised; bare
        re-raises and attribute/dynamic expressions are ignored.
        """
        names = []
        for sub in ast.walk(func_node):
            if not isinstance(sub, ast.Raise) or sub.exc is None:
                continue
            target = sub.exc.func if isinstance(sub.exc, ast.Call) else sub.exc
            if isinstance(target, ast.Name):
                names.append(target.id)
        return names

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def parse(self, code: str, file_path: Optional[Path] = None) -> Any:
        """Parse *code*, returning an ``ast`` tree when it is valid Python.

        Args:
            code: Source text to parse.
            file_path: Optional path, forwarded only to the fallback parser.

        Returns:
            The ``ast`` module tree, or whatever ``self.ts_parser.parse``
            returns when the source is not valid Python.
        """
        tree = self._try_parse(code)
        if tree is not None:
            return tree
        # Fall back to Tree-sitter for invalid Python
        return self.ts_parser.parse(code, file_path)

    def extract_symbols(self, code: str) -> List[Dict[str, Any]]:
        """Extract functions, classes and imports found anywhere in *code*.

        Symbols are reported in ``ast.walk`` order, not source order.  Each
        imported alias becomes its own entry whose ``signature`` is the full
        import statement.
        """
        tree = self._try_parse(code)
        if tree is None:
            # Fall back to Tree-sitter
            return self.ts_parser.extract_symbols(code)

        symbols: List[Dict[str, Any]] = []
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                symbols.append(self._extract_function_info(node, code))
            elif isinstance(node, ast.ClassDef):
                symbols.append(self._extract_class_info(node, code))
            elif isinstance(node, (ast.Import, ast.ImportFrom)):
                statement = self._unparse(node)  # same for every alias
                symbols.extend(
                    {
                        'name': alias.name,
                        'type': 'import',
                        'line_num': node.lineno,
                        'signature': statement,
                        'docstring': ''
                    }
                    for alias in node.names
                )
        return symbols

    def extract_functions(self, code: str) -> List[Dict[str, Any]]:
        """Extract every (sync or async) function definition in *code*."""
        tree = self._try_parse(code)
        if tree is None:
            return self.ts_parser.extract_functions(code)
        return [
            self._extract_function_info(node, code)
            for node in ast.walk(tree)
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
        ]

    def _extract_function_info(self, node: ast.AST, code: str) -> Dict[str, Any]:
        """Build the symbol record for one function definition node.

        Args:
            node: An ``ast.FunctionDef`` or ``ast.AsyncFunctionDef``.
            code: Full source text (unused; kept for interface compatibility).

        Returns:
            A dict with name, type, line range, reconstructed signature,
            docstring, parameters, return type, async flag and decorators.
        """
        is_async = isinstance(node, ast.AsyncFunctionDef)
        params, rendered = self._describe_parameters(node.args)
        return_type = self._unparse(node.returns) if node.returns else None

        keyword = "async def" if is_async else "def"
        signature = f"{keyword} {node.name}({', '.join(rendered)})"
        if return_type:
            signature += f" -> {return_type}"

        return {
            'name': node.name,
            'type': 'async_function' if is_async else 'function',
            'line_num': node.lineno,
            'end_line': getattr(node, 'end_lineno', None) or node.lineno,
            'signature': signature,
            'docstring': ast.get_docstring(node) or "",
            'parameters': params,
            'return_type': return_type,
            'is_async': is_async,
            'decorators': [self._unparse(d) for d in node.decorator_list]
        }

    def extract_classes(self, code: str) -> List[Dict[str, Any]]:
        """Extract every class definition in *code*."""
        tree = self._try_parse(code)
        if tree is None:
            return self.ts_parser.extract_classes(code)
        return [
            self._extract_class_info(node, code)
            for node in ast.walk(tree)
            if isinstance(node, ast.ClassDef)
        ]

    def _extract_class_info(self, node: ast.ClassDef, code: str) -> Dict[str, Any]:
        """Build the symbol record for one class definition node.

        Args:
            node: The class definition node.
            code: Full source text (unused; kept for interface compatibility).

        Returns:
            A dict with name, line range, signature, docstring, base class
            names, names of methods defined directly in the class body, and
            decorators.
        """
        methods = [
            item.name
            for item in node.body
            if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef))
        ]
        return {
            'name': node.name,
            'type': 'class',
            'line_num': node.lineno,
            'end_line': getattr(node, 'end_lineno', None) or node.lineno,
            'signature': f"class {node.name}",
            'docstring': ast.get_docstring(node) or "",
            'inherits_from': self._base_names(node),
            'methods': methods,
            'decorators': [self._unparse(d) for d in node.decorator_list]
        }

    def extract_imports(self, code: str) -> List[str]:
        """Extract imported module and member names from *code*.

        ``import a.b`` yields ``'a.b'``; ``from m import x`` yields ``'m'``
        and ``'m.x'``.  Leading dots of relative imports are not recorded, so
        ``from . import x`` yields just ``'x'``.  Duplicates are removed and
        first-seen order is kept.
        """
        tree = self._try_parse(code)
        if tree is None:
            return self.ts_parser.extract_imports(code)

        imports: List[str] = []
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                imports.extend(alias.name for alias in node.names)
            elif isinstance(node, ast.ImportFrom):
                module = node.module or ''
                if module:
                    imports.append(module)
                    # Also track what's imported from the module
                    imports.extend(f"{module}.{alias.name}" for alias in node.names)
                else:
                    # ``from . import x``: there is no module name, so record
                    # the imported names instead of an empty string.
                    imports.extend(alias.name for alias in node.names)
        return self._unique(imports)

    def extract_type_info(self, code: str, symbol_name: Optional[str] = None) -> Dict[str, Any]:
        """Extract parameter, return and annotation types from *code*.

        Args:
            code: Source text to analyse.
            symbol_name: When given, only the first function with this name
                (in ``ast.walk`` order) is inspected and the walk stops there.
                When ``None``, data from all functions is merged and
                ``return_type`` reflects the last annotated function visited.

        Returns:
            A dict with ``parameters``, ``return_type``, ``exceptions_raised``
            and ``type_annotations``.  ``type_annotations`` also contains
            annotated assignments to simple names visited during the walk.
        """
        tree = self._try_parse(code)
        if tree is None:
            return self.ts_parser.extract_type_info(code, symbol_name)

        type_info: Dict[str, Any] = {
            'parameters': [],
            'return_type': None,
            'exceptions_raised': [],
            'type_annotations': {}
        }
        for node in ast.walk(tree):
            # Extract from functions
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                if symbol_name is not None and node.name != symbol_name:
                    continue
                params, _ = self._describe_parameters(node.args)
                for param in params:
                    if param['type'] is not None:
                        type_info['type_annotations'][param['name']] = param['type']
                type_info['parameters'].extend(params)
                if node.returns:
                    type_info['return_type'] = self._unparse(node.returns)
                type_info['exceptions_raised'].extend(self._raised_exception_names(node))
                if symbol_name is not None:
                    # The requested function has been processed.
                    break
            # Extract from annotated assignments (e.g. class attributes)
            elif isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name):
                type_info['type_annotations'][node.target.id] = self._unparse(node.annotation)

        # Remove duplicate exceptions
        type_info['exceptions_raised'] = self._unique(type_info['exceptions_raised'])
        return type_info

    def extract_dependencies(self, code: str) -> Dict[str, Any]:
        """Extract imports, called names and base classes from *code*.

        For attribute calls such as ``obj.method()`` only the final attribute
        name (``'method'``) is recorded.  ``calls`` and ``inherits_from`` are
        de-duplicated in first-seen order.
        """
        tree = self._try_parse(code)
        if tree is None:
            return self.ts_parser.extract_dependencies(code)

        calls: List[str] = []
        inherits_from: List[str] = []
        for node in ast.walk(tree):
            if isinstance(node, ast.Call):
                if isinstance(node.func, ast.Name):
                    calls.append(node.func.id)
                elif isinstance(node.func, ast.Attribute):
                    calls.append(node.func.attr)
            elif isinstance(node, ast.ClassDef):
                inherits_from.extend(self._base_names(node))

        return {
            'imports': self.extract_imports(code),
            'calls': self._unique(calls),
            'inherits_from': self._unique(inherits_from)
        }

    def extract_documentation(self, code: str) -> Dict[str, Any]:
        """Extract TODO items, inline comments and docstrings from *code*.

        Comments come from Tree-sitter because the AST does not preserve
        them; docstrings of functions and classes come from the AST.  When
        the code is not valid Python, ``docstrings`` is left empty on purpose
        (best effort) while the comment data is still returned.
        """
        # Use Tree-sitter for comment extraction (AST doesn't preserve comments)
        ts_docs = self.ts_parser.extract_documentation(code)
        docs: Dict[str, Any] = {
            'todo_items': ts_docs.get('todo_items', []),
            'inline_comments': ts_docs.get('inline_comments', []),
            'docstrings': []
        }

        # Use AST for docstrings
        tree = self._try_parse(code)
        if tree is None:
            return docs
        for node in ast.walk(tree):
            if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
                continue
            docstring = ast.get_docstring(node)
            if docstring:
                docs['docstrings'].append({
                    'name': node.name,
                    'docstring': docstring,
                    'line': node.lineno
                })
        return docs