MCP Code Analysis Server

treesitter_parser.py•110 KiB

"""TreeSitter parser for code analysis.""" from __future__ import annotations from importlib import import_module from typing import TYPE_CHECKING, Any, TypedDict import tree_sitter import tree_sitter_java as tsjava import tree_sitter_php as tsphp import tree_sitter_python as tspython from src.logger import get_logger from src.parser.complexity_calculator import ComplexityCalculator if TYPE_CHECKING: from pathlib import Path from types import ModuleType # Optional TreeSitter language imports with error handling JAVASCRIPT_AVAILABLE = False TYPESCRIPT_AVAILABLE = False # These module variables are optional and may be None at runtime tsjavascript: ModuleType | None = None tstypescript: ModuleType | None = None try: tsjavascript = import_module("tree_sitter_javascript") JAVASCRIPT_AVAILABLE = True except ImportError: JAVASCRIPT_AVAILABLE = False try: tstypescript = import_module("tree_sitter_typescript") TYPESCRIPT_AVAILABLE = True except ImportError: TYPESCRIPT_AVAILABLE = False logger = get_logger(__name__) class TreeSitterParser: """Base TreeSitter parser.""" def __init__(self, language: tree_sitter.Language | None = None) -> None: self.language: tree_sitter.Language | None = language if language is not None: self.parser = tree_sitter.Parser(language) else: self.parser = tree_sitter.Parser() def parse_file(self, file_path: Path) -> tree_sitter.Tree | None: """Parse a file and return the syntax tree.""" try: with file_path.open("rb") as f: content = f.read() return self.parse_content(content) except Exception: logger.exception("Error parsing file %s", file_path) return None def parse_content(self, content: bytes) -> tree_sitter.Tree | None: """Parse content and return the syntax tree.""" if not self.language: msg = "Language not set for parser" raise ValueError(msg) try: return self.parser.parse(content) except Exception: logger.exception("Error parsing content: %s", content[:100]) return None def require_language(self) -> tree_sitter.Language: """Return the configured language or raise if missing (for typing).""" if self.language is None: msg = "Language not set for parser" raise ValueError(msg) return self.language def get_node_text(self, node: tree_sitter.Node, content: bytes) -> str: """Get text content of a node.""" return content[node.start_byte : node.end_byte].decode("utf-8", errors="ignore") def get_node_location(self, node: tree_sitter.Node) -> tuple[int, int]: """Get start and end line numbers of a node.""" return node.start_point[0] + 1, node.end_point[0] + 1 def find_nodes_by_type( self, node: tree_sitter.Node, node_type: str, max_depth: int | None = None, ) -> list[tree_sitter.Node]: """Find all nodes of a specific type.""" results = [] def traverse(n: tree_sitter.Node, depth: int = 0) -> None: if max_depth is not None and depth > max_depth: return if n.type == node_type: results.append(n) for child in n.children: traverse(child, depth + 1) traverse(node) return results def get_docstring(self, node: tree_sitter.Node, content: bytes) -> str | None: """Extract docstring from a node.""" # Look for string as first statement for child in node.children: if child.type == "block": for stmt in child.children: if stmt.type == "expression_statement": for expr in stmt.children: if expr.type == "string": docstring = self.get_node_text(expr, content) # Remove quotes if docstring.startswith(('"""', "'''")): return docstring[3:-3].strip() if docstring.startswith(('"', "'")): return docstring[1:-1].strip() break break return None class PyImportData(TypedDict, total=False): import_statement: str imported_from: str | None imported_names: list[str] is_relative: bool level: int line_number: int class PyFuncParam(TypedDict, total=False): name: str | None type: str | None default: str | None class PyFuncData(TypedDict, total=False): name: str | None parameters: list[PyFuncParam] return_type: str | None docstring: str | None decorators: list[str] is_async: bool is_generator: bool is_property: bool is_staticmethod: bool is_classmethod: bool start_line: int end_line: int complexity: int class PyClassData(TypedDict, total=False): name: str | None docstring: str | None base_classes: list[str] decorators: list[str] start_line: int end_line: int is_abstract: bool methods: list[PyFuncData] class PythonParser(TreeSitterParser): """Python-specific TreeSitter parser.""" def __init__(self) -> None: self.language = tree_sitter.Language(tspython.language()) super().__init__(self.language) self.complexity_calculator = ComplexityCalculator(use_plugin=False) def extract_imports( # noqa: PLR0912 self, tree: tree_sitter.Tree, content: bytes, ) -> list[dict[str, Any]]: """Extract import statements.""" imports: list[dict[str, Any]] = [] # Find import statements import_nodes = self.find_nodes_by_type(tree.root_node, "import_statement") for node in import_nodes: import_data: dict[str, Any] = { "import_statement": self.get_node_text(node, content), "imported_from": None, "imported_names": [], "is_relative": False, "level": 0, "line_number": node.start_point[0] + 1, } # Extract imported names for child in node.children: if child.type in {"dotted_name", "identifier"}: import_data["imported_names"].append( self.get_node_text(child, content), ) imports.append(import_data) # Find from imports from_import_nodes = self.find_nodes_by_type( tree.root_node, "import_from_statement", ) for node in from_import_nodes: import_data = { "import_statement": self.get_node_text(node, content), "imported_from": None, "imported_names": [], "is_relative": False, "level": 0, "line_number": node.start_point[0] + 1, } # Extract module name and imported names found_from = False found_import = False has_relative = False for child in node.children: if child.type == "from": found_from = True elif child.type == "import": found_import = True elif child.type == "relative_import": import_data["is_relative"] = True has_relative = True # Count dots for relative level # The relative_import node contains import_prefix child with dots for rel_child in child.children: if rel_child.type == "import_prefix": dots = self.get_node_text(rel_child, content) import_data["level"] = len(dots) elif rel_child.type == "dotted_name": # This is the module name after dots import_data["imported_from"] = self.get_node_text( rel_child, content ) elif child.type == "dotted_name": if found_from and not found_import and not has_relative: # First dotted_name after 'from' (and not after relative import) is the module import_data["imported_from"] = self.get_node_text( child, content ) else: # After 'import' or if we have relative import, it's an imported item import_data["imported_names"].append( self.get_node_text(child, content), ) elif child.type in {"import_list", "identifier"}: if child.type == "identifier": import_data["imported_names"].append( self.get_node_text(child, content), ) else: # Handle import list for name_node in child.children: if name_node.type in {"identifier", "dotted_name"}: import_data["imported_names"].append( self.get_node_text(name_node, content), ) imports.append(import_data) return imports def extract_functions( # noqa: PLR0912 self, tree: tree_sitter.Tree, content: bytes, parent_class: tree_sitter.Node | None = None, ) -> list[dict[str, Any]]: """Extract function definitions.""" functions: list[dict[str, Any]] = [] root = parent_class if parent_class else tree.root_node # When extracting module-level functions, we need to exclude those inside classes if parent_class: function_nodes = self.find_nodes_by_type( root, "function_definition", max_depth=4, # Increased to handle decorated methods ) else: # For module-level, find all functions then filter out those inside classes all_function_nodes = self.find_nodes_by_type(root, "function_definition") class_nodes = self.find_nodes_by_type(root, "class_definition") # Create a set of functions that are inside classes class_function_nodes = set() for class_node in class_nodes: class_functions = self.find_nodes_by_type( class_node, "function_definition" ) class_function_nodes.update(class_functions) # Only keep module-level functions function_nodes = [ f for f in all_function_nodes if f not in class_function_nodes ] for node in function_nodes: func_data: dict[str, Any] = { "name": None, "parameters": [], "return_type": None, "docstring": None, "decorators": [], "is_async": False, "is_generator": False, "is_property": False, "is_staticmethod": False, "is_classmethod": False, "start_line": node.start_point[0] + 1, "end_line": node.end_point[0] + 1, } # Check for async for child in node.children: if child.type == "async": func_data["is_async"] = True break # Extract decorators decorator_nodes: list[tree_sitter.Node] = [] prev_sibling = node.prev_sibling while prev_sibling and prev_sibling.type == "decorator": decorator_nodes.insert(0, prev_sibling) prev_sibling = prev_sibling.prev_sibling for dec_node in decorator_nodes: decorator_text = self.get_node_text(dec_node, content).strip() if decorator_text.startswith("@"): decorator_name = decorator_text[1:].split("(")[0] func_data["decorators"].append(decorator_name) # Check for special decorators if decorator_name == "property": func_data["is_property"] = True elif decorator_name == "staticmethod": func_data["is_staticmethod"] = True elif decorator_name == "classmethod": func_data["is_classmethod"] = True elif decorator_name.endswith((".setter", ".deleter")): # Property setters and deleters func_data["is_property"] = True # Normalize convenience flag func_data["is_static"] = func_data.get("is_staticmethod", False) # Extract function details for child in node.children: if child.type == "identifier": func_data["name"] = self.get_node_text(child, content) elif child.type == "parameters": func_data["parameters"] = self._extract_parameters(child, content) elif child.type == "type": # Return type annotation func_data["return_type"] = ( self.get_node_text(child, content).strip("->").strip() ) elif child.type == "block" and ( ( self.find_nodes_by_type(child, "yield_statement") or self.find_nodes_by_type(child, "yield_expression") or self.find_nodes_by_type(child, "yield") ) or self._contains_generator_return(child, content) ): # Generator detected by yield nodes func_data["is_generator"] = True # Extract docstring func_data["docstring"] = self.get_docstring(node, content) # Calculate cyclomatic complexity func_data["complexity"] = self.complexity_calculator.calculate_complexity( node, content ) functions.append(func_data) return functions def extract_classes( # noqa: PLR0912 self, tree: tree_sitter.Tree, content: bytes, ) -> list[dict[str, Any]]: """Extract class definitions.""" classes: list[dict[str, Any]] = [] class_nodes = self.find_nodes_by_type(tree.root_node, "class_definition") for node in class_nodes: class_data: dict[str, Any] = { "name": None, "docstring": None, "base_classes": [], "decorators": [], "start_line": node.start_point[0] + 1, "end_line": node.end_point[0] + 1, "is_abstract": False, "methods": [], } # Extract decorators decorator_nodes: list[tree_sitter.Node] = [] prev_sibling = node.prev_sibling while prev_sibling and prev_sibling.type == "decorator": decorator_nodes.insert(0, prev_sibling) prev_sibling = prev_sibling.prev_sibling for dec_node in decorator_nodes: decorator_text = self.get_node_text(dec_node, content).strip() if decorator_text.startswith("@"): decorator_name = decorator_text[1:].split("(")[0] class_data["decorators"].append(decorator_name) # Check for abstract class decorators if "abstract" in decorator_name.lower(): class_data["is_abstract"] = True # Extract class details for child in node.children: if child.type == "identifier": class_data["name"] = self.get_node_text(child, content) elif child.type == "argument_list": # Extract base classes for arg in child.children: if arg.type in {"identifier", "attribute"}: class_data["base_classes"].append( self.get_node_text(arg, content), ) # Extract docstring class_data["docstring"] = self.get_docstring(node, content) # Extract methods class_data["methods"] = self.extract_functions(tree, content, node) # Check if abstract based on methods if not class_data["is_abstract"]: for method in class_data["methods"]: if "abstractmethod" in method.get("decorators", []): class_data["is_abstract"] = True break classes.append(class_data) return classes def extract_module_info( self, tree: tree_sitter.Tree, content: bytes, ) -> dict[str, Any]: """Extract module-level information.""" return { "docstring": self._get_module_docstring(tree, content), "imports": self.extract_imports(tree, content), "classes": self.extract_classes(tree, content), "functions": self.extract_functions(tree, content), } def _extract_parameters( self, params_node: tree_sitter.Node, content: bytes, ) -> list[dict[str, Any]]: """Extract function parameters.""" parameters: list[dict[str, Any]] = [] for child in params_node.children: if child.type in ( "identifier", "typed_parameter", "typed_default_parameter", "default_parameter", ): param_data: dict[str, Any] = { "name": None, "type": None, "default": None, } if child.type == "identifier": param_data["name"] = self.get_node_text(child, content) else: # Handle typed and default parameters for subchild in child.children: if subchild.type == "identifier": param_data["name"] = self.get_node_text(subchild, content) elif subchild.type == "type": param_data["type"] = ( self.get_node_text(subchild, content).strip(":").strip() ) elif subchild.type not in (":", "="): # Default value param_data["default"] = self.get_node_text( subchild, content, ) if param_data["name"] and param_data["name"] not in ( "self", "cls", "*", "**", "/", ): parameters.append(param_data) return parameters def _get_module_docstring( self, tree: tree_sitter.Tree, content: bytes, ) -> str | None: """Extract module-level docstring.""" # Module docstring is the first string in the file for child in tree.root_node.children: if child.type == "expression_statement": for expr in child.children: if expr.type == "string": docstring = self.get_node_text(expr, content) # Remove quotes if docstring.startswith(('"""', "'''")): return docstring[3:-3].strip() if docstring.startswith(('"', "'")): return docstring[1:-1].strip() break return None def _contains_generator_return( self, block_node: tree_sitter.Node, _content: bytes, ) -> bool: """Check if a block contains a return statement with a generator expression.""" return_nodes = self.find_nodes_by_type(block_node, "return_statement") for return_node in return_nodes: # Check for generator expressions in return statements if self.find_nodes_by_type(return_node, "generator_expression"): return True # Check for yield from expressions for child in return_node.children: if child.type == "yield_from_expression": return True return False class PHPParser(TreeSitterParser): """PHP-specific TreeSitter parser.""" def __init__(self) -> None: self.language = tree_sitter.Language(tsphp.language_php()) super().__init__(self.language) self.complexity_calculator = ComplexityCalculator(language="php") self.references: list[dict[str, Any]] = [] def extract_imports( self, tree: tree_sitter.Tree, content: bytes, ) -> list[dict[str, Any]]: """Extract use statements (imports) in PHP.""" imports = [] # Find use statements use_nodes = self.find_nodes_by_type(tree.root_node, "namespace_use_declaration") for node in use_nodes: import_data: dict[str, Any] import_data = { "import_statement": self.get_node_text(node, content), "imported_from": None, "imported_names": [], "is_relative": False, "level": 0, "line_number": node.start_point[0] + 1, } # Extract namespace/class names for child in node.children: if child.type == "namespace_use_clause": # Extract the name from the use clause alias_name = None for clause_child in child.children: if clause_child.type == "name": # This could be either the import name or alias if import_data["imported_from"] is None: name = self.get_node_text(clause_child, content) import_data["imported_from"] = name import_data["imported_names"].append(name) else: # This is the alias alias_name = self.get_node_text(clause_child, content) elif clause_child.type == "qualified_name": name = self.get_node_text(clause_child, content) import_data["imported_from"] = name # Last part is the imported name parts = name.split("\\") if parts: import_data["imported_names"].append(parts[-1]) # If we found an alias, replace the imported names if alias_name: import_data["imported_names"] = [alias_name] imports.append(import_data) return imports def extract_functions( self, tree: tree_sitter.Tree, content: bytes, parent_class: tree_sitter.Node | None = None, ) -> list[dict[str, Any]]: """Extract function/method definitions.""" functions = [] root = parent_class if parent_class else tree.root_node # Find function declarations if parent_class: function_nodes = self.find_nodes_by_type( root, "method_declaration", max_depth=4, ) func_data: dict[str, Any] else: # Module-level functions function_nodes = self.find_nodes_by_type(root, "function_definition") for node in function_nodes: func_data = { "name": None, "parameters": [], "return_type": None, "docstring": None, "decorators": [], "is_async": False, "is_generator": False, "is_property": False, "is_staticmethod": False, "is_classmethod": False, "start_line": node.start_point[0] + 1, "end_line": node.end_point[0] + 1, } # Extract function details for child in node.children: if child.type == "name": func_data["name"] = self.get_node_text(child, content) elif child.type == "formal_parameters": func_data["parameters"] = self._extract_parameters(child, content) elif child.type in ("type", "primitive_type", "named_type"): # Return type annotation func_data["return_type"] = self.get_node_text(child, content) elif child.type == "static_modifier": # Static method func_data["is_staticmethod"] = True # Extract docstring (PHP DocBlock) func_data["docstring"] = self._get_php_docstring(node, content) # Calculate cyclomatic complexity func_data["complexity"] = self.complexity_calculator.calculate_complexity( node, content ) functions.append(func_data) return functions def _extract_base_classes( self, node: tree_sitter.Node, content: bytes ) -> list[str]: """Extract base classes from base_clause node.""" return [ self.get_node_text(child, content) for child in node.children if child.type == "name" ] def _extract_interfaces(self, node: tree_sitter.Node, content: bytes) -> list[str]: """Extract interfaces from class_interface_clause node.""" return [ self.get_node_text(child, content) for child in node.children if child.type == "name" ] def _extract_class_traits( self, node: tree_sitter.Node, content: bytes ) -> list[str]: """Extract traits used by the class.""" use_nodes = self.find_nodes_by_type(node, "use_declaration") return [ self.get_node_text(child, content) for use_node in use_nodes for child in use_node.children if child.type == "name" ] def _process_class_modifiers( self, node: tree_sitter.Node, content: bytes, class_data: dict[str, Any] ) -> None: """Process class modifiers (abstract, final).""" modifier = self.get_node_text(node, content) if modifier == "abstract": class_data["is_abstract"] = True def extract_classes( self, tree: tree_sitter.Tree, content: bytes, ) -> list[dict[str, Any]]: """Extract class definitions.""" classes: list[dict[str, Any]] = [] class_nodes = self.find_nodes_by_type(tree.root_node, "class_declaration") for node in class_nodes: class_data: dict[str, Any] = { "name": None, "docstring": None, "base_classes": [], "decorators": [], "start_line": node.start_point[0] + 1, "end_line": node.end_point[0] + 1, "is_abstract": False, "methods": [], "traits": [], } # Process child nodes using a dispatch approach for child in node.children: if child.type == "name": class_data["name"] = self.get_node_text(child, content) elif child.type == "base_clause": class_data["base_classes"].extend( self._extract_base_classes(child, content) ) elif child.type == "class_interface_clause": class_data["base_classes"].extend( self._extract_interfaces(child, content) ) elif child.type in ["abstract_modifier", "final_modifier"]: self._process_class_modifiers(child, content, class_data) elif child.type == "declaration_list": class_data["traits"] = self._extract_class_traits(child, content) # Extract docstring and methods class_data["docstring"] = self._get_php_docstring(node, content) class_data["methods"] = self.extract_functions(tree, content, node) classes.append(class_data) return classes def extract_module_info( self, tree: tree_sitter.Tree, content: bytes, ) -> dict[str, Any]: """Extract module-level information.""" return { "docstring": self._get_module_docstring(tree, content), "imports": self.extract_imports(tree, content), "classes": self.extract_classes(tree, content), "functions": self.extract_functions(tree, content), "traits": self.extract_traits(tree, content), } def _extract_parameters( self, params_node: tree_sitter.Node, content: bytes, ) -> list[dict[str, Any]]: """Extract function parameters.""" parameters: list[dict[str, Any]] = [] for child in params_node.children: if child.type == "simple_parameter": param_data: dict[str, Any] = { "name": None, "type": None, "default": None, } has_default = False for subchild in child.children: if subchild.type == "variable_name": param_data["name"] = self.get_node_text( subchild, content ).lstrip("$") elif subchild.type in [ "primitive_type", "named_type", "optional_type", ]: param_data["type"] = self.get_node_text(subchild, content) elif subchild.type == "=": has_default = True elif has_default and subchild.type not in [ "variable_name", "primitive_type", "named_type", "optional_type", "=", ]: # This is the default value param_data["default"] = self.get_node_text(subchild, content) if param_data["name"]: parameters.append(param_data) return parameters def _get_module_docstring( self, tree: tree_sitter.Tree, content: bytes, ) -> str | None: """Extract module-level docstring (first comment block).""" # In PHP, look for the first comment block in the file for child in tree.root_node.children: if child.type == "comment": text = self.get_node_text(child, content) if text.startswith("/**"): return self._parse_docblock(text) return None def _get_php_docstring( self, node: tree_sitter.Node, content: bytes, ) -> str | None: """Extract PHP DocBlock comment.""" # Look for comment just before the node prev_sibling = node.prev_sibling if prev_sibling and prev_sibling.type == "comment": text = prev_sibling.text if text is not None and text.startswith(b"/**"): return self._parse_docblock(self.get_node_text(prev_sibling, content)) return None def _parse_docblock(self, docblock: str) -> str: """Parse PHP DocBlock to extract description.""" lines = docblock.strip().split("\n") if not lines: return "" # Remove /** and */ if lines[0].strip() == "/**": lines = lines[1:] if lines and lines[-1].strip() == "*/": lines = lines[:-1] # Remove leading asterisks and spaces cleaned_lines = [] for line in lines: stripped_line = line.strip() if stripped_line.startswith("*"): stripped_line = stripped_line[1:].strip() # Skip annotation lines like @param, @return if not stripped_line.startswith("@"): cleaned_lines.append(stripped_line) return " ".join(cleaned_lines).strip() def extract_traits( self, tree: tree_sitter.Tree, content: bytes, ) -> list[dict[str, Any]]: """Extract trait definitions.""" traits = [] trait_nodes = self.find_nodes_by_type(tree.root_node, "trait_declaration") for node in trait_nodes: trait_data: dict[str, Any] = { "name": None, "docstring": None, "methods": [], "start_line": node.start_point[0] + 1, "end_line": node.end_point[0] + 1, } # Extract trait details for child in node.children: if child.type == "name": trait_data["name"] = self.get_node_text(child, content) # Extract docstring trait_data["docstring"] = self._get_php_docstring(node, content) # Extract methods trait_data["methods"] = self.extract_functions(tree, content, node) traits.append(trait_data) return traits def extract_references( self, tree: tree_sitter.Tree, content: bytes ) -> list[dict[str, Any]]: """Extract references from PHP code.""" references = [] # Extract import references from use statements references.extend( [ { "type": "import", "source": "module", "target": imp["imported_from"], "line": imp["line_number"], } for imp in self.extract_imports(tree, content) if imp["imported_from"] ] ) # Extract class inheritance references classes = self.extract_classes(tree, content) for cls in classes: # Base class references references.extend( [ { "type": "inherit", "source": cls["name"], "target": base, "line": cls["start_line"], } for base in cls.get("base_classes", []) ] ) # Trait usage references references.extend( [ { "type": "trait_use", "source": cls["name"], "target": trait, "line": cls["start_line"], } for trait in cls.get("traits", []) ] ) # Extract type references from functions functions = self.extract_functions(tree, content) for func in functions: # Return type references if func.get("return_type") and func["return_type"] not in [ "void", "string", "int", "bool", "float", "array", "mixed", ]: references.append( { "type": "type_use", "source": func["name"], "target": func["return_type"].lstrip("?"), "line": func["start_line"], } ) # Parameter type references references.extend( [ { "type": "type_use", "source": func["name"], "target": param["type"].lstrip("?"), "line": func["start_line"], } for param in func.get("parameters", []) if param.get("type") and param["type"] not in [ "string", "int", "bool", "float", "array", "mixed", ] ] ) return references class JavaParser(TreeSitterParser): """Java-specific TreeSitter parser.""" def __init__(self) -> None: self.language = tree_sitter.Language(tsjava.language()) super().__init__(self.language) self.complexity_calculator = ComplexityCalculator(language="java") def extract_imports( self, tree: tree_sitter.Tree, content: bytes, ) -> list[dict[str, Any]]: """Extract import statements.""" imports = [] # Find import declarations import_nodes = self.find_nodes_by_type(tree.root_node, "import_declaration") for node in import_nodes: import_data: dict[str, Any] = { "import_statement": self.get_node_text(node, content), "imported_from": None, "imported_names": [], "is_relative": False, "level": 0, "line_number": node.start_point[0] + 1, } # Extract imported class/package found_wildcard = False for child in node.children: if child.type == "scoped_identifier": full_name = self.get_node_text(child, content) import_data["imported_from"] = full_name # Last part is the imported name parts = full_name.split(".") if parts: import_data["imported_names"].append(parts[-1]) elif child.type == "asterisk": found_wildcard = True # If we found a wildcard, update the imported names if found_wildcard: import_data["imported_names"] = ["*"] imports.append(import_data) return imports def _extract_method_modifiers( self, node: tree_sitter.Node, content: bytes ) -> tuple[bool, list[str]]: """Extract method modifiers and annotations.""" is_static = False decorators = [] for child in node.children: if child.type == "modifiers": modifiers_text = self.get_node_text(child, content) if "static" in modifiers_text: is_static = True # Extract annotations for mod_child in child.children: if mod_child.type == "marker_annotation": annotation_text = self.get_node_text(mod_child, content) decorators.append(annotation_text.lstrip("@")) elif mod_child.type == "annotation": annotation_text = self.get_node_text(mod_child, content) if annotation_text.startswith("@"): annotation_name = annotation_text[1:].split("(")[0] decorators.append(annotation_name) break return is_static, decorators def _extract_method_signature( self, node: tree_sitter.Node, content: bytes ) -> tuple[str | None, str | None, list[Any]]: """Extract method name, return type, and parameters.""" name = None return_type = None parameters = [] # Java return types for method signature return_type_nodes = { "type_identifier", "primitive_type", "integral_type", "floating_point_type", "boolean_type", "void_type", "generic_type", "array_type", } found_identifier = False for i, child in enumerate(node.children): if child.type == "identifier": name = self.get_node_text(child, content) found_identifier = True # Look for return type in previous siblings if i > 0 and node.children[i - 1].type in return_type_nodes: return_type = self.get_node_text(node.children[i - 1], content) elif child.type == "formal_parameters" and found_identifier: parameters = self._extract_parameters(child, content) return name, return_type, parameters def _resolve_constructor_name( self, node: tree_sitter.Node, content: bytes ) -> str | None: """Resolve constructor name from parent class.""" parent = node.parent class_types = {"class_declaration", "interface_declaration", "enum_declaration"} while parent and parent.type not in class_types: parent = parent.parent if parent: for child in parent.children: if child.type == "identifier": return self.get_node_text(child, content) return None def extract_functions( self, tree: tree_sitter.Tree, content: bytes, parent_class: tree_sitter.Node | None = None, ) -> list[dict[str, Any]]: """Extract method definitions.""" functions = [] root = parent_class if parent_class else tree.root_node # Find method declarations and constructors function_nodes = self.find_nodes_by_type( root, "method_declaration", max_depth=4 ) constructor_nodes = self.find_nodes_by_type( root, "constructor_declaration", max_depth=4 ) function_nodes.extend(constructor_nodes) for node in function_nodes: func_data: dict[str, Any] = { "name": None, "parameters": [], "return_type": None, "docstring": None, "decorators": [], "is_async": False, "is_generator": False, "is_property": False, "is_staticmethod": False, "is_classmethod": False, "start_line": node.start_point[0] + 1, "end_line": node.end_point[0] + 1, } # Extract modifiers and annotations is_static, decorators = self._extract_method_modifiers(node, content) func_data["is_staticmethod"] = is_static func_data["is_static"] = is_static func_data["decorators"] = decorators # Extract method signature name, return_type, parameters = self._extract_method_signature( node, content ) func_data["name"] = name func_data["return_type"] = return_type func_data["parameters"] = parameters # For constructors, resolve name from parent class if node.type == "constructor_declaration" and not func_data["name"]: func_data["name"] = self._resolve_constructor_name(node, content) # Extract docstring and complexity func_data["docstring"] = self._get_javadoc(node, content) func_data["complexity"] = self.complexity_calculator.calculate_complexity( node, content ) functions.append(func_data) return functions def _extract_superclass(self, node: tree_sitter.Node, content: bytes) -> list[str]: """Extract superclass from superclass node.""" return [ self.get_node_text(child, content) for child in node.children if child.type == "type_identifier" ] def _extract_java_interfaces( self, node: tree_sitter.Node, content: bytes ) -> list[str]: """Extract interfaces from interfaces node.""" return [ self.get_node_text(type_child, content) for child in node.children if child.type == "type_list" for type_child in child.children if type_child.type == "type_identifier" ] def _process_java_modifiers( self, node: tree_sitter.Node, content: bytes ) -> tuple[bool, list[str]]: """Process Java class modifiers and annotations.""" is_abstract = False decorators = [] modifiers_text = self.get_node_text(node, content) if "abstract" in modifiers_text: is_abstract = True # Extract annotations line by line for line in modifiers_text.split("\n"): stripped_line = line.strip() if stripped_line.startswith("@"): annotation = stripped_line[1:].split("(")[0] decorators.append(annotation) return is_abstract, decorators def _collect_class_nodes(self, tree: tree_sitter.Tree) -> list[tree_sitter.Node]: """Collect all class-like nodes (classes, interfaces, enums).""" class_nodes = self.find_nodes_by_type(tree.root_node, "class_declaration") interface_nodes = self.find_nodes_by_type( tree.root_node, "interface_declaration" ) enum_nodes = self.find_nodes_by_type(tree.root_node, "enum_declaration") return class_nodes + interface_nodes + enum_nodes def extract_classes( self, tree: tree_sitter.Tree, content: bytes, ) -> list[dict[str, Any]]: """Extract class definitions.""" classes = [] all_class_nodes = self._collect_class_nodes(tree) for node in all_class_nodes: class_data: dict[str, Any] class_data = { "name": None, "docstring": None, "base_classes": [], "decorators": [], "start_line": node.start_point[0] + 1, "end_line": node.end_point[0] + 1, "is_abstract": False, "methods": [], } # Process child nodes for child in node.children: if child.type == "identifier": class_data["name"] = self.get_node_text(child, content) elif child.type == "superclass": class_data["base_classes"].extend( self._extract_superclass(child, content) ) elif child.type == "interfaces": class_data["base_classes"].extend( self._extract_java_interfaces(child, content) ) elif child.type == "modifiers": is_abstract, decorators = self._process_java_modifiers( child, content ) class_data["is_abstract"] = is_abstract class_data["decorators"] = decorators # Extract docstring and methods class_data["docstring"] = self._get_javadoc(node, content) class_data["methods"] = self.extract_functions(tree, content, node) classes.append(class_data) return classes def extract_module_info( self, tree: tree_sitter.Tree, content: bytes, ) -> dict[str, Any]: """Extract module-level information.""" module_info = { "docstring": self._get_module_docstring(tree, content), "imports": self.extract_imports(tree, content), "classes": self.extract_classes(tree, content), "functions": self.extract_functions(tree, content), } # Extract package declaration package_nodes = self.find_nodes_by_type(tree.root_node, "package_declaration") if package_nodes: package_node = package_nodes[0] # Should only be one package declaration for child in package_node.children: if child.type == "scoped_identifier": module_info["package"] = self.get_node_text(child, content) break return module_info def _extract_parameters( self, params_node: tree_sitter.Node, content: bytes, ) -> list[dict[str, Any]]: """Extract function parameters.""" parameters: list[dict[str, Any]] = [] for child in params_node.children: if child.type in ("formal_parameter", "spread_parameter"): param_data: dict[str, Any] = { "name": None, "type": None, "default": None, } # Handle varargs (spread_parameter) if child.type == "spread_parameter": # Varargs parameter has ... before the type for subchild in child.children: if subchild.type == "variable_declarator": param_data["name"] = self.get_node_text(subchild, content) elif subchild.type in ( "type_identifier", "integral_type", "primitive_type", "array_type", "generic_type", ): # Add ... to indicate varargs param_data["type"] = ( self.get_node_text(subchild, content) + "..." ) else: # Regular parameter found_identifier = False for subchild in child.children: if subchild.type == "identifier": param_data["name"] = self.get_node_text(subchild, content) found_identifier = True elif subchild.type in ( "type_identifier", "integral_type", "primitive_type", "array_type", "generic_type", ): # Type comes before identifier in Java if not found_identifier: param_data["type"] = self.get_node_text( subchild, content ) if param_data["name"]: parameters.append(param_data) return parameters def _get_module_docstring( self, tree: tree_sitter.Tree, content: bytes, ) -> str | None: """Extract module-level docstring (first JavaDoc comment).""" # In Java, look for the first block comment in the file for child in tree.root_node.children: if child.type == "block_comment": text = child.text if text is not None and text.startswith(b"/**"): return self._parse_javadoc(self.get_node_text(child, content)) return None def _get_javadoc( self, node: tree_sitter.Node, content: bytes, ) -> str | None: """Extract JavaDoc comment.""" # Look for comment just before the node prev_sibling = node.prev_sibling if prev_sibling and prev_sibling.type == "block_comment": text = prev_sibling.text if text is not None and text.startswith(b"/**"): return self._parse_javadoc(self.get_node_text(prev_sibling, content)) return None def _parse_javadoc(self, javadoc: str) -> str: """Parse JavaDoc to extract description.""" lines = javadoc.strip().split("\n") if not lines: return "" # Remove /** and */ if lines[0].strip() == "/**": lines = lines[1:] if lines and lines[-1].strip() == "*/": lines = lines[:-1] # Remove leading asterisks and spaces cleaned_lines = [] for line in lines: stripped_line = line.strip() if stripped_line.startswith("*"): stripped_line = stripped_line[1:].strip() # Skip annotation lines like @param, @return if not stripped_line.startswith("@"): cleaned_lines.append(stripped_line) return " ".join(cleaned_lines).strip() def extract_references( self, tree: tree_sitter.Tree, content: bytes ) -> list[dict[str, Any]]: """Extract references from Java code.""" references = [] # Extract import references references.extend( [ { "type": "import", "source": "module", "target": imp["imported_from"], "line": imp["line_number"], } for imp in self.extract_imports(tree, content) if imp["imported_from"] ] ) # Extract class inheritance and interface implementation references classes = self.extract_classes(tree, content) for cls in classes: # Base class and interface references for base in cls.get("base_classes", []): ref_type = ( "implement" if "interface" in cls.get("name", "").lower() else "inherit" ) references.append( { "type": ref_type, "source": cls["name"], "target": base, "line": cls["start_line"], } ) # Extract type references from methods for cls in classes: for method in cls.get("methods", []): # Return type references if method.get("return_type") and method["return_type"] not in [ "void", "int", "long", "short", "byte", "float", "double", "boolean", "char", ]: references.append( { "type": "type_use", "source": f"{cls['name']}.{method['name']}", "target": method["return_type"], "line": method["start_line"], } ) # Parameter type references for param in method.get("parameters", []): if param.get("type") and param["type"] not in [ "int", "long", "short", "byte", "float", "double", "boolean", "char", ]: # Handle generic types and arrays param_type = ( param["type"] .split("<")[0] .split("[")[0] .removesuffix("...") ) if param_type not in ["String"]: references.append( { "type": "type_use", "source": f"{cls['name']}.{method['name']}", "target": param_type, "line": method["start_line"], } ) return references class TypeScriptParser(TreeSitterParser): """TypeScript-specific TreeSitter parser.""" def __init__(self) -> None: if not TYPESCRIPT_AVAILABLE or tstypescript is None: msg = "tree-sitter-typescript not available. Install with: pip install tree-sitter-typescript" raise ImportError(msg) super().__init__(tree_sitter.Language(tstypescript.language_typescript())) def extract_module_info( self, tree: tree_sitter.Tree, content: bytes, ) -> dict[str, Any]: """Extract module information from TypeScript code.""" return { "classes": self._extract_classes(tree, content), "functions": self._extract_functions(tree, content), "imports": self._extract_imports(tree, content), "exports": self._extract_exports(tree, content), "interfaces": self._extract_interfaces(tree, content), "types": self._extract_type_aliases(tree, content), "enums": self._extract_enums(tree, content), } def _extract_classes( self, tree: tree_sitter.Tree, content: bytes, ) -> list[dict[str, Any]]: """Extract class definitions from TypeScript code.""" classes = [] query = tree_sitter.Query( self.require_language(), """ (class_declaration name: (type_identifier) @class_name body: (class_body) @class_body) @class """, ) for match in query.matches(tree.root_node): captures = match[1] if "class_name" in captures and "class_body" in captures: class_node = captures["class"][0] class_name = self.get_node_text(captures["class_name"][0], content) class_info = { "name": class_name, "start_line": class_node.start_point[0] + 1, "end_line": class_node.end_point[0] + 1, "methods": self._extract_class_methods( captures["class_body"][0], content ), "properties": self._extract_class_properties( captures["class_body"][0], content ), "constructors": self._extract_constructors( captures["class_body"][0], content ), "base_classes": self._extract_extends_clause(class_node, content), "interfaces": self._extract_implements_clause(class_node, content), "decorators": self._extract_decorators(class_node, content), "access_modifier": self._extract_class_modifiers( class_node, content ), } classes.append(class_info) return classes def _extract_functions( self, tree: tree_sitter.Tree, content: bytes, ) -> list[dict[str, Any]]: """Extract function definitions from TypeScript code.""" functions = [] # Function declarations query = tree_sitter.Query( self.require_language(), """ (function_declaration name: (identifier) @function_name parameters: (formal_parameters) @params body: (statement_block) @body) @function """, ) for match in query.matches(tree.root_node): captures = match[1] if "function_name" in captures: func_node = captures["function"][0] func_name = self.get_node_text(captures["function_name"][0], content) params_nodes = captures.get("params") params_node = params_nodes[0] if params_nodes else None body_nodes = captures.get("body") body_node = body_nodes[0] if body_nodes else func_node func_info = { "name": func_name, "start_line": func_node.start_point[0] + 1, "end_line": func_node.end_point[0] + 1, "parameters": self._extract_ts_parameters(params_node, content), "return_type": self._extract_return_type(func_node, content), "access_modifier": "public", # Default for functions "decorators": self._extract_decorators(func_node, content), "is_async": self._is_async_function(func_node, content), "complexity": ComplexityCalculator( "typescript" ).calculate_complexity(body_node, content), } functions.append(func_info) # Arrow functions assigned to variables arrow_query = tree_sitter.Query( self.require_language(), """ (variable_declaration (variable_declarator name: (identifier) @var_name value: (arrow_function) @arrow_func)) """, ) for match in arrow_query.matches(tree.root_node): captures = match[1] if "var_name" in captures and "arrow_func" in captures: var_name = self.get_node_text(captures["var_name"][0], content) arrow_node = captures["arrow_func"][0] func_info = { "name": var_name, "start_line": arrow_node.start_point[0] + 1, "end_line": arrow_node.end_point[0] + 1, "parameters": self._extract_arrow_parameters(arrow_node, content), "return_type": self._extract_arrow_return_type(arrow_node, content), "access_modifier": "public", "decorators": [], "is_async": self._is_async_function(arrow_node, content), "complexity": ComplexityCalculator( "typescript" ).calculate_complexity(arrow_node, content), } functions.append(func_info) return functions def _extract_imports( self, tree: tree_sitter.Tree, content: bytes, ) -> list[dict[str, Any]]: """Extract import statements from TypeScript code.""" imports = [] query = tree_sitter.Query( self.require_language(), """ (import_statement source: (string) @source) @import """, ) for match in query.matches(tree.root_node): captures = match[1] if "source" in captures: import_node = captures["import"][0] source = self.get_node_text(captures["source"][0], content).strip("\"'") import_info = { "module": source, "line": import_node.start_point[0] + 1, "imports": self._extract_import_specifiers(import_node, content), "is_default": self._has_default_import(import_node, content), "is_namespace": self._has_namespace_import(import_node, content), } imports.append(import_info) return imports def _extract_exports( self, tree: tree_sitter.Tree, content: bytes, ) -> list[dict[str, Any]]: """Extract export statements from TypeScript code.""" exports = [] query = tree_sitter.Query( self.require_language(), """ (export_statement) @export """, ) for match in query.matches(tree.root_node): captures = match[1] export_node = captures["export"][0] export_info = { "line": export_node.start_point[0] + 1, "type": self._get_export_type(export_node, content), "name": self._get_export_name(export_node, content), "is_default": self._is_default_export(export_node, content), } exports.append(export_info) return exports def _extract_interfaces( self, tree: tree_sitter.Tree, content: bytes, ) -> list[dict[str, Any]]: """Extract interface definitions from TypeScript code.""" interfaces = [] query = tree_sitter.Query( self.require_language(), """ (interface_declaration name: (type_identifier) @interface_name body: (object_type) @interface_body) @interface """, ) for match in query.matches(tree.root_node): captures = match[1] if "interface_name" in captures: interface_node = captures["interface"][0] interface_name = self.get_node_text( captures["interface_name"][0], content ) interface_body_nodes = captures.get("interface_body") interface_body_node = ( interface_body_nodes[0] if interface_body_nodes else None ) interface_info = { "name": interface_name, "start_line": interface_node.start_point[0] + 1, "end_line": interface_node.end_point[0] + 1, "properties": self._extract_interface_properties( interface_body_node, content ), "methods": self._extract_interface_methods( interface_body_node, content ), "extends": self._extract_interface_extends(interface_node, content), } interfaces.append(interface_info) return interfaces def _extract_type_aliases( self, tree: tree_sitter.Tree, content: bytes, ) -> list[dict[str, Any]]: """Extract type alias definitions from TypeScript code.""" types = [] query = tree_sitter.Query( self.require_language(), """ (type_alias_declaration name: (type_identifier) @type_name) @type_alias """, ) for match in query.matches(tree.root_node): captures = match[1] if "type_name" in captures: type_node = captures["type_alias"][0] type_name = self.get_node_text(captures["type_name"][0], content) type_info = { "name": type_name, "line": type_node.start_point[0] + 1, "definition": self._extract_type_definition(type_node, content), } types.append(type_info) return types def _extract_enums( self, tree: tree_sitter.Tree, content: bytes, ) -> list[dict[str, Any]]: """Extract enum definitions from TypeScript code.""" enums = [] query = tree_sitter.Query( self.require_language(), """ (enum_declaration name: (identifier) @enum_name body: (enum_body) @enum_body) @enum """, ) for match in query.matches(tree.root_node): captures = match[1] if "enum_name" in captures: enum_node = captures["enum"][0] enum_name = self.get_node_text(captures["enum_name"][0], content) enum_body_nodes = captures.get("enum_body") enum_body_node = enum_body_nodes[0] if enum_body_nodes else None enum_info = { "name": enum_name, "start_line": enum_node.start_point[0] + 1, "end_line": enum_node.end_point[0] + 1, "members": self._extract_enum_members(enum_body_node, content), } enums.append(enum_info) return enums def _extract_ts_parameters( self, params_node: tree_sitter.Node | None, content: bytes, ) -> list[dict[str, Any]]: """Extract function parameters with TypeScript types.""" if not params_node: return [] parameters = [] for child in params_node.children: if child.type in ["required_parameter", "optional_parameter"]: param_data: dict[str, Any] = { "name": None, "type": None, "default": None, "optional": child.type == "optional_parameter", } for subchild in child.children: if subchild.type == "identifier": param_data["name"] = self.get_node_text(subchild, content) elif subchild.type == "type_annotation": param_data["type"] = self._extract_type_annotation( subchild, content ) elif subchild.type in ["number", "string", "true", "false", "null"]: param_data["default"] = self.get_node_text(subchild, content) if param_data["name"]: parameters.append(param_data) return parameters def _extract_type_annotation( self, type_node: tree_sitter.Node, content: bytes, ) -> str: """Extract TypeScript type annotation.""" # Find the actual type within the type annotation for child in type_node.children: if child.type != ":": return self.get_node_text(child, content) return "any" def _extract_class_methods( self, body_node: tree_sitter.Node | None, content: bytes ) -> list[dict[str, Any]]: """Extract methods from TypeScript class body.""" if not body_node: return [] methods = [] for child in body_node.children: if child.type == "method_definition": method_name = None method_params = [] return_type = "any" access_modifier = "public" is_static = False for subchild in child.children: if subchild.type == "property_identifier": method_name = self.get_node_text(subchild, content) elif subchild.type == "formal_parameters": method_params = self._extract_ts_parameters(subchild, content) elif subchild.type == "type_annotation": return_type = self._extract_type_annotation(subchild, content) elif subchild.type in ["public", "private", "protected"]: access_modifier = subchild.type elif subchild.type == "static": is_static = True if method_name: methods.append( { "name": method_name, "parameters": method_params, "return_type": return_type, "access_modifier": access_modifier, "is_static": is_static, "start_line": child.start_point[0] + 1, "end_line": child.end_point[0] + 1, "complexity": ComplexityCalculator( "typescript" ).calculate_complexity(child, content), } ) return methods def _extract_class_properties( self, body_node: tree_sitter.Node | None, content: bytes ) -> list[dict[str, Any]]: """Extract properties from TypeScript class body.""" if not body_node: return [] properties = [] for child in body_node.children: if child.type == "field_definition": prop_name = None prop_type = "any" access_modifier = "public" is_static = False for subchild in child.children: if subchild.type == "property_identifier": prop_name = self.get_node_text(subchild, content) elif subchild.type == "type_annotation": prop_type = self._extract_type_annotation(subchild, content) elif subchild.type in ["public", "private", "protected"]: access_modifier = subchild.type elif subchild.type == "static": is_static = True if prop_name: properties.append( { "name": prop_name, "type": prop_type, "access_modifier": access_modifier, "is_static": is_static, } ) return properties def _extract_constructors( self, body_node: tree_sitter.Node | None, content: bytes ) -> list[dict[str, Any]]: """Extract constructors from TypeScript class body.""" if not body_node: return [] constructors = [] for child in body_node.children: if child.type == "method_definition": # Check if this is a constructor for subchild in child.children: if ( subchild.type == "property_identifier" and self.get_node_text(subchild, content) == "constructor" ): constructor_params = [] access_modifier = "public" for param_child in child.children: if param_child.type == "formal_parameters": constructor_params = self._extract_ts_parameters( param_child, content ) elif param_child.type in ["public", "private", "protected"]: access_modifier = param_child.type constructors.append( { "parameters": constructor_params, "access_modifier": access_modifier, "start_line": child.start_point[0] + 1, "end_line": child.end_point[0] + 1, } ) break return constructors def _extract_extends_clause( self, class_node: tree_sitter.Node, content: bytes ) -> list[str]: """Extract extends clause from TypeScript class.""" for child in class_node.children: if child.type == "class_heritage": for heritage_child in child.children: if heritage_child.type == "extends_clause": for extends_child in heritage_child.children: if extends_child.type == "type_identifier": return [self.get_node_text(extends_child, content)] return [] def _extract_implements_clause( self, class_node: tree_sitter.Node, content: bytes ) -> list[str]: """Extract implements clause from TypeScript class.""" interfaces = [] for child in class_node.children: if child.type == "class_heritage": for heritage_child in child.children: if heritage_child.type == "implements_clause": interfaces.extend( [ self.get_node_text(impl_child, content) for impl_child in heritage_child.children if impl_child.type == "type_identifier" ] ) return interfaces def _extract_decorators(self, node: tree_sitter.Node, content: bytes) -> list[str]: """Extract decorators from TypeScript node.""" decorators = [] for child in node.children: if child.type == "decorator": decorator_name = "" for dec_child in child.children: if dec_child.type == "identifier": decorator_name = self.get_node_text(dec_child, content) break if decorator_name: decorators.append(decorator_name) return decorators def _extract_class_modifiers(self, node: tree_sitter.Node, _: bytes) -> str: """Extract class access modifiers.""" for child in node.children: if child.type in ["public", "private", "protected"]: return child.type return "public" def _extract_return_type(self, func_node: tree_sitter.Node, content: bytes) -> str: """Extract return type from TypeScript function.""" for child in func_node.children: if child.type == "type_annotation": return self._extract_type_annotation(child, content) return "any" def _is_async_function(self, func_node: tree_sitter.Node, _: bytes) -> bool: """Check if function is async.""" return any(child.type == "async" for child in func_node.children) def _extract_arrow_parameters( self, arrow_node: tree_sitter.Node, content: bytes ) -> list[dict[str, Any]]: """Extract parameters from arrow function.""" for child in arrow_node.children: if child.type == "formal_parameters": return self._extract_ts_parameters(child, content) if child.type == "identifier": # Single parameter without parentheses return [ { "name": self.get_node_text(child, content), "type": "any", "default": None, "optional": False, } ] return [] def _extract_arrow_return_type( self, arrow_node: tree_sitter.Node, content: bytes ) -> str: """Extract return type from arrow function.""" for child in arrow_node.children: if child.type == "type_annotation": return self._extract_type_annotation(child, content) return "any" def _extract_import_specifiers( self, import_node: tree_sitter.Node, content: bytes ) -> list[str]: """Extract import specifiers from import statement.""" imports = [] for child in import_node.children: if child.type == "import_clause": for clause_child in child.children: if clause_child.type == "named_imports": for named_child in clause_child.children: if named_child.type == "import_specifier": imports.extend( [ self.get_node_text(spec_child, content) for spec_child in named_child.children if spec_child.type == "identifier" ] ) elif clause_child.type == "identifier": imports.append(self.get_node_text(clause_child, content)) return imports def _has_default_import(self, import_node: tree_sitter.Node, _: bytes) -> bool: """Check if import has default import.""" for child in import_node.children: if child.type == "import_clause": for clause_child in child.children: if clause_child.type == "identifier": return True return False def _has_namespace_import(self, import_node: tree_sitter.Node, _: bytes) -> bool: """Check if import has namespace import.""" for child in import_node.children: if child.type == "import_clause": for clause_child in child.children: if clause_child.type == "namespace_import": return True return False def _get_export_type(self, export_node: tree_sitter.Node, _: bytes) -> str: """Get export type.""" for child in export_node.children: if child.type == "default": return "default" return "named" def _get_export_name(self, export_node: tree_sitter.Node, content: bytes) -> str: """Get export name.""" for child in export_node.children: if child.type == "identifier": return self.get_node_text(child, content) if child.type == "class_declaration": for class_child in child.children: if class_child.type == "type_identifier": return self.get_node_text(class_child, content) return "" def _is_default_export(self, export_node: tree_sitter.Node, content: bytes) -> bool: """Check if export is default export.""" return self._get_export_type(export_node, content) == "default" def _extract_interface_properties( self, body_node: tree_sitter.Node | None, content: bytes ) -> list[dict[str, Any]]: """Extract properties from TypeScript interface.""" if not body_node: return [] properties = [] for child in body_node.children: if child.type == "property_signature": prop_name = None prop_type = "any" optional = False for prop_child in child.children: if prop_child.type == "property_identifier": prop_name = self.get_node_text(prop_child, content) elif prop_child.type == "type_annotation": prop_type = self._extract_type_annotation(prop_child, content) elif prop_child.type == "?": optional = True if prop_name: properties.append( { "name": prop_name, "type": prop_type, "optional": optional, } ) return properties def _extract_interface_methods( self, body_node: tree_sitter.Node | None, content: bytes ) -> list[dict[str, Any]]: """Extract methods from TypeScript interface.""" if not body_node: return [] methods = [] for child in body_node.children: if child.type == "method_signature": method_name = None method_params = [] return_type = "any" for method_child in child.children: if method_child.type == "property_identifier": method_name = self.get_node_text(method_child, content) elif method_child.type == "formal_parameters": method_params = self._extract_ts_parameters( method_child, content ) elif method_child.type == "type_annotation": return_type = self._extract_type_annotation( method_child, content ) if method_name: methods.append( { "name": method_name, "parameters": method_params, "return_type": return_type, } ) return methods def _extract_interface_extends( self, interface_node: tree_sitter.Node, content: bytes ) -> list[str]: """Extract extends clause from TypeScript interface.""" extends = [] for child in interface_node.children: if child.type == "extends_clause": extends.extend( [ self.get_node_text(extends_child, content) for extends_child in child.children if extends_child.type == "type_identifier" ] ) return extends def _extract_type_definition( self, type_node: tree_sitter.Node, content: bytes ) -> str: """Extract type definition from TypeScript type alias.""" for child in type_node.children: if child.type not in ["type", "type_identifier", "="]: return self.get_node_text(child, content) return "any" def _extract_enum_members( self, body_node: tree_sitter.Node | None, content: bytes ) -> list[dict[str, Any]]: """Extract members from TypeScript enum.""" if not body_node: return [] members = [] for child in body_node.children: if child.type == "property_identifier": member_name = self.get_node_text(child, content) members.append( { "name": member_name, "value": None, # Could be enhanced to extract values } ) return members def extract_references( self, tree: tree_sitter.Tree, content: bytes, ) -> list[dict[str, Any]]: """Extract code references from TypeScript code.""" references = [] # Extract module information for context module_info = self.extract_module_info(tree, content) # Extract type references from interfaces for interface in module_info.get("interfaces", []): # Interface inheritance references references.extend( [ { "type": "interface_extends", "source": interface["name"], "target": extended, "line": interface["start_line"], } for extended in interface.get("extends", []) ] ) # Property type references for prop in interface.get("properties", []): if prop.get("type") and prop["type"] not in [ "string", "number", "boolean", "any", "void", "null", "undefined", ]: # Clean up generic types and arrays prop_type = prop["type"].split("<")[0].split("[")[0] references.append( { "type": "type_use", "source": f"{interface['name']}.{prop['name']}", "target": prop_type, "line": interface["start_line"], } ) # Extract type references from classes for cls in module_info.get("classes", []): # Class inheritance references references.extend( [ { "type": "inherit", "source": cls["name"], "target": base, "line": cls["start_line"], } for base in cls.get("base_classes", []) ] ) # Interface implementation references references.extend( [ { "type": "implement", "source": cls["name"], "target": impl, "line": cls["start_line"], } for impl in cls.get("interfaces", []) ] ) # Extract type references from functions for func in module_info.get("functions", []): # Return type references if func.get("return_type") and func["return_type"] not in [ "any", "void", "string", "number", "boolean", ]: return_type = func["return_type"].split("<")[0].split("[")[0] references.append( { "type": "type_use", "source": func["name"], "target": return_type, "line": func["start_line"], } ) # Parameter type references for param in func.get("parameters", []): if param.get("type") and param["type"] not in [ "any", "string", "number", "boolean", ]: param_type = param["type"].split("<")[0].split("[")[0] references.append( { "type": "type_use", "source": func["name"], "target": param_type, "line": func["start_line"], } ) # Extract import references for imp in module_info.get("imports", []): references.extend( [ { "type": "import", "source": "module", "target": imported_item, "line": imp["line"], "module": imp["module"], } for imported_item in imp.get("imports", []) ] ) return references class JavaScriptParser(TreeSitterParser): """JavaScript-specific TreeSitter parser.""" def __init__(self) -> None: if not JAVASCRIPT_AVAILABLE or tsjavascript is None: msg = "tree-sitter-javascript not available. Install with: pip install tree-sitter-javascript" raise ImportError(msg) super().__init__(tree_sitter.Language(tsjavascript.language())) def extract_module_info( self, tree: tree_sitter.Tree, content: bytes, ) -> dict[str, Any]: """Extract module information from JavaScript code.""" return { "classes": self._extract_classes(tree, content), "functions": self._extract_functions(tree, content), "imports": self._extract_imports(tree, content), "exports": self._extract_exports(tree, content), } def _extract_classes( self, tree: tree_sitter.Tree, content: bytes, ) -> list[dict[str, Any]]: """Extract class definitions from JavaScript code.""" classes = [] query = tree_sitter.Query( self.require_language(), """ (class_declaration name: (identifier) @class_name body: (class_body) @class_body) @class """, ) for match in query.matches(tree.root_node): captures = match[1] if "class_name" in captures and "class_body" in captures: class_node = captures["class"][0] class_name = self.get_node_text(captures["class_name"][0], content) class_info = { "name": class_name, "start_line": class_node.start_point[0] + 1, "end_line": class_node.end_point[0] + 1, "text": self.get_node_text(class_node, content), "methods": self._extract_js_class_methods( captures["class_body"][0], content ), "properties": self._extract_js_class_properties( captures["class_body"][0], content ), "constructors": self._extract_js_constructors( captures["class_body"][0], content ), "base_classes": self._extract_js_extends_clause( class_node, content ), } classes.append(class_info) return classes def _extract_functions( self, tree: tree_sitter.Tree, content: bytes, ) -> list[dict[str, Any]]: """Extract function definitions from JavaScript code.""" functions = [] # Function declarations query = tree_sitter.Query( self.require_language(), """ (function_declaration name: (identifier) @function_name parameters: (formal_parameters) @params body: (statement_block) @body) @function """, ) for match in query.matches(tree.root_node): captures = match[1] if "function_name" in captures: func_node = captures["function"][0] func_name = self.get_node_text(captures["function_name"][0], content) params_nodes = captures.get("params") params_node = params_nodes[0] if params_nodes else None body_nodes = captures.get("body") body_node = body_nodes[0] if body_nodes else func_node func_info = { "name": func_name, "start_line": func_node.start_point[0] + 1, "end_line": func_node.end_point[0] + 1, "parameters": self._extract_js_parameters(params_node, content), "is_async": self._is_async_function(func_node, content), "complexity": ComplexityCalculator( "javascript" ).calculate_complexity(body_node, content), } functions.append(func_info) # Arrow functions assigned to variables (handles both var/let/const and lexical declarations) arrow_query = tree_sitter.Query( self.require_language(), """ (variable_declaration (variable_declarator name: (identifier) @var_name value: (arrow_function) @arrow_func)) """, ) for match in arrow_query.matches(tree.root_node): captures = match[1] if "var_name" in captures and "arrow_func" in captures: var_name = self.get_node_text(captures["var_name"][0], content) arrow_node = captures["arrow_func"][0] func_info = { "name": var_name, "start_line": arrow_node.start_point[0] + 1, "end_line": arrow_node.end_point[0] + 1, "parameters": self._extract_js_arrow_parameters( arrow_node, content ), "is_async": self._is_async_function(arrow_node, content), "complexity": ComplexityCalculator( "javascript" ).calculate_complexity(arrow_node, content), } functions.append(func_info) # Also handle lexical_declaration forms produced by some grammars (e.g., 'const') lexical_arrow_query = tree_sitter.Query( self.require_language(), """ (lexical_declaration (variable_declarator name: (identifier) @var_name value: (arrow_function) @arrow_func)) """, ) for match in lexical_arrow_query.matches(tree.root_node): captures = match[1] if "var_name" in captures and "arrow_func" in captures: var_name = self.get_node_text(captures["var_name"][0], content) arrow_node = captures["arrow_func"][0] func_info = { "name": var_name, "start_line": arrow_node.start_point[0] + 1, "end_line": arrow_node.end_point[0] + 1, "parameters": self._extract_js_arrow_parameters( arrow_node, content ), "is_async": self._is_async_function(arrow_node, content), "complexity": ComplexityCalculator( "javascript" ).calculate_complexity(arrow_node, content), } functions.append(func_info) return functions # Helper utilities for JavaScript exports/imports/async def _is_async_function(self, func_node: tree_sitter.Node, _: bytes) -> bool: """Check if function is async (JavaScript).""" return any(child.type == "async" for child in func_node.children) def _has_default_import(self, import_node: tree_sitter.Node, _: bytes) -> bool: """Check if import has default import (JavaScript).""" for child in import_node.children: if child.type == "import_clause": for clause_child in child.children: if clause_child.type == "identifier": return True return False def _has_namespace_import(self, import_node: tree_sitter.Node, _: bytes) -> bool: """Check if import has namespace import (JavaScript).""" for child in import_node.children: if child.type == "import_clause": for clause_child in child.children: if clause_child.type == "namespace_import": return True return False def _get_export_type(self, export_node: tree_sitter.Node, _: bytes) -> str: """Get export type (JavaScript).""" for child in export_node.children: if child.type == "default": return "default" return "named" def _get_export_name(self, export_node: tree_sitter.Node, content: bytes) -> str: """Get export name (JavaScript).""" for child in export_node.children: if child.type == "identifier": return self.get_node_text(child, content) if child.type == "class_declaration": for class_child in child.children: if class_child.type == "identifier": return self.get_node_text(class_child, content) return "" def _is_default_export(self, export_node: tree_sitter.Node, content: bytes) -> bool: """Check if export is default export (JavaScript).""" return self._get_export_type(export_node, content) == "default" def _extract_imports( self, tree: tree_sitter.Tree, content: bytes, ) -> list[dict[str, Any]]: """Extract import statements from JavaScript code.""" imports = [] query = tree_sitter.Query( self.require_language(), """ (import_statement source: (string) @source) @import """, ) for match in query.matches(tree.root_node): captures = match[1] if "source" in captures: import_node = captures["import"][0] source = self.get_node_text(captures["source"][0], content).strip("\"'") import_info = { "module": source, "line": import_node.start_point[0] + 1, "imports": self._extract_js_import_specifiers(import_node, content), "is_default": self._has_default_import(import_node, content), "is_namespace": self._has_namespace_import(import_node, content), } imports.append(import_info) return imports def _extract_exports( self, tree: tree_sitter.Tree, content: bytes, ) -> list[dict[str, Any]]: """Extract export statements from JavaScript code.""" exports = [] query = tree_sitter.Query( self.require_language(), """ (export_statement) @export """, ) for match in query.matches(tree.root_node): captures = match[1] export_node = captures["export"][0] export_info = { "line": export_node.start_point[0] + 1, "type": self._get_export_type(export_node, content), "name": self._get_export_name(export_node, content), "is_default": self._is_default_export(export_node, content), } exports.append(export_info) return exports def _extract_js_parameters( self, params_node: tree_sitter.Node | None, content: bytes, ) -> list[dict[str, Any]]: """Extract function parameters from JavaScript code.""" if not params_node: return [] parameters = [] for child in params_node.children: if child.type == "identifier": param_data = { "name": self.get_node_text(child, content), "type": None, # JavaScript doesn't have explicit types "default": None, } parameters.append(param_data) elif child.type == "assignment_pattern": # Parameter with default value param_name = None default_value = None for subchild in child.children: if subchild.type == "identifier": param_name = self.get_node_text(subchild, content) elif subchild.type != "=": default_value = self.get_node_text(subchild, content) if param_name: param_data = { "name": param_name, "type": None, "default": default_value, } parameters.append(param_data) return parameters def _extract_js_class_methods( self, body_node: tree_sitter.Node | None, content: bytes ) -> list[dict[str, Any]]: """Extract methods from JavaScript class body.""" if not body_node: return [] methods = [] for child in body_node.children: if child.type == "method_definition": method_name = None method_params = [] is_static = False is_async = False for subchild in child.children: if subchild.type == "property_identifier": method_name = self.get_node_text(subchild, content) elif subchild.type == "formal_parameters": method_params = self._extract_js_parameters(subchild, content) elif subchild.type == "static": is_static = True elif subchild.type == "async": is_async = True if method_name: methods.append( { "name": method_name, "parameters": method_params, "is_static": is_static, "is_async": is_async, "start_line": child.start_point[0] + 1, "end_line": child.end_point[0] + 1, "complexity": ComplexityCalculator( "javascript" ).calculate_complexity(child, content), } ) return methods def _extract_js_class_properties( self, body_node: tree_sitter.Node | None, content: bytes ) -> list[dict[str, Any]]: """Extract properties from JavaScript class body.""" if not body_node: return [] properties = [] for child in body_node.children: if child.type == "field_definition": prop_name = None is_static = False for subchild in child.children: if subchild.type == "property_identifier": prop_name = self.get_node_text(subchild, content) elif subchild.type == "static": is_static = True if prop_name: properties.append( { "name": prop_name, "is_static": is_static, } ) return properties def _extract_js_constructors( self, body_node: tree_sitter.Node | None, content: bytes ) -> list[dict[str, Any]]: """Extract constructors from JavaScript class body.""" if not body_node: return [] constructors = [] for child in body_node.children: if child.type == "method_definition": # Check if this is a constructor for subchild in child.children: if ( subchild.type == "property_identifier" and self.get_node_text(subchild, content) == "constructor" ): constructor_params = [] for param_child in child.children: if param_child.type == "formal_parameters": constructor_params = self._extract_js_parameters( param_child, content ) constructors.append( { "parameters": constructor_params, "start_line": child.start_point[0] + 1, "end_line": child.end_point[0] + 1, } ) break return constructors def _extract_js_extends_clause( self, class_node: tree_sitter.Node, content: bytes ) -> list[str]: """Extract extends clause from JavaScript class.""" for child in class_node.children: if child.type == "class_heritage": for heritage_child in child.children: if heritage_child.type == "extends_clause": for extends_child in heritage_child.children: # Node types can vary between grammars (identifier, type_identifier, property_identifier, scoped_identifier) if extends_child.type in { "identifier", "type_identifier", "property_identifier", "scoped_identifier", "qualified_identifier", }: return [self.get_node_text(extends_child, content)] return [] def _extract_js_arrow_parameters( self, arrow_node: tree_sitter.Node, content: bytes ) -> list[dict[str, Any]]: """Extract parameters from JavaScript arrow function.""" for child in arrow_node.children: if child.type == "formal_parameters": return self._extract_js_parameters(child, content) if child.type == "identifier": # Single parameter without parentheses return [ { "name": self.get_node_text(child, content), "type": None, "default": None, } ] return [] def _extract_js_import_specifiers( self, import_node: tree_sitter.Node, content: bytes ) -> list[str]: """Extract import specifiers from JavaScript import statement.""" imports = [] for child in import_node.children: if child.type == "import_clause": for clause_child in child.children: if clause_child.type == "named_imports": for named_child in clause_child.children: if named_child.type == "import_specifier": imports.extend( [ self.get_node_text(spec_child, content) for spec_child in named_child.children if spec_child.type == "identifier" ] ) elif clause_child.type == "identifier": imports.append(self.get_node_text(clause_child, content)) return imports def extract_references( self, tree: tree_sitter.Tree, content: bytes, ) -> list[dict[str, Any]]: """Extract code references from JavaScript code.""" references = [] # Extract module information for context module_info = self.extract_module_info(tree, content) # Extract class inheritance references for cls in module_info.get("classes", []): # Class inheritance references references.extend( [ { "type": "inherit", "source": cls["name"], "target": base, "line": cls["start_line"], } for base in cls.get("base_classes", []) ] ) # Extract import references for imp in module_info.get("imports", []): references.extend( [ { "type": "import", "source": "module", "target": imported_item, "line": imp["line"], "module": imp["module"], } for imported_item in imp.get("imports", []) ] ) # Extract function call references (simplified) # This could be enhanced to analyze function calls within the AST references.extend( [ { "type": "function_definition", "source": "module", "target": func["name"], "line": func["start_line"], } for func in module_info.get("functions", []) ] ) return references

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/johannhartmann/mcpcodeanalysis'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

treesitter_parser.py•110 KiB