Skip to main content
Glama
parser.py (34 kB)
"""
HiveMind MCP Server - Parser

Code structure extraction using tree-sitter for TypeScript, JavaScript, and Python.
"""

import os
import re
from pathlib import Path
from typing import Dict, List, Optional, Any, Set
from dataclasses import dataclass, field

try:
    import tree_sitter_languages
    TREE_SITTER_AVAILABLE = True
except ImportError:
    TREE_SITTER_AVAILABLE = False

from config import (
    PARSEABLE_EXTENSIONS,
    INCLUDEABLE_EXTENSIONS,
    IGNORE_PATTERNS,
    TS_JS_FUNCTION_NODES,
    TS_JS_IMPORT_NODES,
    TS_JS_EXPORT_NODES,
    TS_JS_CLASS_NODES,
    TS_TYPE_NODES,
    PY_FUNCTION_NODES,
    PY_IMPORT_NODES,
    PY_CLASS_NODES,
    get_complexity_label,
)
from utils import should_ignore, get_file_info, normalize_path


# Node types that add one to the (simplified) cyclomatic complexity.
_TS_JS_BRANCH_NODES = frozenset({
    'if_statement', 'conditional_expression', 'switch_case',
    'for_statement', 'while_statement', 'do_statement', 'for_in_statement',
})
_PY_BRANCH_NODES = frozenset({
    'if_statement', 'conditional_expression', 'elif_clause',
    'for_statement', 'while_statement', 'with_statement',
    'except_clause',
})

# Wrapper nodes through which the "exported" flag must flow so that
# `export const f = () => {}` marks the arrow function as exported.
_EXPORT_PASSTHROUGH_NODES = frozenset({
    'export_statement', 'lexical_declaration',
    'variable_declaration', 'variable_declarator',
})

# Declarations that may directly follow `export`, mapped to ExportInfo.export_type.
_TS_JS_EXPORTED_DECLARATIONS = {
    'function_declaration': 'function',
    'class_declaration': 'class',
    'interface_declaration': 'interface',
    'type_alias_declaration': 'type',
}

# Regex fallback patterns, compiled once instead of on every line.
_PY_FUNC_RE = re.compile(r'^(async\s+)?def\s+(\w+)\s*\(([^)]*)\)(?:\s*->\s*([^:]+))?:')
_PY_IMPORT_RE = re.compile(r'^(?:from\s+([\w.]+)\s+)?import\s+(.+)$')
_PY_CLASS_RE = re.compile(r'^class\s+(\w+)(?:\(([^)]*)\))?:')
_JS_FUNC_RE = re.compile(r'(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*\(([^)]*)\)')
_JS_ARROW_RE = re.compile(
    r'(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\([^)]*\)\s*=>'
)
_JS_IMPORT_RE = re.compile(r"import\s+(?:{([^}]+)}|(\w+))\s+from\s+['\"]([^'\"]+)['\"]")
_JS_EXPORT_RE = re.compile(
    r'export\s+(?:default\s+)?(?:const|let|var|function|class)\s+(\w+)'
)
# Whole-word keyword checks; a plain substring test would also match
# identifiers such as `exportData` or `asyncHelper`.
_EXPORT_WORD_RE = re.compile(r'\bexport\b')
_ASYNC_WORD_RE = re.compile(r'\basync\b')
_DEFAULT_WORD_RE = re.compile(r'\bdefault\b')


@dataclass
class FunctionInfo:
    """Information about a function definition."""
    name: str
    params: str          # raw parameter list text, including the parentheses
    return_type: str     # raw annotation text, or a language default
    start_line: int      # 1-based
    end_line: int        # 1-based, inclusive
    is_async: bool = False
    exported: bool = False
    complexity: int = 1  # 1 + number of branch/loop nodes inside the function
    docstring: Optional[str] = None
    file_path: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form; note `is_async` is emitted as 'async'."""
        return {
            'name': self.name,
            'params': self.params,
            'return_type': self.return_type,
            'start_line': self.start_line,
            'end_line': self.end_line,
            'async': self.is_async,
            'exported': self.exported,
            'complexity': self.complexity,
            'complexity_label': get_complexity_label(self.complexity),
            'docstring': self.docstring,
            'file_path': self.file_path,
        }


@dataclass
class ImportInfo:
    """Information about an import statement."""
    module: str
    imports: List[str] = field(default_factory=list)
    is_default: bool = False
    is_namespace: bool = False
    alias: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form of this import."""
        return {
            'module': self.module,
            'imports': self.imports,
            'is_default': self.is_default,
            'is_namespace': self.is_namespace,
            'alias': self.alias,
        }


@dataclass
class ExportInfo:
    """Information about an export statement."""
    name: str
    # 'function', 'class', 'const', 'type', 'interface', 'named', 'default'
    export_type: str
    is_default: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form; note `export_type` is emitted as 'type'."""
        return {
            'name': self.name,
            'type': self.export_type,
            'is_default': self.is_default,
        }


@dataclass
class ClassInfo:
    """Information about a class definition."""
    name: str
    start_line: int  # 1-based
    end_line: int    # 1-based, inclusive
    methods: List[FunctionInfo] = field(default_factory=list)
    exported: bool = False
    extends: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form, serialising each method as well."""
        return {
            'name': self.name,
            'start_line': self.start_line,
            'end_line': self.end_line,
            'methods': [m.to_dict() for m in self.methods],
            'exported': self.exported,
            'extends': self.extends,
        }


@dataclass
class TypeInfo:
    """Information about a type/interface definition."""
    name: str
    kind: str  # 'interface' or 'type'
    start_line: int  # 1-based
    end_line: int    # 1-based, inclusive
    exported: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form of this type definition."""
        return {
            'name': self.name,
            'kind': self.kind,
            'start_line': self.start_line,
            'end_line': self.end_line,
            'exported': self.exported,
        }


class CodeParser:
    """
    Parse code files using tree-sitter to extract structural information.

    When tree-sitter (or a specific grammar) is unavailable, parsing falls
    back to a line-based regex scan that yields the same result shape with
    less detail.
    """

    def __init__(self):
        """Initialize the parser with tree-sitter languages."""
        self.parsers: Dict[str, Any] = {}
        self._init_parsers()

    def _init_parsers(self):
        """Initialize tree-sitter parsers for supported languages."""
        if not TREE_SITTER_AVAILABLE:
            return

        language_map = {
            'typescript': 'typescript',
            'tsx': 'tsx',
            'javascript': 'javascript',
            'python': 'python',
        }

        for lang_key, lang_name in language_map.items():
            try:
                self.parsers[lang_key] = tree_sitter_languages.get_parser(lang_name)
            except Exception:
                # Best effort: a missing grammar just routes that language
                # to the regex fallback in parse_file().
                pass

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _make_get_text(content: str):
        """Build a `get_text(node)` callable for the given source text.

        tree-sitter reports `start_byte`/`end_byte` as offsets into the
        UTF-8 encoded source, so the slice must be taken on bytes; slicing
        the `str` directly is wrong after any non-ASCII character.
        """
        source = content.encode('utf-8')

        def get_text(n) -> str:
            return source[n.start_byte:n.end_byte].decode('utf-8', errors='replace')

        return get_text

    @staticmethod
    def _count_branches(node: Any, branch_types) -> int:
        """Return 1 plus the number of descendants whose type is in `branch_types`.

        Iterative so that deeply nested code cannot hit the recursion limit.
        """
        complexity = 1
        pending = [node]
        while pending:
            current = pending.pop()
            if current.type in branch_types:
                complexity += 1
            pending.extend(current.children)
        return complexity

    @staticmethod
    def _first_child_text(node: Any, types, get_text) -> Optional[str]:
        """Return the text of the first direct child whose type is in `types`."""
        for child in node.children:
            if child.type in types:
                return get_text(child)
        return None

    # ------------------------------------------------------------------
    # Entry point
    # ------------------------------------------------------------------

    def parse_file(self, file_path: Path) -> Dict[str, Any]:
        """
        Parse a single file and extract structural information.

        Args:
            file_path: Path to the file to parse

        Returns:
            Dictionary containing extracted information. On a parsing
            failure the partial result carries an extra 'error' key.
        """
        result = {
            'functions': [],
            'imports': [],
            'exports': [],
            'classes': [],
            'types': [],
            'file_info': get_file_info(file_path),
        }

        ext = file_path.suffix.lower()
        if ext not in PARSEABLE_EXTENSIONS:
            return result

        lang = PARSEABLE_EXTENSIONS[ext]
        if lang not in self.parsers:
            # Fallback to regex-based parsing
            return self._parse_with_regex(file_path, lang)

        try:
            content = file_path.read_text(encoding='utf-8', errors='ignore')
            tree = self.parsers[lang].parse(content.encode('utf-8'))

            if lang in ('typescript', 'tsx', 'javascript'):
                self._extract_ts_js(tree.root_node, content, result, str(file_path))
            elif lang == 'python':
                self._extract_python(tree.root_node, content, result, str(file_path))
        except Exception as e:
            result['error'] = str(e)

        return result

    # ------------------------------------------------------------------
    # TypeScript / JavaScript
    # ------------------------------------------------------------------

    def _extract_ts_js(self, node: Any, content: str, result: Dict, file_path: str):
        """Extract information from TypeScript/JavaScript AST.

        Walks the tree in document order and appends dict entries to the
        lists in `result` (mutated in place).
        """
        get_text = self._make_get_text(content)

        # Explicit stack of (node, exported-flag-from-parent); children are
        # pushed reversed so they pop in source order.
        pending = [(node, False)]
        while pending:
            n, exported = pending.pop()
            node_type = n.type

            # Check for export
            is_exported = exported or node_type == 'export_statement'

            # Functions
            if node_type in TS_JS_FUNCTION_NODES:
                func = self._parse_ts_js_function(n, get_text, is_exported, file_path)
                if func:
                    result['functions'].append(func.to_dict())

            # Imports
            elif node_type == 'import_statement':
                imp = self._parse_ts_js_import(n, get_text)
                if imp:
                    result['imports'].append(imp.to_dict())

            # Exports
            elif node_type == 'export_statement':
                exp = self._parse_ts_js_export(n, get_text)
                if exp:
                    result['exports'].extend([e.to_dict() for e in exp])

            # Classes
            elif node_type in TS_JS_CLASS_NODES:
                cls = self._parse_ts_js_class(n, get_text, is_exported, file_path)
                if cls:
                    result['classes'].append(cls.to_dict())

            # Types/Interfaces
            elif node_type in TS_TYPE_NODES:
                type_info = self._parse_ts_type(n, get_text, is_exported)
                if type_info:
                    result['types'].append(type_info.to_dict())

            # The flag only survives through export/declaration wrappers, so
            # code nested inside an exported function is not itself exported.
            child_flag = is_exported if node_type in _EXPORT_PASSTHROUGH_NODES else False
            pending.extend((child, child_flag) for child in reversed(n.children))

    def _parse_ts_js_function(self, node: Any, get_text, exported: bool,
                              file_path: str) -> Optional[FunctionInfo]:
        """Parse a TypeScript/JavaScript function node.

        Returns:
            A FunctionInfo, or None when no name can be determined (e.g. an
            anonymous callback) or the node cannot be read.
        """
        try:
            name = ""
            params = ""
            return_type = "void"
            is_async = False
            is_arrow = node.type == 'arrow_function'

            for child in node.children:
                kind = child.type
                if kind in ('identifier', 'property_identifier'):
                    if is_arrow:
                        # A bare identifier on an arrow function is its single
                        # parameter (`x => ...`), never its name.
                        if not params:
                            params = get_text(child)
                    elif not name:
                        # Methods name themselves with `property_identifier`.
                        name = get_text(child)
                elif kind == 'formal_parameters':
                    params = get_text(child)
                elif kind == 'type_annotation':
                    return_type = get_text(child).lstrip(': ')
                elif kind == 'async':
                    is_async = True

            # Anonymous functions take the name of the variable they are
            # assigned to: `const foo = () => {}` / `const foo = function () {}`.
            if not name:
                parent = node.parent
                if parent and parent.type == 'variable_declarator':
                    name = self._first_child_text(parent, ('identifier',), get_text) or ""

            if not name:
                return None

            return FunctionInfo(
                name=name,
                params=params,
                return_type=return_type,
                start_line=node.start_point[0] + 1,
                end_line=node.end_point[0] + 1,
                is_async=is_async,
                exported=exported,
                # Simplified complexity: count branches and loops.
                complexity=self._count_branches(node, _TS_JS_BRANCH_NODES),
                file_path=file_path,
            )
        except Exception:
            return None

    def _parse_ts_js_import(self, node: Any, get_text) -> Optional[ImportInfo]:
        """Parse a TypeScript/JavaScript import statement.

        Returns:
            An ImportInfo, or None when the statement has no module string.
        """
        try:
            module = ""
            imports = []
            is_default = False
            is_namespace = False

            for child in node.children:
                if child.type == 'string':
                    module = get_text(child).strip('"\'')
                elif child.type != 'import_clause':
                    continue
                else:
                    for clause_part in child.children:
                        if clause_part.type == 'identifier':
                            # `import Foo from '...'`
                            imports.append(get_text(clause_part))
                            is_default = True
                        elif clause_part.type == 'named_imports':
                            # `import { a, b as c } from '...'` -> original names
                            for spec in clause_part.children:
                                if spec.type == 'import_specifier':
                                    first = self._first_child_text(
                                        spec, ('identifier',), get_text)
                                    if first is not None:
                                        imports.append(first)
                        elif clause_part.type == 'namespace_import':
                            # `import * as ns from '...'`
                            is_namespace = True
                            imports.extend(
                                get_text(ns_child)
                                for ns_child in clause_part.children
                                if ns_child.type == 'identifier'
                            )

            if not module:
                return None
            return ImportInfo(
                module=module,
                imports=imports,
                is_default=is_default,
                is_namespace=is_namespace,
            )
        except Exception:
            return None

    def _parse_ts_js_export(self, node: Any, get_text) -> List[ExportInfo]:
        """Parse a TypeScript/JavaScript export statement.

        Returns:
            The exports found (possibly empty). Parsing is best effort:
            entries collected before an unexpected error are still returned.
        """
        exports = []
        try:
            is_default = False
            for child in node.children:
                kind = child.type
                if kind == 'default':
                    is_default = True
                elif kind in _TS_JS_EXPORTED_DECLARATIONS:
                    # TypeScript names classes/interfaces/aliases with
                    # `type_identifier`; JavaScript uses `identifier`.
                    name = self._first_child_text(
                        child, ('identifier', 'type_identifier'), get_text)
                    if name:
                        exports.append(ExportInfo(
                            name=name,
                            export_type=_TS_JS_EXPORTED_DECLARATIONS[kind],
                            is_default=is_default,
                        ))
                elif kind in ('lexical_declaration', 'variable_declaration'):
                    for declarator in child.children:
                        if declarator.type != 'variable_declarator':
                            continue
                        name = self._first_child_text(declarator, ('identifier',), get_text)
                        if name:
                            exports.append(ExportInfo(
                                name=name,
                                export_type='const',
                                is_default=is_default,
                            ))
                elif kind == 'export_clause':
                    # `export { a, b as c }` -> record the exported name,
                    # i.e. the alias when one is given.
                    for spec in child.children:
                        if spec.type != 'export_specifier':
                            continue
                        idents = [get_text(s) for s in spec.children
                                  if s.type == 'identifier']
                        if idents:
                            exports.append(ExportInfo(
                                name=idents[-1],
                                export_type='named',
                                is_default=is_default,
                            ))
                elif kind == 'identifier':
                    # `export default foo;`
                    exports.append(ExportInfo(
                        name=get_text(child),
                        export_type='default' if is_default else 'named',
                        is_default=is_default,
                    ))
        except Exception:
            # Deliberately best effort; keep whatever was collected.
            pass
        return exports

    def _parse_ts_js_class(self, node: Any, get_text, exported: bool,
                           file_path: str) -> Optional[ClassInfo]:
        """Parse a TypeScript/JavaScript class.

        Returns:
            A ClassInfo, or None for an anonymous class or unreadable node.
        """
        try:
            name = ""
            extends = None
            methods = []
            base_types = ('identifier', 'member_expression')

            for child in node.children:
                kind = child.type
                if kind in ('identifier', 'type_identifier'):
                    if not name:
                        name = get_text(child)
                elif kind == 'class_heritage':
                    for heritage in child.children:
                        if extends is not None:
                            break
                        if heritage.type == 'extends_clause':
                            # TypeScript wraps the base in an extends_clause.
                            extends = self._first_child_text(heritage, base_types, get_text)
                        elif heritage.type in base_types:
                            # JavaScript puts the base directly in class_heritage.
                            extends = get_text(heritage)
                elif kind == 'class_body':
                    for member in child.children:
                        if member.type == 'method_definition':
                            func = self._parse_ts_js_function(member, get_text, True, file_path)
                            if func:
                                methods.append(func)

            if not name:
                return None
            return ClassInfo(
                name=name,
                start_line=node.start_point[0] + 1,
                end_line=node.end_point[0] + 1,
                methods=methods,
                exported=exported,
                extends=extends,
            )
        except Exception:
            return None

    def _parse_ts_type(self, node: Any, get_text, exported: bool) -> Optional[TypeInfo]:
        """Parse a TypeScript type or interface.

        Returns:
            A TypeInfo, or None when the node has no name.
        """
        try:
            kind = "interface" if node.type == 'interface_declaration' else "type"
            name = self._first_child_text(node, ('identifier', 'type_identifier'), get_text)
            if not name:
                return None
            return TypeInfo(
                name=name,
                kind=kind,
                start_line=node.start_point[0] + 1,
                end_line=node.end_point[0] + 1,
                exported=exported,
            )
        except Exception:
            return None

    # ------------------------------------------------------------------
    # Python
    # ------------------------------------------------------------------

    def _extract_python(self, node: Any, content: str, result: Dict, file_path: str):
        """Extract information from Python AST.

        Functions (including nested ones) go to result['functions'];
        methods are reported only inside their class entry.
        """
        get_text = self._make_get_text(content)

        pending = [node]
        while pending:
            n = pending.pop()
            node_type = n.type

            # Functions
            if node_type in PY_FUNCTION_NODES:
                func = self._parse_python_function(n, get_text, file_path)
                if func:
                    result['functions'].append(func.to_dict())

            # Imports
            elif node_type in PY_IMPORT_NODES:
                for imp in self._parse_python_import(n, get_text):
                    result['imports'].append(imp.to_dict())

            # Classes
            elif node_type == 'class_definition':
                cls = self._parse_python_class(n, get_text, file_path)
                if cls:
                    result['classes'].append(cls.to_dict())
                continue  # Don't recurse into class body separately

            # Reversed so that nodes are visited in source order.
            pending.extend(reversed(n.children))

    def _parse_python_function(self, node: Any, get_text,
                               file_path: str) -> Optional[FunctionInfo]:
        """Parse a Python function definition.

        Private (underscore-prefixed) functions are kept and flagged with
        `exported=False`.

        Returns:
            A FunctionInfo, or None when the node has no name or is unreadable.
        """
        try:
            name = ""
            params = ""
            return_type = "None"
            docstring = None
            # Depending on the grammar version, async functions are either a
            # dedicated node type or a function_definition with an `async`
            # keyword child; accept both.
            is_async = node.type == 'async_function_definition'

            for child in node.children:
                kind = child.type
                if kind == 'async':
                    is_async = True
                elif kind == 'identifier':
                    if not name:
                        name = get_text(child)
                elif kind == 'parameters':
                    params = get_text(child)
                elif kind == 'type':
                    return_type = get_text(child)
                elif kind == 'block':
                    docstring = self._python_docstring(child, get_text)

            if not name:
                return None

            return FunctionInfo(
                name=name,
                params=params,
                return_type=return_type,
                start_line=node.start_point[0] + 1,
                end_line=node.end_point[0] + 1,
                is_async=is_async,
                exported=not name.startswith('_'),
                complexity=self._count_branches(node, _PY_BRANCH_NODES),
                docstring=docstring,
                file_path=file_path,
            )
        except Exception:
            return None

    @staticmethod
    def _python_docstring(block: Any, get_text) -> Optional[str]:
        """Return the docstring of a Python `block`, if it has one.

        Only the first statement counts (comments are skipped); a string
        appearing later in the body is not a docstring.
        """
        for statement in block.children:
            if statement.type == 'comment':
                continue
            if statement.type == 'expression_statement':
                for part in statement.children:
                    if part.type == 'string':
                        # Drop the quotes, then surrounding blank lines/indent.
                        return get_text(part).strip('"\' ').strip()
                    break
            return None
        return None

    def _parse_python_import(self, node: Any, get_text) -> List[ImportInfo]:
        """Parse Python import statements.

        Returns:
            One ImportInfo per module for `import a, b`; a single ImportInfo
            for `from m import ...`. Best effort: an unexpected error yields
            whatever was collected so far.
        """
        imports = []
        try:
            if node.type == 'import_statement':
                # import module or import module as alias
                for child in node.children:
                    if child.type == 'dotted_name':
                        dotted = get_text(child)
                        imports.append(ImportInfo(
                            module=dotted,
                            imports=[dotted.split('.')[-1]],
                            is_namespace=True,
                        ))
                    elif child.type == 'aliased_import':
                        module = ""
                        alias = ""
                        for part in child.children:
                            if part.type == 'dotted_name':
                                module = get_text(part)
                            elif part.type == 'identifier':
                                alias = get_text(part)
                        if module:
                            imports.append(ImportInfo(
                                module=module,
                                imports=[alias] if alias else [module.split('.')[-1]],
                                alias=alias,
                                is_namespace=True,
                            ))

            elif node.type == 'import_from_statement':
                # from module import name
                module = ""
                names = []
                # Both the module and the imported names are `dotted_name`
                # nodes; only their position relative to the `import` keyword
                # tells them apart.
                after_import_keyword = False

                for child in node.children:
                    kind = child.type
                    if kind == 'import':
                        after_import_keyword = True
                    elif not after_import_keyword:
                        if kind in ('dotted_name', 'relative_import'):
                            module = get_text(child)
                    elif kind in ('dotted_name', 'identifier'):
                        names.append(get_text(child))
                    elif kind == 'aliased_import':
                        # Record the imported name, not its local alias.
                        original = self._first_child_text(
                            child, ('dotted_name', 'identifier'), get_text)
                        if original:
                            names.append(original)
                    elif kind == 'wildcard_import':
                        names.append('*')

                if module:
                    imports.append(ImportInfo(
                        module=module,
                        imports=names,
                        is_default=len(names) == 1 and names[0] != '*',
                    ))
        except Exception:
            # Deliberately best effort; keep whatever was collected.
            pass
        return imports

    def _parse_python_class(self, node: Any, get_text,
                            file_path: str) -> Optional[ClassInfo]:
        """Parse a Python class definition.

        Only the first base class is recorded in `extends`.

        Returns:
            A ClassInfo, or None when the node has no name or is unreadable.
        """
        try:
            name = ""
            extends = None
            methods = []

            for child in node.children:
                kind = child.type
                if kind == 'identifier':
                    if not name:
                        name = get_text(child)
                elif kind == 'argument_list':
                    # Base classes; `attribute` covers dotted bases (pkg.Base).
                    extends = self._first_child_text(
                        child, ('identifier', 'attribute'), get_text)
                elif kind == 'block':
                    for member in child.children:
                        target = member
                        if member.type == 'decorated_definition':
                            # @property / @staticmethod / ... wrap the def.
                            target = next(
                                (d for d in member.children
                                 if d.type in PY_FUNCTION_NODES),
                                None,
                            )
                        if target is None or target.type not in PY_FUNCTION_NODES:
                            continue
                        func = self._parse_python_function(target, get_text, file_path)
                        if func:
                            methods.append(func)

            if not name:
                return None
            return ClassInfo(
                name=name,
                start_line=node.start_point[0] + 1,
                end_line=node.end_point[0] + 1,
                methods=methods,
                exported=not name.startswith('_'),
                extends=extends,
            )
        except Exception:
            return None

    # ------------------------------------------------------------------
    # Regex fallback
    # ------------------------------------------------------------------

    def _parse_with_regex(self, file_path: Path, lang: str) -> Dict[str, Any]:
        """Fallback regex-based parsing when tree-sitter is not available.

        Works line by line, so multi-line signatures are missed and every
        entry spans a single line with complexity 1.

        Args:
            file_path: Path to the file to parse
            lang: Language key ('python', 'typescript', 'javascript', 'tsx')

        Returns:
            Dictionary with the same top-level keys as parse_file(); carries
            an extra 'error' key if reading or scanning failed.
        """
        result = {
            'functions': [],
            'imports': [],
            'exports': [],
            'classes': [],
            'types': [],
            'file_info': get_file_info(file_path),
        }

        try:
            content = file_path.read_text(encoding='utf-8', errors='ignore')
            lines = content.split('\n')
            path_text = str(file_path)

            if lang == 'python':
                for line_no, raw_line in enumerate(lines, start=1):
                    line = raw_line.strip()

                    # Functions
                    match = _PY_FUNC_RE.match(line)
                    if match:
                        func_name = match.group(2)
                        result['functions'].append({
                            'name': func_name,
                            'params': f"({match.group(3)})",
                            'return_type': (match.group(4) or 'None').strip(),
                            'start_line': line_no,
                            'end_line': line_no,
                            'async': bool(match.group(1)),
                            'exported': not func_name.startswith('_'),
                            'complexity': 1,
                            'complexity_label': get_complexity_label(1),
                            'docstring': None,
                            'file_path': path_text,
                        })

                    # Imports
                    match = _PY_IMPORT_RE.match(line)
                    if match:
                        from_module = match.group(1)
                        names = [n.strip() for n in match.group(2).split(',')]
                        module = from_module or names[0]
                        result['imports'].append({
                            'module': module,
                            'imports': names if from_module else [module],
                            'is_default': False,
                            'is_namespace': not from_module,
                            'alias': None,
                        })

                    # Classes
                    match = _PY_CLASS_RE.match(line)
                    if match:
                        class_name = match.group(1)
                        result['classes'].append({
                            'name': class_name,
                            'start_line': line_no,
                            'end_line': line_no,
                            'methods': [],
                            'exported': not class_name.startswith('_'),
                            # `class A():` captures '', which means "no base".
                            'extends': match.group(2) or None,
                        })

            elif lang in ('typescript', 'javascript', 'tsx'):
                for line_no, line in enumerate(lines, start=1):
                    has_async = bool(_ASYNC_WORD_RE.search(line))
                    has_export = bool(_EXPORT_WORD_RE.search(line))

                    # Functions
                    match = _JS_FUNC_RE.search(line)
                    if match:
                        result['functions'].append({
                            'name': match.group(1),
                            'params': f"({match.group(2)})",
                            'return_type': 'void',
                            'start_line': line_no,
                            'end_line': line_no,
                            'async': has_async,
                            'exported': has_export,
                            'complexity': 1,
                            'complexity_label': get_complexity_label(1),
                            'docstring': None,
                            'file_path': path_text,
                        })

                    # Arrow functions
                    match = _JS_ARROW_RE.search(line)
                    if match:
                        result['functions'].append({
                            'name': match.group(1),
                            'params': '()',
                            'return_type': 'void',
                            'start_line': line_no,
                            'end_line': line_no,
                            'async': has_async,
                            'exported': has_export,
                            'complexity': 1,
                            'complexity_label': get_complexity_label(1),
                            'docstring': None,
                            'file_path': path_text,
                        })

                    # Imports
                    match = _JS_IMPORT_RE.search(line)
                    if match:
                        named, default_name = match.group(1), match.group(2)
                        names = named or default_name
                        if names:
                            result['imports'].append({
                                'module': match.group(3),
                                'imports': (
                                    [n.strip() for n in names.split(',')]
                                    if named else [names]
                                ),
                                'is_default': bool(default_name),
                                'is_namespace': False,
                                'alias': None,
                            })

                    # Exports
                    match = _JS_EXPORT_RE.search(line)
                    if match:
                        result['exports'].append({
                            'name': match.group(1),
                            'type': 'named',
                            'is_default': bool(_DEFAULT_WORD_RE.search(line)),
                        })
        except Exception as e:
            result['error'] = str(e)

        return result


def analyze_directory(dir_path: Path, root_path: Optional[Path] = None) -> Dict[str, Any]:
    """
    Analyze all code files in a directory (non-recursive for that level).

    Sub-directories are only listed under 'children'; they are not descended
    into.

    Args:
        dir_path: Path to directory to analyze
        root_path: Optional root path for relative anchor generation;
            defaults to `dir_path`

    Returns:
        Comprehensive structure dictionary. If anything fails, the partially
        filled structure is returned with an extra 'error' key.
    """
    dir_path = normalize_path(str(dir_path))
    # Normalise root_path the same way as dir_path so the `parent` comparison
    # below compares like with like.
    # NOTE(review): assumes normalize_path returns a pathlib.Path - confirm.
    root_path = dir_path if root_path is None else normalize_path(str(root_path))

    parser = CodeParser()

    structure = {
        'path': str(dir_path),
        'name': dir_path.name,
        'files': [],
        'all_functions': [],
        'all_imports': [],
        'all_exports': [],
        'all_classes': [],
        'all_types': [],
        'children': [],
        'parent': str(dir_path.parent) if dir_path != root_path else None,
        'metrics': {
            'total_lines': 0,
            'total_files': 0,
            'total_functions': 0,
            'total_classes': 0,
            'total_types': 0,
            'avg_complexity': 0.0,
        },
        'connections': {
            'uses': [],
            'used_by': [],
        },
    }

    try:
        # List files and directories; sorted for reproducible output.
        for item in sorted(dir_path.iterdir()):
            if should_ignore(item):
                continue

            if item.is_dir():
                structure['children'].append(str(item))
            elif item.is_file() and item.suffix.lower() in INCLUDEABLE_EXTENSIONS:
                # Parse the file
                parsed = parser.parse_file(item)

                structure['files'].append(parsed.get('file_info', {'name': item.name}))
                structure['all_functions'].extend(parsed.get('functions', []))
                structure['all_imports'].extend(parsed.get('imports', []))
                structure['all_exports'].extend(parsed.get('exports', []))
                structure['all_classes'].extend(parsed.get('classes', []))
                structure['all_types'].extend(parsed.get('types', []))

        # Calculate metrics
        complexities = [f.get('complexity', 1) for f in structure['all_functions']]
        avg_complexity = sum(complexities) / len(complexities) if complexities else 1.0

        structure['metrics'] = {
            'total_lines': sum(f.get('lines', 0) for f in structure['files']),
            'total_files': len(structure['files']),
            'total_functions': len(structure['all_functions']),
            'total_classes': len(structure['all_classes']),
            'total_types': len(structure['all_types']),
            'avg_complexity': round(avg_complexity, 2),
            'complexity_label': get_complexity_label(int(avg_complexity)),
        }

        # Analyze connections based on imports
        structure['connections'] = _analyze_connections(structure, root_path)

    except Exception as e:
        structure['error'] = str(e)

    return structure


def _resolve_relative_module(base: Path, module: str) -> Path:
    """Resolve a relative import specifier against the directory `base`.

    Handles path-style specifiers (`./a/b`, `../a`, `.`, `..`) and
    Python-style dotted ones (`.mod`, `..pkg.mod`), where one leading dot is
    the current package and each further dot goes one directory up.
    """
    current = base
    if module in ('.', '..') or module.startswith(('./', '../')):
        for part in module.split('/'):
            if part in ('', '.'):
                continue
            current = current.parent if part == '..' else current / part
        return current

    remainder = module.lstrip('.')
    leading_dots = len(module) - len(remainder)
    for _ in range(leading_dots - 1):
        current = current.parent
    for part in remainder.split('.'):
        if part:
            current = current / part
    return current


def _analyze_connections(structure: Dict, root_path: Path) -> Dict[str, List[str]]:
    """
    Analyze import statements to build connection graph.

    Only project-local imports are considered: relative ones (starting with
    '.') and '@/'-prefixed project-relative ones. Everything else is treated
    as an external package and skipped.

    Args:
        structure: Parsed structure dictionary
        root_path: Root path for anchor generation

    Returns:
        Dictionary with 'uses' and 'used_by' lists; 'uses' is sorted and
        de-duplicated, 'used_by' is always empty here.
    """
    uses = set()

    for imp in structure.get('all_imports', []):
        module = imp.get('module', '')

        if module.startswith('.'):
            # Convert relative path to absolute
            uses.add(str(_resolve_relative_module(Path(structure['path']), module)))
        elif module.startswith('@/'):
            # Project-relative import
            uses.add(str(root_path / module[2:]))
        # Anything else is an external package: skip.

    return {
        'uses': sorted(uses),
        'used_by': [],  # Will be populated during full codebase scan
    }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Jahanzaib-Kaleem/hive-mind-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.