MCP Code Indexer

  • mcp_code_indexer
"""
Code analyzer module.

Provides syntax analysis and dependency analysis for source code, built on
top of tree-sitter parsers.
"""

import ctypes
import logging
import os
import traceback
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional, Set

try:
    from tree_sitter import Language, Parser
except ImportError:
    # Keep the module importable without tree-sitter: every public method
    # already degrades to an empty result when no parser is registered.
    Language = None
    Parser = None

logger = logging.getLogger(__name__)


class CodeAnalyzer:
    """Extract imports, functions, classes and dependencies from source code.

    Parsers are loaded from compiled grammar libraries stored in the
    ``parsers`` directory next to this module and kept in ``self.parsers``,
    keyed by language name.

    Line numbers in the flat results (functions, classes, variables) are the
    zero-based rows reported by tree-sitter; the structure tree built by
    ``_build_structure_tree`` reports one-based lines and columns.
    """

    # Grammars probed at start-up.
    _LANGUAGE_NAMES = ("python", "javascript", "typescript", "php", "rust")
    # Shared-library suffixes tried for each grammar, in order.
    _LIBRARY_SUFFIXES = (".dll", ".so", ".dylib")
    _JS_LANGUAGES = ("javascript", "typescript")
    _PYTHON_IMPORT_TYPES = ("import_statement", "import_from_statement")
    _JS_IMPORT_TYPES = ("import_statement", "import_declaration")
    # "function" is the older grammar name of "function_expression".
    _JS_FUNCTION_VALUES = ("function", "function_expression", "arrow_function")
    _JS_VARIABLE_DECLARATIONS = (
        "variable_declaration",
        "const_declaration",
        "let_declaration",
        "lexical_declaration",
    )
    # Parameter node names differ between PHP grammar versions.
    _PHP_PARAMETER_TYPES = (
        "formal_parameter",
        "simple_parameter",
        "variadic_parameter",
        "property_promotion_parameter",
    )
    _PHP_VISIBILITIES = ("public", "protected", "private")
    _PHP_INCLUDE_TYPES = (
        "include_expression",
        "require_expression",
        "include_once_expression",
        "require_once_expression",
    )
    # Anonymous tokens that appear among the children of list-like nodes.
    _PUNCTUATION = (",", "(", ")")

    def __init__(self):
        """Create the analyzer and load every available language parser."""
        self.parsers = {}
        self._init_parsers()

    # ------------------------------------------------------------------
    # Parser set-up
    # ------------------------------------------------------------------

    @classmethod
    def _find_language_library(cls, directory: Path, lang_name: str) -> Optional[Path]:
        """Return the first existing grammar library for ``lang_name``, or None."""
        for suffix in cls._LIBRARY_SUFFIXES:
            candidate = directory / f"{lang_name}{suffix}"
            if candidate.exists():
                return candidate
        return None

    def _init_parsers(self):
        """Load one parser per supported language into ``self.parsers``.

        Failures are logged and never raised. A language whose grammar
        library is missing still gets a (language-less) parser registered,
        as before; parsing with it fails and the callers return empty results.
        """
        try:
            self.parsers = {}

            if Parser is None or Language is None:
                logger.error("未安装 tree_sitter,代码分析功能不可用")
                return

            languages_dir = Path(__file__).parent / "parsers"
            languages_dir.mkdir(exist_ok=True)

            for lang_name in self._LANGUAGE_NAMES:
                try:
                    parser = Parser()
                    library_path = self._find_language_library(languages_dir, lang_name)

                    if library_path is not None:
                        lib = ctypes.CDLL(str(library_path))
                        tree_sitter_lang_fn = getattr(lib, f"tree_sitter_{lang_name}")
                        # ctypes assumes a C int return value by default, which
                        # truncates the grammar pointer on 64-bit platforms.
                        tree_sitter_lang_fn.restype = ctypes.c_void_p
                        lang = Language(tree_sitter_lang_fn())

                        # Newer py-tree-sitter releases dropped set_language()
                        # in favour of the ``language`` attribute.
                        if hasattr(parser, "set_language"):
                            parser.set_language(lang)
                        else:
                            parser.language = lang
                        logger.info(f"成功加载 {lang_name} 语言库")
                    else:
                        logger.warning(f"未找到 {lang_name} 语言库,使用默认解析器")

                    self.parsers[lang_name] = parser
                    logger.info(f"初始化 {lang_name} 解析器")
                    logger.info(f"成功初始化 {lang_name} 解析器")
                except Exception as e:
                    logger.error(f"初始化 {lang_name} 解析器失败: {str(e)}")

            # PHP gets a dedicated message when its parser is missing.
            if "php" not in self.parsers:
                logger.error("PHP解析器初始化失败,这将导致PHP代码分析功能不可用")

        except Exception as e:
            logger.error(f"初始化解析器失败: {str(e)}")
            logger.error(traceback.format_exc())

    def _has_language(self, parser: Any) -> bool:
        """Return True when ``parser`` can parse, i.e. has a language set.

        A tiny input is parsed as a probe; a ``ValueError`` from the parser
        is taken to mean that no language was configured.
        """
        try:
            test_tree = parser.parse(b"test")
            return test_tree is not None
        except ValueError:
            return False

    # ------------------------------------------------------------------
    # Small syntax-tree helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _walk(root: Any) -> Iterator[Any]:
        """Yield ``root`` and all descendants in pre-order, without recursion."""
        stack = [root]
        while stack:
            current = stack.pop()
            yield current
            # Reversed so that the first child is the next node popped.
            stack.extend(reversed(current.children))

    @staticmethod
    def _text(node: Any) -> str:
        """Return the source text covered by ``node``."""
        return node.text.decode('utf-8')

    @staticmethod
    def _field(node: Any, *names: str) -> Any:
        """Return the first child found under any of the field ``names``."""
        for name in names:
            child = node.child_by_field_name(name)
            if child is not None:
                return child
        return None

    @staticmethod
    def _child_of_type(node: Any, *types: str) -> Any:
        """Return the first direct child whose type is in ``types``, or None."""
        for child in node.children:
            if child.type in types:
                return child
        return None

    @classmethod
    def _named_span(cls, node: Any, suffix: str = "") -> Optional[Dict[str, Any]]:
        """Describe ``node`` by its ``name`` field and zero-based line span.

        Returns None when the node has no ``name`` field.
        """
        name_node = node.child_by_field_name("name")
        if name_node is None:
            return None
        return {
            "name": cls._text(name_node) + suffix,
            "start_line": node.start_point[0],
            "end_line": node.end_point[0]
        }

    @classmethod
    def _call_arguments(cls, call_node: Any) -> List[Any]:
        """Return the argument expressions of a call, skipping punctuation.

        The children of an ``arguments`` node include the parentheses and
        commas. Some grammars wrap each value in an ``argument`` node; that
        wrapper is replaced by its last child (the value).
        """
        arguments = call_node.child_by_field_name("arguments")
        if arguments is None:
            return []
        values = []
        for child in arguments.children:
            if child.type in cls._PUNCTUATION:
                continue
            if child.type == "argument" and child.children:
                child = child.children[-1]
            values.append(child)
        return values

    @staticmethod
    def _is_module_level(node: Any) -> bool:
        """Return True for a Python node located directly at module level.

        An ``assignment`` is wrapped in an ``expression_statement``, so the
        wrapper is skipped before checking for the ``module`` parent.
        """
        parent = node.parent
        if parent is not None and parent.type == "expression_statement":
            parent = parent.parent
        return parent is not None and parent.type == "module"

    @classmethod
    def _php_parameter_names(cls, node: Any) -> List[str]:
        """Return the parameter names of a PHP function or method node."""
        parameters = node.child_by_field_name("parameters")
        if parameters is None:
            return []
        names = []
        for param in parameters.children:
            if param.type in cls._PHP_PARAMETER_TYPES:
                param_name = param.child_by_field_name("name")
                if param_name is not None:
                    names.append(cls._text(param_name))
        return names

    @classmethod
    def _php_visibility(cls, node: Any) -> str:
        """Return the visibility of a PHP method, defaulting to ``public``."""
        modifiers = list(node.children_by_field_name("modifiers"))
        # Fallback for grammars that expose modifiers as plain children.
        modifiers.extend(
            child for child in node.children if child.type == "visibility_modifier"
        )
        for modifier in modifiers:
            mod_text = cls._text(modifier)
            if mod_text in cls._PHP_VISIBILITIES:
                return mod_text
        return "public"

    @staticmethod
    def _empty_analysis(content: str) -> Dict[str, Any]:
        """Return the ``analyze_code`` result used when nothing can be parsed."""
        return {
            "imports": [],
            "functions": [],
            "classes": [],
            "dependencies": [],
            "line_count": len(content.split('\n'))
        }

    @staticmethod
    def _empty_structure() -> Dict[str, Any]:
        """Return the ``get_code_structure`` result used when parsing fails."""
        return {
            "functions": [],
            "classes": [],
            "imports": [],
            "dependencies": [],
            "variables": [],
            "structure": {
                "type": "file",
                "children": []
            }
        }

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def analyze_code(self, content: str, language: str) -> Dict[str, Any]:
        """
        Analyze source code.

        Args:
            content: Source code text.
            language: Programming language name (key of ``self.parsers``).

        Returns:
            Dictionary with ``imports``, ``functions``, ``classes``,
            ``dependencies`` and ``line_count``. The lists are empty when no
            parser exists for the language or parsing fails.
        """
        if language not in self.parsers:
            logger.warning(f"未找到 {language} 语言的解析器,返回空结果")
            return self._empty_analysis(content)

        try:
            parser = self.parsers[language]
            tree = parser.parse(content.encode())
            root = tree.root_node

            return {
                "imports": self._analyze_imports(root, language),
                "functions": self._analyze_functions(root, language),
                "classes": self._analyze_classes(root, language),
                "dependencies": self._analyze_dependencies(root, language),
                "line_count": len(content.split('\n'))
            }
        except Exception as e:
            logger.error(f"使用DLL解析器分析代码失败: {str(e)}")
            logger.error(traceback.format_exc())
            return self._empty_analysis(content)

    def get_code_structure(self, content: str, language: str) -> Dict[str, Any]:
        """
        Return structural information about source code.

        Args:
            content: Source code text.
            language: Programming language name (key of ``self.parsers``).

        Returns:
            Dictionary containing:
            - functions: list of functions
            - classes: list of classes
            - imports: list of import statements
            - dependencies: dependency list
            - variables: global variables
            - structure: nested structure tree of the file
        """
        if language not in self.parsers:
            logger.warning(f"未找到 {language} 语言的解析器,返回空结果")
            return self._empty_structure()

        try:
            parser = self.parsers[language]
            tree = parser.parse(content.encode())
            root = tree.root_node

            functions = self._analyze_functions(root, language)
            classes = self._analyze_classes(root, language)
            imports = self._analyze_imports(root, language)
            dependencies = self._analyze_dependencies(root, language)
            variables = self._extract_global_variables(root, language)
            structure = self._build_structure_tree(root, language)

            return {
                "functions": functions,
                "classes": classes,
                "imports": imports,
                "dependencies": dependencies,
                "variables": variables,
                "structure": structure
            }
        except Exception as e:
            logger.error(f"使用DLL解析器分析代码结构失败: {str(e)}")
            logger.error(traceback.format_exc())
            return self._empty_structure()

    # ------------------------------------------------------------------
    # Flat collectors
    # ------------------------------------------------------------------

    def _analyze_imports(self, node: Any, language: str) -> List[str]:
        """
        Collect import statements.

        The tree is searched for import nodes; looking up an ``import``
        field on the root node (the previous approach) never matches.

        Args:
            node: Syntax tree node to search from.
            language: Programming language name.

        Returns:
            Source text of every import statement, in document order.
            Only Python and JavaScript/TypeScript are handled.
        """
        if language == "python":
            import_types = self._PYTHON_IMPORT_TYPES
        elif language in self._JS_LANGUAGES:
            import_types = self._JS_IMPORT_TYPES
        else:
            return []

        return [
            self._text(current)
            for current in self._walk(node)
            if current.type in import_types
        ]

    def _analyze_functions(self, node: Any, language: str) -> List[Dict[str, Any]]:
        """
        Collect function definitions.

        Args:
            node: Syntax tree node to search from.
            language: Programming language name.

        Returns:
            One dictionary per function with ``name``, ``start_line`` and
            ``end_line`` (zero-based). PHP entries also carry ``parameters``;
            PHP methods add ``type`` and ``visibility``.
        """
        functions = []
        is_js = language in self._JS_LANGUAGES

        for current in self._walk(node):
            kind = current.type
            entry = None

            if language == "python":
                if kind == "function_definition":
                    entry = self._named_span(current)

            elif is_js:
                # Declarations, plus arrow functions/methods that expose a name.
                if kind in ("function_declaration", "arrow_function", "method_definition"):
                    entry = self._named_span(current)
                # Function expressions bound to a variable take the variable name.
                elif kind == "variable_declarator":
                    value_node = current.child_by_field_name("value")
                    if value_node is not None and value_node.type in self._JS_FUNCTION_VALUES:
                        entry = self._named_span(current)

            elif language == "php":
                if kind in ("function_definition", "method_declaration"):
                    name_node = current.child_by_field_name("name")
                    if name_node is not None:
                        # Key order mirrors the historical result layout.
                        entry = {"name": self._text(name_node)}
                        if kind == "method_declaration":
                            entry["type"] = "method"
                            entry["visibility"] = self._php_visibility(current)
                        entry["parameters"] = self._php_parameter_names(current)
                        entry["start_line"] = current.start_point[0]
                        entry["end_line"] = current.end_point[0]

            if entry is not None:
                functions.append(entry)

        return functions

    def _analyze_classes(self, node: Any, language: str) -> List[Dict[str, Any]]:
        """
        Collect class definitions.

        Args:
            node: Syntax tree node to search from.
            language: Programming language name.

        Returns:
            One dictionary per class with ``name``, ``start_line`` and
            ``end_line`` (zero-based). PHP interfaces are included with
            `` (interface)`` appended to their name.
        """
        classes = []
        is_js = language in self._JS_LANGUAGES

        for current in self._walk(node):
            kind = current.type
            entry = None

            if language == "python":
                if kind == "class_definition":
                    entry = self._named_span(current)

            elif is_js:
                # Class declarations and named class expressions.
                if kind in ("class_declaration", "class"):
                    entry = self._named_span(current)
                # Class expressions bound to a variable take the variable name.
                elif kind == "variable_declarator":
                    value_node = current.child_by_field_name("value")
                    if value_node is not None and value_node.type == "class":
                        entry = self._named_span(current)

            elif language == "php":
                if kind == "class_declaration":
                    entry = self._named_span(current)
                elif kind == "interface_declaration":
                    entry = self._named_span(current, " (interface)")

            if entry is not None:
                classes.append(entry)

        return classes

    def _analyze_dependencies(self, node: Any, language: str) -> List[str]:
        """
        Collect code dependencies.

        Args:
            node: Syntax tree node to search from.
            language: Programming language name.

        Returns:
            De-duplicated list (unordered) of called function names,
            instantiated classes and import/include statements. For
            JavaScript/TypeScript ``require`` calls an additional
            ``require(<first argument>)`` entry is recorded.
        """
        dependencies: Set[str] = set()
        is_js = language in self._JS_LANGUAGES

        for current in self._walk(node):
            kind = current.type

            if language == "python":
                if kind == "call":
                    func_name = current.child_by_field_name("function")
                    if func_name is not None:
                        dependencies.add(self._text(func_name))
                elif kind in self._PYTHON_IMPORT_TYPES:
                    dependencies.add(self._text(current))

            elif is_js:
                if kind == "call_expression":
                    func_name = current.child_by_field_name("function")
                    if func_name is not None:
                        callee = self._text(func_name)
                        dependencies.add(callee)
                        # Handled here because a separate branch for the same
                        # node type would never be reached.
                        if callee == "require":
                            arguments = self._call_arguments(current)
                            if arguments:
                                dependencies.add(
                                    "require(" + self._text(arguments[0]) + ")"
                                )
                elif kind == "new_expression":
                    constructor = current.child_by_field_name("constructor")
                    if constructor is not None:
                        dependencies.add(self._text(constructor))
                elif kind in self._JS_IMPORT_TYPES:
                    dependencies.add(self._text(current))

            elif language == "php":
                if kind == "function_call_expression":
                    # The callee field is "function" in current PHP grammars;
                    # "name" is kept for compatibility with the old lookup.
                    func_name = self._field(current, "name", "function")
                    if func_name is not None:
                        dependencies.add(self._text(func_name))
                elif kind == "object_creation_expression":
                    class_name = current.child_by_field_name("class_name")
                    if class_name is None:
                        # Fallback when the class reference carries no field.
                        class_name = self._child_of_type(current, "name", "qualified_name")
                    if class_name is not None:
                        dependencies.add(self._text(class_name))
                elif kind in self._PHP_INCLUDE_TYPES:
                    dependencies.add(self._text(current))

        return list(dependencies)

    def _extract_global_variables(self, node: Any, language: str) -> List[Dict[str, Any]]:
        """
        Collect global variables and constants.

        Args:
            node: Syntax tree node to search from.
            language: Programming language name.

        Returns:
            Dictionaries with ``name``, ``value`` (source text or None) and
            the zero-based ``line``. PHP constants add ``"type": "constant"``.
        """
        variables = []
        is_js = language in self._JS_LANGUAGES

        for current in self._walk(node):
            kind = current.type

            if language == "python":
                if kind == "assignment" and self._is_module_level(current):
                    left_node = current.child_by_field_name("left")
                    right_node = current.child_by_field_name("right")
                    if left_node is not None and right_node is not None:
                        variables.append({
                            "name": self._text(left_node),
                            "value": self._text(right_node),
                            "line": current.start_point[0]
                        })

            elif is_js:
                # "lexical_declaration" is the node type of let/const.
                if kind in ("variable_declaration", "const_declaration", "lexical_declaration"):
                    parent = current.parent
                    if parent is not None and parent.type == "program":
                        for child in current.children:
                            if child.type != "variable_declarator":
                                continue
                            name_node = child.child_by_field_name("name")
                            value_node = child.child_by_field_name("value")
                            if name_node is not None:
                                variables.append({
                                    "name": self._text(name_node),
                                    "value": self._text(value_node) if value_node is not None else None,
                                    "line": current.start_point[0]
                                })

            elif language == "php":
                # Plain assignment: $var = value
                if kind == "expression_statement":
                    expr = current.child_by_field_name("expression")
                    if expr is None:
                        # Fallback when the statement's expression has no field.
                        expr = self._child_of_type(current, "assignment_expression")
                    if expr is not None and expr.type == "assignment_expression":
                        left = expr.child_by_field_name("left")
                        right = expr.child_by_field_name("right")
                        parent = current.parent
                        # Only statements directly in the global scope count.
                        in_global_scope = (
                            parent is not None
                            and parent.type in ("program", "namespace_definition_body")
                        )
                        # Must be a variable, not a property access.
                        if (
                            left is not None
                            and in_global_scope
                            and left.type == "variable_name"
                            and self._text(left).startswith('$')
                        ):
                            variables.append({
                                "name": self._text(left),
                                "value": self._text(right) if right is not None else None,
                                "line": current.start_point[0]
                            })

                # define('NAME', value)
                elif kind == "function_call_expression":
                    func_name = self._field(current, "name", "function")
                    if func_name is not None and self._text(func_name) == "define":
                        arguments = self._call_arguments(current)
                        if len(arguments) >= 2:
                            name_arg, value_arg = arguments[0], arguments[1]
                            if name_arg.type in ("string", "encapsed_string"):
                                variables.append({
                                    "name": self._text(name_arg).strip('"\''),
                                    "value": self._text(value_arg),
                                    "type": "constant",
                                    "line": current.start_point[0]
                                })

                # const NAME = value;
                elif kind == "const_declaration":
                    for child in current.children:
                        if child.type != "const_element":
                            continue
                        name_node = child.child_by_field_name("name")
                        if name_node is None:
                            name_node = self._child_of_type(child, "name")
                        value_node = child.child_by_field_name("value")
                        if value_node is None and len(child.children) >= 3:
                            # Fallback layout: name, "=", value.
                            value_node = child.children[-1]
                        if name_node is not None:
                            variables.append({
                                "name": self._text(name_node),
                                "value": self._text(value_node) if value_node is not None else None,
                                "type": "constant",
                                "line": current.start_point[0]
                            })

        return variables

    # ------------------------------------------------------------------
    # Structure tree
    # ------------------------------------------------------------------

    def _build_structure_tree(self, node: Any, language: str) -> Dict[str, Any]:
        """
        Build a nested structure tree of the code.

        Members of a class, function, namespace or interface are listed only
        under that container; they are not repeated at the parent level.

        Args:
            node: Syntax tree node to start from (normally the root).
            language: Programming language name.

        Returns:
            A ``file`` node whose ``children`` describe imports, classes,
            functions and variables, each with one-based ``location``.
        """
        punctuation = self._PUNCTUATION
        is_js = language in self._JS_LANGUAGES

        def create_node(type_name: str,
                        name: Optional[str] = None,
                        children: Optional[List[Dict]] = None,
                        attributes: Optional[Dict[str, Any]] = None,
                        location: Optional[Dict[str, int]] = None) -> Dict[str, Any]:
            """Create a structure node, omitting empty optional entries."""
            node_data: Dict[str, Any] = {"type": type_name}
            if name:
                node_data["name"] = name
            if children:
                node_data["children"] = children
            if attributes:
                node_data["attributes"] = attributes
            if location:
                node_data["location"] = location
            return node_data

        def get_location(target: Any) -> Dict[str, int]:
            """Return the one-based start/end position of ``target``."""
            return {
                "start_line": target.start_point[0] + 1,
                "start_column": target.start_point[1] + 1,
                "end_line": target.end_point[0] + 1,
                "end_column": target.end_point[1] + 1
            }

        def get_node_text(target: Any) -> str:
            """Return the text of ``target``, or "" when it is unavailable."""
            try:
                return target.text.decode('utf-8')
            except (AttributeError, UnicodeDecodeError):
                return ""

        def parameter_texts(owner: Any) -> List[str]:
            """Return the source text of each parameter of ``owner``."""
            parameters = owner.child_by_field_name("parameters")
            if parameters is None:
                return []
            return [
                get_node_text(param)
                for param in parameters.children
                if param.type not in punctuation
            ]

        def attach_body(entry: Dict[str, Any], owner: Any) -> None:
            """Nest the entries found in ``owner``'s body under ``entry``."""
            body_node = owner.child_by_field_name("body")
            if body_node is None:
                return
            nested = []
            for child in body_node.children:
                nested.extend(visit(child))
            if nested:
                entry["children"] = nested

        def named_container(owner: Any, type_name: str,
                            attributes: Optional[Dict[str, Any]] = None) -> Optional[Dict[str, Any]]:
            """Build an entry for a named definition, or None if it has no name."""
            name_node = owner.child_by_field_name("name")
            if name_node is None:
                return None
            entry = create_node(
                type_name,
                get_node_text(name_node),
                attributes=attributes or None,
                location=get_location(owner)
            )
            attach_body(entry, owner)
            return entry

        def parameters_attribute(owner: Any) -> Optional[Dict[str, Any]]:
            """Return ``{"parameters": [...]}``, or None without parameters."""
            params = parameter_texts(owner)
            return {"parameters": params} if params else None

        def visit(current: Any) -> List[Dict[str, Any]]:
            """Return the structure entries found at or below ``current``."""
            found = []
            kind = current.type
            # Set when ``current`` became a container whose body was walked.
            container = None

            if language == "python":
                if kind in self._PYTHON_IMPORT_TYPES:
                    found.append(create_node(
                        "import", get_node_text(current), location=get_location(current)
                    ))
                elif kind == "class_definition":
                    bases = []
                    superclasses = current.child_by_field_name("superclasses")
                    if superclasses is not None:
                        bases = [
                            get_node_text(arg)
                            for arg in superclasses.children
                            if arg.type not in punctuation
                        ]
                    container = named_container(
                        current, "class", {"bases": bases} if bases else None
                    )
                elif kind == "function_definition":
                    container = named_container(
                        current, "function", parameters_attribute(current)
                    )
                elif kind == "assignment" and self._is_module_level(current):
                    left = current.child_by_field_name("left")
                    right = current.child_by_field_name("right")
                    if left is not None and right is not None:
                        found.append(create_node(
                            "variable",
                            get_node_text(left),
                            attributes={"value": get_node_text(right)},
                            location=get_location(current)
                        ))

            elif is_js:
                if kind in self._JS_IMPORT_TYPES:
                    found.append(create_node(
                        "import", get_node_text(current), location=get_location(current)
                    ))
                elif kind in ("export_statement", "export_declaration"):
                    # The exported declaration itself is picked up below.
                    found.append(create_node(
                        "export", get_node_text(current), location=get_location(current)
                    ))
                elif kind == "class_declaration":
                    extends_clause = current.child_by_field_name("extends")
                    extends_name = (
                        get_node_text(extends_clause) if extends_clause is not None else None
                    )
                    container = named_container(
                        current, "class", {"extends": extends_name} if extends_name else None
                    )
                elif kind in ("function_declaration", "method_definition"):
                    container = named_container(
                        current,
                        "function" if kind == "function_declaration" else "method",
                        parameters_attribute(current)
                    )
                elif kind in self._JS_VARIABLE_DECLARATIONS:
                    for child in current.children:
                        if child.type != "variable_declarator":
                            continue
                        name_node = child.child_by_field_name("name")
                        value_node = child.child_by_field_name("value")
                        if name_node is not None:
                            found.append(create_node(
                                "variable",
                                get_node_text(name_node),
                                attributes={
                                    "value": get_node_text(value_node) if value_node is not None else None
                                },
                                location=get_location(child)
                            ))
                elif kind in self._JS_FUNCTION_VALUES:
                    parent = current.parent
                    # A function bound to a variable is already reported as
                    # that variable; only free-standing ones are anonymous.
                    if parent is None or parent.type != "variable_declarator":
                        container = create_node(
                            "anonymous_function", "anonymous", location=get_location(current)
                        )
                        attach_body(container, current)

            elif language == "php":
                if kind == "namespace_definition":
                    container = named_container(current, "namespace")
                elif kind == "class_declaration":
                    attributes = {}
                    extends_clause = current.child_by_field_name("extends")
                    implements_clause = current.child_by_field_name("implements")
                    if extends_clause is not None:
                        attributes["extends"] = get_node_text(extends_clause)
                    if implements_clause is not None:
                        attributes["implements"] = get_node_text(implements_clause)
                    container = named_container(current, "class", attributes)
                elif kind == "interface_declaration":
                    container = named_container(current, "interface")
                elif kind == "function_definition":
                    container = named_container(
                        current, "function", parameters_attribute(current)
                    )

            if container is not None:
                # The body was already walked into the container; descending
                # again would list every member a second time at this level.
                found.append(container)
                return found

            for child in current.children:
                found.extend(visit(child))
            return found

        # File-level root of the structure tree.
        file_name = ""
        if hasattr(node, 'filename'):
            file_name = os.path.basename(node.filename)

        return create_node(
            "file",
            name=file_name,
            children=visit(node),
            attributes={"language": language}
        )