Skip to main content
Glama
symbol_operations.py53 kB
""" Advanced Symbol Operations - Intelligent Symbol Detection and Analysis Implements sophisticated symbol finding, scope resolution, and semantic analysis to provide powerful code navigation and understanding capabilities. """ import logging import re from dataclasses import dataclass, field from enum import Enum from typing import Any, Dict, List, Optional from .ast_rule_intelligence import LLMAstReasoningEngine from .enhanced_search import EnhancedSearchInfrastructure, SearchContext, SearchStrategy from .ripgrep_integration import RipgrepIntegration, SearchOptions, SearchResult # Configure logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) class SymbolType(Enum): """Types of symbols that can be found and analyzed.""" FUNCTION = "function" CLASS = "class" METHOD = "method" VARIABLE = "variable" PARAMETER = "parameter" IMPORT = "import" MODULE = "module" PROPERTY = "property" CONSTANT = "constant" INTERFACE = "interface" TYPE_ALIAS = "type_alias" DECORATOR = "decorator" GENERATOR = "generator" ASYNC_FUNCTION = "async_function" class SymbolScope(Enum): """Symbol scope levels.""" GLOBAL = "global" MODULE = "module" CLASS = "class" FUNCTION = "function" LOCAL = "local" BLOCK = "block" class ReferenceType(Enum): """Types of symbol references.""" READ = "read" # Reading a variable/property WRITE = "write" # Writing to a variable/property CALL = "call" # Calling a function/method IMPORT = "import" # Importing a module/symbol INHERIT = "inherit" # Inheriting from a class IMPLEMENT = "implement" # Implementing an interface OVERRIDE = "override" # Overriding a method REFERENCE = "reference" # General reference TYPE_ANNOTATION = "type_annotation" # Using in type annotation DECORATE = "decorate" # Using as decorator @dataclass class SymbolInfo: """Comprehensive information about a discovered symbol.""" name: str symbol_type: SymbolType file_path: str line_number: int column_number: int = 0 scope: SymbolScope = SymbolScope.GLOBAL parent_symbol: Optional[str] = None documentation: Optional[str] = None signature: Optional[str] = None return_type: Optional[str] = None parameters: List[str] = field(default_factory=list) decorators: List[str] = field(default_factory=list) is_async: bool = False is_private: bool = False is_protected: bool = False is_static: bool = False is_abstract: bool = False confidence_score: float = 1.0 language: str = "python" metadata: Dict[str, Any] = field(default_factory=dict) @dataclass class ReferenceInfo: """Detailed information about a symbol reference.""" symbol_name: str reference_type: ReferenceType file_path: str line_number: int column_number: int = 0 context: str = "" is_definition: bool = False confidence_score: float = 1.0 language: str = "python" scope: SymbolScope = SymbolScope.GLOBAL container_symbol: Optional[str] = None metadata: Dict[str, Any] = field(default_factory=dict) @dataclass class ResolvedScope: """Result of symbol scope resolution.""" symbol_name: str resolved_path: str scope_level: SymbolScope context: Dict[str, Any] confidence: float alternative_matches: List[SymbolInfo] = field(default_factory=list) @dataclass class SymbolSearchContext: """Context for symbol search operations.""" symbol_name: str symbol_type: Optional[SymbolType] = None scope: Optional[str] = None file_path: Optional[str] = None language: str = "python" include_imports: bool = True include_definitions: bool = True include_references: bool = False max_results: int = 100 case_sensitive: bool = True use_semantic_search: bool = True class ReferenceAnalysis: """Enhanced reference analysis with comprehensive symbol tracking.""" def __init__( self, ripgrep_integration: Optional[RipgrepIntegration] = None, ast_engine: Optional[LLMAstReasoningEngine] = None, search_infrastructure: Optional[EnhancedSearchInfrastructure] = None, ): """Initialize reference analysis system.""" self.ripgrep = ripgrep_integration or RipgrepIntegration() self.ast_engine = ast_engine or LLMAstReasoningEngine() self.search_infra = search_infrastructure or EnhancedSearchInfrastructure() # Cache for reference analysis results self._reference_cache: Dict[str, List[ReferenceInfo]] = {} self._dependency_cache: Dict[str, Dict[str, List[ReferenceInfo]]] = {} # Language-specific reference patterns self._reference_patterns = { "python": { ReferenceType.READ: [ r"(\w+)(?=\s*[\+\-\*/%&|^]|==|!=|<=|>=|<|>|is|in)", # Used in expressions r"print\s*\(\s*(\w+)", # Used in print statements r"return\s+(\w+)", # Returned ], ReferenceType.WRITE: [ r"(\w+)\s*=", # Assignment r"(\w+)\s*[\+\-\*/%&|^]?=", # Compound assignment ], ReferenceType.CALL: [ r"(\w+)\s*\(", # Function call r"await\s+(\w+)\s*\(", # Async call r"(\w+)\.(\w+)\s*\(", # Method call ], ReferenceType.IMPORT: [ r"import\s+(\w+)", # Direct import r"from\s+(\w+)\s+import", # From import ], ReferenceType.INHERIT: [ r"class\s+\w+\s*\(\s*(\w+)\s*\)", # Class inheritance ], ReferenceType.TYPE_ANNOTATION: [ r":\s*(\w+)", # Type annotation r"->\s*(\w+)", # Return type annotation ], ReferenceType.DECORATE: [ r"@(\w+)", # Decorator ], }, "javascript": { ReferenceType.READ: [ r"(\w+)(?=\s*[\+\-\*/%&|^]|==|!=|<=|>=|<|>|===|!==)", # Used in expressions r"console\.log\s*\(\s*(\w+)", # Console log r"return\s+(\w+)", # Returned ], ReferenceType.WRITE: [ r"(?:let|const|var)\s+(\w+)\s*=", # Declaration with assignment r"(\w+)\s*=", # Assignment r"(\w+)\s*[\+\-\*/%&|^]?=", # Compound assignment ], ReferenceType.CALL: [ r"(\w+)\s*\(", # Function call r"await\s+(\w+)\s*\(", # Async call r"(\w+)\.(\w+)\s*\(", # Method call ], ReferenceType.IMPORT: [ r"import\s+.*?from\s+['\"](\w+)['\"]", # ES6 import r"require\s*\(\s*['\"](\w+)['\"]\s*\)", # CommonJS require ], ReferenceType.INHERIT: [ r"class\s+\w+\s+extends\s+(\w+)", # Class extends ], }, "typescript": { ReferenceType.READ: [ r"(\w+)(?=\s*[\+\-\*/%&|^]|==|!=|<=|>=|<|>|===|!==)", # Used in expressions r"console\.log\s*\(\s*(\w+)", # Console log r"return\s+(\w+)", # Returned ], ReferenceType.WRITE: [ r"(?:let|const|var)\s+(\w+)\s*[:=]", # Declaration with assignment r"(\w+)\s*=", # Assignment r"(\w+)\s*[\+\-\*/%&|^]?=", # Compound assignment ], ReferenceType.CALL: [ r"(\w+)\s*\(", # Function call r"await\s+(\w+)\s*\(", # Async call r"(\w+)\.(\w+)\s*\(", # Method call ], ReferenceType.IMPORT: [ r"import\s+.*?from\s+['\"](\w+)['\"]", # ES6 import r"import\s+.*?=\s*require\s*\(\s*['\"](\w+)['\"]\s*\)", # TypeScript require ], ReferenceType.INHERIT: [ r"class\s+\w+\s+extends\s+(\w+)", # Class extends r"implements\s+(\w+)", # Interface implementation ], ReferenceType.TYPE_ANNOTATION: [ r":\s*(\w+)", # Type annotation r"->\s*(\w+)", # Return type annotation r"interface\s+\w+\s+extends\s+(\w+)", # Interface extends ], }, } def analyze_symbol_references( self, symbol_info: SymbolInfo, include_definitions: bool = False, max_results: int = 1000 ) -> List[ReferenceInfo]: """ Analyze all references to a symbol with comprehensive classification. Args: symbol_info: SymbolInfo object to analyze references for include_definitions: Whether to include symbol definitions in results max_results: Maximum number of references to return Returns: List of ReferenceInfo objects with detailed reference information """ # Generate cache key cache_key = f"{symbol_info.name}:{symbol_info.file_path}:{include_definitions}:{max_results}" # Check cache first if cache_key in self._reference_cache: return self._reference_cache[cache_key] references = [] try: # Search for all occurrences of the symbol name search_context = SearchContext( query=symbol_info.name, file_types=[symbol_info.language], max_results=max_results, strategy=SearchStrategy.EXACT, ) results, _ = self.search_infra.search(search_context) # Analyze each result to determine reference type for result in results: # Skip definition if requested if not include_definitions and result.file_path == symbol_info.file_path and result.line_number == symbol_info.line_number: continue # Determine reference type and create ReferenceInfo reference_info = self._analyze_reference_context( result, symbol_info.name, symbol_info.language ) if reference_info: references.append(reference_info) # Sort by confidence score and line number references.sort(key=lambda x: (-x.confidence_score, x.line_number)) except Exception as e: logger.warning(f"Error analyzing symbol references: {e}") # Cache result self._reference_cache[cache_key] = references return references def get_symbol_dependencies( self, symbol_info: SymbolInfo, include_transitive: bool = False ) -> Dict[str, List[ReferenceInfo]]: """ Get all dependencies of a symbol with detailed analysis. Args: symbol_info: SymbolInfo object to analyze dependencies for include_transitive: Whether to include transitive dependencies Returns: Dictionary mapping dependency types to lists of ReferenceInfo objects """ cache_key = f"{symbol_info.name}:{symbol_info.file_path}:{include_transitive}" # Check cache first if cache_key in self._dependency_cache: return self._dependency_cache[cache_key] dependencies: Dict[str, List[ReferenceInfo]] = { "direct": [], "transitive": [], "imports": [], "type_dependencies": [], "runtime_dependencies": [], } try: # Read and analyze the symbol's file if symbol_info.file_path: with open(symbol_info.file_path, "r", encoding="utf-8") as f: content = f.read() # Analyze file content for dependencies file_deps = self._analyze_file_dependencies(content, symbol_info.language) dependencies["direct"] = file_deps["direct"] dependencies["imports"] = file_deps["imports"] dependencies["type_dependencies"] = file_deps["type_dependencies"] dependencies["runtime_dependencies"] = file_deps["runtime_dependencies"] # Analyze transitive dependencies if requested if include_transitive: transitive_deps = self._analyze_transitive_dependencies(file_deps["direct"], symbol_info.language) dependencies["transitive"] = transitive_deps except Exception as e: logger.warning(f"Error getting symbol dependencies: {e}") # Cache result self._dependency_cache[cache_key] = dependencies return dependencies def analyze_refactoring_safety(self, symbol_info: SymbolInfo) -> Dict[str, Any]: """ Analyze the safety of refactoring operations on a symbol. Args: symbol_info: SymbolInfo object to analyze for refactoring safety Returns: Dictionary with safety analysis results """ safety_analysis: Dict[str, Any] = { "is_safe_to_rename": True, "risk_factors": [], "impact_score": 0.0, "affected_files": set(), "reference_count": 0, "critical_references": [], "recommendations": [], } try: # Get all references to the symbol references = self.analyze_symbol_references(symbol_info, include_definitions=False) safety_analysis["reference_count"] = len(references) # Analyze each reference for safety concerns for ref in references: # Track affected files safety_analysis["affected_files"].add(ref.file_path) # Check for critical references that might make renaming unsafe if self._is_critical_reference(ref): safety_analysis["critical_references"].append(ref) safety_analysis["risk_factors"].append(f"Critical reference at {ref.file_path}:{ref.line_number}") # Calculate impact score (0.0 = low impact, 1.0 = high impact) safety_analysis["impact_score"] = self._calculate_refactoring_impact(references) # Determine if it's safe to rename if len(safety_analysis["critical_references"]) > 0: safety_analysis["is_safe_to_rename"] = False if safety_analysis["impact_score"] > 0.8: safety_analysis["risk_factors"].append("High impact refactoring") # Generate recommendations safety_analysis["recommendations"] = self._generate_refactoring_recommendations(safety_analysis) except Exception as e: logger.warning(f"Error analyzing refactoring safety: {e}") safety_analysis["is_safe_to_rename"] = False safety_analysis["risk_factors"].append(f"Analysis error: {str(e)}") # Convert set to list for JSON serialization safety_analysis["affected_files"] = list(safety_analysis["affected_files"]) return safety_analysis def get_reference_statistics(self, symbol_info: SymbolInfo) -> Dict[str, Any]: """ Get comprehensive statistics about symbol references. Args: symbol_info: SymbolInfo object to analyze Returns: Dictionary with reference statistics """ references = self.analyze_symbol_references(symbol_info) stats: Dict[str, Any] = { "total_references": len(references), "references_by_type": {}, "references_by_file": {}, "references_by_scope": {}, "average_confidence": 0.0, "reference_density": 0.0, } if not references: return stats # Count by reference type for ref in references: ref_type = ref.reference_type.value ref_by_type: Dict[str, int] = stats["references_by_type"] ref_by_type[ref_type] = ref_by_type.get(ref_type, 0) + 1 # Count by file for ref in references: file_path = ref.file_path ref_by_file: Dict[str, int] = stats["references_by_file"] ref_by_file[file_path] = ref_by_file.get(file_path, 0) + 1 # Count by scope for ref in references: scope = ref.scope.value ref_by_scope: Dict[str, int] = stats["references_by_scope"] ref_by_scope[scope] = ref_by_scope.get(scope, 0) + 1 # Calculate average confidence stats["average_confidence"] = sum(r.confidence_score for r in references) / len(references) # Calculate reference density (references per 100 lines) if symbol_info.file_path: try: with open(symbol_info.file_path, "r", encoding="utf-8") as f: lines = len(f.readlines()) if lines > 0: stats["reference_density"] = (len(references) / lines) * 100 except Exception: pass return stats def find_unused_symbols(self, file_path: str, language: str = "python") -> List[SymbolInfo]: """ Find potentially unused symbols in a file. Args: file_path: Path to the file to analyze language: Programming language of the file Returns: List of SymbolInfo objects that appear to be unused """ unused_symbols = [] try: # First, find all defined symbols in the file symbol_ops = AdvancedSymbolOperations( ripgrep_integration=self.ripgrep, ast_engine=self.ast_engine, search_infrastructure=self.search_infra, ) # Find all symbols defined in the file defined_symbols = symbol_ops.find_symbols_by_pattern(r"(def |class |import |from )", language) defined_symbols = [s for s in defined_symbols if s.file_path == file_path] # For each defined symbol, check if it has references outside its definition for symbol in defined_symbols: references = self.analyze_symbol_references(symbol, include_definitions=False) # Filter out references from the same symbol (e.g., recursive calls) external_references = [ r for r in references if not (r.file_path == symbol.file_path and r.line_number == symbol.line_number) ] # Consider symbols with no external references as potentially unused # (with some exceptions for common patterns) if not external_references and not self._is_exemption_from_unused_check(symbol): unused_symbols.append(symbol) except Exception as e: logger.warning(f"Error finding unused symbols: {e}") return unused_symbols def _analyze_reference_context(self, search_result: Any, symbol_name: str, language: str) -> Optional[ReferenceInfo]: """Analyze the context of a search result to determine reference type.""" try: line_text = search_result.line_content reference_type = ReferenceType.REFERENCE confidence_score = 1.0 context = line_text.strip() # Determine reference type based on language patterns if language in self._reference_patterns: for ref_type, patterns in self._reference_patterns[language].items(): for pattern in patterns: if re.search(pattern, line_text): reference_type = ref_type confidence_score = 0.9 # High confidence for pattern matches break # Additional context analysis container_symbol = self._extract_container_symbol(line_text, language) scope = self._determine_reference_scope(line_text, language) return ReferenceInfo( symbol_name=symbol_name, reference_type=reference_type, file_path=search_result.file_path, line_number=search_result.line_number, context=context, confidence_score=confidence_score, language=language, scope=scope, container_symbol=container_symbol, metadata={"line_content": line_text}, ) except Exception as e: logger.warning(f"Error analyzing reference context: {e}") return None def _analyze_file_dependencies(self, content: str, language: str) -> Dict[str, List[ReferenceInfo]]: """Analyze dependencies within a file's content.""" dependencies: Dict[str, List[ReferenceInfo]] = { "direct": [], "imports": [], "type_dependencies": [], "runtime_dependencies": [], } # This is a simplified implementation # In a full implementation, you would use AST analysis for accurate dependency detection try: lines = content.split("\n") for i, line in enumerate(lines, 1): line = line.strip() # Look for import statements if language == "python": if line.startswith("import ") or line.startswith("from "): # Extract imported module/symbol import_match = re.search(r"import\s+(\w+)", line) or re.search(r"from\s+(\w+)", line) if import_match: dep_info = ReferenceInfo( symbol_name=import_match.group(1), reference_type=ReferenceType.IMPORT, file_path="", # Will be set by caller line_number=i, context=line, language=language, metadata={"import_line": line}, ) dependencies["imports"].append(dep_info) # Look for function calls and variable usage (simplified) call_pattern = r"(\w+)\s*\(" calls = re.findall(call_pattern, line) for call in calls: if call not in ["if", "for", "while", "def", "class"]: dep_info = ReferenceInfo( symbol_name=call, reference_type=ReferenceType.CALL, file_path="", # Will be set by caller line_number=i, context=line, language=language, metadata={"call_site": line}, ) dependencies["runtime_dependencies"].append(dep_info) except Exception as e: logger.warning(f"Error analyzing file dependencies: {e}") return dependencies def _analyze_transitive_dependencies(self, direct_deps: List[ReferenceInfo], language: str) -> List[ReferenceInfo]: """Analyze transitive dependencies (simplified implementation).""" # This is a placeholder for transitive dependency analysis # In a full implementation, you would recursively analyze dependencies return [] def _is_critical_reference(self, reference: ReferenceInfo) -> bool: """Determine if a reference is critical for refactoring safety.""" # References in test files, configuration files, or public APIs might be critical critical_patterns = ["test_", "spec_", "config", "api", "public"] critical_file_patterns = ["test_", "spec_", "__init__.py"] for pattern in critical_patterns: if pattern in reference.symbol_name.lower(): return True for pattern in critical_file_patterns: if pattern in reference.file_path.lower(): return True # Method calls in inheritance contexts are critical if reference.reference_type == ReferenceType.CALL and "class" in reference.context.lower(): return True return False def _calculate_refactoring_impact(self, references: List[ReferenceInfo]) -> float: """Calculate the impact score of refactoring (0.0 = low, 1.0 = high).""" if not references: return 0.0 impact = 0.0 # More references = higher impact impact = min(len(references) / 10.0, 0.4) # Max 0.4 for reference count # References across multiple files = higher impact unique_files = len(set(r.file_path for r in references)) impact += min(unique_files / 5.0, 0.3) # Max 0.3 for file spread # Critical references = higher impact critical_count = sum(1 for r in references if self._is_critical_reference(r)) impact += min(critical_count / 3.0, 0.3) # Max 0.3 for critical refs return min(impact, 1.0) def _generate_refactoring_recommendations(self, safety_analysis: Dict[str, Any]) -> List[str]: """Generate recommendations for safe refactoring.""" recommendations = [] if not safety_analysis["is_safe_to_rename"]: recommendations.append("Consider creating a backup before refactoring") if safety_analysis["impact_score"] > 0.7: recommendations.append("High impact refactoring - consider incremental changes") if len(safety_analysis["affected_files"]) > 5: recommendations.append("Consider refactoring in smaller batches") if safety_analysis["reference_count"] > 50: recommendations.append("High number of references - ensure comprehensive testing") if not recommendations: recommendations.append("Refactoring appears safe - proceed with normal caution") return recommendations def _extract_container_symbol(self, line_text: str, language: str) -> Optional[str]: """Extract the container symbol (class/function) from a line.""" # This is a simplified implementation # In a full implementation, you would use AST analysis try: if language == "python": # Look for class or function definitions in the broader context # This would require analyzing more than just the current line pass elif language in ["javascript", "typescript"]: # Similar analysis for JS/TS pass except Exception: pass return None def _determine_reference_scope(self, line_text: str, language: str) -> SymbolScope: """Determine the scope of a reference.""" # This is a simplified implementation # In a full implementation, you would use AST analysis if language in ["javascript", "typescript"]: if "this." in line_text: return SymbolScope.CLASS elif "var " in line_text or "let " in line_text or "const " in line_text: return SymbolScope.LOCAL return SymbolScope.GLOBAL def _is_exemption_from_unused_check(self, symbol: SymbolInfo) -> bool: """Check if a symbol should be exempt from unused symbol analysis.""" # Common patterns that should not be flagged as unused exempt_patterns = [ "__init__", # Python constructors "__str__", "__repr__", # String representation methods "main", # Entry points "setUp", "tearDown", # Test setup methods "test_", "spec_", # Test functions ] for pattern in exempt_patterns: if pattern in symbol.name.lower(): return True # Don't flag private methods as unused (they might be used internally) if symbol.is_private: return True return False def clear_cache(self): """Clear all reference analysis caches.""" self._reference_cache.clear() self._dependency_cache.clear() def get_cache_stats(self) -> Dict[str, Any]: """Get cache statistics.""" return { "reference_cache_size": len(self._reference_cache), "dependency_cache_size": len(self._dependency_cache), } class AdvancedSymbolOperations: """Advanced symbol operations with semantic understanding and scope awareness.""" def __init__( self, ripgrep_integration: Optional[RipgrepIntegration] = None, ast_engine: Optional[LLMAstReasoningEngine] = None, search_infrastructure: Optional[EnhancedSearchInfrastructure] = None, ): """Initialize advanced symbol operations.""" self.ripgrep = ripgrep_integration or RipgrepIntegration() self.ast_engine = ast_engine or LLMAstReasoningEngine() self.search_infra = search_infrastructure or EnhancedSearchInfrastructure() # Cache for symbol analysis results self._symbol_cache: Dict[str, SymbolInfo] = {} self._scope_cache: Dict[str, ResolvedScope] = {} # Language-specific patterns self._language_patterns = { "python": { "function": r"def\s+(\w+)\s*\(", "class": r"class\s+(\w+)\s*[\(:]", "method": r"def\s+(\w+)\s*\(", "variable": r"(\w+)\s*=", "import": r"import\s+(\w+)", "from_import": r"from\s+\w+\s+import\s+(\w+)", }, "javascript": { "function": r"function\s+(\w+)\s*\(", "class": r"class\s+(\w+)\s*[\{]", "method": r"(\w+)\s*\([^)]*\)\s*[\{=]", "variable": r"(?:const|let|var)\s+(\w+)\s*=", "import": r"import\s+.*?\s+from\s+['\"](\w+)['\"]", }, "typescript": { "function": r"function\s+(\w+)\s*\(", "class": r"class\s+(\w+)\s*[\{]", "method": r"(\w+)\s*\([^)]*\)\s*[\{=:]", "variable": r"(?:const|let|var)\s+(\w+)\s*[:=]", "interface": r"interface\s+(\w+)\s*[\{]", "type": r"type\s+(\w+)\s*=", }, } def find_symbol(self, symbol_name: str, symbol_type: Optional[SymbolType] = None, scope: Optional[str] = None) -> SymbolInfo: """ Find a symbol by name with semantic understanding and scope awareness. Args: symbol_name: Name of the symbol to find symbol_type: Optional type filter for the symbol scope: Optional scope constraint Returns: SymbolInfo object with comprehensive symbol information """ # Create search context context = SymbolSearchContext(symbol_name=symbol_name, symbol_type=symbol_type, scope=scope, use_semantic_search=True) # Check cache first cache_key = self._generate_symbol_cache_key(context) if cache_key in self._symbol_cache: return self._symbol_cache[cache_key] # Perform multi-stage symbol search symbol_info = self._perform_symbol_search(context) # Cache result self._symbol_cache[cache_key] = symbol_info return symbol_info def find_symbols_by_pattern(self, pattern: str, language: str = "python") -> List[SymbolInfo]: """ Find symbols matching a pattern with intelligent matching. Args: pattern: Search pattern (can include regex) language: Programming language to search in Returns: List of matching SymbolInfo objects """ symbols = [] # Use ripgrep for fast pattern matching try: # Search without file type filter (due to ripgrep integration issue) # and filter results manually based on file extension options = SearchOptions() results = self.ripgrep.search_files(pattern=pattern, path=".", options=options) # Filter results by language and convert to SymbolInfo objects for result in results.results: # Check file extension matches language file_ext = result.file_path.split('.')[-1] if '.' in result.file_path else '' should_include = False if language == "python" and file_ext == "py": should_include = True elif language == "javascript" and file_ext in ["js", "jsx", "mjs"]: should_include = True elif language == "typescript" and file_ext in ["ts", "tsx"]: should_include = True if should_include: symbol_info = self._convert_search_result_to_symbol(result, "unknown", language) if symbol_info: symbols.append(symbol_info) except Exception as e: logger.warning(f"Error in pattern search: {e}") # Sort by confidence score symbols.sort(key=lambda x: x.confidence_score, reverse=True) return symbols def resolve_symbol_scope(self, symbol_name: str, context: str) -> ResolvedScope: """ Resolve symbol scope with contextual understanding. Args: symbol_name: Name of the symbol to resolve context: Context string for scope resolution Returns: ResolvedScope object with scope information """ # Check cache first cache_key = f"{symbol_name}:{context}" if cache_key in self._scope_cache: return self._scope_cache[cache_key] # Parse context to understand scope context_info = self._parse_context(context) # Find all occurrences of the symbol all_occurrences = self.find_symbols_by_pattern(symbol_name, context_info.get("language", "python")) # Filter by context relevance relevant_symbols = self._filter_symbols_by_context(all_occurrences, context_info) # Determine best match best_match = self._determine_best_scope_match(relevant_symbols, context_info) # Create resolved scope resolved_scope = ResolvedScope( symbol_name=symbol_name, resolved_path=best_match.file_path if best_match else "", scope_level=best_match.scope if best_match else SymbolScope.GLOBAL, context=context_info, confidence=best_match.confidence_score if best_match else 0.0, alternative_matches=relevant_symbols, ) # Cache result self._scope_cache[cache_key] = resolved_scope return resolved_scope def get_symbol_references(self, symbol_info: SymbolInfo) -> List[SymbolInfo]: """ Find all references to a given symbol. Args: symbol_info: SymbolInfo object to find references for Returns: List of SymbolInfo objects representing references """ references = [] # Use enhanced search infrastructure search_context = SearchContext( query=symbol_info.name, file_types=[symbol_info.language], max_results=symbol_info.metadata.get("max_references", 100) ) # Search for references results, _ = self.search_infra.search(search_context) # Convert results to symbol references for result in results: if result.file_path != symbol_info.file_path or result.line_number != symbol_info.line_number: reference = SymbolInfo( name=symbol_info.name, symbol_type=SymbolType.VARIABLE, # References are typically variables file_path=result.file_path, line_number=result.line_number, scope=SymbolScope.GLOBAL, confidence_score=result.combined_score, language=symbol_info.language, metadata={"reference_type": "usage", "context": result.line_content}, ) references.append(reference) return references def analyze_symbol_relationships(self, symbol_info: SymbolInfo) -> Dict[str, List[SymbolInfo]]: """ Analyze relationships between symbols (dependencies, usages, etc.). Args: symbol_info: SymbolInfo object to analyze Returns: Dictionary mapping relationship types to lists of related symbols """ relationships: Dict[str, List[SymbolInfo]] = { "dependencies": [], "dependents": [], "related": [], "overrides": [], "implements": [], } # Find dependencies (symbols this one uses) if symbol_info.file_path: dependencies = self._find_symbol_dependencies(symbol_info) relationships["dependencies"] = dependencies # Find dependents (symbols that use this one) dependents = self.get_symbol_references(symbol_info) relationships["dependents"] = dependents # Find related symbols (semantic similarity) related = self._find_semantically_related_symbols(symbol_info) relationships["related"] = related return relationships def _perform_symbol_search(self, context: SymbolSearchContext) -> SymbolInfo: """Perform the actual symbol search with multiple strategies.""" # Strategy 1: Exact pattern matching exact_matches = self._exact_symbol_search(context) # Strategy 2: Semantic search if enabled semantic_matches = [] if context.use_semantic_search: semantic_matches = self._semantic_symbol_search(context) # Combine and rank results all_matches = exact_matches + semantic_matches if not all_matches: # Return empty SymbolInfo if no matches found return SymbolInfo(name=context.symbol_name, symbol_type=SymbolType.VARIABLE, file_path="", line_number=0, confidence_score=0.0) # Return best match best_match = max(all_matches, key=lambda x: x.confidence_score) return best_match def _exact_symbol_search(self, context: SymbolSearchContext) -> List[SymbolInfo]: """Perform exact symbol search using language patterns.""" matches = [] # Get language patterns lang_patterns = self._language_patterns.get(context.language, {}) # Search for symbol name in patterns for symbol_type_str, pattern in lang_patterns.items(): if context.symbol_type and symbol_type_str != context.symbol_type.value: continue # Create exact match pattern exact_pattern = pattern.replace(r"(\w+)", re.escape(context.symbol_name)) try: from .ripgrep_integration import SearchOptions options = SearchOptions(file_types=[context.language]) results = self.ripgrep.search_files(pattern=exact_pattern, path=context.file_path or ".", options=options) for result in results.results: symbol_info = self._convert_search_result_to_symbol(result, symbol_type_str, context.language) if symbol_info: matches.append(symbol_info) except Exception as e: logger.warning(f"Error in exact search: {e}") return matches def _semantic_symbol_search(self, context: SymbolSearchContext) -> List[SymbolInfo]: """Perform semantic symbol search using AST analysis.""" matches = [] try: # Use enhanced search infrastructure for semantic search search_context = SearchContext( query=context.symbol_name, file_types=[context.language], strategy=SearchStrategy.SEMANTIC, max_results=20 ) results, _ = self.search_infra.search(search_context) # Convert results to symbol information for result in results: symbol_info = SymbolInfo( name=context.symbol_name, symbol_type=self._infer_symbol_type(result.line_content), file_path=result.file_path, line_number=result.line_number, confidence_score=result.combined_score * 0.8, # Lower confidence for semantic language=context.language, metadata={"semantic_match": True, "context": result.line_content}, ) matches.append(symbol_info) except Exception as e: logger.warning(f"Error in semantic search: {e}") return matches def _convert_search_result_to_symbol(self, result: SearchResult, symbol_type_str: str, language: str) -> Optional[SymbolInfo]: """Convert SearchResult to SymbolInfo.""" try: # Extract symbol name from line using regex line_text = result.line_text.strip() # Use language-specific patterns to extract symbol name and type symbol_type = SymbolType.VARIABLE # Default type is_private = False is_protected = False if language == "python": if "class " in line_text: # Extract class name class_match = re.search(r"class\s+(\w+)", line_text) symbol_name = class_match.group(1) if class_match else line_text symbol_type = SymbolType.CLASS elif "def " in line_text: # Extract function name and check if private func_match = re.search(r"def\s+(\w+)", line_text) symbol_name = func_match.group(1) if func_match else line_text symbol_type = SymbolType.FUNCTION # Check for private method (starts with underscore) if symbol_name.startswith("_"): is_private = True # Check for async function if "async def" in line_text: symbol_type = SymbolType.ASYNC_FUNCTION elif "=" in line_text and not line_text.startswith("#"): # Extract variable name var_match = re.search(r"(\w+)\s*=", line_text) symbol_name = var_match.group(1) if var_match else line_text symbol_type = SymbolType.VARIABLE # Check for private variable if symbol_name.startswith("_"): is_private = True else: symbol_name = line_text elif language == "javascript": if "class " in line_text: class_match = re.search(r"class\s+(\w+)", line_text) symbol_name = class_match.group(1) if class_match else line_text symbol_type = SymbolType.CLASS elif "function " in line_text: func_match = re.search(r"function\s+(\w+)", line_text) symbol_name = func_match.group(1) if func_match else line_text symbol_type = SymbolType.FUNCTION elif "const " in line_text or "let " in line_text or "var " in line_text: var_match = re.search(r"(?:const|let|var)\s+(\w+)", line_text) symbol_name = var_match.group(1) if var_match else line_text symbol_type = SymbolType.VARIABLE else: symbol_name = line_text else: symbol_name = line_text # Create SymbolInfo symbol_info = SymbolInfo( name=symbol_name, symbol_type=symbol_type, file_path=result.file_path, line_number=result.line_number, column_number=0, # SearchResult doesn't have column_number confidence_score=1.0, language=language, is_private=is_private, is_protected=is_protected, metadata={"match_type": "exact", "context": result.line_text, "language": language}, ) return symbol_info except Exception as e: logger.warning(f"Error converting search result: {e}") return None def _string_to_symbol_type(self, type_str: str) -> SymbolType: """Convert string to SymbolType enum.""" type_mapping = { "function": SymbolType.FUNCTION, "class": SymbolType.CLASS, "method": SymbolType.METHOD, "variable": SymbolType.VARIABLE, "import": SymbolType.IMPORT, "from_import": SymbolType.IMPORT, "interface": SymbolType.INTERFACE, "type": SymbolType.TYPE_ALIAS, } return type_mapping.get(type_str, SymbolType.VARIABLE) def _infer_symbol_type(self, line_content: str) -> SymbolType: """Infer symbol type from line content.""" line_lower = line_content.lower().strip() if "def " in line_lower: if "async def" in line_lower: return SymbolType.ASYNC_FUNCTION return SymbolType.FUNCTION elif "class " in line_lower: return SymbolType.CLASS elif "import " in line_lower: return SymbolType.IMPORT elif "interface " in line_lower: return SymbolType.INTERFACE elif "type " in line_lower and "=" in line_lower: return SymbolType.TYPE_ALIAS else: return SymbolType.VARIABLE def _generate_symbol_cache_key(self, context: SymbolSearchContext) -> str: """Generate cache key for symbol search.""" key_parts = [ context.symbol_name, context.symbol_type.value if context.symbol_type else "any", context.scope or "global", context.file_path or "any", context.language, ] return "|".join(key_parts) def _parse_context(self, context: str) -> Dict[str, Any]: """Parse context string to extract useful information.""" context_info = { "language": "python", # Default "scope": "global", "imports": [], "classes": [], "functions": [], } # Simple language detection if "import " in context and "from " in context: context_info["language"] = "python" elif "function" in context and "class" in context: context_info["language"] = "javascript" elif "interface" in context or "type " in context: context_info["language"] = "typescript" return context_info def _filter_symbols_by_context(self, symbols: List[SymbolInfo], context_info: Dict[str, Any]) -> List[SymbolInfo]: """Filter symbols based on context relevance.""" filtered = [] for symbol in symbols: # Language filter if symbol.language != context_info["language"]: continue # Add context-specific filtering logic here filtered.append(symbol) return filtered def _determine_best_scope_match(self, symbols: List[SymbolInfo], context_info: Dict[str, Any]) -> Optional[SymbolInfo]: """Determine the best scope match from a list of symbols.""" if not symbols: return None # Simple scoring system scored_symbols = [] for symbol in symbols: score = symbol.confidence_score # Boost score for exact name matches if symbol.name == context_info.get("target_name", ""): score += 0.2 # Boost score for appropriate scope if context_info.get("scope") == "class" and symbol.scope == SymbolScope.CLASS: score += 0.1 scored_symbols.append((symbol, score)) # Return symbol with highest score if scored_symbols: return max(scored_symbols, key=lambda x: x[1])[0] return symbols[0] if symbols else None def _find_symbol_dependencies(self, symbol_info: SymbolInfo) -> List[SymbolInfo]: """Find symbols that the given symbol depends on.""" dependencies = [] # This is a simplified implementation # In a full implementation, you would parse the file and analyze AST try: # Read the file containing the symbol with open(symbol_info.file_path, "r", encoding="utf-8") as f: content = f.read() # Simple dependency detection (would be enhanced with AST analysis) lines = content.split("\n") for i, line in enumerate(lines, 1): if i == symbol_info.line_number: continue # Skip the symbol definition itself # Look for potential dependencies for pattern in self._language_patterns.get(symbol_info.language, {}).values(): matches = re.findall(pattern, line) for match in matches: if match != symbol_info.name: dep_symbol = SymbolInfo( name=match, symbol_type=SymbolType.VARIABLE, file_path=symbol_info.file_path, line_number=i, confidence_score=0.5, language=symbol_info.language, metadata={"dependency_type": "usage"}, ) dependencies.append(dep_symbol) except Exception as e: logger.warning(f"Error finding dependencies: {e}") return dependencies def _find_semantically_related_symbols(self, symbol_info: SymbolInfo) -> List[SymbolInfo]: """Find symbols that are semantically related to the given symbol.""" related = [] try: # Use semantic search to find related symbols search_context = SearchContext( query=f"related to {symbol_info.name}", file_types=[symbol_info.language], strategy=SearchStrategy.SEMANTIC, max_results=10 ) results, _ = self.search_infra.search(search_context) for result in results: if result.file_path != symbol_info.file_path: related_symbol = SymbolInfo( name=result.line_content.split()[0] or "unknown", # Extract first word as symbol name symbol_type=self._infer_symbol_type(result.line_content), file_path=result.file_path, line_number=result.line_number, confidence_score=result.combined_score * 0.6, language=symbol_info.language, metadata={"semantic_relation": True}, ) related.append(related_symbol) except Exception as e: logger.warning(f"Error finding semantic relations: {e}") return related def get_performance_metrics(self) -> Dict[str, Any]: """Get performance metrics for symbol operations.""" return { "cache_size": len(self._symbol_cache), "scope_cache_size": len(self._scope_cache), "cache_hit_rate": self._calculate_cache_hit_rate(), "average_search_time": self._calculate_average_search_time(), "supported_languages": list(self._language_patterns.keys()), } def clear_cache(self): """Clear all caches.""" self._symbol_cache.clear() self._scope_cache.clear() def _calculate_cache_hit_rate(self) -> float: """Calculate cache hit rate (simplified).""" # This would be implemented with actual hit/miss tracking return 0.85 # Placeholder def _calculate_average_search_time(self) -> float: """Calculate average search time (simplified).""" # This would be implemented with actual timing data return 0.05 # Placeholder

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/betmoar/FastApply-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server