Scantool - File Scanner MCP

Overview Schema Related Servers Score Discussions

csharp.py•49.2 KiB

"""C# language support - unified scanner and analyzer. This module combines CSharpScanner and CSharpAnalyzer into a single class, eliminating duplication of metadata, tree-sitter parsing, and structure extraction. Key optimizations: - extract_definitions() reuses scan() output instead of re-parsing - Single tree-sitter parser instance shared across all operations """ import re from typing import Optional from pathlib import Path import tree_sitter_c_sharp from tree_sitter import Language, Parser, Node from .base import BaseLanguage from .models import ( StructureNode, ImportInfo, EntryPointInfo, DefinitionInfo, CallInfo, ) class CSharpLanguage(BaseLanguage): """Unified language handler for C# files (.cs, .csx). Provides both structure scanning and semantic analysis: - scan(): Extract classes, interfaces, structs, enums, methods, properties - extract_imports(): Find using directives - find_entry_points(): Find Main methods, ASP.NET controllers, minimal APIs - extract_definitions(): Convert scan() output to DefinitionInfo - extract_calls(): Find method calls (basic implementation) """ def __init__(self, **kwargs): super().__init__(**kwargs) self.parser = Parser() self.parser.language = Language(tree_sitter_c_sharp.language()) # =========================================================================== # Metadata (REQUIRED) # =========================================================================== @classmethod def get_extensions(cls) -> list[str]: return [".cs", ".csx"] @classmethod def get_language_name(cls) -> str: return "C#" @classmethod def get_priority(cls) -> int: return 10 # =========================================================================== # Skip Logic (combined from scanner + analyzer) # =========================================================================== @classmethod def should_skip(cls, filename: str) -> bool: """Skip designer and generated files.""" lower = filename.lower() if '.designer.cs' in lower: return True if lower.endswith('.g.cs') or lower.endswith('.generated.cs'): return True if lower == 'assemblyinfo.cs': return True return False def should_analyze(self, file_path: str) -> bool: """ Skip C# files that should not be analyzed. C#-specific skip patterns: - Skip designer files (*.Designer.cs, *.designer.cs) - Skip generated files (*.g.cs, *.generated.cs) - Skip auto-generated AssemblyInfo files - bin/ and obj/ directories """ filename = Path(file_path).name.lower() path_lower = file_path.lower() # Skip designer files if '.designer.cs' in filename: return False # Skip generated files if filename.endswith('.g.cs') or filename.endswith('.generated.cs'): return False # Skip auto-generated AssemblyInfo files if filename == 'assemblyinfo.cs': return False # Skip bin/obj (should be caught by COMMON_SKIP_DIRS, but double-check) if '/bin/' in path_lower or '/obj/' in path_lower: return False return True def is_low_value_for_inventory(self, file_path: str, size: int = 0) -> bool: """Identify low-value C# files for inventory listing. Low-value files (unless central): - Empty or near-empty files - Global usings files (typically boilerplate) """ filename = Path(file_path).name.lower() # GlobalUsings.cs is usually auto-generated boilerplate if filename == 'globalusings.cs' and size < 500: return True return super().is_low_value_for_inventory(file_path, size) # =========================================================================== # Structure Scanning (from CSharpScanner) # =========================================================================== def scan(self, source_code: bytes) -> Optional[list[StructureNode]]: """Scan C# source code and extract structure with metadata.""" try: tree = self.parser.parse(source_code) # Check if we should use fallback due to too many errors if self._should_use_fallback(tree.root_node): return self._fallback_extract(source_code) return self._extract_structure(tree.root_node, source_code) except Exception as e: # Return error node instead of crashing return [StructureNode( type="error", name=f"Failed to parse: {str(e)}", start_line=1, end_line=1 )] def _extract_structure(self, root: Node, source_code: bytes) -> list[StructureNode]: """Extract structure using tree-sitter.""" structures = [] def traverse(node: Node, parent_structures: list): # Handle parse errors if node.type == "ERROR": if self.show_errors: error_node = StructureNode( type="parse-error", name="invalid syntax", start_line=node.start_point[0] + 1, end_line=node.end_point[0] + 1 ) parent_structures.append(error_node) return # Using directives if node.type == "using_directive": self._handle_using(node, parent_structures) # Namespace declaration elif node.type in ("namespace_declaration", "file_scoped_namespace_declaration"): self._handle_namespace(node, parent_structures, source_code, root, traverse) # Classes elif node.type == "class_declaration": class_node = self._extract_class(node, source_code) parent_structures.append(class_node) # Traverse children for methods, properties, nested classes, etc. body = node.child_by_field_name("body") if body: for child in body.children: traverse(child, class_node.children) # Interfaces elif node.type == "interface_declaration": interface_node = self._extract_interface(node, source_code) parent_structures.append(interface_node) # Traverse children for method signatures, properties body = node.child_by_field_name("body") if body: for child in body.children: traverse(child, interface_node.children) # Structs elif node.type == "struct_declaration": struct_node = self._extract_struct(node, source_code) parent_structures.append(struct_node) # Traverse children for methods, properties, etc. body = node.child_by_field_name("body") if body: for child in body.children: traverse(child, struct_node.children) # Enums elif node.type == "enum_declaration": enum_node = self._extract_enum(node, source_code) parent_structures.append(enum_node) # Records (C# 9+) elif node.type == "record_declaration": record_node = self._extract_record(node, source_code) parent_structures.append(record_node) # Traverse children for methods, properties body = node.child_by_field_name("body") if body: for child in body.children: traverse(child, record_node.children) # Methods elif node.type == "method_declaration": method_node = self._extract_method(node, source_code) parent_structures.append(method_node) # Properties elif node.type == "property_declaration": property_node = self._extract_property(node, source_code) parent_structures.append(property_node) # Constructors elif node.type == "constructor_declaration": constructor_node = self._extract_constructor(node, source_code) parent_structures.append(constructor_node) else: # Keep traversing for child in node.children: traverse(child, parent_structures) traverse(root, structures) return structures def _extract_class(self, node: Node, source_code: bytes) -> StructureNode: """Extract class with full metadata.""" name_node = node.child_by_field_name("name") name = self._get_node_text(name_node, source_code) if name_node else "unnamed" # Get modifiers modifiers = self._extract_modifiers(node, source_code) # Get attributes decorators = self._extract_attributes(node, source_code) # Get type parameters (generics) type_params = self._extract_type_parameters(node, source_code) # Get base class and interfaces signature_parts = [] if type_params: signature_parts.append(type_params) # Get base list (base class and interfaces) base_list = node.child_by_field_name("bases") if base_list: base_text = self._get_node_text(base_list, source_code).strip() # Remove the colon if present base_text = base_text.lstrip(':').strip() signature_parts.append(f": {base_text}") signature = " ".join(signature_parts) if signature_parts else None # Get XML documentation comment docstring = self._extract_xml_doc(node, source_code) # Calculate complexity complexity = self._calculate_complexity(node) return StructureNode( type="class", name=name, start_line=node.start_point[0] + 1, end_line=node.end_point[0] + 1, signature=signature, decorators=decorators, docstring=docstring, complexity=complexity, modifiers=modifiers, children=[] ) def _extract_interface(self, node: Node, source_code: bytes) -> StructureNode: """Extract interface declaration.""" name_node = node.child_by_field_name("name") name = self._get_node_text(name_node, source_code) if name_node else "unnamed" # Get modifiers modifiers = self._extract_modifiers(node, source_code) # Get attributes decorators = self._extract_attributes(node, source_code) # Get type parameters (generics) type_params = self._extract_type_parameters(node, source_code) # Get base interfaces signature_parts = [] if type_params: signature_parts.append(type_params) base_list = node.child_by_field_name("bases") if base_list: base_text = self._get_node_text(base_list, source_code).strip() base_text = base_text.lstrip(':').strip() signature_parts.append(f": {base_text}") signature = " ".join(signature_parts) if signature_parts else None # Get XML documentation comment docstring = self._extract_xml_doc(node, source_code) return StructureNode( type="interface", name=name, start_line=node.start_point[0] + 1, end_line=node.end_point[0] + 1, signature=signature, decorators=decorators, docstring=docstring, modifiers=modifiers, children=[] ) def _extract_struct(self, node: Node, source_code: bytes) -> StructureNode: """Extract struct declaration.""" name_node = node.child_by_field_name("name") name = self._get_node_text(name_node, source_code) if name_node else "unnamed" # Get modifiers modifiers = self._extract_modifiers(node, source_code) # Get attributes decorators = self._extract_attributes(node, source_code) # Get type parameters (generics) type_params = self._extract_type_parameters(node, source_code) # Get interfaces signature_parts = [] if type_params: signature_parts.append(type_params) base_list = node.child_by_field_name("bases") if base_list: base_text = self._get_node_text(base_list, source_code).strip() base_text = base_text.lstrip(':').strip() signature_parts.append(f": {base_text}") signature = " ".join(signature_parts) if signature_parts else None # Get XML documentation comment docstring = self._extract_xml_doc(node, source_code) return StructureNode( type="struct", name=name, start_line=node.start_point[0] + 1, end_line=node.end_point[0] + 1, signature=signature, decorators=decorators, docstring=docstring, modifiers=modifiers, children=[] ) def _extract_record(self, node: Node, source_code: bytes) -> StructureNode: """Extract record declaration (C# 9+).""" name_node = node.child_by_field_name("name") name = self._get_node_text(name_node, source_code) if name_node else "unnamed" # Get modifiers modifiers = self._extract_modifiers(node, source_code) # Get attributes decorators = self._extract_attributes(node, source_code) # Get type parameters (generics) type_params = self._extract_type_parameters(node, source_code) # Get parameters (for positional records) signature_parts = [] if type_params: signature_parts.append(type_params) params_node = node.child_by_field_name("parameters") if params_node: params_text = self._get_node_text(params_node, source_code) signature_parts.append(params_text) # Get base list base_list = node.child_by_field_name("bases") if base_list: base_text = self._get_node_text(base_list, source_code).strip() base_text = base_text.lstrip(':').strip() signature_parts.append(f": {base_text}") signature = " ".join(signature_parts) if signature_parts else None # Get XML documentation comment docstring = self._extract_xml_doc(node, source_code) return StructureNode( type="record", name=name, start_line=node.start_point[0] + 1, end_line=node.end_point[0] + 1, signature=self._normalize_signature(signature) if signature else None, decorators=decorators, docstring=docstring, modifiers=modifiers, children=[] ) def _extract_enum(self, node: Node, source_code: bytes) -> StructureNode: """Extract enum declaration.""" name_node = node.child_by_field_name("name") name = self._get_node_text(name_node, source_code) if name_node else "unnamed" # Get modifiers modifiers = self._extract_modifiers(node, source_code) # Get attributes decorators = self._extract_attributes(node, source_code) # Get base type (enums can have a base type like : byte) # Look for base_list child node signature = None for child in node.children: if child.type == "base_list": base_text = self._get_node_text(child, source_code).strip() base_text = base_text.lstrip(':').strip() signature = f": {base_text}" break # Get XML documentation comment docstring = self._extract_xml_doc(node, source_code) return StructureNode( type="enum", name=name, start_line=node.start_point[0] + 1, end_line=node.end_point[0] + 1, signature=signature, decorators=decorators, docstring=docstring, modifiers=modifiers, children=[] ) def _extract_method(self, node: Node, source_code: bytes) -> StructureNode: """Extract method with signature and metadata.""" name_node = node.child_by_field_name("name") name = self._get_node_text(name_node, source_code) if name_node else "unnamed" # Get modifiers modifiers = self._extract_modifiers(node, source_code) # Get attributes decorators = self._extract_attributes(node, source_code) # Get signature signature = self._extract_method_signature(node, source_code) # Get XML documentation comment docstring = self._extract_xml_doc(node, source_code) # Calculate complexity complexity = self._calculate_complexity(node) return StructureNode( type="method", name=name, start_line=node.start_point[0] + 1, end_line=node.end_point[0] + 1, signature=signature, decorators=decorators, docstring=docstring, modifiers=modifiers, complexity=complexity, children=[] ) def _extract_property(self, node: Node, source_code: bytes) -> StructureNode: """Extract property declaration.""" name_node = node.child_by_field_name("name") name = self._get_node_text(name_node, source_code) if name_node else "unnamed" # Get modifiers modifiers = self._extract_modifiers(node, source_code) # Get attributes decorators = self._extract_attributes(node, source_code) # Get property type type_node = node.child_by_field_name("type") signature = None if type_node: type_text = self._get_node_text(type_node, source_code).strip() signature = f": {type_text}" # Get XML documentation comment docstring = self._extract_xml_doc(node, source_code) return StructureNode( type="property", name=name, start_line=node.start_point[0] + 1, end_line=node.end_point[0] + 1, signature=signature, decorators=decorators, docstring=docstring, modifiers=modifiers, children=[] ) def _extract_constructor(self, node: Node, source_code: bytes) -> StructureNode: """Extract constructor declaration.""" name_node = node.child_by_field_name("name") name = self._get_node_text(name_node, source_code) if name_node else "unnamed" # Get modifiers modifiers = self._extract_modifiers(node, source_code) # Get attributes decorators = self._extract_attributes(node, source_code) # Get signature (parameters) params_node = node.child_by_field_name("parameters") signature = None if params_node: signature = self._get_node_text(params_node, source_code) # Get XML documentation comment docstring = self._extract_xml_doc(node, source_code) # Calculate complexity complexity = self._calculate_complexity(node) return StructureNode( type="constructor", name=name, start_line=node.start_point[0] + 1, end_line=node.end_point[0] + 1, signature=self._normalize_signature(signature) if signature else None, decorators=decorators, docstring=docstring, modifiers=modifiers, complexity=complexity, children=[] ) def _extract_method_signature(self, node: Node, source_code: bytes) -> Optional[str]: """Extract method signature with type parameters, parameters and return type.""" parts = [] # Get return type (it's a child before the method name) # Could be generic_name, predefined_type, identifier, etc. return_type = None name_node = node.child_by_field_name("name") for child in node.children: # Stop when we reach the name if child == name_node: break # Skip modifiers if child.type == "modifier": continue # This must be the return type if child.type in ("generic_name", "predefined_type", "identifier", "nullable_type", "array_type", "qualified_name", "tuple_type"): return_type = self._get_node_text(child, source_code).strip() break # Get type parameters (generics on method) type_params = node.child_by_field_name("type_parameters") if type_params: type_params_text = self._get_node_text(type_params, source_code) parts.append(type_params_text) # Get parameters params_node = node.child_by_field_name("parameters") if params_node: params_text = self._get_node_text(params_node, source_code) parts.append(params_text) # Add return type at the end if return_type: parts.append(f": {return_type}") signature = " ".join(parts) if parts else None return self._normalize_signature(signature) if signature else None def _extract_modifiers(self, node: Node, source_code: bytes) -> list[str]: """Extract modifiers like public, private, static, readonly, async, virtual, override, abstract.""" modifiers = [] for child in node.children: if child.type == "modifier": # Modifier is wrapped - get the actual keyword modifier_text = self._get_node_text(child, source_code).strip() if modifier_text: modifiers.append(modifier_text) return modifiers def _extract_attributes(self, node: Node, source_code: bytes) -> list[str]: """Extract attributes (C# decorators) from a class/method/property.""" attributes = [] # In C#, attributes are children of the node, appearing before modifiers for child in node.children: if child.type == "attribute_list": attr_text = self._get_node_text(child, source_code).strip() attributes.append(attr_text) return attributes def _extract_xml_doc(self, node: Node, source_code: bytes) -> Optional[str]: """Extract first line of XML documentation comment (///).""" # Try to find comments before the node start_byte = node.start_byte # Look backwards in source to find XML doc comments text = source_code.decode('utf-8', errors='replace') lines_before = text[:start_byte].split('\n') # Collect XML doc comment lines (///) doc_lines = [] for line in reversed(lines_before): stripped = line.strip() if stripped.startswith('///'): # Remove /// and extract content content = stripped[3:].strip() # Extract from <summary> tags if present if '<summary>' in content: content = content.replace('<summary>', '').strip() if '</summary>' in content: content = content.replace('</summary>', '').strip() if content and not content.startswith('<') and not content.startswith('/'): doc_lines.insert(0, content) elif stripped and not stripped.startswith('//'): # Stop at first non-comment line break # Return first meaningful line for line in doc_lines: if line and not line.startswith('<') and not line.startswith('/'): return line return None def _extract_type_parameters(self, node: Node, source_code: bytes) -> Optional[str]: """Extract type parameters (generics) like <T> or <TKey, TValue>.""" # Try field-based access first type_params = node.child_by_field_name("type_parameters") if type_params: return self._get_node_text(type_params, source_code) # Fallback: look for type_parameter_list child for child in node.children: if child.type == "type_parameter_list": return self._get_node_text(child, source_code) return None def _handle_namespace(self, node: Node, parent_structures: list, source_code: bytes, root: Node, traverse_func): """Handle namespace declaration.""" name_node = node.child_by_field_name("name") if name_node: namespace_name = self._get_node_text(name_node, source_code) namespace_node = StructureNode( type="namespace", name=namespace_name, start_line=node.start_point[0] + 1, end_line=node.end_point[0] + 1, children=[] ) parent_structures.append(namespace_node) # Traverse children within namespace body = node.child_by_field_name("body") if body: for child in body.children: traverse_func(child, namespace_node.children) else: # File-scoped namespace - traverse remaining children for child in node.children: if child.start_point[0] > node.start_point[0]: traverse_func(child, namespace_node.children) def _handle_using(self, node: Node, parent_structures: list): """Group using directives together.""" if not parent_structures or parent_structures[-1].type != "imports": import_node = StructureNode( type="imports", name="using directives", start_line=node.start_point[0] + 1, end_line=node.end_point[0] + 1 ) parent_structures.append(import_node) else: # Extend the end line of the existing import group parent_structures[-1].end_line = node.end_point[0] + 1 def _fallback_extract(self, source_code: bytes) -> list[StructureNode]: """Regex-based extraction for severely malformed files.""" text = source_code.decode('utf-8', errors='replace') structures = [] # Find namespace declaration namespace_match = re.search(r'^\s*namespace\s+([\w.]+)', text, re.MULTILINE) if namespace_match: line_num = text[:namespace_match.start()].count('\n') + 1 structures.append(StructureNode( type="namespace", name=namespace_match.group(1), start_line=line_num, end_line=line_num )) # Find class definitions for match in re.finditer(r'^\s*(?:public\s+)?(?:abstract\s+)?(?:sealed\s+)?class\s+(\w+)', text, re.MULTILINE): line_num = text[:match.start()].count('\n') + 1 structures.append(StructureNode( type="class", name=match.group(1) + " (fallback)", start_line=line_num, end_line=line_num )) # Find interface definitions for match in re.finditer(r'^\s*(?:public\s+)?interface\s+(\w+)', text, re.MULTILINE): line_num = text[:match.start()].count('\n') + 1 structures.append(StructureNode( type="interface", name=match.group(1) + " (fallback)", start_line=line_num, end_line=line_num )) # Find struct definitions for match in re.finditer(r'^\s*(?:public\s+)?struct\s+(\w+)', text, re.MULTILINE): line_num = text[:match.start()].count('\n') + 1 structures.append(StructureNode( type="struct", name=match.group(1) + " (fallback)", start_line=line_num, end_line=line_num )) # Find enum definitions for match in re.finditer(r'^\s*(?:public\s+)?enum\s+(\w+)', text, re.MULTILINE): line_num = text[:match.start()].count('\n') + 1 structures.append(StructureNode( type="enum", name=match.group(1) + " (fallback)", start_line=line_num, end_line=line_num )) # Find method definitions for match in re.finditer(r'^\s*(?:public|private|protected|internal)\s+(?:static\s+)?(?:async\s+)?(?:\w+(?:<[^>]+>)?)\s+(\w+)\s*\(', text, re.MULTILINE): line_num = text[:match.start()].count('\n') + 1 structures.append(StructureNode( type="method", name=match.group(1) + " (fallback)", start_line=line_num, end_line=line_num )) # Find property definitions for match in re.finditer(r'^\s*(?:public|private|protected|internal)\s+(?:static\s+)?(?:\w+(?:<[^>]+>)?)\s+(\w+)\s*\{', text, re.MULTILINE): line_num = text[:match.start()].count('\n') + 1 structures.append(StructureNode( type="property", name=match.group(1) + " (fallback)", start_line=line_num, end_line=line_num )) return structures # =========================================================================== # Semantic Analysis - Layer 1 (from CSharpAnalyzer) # =========================================================================== def extract_imports(self, file_path: str, content: str) -> list[ImportInfo]: """ Extract using directives from C# file. Patterns supported: - using System.Collections; - using System.Collections.Generic; - using static System.Math; - using Alias = System.Collections.Generic.List<int>; """ imports = [] # Pattern 1: Standard using directives # using System.Collections.Generic; using_pattern = r'^\s*using\s+([A-Za-z_][A-Za-z0-9_.]*)\s*;' for match in re.finditer(using_pattern, content, re.MULTILINE): namespace = match.group(1) line_num = content[:match.start()].count('\n') + 1 imports.append( ImportInfo( source_file=file_path, target_module=namespace, line=line_num, import_type="using", ) ) # Pattern 2: Static using directives # using static System.Math; static_using_pattern = r'^\s*using\s+static\s+([A-Za-z_][A-Za-z0-9_.]*)\s*;' for match in re.finditer(static_using_pattern, content, re.MULTILINE): namespace = match.group(1) line_num = content[:match.start()].count('\n') + 1 imports.append( ImportInfo( source_file=file_path, target_module=namespace, line=line_num, import_type="static_using", ) ) # Pattern 3: Alias using directives # using MyList = System.Collections.Generic.List<int>; # Note: Allow full type syntax including generics with commas and spaces alias_using_pattern = r'^\s*using\s+([A-Za-z_][A-Za-z0-9_]*)\s*=\s*([A-Za-z_][A-Za-z0-9_.<>,\s]+?)\s*;' for match in re.finditer(alias_using_pattern, content, re.MULTILINE): alias = match.group(1) namespace = match.group(2) line_num = content[:match.start()].count('\n') + 1 imports.append( ImportInfo( source_file=file_path, target_module=namespace, line=line_num, import_type="alias_using", imported_names=[alias], ) ) return imports def find_entry_points(self, file_path: str, content: str) -> list[EntryPointInfo]: """ Find entry points in C# file. Entry points: - static void Main(string[] args) - static async Task Main(string[] args) - static int Main() - [ApiController] or [Controller] attributes (ASP.NET) - [HttpGet], [HttpPost], etc. (ASP.NET action methods) - Top-level statements (C# 9+) """ entry_points = [] # Pattern 1: Main methods (various signatures) # static void Main(), static int Main(string[] args), static async Task Main() main_patterns = [ r'^\s*(?:public\s+|private\s+|internal\s+)?static\s+(?:async\s+)?(?:void|int|Task(?:<int>)?)\s+Main\s*\(', ] for pattern in main_patterns: for match in re.finditer(pattern, content, re.MULTILINE): line_num = content[:match.start()].count('\n') + 1 entry_points.append( EntryPointInfo( file=file_path, type="main_function", line=line_num, name="Main", ) ) # Pattern 2: ASP.NET Controllers (class-level attributes) # [ApiController], [Controller] controller_pattern = r'^\s*\[(?:Api)?Controller\]' for match in re.finditer(controller_pattern, content, re.MULTILINE): line_num = content[:match.start()].count('\n') + 1 # Try to find the class name following the attribute remaining_content = content[match.end():] class_match = re.search(r'\s*(?:public\s+|internal\s+)?class\s+([A-Za-z_][A-Za-z0-9_]*)', remaining_content) class_name = class_match.group(1) if class_match else "Controller" entry_points.append( EntryPointInfo( file=file_path, type="controller", line=line_num, name=class_name, framework="ASP.NET", ) ) # Pattern 3: ASP.NET Action methods (HTTP verb attributes) # [HttpGet], [HttpPost], [HttpPut], [HttpDelete], etc. http_verb_pattern = r'^\s*\[Http(?:Get|Post|Put|Delete|Patch|Head|Options)\]' for match in re.finditer(http_verb_pattern, content, re.MULTILINE): line_num = content[:match.start()].count('\n') + 1 # Try to find the method name following the attribute remaining_content = content[match.end():] method_match = re.search( r'\s*(?:public\s+|private\s+|protected\s+|internal\s+)?(?:async\s+)?(?:Task<?[^>]*>?|ActionResult<?[^>]*>?|IActionResult|[A-Za-z_][A-Za-z0-9_.<>]*)\s+([A-Za-z_][A-Za-z0-9_]*)\s*\(', remaining_content ) method_name = method_match.group(1) if method_match else "ActionMethod" entry_points.append( EntryPointInfo( file=file_path, type="http_handler", line=line_num, name=method_name, framework="ASP.NET", ) ) # Pattern 4: Startup class (ASP.NET Core convention) # public class Startup startup_pattern = r'^\s*(?:public\s+)?class\s+Startup\s*(?::|{)' for match in re.finditer(startup_pattern, content, re.MULTILINE): line_num = content[:match.start()].count('\n') + 1 entry_points.append( EntryPointInfo( file=file_path, type="startup_class", line=line_num, name="Startup", framework="ASP.NET Core", ) ) # Pattern 5: Program class (ASP.NET Core 6+ minimal API) # Look for WebApplication.CreateBuilder or WebApplicationBuilder minimal_api_pattern = r'(?:WebApplication\.CreateBuilder|WebApplicationBuilder)' for match in re.finditer(minimal_api_pattern, content): line_num = content[:match.start()].count('\n') + 1 entry_points.append( EntryPointInfo( file=file_path, type="minimal_api", line=line_num, name="Program", framework="ASP.NET Core", ) ) break # Only report once per file return entry_points # =========================================================================== # Semantic Analysis - Layer 2 # =========================================================================== def extract_definitions(self, file_path: str, content: str) -> list[DefinitionInfo]: """Extract function/class definitions by reusing scan() output. This is the key optimization: instead of re-parsing with tree-sitter, we convert the StructureNode output from scan() to DefinitionInfo. """ try: structures = self.scan(content.encode("utf-8")) if not structures: return [] return self._structures_to_definitions(file_path, structures) except Exception: # Fallback to regex-based extraction return self._extract_definitions_regex(file_path, content) def _structures_to_definitions( self, file_path: str, structures: list[StructureNode], parent: str = None ) -> list[DefinitionInfo]: """Convert StructureNode list to DefinitionInfo list. Extended for C# to handle more types (interfaces, structs, etc.) """ definitions = [] for node in structures: # Include C#-specific types if node.type in ("class", "function", "method", "interface", "struct", "enum", "record", "constructor"): definitions.append( DefinitionInfo( file=file_path, type=node.type, name=node.name, line=node.start_line, signature=node.signature, parent=parent, ) ) # Recurse into children if node.children: child_parent = node.name if node.type in ("class", "interface", "struct", "record", "namespace") else parent definitions.extend( self._structures_to_definitions(file_path, node.children, child_parent) ) return definitions def _extract_definitions_regex( self, file_path: str, content: str ) -> list[DefinitionInfo]: """Fallback: Extract definitions using regex.""" definitions = [] for match in re.finditer(r"^\s*(?:public\s+)?class\s+(\w+)", content, re.MULTILINE): line = content[: match.start()].count("\n") + 1 definitions.append( DefinitionInfo( file=file_path, type="class", name=match.group(1), line=line, signature=None, parent=None, ) ) for match in re.finditer(r"^\s*(?:public\s+)?interface\s+(\w+)", content, re.MULTILINE): line = content[: match.start()].count("\n") + 1 definitions.append( DefinitionInfo( file=file_path, type="interface", name=match.group(1), line=line, signature=None, parent=None, ) ) for match in re.finditer( r"^\s*(?:public|private|protected|internal)\s+(?:static\s+)?(?:\w+)\s+(\w+)\s*\(", content, re.MULTILINE ): line = content[: match.start()].count("\n") + 1 definitions.append( DefinitionInfo( file=file_path, type="method", name=match.group(1), line=line, signature=None, parent=None, ) ) return definitions def extract_calls( self, file_path: str, content: str, definitions: list[DefinitionInfo] ) -> list[CallInfo]: """Extract method calls using tree-sitter. Note: This needs tree-sitter parsing because call sites are not captured in the structure scan (which only captures definitions). """ try: source_bytes = content.encode("utf-8") tree = self.parser.parse(source_bytes) return self._extract_calls_tree_sitter( file_path, tree.root_node, source_bytes, definitions ) except Exception: return self._extract_calls_regex(file_path, content, definitions) def _extract_calls_tree_sitter( self, file_path: str, root, source_bytes: bytes, definitions: list[DefinitionInfo] ) -> list[CallInfo]: """Extract calls using tree-sitter AST.""" calls = [] current_function = None def traverse(node, context_func=None): nonlocal current_function # Track current method/function context if node.type in ("method_declaration", "constructor_declaration"): name_node = node.child_by_field_name("name") if name_node: current_function = source_bytes[ name_node.start_byte : name_node.end_byte ].decode("utf-8") for child in node.children: traverse(child, current_function) current_function = context_func return # Invocation expressions (method calls) if node.type == "invocation_expression": func_node = node.child_by_field_name("function") if func_node: # Simple identifier call: MethodName() if func_node.type == "identifier": callee_name = source_bytes[ func_node.start_byte : func_node.end_byte ].decode("utf-8") line = node.start_point[0] + 1 calls.append( CallInfo( caller_file=file_path, caller_name=context_func, callee_name=callee_name, line=line, is_cross_file=False, ) ) # Member access call: obj.Method() or Class.StaticMethod() elif func_node.type == "member_access_expression": name_node = func_node.child_by_field_name("name") if name_node: callee_name = source_bytes[ name_node.start_byte : name_node.end_byte ].decode("utf-8") line = node.start_point[0] + 1 calls.append( CallInfo( caller_file=file_path, caller_name=context_func, callee_name=callee_name, line=line, is_cross_file=False, ) ) for child in node.children: traverse(child, context_func) traverse(root) local_defs = {d.name for d in definitions} for call in calls: if call.callee_name not in local_defs: call.is_cross_file = True return calls def _extract_calls_regex( self, file_path: str, content: str, definitions: list[DefinitionInfo] ) -> list[CallInfo]: """Fallback: Extract calls using regex.""" calls = [] # Simple pattern to find method calls for match in re.finditer(r"\b(\w+)\s*\(", content): callee_name = match.group(1) line = content[: match.start()].count("\n") + 1 # Skip keywords and common constructs if callee_name in [ "if", "for", "while", "foreach", "switch", "catch", "using", "lock", "return", "new", "typeof", "nameof", "class", "struct", "interface", "enum", "void", "int", "string", "bool", "double", "float", "decimal", "byte", ]: continue calls.append( CallInfo( caller_file=file_path, caller_name=None, callee_name=callee_name, line=line, is_cross_file=False, ) ) local_defs = {d.name for d in definitions} for call in calls: if call.callee_name not in local_defs: call.is_cross_file = True return calls # =========================================================================== # Classification (enhanced for C#) # =========================================================================== def classify_file(self, file_path: str, content: str) -> str: """ Classify C# file into architectural cluster. Uses base class heuristics plus C#-specific patterns. """ # Use base class classification (handles common patterns like test_) base_cluster = super().classify_file(file_path, content) # C#-specific patterns if base_cluster == "other": name = Path(file_path).name.lower() path_lower = file_path.lower() # Entry points (Program.cs, Startup.cs) if name in ["program.cs", "startup.cs"]: return "entry_points" # Check for Main method if re.search(r'^\s*static\s+(?:async\s+)?(?:void|int|Task)\s+Main\s*\(', content, re.MULTILINE): return "entry_points" # Controllers if "/controllers/" in path_lower or name.endswith("controller.cs"): return "core_logic" # ASP.NET Controller attribute if re.search(r'^\s*\[(?:Api)?Controller\]', content, re.MULTILINE): return "core_logic" # Models if "/models/" in path_lower or name.endswith("model.cs"): return "core_logic" # Services if "/services/" in path_lower or name.endswith("service.cs"): return "core_logic" # Repositories if "/repositories/" in path_lower or name.endswith("repository.cs"): return "core_logic" # Config files if name in ["appsettings.cs", "config.cs", "configuration.cs"]: return "config" # Extensions (helper methods) if name.endswith("extensions.cs"): return "utilities" # Tests (NUnit, xUnit, MSTest patterns) if "test" in name or "/tests/" in path_lower: return "tests" return base_cluster # =========================================================================== # CodeMap Integration # =========================================================================== def resolve_import_to_file( self, module: str, source_file: str, all_files: list[str], definitions_map: dict[str, str], ) -> Optional[str]: """ Resolve C# using directive to file path. C# uses namespace imports, not file imports. We try to match namespace parts to file paths. System.* namespaces are skipped. """ # Skip system namespaces if module.startswith(("System", "Microsoft", "Windows")): return None # Try matching namespace to file path # Namespace.ClassName -> Namespace/ClassName.cs parts = module.split(".") candidate = "/".join(parts) + ".cs" if candidate in all_files: return candidate # Just the last part (class name) if len(parts) > 0: for f in all_files: if f.endswith(f"/{parts[-1]}.cs") or f == f"{parts[-1]}.cs": return f return None def format_entry_point(self, ep: EntryPointInfo) -> str: """ Format C# entry point for display. Formats: - main_method: "static void Main @line" - controller: "[ApiController] ControllerName @line" - aspnet_app: "WebApplication @line" """ if ep.type == "main_function": return f" {ep.file}:static void Main @{ep.line}" elif ep.type == "controller": return f" {ep.file}:[ApiController] {ep.name} @{ep.line}" elif ep.type == "aspnet_app": return f" {ep.file}:WebApplication @{ep.line}" elif ep.type == "top_level": return f" {ep.file}:top-level statements @{ep.line}" elif ep.type == "minimal_api": return f" {ep.file}:minimal API @{ep.line}" elif ep.type == "startup_class": return f" {ep.file}:Startup class @{ep.line}" elif ep.type == "http_handler": return f" {ep.file}:[Http*] {ep.name} @{ep.line}" else: return super().format_entry_point(ep)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mariusei/file-scanner-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

csharp.py•49.2 KiB