Scantool - File Scanner MCP

Overview Schema Related Servers Score Discussions

css.py•30.5 KiB

"""CSS language support - unified scanner and analyzer.

This module combines CSSScanner and CSSAnalyzer into a single class,
eliminating duplication of metadata, tree-sitter parsing, and structure
extraction.

Key optimizations:
- extract_definitions() reuses scan() output via _structures_to_definitions()
- Single tree-sitter parser instance shared across all operations
"""

import posixpath
import re
from typing import Optional
from pathlib import Path

import tree_sitter_css
from tree_sitter import Language, Parser, Node

from .base import BaseLanguage
from .models import (
    StructureNode,
    ImportInfo,
    EntryPointInfo,
    DefinitionInfo,
    CallInfo,
)

# Substrings that mark a CSS file as generated/compiled output.
_GENERATED_MARKERS = (".generated.", ".compiled.", "bundle.", "chunk.")

# tree-sitter node types that are routed to _extract_at_rule().
_AT_RULE_NODE_TYPES = (
    "at_rule",
    "font_face_statement",
    "charset_statement",
    "namespace_statement",
    "layer_statement",
)

# Query node types recognised as children of a media_statement.
_MEDIA_QUERY_NODE_TYPES = (
    "keyword_query",
    "feature_query",
    "media_query_list",
    "binary_query",
    "unary_query",
    "parenthesized_query",
)

# Shared by the regex fallback scanner and extract_imports().
_IMPORT_RE = re.compile(
    r'@import\s+(?:url\(["\']?([^"\')\s]+)["\']?\)|["\']([^"\']+)["\'])',
    re.IGNORECASE,
)
_URL_RE = re.compile(r'url\(["\']?([^"\')\s]+)["\']?\)', re.IGNORECASE)
_MEDIA_RE = re.compile(r'@media\s+([^{]+)\s*\{', re.IGNORECASE)
_KEYFRAMES_RE = re.compile(r'@keyframes\s+([^\s{]+)\s*\{', re.IGNORECASE)
_FONT_FACE_RE = re.compile(r'@font-face\s*\{', re.IGNORECASE)
_ROOT_RE = re.compile(r':root\s*\{')
_ROOT_BLOCK_RE = re.compile(r':root\s*\{([^}]+)\}', re.DOTALL)
_CLASS_RULE_RE = re.compile(
    r'^\s*(\.[a-zA-Z_][\w-]*(?:\s*[,>+~]\s*[^{]+)?)\s*\{', re.MULTILINE
)
_ID_RULE_RE = re.compile(
    r'^\s*(#[a-zA-Z_][\w-]*(?:\s*[,>+~]\s*[^{]+)?)\s*\{', re.MULTILINE
)
# A custom-property *declaration* ("--name:"), as opposed to a var(--name) use.
_CUSTOM_PROPERTY_DECL_RE = re.compile(r'(?<![\w-])--[\w-]+\s*:')


class CSSLanguage(BaseLanguage):
    """Unified language handler for CSS files (.css).

    Provides both structure scanning and semantic analysis:
    - scan(): Extract rules, at-rules, media queries, keyframes
    - extract_imports(): Find @import statements and url() references
    - find_entry_points(): Find main stylesheets and :root CSS variables
    - extract_definitions(): Convert scan() output to DefinitionInfo
    """

    def __init__(self, **kwargs):
        """Initialise the base handler and create the tree-sitter CSS parser."""
        super().__init__(**kwargs)
        self.parser = Parser()
        self.parser.language = Language(tree_sitter_css.language())

    # ===========================================================================
    # Metadata (REQUIRED)
    # ===========================================================================

    @classmethod
    def get_extensions(cls) -> list[str]:
        """Return the file extensions handled by this language."""
        return [".css"]

    @classmethod
    def get_language_name(cls) -> str:
        """Return the display name of the language."""
        return "CSS"

    @classmethod
    def get_priority(cls) -> int:
        """Return the handler priority."""
        return 10

    # ===========================================================================
    # Skip Logic (combined from scanner + analyzer)
    # ===========================================================================

    @classmethod
    def should_skip(cls, filename: str) -> bool:
        """Return True for minified, source-map and generated CSS files.

        The comparison is case-insensitive so that, like should_analyze(),
        names such as ``STYLE.MIN.CSS`` are skipped as well.
        """
        lowered = filename.lower()
        if lowered.endswith((".min.css", ".css.map")):
            return True
        # Skip common generated patterns
        return any(marker in lowered for marker in _GENERATED_MARKERS)

    def should_analyze(self, file_path: str) -> bool:
        """Return False for CSS files that should not be analyzed.

        - Skip minified CSS files
        - Skip source maps
        - Skip generated/compiled files
        """
        filename = Path(file_path).name.lower()

        # Skip minified files
        if ".min." in filename:
            return False

        # Skip source maps
        if filename.endswith(".map"):
            return False

        # Skip common generated patterns
        if any(marker in filename for marker in _GENERATED_MARKERS):
            return False

        return True

    def is_low_value_for_inventory(self, file_path: str, size: int = 0) -> bool:
        """Identify low-value CSS files for inventory listing.

        Low-value files:
        - Very small files (likely stubs)
        - Vendor files

        Anything else is delegated to the base-class implementation.
        """
        filename = Path(file_path).name.lower()

        # Very small CSS files
        if size < 50:
            return True

        # Vendor/third-party files
        vendor_markers = ("vendor", "normalize", "reset", "bootstrap.min")
        if any(marker in filename for marker in vendor_markers):
            return True

        return super().is_low_value_for_inventory(file_path, size)

    # ===========================================================================
    # Structure Scanning (from CSSScanner)
    # ===========================================================================

    def scan(self, source_code: bytes) -> Optional[list[StructureNode]]:
        """Scan CSS source code and extract structure with metadata.

        Returns the extracted structures, the regex fallback result when the
        parse is too broken (or raises) and ``fallback_on_errors`` is set,
        or ``None`` otherwise.
        """
        try:
            tree = self.parser.parse(source_code)

            # Check for excessive errors
            if self._should_use_fallback(tree.root_node):
                if self.fallback_on_errors:
                    return self._fallback_extract(source_code)
                return None

            return self._extract_structure(tree.root_node, source_code)
        except Exception as e:
            # Deliberate best-effort boundary: a parser failure must not
            # abort scanning of the remaining files.
            if self.show_errors:
                print(f"CSS parsing error: {e}")
            if self.fallback_on_errors:
                return self._fallback_extract(source_code)
            return None

    def _extract_structure(
        self, root: Node, source_code: bytes
    ) -> list[StructureNode]:
        """Extract structure from the top level of a CSS stylesheet."""
        return self._collect_structures(root.children, source_code)

    def _extract_from_error_node(
        self, error_node: Node, source_code: bytes
    ) -> list[StructureNode]:
        """Extract valid CSS structures from within an ERROR node."""
        return self._collect_structures(error_node.children, source_code)

    def _collect_structures(
        self, nodes: list[Node], source_code: bytes
    ) -> list[StructureNode]:
        """Convert a sequence of sibling nodes into StructureNodes.

        ERROR nodes are descended into recursively so that valid statements
        the parser wrapped in an error are still reported.
        """
        structures = []
        for node in nodes:
            if node.type == "ERROR":
                structures.extend(
                    self._collect_structures(node.children, source_code)
                )
                continue
            extracted = self._extract_statement(node, source_code)
            if extracted:
                structures.append(extracted)
        return structures

    def _extract_statement(
        self, node: Node, source_code: bytes
    ) -> Optional[StructureNode]:
        """Dispatch a single node to the extractor matching its type.

        Returns ``None`` for node types that carry no structure
        (declarations, punctuation, ordinary comments, ...).
        """
        node_type = node.type

        # @import statements
        if node_type == "import_statement":
            return self._extract_import_node(node, source_code)
        # @media statements
        if node_type == "media_statement":
            return self._extract_media(node, source_code)
        # @keyframes statements
        if node_type == "keyframes_statement":
            return self._extract_keyframes(node, source_code)
        # @supports statements
        if node_type == "supports_statement":
            return self._extract_supports(node, source_code)
        # @font-face, @charset, @namespace, @layer, etc.
        if node_type in _AT_RULE_NODE_TYPES:
            return self._extract_at_rule(node, source_code)
        # Rule sets (selector { declarations })
        if node_type == "rule_set":
            return self._extract_rule_set(node, source_code)
        # Comments (only important ones with /*! */)
        if node_type == "comment":
            return self._extract_important_comment(node, source_code)
        return None

    def _extract_important_comment(
        self, node: Node, source_code: bytes
    ) -> Optional[StructureNode]:
        """Return a StructureNode for a ``/*! ... */`` comment, else None."""
        comment_text = self._get_node_text(node, source_code)
        if not comment_text.startswith("/*!"):
            return None
        return StructureNode(
            type="comment",
            name=self._extract_comment_title(comment_text),
            start_line=node.start_point[0] + 1,
            end_line=node.end_point[0] + 1,
            docstring=comment_text[:100]
        )

    def _extract_import_node(
        self, node: Node, source_code: bytes
    ) -> Optional[StructureNode]:
        """Extract @import statement as StructureNode.

        Returns ``None`` when no quoted URL could be found in the statement.
        """
        url = None

        for child in node.children:
            if child.type == "call_expression":
                # url("...")
                for arg in child.children:
                    if arg.type != "arguments":
                        continue
                    for string_node in arg.children:
                        if string_node.type == "string_value":
                            url = self._get_node_text(
                                string_node, source_code
                            ).strip('"\'')
                            break
            elif child.type == "string_value":
                url = self._get_node_text(child, source_code).strip('"\'')

        if not url:
            return None

        return StructureNode(
            type="import",
            name=url,
            start_line=node.start_point[0] + 1,
            end_line=node.end_point[0] + 1,
            signature=url,
            modifiers=["import"]
        )

    def _extract_media(
        self, node: Node, source_code: bytes
    ) -> Optional[StructureNode]:
        """Extract @media statement.

        A comma-separated query list (``@media screen, print``) appears as
        several query children; they are joined with ", " so that no query
        is lost.
        """
        queries = []
        children = []

        for child in node.children:
            if child.type in _MEDIA_QUERY_NODE_TYPES:
                queries.append(
                    self._normalize_signature(
                        self._get_node_text(child, source_code)
                    )
                )
            elif child.type == "block":
                children = self._extract_nested_rules(child, source_code)

        query = ", ".join(queries) if queries else None

        return StructureNode(
            type="media_query",
            name=query or "@media",
            start_line=node.start_point[0] + 1,
            end_line=node.end_point[0] + 1,
            signature=query,
            modifiers=["media"],
            children=children,
            complexity={"rules": len(children)} if children else None
        )

    def _extract_keyframes(
        self, node: Node, source_code: bytes
    ) -> Optional[StructureNode]:
        """Extract @keyframes statement, named after its animation."""
        name = None

        for child in node.children:
            if child.type == "keyframes_name":
                name = self._get_node_text(child, source_code)
                break

        return StructureNode(
            type="keyframes",
            name=name or "animation",
            start_line=node.start_point[0] + 1,
            end_line=node.end_point[0] + 1,
            modifiers=["keyframes"]
        )

    def _extract_supports(
        self, node: Node, source_code: bytes
    ) -> Optional[StructureNode]:
        """Extract @supports statement together with its nested rules."""
        query = None
        children = []

        for child in node.children:
            if child.type in ("feature_query", "parenthesized_query"):
                query = self._normalize_signature(
                    self._get_node_text(child, source_code)
                )
            elif child.type == "block":
                children = self._extract_nested_rules(child, source_code)

        return StructureNode(
            type="at_supports",
            name=query or "@supports",
            start_line=node.start_point[0] + 1,
            end_line=node.end_point[0] + 1,
            signature=query,
            modifiers=["supports"],
            children=children,
            # Same shape _extract_at_rule() reports for @supports.
            complexity={"rules": len(children)} if children else None
        )

    def _extract_at_rule(
        self, node: Node, source_code: bytes
    ) -> Optional[StructureNode]:
        """Extract at-rule (@media, @keyframes, @import, etc.).

        Returns ``None`` when the at-keyword cannot be determined.
        """
        keyword = None
        query = None
        children = []

        for child in node.children:
            if child.type == "at_keyword":
                keyword = self._get_node_text(child, source_code)
            elif child.type in ("keyword_query", "feature_query", "media_query"):
                query = self._normalize_signature(
                    self._get_node_text(child, source_code)
                )
            elif child.type == "keyframes_name":
                query = self._get_node_text(child, source_code)
            elif child.type == "block":
                children = self._extract_nested_rules(child, source_code)
            elif child.type in ("string_value", "call_expression"):
                query = self._get_node_text(child, source_code).strip('"\'')
            elif keyword is None and child.type.startswith("@"):
                # Dedicated statements such as @charset / @namespace carry
                # their keyword as an anonymous "@..." token rather than an
                # at_keyword node; without this they would be dropped.
                keyword = self._get_node_text(child, source_code)

        if not keyword:
            return None

        at_type = keyword.lstrip("@")

        # Determine node type based on at-rule type
        if at_type == "media":
            node_type = "media_query"
            name = query or "@media"
        elif at_type == "keyframes":
            node_type = "keyframes"
            name = query or "animation"
        elif at_type == "import":
            node_type = "import"
            name = query or "import"
        elif at_type == "font-face":
            node_type = "font_face"
            name = "@font-face"
        elif at_type in ("supports", "layer", "container"):
            node_type = f"at_{at_type}"
            name = query or f"@{at_type}"
        else:
            node_type = "at_rule"
            name = f"@{at_type}"

        return StructureNode(
            type=node_type,
            name=name,
            start_line=node.start_point[0] + 1,
            end_line=node.end_point[0] + 1,
            signature=query,
            modifiers=[at_type],
            children=children,
            complexity={"rules": len(children)} if children else None
        )

    def _extract_nested_rules(
        self, block_node: Node, source_code: bytes
    ) -> list[StructureNode]:
        """Extract rules nested inside a block (e.g., @media).

        Uses the same dispatcher as the top level so that nested @media,
        @supports and @keyframes statements are reported, not only rule
        sets and generic at-rules. Comments are left out so the result
        stays a list of rules.
        """
        rules = []
        for child in block_node.children:
            if child.type == "comment":
                continue
            nested = self._extract_statement(child, source_code)
            if nested:
                rules.append(nested)
        return rules

    def _extract_rule_set(
        self, node: Node, source_code: bytes
    ) -> Optional[StructureNode]:
        """Extract a CSS rule set (selector + declarations).

        Returns ``None`` when the rule set has no selectors.
        """
        selectors = []
        declaration_count = 0
        has_variables = False
        modifiers = []

        for child in node.children:
            if child.type == "selectors":
                selectors = self._extract_selectors(child, source_code)
            elif child.type == "block":
                declaration_count, has_variables = self._count_declarations(
                    child, source_code
                )

        if not selectors:
            return None

        # Detect selector patterns
        if any(":root" in s for s in selectors):
            modifiers.append("root")
        if any(s.startswith(".") for s in selectors):
            modifiers.append("class")
        if any(s.startswith("#") for s in selectors):
            modifiers.append("id")
        if any(":" in s for s in selectors):
            modifiers.append("has-pseudo")
        if has_variables:
            modifiers.append("has-variables")

        # Use first selector as name, or combine if multiple
        if len(selectors) == 1:
            name = selectors[0]
        else:
            name = f"{selectors[0]} (+{len(selectors) - 1})"

        if len(name) > 50:
            name = name[:47] + "..."

        return StructureNode(
            type="rule_set",
            name=name,
            start_line=node.start_point[0] + 1,
            end_line=node.end_point[0] + 1,
            signature=f"{len(selectors)} sel, {declaration_count} decl",
            modifiers=modifiers,
            complexity={
                "selectors": len(selectors),
                "declarations": declaration_count,
            }
        )

    def _extract_selectors(
        self, selectors_node: Node, source_code: bytes
    ) -> list[str]:
        """Extract individual selectors from a selectors node.

        Children are grouped on "," tokens; the parts of each group are
        joined with a space and passed through _normalize_signature().
        """
        selectors = []
        current = []

        for child in selectors_node.children:
            if child.type == ",":
                if current:
                    selectors.append(
                        self._normalize_signature(" ".join(current))
                    )
                    current = []
            else:
                text = self._get_node_text(child, source_code).strip()
                if text:
                    current.append(text)

        if current:
            selectors.append(self._normalize_signature(" ".join(current)))

        return selectors

    def _count_declarations(
        self, block_node: Node, source_code: bytes
    ) -> tuple[int, bool]:
        """Count declarations and check for CSS variables.

        Returns ``(declaration_count, has_custom_properties)``.
        """
        count = 0
        has_variables = False

        for child in block_node.children:
            if child.type != "declaration":
                continue
            count += 1
            # Check for CSS custom properties (--var-name)
            for prop in child.children:
                if prop.type == "property_name":
                    prop_name = self._get_node_text(prop, source_code)
                    if prop_name.startswith("--"):
                        has_variables = True

        return count, has_variables

    def _extract_comment_title(self, comment: str) -> str:
        """Extract a title (first line, max 50 chars) from a comment."""
        # str.strip() with a character set removes the comment delimiters
        # and surrounding whitespace from both ends.
        text = comment.strip("/*! \n\r\t*/")
        first_line = text.split("\n")[0].strip()
        if len(first_line) > 50:
            first_line = first_line[:47] + "..."
        return first_line or "comment"

    @staticmethod
    def _line_number(text: str, offset: int) -> int:
        """Return the 1-based line number of character ``offset`` in ``text``."""
        # Bounded count avoids copying text[:offset] for every match.
        return text.count("\n", 0, offset) + 1

    def _fallback_extract(self, source_code: bytes) -> list[StructureNode]:
        """Regex-based extraction for malformed CSS files.

        Results are grouped by kind (imports, media queries, keyframes,
        font faces, :root, class rules, ID rules), not sorted by line.
        """
        text = source_code.decode("utf-8", errors="replace")
        structures = []

        # Find @import rules
        for match in _IMPORT_RE.finditer(text):
            url = match.group(1) or match.group(2)
            line_num = self._line_number(text, match.start())
            structures.append(StructureNode(
                type="import",
                name=url,
                start_line=line_num,
                end_line=line_num,
                modifiers=["import"]
            ))

        # Find @media queries
        for match in _MEDIA_RE.finditer(text):
            query = match.group(1).strip()
            line_num = self._line_number(text, match.start())
            structures.append(StructureNode(
                type="media_query",
                name=query[:50] + "..." if len(query) > 50 else query,
                start_line=line_num,
                end_line=line_num,
                signature=query,
                modifiers=["media"]
            ))

        # Find @keyframes
        for match in _KEYFRAMES_RE.finditer(text):
            line_num = self._line_number(text, match.start())
            structures.append(StructureNode(
                type="keyframes",
                name=match.group(1),
                start_line=line_num,
                end_line=line_num,
                modifiers=["keyframes"]
            ))

        # Find @font-face
        for match in _FONT_FACE_RE.finditer(text):
            line_num = self._line_number(text, match.start())
            structures.append(StructureNode(
                type="font_face",
                name="@font-face",
                start_line=line_num,
                end_line=line_num,
                modifiers=["font-face"]
            ))

        # Find :root rule (CSS variables)
        for match in _ROOT_RE.finditer(text):
            line_num = self._line_number(text, match.start())
            structures.append(StructureNode(
                type="rule_set",
                name=":root",
                start_line=line_num,
                end_line=line_num,
                modifiers=["root", "has-variables"]
            ))

        # Find class selectors (common patterns), then ID selectors
        for pattern, modifier in ((_CLASS_RULE_RE, "class"), (_ID_RULE_RE, "id")):
            for match in pattern.finditer(text):
                selector = match.group(1).strip()
                # The pattern's leading "^\s*" may swallow preceding blank
                # lines, so locate the line from the selector itself.
                line_num = self._line_number(text, match.start(1))
                if len(selector) > 50:
                    selector = selector[:47] + "..."
                structures.append(StructureNode(
                    type="rule_set",
                    name=selector,
                    start_line=line_num,
                    end_line=line_num,
                    modifiers=[modifier]
                ))

        return structures

    # ===========================================================================
    # Semantic Analysis - Layer 1 (from CSSAnalyzer)
    # ===========================================================================

    def extract_imports(self, file_path: str, content: str) -> list[ImportInfo]:
        """Extract imports from CSS file.

        Patterns supported:
        - @import url("...")
        - @import "..."
        - url(...) references in properties

        External URLs and data URIs are ignored. A ``url()`` that belongs to
        an ``@import`` is reported once, as a ``css_import``.
        """
        imports = []
        import_spans = []

        # Pattern 1: @import statements
        for match in _IMPORT_RE.finditer(content):
            # Remember the span even for external imports so that pattern 2
            # does not look at the same url() again.
            import_spans.append(match.span())
            url = match.group(1) or match.group(2)

            if self._is_external_url(url):
                continue

            imports.append(ImportInfo(
                source_file=file_path,
                target_module=url,
                line=self._line_number(content, match.start()),
                import_type="css_import",
                imported_names=[],
            ))

        # Pattern 2: url() references in properties (fonts, images, etc.)
        for match in _URL_RE.finditer(content):
            position = match.start()
            if any(start <= position < end for start, end in import_spans):
                continue

            url = match.group(1)

            # Skip data URIs and external URLs
            if url.startswith("data:") or self._is_external_url(url):
                continue

            # Determine asset type
            url_lower = url.lower()
            if any(ext in url_lower
                   for ext in (".woff", ".woff2", ".ttf", ".otf", ".eot")):
                import_type = "font"
            elif any(ext in url_lower
                     for ext in (".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp")):
                import_type = "image"
            else:
                import_type = "asset"

            imports.append(ImportInfo(
                source_file=file_path,
                target_module=url,
                line=self._line_number(content, position),
                import_type=import_type,
                imported_names=[],
            ))

        return imports

    def find_entry_points(self, file_path: str, content: str) -> list[EntryPointInfo]:
        """Find entry points in CSS file.

        Entry points:
        - Main stylesheets (main.css, styles.css, app.css)
        - Files with :root CSS variables
        """
        entry_points = []
        filename = Path(file_path).name.lower()

        # Check for main stylesheet files
        main_patterns = ["main.css", "styles.css", "style.css", "app.css", "global.css"]
        if filename in main_patterns:
            entry_points.append(EntryPointInfo(
                file=file_path,
                type="main_stylesheet",
                name=filename,
                line=1,
                framework="CSS",
            ))

        # Check for :root with CSS variables (first :root block only)
        root_match = _ROOT_BLOCK_RE.search(content)
        if root_match:
            # Count declarations ("--name:") so that var(--name) usages
            # inside :root are not mistaken for variable definitions.
            var_count = len(_CUSTOM_PROPERTY_DECL_RE.findall(root_match.group(1)))
            if var_count > 0:
                entry_points.append(EntryPointInfo(
                    file=file_path,
                    type="css_variables",
                    name=f":root ({var_count} variables)",
                    line=self._line_number(content, root_match.start()),
                ))

        return entry_points

    # ===========================================================================
    # Semantic Analysis - Layer 2
    # ===========================================================================

    def extract_definitions(self, file_path: str, content: str) -> list[DefinitionInfo]:
        """Extract definitions by reusing scan() output.

        For CSS, definitions include rule sets, media queries, keyframes, etc.
        Returns an empty list when nothing was found or extraction failed.
        """
        try:
            # errors="replace" keeps lone surrogates from aborting the scan.
            structures = self.scan(content.encode("utf-8", errors="replace"))
            if not structures:
                return []
            return self._structures_to_definitions(file_path, structures)
        except Exception as e:
            # Best-effort boundary, but surface the cause when asked to.
            if self.show_errors:
                print(f"CSS definition extraction error: {e}")
            return []

    def extract_calls(
        self, file_path: str, content: str, definitions: list[DefinitionInfo]
    ) -> list[CallInfo]:
        """CSS doesn't have function calls - return empty list."""
        return []

    # ===========================================================================
    # Classification (enhanced for CSS)
    # ===========================================================================

    def classify_file(self, file_path: str, content: str) -> str:
        """Classify CSS file into architectural cluster.

        The base classification wins; filename heuristics are applied only
        when the base class answers "other".
        """
        cluster = super().classify_file(file_path, content)

        if cluster == "other":
            filename = Path(file_path).name.lower()

            # Check for main stylesheets
            if any(pattern in filename for pattern in ["main", "styles", "app", "global"]):
                return "entry_points"

            # Check for utility/helper stylesheets
            if any(pattern in filename for pattern in [
                "utils", "utility", "helpers", "mixins", "variables"
            ]):
                return "utilities"

            # Check for component stylesheets
            if any(pattern in filename for pattern in [
                "component", "button", "card", "modal", "form"
            ]):
                return "core_logic"

            # Check for reset/normalize
            if any(pattern in filename for pattern in ["reset", "normalize", "base"]):
                return "utilities"

        return cluster

    # ===========================================================================
    # CodeMap Integration
    # ===========================================================================

    def resolve_import_to_file(
        self,
        module: str,
        source_file: str,
        all_files: list[str],
        definitions_map: dict[str, str],
    ) -> Optional[str]:
        """Resolve CSS @import to file path.

        CSS imports are relative paths or URLs. Candidates are tried in
        order: the path as written, with ".css" appended, relative to the
        importing file, and finally the normalised relative path (which
        resolves "./" and "../" segments). Returns ``None`` for URLs and
        for paths that match nothing in ``all_files``.
        """
        # Skip URLs
        if module.startswith(("http://", "https://", "//")):
            return None

        needs_extension = not module.endswith(".css")
        candidates = [module]
        if needs_extension:
            candidates.append(f"{module}.css")

        # Try relative to source file
        source_dir = str(Path(source_file).parent)
        if source_dir != ".":
            candidates.append(f"{source_dir}/{module}")
            if needs_extension:
                candidates.append(f"{source_dir}/{module}.css")

        # Normalised form handles "./x.css" and "../shared/x.css".
        normalised = posixpath.normpath(
            posixpath.join(Path(source_file).parent.as_posix(), module)
        )
        candidates.append(normalised)
        if needs_extension:
            candidates.append(f"{normalised}.css")

        known_files = set(all_files)
        for candidate in candidates:
            if candidate in known_files:
                return candidate

        return None

    def format_entry_point(self, ep: EntryPointInfo) -> str:
        """Format CSS entry point for display."""
        if ep.type == "main_stylesheet":
            return f" {ep.file}:{ep.name} (main stylesheet)"
        elif ep.type == "css_variables":
            return f" {ep.file}:{ep.name}"
        else:
            return super().format_entry_point(ep)

    # ===========================================================================
    # Helper methods
    # ===========================================================================

    def _is_external_url(self, url: str) -> bool:
        """Check if a URL is external (not a local file reference).

        An empty or missing URL is treated as external so callers skip it.
        """
        if not url:
            return True
        # Check for protocol prefixes
        return url.startswith(("http://", "https://", "//", "data:", "blob:"))

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mariusei/file-scanner-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

css.py•30.5 KiB