Scantool - File Scanner MCP

Overview Schema Related Servers Score Discussions

css_scanner.py•20.8 KiB

"""CSS language scanner with structure and metadata extraction."""

import re
from bisect import bisect_right
from typing import Optional

import tree_sitter_css
from tree_sitter import Language, Node, Parser

from .base import BaseScanner, StructureNode

# Substrings that mark a stylesheet as build output rather than authored CSS.
_GENERATED_MARKERS = (".generated.", ".compiled.", "bundle.", "chunk.")

# Node types treated as a "query" following an at-keyword (@media, @supports,
# generic at-rules).  NOTE(review): names follow the tree-sitter-css grammar as
# understood at review time; a type the grammar never emits is simply inert.
_QUERY_NODE_TYPES = (
    "keyword_query",
    "feature_query",
    "media_query",
    "media_query_list",
    "binary_query",
    "unary_query",
    "selector_query",
    "parenthesized_query",
)

# Maps a statement node type to the name of the method that extracts it.
# Method names (not bound functions) are stored so subclass overrides apply.
_STATEMENT_EXTRACTORS = {
    "import_statement": "_extract_import",
    "media_statement": "_extract_media",
    "keyframes_statement": "_extract_keyframes",
    "supports_statement": "_extract_supports",
    "at_rule": "_extract_at_rule",
    "font_face_statement": "_extract_at_rule",
    "charset_statement": "_extract_at_rule",
    "namespace_statement": "_extract_at_rule",
    "layer_statement": "_extract_at_rule",
    "rule_set": "_extract_rule_set",
}

# Patterns used by the regex fallback for files tree-sitter cannot parse.
_IMPORT_RE = re.compile(
    r'@import\s+(?:url\(["\']?([^"\')\s]+)["\']?\)|["\']([^"\']+)["\'])',
    re.IGNORECASE,
)
_MEDIA_RE = re.compile(r'@media\s+([^{]+)\s*\{', re.IGNORECASE)
_KEYFRAMES_RE = re.compile(r'@keyframes\s+([^\s{]+)\s*\{', re.IGNORECASE)
_FONT_FACE_RE = re.compile(r'@font-face\s*\{', re.IGNORECASE)
_ROOT_RE = re.compile(r':root\s*\{')
_CLASS_RE = re.compile(
    r'^\s*(\.[a-zA-Z_][\w-]*(?:\s*[,>+~]\s*[^{]+)?)\s*\{', re.MULTILINE
)
_ID_RE = re.compile(
    r'^\s*(#[a-zA-Z_][\w-]*(?:\s*[,>+~]\s*[^{]+)?)\s*\{', re.MULTILINE
)


def _truncate(text: str, limit: int = 50) -> str:
    """Return *text* cut to at most *limit* characters, ending in "..." if cut."""
    if len(text) <= limit:
        return text
    return text[: limit - 3] + "..."


class CSSScanner(BaseScanner):
    """Scanner for CSS files with rule and at-rule extraction.

    Parses a stylesheet with tree-sitter and turns its statements (rule sets,
    ``@import``, ``@media``, ``@keyframes``, ``@supports`` and other at-rules,
    plus ``/*! ... */`` comments) into ``StructureNode`` objects.  When parsing
    fails or is judged too error-ridden by the base class, a regex-based
    fallback is used if ``fallback_on_errors`` is set.
    """

    def __init__(self, **kwargs):
        """Initialise the base scanner and a tree-sitter parser for CSS."""
        super().__init__(**kwargs)
        self.parser = Parser()
        self.parser.language = Language(tree_sitter_css.language())

    @classmethod
    def get_extensions(cls) -> list[str]:
        """Return the file extensions handled by this scanner."""
        return [".css"]

    @classmethod
    def get_language_name(cls) -> str:
        """Return the human-readable language name."""
        return "CSS"

    @classmethod
    def should_skip(cls, filename: str) -> bool:
        """Return True for minified, source-map and generated CSS files.

        The whole check is case-insensitive, so ``APP.MIN.CSS`` is skipped
        just like ``app.min.css``.
        """
        lowered = filename.lower()
        if lowered.endswith((".min.css", ".css.map")):
            return True
        return any(marker in lowered for marker in _GENERATED_MARKERS)

    def scan(self, source_code: bytes) -> Optional[list[StructureNode]]:
        """Scan CSS source code and extract structure with metadata.

        Returns the extracted structures, the regex fallback result when
        parsing fails (or has excessive errors) and ``fallback_on_errors`` is
        set, or ``None`` otherwise.
        """
        try:
            tree = self.parser.parse(source_code)
            if not self._should_use_fallback(tree.root_node):
                return self._extract_structure(tree.root_node, source_code)
        except Exception as e:
            # Deliberate best-effort boundary: any parser/extraction failure
            # degrades to the fallback instead of aborting the scan.
            if self.show_errors:
                print(f"CSS parsing error: {e}")

        if self.fallback_on_errors:
            return self._fallback_extract(source_code)
        return None

    def _extract_structure(
        self, root: Node, source_code: bytes
    ) -> list[StructureNode]:
        """Extract structure from the children of the stylesheet root node."""
        return self._collect_structures(root, source_code)

    def _extract_from_error_node(
        self, error_node: Node, source_code: bytes
    ) -> list[StructureNode]:
        """Extract valid CSS structures from within an ERROR node.

        Tree-sitter often wraps valid CSS in ERROR nodes when there's a syntax
        issue earlier in the file. Nested ERROR nodes are searched recursively
        so useful information can still be shown.
        """
        return self._collect_structures(error_node, source_code)

    def _collect_structures(
        self, parent: Node, source_code: bytes
    ) -> list[StructureNode]:
        """Collect statements and ``/*!`` comments from *parent*'s children.

        Shared by the top-level walk and the ERROR-node recovery so both
        recognise exactly the same set of constructs.
        """
        structures = []
        for child in parent.children:
            if child.type == "ERROR":
                structures.extend(self._collect_structures(child, source_code))
                continue

            if child.type == "comment":
                extracted = self._extract_banner_comment(child, source_code)
            else:
                extracted = self._extract_node(child, source_code)
            if extracted:
                structures.append(extracted)
        return structures

    def _extract_node(
        self, node: Node, source_code: bytes
    ) -> Optional[StructureNode]:
        """Dispatch a statement node to its extractor; None if unrecognised."""
        method_name = _STATEMENT_EXTRACTORS.get(node.type)
        if method_name is None:
            return None
        return getattr(self, method_name)(node, source_code)

    def _extract_banner_comment(
        self, node: Node, source_code: bytes
    ) -> Optional[StructureNode]:
        """Return a node for an important (``/*! ... */``) comment, else None."""
        comment_text = self._get_node_text(node, source_code)
        if not comment_text.startswith("/*!"):
            return None
        return StructureNode(
            type="comment",
            name=self._extract_comment_title(comment_text),
            start_line=node.start_point[0] + 1,
            end_line=node.end_point[0] + 1,
            docstring=comment_text[:100],
        )

    def _extract_import(
        self, node: Node, source_code: bytes
    ) -> Optional[StructureNode]:
        """Extract an @import statement; None when no target can be found.

        Handles both ``@import "file.css"`` and ``@import url(...)``.  If
        several candidates appear, the last one wins.
        """
        url = None
        for child in node.children:
            if child.type == "string_value":
                url = self._get_node_text(child, source_code).strip('"\'')
            elif child.type == "call_expression":  # url("...")
                found = self._first_call_argument(child, source_code)
                if found is not None:
                    url = found

        if not url:
            return None

        return StructureNode(
            type="import",
            name=url,
            start_line=node.start_point[0] + 1,
            end_line=node.end_point[0] + 1,
            signature=url,
            modifiers=["import"],
        )

    def _first_call_argument(
        self, call_node: Node, source_code: bytes
    ) -> Optional[str]:
        """Return the first string-like argument of a call, without quotes.

        NOTE(review): ``plain_value`` is accepted so that unquoted
        ``url(foo.css)`` is not dropped; confirm against the grammar in use.
        """
        for part in call_node.children:
            if part.type != "arguments":
                continue
            for arg in part.children:
                if arg.type in ("string_value", "plain_value"):
                    return self._get_node_text(arg, source_code).strip('"\'')
        return None

    def _extract_media(
        self, node: Node, source_code: bytes
    ) -> Optional[StructureNode]:
        """Extract an @media statement together with its nested rules.

        Comma-separated queries are joined with ", " so that
        ``@media screen, print`` is reported in full rather than as "print".
        """
        queries = []
        children = []

        for child in node.children:
            if child.type in _QUERY_NODE_TYPES:
                queries.append(
                    self._normalize_signature(
                        self._get_node_text(child, source_code)
                    )
                )
            elif child.type == "block":
                children = self._extract_nested_rules(child, source_code)

        query = ", ".join(queries) or None

        return StructureNode(
            type="media_query",
            name=query or "@media",
            start_line=node.start_point[0] + 1,
            end_line=node.end_point[0] + 1,
            signature=query,
            modifiers=["media"],
            children=children,
            complexity={"rules": len(children)} if children else None,
        )

    def _extract_keyframes(
        self, node: Node, source_code: bytes
    ) -> Optional[StructureNode]:
        """Extract an @keyframes statement, named "animation" if unnamed."""
        name = None
        for child in node.children:
            if child.type == "keyframes_name":
                name = self._get_node_text(child, source_code)
                break

        return StructureNode(
            type="keyframes",
            name=name or "animation",
            start_line=node.start_point[0] + 1,
            end_line=node.end_point[0] + 1,
            modifiers=["keyframes"],
        )

    def _extract_supports(
        self, node: Node, source_code: bytes
    ) -> Optional[StructureNode]:
        """Extract an @supports statement together with its nested rules.

        Recognises the same query node types as @media, so negated and
        combined conditions (``not (...)``, ``(...) and (...)``) are kept.
        """
        queries = []
        children = []

        for child in node.children:
            if child.type in _QUERY_NODE_TYPES:
                queries.append(
                    self._normalize_signature(
                        self._get_node_text(child, source_code)
                    )
                )
            elif child.type == "block":
                children = self._extract_nested_rules(child, source_code)

        query = ", ".join(queries) or None

        return StructureNode(
            type="at_supports",
            name=query or "@supports",
            start_line=node.start_point[0] + 1,
            end_line=node.end_point[0] + 1,
            signature=query,
            modifiers=["supports"],
            children=children,
            complexity={"rules": len(children)} if children else None,
        )

    def _extract_at_rule(
        self, node: Node, source_code: bytes
    ) -> Optional[StructureNode]:
        """Extract a generic at-rule (@font-face, @layer, @charset, ...).

        Returns None when no at-keyword can be identified.  All query-like
        children are joined with ", " to form the signature.
        """
        keyword = None
        query_parts = []
        children = []

        for child in node.children:
            kind = child.type
            if kind == "at_keyword":
                keyword = self._get_node_text(child, source_code)
            elif kind in _QUERY_NODE_TYPES:
                query_parts.append(
                    self._normalize_signature(
                        self._get_node_text(child, source_code)
                    )
                )
            elif kind == "keyframes_name":
                query_parts.append(self._get_node_text(child, source_code))
            elif kind == "block":
                children = self._extract_nested_rules(child, source_code)
            elif kind in ("string_value", "call_expression"):
                # e.g. @charset "UTF-8" or @namespace url(...)
                query_parts.append(
                    self._get_node_text(child, source_code).strip('"\'')
                )
            elif keyword is None and kind.startswith("@"):
                # NOTE(review): dedicated statement nodes (charset/namespace)
                # appear to expose the keyword as an anonymous token whose
                # type is its literal text rather than an at_keyword node.
                keyword = self._get_node_text(child, source_code)

        if not keyword:
            return None

        query = ", ".join(query_parts) or None
        at_type = keyword.lstrip("@")

        # Determine node type and display name from the at-keyword.
        if at_type == "media":
            node_type = "media_query"
            name = query or "@media"
        elif at_type == "keyframes":
            node_type = "keyframes"
            name = query or "animation"
        elif at_type == "import":
            node_type = "import"
            name = query or "import"
        elif at_type == "font-face":
            node_type = "font_face"
            name = "@font-face"
        elif at_type in ("supports", "layer", "container"):
            node_type = f"at_{at_type}"
            name = query or f"@{at_type}"
        else:
            node_type = "at_rule"
            name = f"@{at_type}"

        return StructureNode(
            type=node_type,
            name=name,
            start_line=node.start_point[0] + 1,
            end_line=node.end_point[0] + 1,
            signature=query,
            modifiers=[at_type],
            children=children,
            complexity={"rules": len(children)} if children else None,
        )

    def _extract_nested_rules(
        self, block_node: Node, source_code: bytes
    ) -> list[StructureNode]:
        """Extract statements nested inside a block (e.g., within @media).

        Uses the same dispatcher as the top level, so nested @media,
        @supports, @keyframes etc. are reported alongside plain rule sets.
        Comments and other children are ignored.
        """
        rules = []
        for child in block_node.children:
            extracted = self._extract_node(child, source_code)
            if extracted:
                rules.append(extracted)
        return rules

    def _extract_rule_set(
        self, node: Node, source_code: bytes
    ) -> Optional[StructureNode]:
        """Extract a CSS rule set (selector + declarations).

        Returns None when the rule set has no selectors.
        """
        selectors = []
        declaration_count = 0
        has_variables = False

        for child in node.children:
            if child.type == "selectors":
                selectors = self._extract_selectors(child, source_code)
            elif child.type == "block":
                declaration_count, has_variables = self._count_declarations(
                    child, source_code
                )

        if not selectors:
            return None

        # Tag the rule with coarse selector characteristics.
        modifiers = []
        if any(":root" in s for s in selectors):
            modifiers.append("root")
        if any(s.startswith(".") for s in selectors):
            modifiers.append("class")
        if any(s.startswith("#") for s in selectors):
            modifiers.append("id")
        if any(":" in s for s in selectors):
            modifiers.append("has-pseudo")
        if has_variables:
            modifiers.append("has-variables")

        # Name after the first selector, noting how many more there are.
        name = selectors[0]
        if len(selectors) > 1:
            name = f"{name} (+{len(selectors) - 1})"

        return StructureNode(
            type="rule_set",
            name=_truncate(name),
            start_line=node.start_point[0] + 1,
            end_line=node.end_point[0] + 1,
            signature=f"{len(selectors)} sel, {declaration_count} decl",
            modifiers=modifiers,
            complexity={
                "selectors": len(selectors),
                "declarations": declaration_count,
            },
        )

    def _extract_selectors(
        self, selectors_node: Node, source_code: bytes
    ) -> list[str]:
        """Split a selectors node into individual, normalised selectors.

        Children are grouped at "," tokens; the text of each group is joined
        with single spaces and normalised.
        """
        selectors = []
        pieces = []

        def flush() -> None:
            if pieces:
                selectors.append(self._normalize_signature(" ".join(pieces)))
                pieces.clear()

        for child in selectors_node.children:
            if child.type == ",":
                flush()
                continue
            text = self._get_node_text(child, source_code).strip()
            if text:
                pieces.append(text)
        flush()

        return selectors

    def _count_declarations(
        self, block_node: Node, source_code: bytes
    ) -> tuple[int, bool]:
        """Count declarations in a block and detect custom properties.

        Returns ``(count, has_variables)`` where ``has_variables`` is True if
        any declared property name starts with ``--``.
        """
        count = 0
        has_variables = False

        for child in block_node.children:
            if child.type != "declaration":
                continue
            count += 1
            if has_variables:
                continue  # already known; only the count still matters
            has_variables = any(
                prop.type == "property_name"
                and self._get_node_text(prop, source_code).startswith("--")
                for prop in child.children
            )

        return count, has_variables

    def _extract_comment_title(self, comment: str) -> str:
        """Extract a short title (at most 50 chars) from a comment.

        The ``/*`` / ``*/`` delimiters and the ``!`` marker are removed as
        delimiters (not as a character set), so punctuation that belongs to
        the title, such as a trailing "!", is preserved.  The title is the
        first non-empty line, without decorative asterisks; "comment" is
        returned when nothing is left.
        """
        text = comment.strip()
        if text.startswith("/*"):
            text = text[2:]
        if text.endswith("*/"):
            text = text[:-2]
        text = text.lstrip("!")

        for line in text.splitlines():
            title = line.strip().strip("*").strip()
            if title:
                return _truncate(title)
        return "comment"

    def _fallback_extract(self, source_code: bytes) -> list[StructureNode]:
        """Regex-based extraction for malformed CSS files.

        Results are grouped by kind (imports, media queries, keyframes,
        font faces, :root, class selectors, id selectors) rather than sorted
        by position.  Only the starting line of each construct is known, so
        ``end_line`` equals ``start_line``.
        """
        text = source_code.decode("utf-8", errors="replace")

        # Offsets at which each line after the first begins; lets every match
        # be mapped to a line number in O(log n) instead of rescanning text.
        line_starts = [m.end() for m in re.finditer("\n", text)]

        def line_of(offset: int) -> int:
            return bisect_right(line_starts, offset) + 1

        structures = []

        def add(node_type: str, name: str, offset: int, modifiers, **extra):
            line_num = line_of(offset)
            structures.append(StructureNode(
                type=node_type,
                name=name,
                start_line=line_num,
                end_line=line_num,
                modifiers=modifiers,
                **extra,
            ))

        for match in _IMPORT_RE.finditer(text):
            add("import", match.group(1) or match.group(2), match.start(),
                ["import"])

        for match in _MEDIA_RE.finditer(text):
            query = match.group(1).strip()
            add("media_query",
                query[:50] + "..." if len(query) > 50 else query,
                match.start(), ["media"], signature=query)

        for match in _KEYFRAMES_RE.finditer(text):
            add("keyframes", match.group(1), match.start(), ["keyframes"])

        for match in _FONT_FACE_RE.finditer(text):
            add("font_face", "@font-face", match.start(), ["font-face"])

        # :root rules are assumed to hold CSS variables.
        for match in _ROOT_RE.finditer(text):
            add("rule_set", ":root", match.start(), ["root", "has-variables"])

        # The class/id patterns start with ^\s*, which can swallow preceding
        # blank lines; anchor the line number on the selector itself.
        for pattern, modifier in ((_CLASS_RE, "class"), (_ID_RE, "id")):
            for match in pattern.finditer(text):
                add("rule_set", _truncate(match.group(1).strip()),
                    match.start(1), [modifier])

        return structures

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mariusei/file-scanner-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

css_scanner.py•20.8 KiB