Skeleton MCP Server

aria_processor.py•7.56 KiB

"""
ARIA Snapshot Processor

Processes ARIA snapshots: parsing, querying, and formatting.
"""

from typing import Any

import mistune
import yaml
from aria_snapshot_parser import AriaSnapshotParser, AriaSnapshotSerializer

from .jmespath_extensions import search_with_custom_functions


def parse_aria_snapshot(yaml_text: str) -> tuple[Any, list[str]]:
    """
    Parse ARIA YAML snapshot to JSON.

    Args:
        yaml_text: ARIA snapshot in YAML format (may be wrapped in markdown)

    Returns:
        Tuple of (json_data, error_messages)
        - json_data: Parsed snapshot as JSON-serializable data, or None if
          parse failed
        - error_messages: List of error messages (empty if successful)
    """
    try:
        # Extract YAML from markdown if wrapped in code fence
        cleaned_yaml = _extract_yaml_from_markdown(yaml_text)

        parser = AriaSnapshotParser()
        tree, errors = parser.parse(cleaned_yaml)

        if errors:
            # Prefix each message with its line number when the parser
            # reported one.
            return None, [
                f"Line {err.line}: {err.message}"
                if err.line is not None
                else err.message
                for err in errors
            ]

        serializer = AriaSnapshotSerializer()
        return serializer.to_dict(tree), []
    except Exception as e:
        # Boundary of the public API: failures are reported through the
        # error list instead of being raised to the caller.
        return None, [f"Failed to parse ARIA snapshot: {e}"]


def _yaml_from_code_fence(text: str) -> str | None:
    """
    Return the content of the first YAML-looking markdown code block.

    Only top-level nodes of the markdown AST are inspected. A code block
    qualifies when the first word of its info string is ``yaml`` or ``yml``
    (case-insensitive) or when it has no info string at all.

    Returns:
        The block's raw content without trailing newlines, or None when no
        qualifying non-empty block exists or markdown parsing fails.
    """
    try:
        markdown = mistune.create_markdown(renderer='ast')
        ast = markdown(text)

        for node in ast:
            if not isinstance(node, dict) or node.get('type') != 'block_code':
                continue
            attrs = node.get('attrs', {})
            if not isinstance(attrs, dict):
                continue

            # Only the first word of the info string names the language;
            # anything after it (e.g. "yaml title=snapshot") is metadata.
            # A missing or non-string info string counts as "no language".
            info = attrs.get('info')
            words = info.split() if isinstance(info, str) else []
            language = words[0].lower() if words else ''

            if language in ('yaml', 'yml', ''):
                raw_content = node.get('raw', '')
                if raw_content:
                    return raw_content.rstrip('\n')
    except Exception:
        # Best effort: if markdown parsing fails, the caller falls back to
        # the line-based heuristic.
        pass
    return None


def _yaml_from_list_heuristic(text: str) -> str | None:
    """
    Return the run of YAML-looking lines starting at the first list item.

    The run starts at the first line whose left-stripped content begins with
    "- " and continues until a closing code fence or a non-empty line that is
    neither a list item nor indented (space or tab).

    Returns:
        The collected lines joined by newlines, or None if no line starts a
        YAML list.
    """
    lines = text.split('\n')
    start = next(
        (i for i, line in enumerate(lines) if line.lstrip().startswith('- ')),
        None,
    )
    if start is None:
        return None

    # The first line is always kept; the stop conditions only apply to the
    # lines that follow it.
    yaml_lines = [lines[start]]
    for line in lines[start + 1:]:
        content = line.strip()
        # Stop if we hit a closing fence
        if content == '```':
            break
        # Empty lines are OK, as are list items and indented (child) lines;
        # anything else means we have left the YAML block.
        if content and not (
            content.startswith('- ') or line.startswith((' ', '\t'))
        ):
            break
        yaml_lines.append(line)
    return '\n'.join(yaml_lines)


def _extract_yaml_from_markdown(text: str) -> str:
    """
    Extract YAML content from markdown, handling code fences and plain text.

    Playwright-mcp may return ARIA snapshots in various formats:
    1. Plain YAML (starts with "- ")
    2. Markdown with code fence (```yaml ... ```)
    3. Markdown with metadata headers before YAML
    4. Text content followed by ARIA tree

    Args:
        text: Raw text that may contain markdown-wrapped YAML

    Returns:
        Cleaned YAML text ready for parsing
    """
    # Quick check: if text starts with "- ", it's plain YAML
    if text.lstrip().startswith('- '):
        return text

    # Try to extract from markdown code blocks
    fenced = _yaml_from_code_fence(text)
    if fenced is not None:
        return fenced

    # Fallback: look for a YAML list starting with "- ", skipping preamble
    heuristic = _yaml_from_list_heuristic(text)
    if heuristic is not None:
        return heuristic

    # If no YAML list found, return original (will likely fail parsing)
    return text


def apply_jmespath_query(data: Any, expression: str) -> tuple[Any, str | None]:
    """
    Apply JMESPath query with custom functions.

    Args:
        data: Data to query
        expression: JMESPath expression

    Returns:
        Tuple of (result, error_message)
        - result: Query result; an empty list when the query yields None or
          raises
        - error_message: Error message if query failed, None otherwise
    """
    try:
        result = search_with_custom_functions(expression, data)
    except Exception as e:
        return ([], f"Invalid JMESPath query: {e}")
    # Return empty list if result is None
    return (result if result is not None else [], None)


def flatten_aria_tree(
    node: dict | list,
    depth: int = 0,
    parent_role: str | None = None,
    index_counter: list[int] | None = None
) -> list[dict]:
    """
    Flatten ARIA tree to depth-first list of nodes.

    Converts hierarchical ARIA snapshot into a flat list where each node is
    a standalone dict (a shallow copy without its 'children' key) with
    metadata about its position in the tree. Entries that are neither dicts
    nor lists are skipped. The walk uses an explicit stack, so tree depth is
    not limited by the interpreter's recursion limit.

    Args:
        node: ARIA tree (dict) or root array (list)
        depth: Current nesting level (0 = root)
        parent_role: Role of parent node (for context)
        index_counter: Mutable single-element list holding the next index to
            assign; it is advanced in place (internal use)

    Returns:
        Flat list of nodes with added metadata fields:
        - _depth: Nesting level (0 = root)
        - _parent_role: Role of parent node (None for root)
        - _index: Position in flattened list

    Example:
        >>> tree = [{"role": "document", "children": [{"role": "button"}]}]
        >>> flatten_aria_tree(tree)
        [
            {"role": "document", "_depth": 0, "_parent_role": None, "_index": 0},
            {"role": "button", "_depth": 1, "_parent_role": "document", "_index": 1}
        ]
    """
    if index_counter is None:
        index_counter = [0]

    result: list[dict] = []
    # Each stack entry is (subtree, depth of that subtree, role of its parent).
    stack: list[tuple[Any, int, str | None]] = [(node, depth, parent_role)]

    while stack:
        current, level, parent = stack.pop()

        if isinstance(current, list):
            # Push in reverse so the first item is popped (visited) first.
            stack.extend((item, level, parent) for item in reversed(current))
        elif isinstance(current, dict):
            # Copy the node and detach its children before adding metadata
            node_copy = {**current}
            children = node_copy.pop('children', None)

            node_copy['_depth'] = level
            node_copy['_parent_role'] = parent
            node_copy['_index'] = index_counter[0]
            index_counter[0] += 1
            result.append(node_copy)

            # Children are pushed last, so they are visited before any
            # pending siblings of this node (pre-order traversal).
            if children:
                stack.append((children, level + 1, current.get('role')))

    return result


def format_output(
    data: dict[str, Any] | list[dict[str, Any]],
    output_format: str
) -> str | dict[str, Any] | list[dict[str, Any]]:
    """
    Format data as JSON (raw) or YAML.

    Args:
        data: Data to format (dict or list of dicts)
        output_format: 'json' (case-insensitive) returns the data unchanged;
            any other value produces YAML

    Returns:
        The raw data for 'json', otherwise a YAML string
    """
    if output_format.lower() == "json":
        return data
    # yaml (default)
    return yaml.dump(data, default_flow_style=False, allow_unicode=True)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/nickweedon/playwritght-proxy-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

aria_processor.py•7.56 KiB