"""
Web scraping module for Mesh Design System
Handles scraping of components, documentation, and design tokens
"""
import logging
import re
from typing import Any, Dict, List, Optional

from bs4 import BeautifulSoup
from playwright.async_api import async_playwright, Page

logger = logging.getLogger(__name__)

class MeshScraper:
"""Scraper for Mesh Design System components and documentation"""
def __init__(self):
self.base_url = "https://www.meshdesignsystem.com"
self.storybook_url = "https://storybook.meshdesignsystem.com" # Adjust as needed
self.design_tokens_url = f"{self.base_url}/design-tokens/tokens-reference"
self.timeout = 30000 # 30 seconds
async def scrape_components_list(self) -> List[str]:
"""Scrape the list of all available components"""
try:
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
page = await browser.new_page()
# Navigate to main components page
await page.goto(f"{self.base_url}/components", timeout=self.timeout)
await page.wait_for_load_state('networkidle')
# Extract component names from navigation or component grid
component_links = await page.query_selector_all('[href*="/components/"]')
components = []
for link in component_links:
href = await link.get_attribute('href')
text = await link.text_content()
if href and '/components/' in href and text:
# Extract component name from URL or text
component_name = text.strip()
if component_name and component_name not in components:
components.append(component_name)
                # If we didn't find components via navigation, try alternative
                # methods; this must run before the browser is closed, since it
                # reuses the still-open page
                if not components:
                    components = await self._scrape_components_alternative(page)
                await browser.close()
# Fallback to hardcoded list from PRD if scraping fails
if not components:
logger.warning("Falling back to hardcoded component list from PRD")
components = [
"Accordion", "Alert", "Autocomplete", "Button", "Card",
"Checkbox", "Checkbox Group", "Copy", "Date Picker",
"Date Textbox", "Divider", "Error Template", "Expander",
"Expander Group", "Feature Panel", "File Upload", "Footer",
"Fonts", "Form", "Form Control", "Grow Layout", "Header",
"Header Footer Layout", "Heading", "Hero Panel", "Icons",
"Info Box", "Link", "Loader", "Logo", "Modal", "ModeProvider",
"Overlay", "Product Card", "Progress Stepper", "Radio",
"Radio Button", "Radio Group", "React HTML", "Select",
"Simple Table", "Skip Link", "Table", "Tabs", "Tag",
"Textarea", "Textbox", "Theme", "Tooltip", "Utility Button",
"Villain Panel"
]
logger.info(f"Found {len(components)} components")
return components
except Exception as e:
logger.error(f"Error scraping components list: {str(e)}")
raise
async def _scrape_components_alternative(self, page: Page) -> List[str]:
"""Alternative method to scrape components if primary method fails"""
try:
# Try to find components in the page content
content = await page.content()
soup = BeautifulSoup(content, 'html.parser')
components = []
# Look for common patterns that might indicate component names
# This is a fallback and might need adjustment based on actual site structure
for element in soup.find_all(['h1', 'h2', 'h3', 'h4', 'a', 'span']):
text = element.get_text(strip=True)
# Simple heuristic to identify component names
if text and len(text) < 50 and any(word in text.lower() for word in ['button', 'card', 'input', 'modal', 'tab']):
if text not in components:
components.append(text)
return components
except Exception as e:
logger.error(f"Error in alternative component scraping: {str(e)}")
return []
async def scrape_component_details(self, component_name: str) -> Optional[Dict[str, Any]]:
"""Scrape detailed information for a specific component"""
try:
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
page = await browser.new_page()
# Try multiple URL patterns for the component
possible_urls = [
f"{self.base_url}/components/{component_name.lower().replace(' ', '-')}",
f"{self.base_url}/components/{component_name.lower().replace(' ', '_')}",
f"{self.storybook_url}/?path=/docs/{component_name.lower().replace(' ', '-')}",
]
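                # e.g. "Date Picker" -> f"{self.base_url}/components/date-picker"
                # (slug pattern assumed; the live site may use a different scheme)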
component_details = None
for url in possible_urls:
try:
                        response = await page.goto(url, timeout=self.timeout)
                        await page.wait_for_load_state('networkidle')
                        # Skip URLs that return an error status or render a 404 page;
                        # the status check is more reliable than comparing page.url,
                        # which breaks on benign redirects
                        if (response and response.status >= 400) or "404" in await page.title():
                            continue
component_details = await self._extract_component_details(page, component_name)
if component_details:
break
except Exception as e:
logger.debug(f"Failed to load {url}: {str(e)}")
continue
await browser.close()
return component_details
except Exception as e:
logger.error(f"Error scraping component details for {component_name}: {str(e)}")
return None
    async def _extract_component_details(self, page: Page, component_name: str) -> Optional[Dict[str, Any]]:
"""Extract component details from the current page"""
try:
content = await page.content()
soup = BeautifulSoup(content, 'html.parser')
details = {
"name": component_name,
"description": "",
"props": {},
"codeExamples": [],
"storybookUrl": page.url,
"designGuidance": ""
}
# Extract description
description_selectors = [
'p:first-of-type',
'.description',
'[class*="description"]',
'h1 + p',
'h2 + p'
]
for selector in description_selectors:
desc_element = soup.select_one(selector)
if desc_element:
details["description"] = desc_element.get_text(strip=True)
break
# Extract props information
# Look for prop tables or prop documentation
prop_tables = soup.find_all('table')
for table in prop_tables:
headers = [th.get_text(strip=True).lower() for th in table.find_all('th')]
if any(header in ['prop', 'property', 'name'] for header in headers):
props = self._extract_props_from_table(table)
details["props"].update(props)
            # Extract code examples, skipping duplicates from nested <pre><code> pairs
            # and matching both "Checkbox Group" and "CheckboxGroup" style names
            code_blocks = soup.find_all(['pre', 'code'])
            name_variants = {component_name.lower(), component_name.lower().replace(' ', '')}
            for code_block in code_blocks:
                code_text = code_block.get_text(strip=True)
                lowered = code_text.lower()
                if (len(code_text) > 10
                        and any(variant in lowered for variant in name_variants)
                        and code_text not in details["codeExamples"]):
                    details["codeExamples"].append(code_text)
# Extract design guidance
guidance_selectors = [
'.guidance',
'.guidelines',
'[class*="guidance"]',
'[class*="guideline"]'
]
for selector in guidance_selectors:
guidance_element = soup.select_one(selector)
if guidance_element:
details["designGuidance"] = guidance_element.get_text(strip=True)
break
return details
except Exception as e:
logger.error(f"Error extracting component details: {str(e)}")
return None
def _extract_props_from_table(self, table) -> Dict[str, Any]:
"""Extract props information from an HTML table"""
props = {}
try:
rows = table.find_all('tr')
if not rows:
return props
# Get headers
header_row = rows[0]
headers = [th.get_text(strip=True).lower() for th in header_row.find_all(['th', 'td'])]
# Map common header names
name_idx = next((i for i, h in enumerate(headers) if h in ['name', 'prop', 'property']), -1)
type_idx = next((i for i, h in enumerate(headers) if h in ['type', 'data type']), -1)
desc_idx = next((i for i, h in enumerate(headers) if h in ['description', 'desc']), -1)
default_idx = next((i for i, h in enumerate(headers) if h in ['default', 'default value']), -1)
            # Extract prop rows, skipping any row where the name cannot be identified
            # (previously prop_name could be referenced unbound when name_idx was -1)
            for row in rows[1:]:
                cells = row.find_all(['td', 'th'])
                if len(cells) < 2:
                    continue
                if name_idx < 0 or name_idx >= len(cells):
                    continue
                prop_name = cells[name_idx].get_text(strip=True)
                if not prop_name:
                    continue
                prop_info = {}
                if 0 <= type_idx < len(cells):
                    prop_info['type'] = cells[type_idx].get_text(strip=True)
                if 0 <= desc_idx < len(cells):
                    prop_info['description'] = cells[desc_idx].get_text(strip=True)
                if 0 <= default_idx < len(cells):
                    prop_info['default'] = cells[default_idx].get_text(strip=True)
                props[prop_name] = prop_info
except Exception as e:
logger.error(f"Error extracting props from table: {str(e)}")
return props
async def scrape_design_tokens(self, token_type: str = "all") -> Dict[str, Any]:
"""Scrape design tokens from the design tokens reference page"""
try:
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
page = await browser.new_page()
await page.goto(self.design_tokens_url, timeout=self.timeout)
await page.wait_for_load_state('networkidle')
content = await page.content()
soup = BeautifulSoup(content, 'html.parser')
tokens = {}
if token_type == "all" or token_type == "colors":
tokens["colors"] = await self._extract_color_tokens(soup)
if token_type == "all" or token_type == "typography":
tokens["typography"] = await self._extract_typography_tokens(soup)
if token_type == "all" or token_type == "spacing":
tokens["spacing"] = await self._extract_spacing_tokens(soup)
await browser.close()
return tokens
except Exception as e:
logger.error(f"Error scraping design tokens: {str(e)}")
# Return basic fallback tokens
return {
"colors": {"primary": "#0066CC", "secondary": "#6C757D", "success": "#28A745"},
"typography": {"fontFamily": "Inter, system-ui, sans-serif"},
"spacing": {"small": "8px", "medium": "16px", "large": "24px"}
}
    def _extract_color_tokens(self, soup: BeautifulSoup) -> Dict[str, Any]:
"""Extract color tokens from the page"""
colors = {}
# Look for color swatches or color definitions
color_elements = soup.find_all(['div', 'span'], class_=re.compile(r'color|swatch', re.I))
for element in color_elements:
# Try to extract color name and value
color_name = element.get_text(strip=True)
style = element.get('style', '')
            # Extract hex colors from the style attribute (3- or 6-digit form)
            hex_match = re.search(r'#(?:[0-9A-Fa-f]{6}|[0-9A-Fa-f]{3})\b', style)
            if hex_match and color_name:
                colors[color_name] = hex_match.group(0)
return colors if colors else {"primary": "#0066CC", "secondary": "#6C757D"}
    def _extract_typography_tokens(self, soup: BeautifulSoup) -> Dict[str, Any]:
"""Extract typography tokens from the page"""
typography = {}
# Look for typography-related elements
typo_elements = soup.find_all(['div', 'section'], class_=re.compile(r'typography|font', re.I))
for element in typo_elements:
text = element.get_text(strip=True)
            # Extract the declared font stack instead of assuming one
            match = re.search(r'font-family:\s*([^;]+)', text, re.I)
            if match:
                typography["fontFamily"] = match.group(1).strip()
return typography if typography else {"fontFamily": "Inter, system-ui, sans-serif"}
    def _extract_spacing_tokens(self, soup: BeautifulSoup) -> Dict[str, Any]:
"""Extract spacing tokens from the page"""
spacing = {}
# Look for spacing-related elements
spacing_elements = soup.find_all(['div', 'section'], class_=re.compile(r'spacing|margin|padding', re.I))
for element in spacing_elements:
text = element.get_text(strip=True)
            # Take up to three spacing values and label them small/medium/large
            # (a heuristic; real token names would require the actual page structure)
            if any(unit in text for unit in ['px', 'rem', 'em']):
                numbers = re.findall(r'\d+(?:px|rem|em)', text)
                for name, value in zip(['small', 'medium', 'large'], numbers):
                    spacing[name] = value
return spacing if spacing else {"small": "8px", "medium": "16px", "large": "24px"}