Web Research Assistant

Overview Inspect Schema Related Servers Score Discussions

MIT License

service_health.py•21.8 kB

"""Service health and status page monitoring.""" from __future__ import annotations import re from dataclasses import dataclass, field from datetime import datetime @dataclass class ServiceComponent: """Status of a service component.""" name: str status: str @dataclass class ServiceStatus: """Overall service health status.""" service: str status: str status_page_url: str | None = None checked_at: str | None = None current_incidents: list[str] = field(default_factory=list) components: list[ServiceComponent] = field(default_factory=list) class StatusPageDetector: """Detect and find status pages for services.""" # Known service → status page mappings KNOWN_STATUS_PAGES = { # Payment & Finance "stripe": "https://status.stripe.com", "paypal": "https://www.paypal-status.com", "plaid": "https://status.plaid.com", # Code & DevOps "github": "https://www.githubstatus.com", "gitlab": "https://status.gitlab.com", "bitbucket": "https://bitbucket.status.atlassian.com", "vercel": "https://www.vercel-status.com", "netlify": "https://www.netlifystatus.com", "heroku": "https://status.heroku.com", "docker": "https://status.docker.com", "dockerhub": "https://status.docker.com", "npm": "https://status.npmjs.org", "pypi": "https://status.python.org", "circleci": "https://status.circleci.com", # AI & ML Services "openai": "https://status.openai.com", "anthropic": "https://status.anthropic.com", "claude": "https://status.anthropic.com", "claudeapi": "https://status.anthropic.com", "anthropicclaudeapi": "https://status.anthropic.com", "gemini": "https://status.cloud.google.com", "googlegemini": "https://status.cloud.google.com", "googlegeminiapi": "https://status.cloud.google.com", "vertexai": "https://status.cloud.google.com", "googlecloudvertexai": "https://status.cloud.google.com", "googlecloud": "https://status.cloud.google.com", "replicate": "https://replicate.statuspage.io", "huggingface": "https://status.huggingface.co", "hf": "https://status.huggingface.co", "cohere": 
"https://status.cohere.com", "mistral": "https://status.mistral.ai", "mistralai": "https://status.mistral.ai", "together": "https://status.together.ai", "togetherai": "https://status.together.ai", "groq": "https://status.groq.com", "perplexity": "https://status.perplexity.ai", "perplexityai": "https://status.perplexity.ai", # Image/Video AI "fal": "https://fal.statuspage.io", "falai": "https://fal.statuspage.io", "midjourney": "https://status.midjourney.com", "stability": "https://status.stability.ai", "stabilityai": "https://status.stability.ai", "runway": "https://status.runwayml.com", "runwayml": "https://status.runwayml.com", "leonardo": "https://status.leonardo.ai", "leonardoai": "https://status.leonardo.ai", "ideogram": "https://status.ideogram.ai", "flux": "https://status.bfl.ml", "bfl": "https://status.bfl.ml", "blackforestlabs": "https://status.bfl.ml", "blackforestlabsbflfluxapi": "https://status.bfl.ml", "bflblackforestlabsfluxapi": "https://status.bfl.ml", # Voice/Audio AI "elevenlabs": "https://status.elevenlabs.io", "11labs": "https://status.elevenlabs.io", "resemble": "https://status.resemble.ai", "assemblyai": "https://status.assemblyai.com", "deepgram": "https://status.deepgram.com", # Video AI "heygen": "https://status.heygen.com", "descript": "https://status.descript.com", "luma": "https://status.lumalabs.ai", "lumalabs": "https://status.lumalabs.ai", "pika": "https://status.pika.art", "sync": "https://status.sync.so", "syncso": "https://status.sync.so", "synclabs": "https://status.sync.so", # Cloud Providers "aws": "https://health.aws.amazon.com/health/status", "amazon": "https://health.aws.amazon.com/health/status", "gcp": "https://status.cloud.google.com", "googlecloudplatform": "https://status.cloud.google.com", "azure": "https://status.azure.com", "microsoft": "https://status.azure.com", "digitalocean": "https://status.digitalocean.com", "linode": "https://status.linode.com", "vultr": "https://status.vultr.com", "render": 
"https://status.render.com", "railway": "https://railway.instatus.com", "fly": "https://status.fly.io", "flyio": "https://status.fly.io", # Databases "mongodb": "https://status.mongodb.com", "supabase": "https://status.supabase.com", "planetscale": "https://www.planetscalestatus.com", "neon": "https://neonstatus.com", "fauna": "https://status.fauna.com", "redis": "https://status.redis.com", "upstash": "https://status.upstash.com", "cockroachdb": "https://status.cockroachlabs.cloud", # Communication "twilio": "https://status.twilio.com", "sendgrid": "https://status.sendgrid.com", "mailgun": "https://status.mailgun.com", "postmark": "https://status.postmarkapp.com", "slack": "https://status.slack.com", "discord": "https://discordstatus.com", "zoom": "https://status.zoom.us", "intercom": "https://www.intercomstatus.com", # CDN & DNS "cloudflare": "https://www.cloudflarestatus.com", "fastly": "https://status.fastly.com", "akamai": "https://cloudharmony.com/status-for-akamai", # Auth & Identity "auth0": "https://status.auth0.com", "okta": "https://status.okta.com", "clerk": "https://status.clerk.com", # Analytics & Monitoring "datadog": "https://status.datadoghq.com", "newrelic": "https://status.newrelic.com", "sentry": "https://status.sentry.io", "mixpanel": "https://status.mixpanel.com", "amplitude": "https://status.amplitude.com", "segment": "https://status.segment.com", "posthog": "https://status.posthog.com", # Other "notion": "https://status.notion.so", "airtable": "https://status.airtable.com", "figma": "https://status.figma.com", "linear": "https://linearstatus.com", "jira": "https://jira-software.status.atlassian.com", "confluence": "https://confluence.status.atlassian.com", "atlassian": "https://status.atlassian.com", "shopify": "https://www.shopifystatus.com", "algolia": "https://status.algolia.com", "pinecone": "https://status.pinecone.io", "weaviate": "https://status.weaviate.io", "qdrant": "https://status.qdrant.io", "milvus": "https://status.milvus.io", } 
# Service name aliases - map variations to canonical names SERVICE_ALIASES = { # Anthropic/Claude variations "anthropic claude": "anthropic", "anthropic claude api": "anthropic", "claude api": "anthropic", "claude": "anthropic", # Google variations "google cloud": "gcp", "google cloud platform": "gcp", "google cloud vertex ai": "vertexai", "vertex ai": "vertexai", "google gemini": "gemini", "google gemini api": "gemini", "gemini api": "gemini", # Fal variations "fal.ai": "fal", "fal ai": "fal", "fal.ai api": "fal", # BFL/Flux variations "black forest labs": "bfl", "black forest labs flux": "bfl", "bfl flux": "bfl", "flux api": "bfl", "black forest labs bfl flux api": "bfl", "bfl black forest labs flux api": "bfl", # Sync variations "sync.so": "sync", "sync labs": "sync", # Other common variations "eleven labs": "elevenlabs", "stability ai": "stability", "runway ml": "runway", "leonardo ai": "leonardo", "hugging face": "huggingface", "together ai": "together", "mistral ai": "mistral", "perplexity ai": "perplexity", "luma labs": "luma", "fly.io": "fly", } # Common patterns to try STATUS_PAGE_PATTERNS = [ "https://status.{service}.com", "https://status.{service}.io", "https://status.{service}.ai", "https://{service}.statuspage.io", "https://{service}.instatus.com", "https://{service}status.com", "https://www.{service}status.com", "https://{service}.com/status", ] def normalize_service_name(self, service: str) -> str: """Normalize service name using aliases.""" # Clean up the input service_lower = service.lower().strip() # Check aliases first if service_lower in self.SERVICE_ALIASES: return self.SERVICE_ALIASES[service_lower] # Try partial matching for aliases for alias, canonical in self.SERVICE_ALIASES.items(): if alias in service_lower or service_lower in alias: return canonical # Remove common suffixes and clean up cleaned = service_lower for suffix in [" api", " status", " service"]: if cleaned.endswith(suffix): cleaned = cleaned[: -len(suffix)].strip() # Remove 
spaces, dots, dashes for lookup cleaned = cleaned.replace(" ", "").replace(".", "").replace("-", "") return cleaned def find_status_page(self, service: str) -> str | None: """Find status page URL for a service.""" # Normalize the service name normalized = self.normalize_service_name(service) # Check known mappings first if normalized in self.KNOWN_STATUS_PAGES: return self.KNOWN_STATUS_PAGES[normalized] # Also try the raw cleaned name (no alias resolution) raw_cleaned = service.lower().replace(" ", "").replace(".", "").replace("-", "") if raw_cleaned in self.KNOWN_STATUS_PAGES: return self.KNOWN_STATUS_PAGES[raw_cleaned] # Try common patterns with normalized name for pattern in self.STATUS_PAGE_PATTERNS: url = pattern.format(service=normalized) return url # Return first pattern to try return None class StatusPageParser: """Parse status pages and extract health information.""" def parse_status_page(self, html: str, service: str) -> ServiceStatus: """Parse status page HTML.""" from bs4 import BeautifulSoup soup = BeautifulSoup(html, "html.parser") status = ServiceStatus(service=service, status="unknown") # Extract overall status - common patterns status_indicators = [ soup.find("span", class_=re.compile(r"status", re.I)), soup.find("div", class_=re.compile(r"status", re.I)), soup.find(text=re.compile(r"all systems? 
(operational|normal)", re.I)), soup.find(text=re.compile(r"(no|zero) (active )?incidents?", re.I)), ] for indicator in status_indicators: if indicator: text = indicator.get_text() if hasattr(indicator, "get_text") else str(indicator) status.status = self._normalize_status(text) if status.status != "unknown": break # If still unknown, check for keywords in page html_lower = html.lower() if status.status == "unknown": if "all systems operational" in html_lower or "all systems normal" in html_lower: status.status = "operational" elif "no active incidents" in html_lower or "no incidents" in html_lower: status.status = "operational" elif "investigating" in html_lower or "identified" in html_lower: status.status = "degraded_performance" elif "outage" in html_lower or "down" in html_lower: status.status = "partial_outage" elif "maintenance" in html_lower: status.status = "under_maintenance" # Extract current incidents incident_elements = soup.find_all(["div", "section"], class_=re.compile(r"incident", re.I)) for incident in incident_elements[:3]: # Max 3 title_elem = incident.find( ["h3", "h4", "span"], class_=re.compile(r"(title|name)", re.I) ) if title_elem: status.current_incidents.append(title_elem.get_text(strip=True)) # Extract components component_elements = soup.find_all("div", class_=re.compile(r"component", re.I)) for comp in component_elements[:10]: # Max 10 name_elem = comp.find(["span", "div"], class_=re.compile(r"name", re.I)) status_elem = comp.find(["span", "div"], class_=re.compile(r"status", re.I)) if name_elem and status_elem: component = ServiceComponent( name=name_elem.get_text(strip=True), status=self._normalize_status(status_elem.get_text(strip=True)), ) status.components.append(component) return status def _normalize_status(self, status_text: str) -> str: """Normalize status text to standard values.""" status_lower = status_text.lower() if any( word in status_lower for word in ["operational", "normal", "ok", "all systems", "up"] ): return 
"operational" elif any(word in status_lower for word in ["degraded", "slow", "performance"]): return "degraded_performance" elif any(word in status_lower for word in ["partial", "some", "limited"]): return "partial_outage" elif any(word in status_lower for word in ["major", "down", "outage", "offline"]): return "major_outage" elif "maintenance" in status_lower: return "under_maintenance" else: return "unknown" def get_status_emoji(self, status: str) -> str: """Get emoji for status.""" emoji_map = { "operational": "✅", "degraded_performance": "⚠️", "partial_outage": "⚠️", "major_outage": "🚨", "under_maintenance": "🔧", "unknown": "❓", } return emoji_map.get(status, "❓") class ServiceHealthChecker: """Check health of external services.""" def __init__(self, crawler_client): """Initialize with crawler client.""" self.crawler = crawler_client self.detector = StatusPageDetector() self.parser = StatusPageParser() async def _check_url_accessible(self, url: str) -> tuple[bool, int | None]: """Check if URL is accessible via HTTP HEAD request.""" import httpx try: async with httpx.AsyncClient( timeout=10.0, follow_redirects=True, headers={"User-Agent": "Mozilla/5.0 (compatible; StatusChecker/1.0)"}, ) as client: response = await client.head(url) return response.status_code < 400, response.status_code except Exception: return False, None async def _fetch_statuspage_api(self, status_url: str) -> dict | None: """Try to fetch status from Statuspage.io API (many services use this).""" import httpx # Statuspage.io has a standard API endpoint # e.g., https://status.example.com/api/v2/status.json api_patterns = [ f"{status_url.rstrip('/')}/api/v2/status.json", f"{status_url.rstrip('/')}/api/v2/summary.json", ] async with httpx.AsyncClient( timeout=10.0, follow_redirects=True, headers={"User-Agent": "Mozilla/5.0 (compatible; StatusChecker/1.0)"}, ) as client: for api_url in api_patterns: try: response = await client.get(api_url) if response.status_code == 200: return response.json() 
except Exception: continue return None def _parse_statuspage_api_response(self, data: dict, service: str) -> ServiceStatus: """Parse Statuspage.io API response.""" status = ServiceStatus(service=service, status="unknown") # Parse status indicator if "status" in data: indicator = data["status"].get("indicator", "none") description = data["status"].get("description", "") indicator_map = { "none": "operational", "minor": "degraded_performance", "major": "partial_outage", "critical": "major_outage", "maintenance": "under_maintenance", } status.status = indicator_map.get(indicator, "unknown") if description: status.current_incidents.append(description) # Parse components if "components" in data: for comp in data.get("components", [])[:10]: comp_status_map = { "operational": "operational", "degraded_performance": "degraded_performance", "partial_outage": "partial_outage", "major_outage": "major_outage", "under_maintenance": "under_maintenance", } component = ServiceComponent( name=comp.get("name", "Unknown"), status=comp_status_map.get(comp.get("status", ""), "unknown"), ) status.components.append(component) # Parse incidents if "incidents" in data: for incident in data.get("incidents", [])[:3]: name = incident.get("name", "") if name: status.current_incidents.append(name) return status async def check_service(self, service: str) -> dict: """Check service health status.""" # Find status page status_url = self.detector.find_status_page(service) if not status_url: return { "service": service, "status": "unknown", "status_emoji": "❓", "error": "Could not find status page for this service", "suggestion": f"Try checking {service}.com/status or searching for '{service} status page'", } # Strategy 1: Try Statuspage.io API (many services use this - it's more reliable) api_data = await self._fetch_statuspage_api(status_url) if api_data: status = self._parse_statuspage_api_response(api_data, service) status.status_page_url = status_url status.checked_at = 
datetime.utcnow().isoformat() + "Z" return self._format_status_response(status) # Strategy 2: Try crawling the page try: html = await self.crawler.fetch_raw(status_url, max_chars=200000) if html and len(html.strip()) > 100: # Parse status from HTML status = self.parser.parse_status_page(html, service) status.status_page_url = status_url status.checked_at = datetime.utcnow().isoformat() + "Z" return self._format_status_response(status) except Exception: pass # Fall through to HTTP check # Strategy 3: Fallback - just check if URL is accessible accessible, http_code = await self._check_url_accessible(status_url) if accessible: # Page is up but we couldn't parse it (likely JS-rendered) return { "service": service, "status": "unknown", "status_emoji": "❓", "status_page_url": status_url, "checked_at": datetime.utcnow().isoformat() + "Z", "message": "Status page is accessible but requires JavaScript to render. Please check manually.", "note": f"Visit {status_url} to see current status", } else: return { "service": service, "status": "unknown", "status_emoji": "❓", "status_page_url": status_url, "error": f"Status page returned HTTP {http_code}" if http_code else "Status page unreachable", } def _format_status_response(self, status: ServiceStatus) -> dict: """Format status object as response dict.""" response = { "service": status.service, "status": status.status, "status_emoji": self.parser.get_status_emoji(status.status), "status_page_url": status.status_page_url, "checked_at": status.checked_at, } if status.current_incidents: response["current_incidents"] = status.current_incidents else: response["current_incidents"] = [] response["message"] = "No active incidents reported" if status.components: response["components"] = [ {"name": comp.name, "status": comp.status} for comp in status.components[:10] ] return response

Implementation Reference

check_service_status

Latest Blog Posts

The State of MCP in 2025: Who's Building What and Why It Matters
By punkpeye on December 7, 2025.
mcp
startups
MCP hosting with persistent storage
By punkpeye on December 6, 2025.
changelog
The Operational Metrics and Agent Analytics Driving Successful Model Context Protocol (MCP) Servers
By Om-Shree-0709 on December 6, 2025.
observability
mcp
Agent-Analytics

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/elad12390/web-research-assistant'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.