Webpage MCP Server

lib.py•1.79 kB

import json
import os
import re
import sys
import urllib.error
import urllib.request
from urllib.parse import urljoin, urlparse


def get_sitemap(website_url):
    """
    Fetches the sitemap.xml from a given website URL.

    Several conventional sitemap locations are tried in order and the first
    one answering with HTTP 200 wins. Progress is reported on stdout.

    Args:
        website_url: The base URL of the website (e.g., 'https://example.com').
            A bare host such as 'example.com' or 'example.com:8080' is
            accepted and defaults to HTTPS.

    Returns:
        The sitemap XML content as a string, or None if not found.
    """
    # Ensure the URL has a scheme. Checking the scheme alone is not enough:
    # on Python 3.9+ urlparse('example.com:8080') reports the host as the
    # scheme, so a real absolute URL must have a network location as well.
    parsed = urlparse(website_url)
    if not (parsed.scheme and parsed.netloc):
        website_url = 'https://' + website_url

    # Try common sitemap locations
    sitemap_paths = (
        '/sitemap.xml',
        '/sitemap_index.xml',
        '/sitemap',
    )

    for path in sitemap_paths:
        sitemap_url = urljoin(website_url, path)
        try:
            print(f"Trying: {sitemap_url}")
            with urllib.request.urlopen(sitemap_url, timeout=10) as response:
                if response.status == 200:
                    raw = response.read()
                    # Honour the charset declared by the server; fall back to
                    # lenient UTF-8 so that a sitemap which was actually found
                    # is not discarded over an encoding mismatch.
                    charset = response.headers.get_content_charset() or 'utf-8'
                    try:
                        content = raw.decode(charset)
                    except (LookupError, UnicodeDecodeError):
                        content = raw.decode('utf-8', errors='replace')
                    print(f"\n✓ Found sitemap at: {sitemap_url}\n")
                    return content
        except urllib.error.HTTPError as e:
            print(f" ✗ {e.code} - Not found")
        except urllib.error.URLError as e:
            print(f" ✗ Error: {e.reason}")
        except Exception as e:
            # Best-effort probing: any other failure just moves on to the
            # next candidate location.
            print(f" ✗ Error: {str(e)}")

    return None


def extract_json(text: str) -> str:
    """Extract JSON from text, handling markdown code blocks.

    Lookup order:
      1. A JSON object inside a fenced code block (``` or ```json).
      2. The first complete JSON object starting at the first '{'.
      3. Everything from the first '{' to the last '}' (legacy fallback).

    Args:
        text: Arbitrary text that may embed a JSON object.

    Returns:
        The extracted JSON substring, or ``text`` unchanged when it contains
        no brace-delimited span at all.
    """
    # Try to find JSON in code blocks first
    fenced = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
    if fenced:
        return fenced.group(1)

    # Parse a real JSON object at the first '{' so that trailing prose which
    # happens to contain braces is not swallowed into the result.
    start = text.find('{')
    if start != -1:
        try:
            _, end = json.JSONDecoder().raw_decode(text, start)
        except ValueError:
            pass  # Not valid JSON from here; use the greedy match below.
        else:
            return text[start:end]

    # Try to find raw JSON object
    greedy = re.search(r'\{.*\}', text, re.DOTALL)
    if greedy:
        return greedy.group(0)

    return text

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/brian-bfz/fireworks2'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.