# Usage Examples

Practical examples for using the Webpage MCP Server.

## Table of Contents

- [Basic Usage](#basic-usage)
- [Rate Limiting Examples](#rate-limiting-examples)
- [Integration Examples](#integration-examples)
- [Common Patterns](#common-patterns)
- [Advanced Usage](#advanced-usage)

---

## Basic Usage

### Example 1: List All Pages

Get all available pages from the sitemap:

```python
from src.main import list_pages

# Get all pages
pages = list_pages()

print(f"Found {len(pages)} pages")
for page in pages:
    print(f"  - {page}")

# Output:
# Found 5 pages
#   - /
#   - /blog
#   - /blog/yc-ankit-gupta-interview
#   - /marketplace
#   - /pricing
```

### Example 2: Fetch Homepage HTML

Retrieve the HTML content of the homepage:

```python
from src.main import get_page

# Fetch homepage
result = get_page("/")

if "error" not in result:
    print(f"URL: {result['url']}")
    print(f"Status: {result['status_code']}")
    print(f"Content-Type: {result['content_type']}")
    print("\nHTML Preview:")
    print(result['html'][:200] + "...")
else:
    print(f"Error: {result['message']}")

# Output:
# URL: https://example.com/
# Status: 200
# Content-Type: text/html
#
# HTML Preview:
# <!DOCTYPE html>
# <html>
# <head>
#   <title>Example Domain</title>
# ...
```

### Example 3: Access Sitemap Resource

Get the raw sitemap XML:

```python
from src.main import get_sitemap

sitemap_content = get_sitemap()
print(sitemap_content)

# Output:
# <?xml version="1.0" encoding="UTF-8"?>
# <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
#   <url>
#     <loc>https://example.com/</loc>
#     ...
#   </url>
# </urlset>
```
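
### Response Shapes

The examples in this document rely on the dictionaries returned by `get_page`. The sketch below summarizes the two shapes they assume — a successful fetch and a rate-limit error. Field names are inferred from the examples themselves rather than a formal schema, so treat this as a reference, not a contract.

```python
# Assumed shape of a successful get_page result (inferred from the examples):
success_result = {
    "url": "https://example.com/blog",  # fully qualified URL that was fetched
    "status_code": 200,                 # HTTP status returned by the site
    "content_type": "text/html",        # Content-Type header of the response
    "html": "<!DOCTYPE html>...",       # raw HTML body
}

# Assumed shape of a rate-limit error (inferred from the retry examples):
rate_limited_result = {
    "error": "Rate limit exceeded",     # sentinel value checked by the retry helpers
    "message": "Rate limit exceeded",   # human-readable description (assumed wording)
    "reset_in_seconds": 42,             # seconds until the limit resets
}
```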
---

## Rate Limiting Examples

### Example 4: Handle Rate Limits

Properly handle rate limit errors:

```python
from src.main import get_page
import time

def fetch_with_retry(path, user_id="default", max_retries=3):
    """Fetch page with automatic retry on rate limit"""
    for attempt in range(max_retries):
        result = get_page(path, user_id=user_id)

        if "error" in result and result["error"] == "Rate limit exceeded":
            wait_time = result["reset_in_seconds"]
            print(f"Rate limited. Waiting {wait_time} seconds...")
            time.sleep(wait_time)
            continue

        return result

    raise Exception("Max retries exceeded")

# Usage
page = fetch_with_retry("/blog", user_id="user123")
print(page["html"])
```

### Example 5: Multi-User Rate Limiting

Different users have separate rate limits:

```python
from src.main import get_page

# User 1 makes requests
for i in range(5):
    result = get_page("/", user_id="user1")
    print(f"User1 request {i+1}: {result.get('status_code', 'rate limited')}")

# User 2 has their own limit
for i in range(5):
    result = get_page("/", user_id="user2")
    print(f"User2 request {i+1}: {result.get('status_code', 'rate limited')}")

# Both users can make 10 requests each
```
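
### Pacing Requests Proactively

Rather than retrying after a rate-limit error, you can pace requests so the limit is never hit. The sketch below spaces calls evenly, assuming a limit of 10 requests per 60-second window per user; that window is an assumption for illustration, so substitute whatever limit your server actually enforces.

```python
from src.main import list_pages, get_page
import time

def fetch_paced(paths, user_id="default", limit=10, window_seconds=60):
    """Fetch pages sequentially, spacing requests so at most `limit`
    calls are made per `window_seconds`.

    NOTE: the 10-per-60-seconds figure is assumed for illustration;
    use the limit your server actually enforces.
    """
    min_interval = window_seconds / limit
    results = {}

    for path in paths:
        results[path] = get_page(path, user_id=user_id)
        time.sleep(min_interval)  # keep a steady pace below the limit

    return results

# Usage
pages = list_pages()
results = fetch_paced(pages)
fetched = sum(1 for r in results.values() if "error" not in r)
print(f"Fetched {fetched} of {len(pages)} pages without hitting the limit")
```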
---

## Integration Examples

### Example 6: Parse Sitemap and Fetch All Pages

Fetch HTML for all pages in the sitemap:

```python
from src.main import list_pages, get_page
import time

def fetch_all_pages(delay=0.5):
    """Fetch all pages with delay between requests"""
    pages = list_pages()
    results = {}

    for page in pages:
        print(f"Fetching {page}...")
        result = get_page(page)

        if "error" in result:
            print(f"  Error: {result['message']}")
            results[page] = None
        else:
            print(f"  Success: {len(result['html'])} bytes")
            results[page] = result

        time.sleep(delay)  # Avoid rate limiting

    return results

# Fetch all pages
all_pages = fetch_all_pages()
print(f"\nSuccessfully fetched {sum(1 for v in all_pages.values() if v)} pages")
```

### Example 7: Extract Specific Data from Pages

Extract titles from all blog posts:

```python
from src.main import list_pages, get_page
from bs4 import BeautifulSoup

def get_blog_titles():
    """Extract titles from blog posts"""
    pages = list_pages()
    blog_posts = [p for p in pages if p.startswith('/blog/')]

    titles = {}
    for post in blog_posts:
        result = get_page(post)
        if "error" not in result:
            soup = BeautifulSoup(result['html'], 'html.parser')
            title = soup.find('title')
            if title:
                titles[post] = title.get_text()

    return titles

# Get all blog titles
titles = get_blog_titles()
for path, title in titles.items():
    print(f"{path}: {title}")
```

### Example 8: Content Analysis

Analyze content across multiple pages:

```python
from src.main import list_pages, get_page
from collections import Counter
import re

def analyze_content():
    """Analyze word frequency across all pages"""
    pages = list_pages()
    all_words = []

    for page in pages:
        result = get_page(page)
        if "error" not in result:
            # Simple word extraction (remove HTML tags first)
            text = re.sub(r'<[^>]+>', '', result['html'])
            words = re.findall(r'\b\w+\b', text.lower())
            all_words.extend(words)

    # Get most common words
    word_freq = Counter(all_words)
    return word_freq.most_common(20)

# Analyze content
top_words = analyze_content()
print("Top 20 words:")
for word, count in top_words:
    print(f"  {word}: {count}")
```

---

## Common Patterns

### Example 9: Caching Pages Locally

Cache fetched pages to avoid repeated requests:

```python
from src.main import get_page
from pathlib import Path
import json
import hashlib

class PageCache:
    def __init__(self, cache_dir="./cache"):
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(exist_ok=True)

    def _get_cache_path(self, path):
        # Create filename from path hash
        hash_key = hashlib.md5(path.encode()).hexdigest()
        return self.cache_dir / f"{hash_key}.json"

    def get(self, path, user_id=None):
        cache_path = self._get_cache_path(path)

        # Check cache
        if cache_path.exists():
            with open(cache_path) as f:
                return json.load(f)

        # Fetch and cache
        result = get_page(path, user_id)
        if "error" not in result:
            with open(cache_path, 'w') as f:
                json.dump(result, f)

        return result

# Usage
cache = PageCache()
page = cache.get("/blog")  # Fetches from server
page = cache.get("/blog")  # Returns from cache
```

### Example 10: Batch Processing with Progress

Process multiple pages with progress tracking:

```python
from src.main import list_pages, get_page
from tqdm import tqdm
import time

def batch_process_pages(processor_func, delay=0.5):
    """Process pages in batches with progress bar"""
    pages = list_pages()
    results = []

    for page in tqdm(pages, desc="Processing pages"):
        result = get_page(page)

        if "error" not in result:
            processed = processor_func(result)
            results.append({
                'path': page,
                'data': processed
            })
        else:
            tqdm.write(f"Error on {page}: {result['message']}")

        time.sleep(delay)

    return results

# Example processor
def extract_metadata(page_result):
    return {
        'url': page_result['url'],
        'size': len(page_result['html']),
        'content_type': page_result['content_type']
    }

# Process all pages
metadata = batch_process_pages(extract_metadata)
```

### Example 11: Error Handling Pattern

Robust error handling for production use:

```python
from src.main import get_page
import logging
import time

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def safe_get_page(path, user_id=None, max_retries=3):
    """Safely fetch page with comprehensive error handling"""
    for attempt in range(max_retries):
        try:
            result = get_page(path, user_id)

            # Handle rate limit
            if "error" in result:
                if result["error"] == "Rate limit exceeded":
                    wait_time = result["reset_in_seconds"]
                    logger.warning(f"Rate limited. Retry {attempt+1}/{max_retries}")
                    time.sleep(wait_time)
                    continue
                else:
                    logger.error(f"Error fetching {path}: {result['message']}")
                    return None

            logger.info(f"Successfully fetched {path}")
            return result

        except Exception as e:
            logger.error(f"Unexpected error on attempt {attempt+1}: {e}")
            if attempt == max_retries - 1:
                raise
            time.sleep(1)

    return None

# Usage
page = safe_get_page("/blog/post-1", user_id="user123")
if page:
    print(f"Got {len(page['html'])} bytes")
```

### Example 12: Testing Server Responses

Test suite for validating server responses:

```python
from src.main import list_pages, get_page, get_sitemap

def test_server():
    """Test all server functionality"""

    # Test 1: List pages
    print("Test 1: Listing pages...")
    pages = list_pages()
    assert len(pages) > 0, "Should have pages"
    assert all(p.startswith('/') for p in pages), "All paths should start with /"
    print(f"✓ Found {len(pages)} pages")

    # Test 2: Get sitemap
    print("\nTest 2: Getting sitemap...")
    sitemap = get_sitemap()
    assert '<?xml' in sitemap, "Should be valid XML"
    assert 'urlset' in sitemap, "Should contain urlset"
    print(f"✓ Sitemap is {len(sitemap)} bytes")

    # Test 3: Fetch page
    print("\nTest 3: Fetching page...")
    result = get_page(pages[0])
    assert "error" not in result, "Should fetch successfully"
    assert result['status_code'] == 200, "Should return 200"
    assert len(result['html']) > 0, "Should have HTML content"
    print(f"✓ Page fetched: {result['url']}")

    # Test 4: Rate limiting
    print("\nTest 4: Testing rate limit...")
    for i in range(12):
        result = get_page("/", user_id="test")
        if "error" in result and result["error"] == "Rate limit exceeded":
            print(f"✓ Rate limit triggered at request {i+1}")
            break

    print("\n✅ All tests passed!")

# Run tests
test_server()
```
---

## Advanced Usage

### Example 13: Sitemap Parser

Custom sitemap parser with filtering:

```python
from src.main import get_sitemap
import xml.etree.ElementTree as ET

def parse_sitemap_advanced():
    """Parse sitemap with full metadata"""
    sitemap_xml = get_sitemap()
    root = ET.fromstring(sitemap_xml)

    # Define namespace
    ns = {'ns': 'http://www.sitemaps.org/schemas/sitemap/0.9'}

    urls = []
    for url in root.findall('.//ns:url', ns):
        loc = url.find('ns:loc', ns)
        lastmod = url.find('ns:lastmod', ns)
        changefreq = url.find('ns:changefreq', ns)
        priority = url.find('ns:priority', ns)

        urls.append({
            'url': loc.text if loc is not None else None,
            'lastmod': lastmod.text if lastmod is not None else None,
            'changefreq': changefreq.text if changefreq is not None else None,
            'priority': float(priority.text) if priority is not None else 0.5
        })

    return urls

# Get all URL metadata
sitemap_data = parse_sitemap_advanced()

# Filter high-priority pages
high_priority = [u for u in sitemap_data if u['priority'] >= 0.9]
print(f"High priority pages: {len(high_priority)}")

# Show last-modified dates
for url in sitemap_data:
    if url['lastmod']:
        print(f"{url['url']}: Last modified {url['lastmod']}")
```

### Example 14: Async-Style Processing

Process pages concurrently (using threading):

```python
from src.main import get_page
from concurrent.futures import ThreadPoolExecutor, as_completed

def fetch_pages_concurrent(paths, user_id_prefix="user", max_workers=5):
    """Fetch multiple pages concurrently"""
    results = {}

    def fetch_one(index, path):
        # Use a different user_id for each thread to avoid rate limits
        user_id = f"{user_id_prefix}_{index}"
        return path, get_page(path, user_id=user_id)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit all tasks
        futures = {
            executor.submit(fetch_one, i, path): path
            for i, path in enumerate(paths)
        }

        # Collect results
        for future in as_completed(futures):
            path, result = future.result()
            results[path] = result

            if "error" in result:
                print(f"✗ {path}: {result['message']}")
            else:
                print(f"✓ {path}: {result['status_code']}")

    return results

# Usage
from src.main import list_pages
pages = list_pages()
all_results = fetch_pages_concurrent(pages[:5])
```

This documentation provides comprehensive examples for using the Webpage MCP Server effectively!
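
---

## Connecting Through an MCP Client

The examples above import the tool functions directly from `src.main`. When the server is running as an actual MCP server, the same tools can be invoked through an MCP client instead. The sketch below uses the official `mcp` Python SDK over stdio; the launch command (`python src/main.py`) and the exposed tool names (`list_pages`, `get_page`) are assumptions based on this document, so adjust them to match your setup.

```python
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

# Assumption: the server is started over stdio with `python src/main.py`.
server_params = StdioServerParameters(command="python", args=["src/main.py"])

async def main():
    async with stdio_client(server_params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()

            # Discover the tools the server exposes
            tools = await session.list_tools()
            print("Tools:", [t.name for t in tools.tools])

            # Call the get_page tool (arguments assumed to mirror src.main.get_page)
            result = await session.call_tool("get_page", {"path": "/"})
            print(result.content)

asyncio.run(main())
```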
