Web Search MCP Server

Overview Schema Related Servers Score Discussions

web-search
debug

debug_parsing.py•3.52 KiB

#!/usr/bin/env python3
"""Debug parsing issues for DuckDuckGo and Startpage.

Fetches one search-results page per engine and reports, for each candidate
CSS selector, how many elements it matches, followed by a short sample of
the page's ``<div class=...>`` structure. All output goes to stdout.
"""

import asyncio
from typing import Mapping

import aiohttp
from urllib.parse import quote_plus
from bs4 import BeautifulSoup

# Total per-request time budget in seconds (connect + read).
REQUEST_TIMEOUT_SECONDS = 10


def _report_selectors(soup: BeautifulSoup, expected_selectors: Mapping[str, str]) -> None:
    """Print the match count and up to three text samples for each selector."""
    for selector_name, selector in expected_selectors.items():
        elements = soup.select(selector)
        print(f"Selector '{selector_name}' ({selector}): {len(elements)} matches")

        if not elements:
            print(" No matches found")
            continue

        for index, elem in enumerate(elements[:3], start=1):
            print(f" {index}. {elem.get_text()[:100]}...")


def _report_page_structure(soup: BeautifulSoup) -> None:
    """Print the first ten ``<div>`` elements in ``<body>`` that carry a class."""
    print("\nPage structure sample:")
    body = soup.find('body')
    if not body:
        return

    divs_with_class = body.find_all('div', class_=True)[:10]
    print(f"Found {len(divs_with_class)} divs with classes:")
    for div in divs_with_class:
        classes = ' '.join(div.get('class', []))
        print(f" <div class='{classes}'> - {div.get_text()[:50]}...")


async def debug_engine(name, url, expected_selectors):
    """Debug a single search engine's parsing.

    Args:
        name: Label for the engine, used only in the printed report.
        url: Fully-formed search URL to fetch with a GET request.
        expected_selectors: Mapping of a descriptive name to the CSS selector
            to try against the returned HTML.

    Returns:
        None. Results are printed. Any exception raised while fetching or
        parsing is caught and printed, so one failing engine does not stop
        the caller from trying the next one.
    """
    print(f"\n🔍 Debugging {name}")
    print(f"URL: {url}")
    print("-" * 60)

    # An explicit ClientTimeout is the supported form; a bare number is only
    # accepted by aiohttp for backward compatibility.
    timeout = aiohttp.ClientTimeout(total=REQUEST_TIMEOUT_SECONDS)

    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, timeout=timeout) as response:
                print(f"Status: {response.status}")

                if response.status == 200:
                    content = await response.text()
                    print(f"Content length: {len(content)} chars")

                    soup = BeautifulSoup(content, 'html.parser')
                    _report_selectors(soup, expected_selectors)
                    _report_page_structure(soup)
                elif response.status == 202:
                    print("HTTP 202 - Request accepted but not processed")
                    content = await response.text()
                    print(f"Content sample: {content[:500]}...")
                else:
                    print(f"HTTP {response.status} - Error response")
    except Exception as e:
        # Deliberately broad: this is the top-level boundary of a diagnostic
        # script. The type name is included because some exceptions (for
        # example asyncio.TimeoutError) have an empty message.
        print(f"Error: {type(e).__name__}: {e}")


async def main(query="python tutorial"):
    """Run the selector diagnostics for every configured search engine.

    Args:
        query: Search phrase to submit to each engine; it is URL-encoded
            with ``quote_plus`` before being placed in the query string.
    """
    encoded_query = quote_plus(query)

    engines = {
        "DuckDuckGo": {
            "url": f"https://html.duckduckgo.com/html/?q={encoded_query}",
            "selectors": {
                "results": ".result",
                "links": ".result__a",
                "titles": ".result__title",
                "snippets": ".result__snippet",
                "web_results": ".web-result",
            },
        },
        "Startpage": {
            "url": f"https://www.startpage.com/sp/search?query={encoded_query}",
            "selectors": {
                "results": ".w-gl__result",
                "search_results": ".search-result",
                "result_items": ".result",
                "titles": ".result-title",
                "links": ".result-link",
            },
        },
    }

    print("🚀 Debugging Search Engine Parsing")
    print(f"Query: '{query}'")
    print("=" * 70)

    # Engines are checked one after another so their reports do not interleave.
    for name, config in engines.items():
        await debug_engine(name, config["url"], config["selectors"])


if __name__ == "__main__":
    asyncio.run(main())

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/vishalkg/web-search'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

debug_parsing.py•3.52 KiB