#!/usr/bin/env python3
"""
Example usage of MCP Web Research Scraper
Demonstrates how to use the scraper directly, without going through the MCP protocol.
"""

import asyncio
import sys
import os
# Add the current directory to Python path
sys.path.insert(0, os.path.dirname(__file__))
from scraper import MCPWebScraper
from database import DatabaseManager


async def example_scraping():
"""Example of using the scraper directly"""
print("๐ MCP Web Research Scraper - Direct Usage Example\n")
# Initialize
db_manager = DatabaseManager("example_scraper.db")
scraper = MCPWebScraper(db_manager)
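    # The scraper persists everything it finds through the DatabaseManager,
    # which is what lets Example 2 below read results back out of the database.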
try:
# Example 1: Scrape a single URL
print("1๏ธโฃ Single URL Scraping")
print(" Scraping example.com for keywords...")
result = await scraper.scrape_url(
url="https://httpbin.org/html", # Using httpbin for safe testing
keywords=["example", "test", "content"],
extract_links=False
)
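        # The returned dict is assumed to include a "stats" mapping with
        # "pages_crawled" and "total_matches" counters, printed below.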
print(f" โ
Scraped {result['stats']['pages_crawled']} pages")
print(f" โ
Found {result['stats']['total_matches']} matches")
# Example 2: Get results from database
print("\n2๏ธโฃ Database Results")
results = db_manager.get_scraping_results(limit=5)
print(f" โ
Retrieved {len(results)} results from database")
for result in results:
print(f" - {result['title']}: {result['match_count']} matches")
# Example 3: Export results
print("\n3๏ธโฃ Export Results")
export_result = scraper.export_results(
format="json",
keyword_filter=None
)
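        # export_results is assumed to return either {"file": path} when it
        # writes to disk, or a plain list of result rows (handled below).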
if "file" in export_result:
print(f" โ
Exported to: {export_result['file']}")
else:
print(f" โ
Exported {len(export_result)} results")
# Example 4: Get statistics
print("\n4๏ธโฃ Statistics")
stats = scraper.get_stats()
print(f" โ
URLs visited: {stats['visited_urls']}")
print(f" โ
Active keywords: {stats['active_keywords']}")
print("\n๐ Example completed successfully!")
except Exception as e:
print(f"โ Error: {e}")
finally:
# Cleanup
try:
if os.path.exists("example_scraper.db"):
os.remove("example_scraper.db")
        except OSError:
            pass


async def example_search_and_scrape():
"""Example of search and scrape functionality"""
print("\n๐ Search and Scrape Example\n")
db_manager = DatabaseManager("search_example.db")
scraper = MCPWebScraper(db_manager)
try:
print("๐ Searching for 'web scraping' and looking for Python references...")
# Note: This will make actual HTTP requests
result = await scraper.search_and_scrape(
query="web scraping python",
keywords=["python", "beautifulsoup", "requests"],
max_results=3
)
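        # The result is assumed to expose per-URL matches under "search_results"
        # alongside the aggregate "stats" dict, as used in the loop below.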
print(f"โ
Processed {result['stats']['pages_crawled']} search results")
print(f"โ
Found {result['stats']['total_matches']} total keyword matches")
for search_result in result["search_results"]:
print(f" - {search_result['url']}: {search_result['match_count']} matches")
except Exception as e:
print(f"โ Search example error: {e}")
print(" (This is expected if there are network issues)")
finally:
# Cleanup
try:
if os.path.exists("search_example.db"):
os.remove("search_example.db")
        except OSError:
            pass


async def main():
"""Run all examples"""
print("๐ MCP Web Research Scraper - Usage Examples\n")
await example_scraping()
# Uncomment to test search functionality (requires network)
# await example_search_and_scrape()
print("\n๐ MCP Tool Functions Available:")
print(" โข scrape_url - Scrape single URL for keywords")
print(" โข search_and_scrape - Search and scrape results")
print(" โข get_scraping_results - Query database results")
print(" โข export_results - Export to JSON/Markdown/CSV")
print(" โข get_scraping_stats - Get statistics")
print("\n๐ฏ Ready for MCP integration!")


if __name__ == "__main__":
asyncio.run(main())
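
# A minimal sketch of exposing these functions as real MCP tools, assuming the
# official `mcp` Python SDK (its FastMCP server API) is installed; the server
# name and tool signature below are illustrative, not this project's schema:
#
#     from mcp.server.fastmcp import FastMCP
#
#     mcp = FastMCP("web-research-scraper")
#     scraper = MCPWebScraper(DatabaseManager("scraper.db"))
#
#     @mcp.tool()
#     async def scrape_url(url: str, keywords: list[str]) -> dict:
#         """Scrape a single URL for the given keywords."""
#         return await scraper.scrape_url(url=url, keywords=keywords)
#
#     if __name__ == "__main__":
#         mcp.run()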