Skip to main content
Glama

MCP Web Research Agent

example_usage.py • 4.44 kB
#!/usr/bin/env python3
"""
Example usage of MCP Web Research Scraper

This demonstrates how to use the scraper directly without MCP protocol
"""

import asyncio
import contextlib
import os
import sys

# Add the current directory to Python path so the sibling project modules
# (scraper.py, database.py) resolve when this script is run from anywhere.
sys.path.insert(0, os.path.dirname(__file__))

from scraper import MCPWebScraper
from database import DatabaseManager


def _cleanup_db(path: str) -> None:
    """Best-effort removal of a throwaway example database file.

    The original code used a bare ``except: pass`` here, which hides every
    possible error; ``os.path.exists``/``os.remove`` can only realistically
    raise ``OSError``, so suppress exactly that while keeping the deliberate
    best-effort semantics.
    """
    with contextlib.suppress(OSError):
        if os.path.exists(path):
            os.remove(path)


async def example_scraping():
    """Example of using the scraper directly.

    Walks through the four main operations: single-URL scraping, querying
    stored results, exporting, and statistics. Uses a throwaway SQLite file
    that is removed on exit.
    """
    print("🔍 MCP Web Research Scraper - Direct Usage Example\n")

    # Initialize
    db_manager = DatabaseManager("example_scraper.db")
    scraper = MCPWebScraper(db_manager)

    try:
        # Example 1: Scrape a single URL
        print("1️⃣ Single URL Scraping")
        print("   Scraping example.com for keywords...")
        result = await scraper.scrape_url(
            url="https://httpbin.org/html",  # Using httpbin for safe testing
            keywords=["example", "test", "content"],
            extract_links=False
        )
        print(f"   ✅ Scraped {result['stats']['pages_crawled']} pages")
        print(f"   ✅ Found {result['stats']['total_matches']} matches")

        # Example 2: Get results from database
        print("\n2️⃣ Database Results")
        results = db_manager.get_scraping_results(limit=5)
        print(f"   ✅ Retrieved {len(results)} results from database")
        # Loop variable renamed from `result` to `row`: the original
        # shadowed (and clobbered) the scrape result bound above.
        for row in results:
            print(f"   - {row['title']}: {row['match_count']} matches")

        # Example 3: Export results
        print("\n3️⃣ Export Results")
        export_result = scraper.export_results(
            format="json",
            keyword_filter=None
        )
        if "file" in export_result:
            print(f"   ✅ Exported to: {export_result['file']}")
        else:
            print(f"   ✅ Exported {len(export_result)} results")

        # Example 4: Get statistics
        print("\n4️⃣ Statistics")
        stats = scraper.get_stats()
        print(f"   ✅ URLs visited: {stats['visited_urls']}")
        print(f"   ✅ Active keywords: {stats['active_keywords']}")

        print("\n🎉 Example completed successfully!")

    except Exception as e:
        # Top-level demo boundary: report and continue to cleanup.
        print(f"❌ Error: {e}")
    finally:
        # Cleanup
        _cleanup_db("example_scraper.db")


async def example_search_and_scrape():
    """Example of search and scrape functionality.

    Makes real HTTP requests; network failures are expected and reported
    rather than raised.
    """
    print("\n🔍 Search and Scrape Example\n")

    db_manager = DatabaseManager("search_example.db")
    scraper = MCPWebScraper(db_manager)

    try:
        print("🔍 Searching for 'web scraping' and looking for Python references...")

        # Note: This will make actual HTTP requests
        result = await scraper.search_and_scrape(
            query="web scraping python",
            keywords=["python", "beautifulsoup", "requests"],
            max_results=3
        )

        print(f"✅ Processed {result['stats']['pages_crawled']} search results")
        print(f"✅ Found {result['stats']['total_matches']} total keyword matches")

        for search_result in result["search_results"]:
            print(f"   - {search_result['url']}: {search_result['match_count']} matches")

    except Exception as e:
        print(f"❌ Search example error: {e}")
        print("   (This is expected if there are network issues)")
    finally:
        # Cleanup
        _cleanup_db("search_example.db")


async def main():
    """Run all examples"""
    print("🚀 MCP Web Research Scraper - Usage Examples\n")

    await example_scraping()

    # Uncomment to test search functionality (requires network)
    # await example_search_and_scrape()

    print("\n📋 MCP Tool Functions Available:")
    print("   • scrape_url - Scrape single URL for keywords")
    print("   • search_and_scrape - Search and scrape results")
    print("   • get_scraping_results - Query database results")
    print("   • export_results - Export to JSON/Markdown/CSV")
    print("   • get_scraping_stats - Get statistics")
    print("\n🎯 Ready for MCP integration!")


if __name__ == "__main__":
    asyncio.run(main())

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/SnotacusNexus/mcp-web-research-agent'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.