#!/usr/bin/env python3
"""
Example usage of MCP Web Research Scraper
Demonstrates how to use the scraper directly, without going through the MCP protocol.
"""

import asyncio
import sys
import os
# Add the current directory to Python path
sys.path.insert(0, os.path.dirname(__file__))
from scraper import MCPWebScraper
from database import DatabaseManager


async def example_scraping():
"""Example of using the scraper directly"""
print("๐ MCP Web Research Scraper - Direct Usage Example\n")
# Initialize
db_manager = DatabaseManager("example_scraper.db")
scraper = MCPWebScraper(db_manager)
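    # The scraper persists everything it finds through the DatabaseManager,
    # which is what lets Example 2 below read results back out of the database.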
try:
# Example 1: Scrape a single URL
print("1๏ธโฃ Single URL Scraping")
print(" Scraping example.com for keywords...")
result = await scraper.scrape_url(
url="https://httpbin.org/html", # Using httpbin for safe testing
keywords=["example", "test", "content"],
extract_links=False
)
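        # The returned dict is assumed to include a "stats" mapping with
        # "pages_crawled" and "total_matches" counters, printed below.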
print(f" โ
Scraped {result['stats']['pages_crawled']} pages")
print(f" โ
Found {result['stats']['total_matches']} matches")
# Example 2: Get results from database
print("\n2๏ธโฃ Database Results")
results = db_manager.get_scraping_results(limit=5)
print(f" โ
Retrieved {len(results)} results from database")
for result in results:
print(f" - {result['title']}: {result['match_count']} matches")
# Example 3: Export results
print("\n3๏ธโฃ Export Results")
export_result = scraper.export_results(
format="json",
keyword_filter=None
)
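        # export_results is assumed to return either {"file": path} when it
        # writes to disk, or a plain list of result rows (handled below).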
if "file" in export_result:
print(f" โ
Exported to: {export_result['file']}")
else:
print(f" โ
Exported {len(export_result)} results")
# Example 4: Get statistics
print("\n4๏ธโฃ Statistics")
stats = scraper.get_stats()
print(f" โ
URLs visited: {stats['visited_urls']}")
print(f" โ
Active keywords: {stats['active_keywords']}")
print("\n๐ Example completed successfully!")
except Exception as e:
print(f"โ Error: {e}")
finally:
# Cleanup
try:
if os.path.exists("example_scraper.db"):
os.remove("example_scraper.db")
        except OSError:
            pass


async def example_search_and_scrape():
"""Example of search and scrape functionality"""
print("\n๐ Search and Scrape Example\n")
db_manager = DatabaseManager("search_example.db")
scraper = MCPWebScraper(db_manager)
try:
print("๐ Searching for 'web scraping' and looking for Python references...")
# Note: This will make actual HTTP requests
result = await scraper.search_and_scrape(
query="web scraping python",
keywords=["python", "beautifulsoup", "requests"],
max_results=3
)
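        # The result is assumed to expose per-URL matches under "search_results"
        # alongside the aggregate "stats" dict, as used in the loop below.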
print(f"โ
Processed {result['stats']['pages_crawled']} search results")
print(f"โ
Found {result['stats']['total_matches']} total keyword matches")
for search_result in result["search_results"]:
print(f" - {search_result['url']}: {search_result['match_count']} matches")
except Exception as e:
print(f"โ Search example error: {e}")
print(" (This is expected if there are network issues)")
finally:
# Cleanup
try:
if os.path.exists("search_example.db"):
os.remove("search_example.db")
        except OSError:
            pass


async def main():
"""Run all examples"""
print("๐ MCP Web Research Scraper - Usage Examples\n")
await example_scraping()
# Uncomment to test search functionality (requires network)
# await example_search_and_scrape()
print("\n๐ MCP Tool Functions Available:")
print(" โข scrape_url - Scrape single URL for keywords")
print(" โข search_and_scrape - Search and scrape results")
print(" โข get_scraping_results - Query database results")
print(" โข export_results - Export to JSON/Markdown/CSV")
print(" โข get_scraping_stats - Get statistics")
print("\n๐ฏ Ready for MCP integration!")


if __name__ == "__main__":
asyncio.run(main())
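
# A minimal sketch of exposing these functions as real MCP tools, assuming the
# official `mcp` Python SDK (its FastMCP server API) is installed; the server
# name and tool signature below are illustrative, not this project's schema:
#
#     from mcp.server.fastmcp import FastMCP
#
#     mcp = FastMCP("web-research-scraper")
#     scraper = MCPWebScraper(DatabaseManager("scraper.db"))
#
#     @mcp.tool()
#     async def scrape_url(url: str, keywords: list[str]) -> dict:
#         """Scrape a single URL for the given keywords."""
#         return await scraper.scrape_url(url=url, keywords=keywords)
#
#     if __name__ == "__main__":
#         mcp.run()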