Nash MCP Server

import html2text import requests import logging import traceback def fetch_webpage(url: str) -> str: """Fetch content from a webpage and convert to readable text. This tool retrieves the HTML content from a URL and converts it to plain text, removing HTML tags, formatting, and other non-text elements. The conversion ignores links and images to focus on the textual content. USE CASES: - Retrieving article content for analysis - Accessing documentation from websites - Scraping data from public web pages - Gathering information from online resources - Reading blog posts, news articles, or reference materials USAGE WORKFLOW: 1. Call fetch_webpage with a complete URL (including https://) 2. Process the returned text directly or with execute_python() 3. For complex HTML parsing needs, use execute_python() with BeautifulSoup after this IMPLEMENTATION DETAILS: - Uses requests library to fetch the webpage - Uses html2text to convert HTML to markdown-like plain text - Ignores links and images in the conversion - Returns error message if the fetch fails SECURITY AND ETHICAL CONSIDERATIONS: - Only fetch publicly accessible webpages - Respect robots.txt and website terms of service - Don't use for scraping private/protected content - Avoid making excessive requests to the same site - Don't use for accessing internal network resources or localhost - Be mindful of rate limits and server load Args: url: Complete URL to fetch (including https:// protocol) Returns: Plain text content of the webpage with HTML elements removed Error message if the fetch fails """ logging.info(f"Fetching webpage: {url}") try: response = requests.get(url) response.raise_for_status() logging.info(f"Successfully retrieved content from {url} (status code: {response.status_code})") h = html2text.HTML2Text() h.ignore_links = True h.ignore_images = True converted_text = h.handle(response.text) logging.info(f"Successfully converted HTML to text (content length: {len(converted_text)} chars)") return converted_text except requests.exceptions.HTTPError as e: logging.error(f"HTTP error while fetching {url}: {str(e)}") return f"Error fetching {url}: HTTP status code {e.response.status_code}" except requests.exceptions.ConnectionError as e: logging.error(f"Connection error while fetching {url}: {str(e)}") return f"Error fetching {url}: Connection failed. Check the URL and your internet connection." except requests.exceptions.Timeout as e: logging.error(f"Timeout error while fetching {url}: {str(e)}") return f"Error fetching {url}: Request timed out." except requests.exceptions.RequestException as e: logging.error(f"Request exception while fetching {url}: {str(e)}") return f"Error fetching {url}: {str(e)}" except Exception as e: logging.error(f"Unexpected error while fetching {url}: {str(e)}") logging.error(traceback.format_exc()) return f"Error fetching {url}: {str(e)}"