fetch_page_links
Extract all links from a web page by providing its URL. This tool helps identify and collect hyperlinks for web scraping, content analysis, or navigation purposes.
Instructions
Return a list of all links on the page.
Args:
- url: The URL to fetch links from
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| url | Yes | The URL to fetch links from | |
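The only input is `url`. For reference, the sketch below shows one way to call the tool from a client, assuming the official MCP Python SDK (`mcp` package) and a stdio transport; the server launch command is a placeholder and depends on how url_text_fetcher is installed.

```python
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

# Assumed launch command for the server; adjust to your installation.
server_params = StdioServerParameters(command="python", args=["-m", "url_text_fetcher.server"])

async def main() -> None:
    async with stdio_client(server_params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.call_tool("fetch_page_links", {"url": "https://example.com"})
            for item in result.content:
                # The tool returns its link list as plain text content.
                if getattr(item, "text", None):
                    print(item.text)

asyncio.run(main())
```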
Implementation Reference
- src/url_text_fetcher/server.py:405-462 (handler): Primary implementation of the fetch_page_links tool handler. It downloads the page HTML, parses it with BeautifulSoup, extracts the href attribute of every anchor tag, keeps only absolute http/https URLs and root-relative paths, and returns a formatted list capped at the first 100 links. Relative-link handling and the sanitize_url/is_safe_url helpers it calls are discussed briefly after this list.

  ```python
  @mcp.tool()
  async def fetch_page_links(url: str) -> str:
      """Return a list of all links on the page.

      Args:
          url: The URL to fetch links from
      """
      # Sanitize URL input
      url = sanitize_url(url)
      if not url:
          return "Error: Invalid URL format"

      # Validate URL safety
      if not is_safe_url(url):
          logger.warning(f"Blocked unsafe URL for link fetching: {url}")
          return "Error: URL not allowed for security reasons"

      try:
          logger.info(f"Fetching page links: {url}")
          resp = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT, stream=True)
          resp.raise_for_status()

          # Check content length
          content_length = resp.headers.get('Content-Length')
          if content_length and int(content_length) > MAX_RESPONSE_SIZE:
              return f"Error: Page too large ({content_length} bytes)"

          # Read content with size limit
          content_chunks = []
          total_size = 0
          for chunk in resp.iter_content(chunk_size=8192, decode_unicode=True):
              if chunk:
                  total_size += len(chunk)
                  if total_size > MAX_RESPONSE_SIZE:
                      return "Error: Page content too large"
                  content_chunks.append(chunk)

          html_content = ''.join(content_chunks)
          soup = BeautifulSoup(html_content, "html.parser")
          links = [a.get('href') for a in soup.find_all('a', href=True) if a.get('href')]

          # Filter and clean links
          valid_links = []
          for link in links:
              if link.startswith(('http://', 'https://', '/')):
                  valid_links.append(link)

          links_text = "\n".join(f"- {link}" for link in valid_links[:100])  # Limit to 100 links
          return f"Links found on {url} ({len(valid_links)} total, showing first 100):\n\n{links_text}"

      except requests.RequestException as e:
          logger.error(f"Request failed for {url}: {e}")
          return "Error: Unable to fetch page"
      except Exception as e:
          logger.error(f"Unexpected error fetching links from {url}: {e}", exc_info=True)
          return "Error: Unable to process page"
  ```
- Alternative implementation of the fetch_page_links tool handler for FastMCP, using a Pydantic Field for input-schema validation. Apart from the synchronous signature and the Field default, its logic is identical to the primary handler above.

  ```python
  @mcp.tool()
  def fetch_page_links(url: str = Field(description="The URL to fetch links from")) -> str:
      """Return a list of all links on the page"""
      # ... body identical to the primary handler above ...
  ```
- src/url_text_fetcher/server.py:320-352 (registration): The tool is listed in the output of the get_server_info tool as an available tool, which serves as informal registration documentation.

  ```python
  @mcp.tool()
  async def get_server_info() -> str:
      """Get information about this MCP server including version, implementation, and capabilities.

      Returns:
          Server information including version, implementation type, and available features
      """
      info = [
          f"URL Text Fetcher MCP Server",
          f"Version: {__version__}",
          f"Implementation: {__implementation__}",
          f"Brave Search Rate Limit: {BRAVE_RATE_LIMIT_RPS} requests/second",
          f"Request Timeout: {REQUEST_TIMEOUT} seconds",
          f"Content Limit: {CONTENT_LENGTH_LIMIT:,} characters",
          f"Max Response Size: {MAX_RESPONSE_SIZE:,} bytes",
          "",
          "Available Tools:",
          "• fetch_url_text - Download visible text from any URL",
          "• fetch_page_links - Extract all links from a webpage",
          "• brave_search_and_fetch - Search web and fetch content from top results",
          "• test_brave_search - Test Brave Search API connectivity",
          "• get_server_info - Display this server information",
          "",
          "Security Features:",
          "• SSRF protection against internal network access",
          "• Input sanitization for URLs and search queries",
          "• Content size limiting and memory protection",
          "• Thread-safe rate limiting for API requests",
          "",
          f"Brave API Key: {'✓ Configured' if BRAVE_API_KEY else '✗ Missing'}"
      ]
      return "\n".join(info)
  ```
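Both handlers return root-relative links (paths beginning with `/`) exactly as they appear in the HTML. If absolute URLs are needed, a caller can resolve them against the page URL after the tool returns; the `resolve_links` helper below is a minimal illustration using only the standard library and is not part of the server.

```python
from urllib.parse import urljoin

def resolve_links(page_url: str, links: list[str]) -> list[str]:
    """Resolve relative links against the page they were extracted from."""
    return [urljoin(page_url, link) for link in links]

# resolve_links("https://example.com/docs/", ["/about", "https://other.org/x"])
# -> ["https://example.com/about", "https://other.org/x"]
```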
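The handlers also depend on `sanitize_url` and `is_safe_url`, which are defined elsewhere in server.py and not reproduced here. As a rough illustration only (not the server's actual code), an SSRF guard of this kind typically allows only http(s) URLs and rejects hosts that resolve to private, loopback, or otherwise non-routable addresses.

```python
import ipaddress
import socket
from urllib.parse import urlparse

def example_is_safe_url(url: str) -> bool:
    """Illustrative SSRF check; the real is_safe_url in server.py may differ."""
    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https") or not parsed.hostname:
        return False
    try:
        results = socket.getaddrinfo(parsed.hostname, None)
    except socket.gaierror:
        return False
    for info in results:
        ip = ipaddress.ip_address(info[4][0])  # sockaddr[0] is the resolved IP string
        if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
            return False
    return True
```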