Skip to main content
Glama

waybackurls_discovery

Discover historical URLs from web archives to identify exposed endpoints and forgotten content for security reconnaissance and vulnerability assessment.

Instructions

Execute Waybackurls for historical URL discovery with enhanced logging.

Input Schema

TableJSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| additional_args | No | | |
| domain | Yes | | |
| get_versions | No | | |
| no_subs | No | | |

Implementation Reference

  • MCP tool registration and proxy handler for waybackurls_discovery, forwards requests to REST API endpoint /api/waybackurls.
    def waybackurls_discovery(
        domain: str,
        get_versions: bool = False,
        no_subs: bool = False,
        additional_args: str = "",
    ) -> dict[str, Any]:
        """Execute Waybackurls for historical URL discovery with enhanced logging."""
        # NOTE(review): the docstring is left untouched on purpose; it presumably
        # doubles as the tool description shown to MCP clients — confirm.
        logger.info(f"🕰️ Starting Waybackurls discovery for {domain}")

        # Forward every tool argument unchanged to the REST endpoint.
        payload = {
            "domain": domain,
            "get_versions": get_versions,
            "no_subs": no_subs,
            "additional_args": additional_args,
        }
        response = api_client.safe_post("api/waybackurls", payload)

        # Log the outcome; the response is handed back to the caller either way.
        if not response.get("success"):
            logger.error("❌ Waybackurls discovery failed")
        else:
            logger.info(f"✅ Waybackurls discovery completed for {domain}")
        return response
  • REST API handler function that orchestrates execution of the waybackurls binary, including parameter extraction, command building, execution, and output parsing.
    @tool(required_fields=["domain"])
    def execute_waybackurls():
        """Execute Waybackurls for historical URL discovery.

        Reads the JSON request body, normalises it into a parameter dict,
        builds the waybackurls command line, runs it with the requested
        timeout and returns the parsed, structured result.
        """
        params = extract_waybackurls_params(request.get_json())
        logger.info(f"Executing Waybackurls on {params['domain']}")

        # The clock starts before the command is assembled, so the reported
        # duration covers command construction as well as execution.
        started_at = datetime.now()
        command = build_waybackurls_command(params)
        outcome = execute_command(command, timeout=params["timeout"])
        ended_at = datetime.now()

        return parse_waybackurls_output(
            outcome, params, command, started_at, ended_at
        )
  • Parameter extraction with defaults for waybackurls tool inputs (no validation is performed here); supports an optional aggressive preset.
    def extract_waybackurls_params(data: dict) -> dict[str, Any]:
        """Normalise a request payload into the waybackurls parameter dict.

        The target is read from ``url`` when present, otherwise from
        ``domain``. Every other option falls back to a fixed default. No
        validation is performed here.

        When ``aggressive`` is truthy, values from ``AGGRESSIVE_PRESET`` are
        layered on top of any parameter that is absent or still looks like a
        default.

        Args:
            data: Decoded request body.

        Returns:
            The parameter dict consumed by the command builder and the
            output parser.
        """
        wants_aggressive = data.get("aggressive", False)
        target = data.get("url", data.get("domain", ""))

        extracted = {
            "domain": target,
            "no_subs": data.get("no_subs", False),
            "get_versions": data.get("get_versions", False),
            "dates": data.get("dates", ""),
            "output_file": data.get("output_file", ""),
            "timeout": data.get("timeout", 30),
            "additional_args": data.get("additional_args", ""),
        }
        if not wants_aggressive:
            return extracted

        # Values treated as "not customised by the caller".
        # NOTE(review): this includes both booleans, so an explicit True/False
        # from the caller is also replaced by the preset — confirm intended.
        default_like = (True, False, 30, None, "")

        tuned = dict(extracted)
        for name, preset_value in AGGRESSIVE_PRESET.items():
            if name in extracted and extracted.get(name) not in default_like:
                # Caller supplied a distinctive value; keep it.
                continue
            tuned[name] = preset_value
        return tuned
  • Helper function to construct the command-line argument list for running the waybackurls binary.
    def build_waybackurls_command(params: dict) -> list[str]:
        """Assemble the waybackurls argument vector from a parameter dict.

        Args:
            params: Parameter dict containing ``domain``, ``get_versions``,
                ``no_subs``, ``dates``, ``output_file`` and
                ``additional_args``.

        Returns:
            The command as a list of arguments, starting with the binary
            name and the target domain.
        """
        # NOTE(review): options are appended after the positional domain. If
        # the waybackurls binary parses arguments with Go's standard ``flag``
        # package, anything after the first positional is not treated as a
        # flag — confirm against the installed binary.
        argv = ["waybackurls", params["domain"]]

        # Boolean switches: emitted only when enabled.
        for key, switch in (
            ("get_versions", "--get-versions"),
            ("no_subs", "--no-subs"),
        ):
            if params[key]:
                argv.append(switch)

        # Options that carry a value: emitted only when the value is non-empty.
        for key, option in (("dates", "--dates"), ("output_file", "-o")):
            value = params[key]
            if value:
                argv += [option, value]

        if not params["additional_args"]:
            return argv

        # Extra arguments are tokenised with shell-like quoting rules.
        # Malformed input is logged and dropped rather than failing the request.
        try:
            argv.extend(shlex.split(params["additional_args"]))
        except ValueError as e:
            logger.warning(
                f"Invalid additional args: {params['additional_args']}, error: {e}"
            )
        return argv
  • Helper function to parse stdout from waybackurls, extract URLs, and format into structured findings with metadata.
    def parse_waybackurls_output(
        execution_result: dict[str, Any],
        params: dict,
        command: list[str],
        started_at: datetime,
        ended_at: datetime,
    ) -> dict[str, Any]:
        """Parse a waybackurls execution result and format the API response.

        Args:
            execution_result: Result of running the command. Reads the
                ``success``, ``stdout``, ``stderr`` and ``error`` keys;
                missing keys are tolerated.
            params: Parameter dict used for the run; ``domain`` is required.
            command: Argument vector that was executed.
            started_at: Timestamp taken before execution.
            ended_at: Timestamp taken after execution.

        Returns:
            A response dict. On success it carries the discovered URLs, one
            ``url`` finding per output line and summary statistics; on
            failure it carries an ``error`` message and empty findings.
        """
        duration_ms = int((ended_at - started_at).total_seconds() * 1000)

        # Fields shared by the success and failure responses, in output order.
        common = {
            "tool": "waybackurls",
            "target": params["domain"],
            "parameters": params,
            "command": " ".join(command),
            "started_at": started_at.isoformat(),
            "ended_at": ended_at.isoformat(),
            "duration_ms": duration_ms,
        }

        if not execution_result.get("success"):
            # Use the first non-empty diagnostic so an empty/None ``error``
            # does not hide a useful ``stderr`` message.
            error_msg = (
                execution_result.get("error")
                or execution_result.get("stderr")
                or "Unknown error"
            )
            # Log the same message that is returned to the caller.
            logger.error("Waybackurls command failed: %s", error_msg)
            return {
                **common,
                "success": False,
                "status": "failed",
                "error": f"Waybackurls execution failed: {error_msg}",
                "findings": [],
                "stats": {"findings": 0, "dupes": 0, "payload_bytes": 0},
            }

        stdout = execution_result.get("stdout") or ""
        # One URL per non-blank output line, surrounding whitespace removed.
        urls = [line.strip() for line in stdout.split("\n") if line.strip()]

        findings = [
            {
                "type": "url",
                "target": url,
                "evidence": {
                    "raw_output": url,
                    "tool": "waybackurls",
                    "domain": params["domain"],
                },
                "severity": "info",
                "confidence": "high",
                "tags": ["waybackurls", "historical"],
                "raw_ref": url,
            }
            for url in urls
        ]

        # ``urls`` and ``findings`` keep every line as emitted; the counters
        # report how many of those lines are distinct and how many repeat.
        unique_count = len(set(urls))

        return {
            **common,
            "status": "completed",
            "urls": urls,
            "unique_urls": unique_count,
            "success": True,
            "stdout": stdout,
            "stderr": execution_result.get("stderr") or None,
            "findings": findings,
            "stats": {
                "findings": len(findings),
                "dupes": len(urls) - unique_count,
                "payload_bytes": len(stdout.encode("utf-8")),
            },
        }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/SlanyCukr/bugbounty-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server