Skip to main content
Glama

gau_discovery

Discovers URLs from multiple sources including Wayback Machine, Common Crawl, OTX, and URLScan for comprehensive web reconnaissance during security assessments.

Instructions

Execute Gau (Get All URLs) for URL discovery from multiple sources with enhanced logging.

Input Schema

Table | JSON Schema
| Name | Required | Description | Default |
| --- | --- | --- | --- |
| additional_args | No | | |
| blacklist | No | | png,jpg,gif,jpeg,swf,woff,svg,pdf,css,ico |
| domain | Yes | | |
| include_subs | No | | |
| providers | No | | wayback,commoncrawl,otx,urlscan |

Implementation Reference

  • MCP tool registration and handler for 'gau_discovery'. Proxies parameters to the REST API endpoint '/api/gau' for execution.
    def gau_discovery(
        domain: str,
        providers: str = "wayback,commoncrawl,otx,urlscan",
        include_subs: bool = True,
        blacklist: str = "png,jpg,gif,jpeg,swf,woff,svg,pdf,css,ico",
        additional_args: str = "",
    ) -> dict[str, Any]:
        """Run Gau to discover URLs from multiple sources with logging."""
        # NOTE(review): the docstring above is kept verbatim because it may be
        # surfaced as the MCP tool description -- confirm before rewording it.

        # Forward every argument unchanged; the keys mirror the parameter names.
        payload = dict(
            domain=domain,
            providers=providers,
            include_subs=include_subs,
            blacklist=blacklist,
            additional_args=additional_args,
        )

        logger.info(f"🔍 Starting Gau URL discovery for {domain}")
        response = api_client.safe_post("api/gau", payload)

        # The outcome is logged from the response's "success" flag and the
        # response is returned to the caller as-is either way.
        if not response.get("success"):
            logger.error("❌ Gau URL discovery failed")
        else:
            logger.info(f"✅ Gau URL discovery completed for {domain}")
        return response
  • Core handler function 'execute_gau' that builds and executes the gau command, parses output, and returns structured results.
    @tool(required_fields=["domain"])
    def execute_gau():
        """Execute Gau (Get All URLs) for URL discovery from multiple sources."""
        # Normalise the JSON request body into the full gau parameter dict.
        gau_params = extract_gau_params(request.get_json())
        logger.info(f"Executing Gau on {gau_params['domain']}")

        # The start timestamp is taken before the command string is built, so
        # the reported duration covers command construction as well as the run.
        began = datetime.now()
        gau_command = build_gau_command(gau_params)
        outcome = execute_command(gau_command, timeout=gau_params["gau_timeout"])
        finished = datetime.now()

        return parse_gau_output(outcome, gau_params, gau_command, began, finished)
  • Supporting helper functions: extract_gau_params (parameter extraction with defaults), build_gau_command (CLI generation), parse_gau_output (result processing).
    def extract_gau_params(data):
        """Extract gau parameters from request data.

        Each option is read with ``dict.get`` so a missing key falls back to a
        default. No validation or type coercion is performed here.

        Args:
            data: Mapping of request fields.

        Returns:
            A dict holding every gau option under a fixed set of keys.
        """
        return {
            # "url" takes precedence over "domain" when both are present.
            "domain": data.get("url", data.get("domain", "")),
            "providers": data.get("providers", "wayback,commoncrawl,otx,urlscan"),
            # "include_subs" takes precedence over the "include_subdomains" alias.
            "include_subs": data.get(
                "include_subs", data.get("include_subdomains", False)
            ),
            "blacklist": data.get("blacklist", ""),
            "from_date": data.get("from_date", ""),
            "to_date": data.get("to_date", ""),
            "output_file": data.get("output_file", ""),
            "threads": data.get("threads", 5),
            "timeout": data.get("timeout", 60),
            "retries": data.get("retries", 5),
            "proxy": data.get("proxy", ""),
            "random_agent": data.get("random_agent", False),
            "verbose": data.get("verbose", False),
            "additional_args": data.get("additional_args", ""),
            "gau_timeout": data.get("gau_timeout", 300),
        }


    def build_gau_command(params):
        """Build the gau command line from parameters.

        Every request-supplied value is passed through ``shlex.quote`` so that
        shell metacharacters (``;``, ``|``, ``$()``, spaces, ...) cannot break
        out of their argument. Values made only of shell-safe characters are
        emitted unchanged, so ordinary inputs produce the same command string
        as before.

        Args:
            params: Parameter dict as produced by ``extract_gau_params``.

        Returns:
            The command as a single string.

        Raises:
            ValueError, TypeError: If ``threads``, ``timeout`` or ``retries``
                differ from their defaults and cannot be converted by ``int``.
        """
        # Function-scope import keeps this block self-contained.
        import shlex

        def quoted(value):
            return shlex.quote(str(value))

        # An empty domain is kept as an empty string (not "''") so the
        # resulting command text matches the historical output in that case.
        domain = str(params["domain"])
        parts = ["gau", quoted(domain) if domain else ""]

        # Options are only emitted when they differ from gau's assumed defaults.
        if params["providers"] != "wayback,commoncrawl,otx,urlscan":
            parts += ["--providers", quoted(params["providers"])]
        if params["include_subs"]:
            parts.append("--subs")
        if params["blacklist"]:
            parts += ["--blacklist", quoted(params["blacklist"])]
        if params["from_date"]:
            parts += ["--from", quoted(params["from_date"])]
        if params["to_date"]:
            parts += ["--to", quoted(params["to_date"])]
        if params["output_file"]:
            parts += ["--output", quoted(params["output_file"])]
        if params["threads"] != 5:
            parts += ["--threads", str(int(params["threads"]))]
        if params["timeout"] != 60:
            parts += ["--timeout", str(int(params["timeout"]))]
        if params["retries"] != 5:
            parts += ["--retries", str(int(params["retries"]))]
        if params["proxy"]:
            parts += ["--proxy", quoted(params["proxy"])]
        if params["random_agent"]:
            parts.append("--random-agent")
        if params["verbose"]:
            parts.append("--verbose")

        # Additional arguments may legitimately contain several tokens, so they
        # are tokenised first and each token is quoted on its own.
        if params["additional_args"]:
            raw_extra = str(params["additional_args"])
            try:
                extra_tokens = shlex.split(raw_extra)
            except ValueError:
                # Unbalanced quotes: fall back to plain whitespace splitting
                # rather than failing the whole request.
                extra_tokens = raw_extra.split()
            parts.extend(shlex.quote(token) for token in extra_tokens)

        return " ".join(parts)


    def parse_gau_output(execution_result, params, command, started_at, ended_at):
        """Parse gau execution result and format response.

        Args:
            execution_result: Mapping with a ``success`` flag and, optionally,
                ``stdout`` (one URL per line) and ``error``.
            params: Parameter dict echoed back in the response.
            command: Command string echoed back in the response.
            started_at: ``datetime`` taken before execution.
            ended_at: ``datetime`` taken after execution.

        Returns:
            A response dict. On success, ``findings`` holds one entry per
            non-blank stdout line; on failure it is empty and ``error`` is set.
        """
        duration_ms = int((ended_at - started_at).total_seconds() * 1000)

        # Fields shared by the success and failure responses, in output order.
        envelope = {
            "success": bool(execution_result.get("success")),
            "tool": "gau",
            "params": params,
            "command": command,
            "started_at": started_at.isoformat(),
            "ended_at": ended_at.isoformat(),
            "duration_ms": duration_ms,
        }

        if not execution_result.get("success"):
            return {
                **envelope,
                "error": execution_result.get("error", "Command execution failed"),
                "findings": [],
                "stats": {"findings": 0, "dupes": 0, "payload_bytes": 0},
            }

        # ``stdout`` may be absent or explicitly None; treat both as empty so
        # the byte count below cannot fail on ``None.encode``.
        stdout = execution_result.get("stdout") or ""
        urls = [line.strip() for line in stdout.split("\n")]
        findings = [
            {
                "type": "url",
                "target": url,
                "evidence": {
                    "raw_output": url,
                    "tool": "gau",
                    "domain": params["domain"],
                },
                "severity": "info",
                "confidence": "medium",
                "tags": ["gau"],
                "raw_ref": url,
            }
            for url in urls
            if url
        ]

        return {
            **envelope,
            "findings": findings,
            "stats": {
                "findings": len(findings),
                "dupes": 0,
                "payload_bytes": len(stdout.encode("utf-8")),
            },
        }
  • Imports gau module to register the Flask endpoint for the gau tool.
    from .gau import gau as gau
  • FastMCP tool decorator registration for 'gau_discovery'.
    @mcp.tool() def gau_discovery(

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/SlanyCukr/bugbounty-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.