get_manifest

Retrieve the structural manifest of a webpage with title, summary, keywords, language, and heading sections for token-efficient content extraction.

Instructions

Fetch the WASP structural index for a webpage. Returns a manifest with the page title, summary, keywords, language, and a list of heading sections (chunks) with their anchors and token estimates. Checks /.well-known/wasp.json first (native manifest); falls back to DOM-generated manifest if not found.

Input Schema

Table | JSON Schema

Name	Required	Description	Default
`url`	Yes	Fully-qualified URL of the webpage to index

Implementation Reference

index.ts:90-99 (handler)

The CallToolRequestSchema handler for 'get_manifest'. Extracts the 'url' argument from the request, calls getManifest(url), and returns the manifest as a JSON-stringified text response.

// Tool-call dispatcher: reads the tool name and arguments from the incoming
// request and routes on the name. (Excerpt — only the get_manifest branch is shown.)
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name, arguments: args } = request.params;

  // get_manifest: resolve the manifest for the given URL and return it as
  // pretty-printed (2-space indented) JSON in a single text content item.
  if (name === "get_manifest") {
    // NOTE(review): `args` is cast, not validated here — a missing or
    // non-string `url` would reach getManifest unchecked; confirm whether the
    // input schema is enforced before this handler runs.
    const { url } = args as { url: string };
    // getManifest also returns the page HTML; only the manifest is sent back.
    const { manifest } = await getManifest(url);
    return {
      content: [{ type: "text", text: JSON.stringify(manifest, null, 2) }],
    };
  }

manifest.ts:140-179 (helper)

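The exported getManifest() function that resolves the WASP manifest for a URL. Returns a cached result when one exists; otherwise tries a native manifest from /.well-known/wasp.json first (3s timeout) and falls back to a DOM-generated manifest via generateManifest(). The page HTML is fetched in both cases, and the function returns — and caches — both the manifest object and the page HTML.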

/**
 * Resolve the WASP manifest for a page, together with the page HTML.
 *
 * A cached entry for `url` is returned as-is. Otherwise the native manifest
 * probe and the page download are started together; when the probe yields a
 * manifest it is used verbatim, and when it yields `null` a manifest is
 * generated from the parsed DOM instead. The result is cached under `url`.
 *
 * @param url - URL of the page to index.
 * @returns The manifest plus the HTML that chunk extraction should run against
 *   (the raw response body for native manifests, the re-serialized DOM for
 *   generated ones).
 * @throws Error when the page request completes with a non-OK HTTP status.
 *   Rejections from `fetch` or from the native probe propagate unchanged.
 */
export async function getManifest(
  url: string
): Promise<{ manifest: Manifest; html: string }> {
  const cached = getCached(url);
  if (cached) return { manifest: cached.manifest, html: cached.html };

  // The page HTML is needed on both paths (chunk content extraction), so it
  // does not have to wait for the native-manifest probe to finish.
  const fetchPageHtml = async (): Promise<string> => {
    const res = await fetch(url, {
      headers: { "User-Agent": USER_AGENT },
      redirect: "follow",
    });
    if (!res.ok) {
      throw new Error(`HTTP ${res.status} fetching ${url}`);
    }
    return res.text();
  };

  // Run the probe and the page download concurrently: they are independent,
  // and awaiting them one after the other adds the probe's full latency
  // (up to its timeout) to every uncached call.
  const [native, rawHtml] = await Promise.all([
    checkNativeManifest(url),
    fetchPageHtml(),
  ]);

  if (native) {
    // Use the native manifest but cache the page HTML for chunk extraction
    setCached(url, { manifest: native, html: rawHtml });
    return { manifest: native, html: rawHtml };
  }

  // Build client manifest from DOM, stamp IDs, then serialize
  const dom = new JSDOM(rawHtml, { url });
  const manifest = generateManifest(dom, url);

  // Serialize after ID stamping so fetch_chunk sees the same anchors
  const html = dom.serialize();

  // No headings and no summary: leave a hint that the page may be
  // rendered client-side rather than returning an empty manifest silently.
  if (manifest.chunks.length === 0 && !manifest.summary) {
    manifest.summary =
      "This page appears to be a SPA — JavaScript-rendered content may not be indexed.";
  }

  setCached(url, { manifest, html });
  return { manifest, html };
}

manifest.ts:58-138 (helper)

generateManifest() builds a client-side manifest from the JSDOM-parsed HTML, extracting title, summary, keywords, language, heading chunks (with synthetic IDs), and interactive actions.

/**
 * Derive a client-side WASP manifest from an already-parsed page.
 *
 * - title: `og:title` meta, else the first `<h1>` text, else `document.title`,
 *   else the URL itself.
 * - summary: the `description` meta content, or an empty string.
 * - language: the root element's `lang` attribute, defaulting to `"en"`.
 * - keywords: up to 10 comma-separated entries of the `keywords` meta; when
 *   that meta is absent or empty, up to 10 distinct h1–h4 heading texts.
 * - chunks: one entry per h1–h4 heading, in document order.
 * - actions: the first 10 elements matching `input, form, button, a[href]`.
 *
 * Side effect: every heading without an `id` is given a synthetic `wasp-NNN`
 * id on the live DOM, so the anchors in the manifest resolve once the DOM is
 * serialized.
 *
 * @param dom - Parsed page; its document is read and its headings may be mutated.
 * @param url - Page URL, recorded in the manifest and used as the last-resort title.
 * @returns A manifest marked `generated: "client"`.
 */
export function generateManifest(dom: JSDOM, url: string): Manifest {
  const doc = dom.window.document;

  // Shared helpers: 3-digit zero-padded ordinals and <meta content> lookup.
  const pad3 = (n: number): string => String(n).padStart(3, "0");
  const metaContent = (selector: string) =>
    doc.querySelector(selector)?.getAttribute("content");

  // --- Title: first non-empty candidate wins, URL is the fallback. ---
  const titleCandidates = [
    metaContent('meta[property="og:title"]'),
    doc.querySelector("h1")?.textContent?.trim(),
    doc.title,
  ];
  const title = titleCandidates.find((candidate) => Boolean(candidate)) ?? url;

  const summary = metaContent('meta[name="description"]') ?? "";
  const language = doc.documentElement.getAttribute("lang") ?? "en";

  // Headings feed both the keyword fallback and the chunk list.
  const headings = Array.from(doc.querySelectorAll("h1,h2,h3,h4"));

  // --- Keywords: explicit meta list, otherwise distinct heading texts. ---
  const keywordMeta = metaContent('meta[name="keywords"]');
  const keywords: string[] = keywordMeta
    ? keywordMeta
        .split(",")
        .map((part) => part.trim())
        .filter((part) => part.length > 0)
        .slice(0, 10)
    : Array.from(
        new Set(
          headings
            .map((node) => node.textContent?.trim() ?? "")
            .filter((text) => text.length > 0)
        )
      ).slice(0, 10);

  // --- Chunks: one per heading, numbered from 1 in document order. ---
  const chunks: Chunk[] = [];
  headings.forEach((node, index) => {
    const ordinal = index + 1;
    // Give anchor-less headings a synthetic id on the live DOM.
    if (!node.id) {
      node.id = `wasp-${pad3(ordinal)}`;
    }
    const text = node.textContent?.trim() ?? "";
    chunks.push({
      id: `chunk_${pad3(ordinal)}`,
      heading: text,
      anchor: `#${node.id}`,
      type: isInsideNav(node) ? "nav" : "heading",
      // Second character of the tag name is the heading level (H2 -> 2).
      depth: parseInt(node.tagName.charAt(1), 10),
      // Estimate derived from the heading text only: one token per 4 characters, rounded up.
      tokens: Math.ceil(text.length / 4),
      order: ordinal,
    });
  });

  // --- Actions: first 10 interactive elements, classified by tag. ---
  const classify = (tagName: string): "fill" | "click" | "navigate" => {
    switch (tagName) {
      case "INPUT":
      case "FORM":
        return "fill";
      case "A":
        return "navigate";
      default:
        return "click";
    }
  };
  const actions: Action[] = [];
  const interactive = doc.querySelectorAll("input, form, button, a[href]");
  for (const [index, node] of Array.from(interactive).slice(0, 10).entries()) {
    actions.push({
      id: `action_${pad3(index + 1)}`,
      type: classify(node.tagName),
      label: getActionLabel(node),
      selector: buildActionSelector(node),
      description: "",
    });
  }

  return {
    wasp: "1.0",
    url,
    title,
    summary,
    keywords,
    language,
    generatedAt: new Date().toISOString(),
    generated: "client",
    chunks,
    actions,
  };
}

index.ts:16-36 (registration)

Tool registration metadata for 'get_manifest' in the ListToolsRequestSchema handler, defining the tool name, description, and input schema (url string required).

// Tool listing: advertises the tools this server exposes, each with its name,
// description and input schema. (Excerpt — only the get_manifest entry is shown.)
server.setRequestHandler(ListToolsRequestSchema, async () => ({
  tools: [
    {
      name: "get_manifest",
      // Description text returned with the tool listing; built by
      // concatenation only to keep source lines short.
      description:
        "Fetch the WASP structural index for a webpage. Returns a manifest with the " +
        "page title, summary, keywords, language, and a list of heading sections " +
        "(chunks) with their anchors and token estimates. " +
        "Checks /.well-known/wasp.json first (native manifest); falls back to " +
        "DOM-generated manifest if not found.",
      // Schema for the call arguments: a single required string, `url`.
      inputSchema: {
        type: "object",
        properties: {
          url: {
            type: "string",
            description: "Fully-qualified URL of the webpage to index",
          },
        },
        required: ["url"],
      },
    },

manifest.ts:9-29 (helper)

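checkNativeManifest() attempts to fetch the native WASP manifest from /.well-known/wasp.json on the page's host with a 3-second timeout. It returns null when the response is not OK, the request times out or fails, or the body is not a JSON object containing a "wasp" key.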

/**
 * Probe the page's host for a site-provided ("native") WASP manifest at
 * `/.well-known/wasp.json`.
 *
 * The probe is best-effort: a non-OK response, a network error, a timeout, a
 * body that is not valid JSON, or JSON that is not an object carrying a `wasp`
 * key all yield `null`, so the caller can fall back to DOM generation.
 *
 * The whole exchange — headers and body — is bounded by `NATIVE_TIMEOUT_MS`,
 * and the abort timer is always cleared before returning.
 *
 * @param url - Any URL on the target site; only its protocol and host are used.
 * @returns The parsed manifest, or `null` when none is available.
 * @throws TypeError when `url` cannot be parsed (raised by `new URL`, outside
 *   the best-effort region).
 */
async function checkNativeManifest(url: string): Promise<Manifest | null> {
  const { protocol, host } = new URL(url);
  const nativeUrl = `${protocol}//${host}/.well-known/wasp.json`;

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), NATIVE_TIMEOUT_MS);
  try {
    const res = await fetch(nativeUrl, {
      signal: controller.signal,
      headers: { "User-Agent": USER_AGENT },
    });
    if (!res.ok) return null;

    // The timer is still armed here on purpose: a server that sends headers
    // and then stalls the body must not be able to hang the caller.
    const json: unknown = await res.json();
    if (json !== null && typeof json === "object" && "wasp" in json) {
      // Only the presence of the `wasp` key is checked; the rest of the
      // shape is taken on trust from the site.
      return json as Manifest;
    }
  } catch {
    // 404, timeout, or non-JSON — fall through to DOM generation
  } finally {
    // Runs on every exit path, so a failed request never leaves a live timer.
    clearTimeout(timer);
  }
  return null;
}

wasp-mcp

get_manifest

Instructions

Input Schema

Implementation Reference

Tool Definition Quality

Other Tools

Latest Blog Posts

MCP directory API