check_technical

Audit a page's HTML <head> for technical signals critical to AI crawlers: HTTPS, canonical, OpenGraph, Twitter Card, hreflang, noindex, meta description length, and title/og:title alignment.

Instructions

Audit a page's HEAD section for technical signals relevant to AI crawlers: HTTPS, canonical, OpenGraph, Twitter Card, hreflang, noindex, meta description length, and title-vs-og:title consistency.

Read-only. Issues one HTTP GET and inspects only the HTML <head> element (the <body> is not parsed).

Deterministic, rule-based; no LLM.

When to use: when you specifically need HEAD-tag audit findings. For the full page including schema and AI-Overview scoring, use audit_page. For canonical-only, use audit_canonical.

Input Schema

TableJSON Schema

Name	Required	Description	Default
`url`	Yes	Public URL to audit. The tool fetches the URL once and inspects HEAD-section signals: HTTPS, canonical, OpenGraph, Twitter Card, hreflang, noindex, meta description length, and title vs og:title match. Body content is not parsed. Read-only HTTP GET.
`respect_robots`	No	If true (default), respect robots.txt before fetching. Set false only for auditing your own site where you've intentionally blocked crawlers.

Implementation Reference

src/tools/check-technical.ts:58-270 (handler)

Main handler for check_technical tool. Fetches the URL via politeFetch, parses HEAD section, and audits technical signals: HTTPS, canonical, noindex, OpenGraph tags, Twitter Card, meta description length, title/OG match, and hreflang. Returns TechnicalResult with findings.

/**
 * Audits the `<head>` of a page for technical signals: HTTPS, canonical,
 * noindex, OpenGraph tags, Twitter Card, meta description length, title vs
 * og:title consistency, and hreflang.
 *
 * The page is fetched once through `politeFetch`; only the data returned by
 * `parseHead` is inspected.
 *
 * @param input - Tool input: the `url` to audit and the `respect_robots` flag.
 * @param hostDelays - Optional host delay map, forwarded to `politeFetch` as-is.
 * @param robotsCache - Optional cache, forwarded to `politeFetch` as-is
 *   (presumably robots.txt contents — confirm against `politeFetch`).
 * @returns Structured flags for each audited signal plus the list of findings.
 * @throws ToolFetchError with type `non_html_response` when the response
 *   declares a Content-Type that does not mention HTML.
 */
export async function checkTechnical(
  input: CheckTechnicalInput,
  hostDelays?: HostDelayMap,
  robotsCache?: Map<string, string>
): Promise<TechnicalResult> {
  const result = await politeFetch(input.url, {
    respectRobots: input.respect_robots,
    hostDelays,
    robotsCache,
  });

  const ct = result.headers["content-type"];
  const ctStr = Array.isArray(ct) ? ct[0] : (ct ?? "");
  // Media types are case-insensitive ("text/HTML" is valid), so compare in
  // lower case. A missing Content-Type is tolerated and treated as HTML.
  if (ctStr && !ctStr.toLowerCase().includes("html")) {
    throw new ToolFetchError({
      type: "non_html_response",
      url: input.url,
      content_type: ctStr,
    });
  }

  const xRobotsTag = result.headers["x-robots-tag"];
  const head = parseHead(result.body, xRobotsTag);
  const findings: Finding[] = [];

  // HTTPS — URL schemes are case-insensitive, so rely on the URL parser
  // (which normalizes the protocol) instead of a raw prefix comparison.
  let https: boolean;
  try {
    https = new URL(input.url).protocol === "https:";
  } catch {
    // Unparseable input: fall back to the plain prefix check.
    https = input.url.startsWith("https://");
  }

  // Canonical
  let canonicalSelfRef = false;
  let canonicalCrossDomain = false;
  if (head.canonical) {
    try {
      const pageUrl = new URL(result.finalUrl);
      // A relative canonical href is relative to the document that was
      // actually served, i.e. the final URL after redirects. Resolving it
      // against the requested URL would misreport a relative canonical as
      // cross-domain whenever the request was redirected to another host.
      const canonUrl = new URL(head.canonical, result.finalUrl);
      canonicalSelfRef =
        canonUrl.hostname === pageUrl.hostname &&
        canonUrl.pathname === pageUrl.pathname;
      canonicalCrossDomain = canonUrl.hostname !== pageUrl.hostname;
    } catch {
      // Best effort: an unparseable canonical/final URL leaves both flags false.
    }
  }

  if (!head.canonical) {
    findings.push({
      severity: "warning",
      category: "technical",
      where: "<head>",
      message: "No canonical link element found.",
      fix: 'Add <link rel="canonical" href="https://example.com/page"> to <head>.',
      estimated_impact: "medium",
    });
  } else if (canonicalCrossDomain) {
    findings.push({
      severity: "warning",
      category: "technical",
      where: 'link[rel="canonical"]',
      message: "Canonical points to a different domain.",
      fix: "Verify this is intentional (syndicated content). If not, update to the self-referencing canonical.",
      estimated_impact: "medium",
    });
  }

  // noindex
  if (head.noindex) {
    findings.push({
      severity: "critical",
      category: "technical",
      where: head.noindexHeader ? "X-Robots-Tag header" : 'meta[name="robots"]',
      message: "Page has noindex directive - no AI search engine can index this page.",
      fix: "Remove the noindex directive if you want this page to appear in AI search results.",
      estimated_impact: "high",
    });
  }

  // Redirect finding
  if (result.redirected && result.finalUrl !== input.url) {
    findings.push({
      severity: "info",
      category: "technical",
      where: "page-level",
      message: `Page redirects to ${result.finalUrl} - ensure canonical and OG tags reflect the canonical URL.`,
      fix: "Update og:url and canonical href to the final redirect target URL.",
    });
  }

  // OG tags
  if (!head.ogTitle) {
    findings.push({
      severity: "warning",
      category: "technical",
      where: "og:title",
      message: "og:title is missing.",
      fix: 'Add <meta property="og:title" content="Page Title">.',
      estimated_impact: "medium",
    });
  }
  if (!head.ogDescription) {
    findings.push({
      severity: "warning",
      category: "technical",
      where: "og:description",
      message: "og:description is missing.",
      fix: 'Add <meta property="og:description" content="120-160 character description.">.',
      estimated_impact: "medium",
    });
  }
  if (!head.ogImage) {
    findings.push({
      severity: "warning",
      category: "technical",
      where: "og:image",
      message: "og:image is missing.",
      fix: 'Add <meta property="og:image" content="https://example.com/image.jpg">.',
      estimated_impact: "medium",
    });
  }

  // Twitter card
  if (!head.twitterCard) {
    findings.push({
      severity: "info",
      category: "technical",
      where: "twitter:card",
      message: "Twitter Card tags are absent.",
      fix: 'Add <meta name="twitter:card" content="summary_large_image"> and twitter:title, twitter:description.',
    });
  }

  // Meta description length (0 when the tag is absent or empty)
  const metaDescLen = head.metaDescription?.length ?? 0;
  if (!head.metaDescription) {
    findings.push({
      severity: "warning",
      category: "technical",
      where: 'meta[name="description"]',
      message: "Meta description is missing.",
      fix: 'Add <meta name="description" content="120-160 character description.">.',
      estimated_impact: "medium",
    });
  } else if (metaDescLen < 50) {
    findings.push({
      severity: "warning",
      category: "technical",
      where: 'meta[name="description"]',
      message: `Meta description is only ${metaDescLen} chars - too short (ideal: 120-160).`,
      fix: "Expand the meta description to 120-160 characters summarizing the page content.",
      estimated_impact: "low",
    });
  } else if (metaDescLen > 200) {
    findings.push({
      severity: "warning",
      category: "technical",
      where: 'meta[name="description"]',
      message: `Meta description is ${metaDescLen} chars - too long (ideal: 120-160).`,
      fix: "Trim the meta description to under 200 characters.",
      estimated_impact: "low",
    });
  }

  // Title/OG match: tolerate an edit distance of up to 10 characters so that
  // small suffixes or punctuation differences are not reported.
  const titleOgMatch =
    !head.title || !head.ogTitle
      ? true // can't compare if either is missing
      : levenshtein(head.title, head.ogTitle) <= 10;

  if (!titleOgMatch) {
    findings.push({
      severity: "warning",
      category: "technical",
      where: "og:title vs <title>",
      message: "og:title differs significantly from <title> - may signal content inconsistency.",
      fix: "Align og:title with <title> or ensure the difference is intentional.",
      estimated_impact: "low",
    });
  }

  return {
    url: input.url,
    https,
    canonical: {
      present: !!head.canonical,
      value: head.canonical,
      self_referential: canonicalSelfRef,
      cross_domain: canonicalCrossDomain,
    },
    noindex: head.noindex,
    noindex_header: head.noindexHeader,
    og_tags: {
      title: !!head.ogTitle,
      description: !!head.ogDescription,
      image: !!head.ogImage,
      url: !!head.ogUrl,
      type: !!head.ogType,
    },
    twitter_card: {
      present: !!head.twitterCard,
      card_type: head.twitterCard,
    },
    hreflang: {
      present: head.hreflangTags.length > 0,
      count: head.hreflangTags.length,
      x_default: head.hreflangTags.some((h) => h.lang === "x-default"),
    },
    title_og_match: titleOgMatch,
    meta_description: {
      present: !!head.metaDescription,
      length: metaDescLen,
    },
    findings,
  };
}

src/tools/check-technical.ts:9-19 (schema)

Zod input schema for check_technical: requires 'url' (string URL) and optional 'respect_robots' (boolean, default true).

/**
 * Zod input schema for the `check_technical` tool.
 *
 * - `url`: required, must parse as a URL.
 * - `respect_robots`: optional boolean that defaults to `true`.
 *
 * The `url` description lists the signals the handler actually audits. The
 * handler never inspects an H1 (the body is not parsed), so the description
 * names the meta description length and title vs og:title checks instead.
 */
export const checkTechnicalInputSchema = z.object({
  url: z
    .string()
    .url()
    .describe("Public URL to audit. The tool fetches the URL once and inspects HEAD-section signals: HTTPS, canonical, OpenGraph, Twitter Card, hreflang, noindex, meta description length, and title vs og:title match. Body content is not parsed. Read-only HTTP GET."),
  respect_robots: z
    .boolean()
    .optional()
    .default(true)
    .describe("If true (default), respect robots.txt before fetching. Set false only for auditing your own site where you've intentionally blocked crawlers."),
});

src/tools/check-technical.ts:23-56 (schema)

TechnicalResult interface defining the output shape: url, https, canonical details, noindex flags, og_tags, twitter_card, hreflang, title_og_match, meta_description, and findings array.

/**
 * Output of the `check_technical` audit: one flag group per audited signal
 * plus the list of findings raised while auditing.
 */
export interface TechnicalResult {
  /** The URL that was requested (the tool input, not the post-redirect URL). */
  url: string;
  /** Whether the requested URL uses the https scheme. */
  https: boolean;
  canonical: {
    /** True when a non-empty canonical link href was found in `<head>`. */
    present: boolean;
    /** The canonical href as written in the page, or null when absent. */
    value: string | null;
    /**
     * True when the canonical's hostname and pathname both equal those of the
     * final (post-redirect) page URL. False when there is no canonical or a
     * URL could not be parsed.
     */
    self_referential: boolean;
    /** True when the canonical's hostname differs from the final page URL's hostname. */
    cross_domain: boolean;
  };
  /** True when either the robots meta tag or the X-Robots-Tag header carries a noindex directive. */
  noindex: boolean;
  /** True when the noindex directive was found in the X-Robots-Tag header. */
  noindex_header: boolean;
  /** Presence flags for the individual OpenGraph meta tags. */
  og_tags: {
    title: boolean;
    description: boolean;
    image: boolean;
    url: boolean;
    type: boolean;
  };
  twitter_card: {
    /** True when a non-empty `twitter:card` meta tag was found. */
    present: boolean;
    /** Content of the `twitter:card` meta tag, or null when absent. */
    card_type: string | null;
  };
  hreflang: {
    /** True when at least one hreflang alternate link was found. */
    present: boolean;
    /** Number of alternate links that have both an hreflang and an href value. */
    count: number;
    /** True when one of the alternates declares the `x-default` language. */
    x_default: boolean;
  };
  /**
   * False only when both `<title>` and og:title exist and their Levenshtein
   * distance exceeds 10; true otherwise (including when either is missing).
   */
  title_og_match: boolean;
  meta_description: {
    /** True when a non-empty meta description was found. */
    present: boolean;
    /** Length of the meta description in characters; 0 when absent. */
    length: number;
  };
  /** Issues raised by the audit, in the order the checks ran. */
  findings: Finding[];
}

src/index.ts:142-153 (registration)

Registration of the 'check_technical' tool with the MCP server via server.tool(), including description and wiring to checkTechnical handler.

// --- Tool 6: check_technical ---
// Registers the HEAD-section audit tool. The description lists the signals the
// handler actually audits: it compares <title> with og:title and checks meta
// description length; it never inspects an H1 because the body is not parsed.
server.tool(
  "check_technical",
  [
    "Audit a page's HEAD section for technical signals relevant to AI crawlers: HTTPS, canonical, OpenGraph, Twitter Card, hreflang, noindex, meta description length, and title-vs-og:title consistency.",
    "Read-only. One HTTP GET, inspects HEAD only (body is not parsed).",
    "Deterministic, rule-based; no LLM.",
    "When to use: when you specifically need HEAD-tag audit findings. For the full page including schema and AI-Overview scoring, use `audit_page`. For canonical-only, use `audit_canonical`.",
  ].join("\n\n"),
  checkTechnicalInputSchema.shape,
  // wrapHandler receives a thunk so it controls when the audit runs.
  async (input) => wrapHandler(() => checkTechnical(input))
);

src/lib/html.ts:41-120 (helper)

parseHead helper function used by checkTechnical to extract HEAD metadata (title, meta description, canonical, OG tags, Twitter Card, noindex, hreflang) from HTML.

/**
 * Reports whether a robots directive list (the content of a robots meta tag
 * or an X-Robots-Tag header value) forbids indexing.
 *
 * Matches any occurrence of `noindex`, plus the `none` directive, which is
 * shorthand for `noindex, nofollow`. A `none` that is the value of a `max-*`
 * rule (for example `max-image-preview:none`) is not an indexing directive
 * and is ignored.
 */
function hasNoindexDirective(raw: string): boolean {
  const value = raw.toLowerCase();
  if (value.includes("noindex")) return true;

  return value.split(",").some((part) => {
    const token = part.trim();
    if (token === "none") return true;

    // Handle "<name>: none", where <name> is either a crawler name
    // ("googlebot: none") or a value-taking rule ("max-image-preview: none").
    const colon = token.lastIndexOf(":");
    if (colon === -1) return false;
    if (token.slice(colon + 1).trim() !== "none") return false;
    const name = (token.slice(0, colon).split(":").pop() ?? "").trim();
    return !name.startsWith("max-");
  });
}

/**
 * Extracts metadata from the `<head>` of an HTML document.
 *
 * @param html - Raw HTML of the page.
 * @param xRobotsTag - Value(s) of the `X-Robots-Tag` response header, if any.
 * @returns Title, meta description, canonical, OpenGraph and Twitter values,
 *   noindex flags, hreflang alternates, charset and viewport. Text values
 *   are trimmed; single-valued tags that are absent are returned as null.
 */
export function parseHead(
  html: string,
  xRobotsTag?: string | string[]
): HeadData {
  const $ = cheerio.load(html);
  const head = $("head");

  const title = head.find("title").first().text().trim() || null;
  const metaDescription =
    head
      .find('meta[name="description"]')
      .attr("content")
      ?.trim() ?? null;
  const canonical = head.find('link[rel="canonical"]').attr("href")?.trim() ?? null;
  const ogTitle =
    head.find('meta[property="og:title"]').attr("content")?.trim() ?? null;
  const ogDescription =
    head.find('meta[property="og:description"]').attr("content")?.trim() ?? null;
  const ogImage =
    head.find('meta[property="og:image"]').attr("content")?.trim() ?? null;
  const ogUrl =
    head.find('meta[property="og:url"]').attr("content")?.trim() ?? null;
  const ogType =
    head.find('meta[property="og:type"]').attr("content")?.trim() ?? null;
  const twitterCard =
    head.find('meta[name="twitter:card"]').attr("content")?.trim() ?? null;
  const twitterTitle =
    head.find('meta[name="twitter:title"]').attr("content")?.trim() ?? null;
  const twitterDescription =
    head.find('meta[name="twitter:description"]').attr("content")?.trim() ?? null;

  // Check noindex from meta tags. Every robots meta tag is inspected, not
  // just the first one: a page may carry several, and any of them can
  // forbid indexing.
  const robotsContents = head
    .find('meta[name="robots"]')
    .toArray()
    .map((el) => $(el).attr("content") ?? "");
  let noindex = robotsContents.some((content) => hasNoindexDirective(content));

  // Check X-Robots-Tag header (may be repeated, hence the array form)
  let noindexHeader = false;
  if (xRobotsTag) {
    const tags = Array.isArray(xRobotsTag) ? xRobotsTag : [xRobotsTag];
    noindexHeader = tags.some((t) => hasNoindexDirective(t));
    if (noindexHeader) noindex = true;
  }

  // Keep only alternates that declare both a language and a target.
  const hreflangTags: Array<{ lang: string; href: string }> = [];
  head.find('link[rel="alternate"][hreflang]').each((_, el) => {
    const lang = $(el).attr("hreflang") ?? "";
    const href = $(el).attr("href") ?? "";
    if (lang && href) hreflangTags.push({ lang, href });
  });

  // Prefer <meta charset>; fall back to the legacy http-equiv declaration.
  const charset =
    head.find('meta[charset]').attr("charset")?.trim() ??
    head.find('meta[http-equiv="Content-Type"]').attr("content")?.match(/charset=([^;]+)/i)?.[1]?.trim() ??
    null;

  const viewport =
    head.find('meta[name="viewport"]').attr("content")?.trim() ?? null;

  return {
    title,
    metaDescription,
    canonical,
    ogTitle,
    ogDescription,
    ogImage,
    ogUrl,
    ogType,
    twitterCard,
    twitterTitle,
    twitterDescription,
    noindex,
    noindexHeader,
    hreflangTags,
    charset,
    viewport,
  };
}

automatelab-ai-seo

check_technical

Instructions

Input Schema

Implementation Reference

Tool Definition Quality

Other Tools

Latest Blog Posts

MCP directory API