check_technical
Audit a page's HEAD for technical signals critical to AI crawlers: HTTPS, canonical, OpenGraph, Twitter Card, hreflang, noindex, meta description length, and title/og:title alignment.
Instructions
Audit a page's HEAD section for technical signals relevant to AI crawlers: HTTPS, canonical, OpenGraph, Twitter Card, hreflang, noindex, meta description length, and title-vs-og:title consistency.
Read-only. One HTTP GET, inspects HEAD only (body is not parsed).
Deterministic, rule-based; no LLM.
When to use: when you specifically need HEAD-tag audit findings. For the full page including schema and AI-Overview scoring, use audit_page. For canonical-only, use audit_canonical.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| url | Yes | Public URL to audit. The tool fetches the URL once and inspects HEAD-section signals: HTTPS, canonical, OpenGraph, Twitter Card, hreflang, noindex, meta description length, and title overlap with og:title. Body content is not parsed. Read-only HTTP GET. | |
| respect_robots | No | If true (default), respect robots.txt before fetching. Set false only for auditing your own site where you've intentionally blocked crawlers. | true |
Implementation Reference
- src/tools/check-technical.ts:58-270 (handler) Main handler for the check_technical tool. Fetches the URL via politeFetch, parses the HEAD section, and audits technical signals: HTTPS, canonical, noindex, OpenGraph tags, Twitter Card, meta description length, title/OG match, and hreflang. Returns a TechnicalResult with findings.
/**
 * Fetches `input.url` once and audits the technical signals exposed by the
 * document's `<head>` and by the `X-Robots-Tag` response header.
 *
 * Checks performed, in the order their findings are emitted: canonical link,
 * noindex, redirect notice, required OpenGraph tags, Twitter Card, meta
 * description length, and `<title>` vs `og:title` similarity. HTTPS and
 * hreflang are reported as flags/counters only and never produce a finding.
 *
 * @param input - Tool input; `url` is fetched and `respect_robots` is passed
 *   through to `politeFetch`.
 * @param hostDelays - Optional per-host delay state, forwarded to `politeFetch`.
 * @param robotsCache - Optional robots.txt cache, forwarded to `politeFetch`.
 * @returns The structured audit result together with its findings.
 * @throws ToolFetchError (`non_html_response`) when the response declares a
 *   content type that does not mention HTML. Whatever `politeFetch` throws
 *   propagates unchanged.
 */
export async function checkTechnical(
  input: CheckTechnicalInput,
  hostDelays?: HostDelayMap,
  robotsCache?: Map<string, string>
): Promise<TechnicalResult> {
  // Thresholds used by the rules below.
  const metaDescriptionMinLength = 50;
  const metaDescriptionMaxLength = 200;
  const titleOgMaxEditDistance = 10;

  const result = await politeFetch(input.url, {
    respectRobots: input.respect_robots,
    hostDelays,
    robotsCache,
  });

  const ct = result.headers["content-type"];
  const ctStr = Array.isArray(ct) ? ct[0] : (ct ?? "");
  // Media types are case-insensitive, so "text/HTML" must be accepted too.
  // A missing content-type is tolerated and the body is parsed anyway.
  if (ctStr && !ctStr.toLowerCase().includes("html")) {
    throw new ToolFetchError({
      type: "non_html_response",
      url: input.url,
      content_type: ctStr,
    });
  }

  const xRobotsTag = result.headers["x-robots-tag"];
  const head = parseHead(result.body, xRobotsTag);
  const findings: Finding[] = [];

  // HTTPS: URL schemes are case-insensitive ("HTTPS://…" is still HTTPS).
  const https = /^https:\/\//i.test(input.url);

  // Canonical
  let canonicalSelfRef = false;
  let canonicalCrossDomain = false;
  if (head.canonical) {
    try {
      const pageUrl = new URL(result.finalUrl);
      // A relative canonical is relative to the document that was actually
      // served, i.e. the final URL after redirects, not the requested URL.
      const canonUrl = new URL(head.canonical, result.finalUrl);
      canonicalSelfRef =
        canonUrl.hostname === pageUrl.hostname &&
        canonUrl.pathname === pageUrl.pathname;
      canonicalCrossDomain = canonUrl.hostname !== pageUrl.hostname;
    } catch {
      // Best effort: an unparseable URL leaves both flags false.
    }
  }

  if (!head.canonical) {
    findings.push({
      severity: "warning",
      category: "technical",
      where: "<head>",
      message: "No canonical link element found.",
      fix: 'Add <link rel="canonical" href="https://example.com/page"> to <head>.',
      estimated_impact: "medium",
    });
  } else if (canonicalCrossDomain) {
    findings.push({
      severity: "warning",
      category: "technical",
      where: 'link[rel="canonical"]',
      message: "Canonical points to a different domain.",
      fix: "Verify this is intentional (syndicated content). If not, update to the self-referencing canonical.",
      estimated_impact: "medium",
    });
  }

  // noindex
  if (head.noindex) {
    findings.push({
      severity: "critical",
      category: "technical",
      where: head.noindexHeader ? "X-Robots-Tag header" : 'meta[name="robots"]',
      message: "Page has noindex directive - no AI search engine can index this page.",
      fix: "Remove the noindex directive if you want this page to appear in AI search results.",
      estimated_impact: "high",
    });
  }

  // Redirect finding
  if (result.redirected && result.finalUrl !== input.url) {
    findings.push({
      severity: "info",
      category: "technical",
      where: "page-level",
      message: `Page redirects to ${result.finalUrl} - ensure canonical and OG tags reflect the canonical URL.`,
      fix: "Update og:url and canonical href to the final redirect target URL.",
    });
  }

  // OG tags: the three required tags share one finding shape.
  const requiredOgTags = [
    {
      value: head.ogTitle,
      tag: "og:title",
      fix: 'Add <meta property="og:title" content="Page Title">.',
    },
    {
      value: head.ogDescription,
      tag: "og:description",
      fix: 'Add <meta property="og:description" content="120-160 character description.">.',
    },
    {
      value: head.ogImage,
      tag: "og:image",
      fix: 'Add <meta property="og:image" content="https://example.com/image.jpg">.',
    },
  ];
  for (const { value, tag, fix } of requiredOgTags) {
    if (!value) {
      findings.push({
        severity: "warning",
        category: "technical",
        where: tag,
        message: `${tag} is missing.`,
        fix,
        estimated_impact: "medium",
      });
    }
  }

  // Twitter card
  if (!head.twitterCard) {
    findings.push({
      severity: "info",
      category: "technical",
      where: "twitter:card",
      message: "Twitter Card tags are absent.",
      fix: 'Add <meta name="twitter:card" content="summary_large_image"> and twitter:title, twitter:description.',
    });
  }

  // Meta description length
  const metaDescLen = head.metaDescription?.length ?? 0;
  if (!head.metaDescription) {
    findings.push({
      severity: "warning",
      category: "technical",
      where: 'meta[name="description"]',
      message: "Meta description is missing.",
      fix: 'Add <meta name="description" content="120-160 character description.">.',
      estimated_impact: "medium",
    });
  } else if (metaDescLen < metaDescriptionMinLength) {
    findings.push({
      severity: "warning",
      category: "technical",
      where: 'meta[name="description"]',
      message: `Meta description is only ${metaDescLen} chars - too short (ideal: 120-160).`,
      fix: "Expand the meta description to 120-160 characters summarizing the page content.",
      estimated_impact: "low",
    });
  } else if (metaDescLen > metaDescriptionMaxLength) {
    findings.push({
      severity: "warning",
      category: "technical",
      where: 'meta[name="description"]',
      message: `Meta description is ${metaDescLen} chars - too long (ideal: 120-160).`,
      fix: "Trim the meta description to under 200 characters.",
      estimated_impact: "low",
    });
  }

  // Title/OG match: nothing to compare when either side is missing, so that
  // case counts as a match rather than a mismatch.
  const titleOgMatch =
    !head.title || !head.ogTitle
      ? true
      : levenshtein(head.title, head.ogTitle) <= titleOgMaxEditDistance;
  if (!titleOgMatch) {
    findings.push({
      severity: "warning",
      category: "technical",
      where: "og:title vs <title>",
      message: "og:title differs significantly from <title> - may signal content inconsistency.",
      fix: "Align og:title with <title> or ensure the difference is intentional.",
      estimated_impact: "low",
    });
  }

  return {
    url: input.url,
    https,
    canonical: {
      present: !!head.canonical,
      value: head.canonical,
      self_referential: canonicalSelfRef,
      cross_domain: canonicalCrossDomain,
    },
    noindex: head.noindex,
    noindex_header: head.noindexHeader,
    og_tags: {
      title: !!head.ogTitle,
      description: !!head.ogDescription,
      image: !!head.ogImage,
      url: !!head.ogUrl,
      type: !!head.ogType,
    },
    twitter_card: {
      present: !!head.twitterCard,
      card_type: head.twitterCard,
    },
    hreflang: {
      present: head.hreflangTags.length > 0,
      count: head.hreflangTags.length,
      x_default: head.hreflangTags.some((h) => h.lang === "x-default"),
    },
    title_og_match: titleOgMatch,
    meta_description: {
      present: !!head.metaDescription,
      length: metaDescLen,
    },
    findings,
  };
}
- src/tools/check-technical.ts:9-19 (schema)Zod input schema for check_technical: requires 'url' (string URL) and optional 'respect_robots' (boolean, default true).
/**
 * Zod input schema for the `check_technical` tool.
 *
 * - `url`: required, must parse as a URL.
 * - `respect_robots`: optional boolean that defaults to `true`.
 *
 * The `url` description lists only what the handler actually checks: meta
 * description length and `<title>` vs `og:title`. H1 lives in the body,
 * which this tool does not parse.
 */
export const checkTechnicalInputSchema = z.object({
  url: z
    .string()
    .url()
    .describe("Public URL to audit. The tool fetches the URL once and inspects HEAD-section signals: HTTPS, canonical, OpenGraph, Twitter Card, hreflang, noindex, meta description length, and title overlap with og:title. Body content is not parsed. Read-only HTTP GET."),
  respect_robots: z
    .boolean()
    .optional()
    .default(true)
    .describe("If true (default), respect robots.txt before fetching. Set false only for auditing your own site where you've intentionally blocked crawlers."),
});
- src/tools/check-technical.ts:23-56 (schema)TechnicalResult interface defining the output shape: url, https, canonical details, noindex flags, og_tags, twitter_card, hreflang, title_og_match, meta_description, and findings array.
/** Output shape of the `check_technical` tool. */
export interface TechnicalResult {
  /** The URL that was requested (the tool input, not the post-redirect URL). */
  url: string;
  /** Whether the requested URL uses the HTTPS scheme. */
  https: boolean;
  /** State of the `<link rel="canonical">` element. */
  canonical: {
    /** True when a non-empty canonical href was found. */
    present: boolean;
    /** The canonical href as written in the page, or null when absent. */
    value: string | null;
    /** True when the canonical's hostname and pathname equal the fetched page's. */
    self_referential: boolean;
    /** True when the canonical's hostname differs from the fetched page's. */
    cross_domain: boolean;
  };
  /** True when a noindex directive was found in the robots meta tag or the X-Robots-Tag header. */
  noindex: boolean;
  /** True when the noindex directive came from the X-Robots-Tag header. */
  noindex_header: boolean;
  /** Presence flags for the individual OpenGraph meta tags. */
  og_tags: {
    title: boolean;
    description: boolean;
    image: boolean;
    url: boolean;
    type: boolean;
  };
  /** State of the `twitter:card` meta tag. */
  twitter_card: {
    present: boolean;
    /** The tag's content value, or null when the tag is absent. */
    card_type: string | null;
  };
  /** Summary of `<link rel="alternate" hreflang>` entries. */
  hreflang: {
    present: boolean;
    /** Number of hreflang entries that have both a language and an href. */
    count: number;
    /** True when one of the entries is `x-default`. */
    x_default: boolean;
  };
  /** True when `<title>` and `og:title` are similar enough, or when either is missing. */
  title_og_match: boolean;
  /** State of the meta description. */
  meta_description: {
    present: boolean;
    /** Character count of the description; 0 when it is missing. */
    length: number;
  };
  /** Findings raised by the audit rules. */
  findings: Finding[];
}
- src/index.ts:142-153 (registration)Registration of the 'check_technical' tool with the MCP server via server.tool(), including description and wiring to checkTechnical handler.
// --- Tool 6: check_technical ---
// Registers the tool with the MCP server: name, description (paragraphs
// joined by blank lines), input shape, and a handler that runs checkTechnical
// through wrapHandler.
// The description names title-vs-og:title and meta description length because
// those are what the handler checks; H1 is in the body, which is never parsed.
server.tool(
  "check_technical",
  [
    "Audit a page's HEAD section for technical signals relevant to AI crawlers: HTTPS, canonical, OpenGraph, Twitter Card, hreflang, noindex, meta description length, and title-vs-og:title consistency.",
    "Read-only. One HTTP GET, inspects HEAD only (body is not parsed).",
    "Deterministic, rule-based; no LLM.",
    "When to use: when you specifically need HEAD-tag audit findings. For the full page including schema and AI-Overview scoring, use `audit_page`. For canonical-only, use `audit_canonical`.",
  ].join("\n\n"),
  checkTechnicalInputSchema.shape,
  async (input) => wrapHandler(() => checkTechnical(input))
);
- src/lib/html.ts:41-120 (helper)parseHead helper function used by checkTechnical to extract HEAD metadata (title, meta description, canonical, OG tags, Twitter Card, noindex, hreflang) from HTML.
/**
 * Extracts the metadata this project audits from an HTML document's `<head>`.
 *
 * Only elements inside `<head>` are considered. For tags that occur more than
 * once, the value comes from the first match (cheerio's `.attr()` getter reads
 * the first element of the selection). Attribute values are trimmed; a tag
 * that is absent yields `null`.
 *
 * @param html - Raw HTML of the page.
 * @param xRobotsTag - Value(s) of the `X-Robots-Tag` response header, if any.
 *   A header that blocks indexing sets both `noindex` and `noindexHeader`.
 * @returns The parsed head metadata.
 */
export function parseHead(
  html: string,
  xRobotsTag?: string | string[]
): HeadData {
  const $ = cheerio.load(html);
  const head = $("head");

  // Trimmed attribute of the first element matching `selector`, or null.
  const attrOf = (selector: string, attribute = "content"): string | null =>
    head.find(selector).attr(attribute)?.trim() ?? null;

  // True when a robots directive list forbids indexing. Any mention of
  // "noindex" counts, and so does the standalone directive "none", which is
  // shorthand for "noindex, nofollow". "none" must be a whole comma-separated
  // directive so that values such as "max-image-preview:none" do not match.
  const blocksIndexing = (directives: string): boolean => {
    const lowered = directives.toLowerCase();
    if (lowered.includes("noindex")) return true;
    return lowered.split(",").some((directive) => directive.trim() === "none");
  };

  const title = head.find("title").first().text().trim() || null;
  const metaDescription = attrOf('meta[name="description"]');
  const canonical = attrOf('link[rel="canonical"]', "href");

  const ogTitle = attrOf('meta[property="og:title"]');
  const ogDescription = attrOf('meta[property="og:description"]');
  const ogImage = attrOf('meta[property="og:image"]');
  const ogUrl = attrOf('meta[property="og:url"]');
  const ogType = attrOf('meta[property="og:type"]');

  const twitterCard = attrOf('meta[name="twitter:card"]');
  const twitterTitle = attrOf('meta[name="twitter:title"]');
  const twitterDescription = attrOf('meta[name="twitter:description"]');

  // Check noindex from meta tags
  let noindex = blocksIndexing(
    head.find('meta[name="robots"]').attr("content") ?? ""
  );

  // Check X-Robots-Tag header; it may be sent more than once.
  let noindexHeader = false;
  if (xRobotsTag) {
    const tags = Array.isArray(xRobotsTag) ? xRobotsTag : [xRobotsTag];
    noindexHeader = tags.some((t) => blocksIndexing(t));
    if (noindexHeader) noindex = true;
  }

  // Keep only hreflang alternates that carry both a language and a target.
  const hreflangTags: Array<{ lang: string; href: string }> = [];
  head.find('link[rel="alternate"][hreflang]').each((_, el) => {
    const lang = $(el).attr("hreflang") ?? "";
    const href = $(el).attr("href") ?? "";
    if (lang && href) hreflangTags.push({ lang, href });
  });

  // Prefer <meta charset>; fall back to the charset parameter of a
  // <meta http-equiv="Content-Type"> declaration.
  const charset =
    head.find("meta[charset]").attr("charset")?.trim() ??
    head
      .find('meta[http-equiv="Content-Type"]')
      .attr("content")
      ?.match(/charset=([^;]+)/i)?.[1]
      ?.trim() ??
    null;
  const viewport = attrOf('meta[name="viewport"]');

  return {
    title,
    metaDescription,
    canonical,
    ogTitle,
    ogDescription,
    ogImage,
    ogUrl,
    ogType,
    twitterCard,
    twitterTitle,
    twitterDescription,
    noindex,
    noindexHeader,
    hreflangTags,
    charset,
    viewport,
  };
}