check_website
Check whether a website publishes an llms.txt file (and its companion llms-full.txt) by entering its URL. This tool helps identify sites that follow the llms.txt convention for exposing LLM-friendly content.
Instructions
Check if a website has llms.txt files
Input Schema
TableJSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| url | Yes | URL of the website to check | |
Implementation Reference
// src/index.ts:156-344 (handler)
/**
 * The primary handler function that performs the website check.
 *
 * Normalizes the input to an origin, fetches `/llms.txt`, fetches up to three
 * `@`-linked URLs found in it, and, only when llms.txt was fetched without
 * error, fetches `/llms-full.txt`. Results that carry no `error` are cached in
 * `websiteCheckCache` under the caller-supplied `domain` string.
 *
 * Failures raised during the check (invalid URL, network error, timeout) are
 * caught and reported through the `error` field of the returned result.
 *
 * Both timeout timers are always disarmed before returning. Leaving the
 * global one armed meant that an early failure (e.g. an invalid URL) produced
 * a promise that rejected 15 s later with nobody listening.
 *
 * @param domain - Host name or URL; `https://` is prepended when the value
 *   does not already start with `http://` or `https://`.
 * @returns The structured check result.
 */
async function checkWebsite(domain: string): Promise<WebsiteCheckResult> {
  console.error('Starting website check for:', domain);

  // Return cached result if available
  if (websiteCheckCache[domain]) {
    console.error('Returning cached result for:', domain);
    return websiteCheckCache[domain];
  }

  const result: WebsiteCheckResult = {
    hasLlmsTxt: false,
    hasLlmsFullTxt: false
  };

  // Builds a promise that rejects with `message` after `ms` milliseconds,
  // together with a cancel() that disarms the underlying timer.
  function createTimeout(
    ms: number,
    message: string
  ): { promise: Promise<never>; cancel: () => void } {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const promise = new Promise<never>((_, reject) => {
      timer = setTimeout(() => {
        reject(new Error(message));
      }, ms);
    });
    return { promise, cancel: () => clearTimeout(timer) };
  }

  // Helper function to fetch with a per-request timeout (default 5 seconds)
  async function fetchWithTimeout(targetUrl: string, timeout = 5000) {
    console.error(`Fetching ${targetUrl} with ${timeout}ms timeout`);
    const controller = new AbortController();
    const timeoutId = setTimeout(() => {
      controller.abort();
      console.error(`Timeout after ${timeout}ms for ${targetUrl}`);
    }, timeout);

    try {
      const startTime = Date.now();
      const response = await fetch(targetUrl, {
        signal: controller.signal,
        headers: {
          'User-Agent': 'llms-txt-explorer/0.1.0'
        }
      });
      const endTime = Date.now();
      console.error(`Fetch completed in ${endTime - startTime}ms for ${targetUrl}`);
      clearTimeout(timeoutId);
      return response;
    } catch (error) {
      clearTimeout(timeoutId);
      console.error(`Fetch error for ${targetUrl}:`, error);
      throw error;
    }
  }

  // Overall timeout for the entire operation (15 seconds)
  const globalTimeout = createTimeout(15000, 'Global timeout exceeded');

  try {
    // Normalize domain and add protocol if missing
    let normalizedDomain = domain;
    if (!domain.startsWith('http://') && !domain.startsWith('https://')) {
      normalizedDomain = `https://${domain}`;
    }
    console.error('Normalized domain:', normalizedDomain);

    // Validate URL format
    let url: URL;
    try {
      url = new URL(normalizedDomain);
    } catch (e) {
      console.error('Invalid URL:', domain);
      throw new Error(`Invalid URL format: ${domain}`);
    }

    // Only the origin is used; any path/query in the input is ignored
    const baseUrl = url.origin;
    console.error('Base URL:', baseUrl);

    const checkPromise = (async () => {
      // Check for llms.txt
      try {
        const llmsTxtUrl = `${baseUrl}/llms.txt`;
        console.error('Fetching llms.txt from:', llmsTxtUrl);
        const llmsTxtRes = await fetchWithTimeout(llmsTxtUrl);
        console.error('llms.txt response status:', llmsTxtRes.status);

        if (llmsTxtRes.ok) {
          result.hasLlmsTxt = true;
          result.llmsTxtUrl = llmsTxtUrl;
          const content = await llmsTxtRes.text();
          console.error(`llms.txt content length: ${content.length} bytes`);
          result.llmsTxtContent = content;
          console.error('Successfully fetched llms.txt');

          // Extract and fetch linked contents in parallel (at most 3)
          const linkedUrls = extractLinkedUrls(content).slice(0, 3);
          if (linkedUrls.length > 0) {
            console.error(`Found ${linkedUrls.length} linked URLs in llms.txt (limited to 3)`);
            result.linkedContents = [];

            // Each fetch resolves to either content or an error entry; it never rejects
            const fetchPromises = linkedUrls.map(async (linkedUrl) => {
              console.error(`Fetching linked content from: ${linkedUrl}`);
              try {
                const linkedRes = await fetchWithTimeout(linkedUrl);
                if (!linkedRes.ok) {
                  throw new Error(`Failed to fetch content: ${linkedRes.status}`);
                }
                const linkedContent = await linkedRes.text();
                console.error(`Linked content length: ${linkedContent.length} bytes`);
                return { url: linkedUrl, content: linkedContent };
              } catch (error) {
                console.error(`Error fetching linked content from ${linkedUrl}:`, error);
                return {
                  url: linkedUrl,
                  error: error instanceof Error ? error.message : 'Unknown error'
                };
              }
            });

            // Wait for all fetches to complete with a 10 second timeout
            const linkedContentTimeout = createTimeout(10000, 'Linked content fetch timeout');
            try {
              result.linkedContents = await Promise.race([
                Promise.all(fetchPromises),
                linkedContentTimeout.promise
              ]);
            } catch (error) {
              console.error('Error fetching linked contents:', error);
              result.linkedContents = linkedUrls.map((linkedUrl) => ({
                url: linkedUrl,
                error: 'Timeout fetching linked contents'
              }));
            } finally {
              // Disarm the timer so it does not outlive the race
              linkedContentTimeout.cancel();
            }
          }
        }
      } catch (error: unknown) {
        console.error('Error in main llms.txt fetch:', error);
        if (error instanceof Error) {
          result.error = error.message;
        } else {
          result.error = 'Unknown error fetching llms.txt';
        }
      }

      // Only check llms-full.txt if llms.txt was successful
      if (result.hasLlmsTxt && !result.error) {
        try {
          const llmsFullTxtUrl = `${baseUrl}/llms-full.txt`;
          console.error('Fetching llms-full.txt from:', llmsFullTxtUrl);
          const llmsFullTxtRes = await fetchWithTimeout(llmsFullTxtUrl);
          console.error('llms-full.txt response status:', llmsFullTxtRes.status);

          if (llmsFullTxtRes.ok) {
            result.hasLlmsFullTxt = true;
            result.llmsFullTxtUrl = llmsFullTxtUrl;
            const content = await llmsFullTxtRes.text();
            console.error(`llms-full.txt content length: ${content.length} bytes`);
            result.llmsFullTxtContent = content;
            console.error('Successfully fetched llms-full.txt');
          }
        } catch (error) {
          console.error('Error fetching llms-full.txt:', error);
          // Don't fail the whole operation for llms-full.txt errors
        }
      }

      return result;
    })();

    // Race between the check operation and the global timeout
    const finalResult = await Promise.race([checkPromise, globalTimeout.promise]);

    // Cache successful results only
    if (!finalResult.error) {
      websiteCheckCache[domain] = finalResult;
    }

    console.error('Final result:', JSON.stringify(finalResult, null, 2));
    return finalResult;
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
    console.error('Error checking website:', errorMessage);
    return {
      hasLlmsTxt: false,
      hasLlmsFullTxt: false,
      error: errorMessage
    };
  } finally {
    // Always disarm the global timer: otherwise it keeps the event loop busy
    // for 15 s and, on early failures, rejects with no handler attached.
    globalTimeout.cancel();
  }
}
// src/index.ts:389-402 (schema)
// Tool registration in ListToolsRequestHandler including name, description,
// and input schema definition (object with required 'url' string).
{
  name: "check_website",
  description: "Check if a website has llms.txt files",
  inputSchema: {
    type: "object",
    properties: {
      url: {
        type: "string",
        description: "URL of the website to check"
      }
    },
    // 'url' is the only argument and it is mandatory
    required: ["url"]
  }
},
// src/index.ts:431-464 (registration)
// Dispatch handler in CallToolRequestSchema that extracts the 'url' argument,
// calls checkWebsite, and returns the JSON-stringified result as text content.
case "check_website": {
  // Read the raw argument first: String(undefined) is the truthy text
  // "undefined", which would slip past the emptiness check below and be
  // looked up as if it were a host name.
  const rawUrl: unknown = request.params.arguments?.url;
  const url = typeof rawUrl === "string" ? rawUrl.trim() : "";
  console.error('Checking website:', url);

  if (!url) {
    console.error('URL is required');
    return {
      content: [{
        type: "text",
        text: JSON.stringify({ error: "URL is required" }, null, 2)
      }]
    };
  }

  try {
    const result = await checkWebsite(url);
    console.error('Tool returning result:', JSON.stringify(result, null, 2));
    return {
      content: [{
        type: "text",
        text: JSON.stringify(result, null, 2)
      }]
    };
  } catch (error) {
    // Errors are returned to the client as JSON text rather than thrown
    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
    console.error('Tool returning error:', errorMessage);
    return {
      content: [{
        type: "text",
        text: JSON.stringify({ error: errorMessage }, null, 2)
      }]
    };
  }
}
// src/index.ts:44-53 (schema)
/**
 * TypeScript interface defining the output structure returned by checkWebsite.
 */
interface WebsiteCheckResult {
  // --- llms.txt ---
  /** Whether `/llms.txt` was fetched with an OK response. */
  hasLlmsTxt: boolean;
  /** URL the llms.txt file was fetched from. */
  llmsTxtUrl?: string;
  /** Text body of the llms.txt response. */
  llmsTxtContent?: string;

  // --- llms-full.txt ---
  /** Whether `/llms-full.txt` was fetched with an OK response. */
  hasLlmsFullTxt: boolean;
  /** URL the llms-full.txt file was fetched from. */
  llmsFullTxtUrl?: string;
  /** Text body of the llms-full.txt response. */
  llmsFullTxtContent?: string;

  // --- extras ---
  /** One entry per `@`-linked URL that was followed from llms.txt. */
  linkedContents?: LinkedContent[];
  /** Error message when the check failed. */
  error?: string;
}
// src/index.ts:136-151 (helper)
/**
 * Helper function to extract @-prefixed URLs from llms.txt content for
 * fetching linked contents.
 *
 * A line counts as a link when, after trimming, it begins with `@`; the text
 * after the `@` is trimmed again and kept if non-empty. Order and duplicates
 * are preserved.
 *
 * @param content - Raw text of an llms.txt file.
 * @returns The link targets in the order they appear.
 */
function extractLinkedUrls(content: string): string[] {
  return content
    .split('\n')
    .map((rawLine) => rawLine.trim())
    .filter((entry) => entry.startsWith('@'))
    .map((entry) => entry.slice(1).trim())
    .filter((target) => target.length > 0);
}