Skip to main content
Glama

check_website

Verify if a website implements the llms.txt standard by checking for its presence at the specified URL.

Instructions

Check if a website has llms.txt files

Input Schema

Table | JSON Schema
Name | Required | Description | Default
url | Yes | URL of the website to check | —

Implementation Reference

  • The primary handler function that executes the tool logic: checks a website for llms.txt and llms-full.txt files, fetches their contents, handles linked @ URLs, with caching and timeouts.
    /**
     * Checks whether a website serves `/llms.txt` and `/llms-full.txt` at its origin.
     *
     * Successful (error-free) results are memoised in `websiteCheckCache`, keyed by
     * the `domain` string exactly as passed in. When `llms.txt` is found, up to
     * three `@`-prefixed linked URLs are fetched in parallel as well.
     *
     * Timeouts: 5 s per individual request, 10 s for the batch of linked fetches,
     * and 15 s for the whole operation.
     *
     * @param domain - Host name or URL; `https://` is prepended when no http(s)
     *   scheme is present. Only the origin of the resulting URL is used.
     * @returns The check result. This function does not reject: invalid input,
     *   network failures and timeouts are reported through the `error` field.
     */
    async function checkWebsite(domain: string): Promise<WebsiteCheckResult> {
      console.error('Starting website check for:', domain);

      // Return cached result if available
      if (websiteCheckCache[domain]) {
        console.error('Returning cached result for:', domain);
        return websiteCheckCache[domain];
      }

      const result: WebsiteCheckResult = {
        hasLlmsTxt: false,
        hasLlmsFullTxt: false
      };

      // Create an overall timeout for the entire operation. The timer handle is
      // kept so it can be cancelled on every exit path: otherwise an early failure
      // (e.g. an invalid URL) would leave this promise to reject later with no
      // handler attached, and a successful check would keep the event loop busy
      // for the remainder of the 15 seconds.
      let globalTimer: ReturnType<typeof setTimeout> | undefined;
      const globalTimeout = new Promise<never>((_, reject) => {
        globalTimer = setTimeout(() => {
          reject(new Error('Global timeout exceeded'));
        }, 15000); // 15 second global timeout
      });

      try {
        // Normalize domain and add protocol if missing
        let normalizedDomain = domain;
        if (!domain.startsWith('http://') && !domain.startsWith('https://')) {
          normalizedDomain = `https://${domain}`;
        }
        console.error('Normalized domain:', normalizedDomain);

        // Validate URL format
        let url: URL;
        try {
          url = new URL(normalizedDomain);
        } catch (e) {
          console.error('Invalid URL:', domain);
          throw new Error(`Invalid URL format: ${domain}`);
        }

        // Use the normalized URL; any path/query on the input is discarded
        const baseUrl = url.origin;
        console.error('Base URL:', baseUrl);

        // Helper function to fetch with timeout. The timeout covers the wait for
        // the response (headers); it is cleared once `fetch` settles.
        async function fetchWithTimeout(url: string, timeout = 5000) {
          console.error(`Fetching ${url} with ${timeout}ms timeout`);
          const controller = new AbortController();
          const timeoutId = setTimeout(() => {
            controller.abort();
            console.error(`Timeout after ${timeout}ms for ${url}`);
          }, timeout);

          try {
            const startTime = Date.now();
            const response = await fetch(url, {
              signal: controller.signal,
              headers: {
                'User-Agent': 'llms-txt-explorer/0.1.0'
              }
            });
            const endTime = Date.now();
            console.error(`Fetch completed in ${endTime - startTime}ms for ${url}`);
            return response;
          } catch (error) {
            console.error(`Fetch error for ${url}:`, error);
            throw error;
          } finally {
            clearTimeout(timeoutId);
          }
        }

        const checkPromise = (async () => {
          // Check for llms.txt
          try {
            const llmsTxtUrl = `${baseUrl}/llms.txt`;
            console.error('Fetching llms.txt from:', llmsTxtUrl);
            const llmsTxtRes = await fetchWithTimeout(llmsTxtUrl);
            console.error('llms.txt response status:', llmsTxtRes.status);

            if (llmsTxtRes.ok) {
              result.hasLlmsTxt = true;
              result.llmsTxtUrl = llmsTxtUrl;
              const content = await llmsTxtRes.text();
              console.error(`llms.txt content length: ${content.length} bytes`);
              result.llmsTxtContent = content;
              console.error('Successfully fetched llms.txt');

              // Extract and fetch linked contents in parallel with timeout
              const linkedUrls = extractLinkedUrls(content).slice(0, 3); // Limited to 3 linked contents
              if (linkedUrls.length > 0) {
                console.error(`Found ${linkedUrls.length} linked URLs in llms.txt (limited to 3)`);
                result.linkedContents = [];

                // Each task resolves (never rejects): failures become `{ url, error }`.
                const fetchPromises = linkedUrls.map(async (url) => {
                  console.error(`Fetching linked content from: ${url}`);
                  try {
                    const linkedRes = await fetchWithTimeout(url);
                    if (!linkedRes.ok) {
                      throw new Error(`Failed to fetch content: ${linkedRes.status}`);
                    }
                    const linkedContent = await linkedRes.text();
                    console.error(`Linked content length: ${linkedContent.length} bytes`);
                    return {
                      url,
                      content: linkedContent
                    };
                  } catch (error) {
                    console.error(`Error fetching linked content from ${url}:`, error);
                    return {
                      url,
                      error: error instanceof Error ? error.message : 'Unknown error'
                    };
                  }
                });

                // Wait for all fetches to complete with a 10 second timeout
                let linkedTimer: ReturnType<typeof setTimeout> | undefined;
                const linkedContentTimeout = new Promise<never>((_, reject) => {
                  linkedTimer = setTimeout(() => {
                    reject(new Error('Linked content fetch timeout'));
                  }, 10000);
                });

                try {
                  result.linkedContents = await Promise.race([
                    Promise.all(fetchPromises),
                    linkedContentTimeout
                  ]);
                } catch (error) {
                  console.error('Error fetching linked contents:', error);
                  result.linkedContents = linkedUrls.map(url => ({
                    url,
                    error: 'Timeout fetching linked contents'
                  }));
                } finally {
                  // Cancel the batch timer once the race is decided
                  clearTimeout(linkedTimer);
                }
              }
            }
          } catch (error: unknown) {
            console.error('Error in main llms.txt fetch:', error);
            if (error instanceof Error) {
              result.error = error.message;
            } else {
              result.error = 'Unknown error fetching llms.txt';
            }
          }

          // Only check llms-full.txt if llms.txt was successful
          if (result.hasLlmsTxt && !result.error) {
            try {
              const llmsFullTxtUrl = `${baseUrl}/llms-full.txt`;
              console.error('Fetching llms-full.txt from:', llmsFullTxtUrl);
              const llmsFullTxtRes = await fetchWithTimeout(llmsFullTxtUrl);
              console.error('llms-full.txt response status:', llmsFullTxtRes.status);

              if (llmsFullTxtRes.ok) {
                result.hasLlmsFullTxt = true;
                result.llmsFullTxtUrl = llmsFullTxtUrl;
                const content = await llmsFullTxtRes.text();
                console.error(`llms-full.txt content length: ${content.length} bytes`);
                result.llmsFullTxtContent = content;
                console.error('Successfully fetched llms-full.txt');
              }
            } catch (error) {
              console.error('Error fetching llms-full.txt:', error);
              // Don't fail the whole operation for llms-full.txt errors
            }
          }

          return result;
        })();

        // Race between the check operation and the global timeout
        const finalResult = await Promise.race([checkPromise, globalTimeout]);

        // Cache successful results only
        if (!finalResult.error) {
          websiteCheckCache[domain] = finalResult;
        }

        console.error('Final result:', JSON.stringify(finalResult, null, 2));
        return finalResult;
      } catch (error) {
        const errorMessage = error instanceof Error ? error.message : 'Unknown error';
        console.error('Error checking website:', errorMessage);
        return {
          hasLlmsTxt: false,
          hasLlmsFullTxt: false,
          error: errorMessage
        };
      } finally {
        // Always cancel the global timer, whether we succeeded, failed or timed out
        clearTimeout(globalTimer);
      }
    }
  • src/index.ts:389-402 (registration)
    Tool registration in the ListToolsRequestSchema handler, defining name, description, and input schema.
    // Tool descriptor for "check_website": advertises the tool name, a short
    // description, and a JSON Schema for its arguments.
    {
      name: "check_website",
      description: "Check if a website has llms.txt files",
      inputSchema: {
        type: "object",
        properties: {
          // The single argument: the site to inspect, passed as a string
          url: {
            type: "string",
            description: "URL of the website to check"
          }
        },
        // `url` must always be supplied by the caller
        required: ["url"]
      }
    },
  • MCP tool dispatch handler in CallToolRequestSchema that validates input, calls checkWebsite, and formats the response.
    // Dispatch for the "check_website" tool: validates the `url` argument, runs
    // checkWebsite, and returns the result (or an error object) as pretty-printed
    // JSON in a single text content item.
    case "check_website": {
      // Treat a missing argument as empty *before* stringifying: String(undefined)
      // is the truthy text "undefined", which would slip past the guard below and
      // be checked as if it were a domain name.
      const rawUrl = request.params.arguments?.url;
      const url = rawUrl == null ? '' : String(rawUrl).trim();
      console.error('Checking website:', url);

      if (!url) {
        console.error('URL is required');
        return {
          content: [{
            type: "text",
            text: JSON.stringify({ error: "URL is required" }, null, 2)
          }]
        };
      }

      try {
        const result = await checkWebsite(url);
        console.error('Tool returning result:', JSON.stringify(result, null, 2));
        return {
          content: [{
            type: "text",
            text: JSON.stringify(result, null, 2)
          }]
        };
      } catch (error) {
        // Defensive: report any unexpected failure in the same JSON shape
        const errorMessage = error instanceof Error ? error.message : 'Unknown error';
        console.error('Tool returning error:', errorMessage);
        return {
          content: [{
            type: "text",
            text: JSON.stringify({ error: errorMessage }, null, 2)
          }]
        };
      }
    }
  • TypeScript interface defining the output structure of the check_website tool.
    /**
     * Outcome of checking one website for llms.txt files, as produced by
     * `checkWebsite`.
     */
    interface WebsiteCheckResult {
      /** True when the request for `/llms.txt` returned an OK response. */
      hasLlmsTxt: boolean;
      /** True when the request for `/llms-full.txt` returned an OK response. */
      hasLlmsFullTxt: boolean;
      /** URL that `llms.txt` was fetched from; set only when it was found. */
      llmsTxtUrl?: string;
      /** URL that `llms-full.txt` was fetched from; set only when it was found. */
      llmsFullTxtUrl?: string;
      /** Response body text of `llms.txt`, when found. */
      llmsTxtContent?: string;
      /** Response body text of `llms-full.txt`, when found. */
      llmsFullTxtContent?: string;
      /** One entry per linked (`@`-prefixed) URL that was followed from `llms.txt`. */
      linkedContents?: LinkedContent[];
      /** Error message when the check failed; absent on success. */
      error?: string;
    }
  • Helper function to extract linked URLs from llms.txt content starting with '@'.
    /**
     * Collects the link targets declared in llms.txt content.
     *
     * A line declares a link when, after trimming surrounding whitespace, it
     * begins with `@`. The target is the rest of that line, trimmed again;
     * lines whose target is empty are ignored.
     *
     * @param content - Raw text to scan, split on `\n`.
     * @returns Targets in the order they appear (duplicates are kept).
     */
    function extractLinkedUrls(content: string): string[] {
      return content
        .split('\n')
        .map((rawLine) => rawLine.trim())
        .filter((entry) => entry.startsWith('@'))
        // Drop the leading '@' and any padding that followed it
        .map((entry) => entry.slice(1).trim())
        .filter((target) => target !== '');
    }
Install Server

Other Tools

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/thedaviddias/mcp-llms-txt-explorer'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.