Skip to main content
Glama

docs-mcp-server

ScrapeFormContent.tsx (17.2 kB)
import { ScrapeMode } from "../../scraper/types";
import Alert from "./Alert";
import Tooltip from "./Tooltip";

/** Props accepted by {@link ScrapeFormContent}. */
interface ScrapeFormContentProps {
  /** Patterns pre-filled (one per line) into the "Exclude Patterns" textarea. */
  defaultExcludePatterns?: string[];
}

// Shared Tailwind class lists. They are kept as complete literal strings so
// every class name still appears verbatim in this source file.

/** Classes for the label shown above each form field. */
const FIELD_LABEL_CLASS =
  "block text-sm font-medium text-gray-700 dark:text-gray-300";

/** Classes for full-width text inputs (URL, Library Name). */
const WIDE_INPUT_CLASS =
  "mt-0.5 block w-full px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white";

/** Classes for width-capped inputs and textareas. */
const NARROW_INPUT_CLASS =
  "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white";

/** Classes for the `<select>` dropdowns (Scope, Scrape Mode). */
const SELECT_CLASS =
  "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white";

/** Classes for the checkboxes at the bottom of the advanced options. */
const CHECKBOX_CLASS =
  "h-4 w-4 text-primary-600 focus:ring-primary-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700";

/** Classes for the label rendered next to a checkbox. */
const CHECKBOX_LABEL_CLASS =
  "ml-1 block text-sm text-gray-900 dark:text-gray-300";

/**
 * Renders the form fields for queuing a new scrape job.
 * Includes basic fields (URL, Library, Version) and advanced options.
 *
 * The form is submitted by HTMX to `/web/jobs/scrape` and the response is
 * swapped into the `#job-response` element rendered below the form.
 * Client-side state (`url`, `hasPath`, `headers`) lives in a single Alpine
 * `x-data` object on the `<form>` element.
 *
 * @param props.defaultExcludePatterns - Patterns pre-filled into the
 *   "Exclude Patterns" textarea; the textarea is empty when omitted.
 * @returns The JSX for the form card, its response target and the inline
 *   error-handling script.
 */
const ScrapeFormContent = ({
  defaultExcludePatterns,
}: ScrapeFormContentProps) => {
  // Format default patterns for display in textarea (one per line).
  // `join` always yields a string, so `??` only covers the "prop omitted" case.
  const defaultExcludePatternsText = defaultExcludePatterns?.join("\n") ?? "";

  return (
    <div class="mt-4 p-4 bg-white dark:bg-gray-800 rounded-lg shadow border border-gray-300 dark:border-gray-600">
      <h3 class="text-xl font-semibold text-gray-900 dark:text-white mb-2">
        Queue New Scrape Job
      </h3>
      {/*
        Alpine state for the whole form:
        - url / hasPath drive the "only subpages will be scraped" hint.
        - headers backs the dynamic "Custom HTTP Headers" rows.
      */}
      <form
        hx-post="/web/jobs/scrape"
        hx-target="#job-response"
        hx-swap="innerHTML"
        class="space-y-2"
        x-data="{
          url: '',
          hasPath: false,
          headers: [],
          checkUrlPath() {
            try {
              const url = new URL(this.url);
              this.hasPath = url.pathname !== '/' && url.pathname !== '';
            } catch (e) {
              this.hasPath = false;
            }
          }
        }"
      >
        <div>
          <div class="flex items-center">
            <label for="url" class={FIELD_LABEL_CLASS}>
              URL
            </label>
            <Tooltip
              text={
                <div>
                  <p>Enter the URL of the documentation you want to scrape.</p>
                  <p class="mt-2">
                    For local files/folders, you must use the{" "}
                    <code>file://</code> prefix and ensure the path is
                    accessible to the server.
                  </p>
                  <p class="mt-2">
                    If running in Docker, <b>mount the folder</b> (see README
                    for details).
                  </p>
                </div>
              }
            />
          </div>
          {/* On paste, re-check on the next tick so x-model has the new value. */}
          <input
            type="url"
            name="url"
            id="url"
            required
            x-model="url"
            x-on:input="checkUrlPath"
            x-on:paste="$nextTick(() => checkUrlPath())"
            class={WIDE_INPUT_CLASS}
          />
          {/* Hint shown only for non-file URLs that contain a path. */}
          <div
            x-show="hasPath && !(url.startsWith('file://'))"
            x-cloak
            x-transition:enter="transition ease-out duration-300"
            x-transition:enter-start="opacity-0 transform -translate-y-2"
            x-transition:enter-end="opacity-100 transform translate-y-0"
            class="mt-2"
          >
            <Alert
              type="info"
              message="By default, only subpages under the given URL will be scraped. To scrape the whole website, adjust the 'Scope' option in Advanced Options."
            />
          </div>
        </div>
        <div>
          <div class="flex items-center">
            <label for="library" class={FIELD_LABEL_CLASS}>
              Library Name
            </label>
            <Tooltip text="The name of the library you're documenting. This will be used when searching." />
          </div>
          <input
            type="text"
            name="library"
            id="library"
            required
            class={WIDE_INPUT_CLASS}
          />
        </div>
        <div>
          <div class="flex items-center">
            <label for="version" class={FIELD_LABEL_CLASS}>
              Version (optional)
            </label>
            <Tooltip text="Specify the version of the library documentation you're indexing. This allows for version-specific searches." />
          </div>
          <input
            type="text"
            name="version"
            id="version"
            class={NARROW_INPUT_CLASS}
          />
        </div>

        {/* Consider using Flowbite Accordion here */}
        <details class="bg-gray-50 dark:bg-gray-900 p-2 rounded-md">
          <summary class="cursor-pointer text-sm font-medium text-gray-600 dark:text-gray-400">
            Advanced Options
          </summary>
          {/* `headers` below resolves to the form-level Alpine state. */}
          <div class="mt-2 space-y-2">
            <div>
              <div class="flex items-center">
                <label for="maxPages" class={FIELD_LABEL_CLASS}>
                  Max Pages
                </label>
                <Tooltip text="The maximum number of pages to scrape. Default is 1000. Setting this too high may result in longer processing times." />
              </div>
              <input
                type="number"
                name="maxPages"
                id="maxPages"
                min="1"
                placeholder="1000"
                class={NARROW_INPUT_CLASS}
              />
            </div>
            <div>
              <div class="flex items-center">
                <label for="maxDepth" class={FIELD_LABEL_CLASS}>
                  Max Depth
                </label>
                <Tooltip text="How many links deep the scraper should follow. Default is 3. Higher values capture more content but increase processing time." />
              </div>
              <input
                type="number"
                name="maxDepth"
                id="maxDepth"
                min="0"
                placeholder="3"
                class={NARROW_INPUT_CLASS}
              />
            </div>
            <div>
              <div class="flex items-center">
                <label for="scope" class={FIELD_LABEL_CLASS}>
                  Scope
                </label>
                <Tooltip
                  text={
                    <div>
                      Controls which pages are scraped:
                      <ul class="list-disc pl-5">
                        <li>
                          'Subpages' only scrapes under the given URL path,
                        </li>
                        <li>
                          'Hostname' scrapes all content on the same host (e.g.,
                          all of docs.example.com),
                        </li>
                        <li>
                          'Domain' scrapes all content on the domain and its
                          subdomains (e.g., all of example.com).
                        </li>
                      </ul>
                    </div>
                  }
                />
              </div>
              <select name="scope" id="scope" class={SELECT_CLASS}>
                <option value="subpages" selected>
                  Subpages (Default)
                </option>
                <option value="hostname">Hostname</option>
                <option value="domain">Domain</option>
              </select>
            </div>
            <div>
              <div class="flex items-center">
                <label for="includePatterns" class={FIELD_LABEL_CLASS}>
                  Include Patterns
                </label>
                <Tooltip text="Glob or regex patterns for URLs to include. One per line or comma-separated. Regex patterns must be wrapped in slashes, e.g. /pattern/." />
              </div>
              <textarea
                name="includePatterns"
                id="includePatterns"
                rows="2"
                placeholder="e.g. docs/* or /api\/v1.*/"
                class={NARROW_INPUT_CLASS}
              ></textarea>
            </div>
            <div>
              <div class="flex items-center">
                <label for="excludePatterns" class={FIELD_LABEL_CLASS}>
                  Exclude Patterns
                </label>
                <Tooltip text="Glob or regex patterns for URLs to exclude. One per line or comma-separated. Exclude takes precedence over include. Regex patterns must be wrapped in slashes, e.g. /pattern/. Edit or clear this field to customize exclusions." />
              </div>
              {/* NOTE(review): `safe` presumably makes the JSX runtime escape the
                  interpolated patterns - confirm against the JSX runtime in use. */}
              <textarea
                name="excludePatterns"
                id="excludePatterns"
                rows="5"
                safe
                class={`${NARROW_INPUT_CLASS} font-mono text-xs`}
              >
                {defaultExcludePatternsText}
              </textarea>
              <p class="mt-1 text-xs text-gray-500 dark:text-gray-400">
                Default patterns are pre-filled. Edit to customize or clear to
                exclude nothing.
              </p>
            </div>
            <div>
              <div class="flex items-center">
                <label for="scrapeMode" class={FIELD_LABEL_CLASS}>
                  Scrape Mode
                </label>
                <Tooltip
                  text={
                    <div>
                      <ul class="list-disc pl-5">
                        <li>'Auto' automatically selects the best method,</li>
                        <li>
                          'Fetch' uses simple HTTP requests (faster but may miss
                          dynamic content),
                        </li>
                        <li>
                          'Playwright' uses a headless browser (slower but
                          better for JS-heavy sites).
                        </li>
                      </ul>
                    </div>
                  }
                />
              </div>
              <select name="scrapeMode" id="scrapeMode" class={SELECT_CLASS}>
                <option value={ScrapeMode.Auto} selected>
                  Auto (Default)
                </option>
                <option value={ScrapeMode.Fetch}>Fetch</option>
                <option value={ScrapeMode.Playwright}>Playwright</option>
              </select>
            </div>
            <div>
              <div class="flex items-center mb-1">
                <label class={FIELD_LABEL_CLASS}>Custom HTTP Headers</label>
                <Tooltip text="Add custom HTTP headers (e.g., for authentication). These will be sent with every HTTP request." />
              </div>
              <div>
                {/* AlpineJS dynamic header rows */}
                <template x-for="(header, idx) in headers">
                  <div class="flex space-x-2 mb-1">
                    <input
                      type="text"
                      class="w-1/3 px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white text-xs"
                      placeholder="Header Name"
                      x-model="header.name"
                      required
                    />
                    <span class="text-gray-500">:</span>
                    <input
                      type="text"
                      class="w-1/2 px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white text-xs"
                      placeholder="Header Value"
                      x-model="header.value"
                      required
                    />
                    <button
                      type="button"
                      class="text-red-500 hover:text-red-700 text-xs"
                      x-on:click="headers.splice(idx, 1)"
                    >
                      Remove
                    </button>
                    {/* Only this hidden field is submitted: "name:value" per row. */}
                    <input
                      type="hidden"
                      name="header[]"
                      x-bind:value="header.name && header.value ? header.name + ':' + header.value : ''"
                    />
                  </div>
                </template>
                <button
                  type="button"
                  class="mt-1 px-2 py-0.5 bg-primary-100 dark:bg-primary-900 text-primary-700 dark:text-primary-200 rounded text-xs"
                  x-on:click="headers.push({ name: '', value: '' })"
                >
                  + Add Header
                </button>
              </div>
            </div>
            <div class="flex items-center">
              <input
                id="followRedirects"
                name="followRedirects"
                type="checkbox"
                checked
                class={CHECKBOX_CLASS}
              />
              <label for="followRedirects" class={CHECKBOX_LABEL_CLASS}>
                Follow Redirects
              </label>
            </div>
            <div class="flex items-center">
              <input
                id="ignoreErrors"
                name="ignoreErrors"
                type="checkbox"
                checked
                class={CHECKBOX_CLASS}
              />
              <label for="ignoreErrors" class={CHECKBOX_LABEL_CLASS}>
                Ignore Errors During Scraping
              </label>
            </div>
          </div>
        </details>

        <div>
          <button
            type="submit"
            class="w-full flex justify-center py-1.5 px-3 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-primary-600 hover:bg-primary-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-primary-500"
          >
            Queue Job
          </button>
        </div>
      </form>
      {/* Target div for HTMX response */}
      <div id="job-response" class="mt-2 text-sm"></div>

      {/*
        Script to handle HTMX error responses.
        The listener is attached to `document`, so it is registered at most once
        per page: if this fragment is rendered again, the guard prevents stacking
        duplicate handlers. The handler looks up #job-response at event time, so
        a single registration also serves later renders of the form.
      */}
      <script>
        {`
          if (!window.__scrapeFormResponseErrorBound) {
            window.__scrapeFormResponseErrorBound = true;
            document.addEventListener('htmx:responseError', function(evt) {
              // Handle error responses from the form submission
              if (evt.detail.xhr && evt.detail.xhr.response) {
                const responseDiv = document.getElementById('job-response');
                if (responseDiv) {
                  responseDiv.innerHTML = evt.detail.xhr.response;
                }
              }
            });
          }
        `}
      </script>
    </div>
  );
};

export default ScrapeFormContent;

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/arabold/docs-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.