index.ts (4.63 kB)
#!/usr/bin/env node
import { Command } from 'commander';
import { fetch, CrawlOptions } from '@just-every/crawl';
import { fetchMarkdown } from './internal/fetchMarkdown.js';
import { readFileSync } from 'fs';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Read package.json for version
const packageJson = JSON.parse(
    readFileSync(join(__dirname, '../package.json'), 'utf-8')
);

const program = new Command();

program
    .name('mcp')
    .description(
        'Markdown Content Preprocessor - Extract and convert web content to clean Markdown'
    )
    .version(packageJson.version);

program
    .command('fetch <url>')
    .description('Fetch a URL and convert to Markdown')
    .option('-p, --pages <number>', 'Maximum number of pages to crawl', '1')
    .option('-c, --concurrency <number>', 'Max concurrent requests', '3')
    .option('--no-robots', 'Ignore robots.txt')
    .option('--all-origins', 'Allow cross-origin crawling')
    .option('-u, --user-agent <string>', 'Custom user agent')
    .option('--cache-dir <path>', 'Cache directory', '.cache')
    .option('-t, --timeout <ms>', 'Request timeout in milliseconds', '30000')
    .option('--cookies-file <path>', 'Path to Netscape cookie file for authenticated pages')
    .option(
        '-o, --output <format>',
        'Output format: json, markdown, or both',
        'markdown'
    )
    .action(async (url: string, options) => {
        try {
            const pages = parseInt(options.pages, 10);
            const depth = pages > 1 ? 1 : 0; // If more than 1 page requested, crawl 1 level deep

            const crawlOptions: CrawlOptions = {
                depth: depth,
                maxConcurrency: parseInt(options.concurrency, 10),
                respectRobots: options.robots,
                sameOriginOnly: !options.allOrigins,
                userAgent: options.userAgent,
                cacheDir: options.cacheDir,
                timeout: parseInt(options.timeout, 10),
            };

            // Optional cookie support (available in newer @just-every/crawl)
            if (options.cookiesFile) {
                (crawlOptions as any).cookiesFile = options.cookiesFile;
            }

            console.error(`Fetching ${url}...`);

            if (options.output === 'json') {
                const results = await fetch(url, crawlOptions);
                console.log(JSON.stringify(results, null, 2));
            } else if (options.output === 'markdown') {
                const result = await fetchMarkdown(url, {
                    ...crawlOptions,
                    maxPages: pages,
                });

                // Output the combined markdown
                if (result.markdown) {
                    console.log(result.markdown);
                }

                // Show error if any
                if (result.error) {
                    console.error(`Error: ${result.error}`);
                }
            } else if (options.output === 'both') {
                const results = await fetch(url, crawlOptions);
                results.forEach((result: any) => {
                    console.log(`\n## URL: ${result.url}\n`);
                    if (result.markdown) {
                        console.log(result.markdown);
                    }
                    if (result.error) {
                        console.error(
                            `${result.markdown ? 'Warning' : 'Error'}: ${result.error}`
                        );
                    }
                });
            }
        } catch (error) {
            console.error(
                'Error:',
                error instanceof Error ? error.message : error
            );
            process.exit(1);
        }
    });

program
    .command('clear-cache')
    .description('Clear the cache directory')
    .option('--cache-dir <path>', 'Cache directory', '.cache')
    .action(async options => {
        try {
            const { rm } = await import('fs/promises');
            await rm(options.cacheDir, { recursive: true, force: true });
            console.log(`Cache cleared: ${options.cacheDir}`);
        } catch (error) {
            console.error('Error clearing cache:', error);
            process.exit(1);
        }
    });

program
    .command('serve')
    .description('Run as an MCP server')
    .action(async () => {
        // Import and run the serve module
        await import('./serve.js');
    });

program.parse();
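
For reference, a few example invocations of the CLI defined above. These assume the package's binary is installed as mcp (matching program.name above); the URL is a placeholder, and all flags come from the .option() declarations in the source:

mcp fetch https://example.com
mcp fetch https://example.com --pages 5 --output both
mcp clear-cache --cache-dir .cache
mcp serve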

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/just-every/mcp-read-website-fast'
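
Equivalently, here is a minimal TypeScript sketch of the same request, assuming Node 18+ (which provides a global fetch) and a JSON response body; the response schema is not documented here, so it is treated as unknown:

// Fetch the directory entry for this server from the Glama MCP API.
const res = await fetch(
    'https://glama.ai/api/mcp/v1/servers/just-every/mcp-read-website-fast'
);
if (!res.ok) {
    throw new Error(`Request failed: HTTP ${res.status}`);
}
const server = await res.json(); // shape is an assumption; consult the API docs
console.log(server);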

If you have feedback or need assistance with the MCP directory API, please join our Discord server.