docmcp

docmcp
src
scripts

add-docs.ts•16.1 KiB

#!/usr/bin/env node /** * Add Documentation CLI Script * * This script provides a command-line interface for adding documentation * to the DocMCP system. It's an alternative to using the MCP tool and allows * direct initiation of documentation crawling and processing from the terminal. * * @example * ``` * npm run add-docs -- --url https://reactjs.org/docs --max-depth 3 --tags react,frontend --wait * ``` */ // Load environment variables from .env file import * as dotenv from 'dotenv'; dotenv.config(); import yargs from 'yargs'; import { URL } from 'url'; import logger from '../utils/logger'; import { JobService } from '../services/job.service'; import { JobStatus, JobType } from '../generated/prisma'; import { getPrismaClient } from '../config/database'; import { startCrawlingProcess } from '../services/mcp-tools/add-documentation.tool'; import inquirer from 'inquirer'; // Define interface for the CLI arguments interface CliArgs { url: string; 'max-depth': number; name?: string; tags?: string[]; 'rate-limit': number; 'respect-robots-txt': boolean; wait: boolean; verbose: boolean; check: boolean; package?: string; version?: string; 'skip-prompts': boolean; [key: string]: unknown; } /** * Display job progress in real-time on the CLI * @param jobId The ID of the job to monitor * @param prisma PrismaClient instance * @returns A promise that resolves when the job completes or fails */ async function displayJobProgress(jobId: string, prisma: any): Promise<void> { return new Promise((resolve) => { const progressInterval = 2000; // Update progress every 2 seconds let lastProgress = -1; const interval = setInterval(async () => { try { const job = await prisma.job.findUnique({ where: { id: jobId }, select: { status: true, progress: true, stats: true, error: true, timeElapsed: true, timeRemaining: true } }); if (!job) { console.log(`Job ${jobId} not found`); clearInterval(interval); resolve(); return; } // Only display progress if it has changed const currentProgress = Math.floor(job.progress * 100); if (currentProgress !== lastProgress) { lastProgress = currentProgress; // Clear previous line and show progress process.stdout.write('\r\x1b[K'); // Clear line process.stdout.write(`Progress: ${currentProgress}% `); // Add status indicators if (job.stats) { process.stdout.write(`| Pages: ${job.stats.pagesProcessed || 0} processed, ${job.stats.pagesSkipped || 0} skipped `); } // Add time information if available if (job.timeElapsed !== null && job.timeRemaining !== null) { const formatTime = (seconds: number) => { if (seconds < 60) return `${seconds}s`; const mins = Math.floor(seconds / 60); const secs = seconds % 60; return `${mins}m ${secs}s`; }; process.stdout.write(`| Time: ${formatTime(job.timeElapsed || 0)} elapsed`); if (job.timeRemaining) { process.stdout.write(`, ~${formatTime(job.timeRemaining)} remaining`); } } } // Check if job is complete if (['completed', 'failed', 'cancelled', 'paused'].includes(job.status)) { clearInterval(interval); // Print a newline to ensure next output starts on a fresh line console.log(''); resolve(); } } catch (error) { console.error(`Error fetching job progress: ${error instanceof Error ? error.message : String(error)}`); clearInterval(interval); resolve(); } }, progressInterval); }); } /** * Prompt the user for package information if not provided via command line * @param args The parsed command line arguments * @returns Object containing package name and version */ async function promptForPackageInfo(args: CliArgs): Promise<{ packageName: string; packageVersion: string }> { // Use CLI args if provided if (args.package && args['skip-prompts']) { return { packageName: args.package, packageVersion: args.version || 'latest' }; } // Define answers interface interface PromptAnswers { packageName?: string; packageVersion?: string; } // If we have questions to ask, prompt the user const answers: PromptAnswers = {}; // Only ask for package name if not provided via CLI if (!args.package) { const packageNameResponse = await inquirer.prompt<{packageName: string}>({ type: 'input', name: 'packageName', message: 'What is the name of the package this documentation is for?', validate: (input: string) => { return input.trim().length > 0 ? true : 'Package name is required'; } }); answers.packageName = packageNameResponse.packageName; } // Always ask for version if not provided via CLI if (!args.version) { const versionResponse = await inquirer.prompt<{packageVersion: string}>({ type: 'input', name: 'packageVersion', message: 'What version of the package is this documentation for? (leave blank for "latest")', default: 'latest' }); answers.packageVersion = versionResponse.packageVersion; } return { packageName: args.package || answers.packageName || '', packageVersion: args.version || answers.packageVersion || 'latest' }; } /** * Main function to parse arguments and execute the documentation addition process */ async function main() { // Filter out the '--' argument that npm adds when running as 'npm run add-docs -- --args' const filteredArgs = process.argv.filter(arg => arg !== '--'); // Parse command line arguments using yargs and the filtered arguments const argv = yargs(filteredArgs) .usage('Usage: $0 --url <url> [options]') .option('url', { type: 'string', demandOption: true, describe: 'The URL of the documentation site to crawl and process' }) .option('max-depth', { type: 'number', default: 3, describe: 'Maximum crawl depth' }) .option('name', { type: 'string', describe: 'Friendly name for the source (defaults to hostname)' }) .option('tags', { type: 'string', describe: 'Comma-separated tags for categorization', coerce: (arg: string) => arg.split(',').map(tag => tag.trim()) }) .option('rate-limit', { type: 'number', default: 1000, describe: 'Milliseconds between requests' }) .option('respect-robots-txt', { type: 'boolean', default: true, describe: 'Whether to respect robots.txt rules' }) .option('wait', { type: 'boolean', default: false, describe: 'Wait for the job to complete before exiting' }) .option('verbose', { type: 'boolean', alias: 'v', default: false, describe: 'Enable more detailed logging' }) .option('check', { type: 'boolean', default: false, describe: 'Validate arguments without connecting to the database' }) .option('package', { type: 'string', describe: 'The name of the package this documentation is for' }) .option('version', { type: 'string', describe: 'The version of the package this documentation is for (defaults to "latest")' }) .option('skip-prompts', { type: 'boolean', default: false, describe: 'Skip interactive prompts and use provided CLI arguments or defaults' }) .check((argv) => { // Validate URL format try { new URL(argv.url as string); return true; } catch (error) { throw new Error('Invalid URL format. Please provide a valid URL with protocol (e.g., https://example.com).'); } }) .epilogue('For more information, see the documentation in the README.md file.') .help() .alias('help', 'h') .parseSync() as CliArgs; // Type assertion here try { // Enable verbose logging if the flag is set if (argv.verbose) { logger.level = 'debug'; logger.debug('Verbose logging enabled'); logger.debug('Parsed CLI arguments:', argv); } logger.info(`Starting documentation addition process for URL: ${argv.url}`); // Extract hostname and tags for displaying in check mode too const urlObj = new URL(argv.url); const name = argv['name'] || urlObj.hostname; const tags = argv.tags || [urlObj.hostname]; // Get package information - either from CLI args or interactive prompts let packageInfo; if (!argv.check) { packageInfo = await promptForPackageInfo(argv); logger.info(`Package information: ${packageInfo.packageName}@${packageInfo.packageVersion}`); } else { packageInfo = { packageName: argv.package || 'not specified', packageVersion: argv.version || 'latest' }; } // If in check mode, just validate and exit if (argv.check) { logger.info('Check mode: Validating arguments without database connection'); console.log('✅ Arguments validated successfully'); console.log('Configuration summary:'); console.log(` URL: ${argv.url}`); console.log(` Max Depth: ${argv['max-depth']}`); console.log(` Name: ${name}`); console.log(` Tags: ${tags.join(', ')}`); console.log(` Package: ${packageInfo.packageName}`); console.log(` Package Version: ${packageInfo.packageVersion}`); console.log(` Rate Limit: ${argv['rate-limit']}ms`); console.log(` Respect robots.txt: ${argv['respect-robots-txt']}`); console.log(` Wait for completion: ${argv.wait}`); process.exit(0); } try { // Connect to the database const prisma = getPrismaClient(); // Initialize the job service const jobService = new JobService(prisma); // Create job record in the database logger.info('Creating job record in database...'); // Create job with appropriate parameters const job = await jobService.createJob({ url: argv.url, status: JobStatus.pending, type: JobType.crawl, startDate: new Date(), progress: 0, endDate: null, error: null, stats: { pagesProcessed: 0, pagesSkipped: 0, totalChunks: 0 }, metadata: { sourceUrl: argv.url, sourceName: name, sourceTags: tags, crawlMaxDepth: argv['max-depth'], crawlRateLimit: argv['rate-limit'], crawlRespectRobotsTxt: argv['respect-robots-txt'], // Add package information to the metadata packageName: packageInfo.packageName, packageVersion: packageInfo.packageVersion } }); logger.info(`Job created successfully with ID: ${job.id}`); console.log(`Job ID: ${job.id}`); console.log(`Package mapping: ${packageInfo.packageName}@${packageInfo.packageVersion}`); // Implement core processing logic (Task 11.3) if (!argv.wait) { // Start the crawling process in the background startCrawlingProcess(job.id, { url: argv.url, maxDepth: argv['max-depth'], name: name, tags: tags, rateLimit: argv['rate-limit'], respectRobotsTxt: argv['respect-robots-txt'], // Pass package information to the crawling process packageName: packageInfo.packageName, packageVersion: packageInfo.packageVersion }).catch(error => { logger.error(`Unhandled error in background job ${job.id}:`, error); }); console.log(`Documentation processing started in the background.`); console.log(`Use 'mcp_docmcp_local_stdio_get_job_status' with jobId=${job.id} to check progress.`); // Exit after starting the process process.exit(0); } else { // Implement wait functionality (Task 11.4) console.log(`Waiting for job ${job.id} to complete...`); // When --wait is specified, start displaying progress console.log(`Real-time progress display enabled. Press Ctrl+C to stop (job will continue in background).`); // Start the process const crawlPromise = startCrawlingProcess(job.id, { url: argv.url, maxDepth: argv['max-depth'], name: name, tags: tags, rateLimit: argv['rate-limit'], respectRobotsTxt: argv['respect-robots-txt'], // Pass package information to the crawling process packageName: packageInfo.packageName, packageVersion: packageInfo.packageVersion, _bypassAsync: true // Run synchronously }); // Display progress while job is running const progressDisplayPromise = displayJobProgress(job.id, prisma); // Wait for both the crawl process and progress display to finish try { await Promise.all([crawlPromise, progressDisplayPromise]); // Get final job details const finalJob = await jobService.findJobById(job.id); if (!finalJob) { console.error(`Error: Job ${job.id} not found`); process.exit(1); } // Final status report console.log(`\nJob ${job.id} ${finalJob.status}!`); if (finalJob.status === 'completed') { const jobStats = finalJob.stats as Record<string, any>; if (jobStats) { console.log(`Processed ${jobStats.pagesProcessed || 0} pages`); console.log(`Created ${jobStats.totalChunks || 0} chunks`); console.log(`Skipped ${jobStats.pagesSkipped || 0} pages`); } console.log(`\nDocumentation is now available in the knowledge base.`); console.log(`Package mapping created for: ${packageInfo.packageName}@${packageInfo.packageVersion}`); process.exit(0); } else { console.error(`\nJob ended with status: ${finalJob.status}`); if (finalJob.error) { console.error(`Error: ${finalJob.error}`); } process.exit(finalJob.status === 'cancelled' || finalJob.status === 'paused' ? 0 : 1); } } catch (error) { console.error(`Error processing job: ${error instanceof Error ? error.message : String(error)}`); process.exit(1); } } } catch (dbError) { // Check for database connection errors const errorMessage = dbError instanceof Error ? dbError.message : String(dbError); if (errorMessage.includes("Can't reach database server") || errorMessage.includes("connection") || errorMessage.includes("ECONNREFUSED")) { logger.error('Database connection error'); console.error('\n❌ Database Connection Error'); console.error('-----------------------------'); console.error('Could not connect to the PostgreSQL database.'); console.error('\nPossible solutions:'); console.error('1. Make sure the PostgreSQL server is running'); console.error('2. Check that the database connection settings in .env are correct'); console.error('3. If using Docker, ensure the database container is up and running:'); console.error(' $ ./docker-start.sh'); console.error('\nSee README.md for more information on setting up the environment.'); process.exit(2); // Special exit code for database connection issues } // Re-throw other database errors throw dbError; } } catch (error) { logger.error('Error during documentation addition:', error); console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`); process.exit(1); } } // Execute the main function main().catch(error => { logger.error('Unhandled error in add-docs script:', error); console.error(`Fatal error: ${error instanceof Error ? error.message : 'Unknown error'}`); process.exit(1); });

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/visheshd/docmcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

add-docs.ts•16.1 KiB