index.ts (3.73 kB)
import { JobQueueManager, CrawlJobData } from './queue/job-queue';
import { GitHubCrawler } from './crawlers/github-crawler';
import { DocsScraper } from './crawlers/docs-scraper';
import { MarkdownParser } from './parsers/markdown-parser';
import { CodeExtractor } from './parsers/code-extractor';

/**
 * Crawler Engine Main Entry Point
 * Orchestrates crawling, parsing, and processing of documentation
 */
export class CrawlerEngine {
  private jobQueue: JobQueueManager;
  private githubCrawler: GitHubCrawler;
  private docsScraper: DocsScraper;
  private markdownParser: MarkdownParser;
  private codeExtractor: CodeExtractor;

  constructor(
    private redisHost: string = 'localhost',
    private redisPort: number = 6379,
  ) {
    this.jobQueue = new JobQueueManager(redisHost, redisPort);
    this.githubCrawler = new GitHubCrawler();
    this.docsScraper = new DocsScraper('');
    this.markdownParser = new MarkdownParser();
    this.codeExtractor = new CodeExtractor();
  }

  /**
   * Initialize the crawler engine
   */
  async initialize(): Promise<void> {
    console.log('[CrawlerEngine] Initializing...');
    await this.jobQueue.initialize();
    console.log('[CrawlerEngine] Ready');
  }

  /**
   * Register crawl job processors
   */
  async registerProcessors(): Promise<void> {
    // GitHub crawler processor
    this.jobQueue.registerWorker('github-crawler', async (job) => {
      return this.githubCrawler.processJob(job);
    });

    // Docs scraper processor
    this.jobQueue.registerWorker('docs-scraper', async (job) => {
      return this.docsScraper.processJob(job);
    });

    console.log('[CrawlerEngine] Processors registered');
  }

  /**
   * Queue a library for crawling
   */
  async queueCrawl(data: CrawlJobData, priority: number = 10): Promise<string> {
    const job = await this.jobQueue.addJob(data, { priority });
    return job.id || '';
  }

  /**
   * Get crawl job status
   */
  async getJobStatus(jobId: string) {
    return this.jobQueue.getJobStatus(jobId);
  }

  /**
   * Get queue statistics
   */
  async getQueueStats() {
    return this.jobQueue.getQueueStats();
  }

  /**
   * Shutdown crawler engine
   */
  async shutdown(): Promise<void> {
    await this.jobQueue.shutdown();
    console.log('[CrawlerEngine] Shut down');
  }
}

// Export types and classes for external use
export { JobQueueManager, CrawlJobData, CrawlJobResult } from './queue/job-queue';
export { GitHubCrawler, ExtractedContent, GitHubFile } from './crawlers/github-crawler';
export { DocsScraper, ScrapedPage } from './crawlers/docs-scraper';
export { MarkdownParser, ParsedMarkdown, ParsedHeading } from './parsers/markdown-parser';
export { CodeExtractor, ExtractedCodeExample, CodeAnalysis } from './parsers/code-extractor';

// Main execution
async function main() {
  const engine = new CrawlerEngine(
    process.env.REDIS_HOST || 'localhost',
    parseInt(process.env.REDIS_PORT || '6379', 10),
  );

  try {
    await engine.initialize();
    await engine.registerProcessors();
    console.log('[CrawlerEngine] Started and ready for jobs');

    // Log queue statistics every minute
    setInterval(async () => {
      const stats = await engine.getQueueStats();
      console.log('[CrawlerEngine] Queue stats:', stats);
    }, 60000);

    // Handle graceful shutdown
    process.on('SIGTERM', async () => {
      console.log('[CrawlerEngine] Received SIGTERM, shutting down...');
      await engine.shutdown();
      process.exit(0);
    });
  } catch (error) {
    console.error('[CrawlerEngine] Failed to start:', error);
    process.exit(1);
  }
}

// Run if executed directly
if (require.main === module) {
  main().catch(console.error);
}

export default CrawlerEngine;
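
For reference, a minimal usage sketch of the class above. The CrawlJobData fields shown are hypothetical, since the real shape is defined in ./queue/job-queue and is not visible here; priority semantics also depend on the JobQueueManager implementation.

import CrawlerEngine, { CrawlJobData } from './index';

async function run(): Promise<void> {
  // Connect to a local Redis instance (assumed defaults)
  const engine = new CrawlerEngine('localhost', 6379);
  await engine.initialize();
  await engine.registerProcessors();

  // Field names below are illustrative only; consult CrawlJobData in
  // ./queue/job-queue for the actual shape.
  const data = {
    source: 'github-crawler',
    url: 'https://github.com/example/repo',
  } as CrawlJobData;

  // Queue a crawl; what the priority number means is defined by JobQueueManager
  const jobId = await engine.queueCrawl(data, 5);
  console.log('Queued job', jobId, await engine.getJobStatus(jobId));

  await engine.shutdown();
}

run().catch(console.error);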

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/aiatamai/atamai-mcp'
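
The same endpoint can be called from TypeScript; here is a minimal sketch using the built-in fetch available in Node 18+. The response shape is not documented here, so it is left untyped.

// Fetch metadata for this server from the Glama MCP directory API
async function getServerInfo(): Promise<unknown> {
  const res = await fetch('https://glama.ai/api/mcp/v1/servers/aiatamai/atamai-mcp');
  if (!res.ok) {
    throw new Error(`Request failed: ${res.status} ${res.statusText}`);
  }
  return res.json();
}

getServerInfo().then((info) => console.log(info)).catch(console.error);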

If you have feedback or need assistance with the MCP directory API, please join our Discord server.