mcmodding-mcp

Overview Schema Related Servers Score Discussions

mcmodding-mcp
scripts

index-docs.ts•11.4 KiB

#!/usr/bin/env tsx
/* eslint-disable no-console */
/* eslint-disable no-undef */

/**
 * Advanced documentation indexing script with full feature support
 * - Sitemap-based URL discovery
 * - Semantic search embeddings
 * - Version-aware indexing
 * - Incremental updates
 */

import { mkdir } from 'fs/promises';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import os from 'os';
import { DocumentCrawler, getFabricDocumentationUrls } from '../src/indexer/crawler.js';
import { DocumentChunker } from '../src/indexer/chunker.js';
import { DocumentStore } from '../src/indexer/store.js';
import {
  getFabricUrlsFromSitemap,
  getFabricWikiUrlsFromSitemap,
  getNeoforgeUrlsFromSitemap,
} from '../src/indexer/sitemap.js';
import { EmbeddingGenerator } from '../src/indexer/embeddings.js';

const __dirname = dirname(fileURLToPath(import.meta.url));

/** Number of bytes in one GiB; used for memory thresholds and display. */
const BYTES_PER_GIB = 1024 * 1024 * 1024;

/** Model label recorded alongside every stored embedding. */
const EMBEDDING_MODEL = 'Xenova/all-MiniLM-L6-v2';

/** Options accepted by {@link main}. Every field is optional. */
interface IndexOptions {
  /** When set, the incremental "unchanged" check is bypassed and every crawled page is stored. */
  force?: boolean;
  /** When set (and `force` is not), pages for which the store reports no update needed are skipped. */
  incremental?: boolean;
  /** Discover URLs from the sitemap helpers instead of the static URL list. */
  useSitemap?: boolean;
  /** Generate and store an embedding for every stored chunk. */
  generateEmbeddings?: boolean;
  /**
   * Number of pending chunks that triggers an embedding flush; also forwarded
   * to the embedding generator. When omitted (or not a positive number) the
   * size is derived from system resources.
   */
  embeddingsBatchSize?: number;
}

/**
 * Calculate an embedding batch size from the machine's free memory and CPU count.
 *
 * Logs the detected resources and the chosen size to stdout.
 *
 * @returns A whole-number batch size between 10 and 200.
 */
function calculateOptimalBatchSize(): number {
  const totalMem = os.totalmem();
  const freeMem = os.freemem();
  const cpus = os.cpus().length;

  console.log('🖥️ System Resources:');
  console.log(` • CPU Cores: ${cpus}`);
  console.log(` • Total Memory: ${(totalMem / BYTES_PER_GIB).toFixed(2)} GB`);
  console.log(` • Free Memory: ${(freeMem / BYTES_PER_GIB).toFixed(2)} GB`);

  // Pick a base size from free memory
  // (conservative: 100MB per batch of embeddings approx).
  let batchSize: number;
  if (freeMem > 8 * BYTES_PER_GIB) {
    batchSize = 100;
  } else if (freeMem > 4 * BYTES_PER_GIB) {
    batchSize = 50;
  } else if (freeMem > 2 * BYTES_PER_GIB) {
    batchSize = 30;
  } else {
    batchSize = 10; // Low memory mode
  }

  // Scale up on machines with more cores, within a hard cap per tier.
  if (cpus >= 16) {
    batchSize = Math.min(batchSize * 2, 200);
  } else if (cpus >= 8) {
    batchSize = Math.min(batchSize * 1.5, 150);
  }

  // The 1.5x multiplier can produce a fraction; always report and return an integer.
  const optimized = Math.floor(batchSize);
  console.log(`⚡ Optimized Batch Size: ${optimized}`);
  return optimized;
}

/**
 * Read the optional `-b <n>` / `--batch-size <n>` / `--batch-size=<n>` argument.
 *
 * @param argv Command-line arguments (without the node/script prefix).
 * @returns The requested batch size, or `undefined` when the flag is absent.
 * @throws Error when the flag is present but its value is not a positive integer.
 */
function parseBatchSizeArg(argv: readonly string[]): number | undefined {
  const inlinePrefix = '--batch-size=';
  let raw: string | undefined;

  argv.forEach((arg, index) => {
    if (arg === '--batch-size' || arg === '-b') {
      // A missing value becomes '' so that it is rejected below.
      raw = argv[index + 1] ?? '';
    } else if (arg.startsWith(inlinePrefix)) {
      raw = arg.slice(inlinePrefix.length);
    }
  });

  if (raw === undefined) {
    return undefined;
  }

  const value = Number(raw);
  if (!Number.isInteger(value) || value <= 0) {
    throw new Error(`Invalid --batch-size value "${raw}": expected a positive integer`);
  }
  return value;
}

/**
 * Build the list of URLs to crawl.
 *
 * @param useSitemap When true, URLs are gathered from the three sitemap
 *   helpers; if they yield nothing, the static list is used instead.
 * @returns The URLs to crawl.
 */
async function collectUrls(useSitemap: boolean): Promise<string[]> {
  if (!useSitemap) {
    return getFabricDocumentationUrls();
  }

  console.log('📡 Fetching URLs from sitemap...');
  const fabricWikiUrls = await getFabricWikiUrlsFromSitemap();
  const fabricUrls = await getFabricUrlsFromSitemap();
  const neoforgeUrls = await getNeoforgeUrlsFromSitemap();
  const sitemapUrls = [...fabricWikiUrls, ...fabricUrls, ...neoforgeUrls];

  if (sitemapUrls.length === 0) {
    console.log('⚠️ Sitemap fetch failed, falling back to static list');
    return getFabricDocumentationUrls();
  }

  console.log(`✅ Fetched ${sitemapUrls.length} URLs from sitemap`);
  return sitemapUrls;
}

/**
 * Print the store's statistics block to stdout.
 *
 * @param store Document store to read statistics from.
 * @param includeEmbeddings Whether to also print embedding totals and models.
 */
function printStatistics(store: DocumentStore, includeEmbeddings: boolean): void {
  console.log('📊 Indexing Statistics:');
  const stats = store.getStats();
  console.log(` • Total Documents: ${stats.totalDocuments}`);
  console.log(` • Total Sections: ${stats.totalSections}`);
  console.log(` • Total Code Blocks: ${stats.totalCodeBlocks}`);
  console.log(` • Fabric Docs: ${stats.loaders.fabric}`);
  console.log(` • NeoForge Docs: ${stats.loaders.neoforge}`);
  console.log(` • Shared Docs: ${stats.loaders.shared}`);

  // Show version breakdown
  const versions = store.getAllVersions();
  if (versions.length > 0) {
    console.log(` • Minecraft Versions: ${versions.join(', ')}`);
  }

  // Show embedding stats
  if (includeEmbeddings) {
    const embStats = store.getEmbeddingStats();
    console.log(` • Total Embeddings: ${embStats.totalEmbeddings}`);
    if (embStats.models.length > 0) {
      const modelSummary = embStats.models.map((m) => `${m.model} (${m.count})`).join(', ');
      console.log(` • Embedding Models: ${modelSummary}`);
    }
  }

  console.log(` • Last Updated: ${stats.lastUpdated.toISOString()}`);
  console.log(` • Index Version: ${stats.version}`);
}

/**
 * Crawl the documentation, store documents and chunks, optionally generate
 * embeddings, and print a summary.
 *
 * The store is always closed before this function finishes. If indexing
 * fails, the error is logged and the process exits with status 1 after the
 * store has been closed.
 *
 * @param options Indexing options; see {@link IndexOptions}.
 */
async function main(options: IndexOptions = {}): Promise<void> {
  console.log('🚀 Starting Advanced Documentation Indexing...\n');

  // Ensure data directory exists
  const dataDir = join(__dirname, '..', 'data');
  await mkdir(dataDir, { recursive: true });

  const dbPath = join(dataDir, 'mcmodding-docs.db');
  const store = new DocumentStore(dbPath);
  let failed = false;

  try {
    const urls = await collectUrls(options.useSitemap === true);
    console.log(`📋 Found ${urls.length} documentation pages to index\n`);

    // Initialize crawler with progress tracking
    const crawler = new DocumentCrawler({
      maxConcurrency: 3,
      delayMs: 1000,
      retryAttempts: 3,
    });

    crawler.setProgressCallback((progress) => {
      // Guard against a zero total so the display never shows NaN%.
      const percent =
        progress.total > 0 ? Math.round((progress.completed / progress.total) * 100) : 0;
      const eta = progress.estimatedTimeRemaining
        ? ` | ETA: ${Math.round(progress.estimatedTimeRemaining)}s`
        : '';
      process.stdout.write(
        `\r⏳ Progress: ${progress.completed}/${progress.total} (${percent}%) | Failed: ${progress.failed}${eta} `
      );
    });

    // Crawl all pages
    console.log('🕷️ Crawling documentation...');
    const documents = await crawler.crawlAll(urls);
    console.log(`\n✅ Successfully crawled ${documents.length} pages\n`);

    // Initialize chunker
    const chunker = new DocumentChunker({
      maxChunkSize: 1000,
      overlapSize: 100,
      preserveCodeBlocks: true,
    });

    // Process and store documents
    console.log('💾 Storing documents and creating search indexes...');
    let processedCount = 0;
    let updatedCount = 0;
    let skippedCount = 0;
    let totalChunksToEmbed = 0;

    // Chunks waiting for an embedding; flushed whenever the batch size is reached.
    let pendingChunks: Array<{ id: string; content: string; documentId: number }> = [];

    // The generator only exists when embeddings were requested.
    const embeddingGen = options.generateEmbeddings ? new EmbeddingGenerator() : null;
    let embeddingBatchSize = 0;
    if (embeddingGen) {
      await embeddingGen.initialize();
      // Use the caller's size when it is a positive number; otherwise auto-size.
      const requested = options.embeddingsBatchSize;
      embeddingBatchSize =
        requested !== undefined && requested > 0 ? requested : calculateOptimalBatchSize();
    }

    // Generate and store embeddings for everything currently pending.
    const flushPendingEmbeddings = async (): Promise<void> => {
      if (!embeddingGen || pendingChunks.length === 0) {
        return;
      }

      const batch = pendingChunks;
      pendingChunks = []; // Clear for next batch

      const vectors = await embeddingGen.generateEmbeddings(
        batch.map((chunk) => chunk.content),
        embeddingBatchSize
      );

      const embeddings: Array<{ chunkId: string; embedding: number[] }> = [];
      batch.forEach((chunk, index) => {
        const embedding = vectors[index];
        if (embedding === undefined) {
          // Fail loudly rather than store a chunk without its vector.
          throw new Error(
            `Embedding generator returned ${vectors.length} vectors for ${batch.length} chunks`
          );
        }
        embeddings.push({ chunkId: chunk.id, embedding });
      });

      // Store embeddings immediately so the batch can be released.
      store.storeEmbeddings(embeddings, EMBEDDING_MODEL);

      // Hint the garbage collector when it is exposed on the global object.
      if (global.gc) {
        global.gc();
      }

      // Yield to event loop
      await new Promise<void>((resolve) => setImmediate(resolve));
    };

    for (const doc of documents) {
      try {
        // Incremental mode: skip documents the store reports as unchanged.
        if (options.incremental && !options.force && !store.needsUpdate(doc.url, doc.hash)) {
          skippedCount++;
          continue;
        }

        // Store document, then create and store its chunks
        const documentId = store.storeDocument(doc);
        const chunks = chunker.chunkDocument(doc);
        store.storeChunks(chunks, documentId);

        // Queue chunks for embedding, flushing whenever the threshold is reached.
        if (embeddingGen) {
          for (const chunk of chunks) {
            pendingChunks.push({ id: chunk.id, content: chunk.content, documentId });
            totalChunksToEmbed++;

            if (pendingChunks.length >= embeddingBatchSize) {
              await flushPendingEmbeddings();
            }
          }
        }

        updatedCount++;
        processedCount++;

        // Progress indicator
        process.stdout.write(
          `\r Processed: ${processedCount}/${documents.length} | Updated: ${updatedCount} | Skipped: ${skippedCount} `
        );
      } catch (error) {
        // A failing document is reported and indexing continues with the next one.
        console.error(`\n❌ Error processing ${doc.url}:`, error);
      }
    }

    // Process remaining chunks
    await flushPendingEmbeddings();

    console.log('\n');

    // Log embedding completion
    if (options.generateEmbeddings && totalChunksToEmbed > 0) {
      console.log(`✅ Embeddings generated and stored for ${totalChunksToEmbed} chunks\n`);
    }

    // Update timestamp
    store.updateTimestamp();

    // Show statistics
    printStatistics(store, options.generateEmbeddings === true);

    console.log(`\n✨ Indexing complete!`);
    console.log(` Updated: ${updatedCount} documents`);
    console.log(` Skipped: ${skippedCount} documents (no changes)`);
    console.log(` Database: ${dbPath}\n`);
  } catch (error) {
    console.error('\n💥 Indexing failed:', error);
    // Do not exit here: process.exit() would terminate before `finally` runs
    // and the store would never be closed.
    failed = true;
  } finally {
    store.close();
  }

  if (failed) {
    process.exit(1);
  }
}

// Parse command line arguments
const args = process.argv.slice(2);

// Show help
if (args.includes('--help') || args.includes('-h')) {
  console.log('Usage: npm run index-docs [options]');
  console.log('');
  console.log('Options:');
  console.log('  -f, --force           Force full re-index (ignore hashes)');
  console.log('  -i, --incremental     Incremental update (skip unchanged)');
  console.log('  -s, --sitemap         Fetch URLs from sitemap.xml');
  console.log('  -e, --embeddings      Generate semantic embeddings');
  console.log('  -b, --batch-size <n>  Embedding batch size (default: auto)');
  console.log('  -h, --help            Show this help message');
  console.log('');
  console.log('Examples:');
  console.log('  npm run index-docs                      # Standard index');
  console.log('  npm run index-docs -- --incremental     # Update only changed pages');
  console.log('  npm run index-docs -- --sitemap         # Use sitemap for URLs');
  console.log('  npm run index-docs -- --embeddings      # Generate embeddings');
  console.log('  npm run index-docs -- -i -s -e          # All features');
  process.exit(0);
}

const options: IndexOptions = {
  force: args.includes('--force') || args.includes('-f'),
  incremental: args.includes('--incremental') || args.includes('-i'),
  useSitemap: args.includes('--sitemap') || args.includes('-s'),
  generateEmbeddings: args.includes('--embeddings') || args.includes('-e'),
};

// Only set a batch size when the user asked for one, so that main() can
// otherwise derive it from system resources.
try {
  const requestedBatchSize = parseBatchSizeArg(args);
  if (requestedBatchSize !== undefined) {
    options.embeddingsBatchSize = requestedBatchSize;
  }
} catch (error) {
  console.error(error instanceof Error ? error.message : error);
  process.exit(1);
}

// Run indexer
console.log('Configuration:');
console.log(` • Force re-index: ${options.force ? 'Yes' : 'No'}`);
console.log(` • Incremental: ${options.incremental ? 'Yes' : 'No'}`);
console.log(` • Use sitemap: ${options.useSitemap ? 'Yes' : 'No'}`);
console.log(` • Generate embeddings: ${options.generateEmbeddings ? 'Yes' : 'No'}`);
console.log(` • Embedding batch size: ${options.embeddingsBatchSize ?? 'auto'}`);
console.log('');

main(options).catch((error) => {
  console.error('Fatal error:', error);
  process.exit(1);
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/OGMatrix/mcmodding-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

index-docs.ts•11.4 KiB