Skip to main content
Glama

Visum Thinker MCP Server

MIT License
process-pdf.js (14.7 kB)
#!/usr/bin/env node
/**
 * Direct PDF Processing Script
 * Processes large PDF files using the visum-thinker PDF processing capabilities
 */

import fs from 'fs';
import path from 'path';
import { pathToFileURL } from 'url';
import fsExtra from 'fs-extra';

// pdf-parse is imported dynamically to avoid startup issues. The resolved
// parser function is cached here after the first successful load.
let pdfParse = null;

// pdf.js build requested from pdf-parse on every call.
const PDFJS_VERSION = 'v1.10.100';

// Summary mode: chunks longer than the threshold keep only their head and tail.
const SUMMARY_THRESHOLD_CHARS = 10000;
const SUMMARY_HEAD_CHARS = 3000;
const SUMMARY_TAIL_CHARS = 1000;

// Import strategies for pdf-parse, tried in order until one yields a function.
const PDF_PARSE_LOADERS = [
  {
    failureLabel: 'Primary import failed:',
    load: async () => (await import('pdf-parse')).default
  },
  {
    failureLabel: 'Alternative import failed:',
    load: async () => (await import('pdf-parse/lib/pdf-parse.js')).default
  },
  {
    // CommonJS-style fallback.
    failureLabel: 'Require fallback failed:',
    load: async () => {
      const { createRequire } = await import('module');
      const require = createRequire(import.meta.url);
      return require('pdf-parse');
    }
  }
];

/**
 * Lazily loads the pdf-parse function, trying each import strategy in turn.
 *
 * A strategy that resolves to something other than a function counts as a
 * failure, so the next strategy still gets a chance and an unusable value is
 * never cached.
 *
 * @returns {Promise<Function|null>} The parser, or null if no strategy worked.
 */
async function initializePdfParser() {
  if (typeof pdfParse === 'function') {
    return pdfParse;
  }

  for (const { failureLabel, load } of PDF_PARSE_LOADERS) {
    try {
      const candidate = await load();
      if (typeof candidate !== 'function') {
        throw new Error(`PDF parser is not a function: ${typeof candidate}`);
      }
      pdfParse = candidate;
      return pdfParse;
    } catch (error) {
      console.error(failureLabel, error.message);
    }
  }

  return null;
}

/**
 * Validates the requested page range against the document length.
 *
 * @param {number} startPage - First page to process (1-based).
 * @param {number|null} endPage - Last page to process; null, undefined or 0 means "to the end".
 * @param {number} totalPages - Number of pages in the document.
 * @returns {{ firstPage: number, lastPage: number }} Inclusive range, clamped to the document.
 * @throws {Error} If a bound is not a positive integer or the range is empty.
 */
function resolvePageRange(startPage, endPage, totalPages) {
  if (!Number.isInteger(startPage) || startPage < 1) {
    throw new Error(`Invalid start page: ${startPage}`);
  }

  const hasExplicitEnd = endPage != null && endPage !== 0;
  if (hasExplicitEnd && (!Number.isInteger(endPage) || endPage < 1)) {
    throw new Error(`Invalid end page: ${endPage}`);
  }

  // Never run past the end of the document, otherwise the page counts lie.
  const lastPage = hasExplicitEnd ? Math.min(endPage, totalPages) : totalPages;
  if (startPage > lastPage) {
    throw new Error(
      `Invalid page range: start page ${startPage} is after last page ${lastPage}`
    );
  }

  return { firstPage: startPage, lastPage };
}

/**
 * Parses pages 1..lastPage once and records the text of every page separately.
 *
 * pdf-parse only accepts an upper bound (`max`), so it cannot extract an
 * arbitrary page window. Capturing each page through the `pagerender` hook
 * lets the caller assemble exact chunks without re-parsing the document.
 *
 * @param {Function} parser - The pdf-parse function.
 * @param {Buffer} pdfBuffer - Raw PDF bytes.
 * @param {number} lastPage - Highest page number to render.
 * @returns {Promise<Map<number, string>>} Text keyed by 1-based page number.
 */
async function extractPageTexts(parser, pdfBuffer, lastPage) {
  const pageTexts = new Map();
  let renderCalls = 0;

  const pagerender = async (pageData) => {
    renderCalls += 1;
    // Prefer the page's own index; fall back to call order (pages are rendered sequentially).
    const pageNumber = Number.isInteger(pageData.pageIndex)
      ? pageData.pageIndex + 1
      : renderCalls;

    const textContent = await pageData.getTextContent({
      normalizeWhitespace: false,
      disableCombineTextItems: false
    });

    // Mirrors pdf-parse's documented default renderer: items sharing a
    // baseline (transform[5]) are joined, a new baseline starts a new line.
    let lastY;
    let text = '';
    for (const item of textContent.items) {
      const y = item.transform[5];
      text += lastY === y || !lastY ? item.str : `\n${item.str}`;
      lastY = y;
    }

    pageTexts.set(pageNumber, text);
    return text;
  };

  await parser(pdfBuffer, { max: lastPage, version: PDFJS_VERSION, pagerender });
  return pageTexts;
}

/**
 * Joins the text of an inclusive page window, using pdf-parse's page separator.
 *
 * @param {Map<number, string>} pageTexts - Text keyed by 1-based page number.
 * @param {number} chunkStart - First page of the window.
 * @param {number} chunkEnd - Last page of the window.
 * @returns {string} Concatenated text; pages that failed to render contribute nothing.
 */
function collectChunkText(pageTexts, chunkStart, chunkEnd) {
  let text = '';
  for (let page = chunkStart; page <= chunkEnd; page++) {
    text += `\n\n${pageTexts.get(page) ?? ''}`;
  }
  return text;
}

/**
 * Shortens a long chunk to its head and tail for summary mode.
 *
 * @param {string} text - Full chunk text.
 * @returns {string} The text unchanged when short enough, otherwise head + marker + tail.
 */
function abbreviateForSummary(text) {
  if (text.length <= SUMMARY_THRESHOLD_CHARS) {
    return text;
  }
  return text.substring(0, SUMMARY_HEAD_CHARS) +
    "\n\n[...content abbreviated for summary...]\n\n" +
    text.substring(text.length - SUMMARY_TAIL_CHARS);
}

/**
 * Extracts the text of a PDF in page chunks and returns it with statistics.
 *
 * @param {string} filePath - Path to the PDF file.
 * @param {object} [options]
 * @param {number} [options.chunkSizePages=20] - Pages per chunk (positive integer).
 * @param {number} [options.startPage=1] - First page to process (1-based).
 * @param {number|null} [options.endPage=null] - Last page to process; null means the last page.
 * @param {boolean} [options.outputSummary=true] - Abbreviate chunks longer than 10,000 characters.
 * @param {string|null} [options.outputFile=null] - When set, results are written there as JSON.
 * @returns {Promise<object>} Result record: file info, page/chunk stats and `content`.
 * @throws {Error} If the file is missing, is not a .pdf, the parser cannot be
 *   loaded, or the options/page range are invalid.
 */
async function processLargePDF(filePath, options = {}) {
  const {
    chunkSizePages = 20,
    startPage = 1,
    endPage = null,
    outputSummary = true,
    outputFile = null
  } = options;

  try {
    console.log(`🔍 Processing PDF: ${filePath}`);

    // A zero/NaN chunk size would otherwise produce an endless or empty loop.
    if (!Number.isInteger(chunkSizePages) || chunkSizePages < 1) {
      throw new Error(`Invalid chunk size: ${chunkSizePages}`);
    }

    // Check if file exists
    if (!fs.existsSync(filePath)) {
      throw new Error(`PDF file not found at path: ${filePath}`);
    }

    // Check file size
    const stats = fs.statSync(filePath);
    const fileSizeMB = stats.size / (1024 * 1024);
    console.log(`📄 File size: ${fileSizeMB.toFixed(2)} MB`);

    if (!filePath.toLowerCase().endsWith('.pdf')) {
      throw new Error('File must be a PDF');
    }

    // Initialize PDF parser
    const parser = await initializePdfParser();
    if (!parser) {
      throw new Error('PDF parsing library could not be loaded');
    }

    // Read PDF
    console.log('📖 Reading PDF file...');
    const pdfBuffer = fs.readFileSync(filePath);

    // Get metadata (a one-page parse is enough to learn the page count)
    console.log('🔍 Extracting PDF metadata...');
    const pdfInfo = await parser(pdfBuffer, { max: 1, version: PDFJS_VERSION });
    const totalPages = pdfInfo.numpages;

    const { firstPage, lastPage } = resolvePageRange(startPage, endPage, totalPages);
    const pagesToProcess = lastPage - firstPage + 1;
    const chunks = Math.ceil(pagesToProcess / chunkSizePages);

    console.log(`📊 Total pages: ${totalPages}`);
    console.log(`📈 Processing: pages ${firstPage}-${lastPage} (${pagesToProcess} pages)`);
    console.log(`⚡ Chunks: ${chunks} chunks of ${chunkSizePages} pages each`);

    // Parse once. A failure here is reported per chunk below (best effort)
    // instead of aborting, so callers still receive a result record.
    let pageTexts = null;
    let extractionError = null;
    try {
      pageTexts = await extractPageTexts(parser, pdfBuffer, lastPage);
    } catch (error) {
      extractionError = error;
    }

    let processedContent = '';
    let totalCharacters = 0;

    // Assemble chunks
    for (let chunkIndex = 0; chunkIndex < chunks; chunkIndex++) {
      const chunkNumber = chunkIndex + 1;
      const chunkStart = firstPage + (chunkIndex * chunkSizePages);
      const chunkEnd = Math.min(chunkStart + chunkSizePages - 1, lastPage);

      console.log(`🔄 Processing chunk ${chunkNumber}/${chunks}: pages ${chunkStart}-${chunkEnd}`);

      if (extractionError) {
        console.error(`❌ Error processing chunk ${chunkNumber}:`, extractionError.message);
        processedContent += `\n\n=== CHUNK ${chunkNumber} ERROR ===\n\nFailed to process pages ${chunkStart}-${chunkEnd}: ${extractionError.message}\n\n`;
        continue;
      }

      const chunkContent = collectChunkText(pageTexts, chunkStart, chunkEnd);
      totalCharacters += chunkContent.length;

      // For summary mode, truncate very long chunks
      const chunkBody = outputSummary ? abbreviateForSummary(chunkContent) : chunkContent;
      processedContent += `\n\n=== CHUNK ${chunkNumber} (Pages ${chunkStart}-${chunkEnd}) ===\n\n${chunkBody}`;

      // Progress update
      const progress = (chunkNumber / chunks * 100).toFixed(1);
      console.log(`✅ Chunk ${chunkNumber} completed (${progress}% done)`);
    }

    // Create results
    const results = {
      filename: path.basename(filePath),
      fileSizeMB: fileSizeMB,
      totalPages: totalPages,
      processedPages: `${firstPage}-${lastPage}`,
      chunksUsed: chunks,
      chunkSize: chunkSizePages,
      summaryMode: outputSummary,
      originalCharacters: totalCharacters,
      processedCharacters: processedContent.length,
      processedAt: new Date(),
      content: processedContent
    };

    // Save to file if requested
    if (outputFile) {
      console.log(`💾 Saving results to: ${outputFile}`);
      await fsExtra.writeJson(outputFile, results, { spaces: 2 });
    }

    // Display summary
    console.log('\n🎉 PDF Processing Complete!');
    console.log('================================');
    console.log(`📄 File: ${results.filename}`);
    console.log(`📊 Size: ${results.fileSizeMB.toFixed(2)} MB`);
    console.log(`📖 Pages: ${results.totalPages} (processed ${results.processedPages})`);
    console.log(`⚡ Chunks: ${results.chunksUsed}`);
    console.log(`📝 Original: ${results.originalCharacters.toLocaleString()} chars`);
    console.log(`✨ Processed: ${results.processedCharacters.toLocaleString()} chars`);
    console.log(`🎯 Mode: ${results.summaryMode ? 'Summary (optimized)' : 'Full content'}`);

    // Show preview
    const preview = processedContent.substring(0, 500);
    console.log('\n📖 Content Preview:');
    console.log('==================');
    console.log(preview + (processedContent.length > 500 ? '...' : ''));

    return results;
  } catch (error) {
    console.error('❌ Error processing PDF:', error.message);
    throw error;
  }
}

/**
 * Builds the per-file output path used in separate mode.
 *
 * The directory of `outputFile` is kept, so results land next to the requested
 * output rather than in the current working directory.
 *
 * @param {string} outputFile - Output path given by the caller.
 * @param {number} index - Zero-based position of the PDF in the batch.
 * @param {string} filePath - Path of the PDF being processed.
 * @returns {string} Path of the form `<dir>/<outputName>_<n>_<pdfName>.json`.
 */
function buildSeparateOutputPath(outputFile, index, filePath) {
  const { dir, name } = path.parse(outputFile);
  return path.join(dir, `${name}_${index + 1}_${path.parse(filePath).name}.json`);
}

/**
 * Processes every PDF and concatenates the results into one knowledge base.
 *
 * @param {string[]} filePaths - PDFs to process, in order.
 * @param {string|null} outputFile - Optional JSON file for the merged result.
 * @param {object} processOptions - Options forwarded to processLargePDF.
 * @returns {Promise<object>} Merged record with per-file stats and combined content.
 */
async function processMergedPDFs(filePaths, outputFile, processOptions) {
  let mergedContent = '';
  const mergedResults = {
    files: [],
    totalPages: 0,
    totalSizeMB: 0,
    processedAt: new Date(),
    mergingStrategy: 'sequential',
    content: ''
  };

  for (const [index, filePath] of filePaths.entries()) {
    const documentNumber = index + 1;
    console.log(`\n📖 Processing file ${documentNumber}/${filePaths.length}: ${path.basename(filePath)}`);

    try {
      // Check if file exists
      if (!fs.existsSync(filePath)) {
        console.error(`⚠️ Skipping missing file: ${filePath}`);
        continue;
      }

      const result = await processLargePDF(filePath, {
        ...processOptions,
        outputFile: null // Don't save individual files when merging
      });

      // Add to merged knowledge
      mergedContent += `\n\n=== DOCUMENT ${documentNumber}: ${result.filename} ===\n`;
      mergedContent += `File Size: ${result.fileSizeMB.toFixed(2)} MB | Pages: ${result.totalPages} | Mode: ${result.summaryMode ? 'Summary' : 'Full'}\n`;
      mergedContent += `Processed: ${result.processedAt}\n\n`;
      mergedContent += result.content;

      // Track merged stats
      mergedResults.files.push({
        filename: result.filename,
        pages: result.totalPages,
        sizeMB: result.fileSizeMB,
        originalChars: result.originalCharacters,
        processedChars: result.processedCharacters
      });
      mergedResults.totalPages += result.totalPages;
      mergedResults.totalSizeMB += result.fileSizeMB;

      console.log(`✅ Added ${result.filename} to knowledge base`);
    } catch (fileError) {
      console.error(`❌ Failed to process ${filePath}:`, fileError.message);
      mergedContent += `\n\n=== DOCUMENT ${documentNumber}: ERROR ===\n`;
      mergedContent += `File: ${path.basename(filePath)}\n`;
      mergedContent += `Error: ${fileError.message}\n\n`;
    }
  }

  mergedResults.content = mergedContent;
  mergedResults.totalCharacters = mergedContent.length;

  // Save merged results
  if (outputFile) {
    console.log(`\n💾 Saving merged knowledge base to: ${outputFile}`);
    await fsExtra.writeJson(outputFile, mergedResults, { spaces: 2 });
  }

  // Display merged summary
  console.log('\n🎉 Multiple PDF Processing Complete!');
  console.log('=====================================');
  console.log(`📚 Files processed: ${mergedResults.files.length}`);
  console.log(`📄 Total pages: ${mergedResults.totalPages.toLocaleString()}`);
  console.log(`📊 Total size: ${mergedResults.totalSizeMB.toFixed(2)} MB`);
  console.log(`📝 Total content: ${mergedResults.totalCharacters.toLocaleString()} chars`);
  console.log(`🎯 Knowledge base: ${outputFile || 'In-memory only'}`);

  console.log('\n📚 Included Documents:');
  mergedResults.files.forEach((file, index) => {
    console.log(` ${index + 1}. ${file.filename} (${file.pages} pages, ${file.sizeMB.toFixed(1)}MB)`);
  });

  return mergedResults;
}

/**
 * Processes every PDF independently, collecting one result (or error) per file.
 *
 * @param {string[]} filePaths - PDFs to process, in order.
 * @param {string|null} outputFile - Optional base name for the per-file JSON outputs.
 * @param {object} processOptions - Options forwarded to processLargePDF.
 * @returns {Promise<object[]>} Results; failed files appear as `{ filename, error }`.
 */
async function processSeparatePDFs(filePaths, outputFile, processOptions) {
  const results = [];

  for (const [index, filePath] of filePaths.entries()) {
    console.log(`\n📖 Processing file ${index + 1}/${filePaths.length}: ${path.basename(filePath)}`);

    try {
      if (!fs.existsSync(filePath)) {
        console.error(`⚠️ Skipping missing file: ${filePath}`);
        continue;
      }

      const separateOutputFile = outputFile
        ? buildSeparateOutputPath(outputFile, index, filePath)
        : null;

      const result = await processLargePDF(filePath, {
        ...processOptions,
        outputFile: separateOutputFile
      });

      results.push(result);
      console.log(`✅ Completed ${result.filename}`);
    } catch (fileError) {
      console.error(`❌ Failed to process ${filePath}:`, fileError.message);
      results.push({
        filename: path.basename(filePath),
        error: fileError.message
      });
    }
  }

  console.log('\n🎉 Batch Processing Complete!');
  console.log('==============================');
  console.log(`📚 Files attempted: ${filePaths.length}`);
  console.log(`✅ Files successful: ${results.filter(r => !r.error).length}`);
  console.log(`❌ Files failed: ${results.filter(r => r.error).length}`);

  return results;
}

/**
 * Processes several PDFs, either merged into one knowledge base or separately.
 *
 * @param {string[]} filePaths - PDFs to process.
 * @param {object} [options] - processLargePDF options plus the two below.
 * @param {boolean} [options.merge=false] - Merge all documents into one result.
 * @param {string|null} [options.outputFile=null] - Output file (merged) or base name (separate).
 * @returns {Promise<object|object[]>} Merged record when `merge` is true, otherwise an array of results.
 */
async function processMultiplePDFs(filePaths, options = {}) {
  const { merge = false, outputFile = null, ...processOptions } = options;

  console.log(`🚀 Processing ${filePaths.length} PDF files...`);
  console.log(`📋 Mode: ${merge ? 'Merged knowledge base' : 'Separate processing'}`);

  return merge
    ? processMergedPDFs(filePaths, outputFile, processOptions)
    : processSeparatePDFs(filePaths, outputFile, processOptions);
}

const USAGE = `
🎯 Visum Thinker PDF Processor

Usage: node process-pdf.js <pdf-file(s)> [options]

Single File:
  node process-pdf.js document.pdf

Multiple Files:
  node process-pdf.js file1.pdf file2.pdf file3.pdf
  node process-pdf.js *.pdf
  node process-pdf.js ~/Documents/*.pdf

Options:
  --chunks=N          Number of pages per chunk (default: 20)
  --start=N           Starting page (default: 1)
  --end=N             Ending page (default: all)
  --full              Full content mode (default: summary)
  --output=file.json  Save results to JSON file
  --merge             Merge all files into single knowledge base
  --separate          Process files separately (default)

Examples:
  node process-pdf.js document.pdf
  node process-pdf.js *.pdf --merge --output=combined-knowledge.json
  node process-pdf.js file1.pdf file2.pdf --separate --chunks=10
  node process-pdf.js ~/Documents/*.pdf --merge --full
`;

/**
 * Converts a numeric option value into a positive integer.
 *
 * @param {string} flag - Option name, used in the error message.
 * @param {string|null} rawValue - Text after the `=`, or null when absent.
 * @returns {number} The parsed value.
 * @throws {Error} If the value is missing or not a positive integer.
 */
function parsePositiveInteger(flag, rawValue) {
  const value = rawValue === null ? Number.NaN : Number(rawValue);
  if (!Number.isInteger(value) || value < 1) {
    throw new Error(`${flag} expects a positive integer, got "${rawValue ?? ''}"`);
  }
  return value;
}

/**
 * Splits command-line arguments into file paths and processing options.
 *
 * @param {string[]} args - Arguments after the script name.
 * @returns {{ filePaths: string[], options: object, merge: boolean }}
 * @throws {Error} If an option has a missing or invalid value.
 */
function parseCliArgs(args) {
  const filePaths = [];
  const options = {};
  let merge = false;

  for (const arg of args) {
    if (!arg.startsWith('--')) {
      // This is a file path
      filePaths.push(arg);
      continue;
    }

    // Split on the FIRST '=' only, so values may themselves contain '='.
    const separatorIndex = arg.indexOf('=');
    const flag = separatorIndex === -1 ? arg : arg.slice(0, separatorIndex);
    const value = separatorIndex === -1 ? null : arg.slice(separatorIndex + 1);

    switch (flag) {
      case '--chunks':
        options.chunkSizePages = parsePositiveInteger(flag, value);
        break;
      case '--start':
        options.startPage = parsePositiveInteger(flag, value);
        break;
      case '--end':
        options.endPage = parsePositiveInteger(flag, value);
        break;
      case '--output':
        if (!value) {
          throw new Error('--output expects a file path');
        }
        options.outputFile = value;
        break;
      case '--full':
        options.outputSummary = false;
        break;
      case '--merge':
        merge = true;
        break;
      case '--separate':
        merge = false;
        break;
      default:
        console.warn(`⚠️ Ignoring unknown option: ${arg}`);
    }
  }

  return { filePaths, options, merge };
}

/**
 * CLI entry point: prints usage when called without arguments, otherwise
 * processes the given PDF(s) and exits with status 1 on failure.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = process.argv.slice(2);

  if (args.length === 0) {
    console.log(USAGE);
    return;
  }

  // Separate file paths from options
  let parsed;
  try {
    parsed = parseCliArgs(args);
  } catch (error) {
    console.error(`❌ ${error.message}`);
    process.exit(1);
  }
  const { filePaths, options, merge } = parsed;

  if (filePaths.length === 0) {
    console.error('❌ No PDF files specified');
    process.exit(1);
  }

  try {
    if (filePaths.length === 1) {
      // Single file processing
      await processLargePDF(filePaths[0], options);
    } else {
      // Merge into one knowledge base, or process files separately
      await processMultiplePDFs(filePaths, { ...options, merge });
    }
  } catch (error) {
    console.error('❌ Processing failed:', error.message);
    console.error('Full error:', error);
    process.exit(1);
  }
}

/**
 * Reports whether this module is the script Node was started with.
 *
 * The entry path is resolved through symlinks and converted to a proper file
 * URL, so the check also works on Windows, for paths that need URL-encoding
 * and for symlinked executables.
 *
 * @returns {boolean} True when the module is being run directly.
 */
function isExecutedDirectly() {
  const entryPath = process.argv[1];
  if (!entryPath) {
    return false;
  }
  try {
    return import.meta.url === pathToFileURL(fs.realpathSync(entryPath)).href;
  } catch (error) {
    // The entry path could not be resolved on disk; fall back to a textual match.
    return import.meta.url.endsWith(entryPath);
  }
}

if (isExecutedDirectly()) {
  main().catch(error => {
    console.error('❌ Fatal error:', error);
    process.exit(1);
  });
}

export { processLargePDF, processMultiplePDFs };

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/multiluca2020/visum-thinker-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.