
MCP Orchestration Server

process-document-simple.js
// Simplified Netlify function to process documents without complex dependencies
const busboy = require('busboy');
const { Readable } = require('stream');
const crypto = require('crypto');
const mongoose = require('mongoose');
const { autoConnect, getConnectionStatus } = require('./utils/mongodb_connection');

// Parse multipart form data with a timeout
const parseMultipartForm = (event, maxTimeMs = 3000) => {
  return new Promise((resolve, reject) => {
    // Set a timeout for the entire parsing operation
    const timeout = setTimeout(() => {
      reject(new Error('Form parsing timeout'));
    }, maxTimeMs);

    try {
      // Check if the body is base64 encoded
      let buffer;
      if (event.isBase64Encoded) {
        buffer = Buffer.from(event.body, 'base64');
      } else {
        buffer = Buffer.from(event.body);
      }

      // Log the size of the request body
      console.log(`Request body size: ${buffer.length} bytes`);

      // Create a readable stream from the buffer
      const stream = Readable.from([buffer]);

      // Store additional fields
      const fields = { filename: 'unknown.file', documentType: 'unknown' };

      // Store file data
      let fileData = null;

      // Get the content type and boundary
      const contentType = event.headers['content-type'] || event.headers['Content-Type'] || '';

      // Parse the multipart form data with higher limits
      const bb = busboy({
        headers: { 'content-type': contentType },
        limits: {
          fieldSize: 5 * 1024 * 1024, // 5MB
          fields: 10,
          fileSize: 10 * 1024 * 1024, // 10MB
          files: 1
        }
      });

      // Handle file fields
      bb.on('file', (name, file, info) => {
        const { filename, encoding, mimeType } = info;
        console.log(`Processing file: ${filename}, mimetype: ${mimeType}`);

        // Store the filename and determine document type
        fields.filename = filename;
        if (mimeType === 'application/pdf' || filename.toLowerCase().endsWith('.pdf')) {
          fields.documentType = 'pdf';
        } else if (
          mimeType.startsWith('image/') ||
          ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff'].some(ext => filename.toLowerCase().endsWith(ext))
        ) {
          fields.documentType = 'image';
        }

        const chunks = [];
        let fileSize = 0;

        file.on('data', data => {
          chunks.push(data);
          fileSize += data.length;
        });

        file.on('end', () => {
          console.log(`File processing complete: ${filename}, total size: ${fileSize} bytes`);
          fileData = Buffer.concat(chunks);
        });
      });

      // Handle non-file fields
      bb.on('field', (name, value) => {
        // Store special fields
        if (name === 'filename') {
          fields.filename = value;
        } else if (name === 'document_type') {
          fields.documentType = value.toLowerCase();
        } else if (name === 'save_to_mongodb' || name === 'saveToMongoDB') {
          // Capture the save flag; the handler reads it after parsing completes
          fields.save_to_mongodb = value;
        }
      });

      // Handle the end of parsing
      bb.on('finish', () => {
        clearTimeout(timeout);
        resolve({ fields, fileData });
      });

      // Handle errors
      bb.on('error', err => {
        clearTimeout(timeout);
        reject(new Error(`Error parsing form data: ${err.message}`));
      });

      // Pipe the stream to busboy
      stream.pipe(bb);
    } catch (error) {
      clearTimeout(timeout);
      reject(error);
    }
  });
};

// Generate a mock MongoDB ID
function generateMongoDBId() {
  return crypto.randomBytes(12).toString('hex');
}

// Process a PDF document (mock implementation)
function processPdfDocument(fileData, filename) {
  // Generate mock extracted text based on the filename
  const extractedText = `This is mock extracted text from the PDF file "${filename}". In a real application, this would contain the actual text extracted from the PDF document. The text would be processed by the OCR engine and then passed to the AI agent for analysis.

Sample content:
- Page 1: Introduction to the document
- Page 2: Main content and data
- Page 3: Conclusions and references

This is a simplified response due to processing constraints in the serverless environment.`;

  // Create a mock agent result
  const agentResult = {
    agent: 'PDFExtractorAgent',
    agent_id: `pdf_${Date.now()}`,
    input: { file: { name: filename } },
    output: {
      extracted_text: extractedText,
      structured_data: {
        tables: [
          {
            title: 'Sample Table',
            headers: ['ID', 'Name', 'Value'],
            rows: [
              ['1', 'Item 1', '100'],
              ['2', 'Item 2', '200'],
              ['3', 'Item 3', '300']
            ]
          }
        ]
      },
      metadata: { title: filename, author: 'System', pageCount: 3 },
      analysis: {
        summary: 'This document contains sample text and data.',
        language: 'en',
        wordCount: extractedText.split(/\s+/).length
      }
    },
    metadata: { status: 'completed', created_at: new Date().toISOString() },
    timestamp: new Date().toISOString()
  };

  return {
    extracted_text: extractedText,
    agent_result: agentResult,
    mongodb_id: generateMongoDBId()
  };
}

// Process an image document (mock implementation)
function processImageDocument(fileData, filename) {
  // Generate mock extracted text based on the filename
  const extractedText = `This is mock extracted text from the image file "${filename}". In a real application, this would contain the actual text extracted from the image using OCR. The text would be processed and analyzed for relevant information.

Sample extracted text:
- Header: Document Title
- Body: Main content with important information
- Footer: Page number and reference

This is a simplified response due to processing constraints in the serverless environment.`;

  // Create a mock agent result
  const agentResult = {
    agent: 'ImageOCRAgent',
    agent_id: `image_${Date.now()}`,
    input: { file: { name: filename } },
    output: {
      extracted_text: extractedText,
      confidence: 0.85,
      regions: [
        {
          boundingBox: { x: 10, y: 10, width: 500, height: 100 },
          text: 'Document Title',
          confidence: 0.9
        },
        {
          boundingBox: { x: 10, y: 120, width: 500, height: 300 },
          text: 'Main content with important information',
          confidence: 0.85
        }
      ],
      analysis: {
        summary: 'This image contains text content with a title and main body.',
        language: 'en',
        wordCount: extractedText.split(/\s+/).length
      }
    },
    metadata: { status: 'completed', created_at: new Date().toISOString() },
    timestamp: new Date().toISOString()
  };

  return {
    extracted_text: extractedText,
    agent_result: agentResult,
    mongodb_id: generateMongoDBId()
  };
}

// Save document to MongoDB
async function saveToMongoDB(document) {
  try {
    // Auto-connect to MongoDB
    await autoConnect();

    // Check connection status
    const status = getConnectionStatus();
    if (status.status !== 'connected') {
      console.warn('MongoDB not connected, cannot save document');
      return { success: false, error: 'MongoDB not connected', mongodb_id: generateMongoDBId() };
    }

    // Create a document to save
    const documentToSave = {
      agent: document.agent_result.agent,
      agent_id: document.agent_result.agent_id || `agent_${Date.now()}`,
      input: { file: { name: document.agent_result.input.file.name } },
      output: document.agent_result.output,
      metadata: document.agent_result.metadata,
      created_at: new Date(),
      updated_at: new Date()
    };

    // Save to MongoDB
    const collection = mongoose.connection.db.collection('agent_results');
    const result = await collection.insertOne(documentToSave);
    console.log(`Document saved to MongoDB with ID: ${result.insertedId}`);

    return { success: true, mongodb_id: result.insertedId.toString() };
  } catch (error) {
    console.error('Error saving to MongoDB:', error);
    return { success: false, error: error.message, mongodb_id: generateMongoDBId() };
  }
}

exports.handler = async function (event, context) {
  // Make the database connection reusable across function invocations
  context.callbackWaitsForEmptyEventLoop = false;

  // Handle OPTIONS requests for CORS
  if (event.httpMethod === 'OPTIONS') {
    return {
      statusCode: 200,
      headers: {
        'Access-Control-Allow-Origin': '*',
        'Access-Control-Allow-Headers': 'Content-Type, Accept, Authorization',
        'Access-Control-Allow-Methods': 'GET, POST, PUT, DELETE, OPTIONS',
        'Access-Control-Max-Age': '86400',
      },
      body: '',
    };
  }

  // Only handle POST requests
  if (event.httpMethod !== 'POST') {
    return {
      statusCode: 405,
      headers: {
        'Content-Type': 'application/json',
        'Access-Control-Allow-Origin': '*',
      },
      body: JSON.stringify({ error: 'Method not allowed', message: 'Only POST requests are allowed' }),
    };
  }

  try {
    // Warn when execution approaches the platform limit; note that a throw
    // inside a timer callback would not be caught by this try/catch, so log instead
    const functionTimeout = setTimeout(() => {
      console.error('Function execution exceeded 8 seconds');
    }, 8000);

    // Start MongoDB auto-connection in the background
    autoConnect().catch(err => {
      console.warn('MongoDB auto-connection failed:', err.message);
    });

    // Parse the multipart form data with a timeout
    console.log('Parsing multipart form data...');
    const { fields, fileData } = await parseMultipartForm(event).catch(error => {
      console.warn('Form parsing error, using fallback:', error.message);
      return {
        fields: { filename: 'fallback.pdf', documentType: 'pdf', save_to_mongodb: 'true' },
        fileData: null
      };
    });

    console.log(`Filename: ${fields.filename}, Document Type: ${fields.documentType}`);

    // Check if we should save to MongoDB (named so it does not shadow the
    // saveToMongoDB function above)
    const shouldSaveToMongoDB = fields.save_to_mongodb === 'true' || fields.saveToMongoDB === 'true';

    // Process the document based on its type
    let result;
    if (fields.documentType === 'pdf') {
      result = processPdfDocument(fileData, fields.filename);
    } else if (fields.documentType === 'image') {
      result = processImageDocument(fileData, fields.filename);
    } else {
      // Default to PDF processing
      result = processPdfDocument(fileData, fields.filename);
    }

    // Save to MongoDB if requested
    if (shouldSaveToMongoDB) {
      console.log('Saving document to MongoDB...');

      // Wrap the save operation so any error resolves to a failure result
      const savePromise = async () => {
        try {
          return await saveToMongoDB(result);
        } catch (error) {
          console.error('Error in saveToMongoDB:', error);
          return { success: false, error: error.message, mongodb_id: generateMongoDBId() };
        }
      };

      // Resolve with a failure result if the save takes longer than 3 seconds
      const saveTimeoutPromise = new Promise(resolve => {
        setTimeout(() => {
          resolve({ success: false, error: 'MongoDB save timeout', mongodb_id: generateMongoDBId() });
        }, 3000);
      });

      // Race the save operation against the timeout
      const saveResult = await Promise.race([savePromise(), saveTimeoutPromise]);

      // Update the result with the MongoDB ID
      result.mongodb_id = saveResult.mongodb_id;
      if (saveResult.success) {
        console.log(`Document saved to MongoDB with ID: ${result.mongodb_id}`);
      } else {
        console.warn(`Failed to save to MongoDB: ${saveResult.error}`);
      }
    } else {
      // Generate a mock MongoDB ID if not saving to MongoDB
      result.mongodb_id = generateMongoDBId();
    }

    // Clear the function timeout
    clearTimeout(functionTimeout);

    // Return the result
    return {
      statusCode: 200,
      headers: {
        'Content-Type': 'application/json',
        'Access-Control-Allow-Origin': '*',
      },
      body: JSON.stringify(result),
    };
  } catch (error) {
    console.error('Error processing document:', error);

    // Return a fallback response (200 even for errors to avoid client-side error handling)
    return {
      statusCode: 200,
      headers: {
        'Content-Type': 'application/json',
        'Access-Control-Allow-Origin': '*',
      },
      body: JSON.stringify({
        extracted_text: 'Error processing document. This is a fallback response.',
        agent_result: {
          agent: 'FallbackAgent',
          agent_id: `fallback_${Date.now()}`,
          input: { file: { name: 'error.pdf' } },
          output: {
            extracted_text: 'Error processing document. This is a fallback response.',
            analysis: { summary: 'Document processing failed.' }
          },
          metadata: { status: 'error', error: error.message },
          timestamp: new Date().toISOString()
        },
        mongodb_id: generateMongoDBId(),
        error: error.message
      }),
    };
  }
};
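Once deployed, Netlify serves the function at its standard functions path. A minimal usage sketch, assuming the file lives in the site's functions directory (the domain and sample file below are placeholders; the form field names match the parser above):

curl -X POST 'https://your-site.netlify.app/.netlify/functions/process-document-simple' \
  -F 'file=@sample.pdf;type=application/pdf' \
  -F 'document_type=pdf' \
  -F 'save_to_mongodb=true'

The JSON response carries extracted_text, agent_result, and mongodb_id. When save_to_mongodb is true and the connection succeeds, mongodb_id is the inserted document's ObjectId; otherwise it is a locally generated hex ID of the same shape.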

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Nisarg-123-web/MCP2'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.