// Node.js version - full PDF text extraction using pdf-parse
import fs from 'fs/promises';
import path from 'path';
import pdfParse from 'pdf-parse';
import { PDFDocument } from 'pdf-lib';
import { supabase, createSupabaseClient } from '../services/supabase.js';
import { generateEmbedding } from '../services/openai.js';
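
/**
 * Split text into chunks of at most `maxWords` whitespace-separated words.
 * Chunks do not overlap, and empty chunks are skipped.
 */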
function chunkText(text, maxWords = 500) {
  const words = text.split(/\s+/);
  const chunks = [];
  for (let i = 0; i < words.length; i += maxWords) {
    const chunk = words.slice(i, i + maxWords).join(' ');
    if (chunk.trim()) {
      chunks.push(chunk);
    }
  }
  return chunks;
}
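
/**
 * Extract the full text and metadata from a PDF file.
 * pdf-parse supplies the text and page count; pdf-lib supplies the
 * document-info fields (title, author, dates, ...). If no text can be
 * extracted (e.g. a scanned, image-only PDF), a metadata-based placeholder
 * is used so the document remains searchable. This function never throws;
 * extraction failures yield a minimal stub result.
 */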
async function processPDF(filePath, fileName) {
  try {
    console.log('[processPDF-Node] Extracting full text from PDF:', fileName);

    // Read the PDF file
    const dataBuffer = await fs.readFile(filePath);

    // Extract text using pdf-parse
    const pdfData = await pdfParse(dataBuffer);

    // Pull additional document-info metadata with pdf-lib
    let additionalMetadata = {};
    try {
      const pdfDoc = await PDFDocument.load(dataBuffer, { ignoreEncryption: true });
      additionalMetadata = {
        Title: pdfDoc.getTitle() || '',
        Author: pdfDoc.getAuthor() || '',
        Subject: pdfDoc.getSubject() || '',
        Creator: pdfDoc.getCreator() || '',
        Producer: pdfDoc.getProducer() || '',
        CreationDate: pdfDoc.getCreationDate()?.toISOString() || '',
        ModificationDate: pdfDoc.getModificationDate()?.toISOString() || ''
      };
    } catch (e) {
      console.warn('Could not extract additional metadata:', e.message);
    }

    const metadata = {
      pages: pdfData.numpages,
      info: {
        ...pdfData.info,
        ...additionalMetadata
      },
      version: pdfData.version || '1.0'
    };

    let fullText = pdfData.text || '';

    // If no text was extracted, fall back to the metadata as searchable content
    if (!fullText || fullText.trim().length === 0) {
      fullText = `PDF Document: ${metadata.info.Title || fileName}\n`;
      fullText += `Filename: ${fileName}\n`;
      fullText += `Author: ${metadata.info.Author || 'Unknown'}\n`;
      fullText += `Pages: ${metadata.pages}\n`;
      fullText += `Subject: ${metadata.info.Subject || 'N/A'}\n\n`;
      fullText += `Note: This PDF appears to contain images or scanned content without extractable text.`;
    } else {
      console.log(`[processPDF-Node] Extracted ${fullText.length} characters of text from ${metadata.pages} pages`);
    }

    // Clean up the extracted text
    fullText = fullText
      .replace(/\0/g, '') // Remove null bytes
      .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, '') // Remove remaining control characters (keeps \t, \n, \r)
      .replace(/\s+/g, ' ') // Collapse all whitespace runs, including newlines, to single spaces
      .trim();

    return {
      text: fullText,
      metadata
    };
  } catch (error) {
    console.error('Error processing PDF with pdf-parse:', error);
    return {
      text: `PDF Document: ${fileName}\nNote: Unable to extract content from this PDF.`,
      metadata: { pages: 0, info: {}, version: '1.0' }
    };
  }
}
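
/**
 * Upload a document (PDF or plain text) into Supabase for retrieval:
 * extract the text, insert a row into `documents`, then chunk the text,
 * embed each chunk, and insert the chunks into `document_chunks`.
 *
 * @param {Object} args
 * @param {string} args.file_path  Path to the file on disk (required).
 * @param {Object|null} [args.credentials]  Optional credentials for a
 *   per-request Supabase client; defaults to the shared client.
 * @returns {Promise<Object>} { success, message, document } on success,
 *   or { success: false, error } on failure.
 */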
export async function uploadDocument({ file_path, credentials = null }) {
  try {
    if (!file_path) {
      throw new Error('file_path is required');
    }
    console.log(`[uploadDocument-Node] Starting upload for: ${file_path}`);

    const filename = path.basename(file_path);
    const fileExt = path.extname(filename).toLowerCase();
    let content = '';
    let metadata = {};
    let documentType = 'text';

    // Handle different file types
    if (fileExt === '.pdf') {
      console.log('[uploadDocument-Node] Processing PDF file with full text extraction...');
      const pdfResult = await processPDF(file_path, filename);
      content = pdfResult.text;
      metadata = pdfResult.metadata;
      documentType = 'pdf';
    } else {
      // Everything else is read as a UTF-8 text file
      content = await fs.readFile(file_path, 'utf-8');
      documentType = 'text';
    }

    // Ensure content is not empty
    if (!content || content.trim().length === 0) {
      content = '[No readable content found]';
    }

    // Use provided credentials or fall back to the default client
    const supabaseClient = credentials ? createSupabaseClient(credentials) : supabase;
    if (!supabaseClient) {
      throw new Error('No Supabase client available');
    }

    // Store the document in the database. Content is stored as plain text
    // for both PDFs and text files, since PDF content has already been
    // extracted to text at this point.
    const { data: doc, error: docError } = await supabaseClient
      .from('documents')
      .insert({
        filename,
        content,
        content_type: 'text',
        metadata
      })
      .select()
      .single();
    if (docError) {
      throw new Error(`Failed to store document: ${docError.message}`);
    }

    // Chunk the text content
    const chunks = chunkText(content, 500);
    console.log(`[uploadDocument-Node] Processing ${chunks.length} text chunks for ${filename}...`);

    // Embed and store the chunks in parallel for better throughput. This
    // fires one embedding request per chunk at once, so very large documents
    // may need batching or a concurrency limit to respect provider rate limits.
    const chunkPromises = chunks.map(async (chunkContent, index) => {
      try {
        const embedding = await generateEmbedding(chunkContent, credentials);
        const { error: chunkError } = await supabaseClient
          .from('document_chunks')
          .insert({
            document_id: doc.id,
            content: chunkContent,
            embedding
          });
        if (chunkError) {
          console.error(`Failed to store chunk ${index + 1}: ${chunkError.message}`);
          return false;
        }
        return true;
      } catch (error) {
        console.error(`Failed to process chunk ${index + 1}: ${error.message}`);
        return false;
      }
    });
    const results = await Promise.all(chunkPromises);
    const successfulChunks = results.filter(Boolean).length;
    console.log(`[uploadDocument-Node] Successfully processed ${successfulChunks}/${chunks.length} chunks`);

    return {
      success: true,
      message: `Document uploaded successfully: ${filename}`,
      document: {
        id: doc.id,
        filename,
        content_type: documentType,
        chunks_created: successfulChunks,
        total_chunks: chunks.length,
        text_length: content.length,
        metadata
      }
    };
  } catch (error) {
    console.error('[uploadDocument-Node] Upload failed:', error);
    return {
      success: false,
      error: error.message
    };
  }
}
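
// Example usage (illustrative sketch; the module path and file name below are
// assumptions, and Supabase/OpenAI credentials must already be configured):
//
//   import { uploadDocument } from './documents.js';
//
//   const result = await uploadDocument({ file_path: './docs/report.pdf' });
//   if (result.success) {
//     console.log(`Indexed ${result.document.chunks_created}/${result.document.total_chunks} chunks`);
//   } else {
//     console.error('Upload failed:', result.error);
//   }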