// Cloudflare Worker version - metadata only, no complex PDF parsing
import { PDFDocument } from 'pdf-lib';
import { supabase, createSupabaseClient } from '../services/supabase.js';
import { generateEmbedding } from '../services/openai.js';
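/**
 * Split text into word-bounded chunks for embedding.
 * Splitting on whitespace keeps chunk boundaries between words, and the
 * 500-word default stays comfortably under typical embedding-model
 * token limits.
 *
 * @param {string} text - Raw document text.
 * @param {number} [maxWords=500] - Maximum words per chunk.
 * @returns {string[]} Non-empty text chunks.
 */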
function chunkText(text, maxWords = 500) {
  const words = text.split(/\s+/);
  const chunks = [];
  for (let i = 0; i < words.length; i += maxWords) {
    const chunk = words.slice(i, i + maxWords).join(' ');
    if (chunk.trim()) {
      chunks.push(chunk);
    }
  }
  return chunks;
}
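/**
 * Extract basic metadata from a PDF using pdf-lib and build a short
 * searchable text summary from it. Full text extraction is intentionally
 * skipped: pdf-lib does not extract page text, and heavier PDF parsers
 * are impractical in the Worker runtime.
 *
 * @param {ArrayBuffer|Uint8Array} fileData - Raw PDF bytes.
 * @param {string} fileName - Original filename, used as a title fallback.
 * @returns {Promise<{text: string, metadata: object}>}
 */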
async function processPDF(fileData, fileName) {
  try {
    console.log('[processPDF-Worker] Processing PDF with metadata only for:', fileName);
    // Load the PDF document using pdf-lib for basic metadata
    const pdfDoc = await PDFDocument.load(fileData, { ignoreEncryption: true });
    // Get basic metadata
    const numPages = pdfDoc.getPageCount();
    const metadata = {
      pages: numPages,
      info: {
        Title: pdfDoc.getTitle() || '',
        Author: pdfDoc.getAuthor() || '',
        Subject: pdfDoc.getSubject() || '',
        Creator: pdfDoc.getCreator() || '',
        Producer: pdfDoc.getProducer() || '',
        CreationDate: pdfDoc.getCreationDate()?.toISOString() || '',
        ModificationDate: pdfDoc.getModificationDate()?.toISOString() || ''
      },
      version: '1.0'
    };
    // Create searchable content from metadata
    let fullText = `PDF Document: ${metadata.info.Title || fileName}\n`;
    fullText += `Filename: ${fileName}\n`;
    fullText += `Author: ${metadata.info.Author || 'Unknown'}\n`;
    fullText += `Pages: ${numPages}\n`;
    fullText += `Subject: ${metadata.info.Subject || 'N/A'}\n`;
    fullText += `Creator: ${metadata.info.Creator || 'N/A'}\n`;
    fullText += `Producer: ${metadata.info.Producer || 'N/A'}\n\n`;
    fullText += `Note: This PDF was uploaded via Cloudflare Workers. `;
    fullText += `Full text extraction is not available in the Worker environment. `;
    fullText += `For full text search, please use the desktop application.`;
    return {
      text: fullText,
      metadata: metadata
    };
  } catch (error) {
    console.error('Error processing PDF in Worker:', error);
    return {
      text: `PDF Document: ${fileName}\nNote: Unable to process this PDF in Worker environment.`,
      metadata: { pages: 0, info: {}, version: '1.0' }
    };
  }
}
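/**
 * Upload a document: store its content and metadata in Supabase, then
 * chunk the text and insert one embedding row per chunk. PDF uploads are
 * indexed by metadata only (see processPDF); all other files are decoded
 * as UTF-8 text.
 *
 * @param {object} params
 * @param {ArrayBuffer|Uint8Array} params.file_data - Raw file bytes.
 * @param {string} params.original_filename - Filename including extension.
 * @param {object|null} [params.credentials] - Optional per-request
 *   credentials; falls back to the default Supabase client when omitted.
 * @returns {Promise<object>} Result object with a success flag and details.
 */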
export async function uploadDocument({ file_data, original_filename, credentials = null }) {
  try {
    console.log(`[uploadDocument-Worker] Starting upload for: ${original_filename}`);
    if (!file_data || !original_filename) {
      throw new Error('file_data and original_filename are required');
    }
    const filename = original_filename;
    const fileExt = filename.includes('.') ? filename.substring(filename.lastIndexOf('.')).toLowerCase() : '.pdf';
    let content = '';
    let metadata = {};
    let documentType = 'text';
    // Handle different file types
    if (fileExt === '.pdf') {
      console.log('[uploadDocument-Worker] Processing PDF file...');
      const pdfResult = await processPDF(file_data, filename);
      content = pdfResult.text;
      metadata = pdfResult.metadata;
      documentType = 'pdf';
    } else {
      // Handle text files
      const decoder = new TextDecoder('utf-8');
      content = decoder.decode(file_data);
      documentType = 'text';
    }
    // Ensure content is not empty
    if (!content || content.trim().length === 0) {
      content = '[No readable content found]';
    }
    // Use provided credentials or fall back to default client
    const supabaseClient = credentials ? createSupabaseClient(credentials) : supabase;
    if (!supabaseClient) {
      throw new Error('No Supabase client available');
    }
    // Store document in database
    const { data: doc, error: docError } = await supabaseClient
      .from('documents')
      .insert({
        filename,
        content,
        content_type: 'text', // metadata summaries and decoded files are both stored as plain text
        metadata: metadata
      })
      .select()
      .single();
    if (docError) {
      throw new Error(`Failed to store document: ${docError.message}`);
    }
    // Chunk the text content
    const chunks = chunkText(content, 500);
    console.log(`[uploadDocument-Worker] Processing ${chunks.length} text chunks for ${filename}...`);
    // Process text chunks
    let successfulChunks = 0;
    for (const chunkContent of chunks) {
      try {
        const embedding = await generateEmbedding(chunkContent, credentials);
        const { error: chunkError } = await supabaseClient
          .from('document_chunks')
          .insert({
            document_id: doc.id,
            content: chunkContent,
            embedding: embedding
          });
        if (!chunkError) {
          successfulChunks++;
        }
      } catch (error) {
        console.error(`Failed to process chunk: ${error.message}`);
      }
    }
    console.log(`[uploadDocument-Worker] Successfully processed ${successfulChunks}/${chunks.length} chunks`);
    return {
      success: true,
      message: `Document uploaded successfully: ${filename}`,
      document: {
        id: doc.id,
        filename: filename,
        content_type: documentType,
        chunks_created: successfulChunks,
        total_chunks: chunks.length,
        metadata: metadata
      }
    };
  } catch (error) {
    console.error('[uploadDocument-Worker] Upload failed:', error);
    return {
      success: false,
      error: error.message
    };
  }
}
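/*
 * Example invocation (a minimal sketch, assuming a Worker fetch handler
 * that receives a multipart form upload; the route shape and the field
 * name 'file' are illustrative, not part of this module):
 *
 *   const form = await request.formData();
 *   const file = form.get('file');
 *   const result = await uploadDocument({
 *     file_data: await file.arrayBuffer(),
 *     original_filename: file.name
 *   });
 *   return new Response(JSON.stringify(result), {
 *     headers: { 'Content-Type': 'application/json' }
 *   });
 */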