OpenTK Model Context Protocol Server

Overview Schema Related Servers Score Discussions

document-extractor.ts•4.02 KiB

/**
 * Utility for extracting text from various document formats
 * Using established libraries for better reliability
 */

// Using require for pdf-parse due to CommonJS module compatibility
const pdfParse = require('pdf-parse');
import * as mammoth from 'mammoth';

/** Extracted text shorter than this (after whitespace cleanup) is reported as unreadable. */
const MIN_READABLE_LENGTH = 50;

/** Page size used by `summarizeText` when the caller supplies no usable `maxLength`. */
const DEFAULT_SUMMARY_LENGTH = 8000;

/**
 * Turns whatever was thrown into a loggable message.
 * A `catch` variable is not guaranteed to be an `Error`, so it is narrowed
 * here instead of being asserted.
 */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Collapses every run of whitespace into a single space and trims both ends. */
function normalizeWhitespace(raw: string): string {
  return raw.replace(/\s+/g, ' ').trim();
}

/**
 * Extracts text from a PDF document using pdf-parse library.
 *
 * Never rejects: extraction failures are logged with `console.error` and a
 * human-readable fallback message is returned instead of the content.
 *
 * @param data The PDF document as an ArrayBuffer
 * @returns The whitespace-normalized text, or a fallback message when fewer
 *          than 50 characters could be extracted or parsing failed
 */
export async function extractTextFromPdf(data: ArrayBuffer): Promise<string> {
  try {
    // Convert ArrayBuffer to Buffer for pdf-parse
    const buffer = Buffer.from(data);

    // Parse the PDF and clean up the resulting text
    const result = await pdfParse(buffer);
    const extractedText = normalizeWhitespace(result.text || '');

    if (extractedText.length < MIN_READABLE_LENGTH) {
      return 'The document appears to be a PDF file, but no readable text content could be extracted. This might be due to the document structure, content format, or encryption. Please download the original document for full content.';
    }

    return extractedText;
  } catch (error) {
    console.error(`Error extracting text from PDF: ${describeError(error)}`);
    return 'Failed to extract text from the PDF document. This might be due to the document structure, content format, or encryption. Please download the original document for full content.';
  }
}

/**
 * Extracts text from a DOCX document using mammoth library.
 *
 * Never rejects: extraction failures are logged with `console.error` and a
 * human-readable fallback message is returned instead of the content.
 *
 * @param data The DOCX document as an ArrayBuffer
 * @returns The whitespace-normalized text, or a fallback message when fewer
 *          than 50 characters could be extracted or parsing failed
 */
export async function extractTextFromDocx(data: ArrayBuffer): Promise<string> {
  try {
    // Convert ArrayBuffer to Buffer for mammoth
    const buffer = Buffer.from(data);

    // Extract the raw text from the DOCX and clean it up
    const result = await mammoth.extractRawText({ buffer });
    const extractedText = normalizeWhitespace(result.value || '');

    if (extractedText.length < MIN_READABLE_LENGTH) {
      return 'The document appears to be a Word file, but no readable text content could be extracted. This might be due to the document structure or content format. Please download the original document for full content.';
    }

    return extractedText;
  } catch (error) {
    console.error(`Error extracting text from DOCX: ${describeError(error)}`);
    return 'Failed to extract text from the DOCX document. This might be due to the document structure or content format. Please download the original document for full content.';
  }
}

/**
 * Returns one page of the extracted text together with pagination info.
 *
 * Arguments are sanitized so that following `nextOffset` always makes
 * progress: a `maxLength` below 1 (or NaN) falls back to the default page
 * size, a negative or NaN `offset` is treated as 0, and fractional values are
 * rounded down. The returned `currentOffset` is the sanitized offset.
 *
 * @param text The full extracted text
 * @param maxLength Maximum length of the summary (default: 8000 characters)
 * @param offset Starting position for extraction (default: 0)
 * @returns Object containing the summarized text and pagination info; when
 *          the offset is at or past the end of the text (including an empty
 *          text), `text` is an end-of-document notice and `nextOffset` is null
 */
export function summarizeText(
  text: string,
  maxLength: number = DEFAULT_SUMMARY_LENGTH,
  offset: number = 0
): {
  text: string;
  isTruncated: boolean;
  totalLength: number;
  currentOffset: number;
  nextOffset: number | null;
  remainingLength: number;
} {
  const totalLength = text.length;

  // `>= 1` is false for NaN, zero and negatives, but true for Infinity, which
  // callers may pass to mean "no limit" (Math.min below caps it at the end).
  const pageSize = maxLength >= 1 ? Math.floor(maxLength) : DEFAULT_SUMMARY_LENGTH;

  // `> 0` is false for NaN and negatives; substring() would read those from 0
  // anyway, so make the reported offsets agree with what is actually read.
  const start = offset > 0 ? Math.floor(offset) : 0;

  if (start >= totalLength) {
    return {
      text: 'No more content available. You have reached the end of the document.',
      isTruncated: false,
      totalLength,
      currentOffset: start,
      nextOffset: null,
      remainingLength: 0
    };
  }

  // Extract the portion of text from start to start + pageSize
  const endPosition = Math.min(start + pageSize, totalLength);
  const isTruncated = endPosition < totalLength;

  return {
    text: text.substring(start, endPosition) + (isTruncated ? '... [Text truncated due to length]' : ''),
    isTruncated,
    totalLength,
    currentOffset: start,
    // The next page starts exactly where this one stopped
    nextOffset: isTruncated ? endPosition : null,
    remainingLength: totalLength - endPosition
  };
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/r-huijts/opentk-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

document-extractor.ts•4.02 KiB