
Nutrient Document Engine MCP Server

by PSPDFKit
runner.ts (10.8 kB)
/**
 * Main evaluation runner that orchestrates the focused LLM tool usage evaluation
 */
import { createReactAgent } from '@langchain/langgraph/prebuilt';
import { BaseLanguageModel } from '@langchain/core/language_models/base';
import { ChatOpenAI } from '@langchain/openai';
import { DynamicStructuredTool } from '@langchain/core/tools';
import { mcpToolsToRegister } from '../src/mcpTools.js';
import { getDocumentEngineClient } from '../src/api/ClientFactory.js';
import { z, ZodRawShape } from 'zod';
import { readFile } from 'fs/promises';
import { join } from 'path';
import { FocusedEvaluationResults, ToolUsageResult } from './types.js';
import { TOOL_USAGE_SCENARIOS } from './scenarios.js';
import { ToolUsageEvaluator } from './evaluator.js';
import { EvaluationReporter } from './reporter.js';
import { DocumentEngineClient } from '../src/api/Client.js';
import { createDocumentLayer } from '../src/api/DocumentLayerAbstraction.js';

/**
 * Test document configuration
 */
interface TestDocument {
  filePath: string;
  documentId: string;
  title: string;
}

/**
 * Focused evaluation runner
 */
export class FocusedEvaluationRunner {
  private client?: DocumentEngineClient;
  private evaluator: ToolUsageEvaluator;
  private reporter: EvaluationReporter;

  // Test documents configuration
  private readonly testDocuments: TestDocument[] = [
    {
      filePath: join(process.cwd(), 'assets', 'contract.pdf'),
      documentId: 'doc-12345',
      title: 'Sample Contract Document',
    },
    {
      filePath: join(process.cwd(), 'assets', 'form.pdf'),
      documentId: 'doc-form-123',
      title: 'Sample Form Document',
    },
    {
      filePath: join(process.cwd(), 'assets', 'ocr.pdf'),
      documentId: 'doc-scan-123',
      title: 'Sample Scanned Document',
    },
    {
      filePath: join(process.cwd(), 'assets', 'report.pdf'),
      documentId: 'doc-report-456',
      title: 'Sample Report Document',
    },
    {
      filePath: join(process.cwd(), 'assets', 'A.pdf'),
      documentId: 'doc-111',
      title: 'Sample A Document',
    },
    {
      filePath: join(process.cwd(), 'assets', 'B.pdf'),
      documentId: 'doc-222',
      title: 'Sample B Document',
    },
  ];

  // Layer names used in evaluation scenarios (excluding non-existent-layer for error testing)
  private readonly testLayers: string[] = [
    'additional-pages-layer',
    'analysis-layer',
    'annotation-layer',
    'approval-layer',
    'approved-layer',
    'comments-layer',
    'completed-layer',
    'data-layer',
    'draft-layer',
    'edit-layer',
    'edited-layer',
    'final-layer',
    'final-redaction-layer',
    'finance-layer',
    'markup-layer',
    'metadata-layer',
    'ocr-layer',
    'original-layer',
    'privacy-layer',
    'redaction-layer',
    'review-layer',
    'reviewer-1-layer',
    'reviewer-2-layer',
    'rotation-layer',
    'search-layer',
    'split-layer',
    'temp-layer',
    'template-layer',
    'test-layer',
    'watermark-layer',
  ];

  constructor() {
    this.evaluator = new ToolUsageEvaluator();
    this.reporter = new EvaluationReporter();
  }

  /**
   * Initialize the runner
   */
  async initialize(): Promise<void> {
    console.log('🎯 Initializing focused LLM tool usage evaluation...');
    this.client = await getDocumentEngineClient();
    console.log('✅ Initialized successfully');
  }

  /**
   * Upload test documents to Document Engine with specific IDs and create all required layers.
   * This ensures all layer evaluation scenarios have the necessary layers available.
   */
  private async uploadTestDocuments(): Promise<void> {
    console.log('📤 Uploading test documents...');

    if (!this.client) {
      throw new Error('Please run initialize() first');
    }

    for (const testDoc of this.testDocuments) {
      try {
        console.log(`  Uploading ${testDoc.documentId} (${testDoc.title})...`);

        // Read the file
        const fileBuffer = await readFile(testDoc.filePath);
        const blob = new Blob([fileBuffer], { type: 'application/octet-stream' });

        const formData = new FormData();
        formData.append('file', blob, testDoc.title);
        formData.append('document_id', testDoc.documentId);
        formData.append('title', testDoc.title);
        formData.append('overwrite_existing_document', 'true');

        // Upload with specific document ID and overwrite if exists
        // @ts-expect-error We can send form data.
        await this.client['upload-document']({}, formData, {
          headers: {
            'Content-Type': 'multipart/form-data',
          },
        });

        console.log(`  ✅ Successfully uploaded ${testDoc.documentId}`);

        // Create all required layers for this document (used in layer evaluation scenarios)
        console.log(`  Creating ${this.testLayers.length} layers for ${testDoc.documentId}...`);
        for (const layerName of this.testLayers) {
          try {
            await createDocumentLayer(this.client, testDoc.documentId, layerName);
            console.log(`    ✅ Created layer: ${layerName}`);
          } catch (error) {
            // Some layers might already exist from previous runs, which is fine
            console.log(`    ⚠️ Layer ${layerName} may already exist: ${error}`);
          }
        }
        console.log(`  ✅ Completed layer creation for ${testDoc.documentId}`);
      } catch (error) {
        console.error(`  ❌ Failed to upload ${testDoc.documentId}:`, error);
        throw new Error(`Failed to upload test document ${testDoc.documentId}: ${error}`);
      }
    }

    console.log('📤 All test documents and layers uploaded successfully');
  }

  /**
   * Evaluate multiple models on tool usage
   */
  async evaluateModels(
    models: Array<{ name: string; llm: BaseLanguageModel }>
  ): Promise<FocusedEvaluationResults[]> {
    const allResults: FocusedEvaluationResults[] = [];

    for (const model of models) {
      // Upload and overwrite the documents from the previous run.
      await this.uploadTestDocuments();

      console.log(`\n🔬 Evaluating model: ${model.name}`);
      const result = await this.evaluateModel(model.name, model.llm);
      allResults.push(result);
      this.reporter.printModelSummary(model.name, result);
    }

    return allResults;
  }

  /**
   * Evaluate a single model
   */
  async evaluateModel(
    modelName: string,
    llm: BaseLanguageModel
  ): Promise<FocusedEvaluationResults> {
    const startTime = Date.now();

    // Create tools
    const tools = mcpToolsToRegister.map(tool => {
      return new DynamicStructuredTool({
        name: tool.name,
        description: `Document processing tool: ${tool.name}`,
        schema: z.object(tool.schema as ZodRawShape),
        func: async args => {
          if (!this.client) {
            throw new Error('Please run initialize() first');
          }
          try {
            const result = await tool.handler(
              this.client,
              args,
              // Mock the request extras.
              {
                signal: new AbortController().signal,
                sendNotification: _ => {
                  return Promise.resolve();
                },
                sendRequest: _ => {
                  return Promise.resolve(args);
                },
                requestId: '',
              }
            );
            return result.content?.[0]?.text || 'Operation completed successfully';
          } catch (error) {
            return `Error: ${error}`;
          }
        },
      });
    });

    // Create agent
    const agent = createReactAgent({ llm, tools });

    console.log(`  Running ${TOOL_USAGE_SCENARIOS.length} tool usage scenarios...`);

    const results: ToolUsageResult[] = [];

    for (let i = 0; i < TOOL_USAGE_SCENARIOS.length; i++) {
      const scenario = TOOL_USAGE_SCENARIOS[i];

      try {
        const result = await this.evaluator.evaluateScenario(scenario, agent);
        results.push(result);
        this.reporter.printScenarioProgress(i, TOOL_USAGE_SCENARIOS.length, scenario, result);
      } catch (error: unknown) {
        this.reporter.printScenarioError(i, TOOL_USAGE_SCENARIOS.length, scenario, error as Error);
        results.push({
          scenarioId: scenario.id,
          description: scenario.description,
          correctTools: false,
          correctOrder: false,
          efficient: false,
          correctParameters: false,
          expectedTools: scenario.expectedTools,
          actualTools: [],
          toolCallCount: 0,
          maxAllowed: scenario.maxToolCalls || 999,
          score: 0,
          issues: [`Execution error: ${error}`],
        });
      }
    }

    // Calculate overall metrics
    const correctToolUsage = results.filter(r => r.correctTools).length / results.length;
    const correctOrderUsage = results.filter(r => r.correctOrder).length / results.length;
    const efficiencyScore = results.filter(r => r.efficient).length / results.length;
    const correctParameterUsage = results.filter(r => r.correctParameters).length / results.length;
    const overallScore =
      (correctToolUsage + correctOrderUsage + efficiencyScore + correctParameterUsage) / 4;

    const duration = Date.now() - startTime;

    return {
      model: modelName,
      totalScenarios: results.length,
      correctToolUsage,
      correctOrderUsage,
      efficiencyScore,
      correctParameterUsage,
      overallScore,
      results,
      timestamp: new Date(),
      duration,
    };
  }

  /**
   * Get the reporter instance for external use
   */
  getReporter(): EvaluationReporter {
    return this.reporter;
  }
}

/**
 * Quick evaluation with multiple models
 */
export async function runFocusedEvaluation(): Promise<FocusedEvaluationResults[]> {
  const runner = new FocusedEvaluationRunner();
  await runner.initialize();

  // Define models to test
  const models = [
    {
      name: 'GPT-4o',
      llm: new ChatOpenAI({ model: 'gpt-4o', temperature: 0 }),
    },
    {
      name: 'GPT-4o-mini',
      llm: new ChatOpenAI({ model: 'gpt-4o-mini', temperature: 0 }),
    },
    {
      name: 'GPT-4.1-mini',
      llm: new ChatOpenAI({ model: 'gpt-4.1-mini', temperature: 0 }),
    },
    {
      name: 'GPT-4.1-nano',
      llm: new ChatOpenAI({ model: 'gpt-4.1-nano', temperature: 0 }),
    },
    {
      name: 'o3-mini',
      llm: new ChatOpenAI({ model: 'o3-mini' }),
    },
  ];

  if (models.length === 0) {
    throw new Error('No models available. Set OPENAI_API_KEY environment variable.');
  }

  const results = await runner.evaluateModels(models);

  const reporter = runner.getReporter();
  reporter.printComparison(results);
  await reporter.saveResults(results);

  return results;
}
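
For reference, a minimal sketch of how this runner could be invoked as a standalone script. The entry-point filename runEval.ts is hypothetical, and it assumes OPENAI_API_KEY and the Document Engine connection settings read by getDocumentEngineClient() are configured in the environment:

// runEval.ts — hypothetical entry point for the evaluation above.
// Assumes OPENAI_API_KEY and the Document Engine credentials consumed by
// getDocumentEngineClient() are set in the environment.
import { runFocusedEvaluation } from './runner.js';

runFocusedEvaluation()
  .then(results => {
    // One FocusedEvaluationResults entry per model; the reporter inside
    // runFocusedEvaluation() has already printed the comparison and saved results.
    for (const r of results) {
      console.log(`${r.model}: overall score ${(r.overallScore * 100).toFixed(1)}%`);
    }
  })
  .catch(error => {
    console.error('Evaluation failed:', error);
    process.exit(1);
  });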

MCP directory API

We provide all the information about MCP servers via our MCP directory API. For example, to fetch this server's entry:

curl -X GET 'https://glama.ai/api/mcp/v1/servers/PSPDFKit/nutrient-document-engine-mcp-server'
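
The same lookup as a sketch in TypeScript, using the standard fetch API (only the URL above is taken from this page; no particular response shape is assumed):

// Fetch this server's directory entry from the Glama MCP API.
const response = await fetch(
  'https://glama.ai/api/mcp/v1/servers/PSPDFKit/nutrient-document-engine-mcp-server'
);
if (!response.ok) {
  throw new Error(`Directory API request failed: ${response.status}`);
}
const serverInfo = await response.json();
console.log(serverInfo);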

If you have feedback or need assistance with the MCP directory API, please join our Discord server.