PolyMarket MCP Server

by berlinbra
Verified
  • src
import { ProcessedDocument } from './types.js'; import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; import * as fs from 'fs/promises'; import * as path from 'path'; import { FileTypeProcessorRegistry } from './processors/index.js'; export class FileProcessor { private textSplitter: RecursiveCharacterTextSplitter; private processorRegistry: FileTypeProcessorRegistry; constructor(chunkSize: number = 1000, chunkOverlap: number = 200) { this.textSplitter = new RecursiveCharacterTextSplitter({ chunkSize, chunkOverlap, }); this.processorRegistry = FileTypeProcessorRegistry.getInstance(); } private getFileType(filePath: string): string { const ext = path.extname(filePath).toLowerCase(); return ext ? ext.slice(1) : 'txt'; // Remove the dot from extension } private async isTextFile(filePath: string): Promise<boolean> { try { // Try to read the first few bytes of the file const fd = await fs.open(filePath, 'r'); const buffer = Buffer.alloc(4096); const { bytesRead } = await fd.read(buffer, 0, 4096, 0); await fd.close(); if (bytesRead === 0) { return true; // Empty files are considered text files } // Check if the buffer contains null bytes (common in binary files) for (let i = 0; i < bytesRead; i++) { if (buffer[i] === 0) { return false; } } // Try decoding as UTF-8 buffer.slice(0, bytesRead).toString('utf8'); return true; } catch (error) { if (error instanceof Error && error.message.includes('ENOENT')) { throw error; // Re-throw file not found errors } return false; // Other errors indicate non-text content } } async processFile(filePath: string): Promise<ProcessedDocument[]> { try { // First check if it's a text file if (!await this.isTextFile(filePath)) { return []; } const content = await fs.readFile(filePath, 'utf-8'); const fileType = this.getFileType(filePath); const stats = await fs.stat(filePath); // Find appropriate processor const processor = this.processorRegistry.findProcessor(filePath); if (!processor) { return []; // Skip if no processor is found } const processedContent = await processor.process(content); const chunks = await this.textSplitter.createDocuments( [processedContent], [{ source: filePath }] ); return chunks.map((chunk: { pageContent: string }, index: number) => ({ content: chunk.pageContent, metadata: { source: filePath, fileType, lastModified: stats.mtimeMs, chunkIndex: index, totalChunks: chunks.length, }, })); } catch (error) { if (error instanceof Error && error.message.includes('ENOENT')) { throw error; // Re-throw file not found errors } console.error(`Error processing file ${filePath}:`, error); return []; } } async processDirectory(dirPath: string): Promise<ProcessedDocument[]> { const documents: ProcessedDocument[] = []; const items = await fs.readdir(dirPath, { withFileTypes: true }); for (const item of items) { const fullPath = path.join(dirPath, item.name); if (item.isDirectory()) { const subdirDocs = await this.processDirectory(fullPath); documents.push(...subdirDocs); } else if (item.isFile()) { try { const docs = await this.processFile(fullPath); documents.push(...docs); } catch (error) { console.error(`Error processing ${fullPath}:`, error); } } } return documents; } }