LODA MCP Server

document_access_mcp_server.js•15.7 KiB

#!/usr/bin/env node
/**
 * MCP Document Access Server - Phase 2
 * Provides iterative document access tools to LLMs for token-efficient exploration
 *
 * Solves the token limit problem:
 * - 300K line document = 4.5M tokens (impossible)
 * - MCP iterative access = ~16K tokens (achievable)
 * - 281x token efficiency improvement
 *
 * MCP Tools Provided:
 * 1. list_document_sections - Get hierarchical document structure
 * 2. read_section - Read specific section with context
 * 3. read_lines - Read specific line range
 * 4. search_content - Search for keywords/patterns
 * 5. loda_search - Token-optimized search (LODA)
 *
 * Usage (both `--flag value` and `--flag=value` are accepted):
 *   # Stdio mode (for MCP clients)
 *   node document_access_mcp_server.js --mode stdio
 *
 *   # HTTP mode (for testing/debugging)
 *   node document_access_mcp_server.js --mode http --port 49400
 */

const fs = require('fs');
const path = require('path');
const readline = require('readline');

// LODA (LLM-Optimized Document Access) module
const { LodaSearchHandler } = require('./loda/loda_search_handler');

// Document staging directory (where LLM can access documents)
const STAGING_DIR = path.join(__dirname, 'staging');

// Ensure staging directory exists
if (!fs.existsSync(STAGING_DIR)) {
  fs.mkdirSync(STAGING_DIR, { recursive: true });
}

// Markdown ATX heading: 1-6 '#' characters, whitespace, then the heading text.
const HEADING_PATTERN = /^(#{1,6})\s+(.+)$/;
// Same as above but only needs the level, used when scanning for a section's end.
const HEADING_PREFIX_PATTERN = /^(#{1,6})\s+/;

/**
 * Read a document and split it into lines.
 *
 * Splits on both LF and CRLF so that a trailing '\r' never ends up inside a
 * line; a stray '\r' would stop HEADING_PATTERN from matching (neither '.' nor
 * '$' accepts it) and CRLF documents would appear to have no sections.
 *
 * @param {string} documentPath - Path of the file to read.
 * @returns {string[]} The document's lines without line terminators.
 */
function readDocumentLines(documentPath) {
  return fs.readFileSync(documentPath, 'utf8').split(/\r?\n/);
}

/**
 * Build the list of sections for an already-loaded document.
 *
 * A section starts at a heading line and runs until the line before the next
 * heading of the same or a higher level (or the end of the document). Headings
 * deeper than maxDepth do not produce sections, but they are still considered
 * when looking for the end of a section.
 *
 * @param {string[]} lines - Document lines.
 * @param {number} maxDepth - Deepest heading level that yields a section.
 * @returns {Array<{id: string, header: string, level: number, startLine: number, endLine: number, lineCount: number}>}
 *   Sections in document order; startLine/endLine are 1-indexed and inclusive.
 */
function extractSections(lines, maxDepth) {
  const sections = [];
  let currentSectionId = 0;

  for (let i = 0; i < lines.length; i++) {
    const headingMatch = lines[i].match(HEADING_PATTERN);
    if (!headingMatch) {
      continue;
    }

    const level = headingMatch[1].length;
    if (level > maxDepth) {
      continue;
    }

    // Find section end (next heading of same or higher level)
    let endLine = lines.length - 1;
    for (let j = i + 1; j < lines.length; j++) {
      const nextHeadingMatch = lines[j].match(HEADING_PREFIX_PATTERN);
      if (nextHeadingMatch && nextHeadingMatch[1].length <= level) {
        endLine = j - 1;
        break;
      }
    }

    sections.push({
      id: `section-${currentSectionId++}`,
      header: headingMatch[2],
      level,
      startLine: i + 1, // 1-indexed for user display
      endLine: endLine + 1,
      lineCount: endLine - i + 1,
    });
  }

  return sections;
}

/**
 * Parse document structure - deterministic, no LLM needed.
 * Identifies markdown headings and creates hierarchical structure.
 *
 * @param {string} documentPath - Path of the document to parse.
 * @param {number} [maxDepth=6] - Deepest heading level (1-6) to include.
 * @returns {{sections: Array<object>, totalLines: number, totalSections: number, fileName: string}}
 *   The sections found plus document-level counts.
 */
function parseDocumentStructure(documentPath, maxDepth = 6) {
  const lines = readDocumentLines(documentPath);
  const sections = extractSections(lines, maxDepth);

  return {
    sections,
    totalLines: lines.length,
    totalSections: sections.length,
    fileName: path.basename(documentPath),
  };
}

/**
 * Read specific section with optional context lines.
 *
 * @param {string} documentPath - Path of the document.
 * @param {string} sectionId - Section ID as produced by parseDocumentStructure (e.g. "section-0").
 * @param {boolean} [includeContext=true] - Whether to return surrounding lines.
 * @param {number} [contextLines=20] - Number of lines to return before and after the section.
 * @returns {{section: {id: string, header: string, level: number, content: string, lineRange: number[], contextBefore: (string|null), contextAfter: (string|null)}}}
 *   The section text; context fields are null when includeContext is false.
 * @throws {Error} If no section has the given ID.
 */
function readSection(documentPath, sectionId, includeContext = true, contextLines = 20) {
  // Read the file once and derive both the structure and the content from the
  // same snapshot, so the line ranges always agree with the text returned.
  const lines = readDocumentLines(documentPath);
  const section = extractSections(lines, 6).find((s) => s.id === sectionId);

  if (!section) {
    throw new Error(`Section ${sectionId} not found`);
  }

  // Get section content (startLine is 1-indexed, slice end is exclusive)
  const sectionContent = lines.slice(section.startLine - 1, section.endLine).join('\n');

  // Get context if requested
  let contextBefore = null;
  let contextAfter = null;

  if (includeContext) {
    const contextStart = Math.max(0, section.startLine - 1 - contextLines);
    const contextEnd = Math.min(lines.length, section.endLine + contextLines);

    contextBefore = lines.slice(contextStart, section.startLine - 1).join('\n');
    contextAfter = lines.slice(section.endLine, contextEnd).join('\n');
  }

  return {
    section: {
      id: section.id,
      header: section.header,
      level: section.level,
      content: sectionContent,
      lineRange: [section.startLine, section.endLine],
      contextBefore,
      contextAfter,
    },
  };
}

/**
 * Read specific line range from document.
 *
 * @param {string} documentPath - Path of the document.
 * @param {number} startLine - First line to return (1-indexed, inclusive).
 * @param {number} endLine - Last line to return (1-indexed, inclusive).
 * @returns {{content: string, lineRange: number[], lineCount: number}} The selected text.
 * @throws {Error} If the bounds are not integers or fall outside the document.
 */
function readLines(documentPath, startLine, endLine) {
  const lines = readDocumentLines(documentPath);

  // Validate range. The integer check also rejects NaN/undefined, which would
  // otherwise slip through the comparisons (every comparison with NaN is false).
  const isValidRange =
    Number.isInteger(startLine) &&
    Number.isInteger(endLine) &&
    startLine >= 1 &&
    endLine <= lines.length &&
    startLine <= endLine;

  if (!isValidRange) {
    throw new Error(
      `Invalid line range: ${startLine}-${endLine} (document has ${lines.length} lines)`
    );
  }

  const selectedLines = lines.slice(startLine - 1, endLine);

  return {
    content: selectedLines.join('\n'),
    lineRange: [startLine, endLine],
    lineCount: selectedLines.length,
  };
}

/**
 * Search for keywords/patterns in document.
 *
 * The query is compiled as a case-insensitive regular expression and tested
 * against each line in order until maxResults matching lines are collected.
 *
 * @param {string} documentPath - Path of the document.
 * @param {string} query - Regular expression source.
 * @param {number} [maxResults=10] - Maximum number of matching lines to return.
 * @returns {{query: string, matches: Array<{line: number, content: string, context: string, section: string}>, totalMatches: number}}
 *   Matching lines; totalMatches is the number of matches returned (capped at maxResults).
 * @throws {SyntaxError} If query is not a valid regular expression.
 */
function searchContent(documentPath, query, maxResults = 10) {
  const lines = readDocumentLines(documentPath);
  // Structure is computed once, outside the loop (PLAN Section 4.1).
  const sections = extractSections(lines, 6);
  const matches = [];

  // No 'g' flag: a global regex keeps `lastIndex` between test() calls, which
  // makes a match on one line cause later matching lines to be skipped.
  const regex = new RegExp(query, 'i');

  for (let i = 0; i < lines.length && matches.length < maxResults; i++) {
    if (!regex.test(lines[i])) {
      continue;
    }

    // Get context (3 lines before and after)
    const contextStart = Math.max(0, i - 3);
    const contextEnd = Math.min(lines.length, i + 4);
    const context = lines.slice(contextStart, contextEnd).join('\n');

    // Find which section this line belongs to. Sections are in document order
    // and a parent's range encloses its children, so this picks the first
    // (outermost) section containing the line.
    const lineNumber = i + 1;
    const section = sections.find(
      (s) => lineNumber >= s.startLine && lineNumber <= s.endLine
    );

    matches.push({
      line: lineNumber,
      content: lines[i],
      context,
      section: section ? section.header : 'Unknown',
    });
  }

  return {
    query,
    matches,
    totalMatches: matches.length,
  };
}

// LODA Search Handler - initialized with parseDocumentStructure
const lodaHandler = new LodaSearchHandler(parseDocumentStructure);

/**
 * MCP Tool Definitions
 *
 * Each entry carries the metadata returned by "tools/list" (name, description,
 * inputSchema) and a synchronous handler that receives the call arguments.
 */
const MCP_TOOLS = {
  list_document_sections: {
    name: 'list_document_sections',
    description: 'Get hierarchical structure of document sections (headings, line ranges, metadata)',
    inputSchema: {
      type: 'object',
      properties: {
        documentPath: {
          type: 'string',
          description: 'Path to document in staging directory (e.g., "document.md" or "/staging/document.md")',
        },
        depth: {
          type: 'number',
          description: 'Maximum heading depth to return (1-6, default: 3)',
          default: 3,
        },
      },
      required: ['documentPath'],
    },
    handler: (args) => {
      const docPath = resolveStagingPath(args.documentPath);
      const depth = args.depth || 3;
      return parseDocumentStructure(docPath, depth);
    },
  },

  read_section: {
    name: 'read_section',
    description: 'Read specific section with optional context lines before/after',
    inputSchema: {
      type: 'object',
      properties: {
        documentPath: {
          type: 'string',
          description: 'Path to document in staging directory',
        },
        sectionId: {
          type: 'string',
          description: 'Section ID from list_document_sections (e.g., "section-0")',
        },
        includeContext: {
          type: 'boolean',
          description: 'Include context lines before/after section',
          default: true,
        },
        contextLines: {
          type: 'number',
          description: 'Number of context lines to include',
          default: 20,
        },
      },
      required: ['documentPath', 'sectionId'],
    },
    handler: (args) => {
      const docPath = resolveStagingPath(args.documentPath);
      return readSection(
        docPath,
        args.sectionId,
        args.includeContext !== false,
        // `??` rather than `||` so an explicit 0 is honored instead of becoming 20.
        args.contextLines ?? 20
      );
    },
  },

  read_lines: {
    name: 'read_lines',
    description: 'Read specific line range from document (1-indexed)',
    inputSchema: {
      type: 'object',
      properties: {
        documentPath: {
          type: 'string',
          description: 'Path to document in staging directory',
        },
        startLine: {
          type: 'number',
          description: 'Start line number (1-indexed, inclusive)',
        },
        endLine: {
          type: 'number',
          description: 'End line number (1-indexed, inclusive)',
        },
      },
      required: ['documentPath', 'startLine', 'endLine'],
    },
    handler: (args) => {
      const docPath = resolveStagingPath(args.documentPath);
      return readLines(docPath, args.startLine, args.endLine);
    },
  },

  search_content: {
    name: 'search_content',
    description: 'Search for keywords/patterns in document using regex',
    inputSchema: {
      type: 'object',
      properties: {
        documentPath: {
          type: 'string',
          description: 'Path to document in staging directory',
        },
        query: {
          type: 'string',
          description: 'Search query (regex pattern supported)',
        },
        maxResults: {
          type: 'number',
          description: 'Maximum number of results to return',
          default: 10,
        },
      },
      required: ['documentPath', 'query'],
    },
    handler: (args) => {
      const docPath = resolveStagingPath(args.documentPath);
      return searchContent(docPath, args.query, args.maxResults || 10);
    },
  },

  // LODA-MCP-COMP-02: Token-optimized search tool
  loda_search: {
    name: 'loda_search',
    description: 'Token-optimized document search using LODA (LLM-Optimized Document Access). Returns relevant sections within optional token budget. Uses Bloom filters for O(1) section elimination and caching for 10x+ speedup.',
    inputSchema: {
      type: 'object',
      properties: {
        documentPath: {
          type: 'string',
          description: 'Path to document (absolute or relative to staging directory)',
        },
        query: {
          type: 'string',
          description: 'Search query (keywords or phrase)',
        },
        contextBudget: {
          type: 'number',
          description: 'Maximum tokens to return (null/omit for unlimited)',
        },
        maxSections: {
          type: 'number',
          description: 'Maximum sections to return (default: 5)',
          default: 5,
        },
      },
      required: ['documentPath', 'query'],
    },
    handler: (args) => {
      const docPath = resolveStagingPath(args.documentPath);
      return lodaHandler.search(docPath, args.query, {
        contextBudget: args.contextBudget || null,
        maxSections: args.maxSections || 5,
      });
    },
  },
};

/**
 * Resolve document path to staging directory.
 *
 * Accepts a "/staging/<name>" reference, a path relative to STAGING_DIR, or an
 * absolute path (used as-is).
 *
 * NOTE(review): absolute paths and relative paths containing ".." are not
 * confined to STAGING_DIR, so any readable file can be requested. This is kept
 * because the loda_search tool documents absolute paths as supported, but it
 * deserves a decision - especially for HTTP mode, which sends a wildcard CORS
 * header.
 *
 * @param {string} documentPath - Path supplied by the tool caller.
 * @returns {string} Path of an existing file.
 * @throws {Error} If documentPath is not a string or the file does not exist.
 */
function resolveStagingPath(documentPath) {
  if (typeof documentPath !== 'string') {
    throw new Error('documentPath must be a string');
  }

  // Handle absolute paths, relative paths, and staging directory references
  const stagingPrefix = '/staging/';
  const requested = documentPath.startsWith(stagingPrefix)
    ? documentPath.slice(stagingPrefix.length)
    : documentPath;

  const resolved = path.isAbsolute(requested)
    ? requested
    : path.join(STAGING_DIR, requested);

  if (!fs.existsSync(resolved)) {
    throw new Error(`Document not found: ${requested} (resolved to: ${resolved})`);
  }

  return resolved;
}

/**
 * Execute MCP tool.
 *
 * @param {string} toolName - Key of the tool in MCP_TOOLS.
 * @param {object} args - Arguments passed to the tool handler.
 * @returns {{success: true, result: *}|{success: false, error: string}}
 *   The handler's result, or the message of the error it threw.
 * @throws {Error} If toolName is not a registered tool.
 */
function executeTool(toolName, args) {
  // Own-property check so inherited names such as "constructor" or "toString"
  // are reported as unknown tools instead of failing later with a confusing error.
  const isKnownTool = Object.prototype.hasOwnProperty.call(MCP_TOOLS, toolName);
  if (!isKnownTool) {
    throw new Error(`Unknown tool: ${toolName}`);
  }

  try {
    return {
      success: true,
      result: MCP_TOOLS[toolName].handler(args),
    };
  } catch (error) {
    return {
      success: false,
      error: error.message,
    };
  }
}

/**
 * Describe every registered tool without exposing its handler.
 *
 * @returns {Array<{name: string, description: string, inputSchema: object}>} Tool metadata.
 */
function describeTools() {
  return Object.values(MCP_TOOLS).map(({ name, description, inputSchema }) => ({
    name,
    description,
    inputSchema,
  }));
}

/**
 * MCP Stdio Protocol Handler.
 *
 * Reads one JSON request per line from stdin and writes one JSON response per
 * line to stdout. Diagnostics go to stderr so they never mix with responses.
 */
function startStdioServer() {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
    terminal: false,
  });

  console.error('[MCP Server] Started in stdio mode');
  console.error(`[MCP Server] Staging directory: ${STAGING_DIR}`);
  console.error('[MCP Server] Available tools:', Object.keys(MCP_TOOLS).join(', '));

  rl.on('line', (line) => {
    let response;

    try {
      const request = JSON.parse(line);

      if (request.method === 'tools/list') {
        // List available tools
        response = { tools: describeTools() };
      } else if (request.method === 'tools/call') {
        // Execute tool
        const { name, arguments: toolArgs } = request.params;
        response = executeTool(name, toolArgs);
      } else {
        response = {
          success: false,
          error: `Unknown method: ${request.method}`,
        };
      }
    } catch (error) {
      // Malformed JSON, missing params, or an unknown tool name.
      response = {
        success: false,
        error: error.message,
      };
    }

    console.log(JSON.stringify(response));
  });
}

/**
 * HTTP Server for testing/debugging.
 *
 * Routes:
 *   GET  /tools         - list tool metadata
 *   POST /tools/<name>  - execute a tool; the JSON body holds its arguments
 *   GET  /health        - status, staging directory and tool names
 *
 * @param {number} [port=49400] - TCP port to listen on.
 */
function startHttpServer(port = 49400) {
  const http = require('http');

  const server = http.createServer((req, res) => {
    // Enable CORS
    res.setHeader('Access-Control-Allow-Origin', '*');
    res.setHeader('Content-Type', 'application/json');

    if (req.method === 'GET' && req.url === '/tools') {
      // List tools
      res.writeHead(200);
      res.end(JSON.stringify({ tools: describeTools() }));
    } else if (req.method === 'POST' && req.url.startsWith('/tools/')) {
      // Execute tool
      const toolName = req.url.replace('/tools/', '');
      let body = '';

      req.on('data', (chunk) => {
        body += chunk;
      });
      req.on('end', () => {
        try {
          const toolArgs = JSON.parse(body);
          const result = executeTool(toolName, toolArgs);
          res.writeHead(200);
          res.end(JSON.stringify(result));
        } catch (error) {
          // Invalid JSON body or unknown tool name.
          res.writeHead(400);
          res.end(JSON.stringify({
            success: false,
            error: error.message,
          }));
        }
      });
    } else if (req.method === 'GET' && req.url === '/health') {
      res.writeHead(200);
      res.end(JSON.stringify({
        status: 'healthy',
        stagingDir: STAGING_DIR,
        tools: Object.keys(MCP_TOOLS),
      }));
    } else {
      res.writeHead(404);
      res.end(JSON.stringify({ error: 'Not found' }));
    }
  });

  server.listen(port, () => {
    console.log(`[MCP Server] HTTP mode listening on port ${port}`);
    console.log(`[MCP Server] Staging directory: ${STAGING_DIR}`);
    console.log(`[MCP Server] Health: http://localhost:${port}/health`);
    console.log(`[MCP Server] Tools: http://localhost:${port}/tools`);
  });
}

/**
 * Look up the value of a command-line option.
 *
 * Supports both "--name=value" and "--name value"; the "=" form wins when both
 * are present.
 *
 * @param {string[]} argv - Command-line arguments (without node and script path).
 * @param {string} name - Option name without the leading dashes.
 * @returns {string|undefined} The option value, or undefined when absent.
 */
function getCliOption(argv, name) {
  const inlinePrefix = `--${name}=`;
  const inlineArg = argv.find((arg) => arg.startsWith(inlinePrefix));
  if (inlineArg !== undefined) {
    return inlineArg.slice(inlinePrefix.length);
  }

  const flagIndex = argv.indexOf(`--${name}`);
  const nextArg = flagIndex === -1 ? undefined : argv[flagIndex + 1];
  // Do not mistake a following option for this option's value.
  if (nextArg !== undefined && !nextArg.startsWith('--')) {
    return nextArg;
  }

  return undefined;
}

// Parse command-line arguments
const args = process.argv.slice(2);
const mode = getCliOption(args, 'mode') || 'stdio';
// A missing or non-numeric port parses to NaN, which falls back to the default.
const port = Number.parseInt(getCliOption(args, 'port'), 10) || 49400;

if (args.includes('--help') || args.includes('-h')) {
  console.log(`
MCP Document Access Server

Usage:
  node document_access_mcp_server.js [options]

Options:
  --mode=<mode>    Server mode: stdio or http (default: stdio)
  --port=<port>    HTTP port (default: 49400, only for http mode)
  --help, -h       Show this help message

Modes:
  stdio    Standard I/O mode for MCP clients (production)
  http     HTTP REST API mode for testing/debugging

Examples:
  # Stdio mode (for MCP integration)
  node document_access_mcp_server.js --mode=stdio

  # HTTP mode for testing
  node document_access_mcp_server.js --mode=http --port=49400

Staging Directory:
  ${STAGING_DIR}

  Place documents here for LLM access via MCP tools.

Available Tools:
  - list_document_sections: Get document structure
  - read_section: Read specific section with context
  - read_lines: Read specific line range
  - search_content: Search for keywords/patterns
  - loda_search: Token-optimized search (LODA) with budget awareness
`);
  process.exit(0);
}

// Start server in appropriate mode
if (mode === 'http') {
  startHttpServer(port);
} else {
  startStdioServer();
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/patrickkarle/loda-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

document_access_mcp_server.js•15.7 KiB