Krep MCP Server

by bmorphism
Verified
// krep-mcp-server: Express front end for the `krep` string-search binary.
//
// Routes defined in this file:
//   GET  /                     - service description
//   GET  /health               - liveness probe
//   POST /search               - search a file for a pattern
//   POST /match                - search a string for a pattern
//   GET  /mcp/search/<path>    - same as /search, parameters in the URL
//   GET  /mcp/match/<text>     - same as /match, parameters in the URL
//   GET  /performance          - static description of the algorithms
//   GET  /algorithm-selection  - static description of selection criteria
//
// When run directly with CLAUDE_MCP set, no HTTP server is started; see the
// bottom of the file.
const express = require("express");
const bodyParser = require("body-parser");
const cors = require("cors");
const { execFile } = require("child_process");
const path = require("path");
const fs = require("fs");

const app = express();
const PORT = process.env.PORT || 8080;

// Thread count used when the caller supplies none (or an unusable value).
const DEFAULT_THREADS = 4;
// Minimum stdout buffer handed to the child process (10 MiB).
const DEFAULT_MAX_BUFFER = 1024 * 1024 * 10;
// Fragments of an error message that GET /mcp/search/* reports as an empty
// result instead of an HTTP 500.
const MISSING_TARGET_MARKERS = [
  "No such file",
  "Permission denied",
  "not found",
  "cannot access",
];

/**
 * Lists the locations probed for the krep binary, in priority order.
 *
 * @returns {string[]} Candidate paths.
 */
function getKrepCandidatePaths() {
  return [
    path.join(__dirname, "../../krep-native/krep"),
    path.join(__dirname, "../krep-native/krep"),
    "/usr/local/bin/krep",
    path.join(process.env.HOME || "", "krep-native/krep"),
  ];
}

/**
 * Finds the first candidate path that exists on disk.
 * With DEBUG set, every candidate and its status is logged to stderr.
 *
 * @returns {string|undefined} Path of the binary, or undefined if none exists.
 */
function findKrepBinary() {
  const possiblePaths = getKrepCandidatePaths();
  if (process.env.DEBUG) {
    console.error("Looking for krep binary in:");
    possiblePaths.forEach((p) =>
      console.error(`- ${p} (${fs.existsSync(p) ? "found" : "not found"})`)
    );
  }
  return possiblePaths.find((p) => fs.existsSync(p));
}

// An explicit KREP_PATH wins; otherwise probe, then fall back to the first
// candidate so the startup check below has a concrete path to report.
const KREP_PATH =
  process.env.KREP_PATH ||
  findKrepBinary() ||
  path.join(__dirname, "../../krep-native/krep");

app.use(cors());
app.use(bodyParser.json());

app.get("/health", (req, res) => {
  res.status(200).json({ status: "ok" });
});

app.get("/", (req, res) => {
  res.status(200).json({
    name: "krep-mcp-server",
    version: "0.1.0",
    description: "High-performance string search MCP server based on krep",
    endpoints: [
      "/search - Search for patterns in files",
      "/match - Match patterns in strings",
    ],
    algorithms: [
      "KMP (Knuth-Morris-Pratt) - Used for very short patterns (< 3 chars)",
      "Boyer-Moore-Horspool - Used for medium-length patterns",
      "Rabin-Karp - Used for longer patterns (> 16 chars)",
      "SIMD - Hardware-accelerated search with SSE4.2 (when available)",
      "AVX2 - Hardware-accelerated search with AVX2 (when available)",
    ],
  });
});

/**
 * Guesses which algorithm is used for a pattern, from its length and the
 * host platform. This is a local heuristic; it does not query the binary.
 *
 * @param {string} pattern - Search pattern.
 * @returns {string} Algorithm label.
 */
function getAlgorithmInfo(pattern) {
  const patternLen = pattern.length;
  if (patternLen < 3) {
    return "KMP";
  }
  if (patternLen > 16) {
    return "Rabin-Karp";
  }
  // Test mode pins the mid-length answer so results do not vary by host.
  if (process.env.KREP_TEST_MODE === "true") {
    return "Boyer-Moore-Horspool";
  }
  if (process.platform === "darwin" && process.arch === "arm64") {
    return "NEON SIMD";
  }
  if (process.platform !== "darwin" && process.arch === "x64") {
    return "SSE4.2/AVX2";
  }
  return "Boyer-Moore-Horspool";
}

/**
 * Tells whether a request value is a usable, non-empty string.
 * JSON bodies and repeated query keys can deliver objects or arrays, which
 * must not reach string methods or the child-process argument list.
 *
 * @param {*} value - Raw request value.
 * @returns {boolean} True for a string of length >= 1.
 */
function isNonEmptyString(value) {
  return typeof value === "string" && value.length > 0;
}

/**
 * Converts a caller-supplied thread count into a positive integer.
 *
 * @param {*} value - Raw value from the body or query string.
 * @returns {number} Parsed count, or DEFAULT_THREADS when unusable.
 */
function normalizeThreads(value) {
  const parsed = Number.parseInt(value, 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : DEFAULT_THREADS;
}

/**
 * Drops a leading "file://" scheme from a path.
 *
 * @param {string} target - Path or file URL.
 * @returns {string} Path without the scheme prefix.
 */
function stripFileScheme(target) {
  return target.startsWith("file://") ? target.substring(7) : target;
}

/**
 * Builds the argument vector for one krep invocation.
 * The arguments go to execFile as an array, so no shell ever parses the
 * pattern or target and shell metacharacters in them are inert.
 * NOTE(review): a pattern that starts with "-" is still seen by krep as an
 * option (as it was before); confirm whether krep accepts "--" before adding it.
 *
 * @param {object} options
 * @param {*} options.caseSensitive - Falsy adds "-i".
 * @param {number} options.threads - Value for "-t".
 * @param {*} options.countOnly - Truthy adds "-c".
 * @param {boolean} options.stringMode - True adds "-s".
 * @param {string} pattern - Search pattern.
 * @param {string} target - File path, or text when stringMode is set.
 * @returns {string[]} Arguments in the order the original command used.
 */
function buildKrepArgs(options, pattern, target) {
  const args = [];
  if (!options.caseSensitive) {
    args.push("-i");
  }
  args.push("-t", String(options.threads));
  if (options.countOnly) {
    args.push("-c");
  }
  if (options.stringMode) {
    args.push("-s");
  }
  args.push(pattern, target);
  return args;
}

/**
 * Extracts the figures this server reports from krep's stdout.
 *
 * @param {string} stdout - Captured standard output.
 * @returns {{matchCount: number, searchTime: (number|null),
 *   searchSpeed: (number|null), reportedAlgorithm: (string|null)}}
 *   Fields are 0 / null when the corresponding text is absent.
 */
function parseKrepOutput(stdout) {
  const countMatch = stdout.match(/Found (\d+) matches/);
  const timeMatch = stdout.match(/Search completed in ([\d.]+) seconds/);
  const speedMatch = stdout.match(/([\d.]+) MB\/s/);
  // [^\n] keeps the capture on one line. The previous class [^\\n] excluded
  // backslash and the letter "n", so names such as "Rabin-Karp" never matched.
  const algorithmMatch = stdout.match(/Using ([^\n]+) algorithm/);
  return {
    matchCount: countMatch ? Number.parseInt(countMatch[1], 10) : 0,
    searchTime: timeMatch ? parseFloat(timeMatch[1]) : null,
    searchSpeed: speedMatch ? parseFloat(speedMatch[1]) : null,
    reportedAlgorithm: algorithmMatch ? algorithmMatch[1].trim() : null,
  };
}

/**
 * Runs a file search and writes the JSON response.
 *
 * @param {object} res - Express response.
 * @param {object} request
 * @param {string} request.pattern - Search pattern.
 * @param {string} request.searchPath - Target path ("file://" is stripped).
 * @param {*} request.caseSensitive - Echoed back; falsy adds "-i".
 * @param {number} request.threads - Normalized thread count.
 * @param {*} request.countOnly - Truthy adds "-c".
 * @param {object} behavior
 * @param {boolean} behavior.tolerateMissingTarget - Answer 200 with an empty
 *   result when the error message contains a MISSING_TARGET_MARKERS entry.
 * @param {boolean} behavior.preferReportedAlgorithm - Use the algorithm name
 *   found in stdout when there is one, instead of the local heuristic.
 */
function respondToFileSearch(res, request, behavior) {
  const { pattern, caseSensitive, threads, countOnly } = request;
  const searchPath = stripFileScheme(request.searchPath);
  const args = buildKrepArgs(
    { caseSensitive, threads, countOnly, stringMode: false },
    pattern,
    searchPath
  );

  execFile(KREP_PATH, args, { maxBuffer: DEFAULT_MAX_BUFFER }, (error, stdout) => {
    if (error) {
      const isMissingTarget = MISSING_TARGET_MARKERS.some((marker) =>
        error.message.includes(marker)
      );
      if (behavior.tolerateMissingTarget && isMissingTarget) {
        return res.status(200).json({
          pattern: pattern,
          path: searchPath,
          results: "No matches found",
          performance: {
            matchCount: 0,
            searchTime: 0,
            searchSpeed: 0,
            algorithmUsed: getAlgorithmInfo(pattern),
            threads: threads,
            caseSensitive: caseSensitive,
          },
          success: true,
        });
      }
      return res.status(500).json({ error: error.message });
    }

    const stats = parseKrepOutput(stdout);
    const algorithmUsed =
      behavior.preferReportedAlgorithm && stats.reportedAlgorithm
        ? stats.reportedAlgorithm
        : getAlgorithmInfo(pattern);

    return res.status(200).json({
      pattern: pattern,
      path: searchPath,
      results: stdout,
      performance: {
        matchCount: stats.matchCount,
        searchTime: stats.searchTime,
        searchSpeed: stats.searchSpeed,
        algorithmUsed: algorithmUsed,
        threads: threads,
        caseSensitive: caseSensitive,
      },
      success: true,
    });
  });
}

/**
 * Runs an in-string match (krep "-s") and writes the JSON response.
 * Any failure of the child process is reported as HTTP 200 with zero matches.
 *
 * @param {object} res - Express response.
 * @param {object} request
 * @param {string} request.pattern - Search pattern.
 * @param {string} request.text - Text to search in.
 * @param {*} request.caseSensitive - Echoed back; falsy adds "-i".
 * @param {number} request.threads - Normalized thread count.
 * @param {*} request.countOnly - Truthy adds "-c".
 */
function respondToStringMatch(res, request) {
  const { pattern, text, caseSensitive, threads, countOnly } = request;
  const args = buildKrepArgs(
    { caseSensitive, threads, countOnly, stringMode: true },
    pattern,
    text
  );
  // Leave room for output larger than the input text itself.
  const maxBuffer = Math.max(DEFAULT_MAX_BUFFER, text.length * 2);

  execFile(KREP_PATH, args, { maxBuffer: maxBuffer }, (error, stdout) => {
    if (error) {
      // NOTE(review): presumably krep exits non-zero when nothing matches,
      // which is why every failure maps to "no matches" — confirm. Genuine
      // failures are only visible with DEBUG set.
      if (process.env.DEBUG) {
        console.error(`krep match failed: ${error.message}`);
      }
      return res.status(200).json({
        pattern: pattern,
        text: text,
        results: "No matches found",
        performance: {
          matchCount: 0,
          searchTime: 0,
          algorithmUsed: getAlgorithmInfo(pattern),
          threads: threads,
          caseSensitive: caseSensitive,
        },
        success: true,
      });
    }

    const stats = parseKrepOutput(stdout);
    return res.status(200).json({
      pattern: pattern,
      text: text,
      results: stdout,
      performance: {
        matchCount: stats.matchCount,
        searchTime: stats.searchTime,
        algorithmUsed: getAlgorithmInfo(pattern),
        threads: threads,
        caseSensitive: caseSensitive,
      },
      success: true,
    });
  });
}

// POST /search — body: { pattern, filePath, caseSensitive?, threads?, countOnly? }
app.post("/search", (req, res) => {
  const {
    pattern,
    filePath,
    caseSensitive = true,
    threads = DEFAULT_THREADS,
    countOnly = false,
  } = req.body;

  if (!isNonEmptyString(pattern) || !isNonEmptyString(filePath)) {
    return res
      .status(400)
      .json({ error: "Missing required parameters: pattern and path" });
  }

  return respondToFileSearch(
    res,
    {
      pattern,
      searchPath: filePath,
      caseSensitive,
      threads: normalizeThreads(threads),
      countOnly,
    },
    { tolerateMissingTarget: false, preferReportedAlgorithm: true }
  );
});

// POST /match — body: { pattern, text, caseSensitive?, threads?, countOnly? }
app.post("/match", (req, res) => {
  const {
    pattern,
    text,
    caseSensitive = true,
    threads = DEFAULT_THREADS,
    countOnly = false,
  } = req.body;

  if (!isNonEmptyString(pattern) || !isNonEmptyString(text)) {
    return res
      .status(400)
      .json({ error: "Missing required parameters: pattern and text" });
  }

  return respondToStringMatch(res, {
    pattern,
    text,
    caseSensitive,
    threads: normalizeThreads(threads),
    countOnly,
  });
});

// GET /mcp/search/<path>?pattern=...&case=false&threads=N&count=true
app.get("/mcp/search/*", (req, res) => {
  const searchPath = req.params[0] || "";
  const pattern = req.query.pattern;
  const caseSensitive = req.query.case !== "false";
  const threads = normalizeThreads(req.query.threads);
  const countOnly = req.query.count === "true";

  if (!isNonEmptyString(pattern) || !isNonEmptyString(searchPath)) {
    return res
      .status(400)
      .json({ error: "Missing required parameters: pattern and path" });
  }

  return respondToFileSearch(
    res,
    { pattern, searchPath, caseSensitive, threads, countOnly },
    { tolerateMissingTarget: true, preferReportedAlgorithm: false }
  );
});

// GET /mcp/match/<text>?pattern=...&case=false&threads=N&count=true
app.get("/mcp/match/*", (req, res) => {
  const text = req.params[0] || "";
  const pattern = req.query.pattern;
  const caseSensitive = req.query.case !== "false";
  const threads = normalizeThreads(req.query.threads);
  const countOnly = req.query.count === "true";

  if (!isNonEmptyString(pattern) || !isNonEmptyString(text)) {
    return res
      .status(400)
      .json({ error: "Missing required parameters: pattern and text" });
  }

  return respondToStringMatch(res, {
    pattern,
    text,
    caseSensitive,
    threads,
    countOnly,
  });
});

app.get("/performance", (req, res) => {
  res.status(200).json({
    algorithms: {
      kmp: {
        name: "Knuth-Morris-Pratt (KMP)",
        bestFor: "Very short patterns (< 3 characters)",
        performance:
          "O(n + m) time complexity where n is text length and m is pattern length",
        memoryUsage:
          "Low - requires additional space proportional to pattern length",
        advantages: [
          "Guarantees linear time performance",
          "No worst-case degradation for pathological patterns",
          "Ideal for single-character or two-character patterns",
        ],
      },
      boyerMoore: {
        name: "Boyer-Moore-Horspool",
        bestFor: "Medium-length patterns (3-16 characters)",
        performance: "O(n·m) worst case, but typically much better in practice",
        memoryUsage: "Low - requires a 256-element table for character skipping",
        advantages: [
          "Often skips portions of the text, making it sublinear in many cases",
          "Well-balanced performance for typical text patterns",
          "Low memory overhead",
        ],
      },
      rabinKarp: {
        name: "Rabin-Karp",
        bestFor: "Longer patterns (> 16 characters)",
        performance: "O(n+m) average case with efficient hash function",
        memoryUsage: "Low - constant additional space",
        advantages: [
          "Hash-based approach allows efficient matching of longer patterns",
          "Can be extended to find multiple patterns simultaneously",
          "Good for patterns where collisions are unlikely",
        ],
      },
      simd: {
        name: "SIMD-accelerated search (SSE4.2)",
        bestFor: "Medium-length patterns on supporting hardware",
        performance:
          "Significantly faster than scalar algorithms when hardware supports it",
        memoryUsage: "Low - uses CPU vector registers",
        advantages: [
          "Uses hardware acceleration with 128-bit vector instructions",
          "Can process multiple characters at once",
          "Available on modern x86/x64 processors",
        ],
      },
      avx2: {
        name: "AVX2-accelerated search",
        bestFor: "Medium-length patterns on supporting hardware",
        performance: "Fastest option when hardware supports it",
        memoryUsage: "Low - uses CPU vector registers",
        advantages: [
          "Uses 256-bit vector instructions for maximum parallelism",
          "Can process up to 32 bytes at once",
          "Available on newer Intel/AMD processors",
        ],
      },
    },
    optimizations: {
      memoryMapped: {
        description: "Uses memory-mapped I/O for file access",
        benefits: [
          "Leverages OS page cache for optimal file reading",
          "Reduces system call overhead",
          "Allows the OS to optimize read-ahead",
        ],
      },
      multiThreaded: {
        description: "Parallel search using multiple threads",
        benefits: [
          "Scales with available CPU cores",
          "Significant speedup for large files",
          "Adaptive chunking based on file size and pattern length",
        ],
      },
      prefetching: {
        description: "CPU cache prefetching hints",
        benefits: [
          "Reduces CPU cache misses",
          "Improves memory access patterns",
          "Particularly effective for sequential searches",
        ],
      },
      dynamicSelection: {
        description:
          "Automatic algorithm selection based on pattern characteristics",
        benefits: [
          "Chooses optimal algorithm without user intervention",
          "Adapts to different pattern lengths and content",
          "Hardware-aware selection when SIMD is available",
        ],
      },
    },
  });
});

app.get("/algorithm-selection", (req, res) => {
  res.status(200).json({
    selectionCriteria: {
      patternLength: {
        short: {
          range: "1-2 characters",
          algorithm: "KMP (Knuth-Morris-Pratt)",
          reason: "Efficient for very short patterns with minimal preprocessing",
        },
        medium: {
          range: "3-16 characters",
          algorithm: "SIMD/AVX2 (if hardware supports it) or Boyer-Moore-Horspool",
          reason: "Good balance of preprocessing cost and search efficiency",
        },
        long: {
          range: "> 16 characters",
          algorithm: "Rabin-Karp",
          reason: "Hash-based approach minimizes comparisons for long patterns",
        },
      },
      textCharacteristics: {
        natural: {
          description: "Natural language text",
          recommended: "Boyer-Moore-Horspool or SIMD",
          reason: "Good character distribution allows for effective skipping",
        },
        source: {
          description: "Source code or structured text",
          recommended: "Boyer-Moore-Horspool with case sensitivity options",
          reason: "Handles mixed case and symbols effectively",
        },
        binary: {
          description: "Binary data with unusual byte distribution",
          recommended: "KMP or Rabin-Karp",
          reason: "More robust against unusual character distributions",
        },
      },
      hardwareConsiderations: {
        modern: {
          description: "Modern x86/x64 processors with SIMD",
          recommended: "SSE4.2/AVX2 acceleration",
          reason: "Takes advantage of hardware vector instructions",
        },
        arm: {
          description: "ARM processors (e.g., Apple Silicon)",
          recommended: "NEON SIMD acceleration",
          reason: "Leverages ARM-specific vector instructions",
        },
        limited: {
          description: "Older or resource-constrained systems",
          recommended: "Boyer-Moore-Horspool",
          reason: "Good performance with minimal memory and CPU requirements",
        },
      },
    },
    automaticSelection: {
      description: "krep automatically selects the optimal algorithm based on:",
      factors: [
        "Pattern length (KMP for short, Boyer-Moore for medium, Rabin-Karp for long)",
        "Available hardware acceleration (SSE4.2, AVX2, NEON)",
        "File size (single-threaded for small files, multi-threaded for large)",
      ],
    },
  });
});

// Startup check: without the binary nothing can be searched, so exit unless
// the check is skipped (KREP_SKIP_CHECK) or test mode is on (KREP_TEST_MODE).
if (!fs.existsSync(KREP_PATH) && !process.env.KREP_SKIP_CHECK) {
  console.error(`Error: krep binary not found at ${KREP_PATH}`);
  console.error(
    'Please build the krep binary first by running "make" in the krep-native directory'
  );
  console.error("Possible paths searched:");
  getKrepCandidatePaths().forEach((candidate) => console.error(`- ${candidate}`));
  if (!process.env.KREP_TEST_MODE) {
    process.exit(1);
  } else {
    console.error("Running in test mode, continuing despite missing krep binary");
  }
}

/**
 * Writes one JSON-RPC message to stdout behind a Content-Length header.
 *
 * @param {object} response - Message to serialize.
 */
function writeFramedMessage(response) {
  const jsonResponse = JSON.stringify(response);
  const header = `Content-Length: ${Buffer.byteLength(jsonResponse, "utf8")}\r\n\r\n`;
  process.stdout.write(header + jsonResponse);
}

/**
 * Test-mode stdin handler: answers "initialize" and "executeFunction" (krep)
 * with canned data. Each chunk is parsed as one complete JSON document;
 * anything that fails to parse or process is logged and dropped.
 *
 * @param {string} chunk - Data read from stdin.
 */
function handleTestModeChunk(chunk) {
  console.error(`Received chunk: ${chunk.substring(0, 50)}...`);
  try {
    const message = JSON.parse(chunk);

    if (message.method === "initialize") {
      writeFramedMessage({
        jsonrpc: "2.0",
        id: message.id,
        result: {
          capabilities: {
            functions: [
              {
                name: "krep",
                description:
                  "Unified function for pattern searching in files or strings",
                parameters: {
                  type: "object",
                  properties: {
                    pattern: {
                      type: "string",
                      description: "Pattern to search for",
                    },
                    target: {
                      type: "string",
                      description: "File path or string to search in",
                    },
                    mode: {
                      type: "string",
                      description:
                        'Search mode: "file" (default), "string", or "count"',
                      enum: ["file", "string", "count"],
                    },
                  },
                  required: ["pattern", "target"],
                },
              },
            ],
          },
        },
      });
    }

    if (message.method === "executeFunction" && message.params.function === "krep") {
      const { pattern, target, mode = "file" } = message.params.parameters;
      writeFramedMessage({
        jsonrpc: "2.0",
        id: message.id,
        result: {
          pattern: pattern,
          target: target,
          mode: mode,
          results: `Found 5 matches for "${pattern}" in ${target}`,
          performance: {
            matchCount: 5,
            searchTime: 0.001,
            searchSpeed: 100,
            algorithmUsed: "Test Algorithm",
            threads: 4,
            caseSensitive: true,
          },
          success: true,
        },
      });
    }
  } catch (error) {
    console.error(`Error parsing message: ${error.message}`);
  }
}

// Entry point when executed directly; when required as a module only `app`
// is exported and nothing is started.
if (require.main === module) {
  if (process.env.CLAUDE_MCP) {
    console.error("Running in MCP mode, not starting HTTP server");
    if (process.env.KREP_TEST_MODE) {
      console.error("Running in test mode with simplified MCP implementation");
      process.stdin.setEncoding("utf8");
      process.stdin.on("data", handleTestModeChunk);
    } else {
      const KrepMcpServer = require("./mcp_server");
      new KrepMcpServer();
    }
  } else {
    app.listen(PORT, () => {
      console.error(`krep-mcp-server running on port ${PORT}`);
      console.error(`Using krep binary at: ${KREP_PATH}`);
    });
  }
}

module.exports = app;