#!/usr/bin/env node
/**
* Enhanced Calibre RAG MCP Server - Node.js Version
* A Windows-compatible MCP server with RAG capabilities for project-based vector search
* and contextual conversations using Calibre ebook library
*/
const { spawn, exec } = require('child_process');
const fs = require('fs');
const path = require('path');
const os = require('os');
// RAG-specific imports (will be dynamically imported)
let transformers, faiss, natural;
// OCR-specific imports
const tesseract = require('node-tesseract-ocr');
const pdf2pic = require('pdf2pic');
const pdfParse = require('pdf-parse');
const sharp = require('sharp');
const path_module = require('path');
// Configuration
// Central, read-only settings for Calibre discovery, RAG storage, and OCR.
const CONFIG = {
// Default Calibre library path for Windows
CALIBRE_LIBRARY: 'D:\\e-library',
// Calibre executable paths (try common locations)
CALIBRE_PATHS: [
'calibredb', // If in PATH
path.join('C:', 'Program Files', 'Calibre2', 'calibredb.exe'),
path.join('C:', 'Program Files (x86)', 'Calibre2', 'calibredb.exe'),
path.join(os.homedir(), 'AppData', 'Local', 'calibre-ebook', 'calibredb.exe')
],
// Request/debug log lives in the OS temp dir; recreated on every start.
LOG_FILE: path.join(os.tmpdir(), 'calibre-rag-mcp-requests.log'),
TIMEOUT: 10000, // 10 seconds
// RAG Configuration
RAG: {
// Each project is a subdirectory with project.json, chunks/, vectors.bin.
PROJECTS_DIR: path.join(__dirname, 'projects'),
EMBEDDINGS_MODEL: 'Xenova/all-MiniLM-L6-v2',
CHUNK_SIZE: 1000,
CHUNK_OVERLAP: 200,
// 384 matches the all-MiniLM-L6-v2 embedding size.
VECTOR_DIMENSION: 384,
MAX_CONTEXT_CHUNKS: 5
},
// OCR Configuration
OCR: {
TESSERACT_PATH: 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe',
LANGUAGES: 'eng',
PDF_DPI: 300,
// Hard cap on pages rasterized/OCR'd per book to bound processing time.
MAX_PAGES_PER_BOOK: 500,
TEMP_DIR: path.join(os.tmpdir(), 'calibre-ocr'),
IMAGE_FORMATS: ['.jpg', '.jpeg', '.png', '.tiff', '.bmp'],
PDF_FORMATS: ['.pdf'],
// ImageMagick detection paths
IMAGEMAGICK_PATHS: [
'C:\\Program Files\\ImageMagick-7.1.1-Q16-HDRI',
'C:\\Program Files\\ImageMagick-7.1.1-Q16',
'C:\\Program Files\\ImageMagick',
'C:\\Program Files (x86)\\ImageMagick-7.1.1-Q16-HDRI',
'C:\\Program Files (x86)\\ImageMagick-7.1.1-Q16',
'C:\\Program Files (x86)\\ImageMagick'
]
}
};
class EnhancedCalibreRAGServer {
constructor() {
// Resolved path to the calibredb executable (set by findCalibreDB below).
this.calibredbPath = null;
// Lazily-created @xenova/transformers feature-extraction pipeline.
this.embedder = null;
// Project name -> parsed project.json config object.
this.projects = new Map();
this.currentProject = null;
this.imageMagickAvailable = false;
// Side-effectful startup: truncate log, locate calibredb, create storage
// dirs, configure ImageMagick on PATH, and load persisted projects.
this.initializeLogger();
this.findCalibreDB();
this.ensureProjectsDirectory();
this.ensureOCRTempDirectory();
this.imageMagickAvailable = this.detectAndConfigureImageMagick();
this.loadProjects();
}
initializeLogger() {
try {
fs.writeFileSync(CONFIG.LOG_FILE, '');
} catch (error) {
// Ignore if can't create log file
}
}
log(message) {
const timestamp = new Date().toISOString();
const logMessage = `[${timestamp}] ${message}\n`;
try {
fs.appendFileSync(CONFIG.LOG_FILE, logMessage);
} catch (error) {
// Ignore logging errors
}
// Also log to stderr for debugging
console.error(logMessage.trim());
}
findCalibreDB() {
for (const calibrePath of CONFIG.CALIBRE_PATHS) {
if (this.testCalibreDB(calibrePath)) {
this.calibredbPath = calibrePath;
this.log(`Found calibredb at: ${calibrePath}`);
return;
}
}
this.log('Warning: calibredb not found in standard locations');
this.calibredbPath = 'calibredb'; // Hope it's in PATH
}
testCalibreDB(calibrePath) {
try {
const { execSync } = require('child_process');
execSync(`"${calibrePath}" --version`, {
timeout: 5000,
stdio: 'ignore',
windowsHide: true
});
return true;
} catch (error) {
return false;
}
}
ensureProjectsDirectory() {
if (!fs.existsSync(CONFIG.RAG.PROJECTS_DIR)) {
fs.mkdirSync(CONFIG.RAG.PROJECTS_DIR, { recursive: true });
this.log(`Created projects directory: ${CONFIG.RAG.PROJECTS_DIR}`);
}
}
ensureOCRTempDirectory() {
if (!fs.existsSync(CONFIG.OCR.TEMP_DIR)) {
fs.mkdirSync(CONFIG.OCR.TEMP_DIR, { recursive: true });
this.log(`Created OCR temp directory: ${CONFIG.OCR.TEMP_DIR}`);
}
}
detectAndConfigureImageMagick() {
// Check if ImageMagick is already in PATH
try {
const { execSync } = require('child_process');
execSync('magick --version', { stdio: 'ignore', timeout: 3000 });
this.log('✅ ImageMagick (magick) already available in PATH');
return true;
} catch (error) {
// Try convert command
try {
execSync('convert --version', { stdio: 'ignore', timeout: 3000 });
this.log('✅ ImageMagick (convert) already available in PATH');
return true;
} catch (convertError) {
this.log('⚠️ ImageMagick not found in PATH, searching for installation...');
}
}
// Search for ImageMagick installations
for (const imageMagickPath of CONFIG.OCR.IMAGEMAGICK_PATHS) {
if (fs.existsSync(imageMagickPath)) {
const magickExe = path.join(imageMagickPath, 'magick.exe');
const convertExe = path.join(imageMagickPath, 'convert.exe');
if (fs.existsSync(magickExe) || fs.existsSync(convertExe)) {
this.log(`✅ Found ImageMagick at: ${imageMagickPath}`);
// Add to PATH for this process
process.env.PATH = process.env.PATH + ';' + imageMagickPath;
this.log(`Added ImageMagick to PATH: ${imageMagickPath}`);
// Test if it works now
try {
const { execSync } = require('child_process');
execSync('magick --version', { stdio: 'ignore', timeout: 3000 });
this.log('✅ ImageMagick successfully configured!');
return true;
} catch (testError) {
this.log(`❌ ImageMagick test failed even after adding to PATH: ${testError.message}`);
}
}
}
}
this.log('❌ ImageMagick not found. OCR for PDFs will be limited to pdf-parse only.');
return false;
}
// OCR a single image file with Tesseract; returns extracted text, or ''
// on any failure (errors are logged, never thrown).
async extractTextFromImage(imagePath) {
try {
this.log(`OCR processing image: ${imagePath}`);
const ocrConfig = {
lang: CONFIG.OCR.LANGUAGES,
// oem 1 = LSTM engine only; psm 3 = fully automatic page segmentation.
oem: 1,
psm: 3,
};
// Use Tesseract with full path if specified
// NOTE(review): node-tesseract-ocr is not documented to honor the
// TESSERACT_BINARY_PATH env var — confirm this actually redirects the
// binary; the library normally finds `tesseract` via PATH.
if (fs.existsSync(CONFIG.OCR.TESSERACT_PATH)) {
process.env.TESSERACT_BINARY_PATH = CONFIG.OCR.TESSERACT_PATH;
}
const text = await tesseract.recognize(imagePath, ocrConfig);
return text || '';
} catch (error) {
this.log(`OCR failed for image ${imagePath}: ${error.message}`);
return '';
}
}
async extractTextFromPDF(pdfPath) {
try {
this.log(`Processing PDF: ${pdfPath}`);
// First attempt: Try pdf-parse for text-based PDFs
try {
this.log(`Attempting text extraction with pdf-parse...`);
const dataBuffer = fs.readFileSync(pdfPath);
const pdfData = await pdfParse(dataBuffer);
if (pdfData.text && pdfData.text.trim().length > 50) {
this.log(`✅ PDF text extraction successful: ${pdfData.text.length} characters`);
this.log(`Pages: ${pdfData.numpages}, Info: ${JSON.stringify(pdfData.info)}`);
return pdfData.text;
} else {
this.log(`⚠️ pdf-parse returned minimal text (${pdfData.text.length} chars), trying OCR...`);
}
} catch (pdfParseError) {
this.log(`pdf-parse failed: ${pdfParseError.message}, trying OCR...`);
}
// Second attempt: OCR for scanned/image-based PDFs
return await this.extractTextFromPDFWithOCR(pdfPath);
} catch (error) {
this.log(`PDF processing completely failed for ${pdfPath}: ${error.message}`);
return '';
}
}
// Rasterize a PDF page-by-page (pdf2pic, which requires ImageMagick/GraphicsMagick)
// and OCR each page image; returns the concatenated text with per-page markers,
// or '' when ImageMagick is unavailable or conversion fails outright.
async extractTextFromPDFWithOCR(pdfPath) {
try {
if (!this.imageMagickAvailable) {
this.log(`⚠️ ImageMagick not available, skipping OCR for PDF: ${pdfPath}`);
return '';
}
this.log(`OCR processing PDF: ${pdfPath}`);
// Per-call scratch directory keyed by timestamp; removed at the end.
const outputDir = path.join(CONFIG.OCR.TEMP_DIR, `pdf_${Date.now()}`);
fs.mkdirSync(outputDir, { recursive: true });
// Convert PDF to images using ImageMagick
const convert = pdf2pic.fromPath(pdfPath, {
density: CONFIG.OCR.PDF_DPI,
saveFilename: 'page',
savePath: outputDir,
format: 'png',
width: 2000,
height: 2000
});
let allText = '';
let pageCount = 0;
// Process pages one at a time, capped at MAX_PAGES_PER_BOOK. The loop
// relies on the converter failing (or returning no file) past the last
// page to terminate early — either way we break.
for (let page = 1; page <= CONFIG.OCR.MAX_PAGES_PER_BOOK; page++) {
try {
const result = await convert(page);
if (result && result.path && fs.existsSync(result.path)) {
const pageText = await this.extractTextFromImage(result.path);
allText += `\n\n--- Page ${page} ---\n${pageText}`;
pageCount++;
// Clean up image file
try {
fs.unlinkSync(result.path);
} catch (e) {
// Ignore cleanup errors
}
this.log(`Processed PDF page ${page}`);
} else {
// No more pages
break;
}
} catch (pageError) {
// A failed page ends processing; earlier pages' text is kept.
this.log(`Failed to process PDF page ${page}: ${pageError.message}`);
break;
}
}
// Clean up temp directory
try {
fs.rmSync(outputDir, { recursive: true, force: true });
} catch (e) {
// Ignore cleanup errors
}
this.log(`OCR completed for PDF: ${pageCount} pages processed`);
return allText;
} catch (error) {
this.log(`PDF OCR failed for ${pdfPath}: ${error.message}`);
return '';
}
}
async processBookWithOCR(book) {
try {
this.log(`Starting OCR processing for book: ${book.title}`);
if (!book.formats || book.formats.length === 0) {
this.log(`No formats available for book: ${book.title}`);
return '';
}
// Try PDF first
const pdfFormat = book.formats.find(f =>
CONFIG.OCR.PDF_FORMATS.some(ext => f.toLowerCase().endsWith(ext))
);
if (pdfFormat && fs.existsSync(pdfFormat)) {
this.log(`Processing PDF format: ${pdfFormat}`);
return await this.extractTextFromPDF(pdfFormat);
}
// Try image formats
const imageFormat = book.formats.find(f =>
CONFIG.OCR.IMAGE_FORMATS.some(ext => f.toLowerCase().endsWith(ext))
);
if (imageFormat && fs.existsSync(imageFormat)) {
this.log(`Processing image format: ${imageFormat}`);
return await this.extractTextFromImage(imageFormat);
}
this.log(`No OCR-compatible formats found for book: ${book.title}`);
return '';
} catch (error) {
this.log(`OCR processing failed for book ${book.title}: ${error.message}`);
return '';
}
}
async initializeEmbedder() {
if (this.embedder) return;
try {
// Dynamic import of transformers
const { pipeline } = await import('@xenova/transformers');
this.embedder = await pipeline('feature-extraction', CONFIG.RAG.EMBEDDINGS_MODEL);
this.log('Embedder initialized successfully');
} catch (error) {
this.log(`Failed to initialize embedder: ${error.message}`);
throw error;
}
}
async generateEmbedding(text) {
await this.initializeEmbedder();
try {
const output = await this.embedder(text, { pooling: 'mean', normalize: true });
return Array.from(output.data);
} catch (error) {
this.log(`Embedding generation failed: ${error.message}`);
throw error;
}
}
// Original Calibre functionality methods (preserved from original server)
// Run calibredb with the given args; resolves with stdout (noise lines
// filtered), rejects on non-zero exit, spawn error, or timeout.
async runCalibreCommand(args, timeout = CONFIG.TIMEOUT) {
return new Promise((resolve, reject) => {
const command = this.calibredbPath;
// `--version` must run without --library-path; everything else targets
// the configured library.
const needsLibraryPath = !args.includes('--version');
const fullArgs = needsLibraryPath ?
['--library-path', CONFIG.CALIBRE_LIBRARY, ...args] :
args;
this.log(`Running: ${command} ${fullArgs.join(' ')}`);
const child = spawn(command, fullArgs, {
stdio: 'pipe',
windowsHide: true
});
let stdout = '';
let stderr = '';
child.stdout.on('data', (data) => {
stdout += data.toString();
});
child.stderr.on('data', (data) => {
stderr += data.toString();
});
// Kill the child on timeout. The subsequent 'close' event will call
// reject again, which is a harmless no-op on a settled promise.
const timer = setTimeout(() => {
child.kill('SIGTERM');
reject(new Error('Command timeout'));
}, timeout);
child.on('close', (code) => {
clearTimeout(timer);
if (code === 0) {
// Drop calibre's "Another calibre program..." warning lines that
// would otherwise corrupt JSON output parsing downstream.
const filteredOutput = stdout
.split('\n')
.filter(line => !line.includes('Another calibre program'))
.join('\n');
resolve(filteredOutput);
} else {
reject(new Error(`Command failed with code ${code}: ${stderr}`));
}
});
child.on('error', (error) => {
clearTimeout(timer);
reject(error);
});
});
}
createEpubUrl(author, title, id, startLine = '', endLine = '') {
const encAuthor = encodeURIComponent(author);
const encTitle = encodeURIComponent(title);
let url = `epub://${encAuthor}/${encTitle}@${id}`;
if (startLine && endLine) {
url += `#${startLine}:${endLine}`;
}
return url;
}
parseEpubUrl(url) {
url = url.replace(/^epub:\/\//, '');
const idMatch = url.match(/@(\d+)/);
if (!idMatch) {
throw new Error('Invalid epub URL format');
}
const bookId = idMatch[1];
let startLine = '';
let endLine = '';
const rangeMatch = url.match(/#(\d+):(\d+)$/);
if (rangeMatch) {
startLine = rangeMatch[1];
endLine = rangeMatch[2];
}
return { bookId, startLine, endLine };
}
// Enhanced chunking for technical content
// Split `content` into ~CHUNK_SIZE-character chunks, preferring to break at
// section headers and carrying a small line overlap across non-header
// breaks. Each chunk gets `metadata` plus positional/content flags.
// NOTE(review): chunk ids restart at chunk_0 for every call, so ids are
// unique only within a single book — see addBooksToProject, which writes
// them as filenames.
intelligentChunk(content, metadata = {}) {
const chunks = [];
const lines = content.split('\n');
let currentChunk = '';
let currentSize = 0;
let chunkIndex = 0;
// Detect section boundaries
const sectionHeaders = /^(Chapter|Section|Part|Appendix|\d+\.|[A-Z]+\.|\#)/i;
const formulaStart = /^(\\begin|\\\[|\\\(|\$\$)/;
const tableStart = /^(\||\+---|\+----|Table|\s*\|)/;
for (let i = 0; i < lines.length; i++) {
const line = lines[i];
const lineSize = line.length;
// Check if we should start a new chunk
const isHeader = sectionHeaders.test(line.trim());
// NOTE: isFormula/isTable are computed but only the regex flags on the
// finished chunk text are recorded; they do not affect split points.
const isFormula = formulaStart.test(line.trim());
const isTable = tableStart.test(line.trim());
// If we're at size limit and hit a boundary, finalize chunk
if (currentSize + lineSize > CONFIG.RAG.CHUNK_SIZE &&
(isHeader || currentChunk.trim())) {
if (currentChunk.trim()) {
chunks.push({
id: `chunk_${chunkIndex++}`,
text: currentChunk.trim(),
metadata: {
...metadata,
chunk_index: chunkIndex - 1,
// line_start is approximate: overlap lines carried from the
// previous chunk are counted as if they belonged to this one.
line_start: i - currentChunk.split('\n').length + 1,
line_end: i,
has_formulas: /\\begin|\\\[|\\\(|\$/.test(currentChunk),
has_tables: /\|.*\|/.test(currentChunk)
}
});
}
// Start new chunk with overlap if not a clear boundary
if (!isHeader) {
// Carry up to 5 trailing lines into the next chunk for context.
const overlapLines = Math.min(5, currentChunk.split('\n').length);
const overlap = currentChunk.split('\n').slice(-overlapLines).join('\n');
currentChunk = overlap + '\n' + line;
currentSize = overlap.length + lineSize;
} else {
currentChunk = line;
currentSize = lineSize;
}
} else {
currentChunk += (currentChunk ? '\n' : '') + line;
currentSize += lineSize;
}
}
// Add final chunk
if (currentChunk.trim()) {
chunks.push({
id: `chunk_${chunkIndex}`,
text: currentChunk.trim(),
metadata: {
...metadata,
chunk_index: chunkIndex,
line_start: lines.length - currentChunk.split('\n').length + 1,
line_end: lines.length,
has_formulas: /\\begin|\\\[|\\\(|\$/.test(currentChunk),
has_tables: /\|.*\|/.test(currentChunk)
}
});
}
return chunks;
}
// Project Management
loadProjects() {
try {
const projectDirs = fs.readdirSync(CONFIG.RAG.PROJECTS_DIR)
.filter(dir => {
const fullPath = path.join(CONFIG.RAG.PROJECTS_DIR, dir);
return fs.statSync(fullPath).isDirectory();
});
for (const projectDir of projectDirs) {
const configPath = path.join(CONFIG.RAG.PROJECTS_DIR, projectDir, 'project.json');
if (fs.existsSync(configPath)) {
try {
const config = JSON.parse(fs.readFileSync(configPath, 'utf8'));
this.projects.set(projectDir, config);
this.log(`Loaded project: ${projectDir}`);
} catch (error) {
this.log(`Failed to load project ${projectDir}: ${error.message}`);
}
}
}
this.log(`Loaded ${this.projects.size} projects`);
} catch (error) {
this.log(`Failed to load projects: ${error.message}`);
}
}
async createProject(name, description = '', selectedBooks = []) {
const projectPath = path.join(CONFIG.RAG.PROJECTS_DIR, name);
if (fs.existsSync(projectPath)) {
throw new Error(`Project '${name}' already exists`);
}
// Create project directory structure
fs.mkdirSync(projectPath, { recursive: true });
fs.mkdirSync(path.join(projectPath, 'chunks'), { recursive: true });
const projectConfig = {
name,
description,
created_at: new Date().toISOString(),
books: selectedBooks,
chunk_count: 0,
vector_dimension: CONFIG.RAG.VECTOR_DIMENSION
};
// Save project configuration
fs.writeFileSync(
path.join(projectPath, 'project.json'),
JSON.stringify(projectConfig, null, 2)
);
this.projects.set(name, projectConfig);
this.log(`Created project: ${name}`);
return projectConfig;
}
// Fetch metadata for `bookIds` from calibredb, extract each book's text
// (plain .txt format, else OCR), chunk + embed it, and persist chunks,
// vectors, and metadata under the project directory.
//
// NOTE(review): intelligentChunk() restarts ids at chunk_0 per book, so
// within one call a later book's chunk_0.json overwrites an earlier book's
// file in chunks/. Likewise vectors.bin/metadata.json are overwritten (not
// merged) on repeat calls, and chunk_count is reset to this batch's count.
// Fixing this requires a coordinated change with searchProjectContext's
// chunk_<index> filename lookup.
async addBooksToProject(projectName, bookIds) {
const project = this.projects.get(projectName);
if (!project) {
throw new Error(`Project '${projectName}' not found`);
}
const projectPath = path.join(CONFIG.RAG.PROJECTS_DIR, projectName);
const chunksPath = path.join(projectPath, 'chunks');
const vectorsPath = path.join(projectPath, 'vectors.bin');
const metadataPath = path.join(projectPath, 'metadata.json');
// Get book metadata
// Builds "id:1 OR id:2 ..." calibre search syntax for the id list.
const idQuery = `id:${bookIds.join(' OR id:')}`;
const listResult = await this.runCalibreCommand([
'list',
'--fields', 'id,title,authors,formats',
'--for-machine',
'--search', idQuery
]);
const books = JSON.parse(listResult || '[]');
const allChunks = [];
const allVectors = [];
const allMetadata = [];
for (const book of books) {
this.log(`Processing book: ${book.title}`);
let content = '';
let contentSource = 'unknown';
// Try text format first
const txtPath = book.formats?.find(f => f.endsWith('.txt'));
if (txtPath && fs.existsSync(txtPath)) {
this.log(`Using text format: ${txtPath}`);
content = fs.readFileSync(txtPath, 'utf8');
contentSource = 'text';
} else {
// Fallback to OCR
this.log(`No text format available for: ${book.title}, trying OCR...`);
content = await this.processBookWithOCR(book);
contentSource = 'ocr';
// Books that yield no text are skipped entirely.
if (!content || content.trim().length === 0) {
this.log(`OCR failed or no content extracted for: ${book.title}`);
continue;
}
}
// Read and chunk content
const bookMetadata = {
book_id: book.id,
title: book.title,
authors: book.authors,
project: projectName,
content_source: contentSource,
content_length: content.length
};
const chunks = this.intelligentChunk(content, bookMetadata);
// Generate embeddings for chunks
for (const chunk of chunks) {
try {
const embedding = await this.generateEmbedding(chunk.text);
allChunks.push(chunk);
allVectors.push(embedding);
allMetadata.push(chunk.metadata);
// Save individual chunk
fs.writeFileSync(
path.join(chunksPath, `${chunk.id}.json`),
JSON.stringify(chunk, null, 2)
);
this.log(`Processed chunk ${chunk.id} from ${book.title}`);
} catch (error) {
// A failed chunk is dropped; remaining chunks still process.
this.log(`Failed to process chunk ${chunk.id}: ${error.message}`);
}
}
}
// Save vectors and metadata
if (allVectors.length > 0) {
this.saveVectors(vectorsPath, allVectors);
fs.writeFileSync(metadataPath, JSON.stringify(allMetadata, null, 2));
// Update project config
// Book ids are deduplicated; chunk_count reflects only this batch.
project.books = [...new Set([...project.books, ...bookIds])];
project.chunk_count = allChunks.length;
project.last_updated = new Date().toISOString();
fs.writeFileSync(
path.join(projectPath, 'project.json'),
JSON.stringify(project, null, 2)
);
this.projects.set(projectName, project);
}
return {
processed_books: books.length,
total_chunks: allChunks.length,
project: project
};
}
saveVectors(filePath, vectors) {
// Simple binary format for vectors
const buffer = Buffer.alloc(4 + 4 + vectors.length * vectors[0].length * 4);
let offset = 0;
// Write header: vector count, dimension
buffer.writeUInt32LE(vectors.length, offset);
offset += 4;
buffer.writeUInt32LE(vectors[0].length, offset);
offset += 4;
// Write vectors
for (const vector of vectors) {
for (const value of vector) {
buffer.writeFloatLE(value, offset);
offset += 4;
}
}
fs.writeFileSync(filePath, buffer);
}
loadVectors(filePath) {
if (!fs.existsSync(filePath)) {
return { vectors: [], count: 0, dimension: 0 };
}
const buffer = fs.readFileSync(filePath);
let offset = 0;
const count = buffer.readUInt32LE(offset);
offset += 4;
const dimension = buffer.readUInt32LE(offset);
offset += 4;
const vectors = [];
for (let i = 0; i < count; i++) {
const vector = [];
for (let j = 0; j < dimension; j++) {
vector.push(buffer.readFloatLE(offset));
offset += 4;
}
vectors.push(vector);
}
return { vectors, count, dimension };
}
cosineSimilarity(vecA, vecB) {
const dotProduct = vecA.reduce((sum, a, i) => sum + a * vecB[i], 0);
const magnitudeA = Math.sqrt(vecA.reduce((sum, a) => sum + a * a, 0));
const magnitudeB = Math.sqrt(vecB.reduce((sum, b) => sum + b * b, 0));
return dotProduct / (magnitudeA * magnitudeB);
}
// Vector-similarity search within a project: embed `query`, rank all stored
// chunk vectors by cosine similarity, and return the top `limit` chunks
// (with text, similarity score, and book attribution). Throws if the
// project or its vector data is missing.
async searchProjectContext(projectName, query, limit = CONFIG.RAG.MAX_CONTEXT_CHUNKS) {
const project = this.projects.get(projectName);
if (!project) {
throw new Error(`Project '${projectName}' not found`);
}
const projectPath = path.join(CONFIG.RAG.PROJECTS_DIR, projectName);
const vectorsPath = path.join(projectPath, 'vectors.bin');
const metadataPath = path.join(projectPath, 'metadata.json');
const chunksPath = path.join(projectPath, 'chunks');
// Load vectors and metadata
const { vectors } = this.loadVectors(vectorsPath);
if (!fs.existsSync(metadataPath) || vectors.length === 0) {
throw new Error(`No vector data found for project '${projectName}'`);
}
// metadata[i] corresponds to vectors[i], in write order.
const metadata = JSON.parse(fs.readFileSync(metadataPath, 'utf8'));
// Generate query embedding
const queryEmbedding = await this.generateEmbedding(query);
// Calculate similarities
const similarities = vectors.map((vector, index) => ({
index,
similarity: this.cosineSimilarity(queryEmbedding, vector),
metadata: metadata[index]
}));
// Sort by similarity and get top results
similarities.sort((a, b) => b.similarity - a.similarity);
const topResults = similarities.slice(0, limit);
// Load chunk content
const contextChunks = [];
for (const result of topResults) {
// NOTE(review): the filename is reconstructed from chunk_index, which
// is only unique per book (see addBooksToProject) — a collision loads
// whichever book's chunk file was written last.
const chunkId = result.metadata.chunk_index !== undefined ?
`chunk_${result.metadata.chunk_index}` : `chunk_${result.index}`;
const chunkPath = path.join(chunksPath, `${chunkId}.json`);
if (fs.existsSync(chunkPath)) {
const chunk = JSON.parse(fs.readFileSync(chunkPath, 'utf8'));
contextChunks.push({
...chunk,
similarity: result.similarity,
book_title: result.metadata.title,
authors: result.metadata.authors
});
}
}
return contextChunks;
}
// Original search methods (preserved from original server)
async searchBooksMetadata(query, limit = 50) {
try {
const searchResult = await this.runCalibreCommand(['search', '--limit', limit.toString(), query]);
if (!searchResult.trim()) {
return [];
}
const bookIds = searchResult.trim();
const idQuery = `id:${bookIds.replace(/,/g, ' OR id:')}`;
const listResult = await this.runCalibreCommand([
'list',
'--fields', 'id,title,authors,series,tags,publisher,pubdate,formats,identifiers,comments',
'--for-machine',
'--search', idQuery
]);
const books = JSON.parse(listResult || '[]');
return books.map(book => ({
id: book.id,
title: book.title,
authors: book.authors,
series: book.series,
tags: book.tags,
publisher: book.publisher,
published: book.pubdate,
epub_url: this.createEpubUrl(book.authors, book.title, book.id),
formats: book.formats ? book.formats.map(f => path.basename(f)) : [],
full_formats: book.formats || [],
has_text: book.formats ? book.formats.some(f => f.endsWith('.txt')) : false,
description: book.comments ?
book.comments.replace(/<[^>]+>/g, '').split('\n').slice(0, 2).join(' ').substring(0, 200) + '...' :
null
}));
} catch (error) {
this.log(`Metadata search failed: ${error.message}`);
return [];
}
}
formatResponse(searchResults, query, searchType = 'search') {
const count = searchResults.length;
if (count === 0) {
return {
content: [{
type: 'text',
text: `No results found for: ${query}`
}],
results: []
};
}
let contentText;
let results;
if (searchType === 'context') {
contentText = `Found ${count} relevant context chunk(s) for '${query}':\n\n` +
searchResults.map((chunk, i) =>
`**${i + 1}. From "${chunk.book_title}" by ${chunk.authors}** (similarity: ${chunk.similarity.toFixed(3)})\n` +
`${chunk.text.substring(0, 200)}...\n\n`
).join('');
results = searchResults.map(chunk => ({
id: chunk.id,
title: `${chunk.book_title} - Chunk ${chunk.metadata.chunk_index}`,
text: chunk.text,
similarity: chunk.similarity,
metadata: chunk.metadata
}));
} else {
contentText = `Found ${count} book(s) matching '${query}':\n\n` +
searchResults.map(r =>
`• ${r.title} by ${r.authors}\n URL: ${r.epub_url}\n ${r.description ? 'Description: ' + r.description : ''}\n`
).join('\n');
results = searchResults.map(r => ({
id: r.id.toString(),
title: r.title,
text: r.description || `${r.title} by ${r.authors}`,
url: r.epub_url
}));
}
return {
content: [{
type: 'text',
text: contentText
}],
results: results
};
}
// Response handling methods
sendResponse(response) {
console.log(JSON.stringify(response));
this.log(`Response: ${JSON.stringify(response)}`);
}
sendError(id, code, message) {
this.sendResponse({
jsonrpc: '2.0',
id: id,
error: {
code: code,
message: message
}
});
}
sendSuccess(id, result) {
this.sendResponse({
jsonrpc: '2.0',
id: id,
result: result
});
}
handleInitialize(id) {
const response = {
protocolVersion: '2024-11-05',
serverInfo: {
name: 'calibre-rag-mcp-nodejs',
version: '1.0.0',
description: 'Enhanced Calibre ebook library server with RAG capabilities for project-based vector search and contextual conversations'
},
capabilities: {
tools: {},
resources: {},
prompts: {}
}
};
this.sendSuccess(id, response);
}
// Answer tools/list with the static tool catalog. Each entry's name,
// description, and JSON-schema inputSchema are protocol-visible strings
// consumed by MCP clients — keep them stable.
handleToolsList(id) {
const tools = [
// Original Calibre tools
{
name: 'search',
description: 'Search the Calibre ebook library. Supports both full-text content search (default) and metadata search using field syntax.',
inputSchema: {
type: 'object',
properties: {
query: {
type: 'string',
description: 'Search query. For full-text: use natural language. For metadata: use field syntax (author:Name, title:"Title").'
},
limit: {
type: 'integer',
description: 'Maximum number of results (default: 50)',
default: 50
},
fuzzy_fallback: {
type: 'string',
description: 'Alternative search terms if exact query fails'
}
},
required: ['query']
}
},
{
// NOTE: 'fetch' is advertised but handleToolsCall currently returns
// "implementation pending" for it.
name: 'fetch',
description: 'Fetch specific content from a book using epub:// URL',
inputSchema: {
type: 'object',
properties: {
url: {
type: 'string',
description: 'epub:// URL from search results'
}
},
required: ['url']
}
},
// New RAG tools
{
name: 'list_projects',
description: 'List all available RAG projects',
inputSchema: {
type: 'object',
properties: {},
required: []
}
},
{
name: 'create_project',
description: 'Create a new RAG project for vector-based book search',
inputSchema: {
type: 'object',
properties: {
name: {
type: 'string',
description: 'Project name (alphanumeric and underscores only)'
},
description: {
type: 'string',
description: 'Project description'
}
},
required: ['name']
}
},
{
name: 'add_books_to_project',
description: 'Add books to a RAG project for vectorization and context search',
inputSchema: {
type: 'object',
properties: {
project_name: {
type: 'string',
description: 'Name of the project'
},
book_ids: {
type: 'array',
items: { type: 'integer' },
description: 'Array of book IDs to add to the project'
}
},
required: ['project_name', 'book_ids']
}
},
{
name: 'search_project_context',
description: 'Search for relevant context chunks within a RAG project using vector similarity',
inputSchema: {
type: 'object',
properties: {
project_name: {
type: 'string',
description: 'Name of the project to search'
},
query: {
type: 'string',
description: 'Query to find relevant context'
},
limit: {
type: 'integer',
description: 'Maximum number of context chunks to return (default: 5)',
default: 5
}
},
required: ['project_name', 'query']
}
},
{
name: 'get_project_info',
description: 'Get detailed information about a specific RAG project',
inputSchema: {
type: 'object',
properties: {
project_name: {
type: 'string',
description: 'Name of the project'
}
},
required: ['project_name']
}
}
];
this.sendSuccess(id, { tools: tools });
}
async handleToolsCall(id, toolName, args) {
try {
switch (toolName) {
case 'search':
const query = args.query;
const limit = args.limit || 50;
if (!query) {
this.sendError(id, -32602, 'Missing required parameter: query');
return;
}
const results = await this.searchBooksMetadata(query, limit);
const mcpResult = this.formatResponse(results, query, 'search');
this.sendSuccess(id, mcpResult);
break;
case 'fetch':
// Implementation would be similar to original
this.sendError(id, -32601, 'Fetch tool implementation pending');
break;
case 'list_projects':
const projectList = Array.from(this.projects.entries()).map(([name, config]) => ({
name,
description: config.description,
book_count: config.books.length,
chunk_count: config.chunk_count,
created_at: config.created_at,
last_updated: config.last_updated
}));
this.sendSuccess(id, {
content: [{
type: 'text',
text: `Available RAG Projects (${projectList.length}):\n\n` +
projectList.map(p =>
`• **${p.name}**: ${p.description}\n` +
` Books: ${p.book_count}, Chunks: ${p.chunk_count}\n` +
` Created: ${p.created_at}\n`
).join('\n')
}],
projects: projectList
});
break;
case 'create_project':
const projectName = args.name;
const projectDesc = args.description || '';
if (!projectName) {
this.sendError(id, -32602, 'Missing required parameter: name');
return;
}
if (!/^[a-zA-Z0-9_]+$/.test(projectName)) {
this.sendError(id, -32602, 'Project name must contain only alphanumeric characters and underscores');
return;
}
try {
const newProject = await this.createProject(projectName, projectDesc);
this.sendSuccess(id, {
content: [{
type: 'text',
text: `Created project '${projectName}' successfully!\n\nNext steps:\n1. Search for books using the 'search' tool\n2. Add books to the project using 'add_books_to_project'\n3. Use 'search_project_context' for RAG queries`
}],
project: newProject
});
} catch (error) {
this.sendError(id, -32603, error.message);
}
break;
case 'add_books_to_project':
const projName = args.project_name;
const bookIds = args.book_ids;
if (!projName || !bookIds) {
this.sendError(id, -32602, 'Missing required parameters: project_name, book_ids');
return;
}
try {
const result = await this.addBooksToProject(projName, bookIds);
this.sendSuccess(id, {
content: [{
type: 'text',
text: `Successfully processed ${result.processed_books} books and created ${result.total_chunks} chunks for project '${projName}'.\n\nProject is now ready for context search!`
}],
result: result
});
} catch (error) {
this.sendError(id, -32603, error.message);
}
break;
case 'search_project_context':
const searchProjName = args.project_name;
const searchQuery = args.query;
const searchLimit = args.limit || 5;
if (!searchProjName || !searchQuery) {
this.sendError(id, -32602, 'Missing required parameters: project_name, query');
return;
}
try {
const contextChunks = await this.searchProjectContext(searchProjName, searchQuery, searchLimit);
const mcpResult = this.formatResponse(contextChunks, searchQuery, 'context');
this.sendSuccess(id, mcpResult);
} catch (error) {
this.sendError(id, -32603, error.message);
}
break;
case 'get_project_info':
const infoProjName = args.project_name;
if (!infoProjName) {
this.sendError(id, -32602, 'Missing required parameter: project_name');
return;
}
const project = this.projects.get(infoProjName);
if (!project) {
this.sendError(id, -32603, `Project '${infoProjName}' not found`);
return;
}
this.sendSuccess(id, {
content: [{
type: 'text',
text: `**Project: ${project.name}**\n\n` +
`Description: ${project.description}\n` +
`Created: ${project.created_at}\n` +
`Books: ${project.books.length}\n` +
`Chunks: ${project.chunk_count}\n` +
`Vector Dimension: ${project.vector_dimension}\n` +
`${project.last_updated ? `Last Updated: ${project.last_updated}` : ''}`
}],
project: project
});
break;
default:
this.sendError(id, -32601, `Unknown tool: ${toolName}`);
}
} catch (error) {
this.sendError(id, -32603, error.message);
}
}
async processRequest(request) {
this.log(`Request: ${JSON.stringify(request)}`);
const { method, id, params } = request;
switch (method) {
case 'initialize':
this.handleInitialize(id);
break;
case 'tools/list':
this.handleToolsList(id);
break;
case 'tools/call':
const toolName = params?.name;
const args = params?.arguments || {};
await this.handleToolsCall(id, toolName, args);
break;
case 'notifications/initialized':
// Ignore
break;
case 'resources/list':
this.sendSuccess(id, { resources: [] });
break;
case 'prompts/list':
this.sendSuccess(id, { prompts: [] });
break;
default:
this.sendError(id, -32601, `Method not found: ${method}`);
}
}
start() {
this.log('Enhanced Calibre RAG MCP Server started');
process.stdin.setEncoding('utf8');
let buffer = '';
process.stdin.on('data', (chunk) => {
buffer += chunk;
const lines = buffer.split('\n');
buffer = lines.pop() || '';
for (const line of lines) {
if (line.trim()) {
try {
const request = JSON.parse(line);
this.processRequest(request);
} catch (error) {
this.log(`Parse error: ${error.message}`);
this.sendError(null, -32700, 'Parse error');
}
}
}
});
process.stdin.on('end', () => {
this.log('Input stream ended');
process.exit(0);
});
process.on('SIGINT', () => {
this.log('Received SIGINT, shutting down');
process.exit(0);
});
}
}
// Start the server
// Run standalone when executed directly (node server.js); when required as
// a module, only export the class (e.g. for testing).
if (require.main === module) {
const server = new EnhancedCalibreRAGServer();
server.start();
}
module.exports = EnhancedCalibreRAGServer;