MCP Docs RAG Server
by kazuph
Verified
- mcp-docs-rag
- src
#!/usr/bin/env node
/**
* MCP server that implements RAG functionality for documents in ~/docs directory.
* Features:
* - List available documents as resources
* - Read and query documents using RAG
* - Add new documents via git clone or wget
*/
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import {
CallToolRequestSchema,
ListResourcesRequestSchema,
ListToolsRequestSchema,
ReadResourceRequestSchema,
ListPromptsRequestSchema,
GetPromptRequestSchema,
} from "@modelcontextprotocol/sdk/types.js";
import fs from "fs";
import path from "path";
import { fileURLToPath } from "url";
import { exec } from "child_process";
import { promisify } from "util";
import {
VectorStoreIndex,
Document,
storageContextFromDefaults,
SimpleVectorStore,
CallbackManager,
Settings
} from "llamaindex";
import { Gemini, GEMINI_MODEL, GeminiEmbedding } from "@llamaindex/google";
const execAsync = promisify(exec);
// GEMINI_API_KEYをGOOGLE_API_KEYにマッピング(ライブラリが自動的に取得するため)
if (process.env.GEMINI_API_KEY && !process.env.GOOGLE_API_KEY) {
process.env.GOOGLE_API_KEY = process.env.GEMINI_API_KEY;
}
// Get docs path from environment variable, with fallback
// エクスポート可能にするために変数をexportします
export const DOCS_PATH = process.env.DOCS_PATH || path.join(process.env.HOME || process.env.USERPROFILE || '', 'docs');
// Ensure docs directory exists
if (!fs.existsSync(DOCS_PATH)) {
fs.mkdirSync(DOCS_PATH, { recursive: true });
}
// Ensure .indices directory exists
export const INDICES_PATH = path.join(DOCS_PATH, '.indices');
if (!fs.existsSync(INDICES_PATH)) {
fs.mkdirSync(INDICES_PATH, { recursive: true });
}
// Store indices for each document
const indices: Record<string, { index: VectorStoreIndex, description: string }> = {};
/**
* Normalizes a repository name from its URL or path
*/
export function normalizeRepoName(repoUrl: string): string {
const parts = repoUrl.split('/');
return parts[parts.length - 1].replace('.git', '');
}
/**
* Lists all available documents in the docs directory
*/
export async function listDocuments(): Promise<Array<{ id: string, name: string, path: string, description: string }>> {
const documents: Array<{ id: string, name: string, path: string, description: string }> = [];
const entries = fs.readdirSync(DOCS_PATH, { withFileTypes: true });
for (const entry of entries) {
if (!entry.isDirectory() || entry.name.startsWith('.')) continue;
const entryPath = path.join(DOCS_PATH, entry.name);
// Gitリポジトリかテキストファイルかを判断
let isGitRepo = false;
try {
// .gitディレクトリが存在するか確認
const gitPath = path.join(entryPath, '.git');
isGitRepo = fs.existsSync(gitPath) && fs.statSync(gitPath).isDirectory();
} catch (error) {
// エラーが発生した場合はGitリポジトリではない
isGitRepo = false;
}
// index.txtファイルがあるか確認
const indexPath = path.join(entryPath, 'index.txt');
const hasIndexFile = fs.existsSync(indexPath) && fs.statSync(indexPath).isFile();
if (isGitRepo) {
documents.push({
id: entry.name,
name: entry.name,
path: entryPath,
description: `Git repository: ${entry.name}`,
});
} else if (hasIndexFile) {
documents.push({
id: entry.name,
name: entry.name,
path: indexPath,
description: `Text document: ${entry.name}`,
});
}
}
return documents;
}
/**
* ディレクトリ内のファイルを再帰的に読み込む関数
* @param dirPath 対象ディレクトリのパス
* @returns ドキュメントの配列
*/
export async function readDirectoryRecursively(dirPath: string): Promise<Document[]> {
const result: Document[] = [];
const entries = fs.readdirSync(dirPath, { withFileTypes: true });
for (const entry of entries) {
const fullPath = path.join(dirPath, entry.name);
if (entry.isDirectory()) {
// サブディレクトリを再帰的に処理
const subdirDocs = await readDirectoryRecursively(fullPath);
result.push(...subdirDocs);
} else if (entry.isFile() && !entry.name.startsWith('.')) {
// ファイルを処理
try {
const content = fs.readFileSync(fullPath, 'utf-8');
// ファイルの相対パスを保存(DOCS_PATHからの相対パス)
const relativePath = path.relative(DOCS_PATH, fullPath);
result.push(new Document({
text: content,
metadata: {
name: entry.name,
source: fullPath,
path: relativePath
}
}));
} catch (error: any) {
console.warn(`Failed to read file ${fullPath}: ${error.message}`);
}
}
}
return result;
}
/**
* Load and index a document
* 存在しないドキュメントの場合は自動的に作成を試みる
*/
export async function loadDocument(documentId: string): Promise<VectorStoreIndex> {
if (indices[documentId]?.index) {
return indices[documentId].index;
}
let documents = await listDocuments();
let document = documents.find(c => c.id === documentId);
// ドキュメントが存在しない場合はエラーをスロー
if (!document) {
throw new Error(`Document not found: ${documentId}`);
}
let documentItems: Document[] = [];
if (fs.statSync(document.path).isDirectory()) {
// ディレクトリを再帰的に処理
documentItems = await readDirectoryRecursively(document.path);
// 空のドキュメントリストの場合にフォールバックメッセージを追加
if (documentItems.length === 0) {
console.warn(`No documents found in document: ${documentId}`);
documentItems.push(new Document({
text: `This document (${document.name}) appears to be empty. Please check if files exist at path: ${document.path}`,
metadata: { name: 'empty-notice', source: document.path }
}));
}
} else {
// Process single file
const text = fs.readFileSync(document.path, 'utf-8');
documentItems = [new Document({ text, metadata: { name: document.id, source: document.path } })];
}
// Gemini埋め込みモデルを設定
const geminiEmbed = new GeminiEmbedding();
// Gemini LLMモデルを設定
const gemini = new Gemini({
model: GEMINI_MODEL.GEMINI_2_0_FLASH
});
// グローバル設定に埋め込みモデルとLLMを設定(これをグローバルに保持)
Settings.embedModel = geminiEmbed;
Settings.llm = gemini;
// Create storage context
const storageContext = await storageContextFromDefaults({
persistDir: path.join(DOCS_PATH, '.indices', documentId),
});
// Create index
const index = await VectorStoreIndex.fromDocuments(documentItems, {
storageContext,
});
// Save index for future use
indices[documentId] = {
index,
description: document.description
};
return index;
}
/**
* Clone a git repository to the docs directory
* @param repoUrl URL of the Git repository to clone
* @param subdirectory Optional specific subdirectory to sparse checkout
* @param documentName Optional custom name for the document
*/
export async function cloneRepository(repoUrl: string, subdirectory?: string, documentName?: string): Promise<{ name: string; exists: boolean }> {
// Use custom document name if provided, otherwise normalize repo name
const repoName = documentName || normalizeRepoName(repoUrl);
const repoPath = path.join(DOCS_PATH, repoName);
// Check if repository already exists
if (fs.existsSync(repoPath)) {
// Document already exists, don't update it
return { name: repoName, exists: true };
} else {
if (subdirectory) {
// Clone with sparse-checkout for specific subdirectory
await execAsync(`mkdir -p "${repoPath}" && cd "${repoPath}" && \
git init && \
git remote add origin ${repoUrl} && \
git config core.sparseCheckout true && \
git config --local core.autocrlf false && \
echo "${subdirectory}/*" >> .git/info/sparse-checkout && \
git pull --depth=1 origin main || git pull --depth=1 origin master`);
} else {
// Normal clone for the entire repository
await execAsync(`cd "${DOCS_PATH}" && git clone ${repoUrl}`);
}
}
return { name: repoName, exists: false };
}
/**
* Download a file from URL to the docs directory
* @param fileUrl ダウンロードするファイルのURL
* @param documentName ドキュメント名(ディレクトリ名として使用)
*/
export async function downloadFile(fileUrl: string, documentName: string): Promise<{ name: string; exists: boolean }> {
// ドキュメント用のディレクトリを作成
const docDir = path.join(DOCS_PATH, documentName);
// ディレクトリが存在する場合は更新せずに通知
if (fs.existsSync(docDir)) {
return { name: documentName, exists: true };
}
// ディレクトリが存在しない場合は作成
fs.mkdirSync(docDir, { recursive: true });
// ファイル名を取得(URLのパス部分の最後)
const fileName = path.basename(fileUrl);
// index.txtとしてファイルを保存
const filePath = path.join(docDir, 'index.txt');
// ファイルをダウンロード
await execAsync(`cd "${docDir}" && wget -O "index.txt" ${fileUrl}`);
return { name: documentName, exists: false };
}
/**
* Create MCP server
*/
const server = new Server(
{
name: "mcp-docs-rag",
version: "0.1.0",
},
{
capabilities: {
resources: {},
tools: {},
prompts: {},
},
}
);
/**
* Handler for listing available documents as resources
*/
server.setRequestHandler(ListResourcesRequestSchema, async () => {
const documents = await listDocuments();
return {
resources: documents.map(document => ({
uri: `docs:///${document.id}`,
mimeType: "text/plain",
name: document.name,
description: document.description
}))
};
});
/**
* Handler for reading a document
*/
server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
const url = new URL(request.params.uri);
const documentId = url.pathname.replace(/^\//, '');
const documents = await listDocuments();
const document = documents.find(c => c.id === documentId);
if (!document) {
throw new Error(`Document not found: ${documentId}`);
}
let content: string;
if (fs.statSync(document.path).isDirectory()) {
// List files in directory
const files = fs.readdirSync(document.path, { withFileTypes: true })
.filter(entry => entry.isFile())
.map(entry => entry.name);
content = `Repository: ${document.name}\n\nFiles:\n${files.join('\n')}`;
} else {
// Read file content
content = fs.readFileSync(document.path, 'utf-8');
}
return {
contents: [{
uri: request.params.uri,
mimeType: "text/plain",
text: content
}]
};
});
/**
* Handler for listing available tools
*/
server.setRequestHandler(ListToolsRequestSchema, async () => {
return {
tools: [
{
name: "list_documents",
description: "List all available documents in the DOCS_PATH directory. Always use this tool first to check if desired documents already exist before adding new ones.",
inputSchema: {
type: "object",
properties: {}
}
},
{
name: "rag_query",
description: "Query a document using RAG. Note: If the index does not exist, it will be created when you query, which may take some time.",
inputSchema: {
type: "object",
properties: {
document_id: {
type: "string",
description: "ID of the document to query"
},
query: {
type: "string",
description: "Query to run against the document"
}
},
required: ["document_id", "query"]
}
},
{
name: "add_git_repository",
description: "Add a git repository to the docs directory with optional sparse checkout. Please do not use 'docs' in the document name.",
inputSchema: {
type: "object",
properties: {
repository_url: {
type: "string",
description: "URL of the git repository to clone"
},
document_name: {
type: "string",
description: "Optional: Custom name for the document (defaults to repository name). Use a simple, descriptive name without '-docs' suffix. For example, use 'react' instead of 'react-docs'."
},
subdirectory: {
type: "string",
description: "Optional: Specific subdirectory to sparse checkout (e.g. 'path/to/specific/dir'). This uses Git's sparse-checkout feature to only download the specified directory."
}
},
required: ["repository_url"]
}
},
{
name: "add_text_file",
description: "Add a text file to the docs directory with a specified name. Please do not use 'docs' in the document name.",
inputSchema: {
type: "object",
properties: {
file_url: {
type: "string",
description: "URL of the text file to download"
},
document_name: {
type: "string",
description: "Name of the document (will be used as directory name). Choose a descriptive name rather than using the URL filename (e.g. 'hono' instead of 'llms-full.txt')"
}
},
required: ["file_url", "document_name"]
}
}
]
};
});
/**
* Handler for tool calls
*/
server.setRequestHandler(CallToolRequestSchema, async (request) => {
switch (request.params.name) {
case "list_documents": {
const documents = await listDocuments();
// ドキュメントの情報を整形
const documentsList = documents.map(document => {
return `- ${document.name}: ${document.description}`;
}).join('\n');
return {
content: [{
type: "text",
text: `Available documents in ${DOCS_PATH}:\n\n${documentsList}\n\nTotal documents: ${documents.length}`
}]
};
}
case "rag_query": {
const documentId = String(request.params.arguments?.document_id);
const query = String(request.params.arguments?.query);
if (!documentId || !query) {
throw new Error("Document ID and query are required");
}
try {
// ドキュメントが存在するか確認し、存在しなければ自動的に作成を試みる
let documents = await listDocuments();
let document = documents.find(c => c.id === documentId);
if (!document) {
return {
content: [{
type: "text",
text: `Document '${documentId}' not found. Please add it manually using add_git_repository or add_text_file tools.`
}]
};
}
// Load and index document if needed
const index = await loadDocument(documentId);
// 一時的にGemini LLMを設定
const originalLLM = Settings.llm;
const gemini = new Gemini({
model: GEMINI_MODEL.GEMINI_2_0_FLASH
});
// グローバル設定に設定
Settings.llm = gemini;
// クエリエンジンの作成
const queryEngine = index.asQueryEngine();
// クエリの実行
const response = await queryEngine.query({
query
});
return {
content: [{
type: "text",
text: response.toString()
}]
};
} catch (error: any) {
console.error(`Error in rag_query:`, error.message);
return {
content: [{
type: "text",
text: `Error processing query: ${error.message}`
}]
};
}
}
case "add_git_repository": {
const repositoryUrl = String(request.params.arguments?.repository_url);
const subdirectory = request.params.arguments?.subdirectory ? String(request.params.arguments?.subdirectory) : undefined;
const documentName = request.params.arguments?.document_name ? String(request.params.arguments?.document_name) : undefined;
if (!repositoryUrl) {
throw new Error("Repository URL is required");
}
const result = await cloneRepository(repositoryUrl, subdirectory, documentName);
// If document already exists, inform the user without updating
if (result.exists) {
return {
content: [{
type: "text",
text: `Document '${result.name}' already exists. Please use list_documents to view existing documents.`
}]
};
}
// Prepare response message for new document
let responseText = `Added git repository: ${result.name}`;
if (subdirectory) {
responseText += ` (sparse checkout of '${subdirectory}')`;
}
if (documentName) {
responseText += ` with custom name '${documentName}'`;
}
return {
content: [{
type: "text",
text: `${responseText}. The index will be created when you query this document for the first time.`
}]
};
}
case "add_text_file": {
const fileUrl = String(request.params.arguments?.file_url);
const documentName = String(request.params.arguments?.document_name);
if (!fileUrl) {
throw new Error("File URL is required");
}
if (!documentName) {
throw new Error("Document name is required");
}
const result = await downloadFile(fileUrl, documentName);
// If document already exists, inform the user without updating
if (result.exists) {
return {
content: [{
type: "text",
text: `Document '${result.name}' already exists. Please use list_documents to view existing documents.`
}]
};
}
return {
content: [{
type: "text",
text: `Added document '${result.name}' with content from ${fileUrl}. The index will be created when you query this document for the first time.`
}]
};
}
default:
throw new Error("Unknown tool");
}
});
/**
* Handler for listing available prompts
*/
server.setRequestHandler(ListPromptsRequestSchema, async () => {
return {
prompts: [
{
name: "guide_documents_usage",
description: "Guide on how to use documents and RAG functionality",
}
]
};
});
/**
* Handler for getting a prompt
*/
server.setRequestHandler(GetPromptRequestSchema, async (request) => {
if (request.params.name !== "guide_documents_usage") {
throw new Error("Unknown prompt");
}
const documents = await listDocuments();
return {
messages: [
{
role: "user",
content: {
type: "text",
text: "Please list the available documents and guide on how to use the RAG functionality."
}
},
{
role: "assistant",
content: {
type: "text",
text: `Available documents:\n${documents.map(c => `- ${c.name}: ${c.description}`).join('\n')}\n\nUse the 'rag_query' tool to ask questions about these documents.`
}
}
]
};
});
/**
* Main function to start the server
*/
async function main() {
const transport = new StdioServerTransport();
await server.connect(transport);
}
// Start the server
main().catch((error) => {
console.error("Server error:", error);
process.exit(1);
});