Multi-Agent RAG MCP Server

Overview Schema Related Servers Score Discussions

rag-mcp-server
src
data_sources

document_loader.py•3.31 KiB

"""Load documents (PDF, DOCX, TXT) from disk and extract their plain text."""

import asyncio
import os
from pathlib import Path
from typing import List, Dict

import PyPDF2
import docx
import aiofiles


class DocumentLoader:
    """Loads and extracts text from various document formats.

    Supported formats are PDF (via PyPDF2), Word ``.docx`` (via python-docx)
    and UTF-8 plain text (via aiofiles).

    NOTE(review): progress is reported with ``print()``. If this loader runs
    inside a server that uses stdout as its transport, that output may
    interfere with the protocol -- confirm against the caller.
    """

    # Lower-case suffixes this loader understands. Shared by load_document()
    # and load_all_documents() so the two can never disagree.
    SUPPORTED_EXTENSIONS = ('.pdf', '.docx', '.txt')

    def __init__(self, documents_path: str = "./data/documents"):
        """Remember the documents directory, creating it if necessary.

        Args:
            documents_path: Directory scanned by ``load_all_documents``.
        """
        self.documents_path = Path(documents_path)

        # Create the directory if it doesn't exist
        self.documents_path.mkdir(parents=True, exist_ok=True)
        print(f"Document loader initialized. Looking for files in: {self.documents_path}")

    @staticmethod
    def _extract_pdf_text(file_path: str) -> str:
        """Synchronously read every page of a PDF and return the joined text."""
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            total_pages = len(pdf_reader.pages)
            page_texts = []
            for page_num, page in enumerate(pdf_reader.pages, 1):
                # Fall back to "" so a page that yields nothing cannot break
                # the final join.
                page_texts.append(page.extract_text() or "")
                if page_num % 10 == 0:
                    print(f" Processed {page_num}/{total_pages} pages")
        # Pages are concatenated without a separator, as before.
        return "".join(page_texts)

    @staticmethod
    def _extract_docx_text(file_path: str) -> str:
        """Synchronously read a .docx file; paragraphs are joined by newlines."""
        doc = docx.Document(file_path)
        return "\n".join(paragraph.text for paragraph in doc.paragraphs)

    async def load_pdf(self, file_path: str) -> str:
        """Extract text from PDF file.

        Parsing is synchronous, so it runs in the default executor to keep
        the event loop responsive while large PDFs are processed.

        Args:
            file_path: Path of the PDF to read.

        Returns:
            The text of all pages, concatenated in page order.
        """
        print(f" Loading PDF: {Path(file_path).name}")
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._extract_pdf_text, file_path)

    async def load_docx(self, file_path: str) -> str:
        """Extract text from Word document.

        Parsing is synchronous, so it runs in the default executor to keep
        the event loop responsive.

        Args:
            file_path: Path of the ``.docx`` file to read.

        Returns:
            The text of every paragraph, joined with newlines.
        """
        print(f" Loading DOCX: {Path(file_path).name}")
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._extract_docx_text, file_path)

    async def load_txt(self, file_path: str) -> str:
        """Load plain text file, decoded as UTF-8.

        Args:
            file_path: Path of the text file to read.

        Returns:
            The full file content.
        """
        print(f" Loading TXT: {Path(file_path).name}")
        async with aiofiles.open(file_path, 'r', encoding='utf-8') as file:
            text = await file.read()
        return text

    async def load_document(self, file_path: str) -> Dict:
        """Load any supported document type and return content + metadata.

        Args:
            file_path: Path of the document to load.

        Returns:
            A dict with keys ``text`` (extracted text), ``filename``,
            ``path`` (absolute path as a string), ``type`` (lower-cased
            suffix, including the dot) and ``size`` (number of characters
            in ``text``, not the on-disk byte size).

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            ValueError: If the file suffix is not a supported type.
        """
        path = Path(file_path)

        if not path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        # Determine file type and load accordingly; the suffix is matched
        # case-insensitively.
        extension = path.suffix.lower()
        loaders = {
            '.pdf': self.load_pdf,
            '.docx': self.load_docx,
            '.txt': self.load_txt,
        }
        loader = loaders.get(extension)
        if loader is None:
            raise ValueError(f"Unsupported file type: {extension}")

        text = await loader(str(path))

        return {
            'text': text,
            'filename': path.name,
            'path': str(path.absolute()),
            'type': extension,
            'size': len(text),
        }

    async def load_all_documents(self) -> List[Dict]:
        """Load all supported documents from the documents directory.

        The directory is searched recursively. A file that fails to load is
        reported and skipped, so one bad document does not abort the scan.

        Returns:
            One ``load_document`` result per successfully loaded file.
        """
        print(f"Scanning for documents in: {self.documents_path}")

        documents = []

        # Find all supported files recursively
        for file_path in self.documents_path.rglob('*'):
            # Compare the lower-cased suffix so that e.g. "REPORT.PDF" is
            # picked up, matching load_document().
            if file_path.suffix.lower() not in self.SUPPORTED_EXTENSIONS:
                continue
            if not file_path.is_file():
                continue
            try:
                doc = await self.load_document(str(file_path))
                documents.append(doc)
                print(f" ✓ Loaded: {doc['filename']} ({doc['size']:,} characters)")
            except Exception as e:
                # Deliberate best-effort: report the failure and keep going.
                print(f" ✗ Error loading {file_path.name}: {e}")

        print(f"✓ Loaded {len(documents)} documents total")
        return documents

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Tsarri/rag-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

document_loader.py•3.31 KiB