PDF RAG MCP Server

Overview Schema Related Servers Score Discussions

test_query.py

test_query.py•4.85 KiB

#!/usr/bin/env python import sys import os from pathlib import Path import asyncio import argparse # Add project root directory to Python path sys.path.append(str(Path(__file__).parent.parent)) from app.vector_store import VectorStore from app.pdf_processor import PDFProcessor from app.database import SessionLocal, PDFDocument from sentence_transformers import SentenceTransformer async def test_vector_db(): """Test vector database query functionality""" # 1. Check if there is data in the vector database vector_store = VectorStore() doc_count = vector_store.get_document_count() print(f"Document count in vector database: {doc_count}") if doc_count == 0: print("Warning: No documents in vector database, please upload and process PDF files first.") return # 2. Test queries embedding_model = SentenceTransformer("all-MiniLM-L6-v2") # Test several different queries test_queries = [ "This is a test query", "What is the MCP protocol", "Features of the PDF knowledge base", "How does the vector database work" ] for query in test_queries: print(f"\nExecuting query: '{query}'") query_embedding = embedding_model.encode(query) results = vector_store.search(query_embedding, n_results=3) # Print results documents = results.get("documents", [[]]) metadatas = results.get("metadatas", [[]]) distances = results.get("distances", [[]]) if not documents[0]: print(" No relevant documents found") continue print(f" Found {len(documents[0])} results:") for i, (doc, meta, dist) in enumerate(zip(documents[0], metadatas[0], distances[0])): doc_preview = doc[:100] + "..." if len(doc) > 100 else doc print(f" {i+1}. Similarity: {1-dist:.4f}, PDF ID: {meta.get('pdf_id')}, Chunk ID: {meta.get('chunk_id')}") print(f" Text preview: {doc_preview}") async def reset_vector_db(): """Reset vector database""" vector_store = VectorStore() if vector_store.reset(): print("Vector database has been successfully reset") else: print("Failed to reset vector database") async def list_documents(): """List all PDF documents""" db = SessionLocal() try: docs = db.query(PDFDocument).all() if not docs: print("No PDF documents found") return print(f"Found {len(docs)} PDF documents:") for doc in docs: status = "Processed" if doc.processed else "Processing" if doc.processing else "Not processed" error = f" (Error: {doc.error})" if doc.error else "" print(f" ID: {doc.id}, Filename: {doc.filename}, Status: {status}{error}") finally: db.close() async def process_document(doc_id=None): """Manually process a specified PDF document""" db = SessionLocal() try: if doc_id is None: # Get the first unprocessed document doc = db.query(PDFDocument).filter( PDFDocument.processed == False, PDFDocument.processing == False ).first() else: # Get document with specified ID doc = db.query(PDFDocument).filter(PDFDocument.id == doc_id).first() if not doc: print("No PDF documents found that need processing") return print(f"Starting to process document: {doc.filename} (ID: {doc.id})") # Start processing processor = PDFProcessor() result = await processor.process_pdf(doc.id, doc.file_path, doc.filename) if result: print(f"Document processing successful: {doc.filename}") else: print(f"Document processing failed: {doc.filename}") finally: db.close() async def main(): parser = argparse.ArgumentParser(description="PDF Knowledge Base Testing Tool") parser.add_argument("--reset", action="store_true", help="Reset vector database") parser.add_argument("--list", action="store_true", help="List all PDF documents") parser.add_argument("--process", type=int, nargs="?", const=0, help="Process a PDF document with the specified ID, if no ID is specified, process the first unprocessed document") parser.add_argument("--query", action="store_true", help="Test vector database queries") args = parser.parse_args() if args.reset: await reset_vector_db() if args.list: await list_documents() if args.process is not None: doc_id = args.process if args.process > 0 else None await process_document(doc_id) if args.query or (not args.reset and not args.list and args.process is None): await test_vector_db() if __name__ == "__main__": asyncio.run(main())

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/hyson666/pdf-rag-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

test_query.py•4.85 KiB