AI Tutoring RAG System

ebuka_mcps
tests

test_file_processor.py•8.77 KiB

"""
Test script for file processing capabilities.

Demonstrates PDF and DOCX text extraction and storage in RAG system.

Every check reports its outcome with a printed ✓/✗ line instead of raising,
so the script runs to completion even when the sample files are missing.
"""

import base64
import json
import os

from utils.file_processor import FileProcessor
from rag.system import TutoringRAGSystem

# Student identity shared by the storage test and the retrieval test, so the
# retrieval query looks up what the storage test wrote.
_TEST_STUDENT_ID = "test_student_001"


def _print_banner(title, width=50):
    """Print ``title`` framed by two ``=`` rules, preceded by a blank line."""
    rule = "=" * width
    print("\n" + rule)
    print(title)
    print(rule)


def _read_bytes(path):
    """Return the raw bytes of ``path``; raises FileNotFoundError if it is absent."""
    with open(path, "rb") as f:
        return f.read()


def _make_processor():
    """Build a FileProcessor backed by a fresh TutoringRAGSystem."""
    return FileProcessor(TutoringRAGSystem())


def _run_extraction_test(filename, label, metadata_fields):
    """Load ``filename``, request a text preview and print a summary.

    Args:
        filename: Path of the sample file to read.
        label: Human-readable file kind used in messages (e.g. ``"PDF"``).
        metadata_fields: Iterable of ``(display_label, metadata_key)`` pairs;
            each key is looked up in ``preview["metadata"]`` and printed,
            falling back to ``'N/A'`` when missing.
    """
    _print_banner(f"Testing {label} Extraction")

    # Initialize services
    processor = _make_processor()

    try:
        content = _read_bytes(filename)
        print(f"✓ Loaded {label} file ({len(content)} bytes)")

        # Extract text preview
        preview = processor.extract_text_preview(content, filename, max_chars=300)

        if preview["status"] == "success":
            print("✓ Extraction successful")
            print(f" Total characters: {preview['total_characters']}")
            for display_label, key in metadata_fields:
                print(f" {display_label}: {preview['metadata'].get(key, 'N/A')}")
            print(f"\n Preview:\n {preview['preview'][:200]}...")
        else:
            print(f"✗ Extraction failed: {preview.get('message')}")
    except FileNotFoundError:
        print(f"✗ {filename} not found. Please add a {label} file to test.")
    except Exception as e:
        # Best-effort script: report the failure and let the next check run.
        print(f"✗ Error: {e}")


def _process_and_report(
    processor, filename, label, subject, topic, difficulty_level, source
):
    """Run ``process_and_store_file`` on one sample file and print the result.

    Args:
        processor: The FileProcessor to exercise.
        filename: Path of the sample file to read and store.
        label: Human-readable file kind used in messages (e.g. ``"DOCX"``).
        subject: Subject passed through to the processor.
        topic: Topic passed through to the processor.
        difficulty_level: Difficulty level passed through to the processor.
        source: Value stored under ``"source"`` in ``additional_metadata``.
    """
    try:
        content = _read_bytes(filename)

        print(f"\nProcessing {label}...")
        result = processor.process_and_store_file(
            file_content=content,
            filename=filename,
            student_id=_TEST_STUDENT_ID,
            subject=subject,
            topic=topic,
            difficulty_level=difficulty_level,
            additional_metadata={"source": source},
        )

        if result["status"] == "success":
            document_ids = result["document_ids"]
            print(f"✓ {label} processed and stored successfully")
            print(f" Chunks stored: {result['chunks_stored']}")
            print(f" Total characters: {result['total_characters']}")
            print(f" Document IDs: {document_ids[:2]}... ({len(document_ids)} total)")
        else:
            print(f"✗ Processing failed: {result.get('message')}")
    except FileNotFoundError:
        print(f"✗ {filename} not found")
    except Exception as e:
        # Best-effort script: report the failure and let the next check run.
        print(f"✗ Error: {e}")


def test_pdf_extraction():
    """Test PDF text extraction."""
    _run_extraction_test(
        "sample.pdf",
        "PDF",
        [("Total pages", "total_pages")],
    )


def test_docx_extraction():
    """Test DOCX text extraction."""
    _run_extraction_test(
        "sample.docx",
        "DOCX",
        [
            ("Total paragraphs", "total_paragraphs"),
            ("Total tables", "total_tables"),
        ],
    )


def test_full_processing_and_storage():
    """Test complete file processing pipeline with RAG storage."""
    _print_banner("Testing Full Processing Pipeline")

    # Initialize services (one processor is shared by both files)
    processor = _make_processor()

    # Test with PDF
    _process_and_report(
        processor,
        "sample.pdf",
        "PDF",
        subject="Mathematics",
        topic="Calculus",
        difficulty_level=7,
        source="textbook_chapter",
    )

    # Test with DOCX
    _process_and_report(
        processor,
        "sample.docx",
        "DOCX",
        subject="History",
        topic="World War II",
        difficulty_level=6,
        source="study_notes",
    )


def test_retrieval_after_processing():
    """Test retrieving stored file content from RAG system."""
    _print_banner("Testing Retrieval After Processing")

    rag_system = TutoringRAGSystem()

    # Try to retrieve content we just stored
    print("\nQuerying for calculus content...")
    try:
        response = rag_system.generate_personalized_response(
            student_id=_TEST_STUDENT_ID,
            current_question="What did I learn about calculus?",
            subject="Mathematics",
            topic="Calculus",
            context_limit=3,
        )

        print("\n✓ Retrieved response:")
        print(f" {response[:300]}...")
    except Exception as e:
        # Handled like every other check here, so a failed query does not
        # abort the remaining tests and the completion banner.
        print(f"✗ Error: {e}")


def test_base64_encoding():
    """Test base64 encoding for MCP tool usage."""
    _print_banner("Testing Base64 Encoding (for MCP)")

    try:
        pdf_content = _read_bytes("sample.pdf")

        # Encode to base64 (as required by MCP tool)
        encoded = base64.b64encode(pdf_content).decode("utf-8")

        print(f"✓ Original size: {len(pdf_content)} bytes")
        print(f"✓ Encoded size: {len(encoded)} characters")
        print(f"✓ First 100 chars: {encoded[:100]}...")

        # Decode back and confirm the round trip is lossless
        decoded = base64.b64decode(encoded)
        if decoded == pdf_content:
            print("✓ Encoding/decoding verified successfully")
        else:
            print("✗ Encoding/decoding mismatch!")
    except FileNotFoundError:
        print("✗ sample.pdf not found")
    except Exception as e:
        print(f"✗ Error: {e}")


def create_sample_docx():
    """Create a sample DOCX file for testing.

    Writes ``sample.docx`` to the current working directory. ``python-docx``
    is imported lazily so the rest of the script still runs without it.
    """
    _print_banner("Creating Sample DOCX File")

    try:
        from docx import Document

        doc = Document()
        doc.add_heading("Calculus Study Notes", 0)

        doc.add_heading("Chapter 1: Limits", level=1)
        doc.add_paragraph(
            "A limit is the value that a function approaches as the input "
            "approaches some value. Limits are essential to calculus and are "
            "used to define continuity, derivatives, and integrals."
        )

        doc.add_heading("Key Concepts", level=2)
        doc.add_paragraph(
            "One-sided limits: approaching from left or right", style="List Bullet"
        )
        doc.add_paragraph(
            "Two-sided limits: both sides approach same value", style="List Bullet"
        )
        doc.add_paragraph(
            "Infinite limits: function grows without bound", style="List Bullet"
        )

        doc.add_heading("Example Problem", level=2)
        doc.add_paragraph(
            "Find the limit of f(x) = (x² - 1)/(x - 1) as x approaches 1.\n"
            "Solution: Factor the numerator to get (x+1)(x-1)/(x-1), "
            "cancel (x-1), and evaluate at x=1 to get 2."
        )

        doc.save("sample.docx")
        print("✓ Created sample.docx successfully")
    except ImportError:
        print("✗ python-docx not installed. Run: pip install python-docx")
    except Exception as e:
        print(f"✗ Error: {e}")


def main():
    """Run every check in order, creating ``sample.docx`` first if it is missing."""
    _print_banner(" FILE PROCESSOR TEST SUITE", width=70)

    # Create sample DOCX if it doesn't exist
    if not os.path.exists("sample.docx"):
        create_sample_docx()

    # Run tests; storage runs before retrieval so there is content to query
    test_pdf_extraction()
    test_docx_extraction()
    test_full_processing_and_storage()
    test_retrieval_after_processing()
    test_base64_encoding()

    _print_banner(" TESTS COMPLETE", width=70)
    print("\nNote: Some tests may show 'file not found' if you don't have")
    print("sample PDF/DOCX files. Use create_sample_docx() to create a test file.")


if __name__ == "__main__":
    main()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Chukwuebuka-2003/ebuka_mcps'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_file_processor.py•8.77 KiB