Skip to main content
Glama
juanqui
by juanqui
test_smoke_default.py (2.09 kB)
from pathlib import Path

import pytest

from pdfkb.config import ServerConfig
from pdfkb.document_processor import DocumentProcessor as PDFProcessor
from pdfkb.embeddings import EmbeddingService

# Skip this module entirely when reportlab is unavailable; it is used below to
# generate the fixture PDF. The import must follow importorskip, hence E402.
reportlab = pytest.importorskip("reportlab", reason="reportlab is required for smoke test")
from reportlab.pdfgen import canvas  # noqa: E402

# Mark every test in this module as an asyncio test.
pytestmark = pytest.mark.asyncio


async def test_smoke_default_pipeline(tmp_path: Path):
    """Smoke-test the out-of-the-box processing pipeline on a tiny generated PDF.

    Steps:
    - Build a knowledgebase directory and a cache directory under ``tmp_path``.
    - Write a one-page PDF with reportlab.
    - Construct ``ServerConfig`` with a placeholder OpenAI key, relying on the
      project defaults for parser, chunker and embedding model.
    - Run ``PDFProcessor.process_pdf`` and assert that it reports success,
      returns a document, and created at least one chunk.
    """
    # Working directories, both rooted in pytest's per-test temp dir.
    kb_dir = tmp_path / "pdfs"
    cache_dir = tmp_path / ".cache"
    for directory in (kb_dir, cache_dir):
        directory.mkdir(parents=True, exist_ok=True)

    # Minimal single-page PDF with two lines of text.
    pdf_path = kb_dir / "sample.pdf"
    pdf_canvas = canvas.Canvas(str(pdf_path))
    text_lines = (
        (720, "Hello World - PDFKB MCP Smoke Test"),
        (700, "This PDF is generated for default pipeline smoke testing."),
    )
    for y_position, text in text_lines:
        pdf_canvas.drawString(72, y_position, text)
    pdf_canvas.showPage()
    pdf_canvas.save()

    # Only the key and the two paths are set explicitly. The defaults are
    # expected to be (per the project; not verifiable from this file):
    #   PDF_PARSER = pymupdf4llm
    #   PDF_CHUNKER = langchain
    #   EMBEDDING_MODEL = text-embedding-3-large
    # The placeholder key is intended to make EmbeddingService fall back to
    # mock embeddings on API errors.
    config = ServerConfig(
        openai_api_key="sk-test-key-smoke",
        knowledgebase_path=kb_dir,
        cache_dir=cache_dir,
    )
    embedding_service = EmbeddingService(config)
    processor = PDFProcessor(config, embedding_service, cache_manager=None)

    result = await processor.process_pdf(pdf_path, metadata={"source": "smoke"})

    assert result.success, f"Processing failed: {result.error}"
    assert result.document is not None, "No document produced"
    assert result.chunks_created > 0, "No chunks created by default chunker"

    # NOTE(review): the comparison below is always true, so this line only
    # verifies that ``document.chunks`` is iterable and that each chunk exposes
    # an ``embedding`` attribute. Tighten it once it is confirmed that mock
    # embeddings are always attached to chunks.
    embedded_chunks = [
        chunk for chunk in result.document.chunks if chunk.embedding is not None
    ]
    assert len(embedded_chunks) >= 0

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/juanqui/pdfkb-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.