# test_convert_to_pdf.py • 3.56 kB
import asyncio
from pathlib import Path
import pytest
from docx import Document
# Target for testing: convert_to_pdf (async function)
from word_document_server.tools.extended_document_tools import convert_to_pdf
def _make_sample_docx(path: Path) -> None:
    """Write a small Word document to ``path`` to serve as conversion input.

    The document consists of one level-1 heading followed by two body
    paragraphs; it is saved to ``path`` via ``Document.save``.
    """
    heading_text = "Conversion Test Document"
    body_paragraphs = (
        "This is a test paragraph for PDF conversion. Contains ASCII too.",
        "Second paragraph: Contains special characters and spaces to cover path/content edge cases.",
    )

    document = Document()
    document.add_heading(heading_text, level=1)
    for paragraph_text in body_paragraphs:
        document.add_paragraph(paragraph_text)
    document.save(path)
def test_convert_to_pdf_with_temp_docx(tmp_path: Path):
    """
    Round trip: build a throwaway .docx, run convert_to_pdf on it, inspect the resulting PDF.

    Notes:
    - The conversion depends on external tooling: on Linux/macOS LibreOffice
      (soffice/libreoffice) is tried first, with docx2pdf (which needs
      Microsoft Word) as the fallback.
    - If neither tool is usable, or the conversion does not report success,
      the test is skipped with the tool's message as the reason rather than failed.
    """
    # Both file names deliberately contain spaces.
    source_docx = tmp_path / "sample document with spaces.docx"
    target_pdf = tmp_path / "converted output.pdf"
    _make_sample_docx(source_docx)

    # convert_to_pdf is a coroutine function, so drive it to completion here.
    message = asyncio.run(
        convert_to_pdf(str(source_docx), output_filename=str(target_pdf))
    )

    # Conversion counts as successful when the message carries a success phrase
    # (compared case-insensitively) or the requested PDF is already on disk.
    reported_ok = any(
        phrase.lower() in message.lower()
        for phrase in ("successfully converted", "converted to PDF")
    )
    if not (reported_ok or target_pdf.exists()):
        # Missing LibreOffice / Microsoft Word yields a hint message instead of
        # a PDF; that is an environment limitation, so skip rather than fail.
        pytest.skip(f"PDF conversion tool unavailable or conversion failed: {message}")

    # Some backends (docx2pdf in particular) may ignore the requested output
    # name and emit a PDF named after the source file, so probe the likely
    # locations in priority order.
    likely_locations = (
        # The path that was actually requested.
        target_pdf,
        # Source-named PDF inside the output directory.
        target_pdf.parent / f"{source_docx.stem}.pdf",
        # Source-named PDF next to the source document.
        source_docx.with_suffix(".pdf"),
    )
    produced = next((loc for loc in likely_locations if loc.exists()), None)

    if produced is None:
        # Last resort: take the most recently modified PDF in the temp directory.
        produced = max(
            tmp_path.glob("*.pdf"),
            key=lambda pdf: pdf.stat().st_mtime,
            default=None,
        )

    if produced is None:
        # Success was reported but no output could be located; treat this as an
        # environment/tooling difference and skip instead of failing.
        pytest.skip(f"Could not find the generated PDF. Function output: {message}")

    # The PDF must exist and must not be empty.
    assert produced.exists(), f"Generated PDF not found: {produced}, function output: {message}"
    assert produced.stat().st_size > 0, f"The generated PDF file is empty: {produced}"
if __name__ == "__main__":
    # Convenience entry point for running this file on its own, e.g.:
    #     python tests/test_convert_to_pdf.py
    # Runs pytest in quiet mode on this file and exits with pytest's return code.
    import sys

    exit_status = pytest.main([__file__, "-q"])
    sys.exit(exit_status)