SharePoint MCP Server (Certificate Auth)

test_content_extraction.py•1.92 KiB

"""Tests for content extraction utilities."""

import pytest

from mcp_sharepoint.resources import (
    extract_text_from_excel,
    extract_text_from_pdf,
    extract_text_from_word,
)


class TestPdfExtraction:
    """Tests for PDF text extraction."""

    def test_extract_from_valid_pdf(self):
        """Should report one page and return a string for a minimal PDF."""
        # Hand-written single-page PDF whose content stream draws
        # "Hello World" using the Helvetica font.
        pdf_content = b"""%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R /Resources << /Font << /F1 5 0 R >> >> >> endobj
4 0 obj << /Length 44 >> stream
BT /F1 12 Tf 100 700 Td (Hello World) Tj ET
endstream endobj
5 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica >> endobj
xref
0 6
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
0000000266 00000 n
0000000359 00000 n
trailer << /Size 6 /Root 1 0 R >>
startxref
434
%%EOF"""
        text, page_count = extract_text_from_pdf(pdf_content)
        assert page_count == 1
        # Only the type is checked here; the extracted content itself is
        # not asserted.
        assert isinstance(text, str)

    def test_extract_from_invalid_pdf_raises(self):
        """Should raise on bytes that are not a PDF."""
        # Exception already covers RuntimeError and ValueError, so listing
        # them next to it added nothing to the check.
        # NOTE(review): narrow this once the extractor's error type is
        # confirmed.
        with pytest.raises(Exception):
            extract_text_from_pdf(b"not a pdf")


class TestExcelExtraction:
    """Tests for Excel text extraction."""

    def test_extract_from_invalid_excel_raises(self):
        """Should raise on bytes that are not an Excel file."""
        # ValueError is a subclass of Exception, so the former tuple
        # (ValueError, Exception) matched exactly what Exception matches.
        # NOTE(review): narrow this once the extractor's error type is
        # confirmed.
        with pytest.raises(Exception):
            extract_text_from_excel(b"not an excel file")


class TestWordExtraction:
    """Tests for Word document text extraction."""

    def test_extract_from_invalid_word_raises(self):
        """Should raise on bytes that are not a Word document."""
        # ValueError is a subclass of Exception, so the former tuple
        # (ValueError, Exception) matched exactly what Exception matches.
        # NOTE(review): narrow this once the extractor's error type is
        # confirmed.
        with pytest.raises(Exception):
            extract_text_from_word(b"not a word doc")

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/peacockery-studio/mcp-sharepoint-cert'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_content_extraction.py•1.92 KiB

"""Tests for content extraction utilities."""

import pytest

from mcp_sharepoint.resources import (
    extract_text_from_excel,
    extract_text_from_pdf,
    extract_text_from_word,
)


class TestPdfExtraction:
    """Tests for PDF text extraction."""

    def test_extract_from_valid_pdf(self):
        """Should report one page and return a string for a minimal PDF."""
        # Hand-written single-page PDF whose content stream draws
        # "Hello World" using the Helvetica font.
        pdf_content = b"""%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R /Resources << /Font << /F1 5 0 R >> >> >> endobj
4 0 obj << /Length 44 >> stream
BT /F1 12 Tf 100 700 Td (Hello World) Tj ET
endstream endobj
5 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica >> endobj
xref
0 6
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
0000000266 00000 n
0000000359 00000 n
trailer << /Size 6 /Root 1 0 R >>
startxref
434
%%EOF"""
        text, page_count = extract_text_from_pdf(pdf_content)
        assert page_count == 1
        # Only the type is checked here; the extracted content itself is
        # not asserted.
        assert isinstance(text, str)

    def test_extract_from_invalid_pdf_raises(self):
        """Should raise on bytes that are not a PDF."""
        # Exception already covers RuntimeError and ValueError, so listing
        # them next to it added nothing to the check.
        # NOTE(review): narrow this once the extractor's error type is
        # confirmed.
        with pytest.raises(Exception):
            extract_text_from_pdf(b"not a pdf")


class TestExcelExtraction:
    """Tests for Excel text extraction."""

    def test_extract_from_invalid_excel_raises(self):
        """Should raise on bytes that are not an Excel file."""
        # ValueError is a subclass of Exception, so the former tuple
        # (ValueError, Exception) matched exactly what Exception matches.
        # NOTE(review): narrow this once the extractor's error type is
        # confirmed.
        with pytest.raises(Exception):
            extract_text_from_excel(b"not an excel file")


class TestWordExtraction:
    """Tests for Word document text extraction."""

    def test_extract_from_invalid_word_raises(self):
        """Should raise on bytes that are not a Word document."""
        # ValueError is a subclass of Exception, so the former tuple
        # (ValueError, Exception) matched exactly what Exception matches.
        # NOTE(review): narrow this once the extractor's error type is
        # confirmed.
        with pytest.raises(Exception):
            extract_text_from_word(b"not a word doc")