DICOM MCP Server

MIT License

Overview InspectNew Schema Related Servers Reviews Score

dicom-mcp
tests

"""
Test extracting text from PDFs encapsulated in DICOM.
"""
import io
import os
import tempfile
import pytest
import requests
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from pydicom.dataset import Dataset, FileMetaDataset
from pydicom.uid import ExplicitVRLittleEndian
import re
from datetime import datetime
from dicom_mcp.dicom_client import DicomClient
# Import the same configuration variables and fixtures from test_dicom_mcp.py
from tests.test_dicom_mcp import (
    ORTHANC_HOST, ORTHANC_PORT, ORTHANC_WEB_PORT, ORTHANC_AET, 
    ORTHANC_USERNAME, ORTHANC_PASSWORD, dicom_config, dicom_client,
    wait_for_orthanc
)

# Helper function to wait for Orthanc
def wait_for_orthanc():
    """Report whether the Orthanc web interface is reachable.

    Sends a single GET to ``/system`` on the configured host and web port;
    despite the name it neither retries nor sleeps.  Credentials are sent
    when both ORTHANC_USERNAME and ORTHANC_PASSWORD are set, which is the
    same rule the upload fixture in this file applies.

    NOTE: this definition shadows the ``wait_for_orthanc`` imported from
    ``tests.test_dicom_mcp`` at the top of the file.

    Returns:
        bool: True when the server answers with HTTP 200; False on any other
        status code or when the request itself fails (connection refused,
        timeout, ...).
    """
    auth = None
    if ORTHANC_USERNAME and ORTHANC_PASSWORD:
        auth = (ORTHANC_USERNAME, ORTHANC_PASSWORD)

    try:
        response = requests.get(
            f"http://{ORTHANC_HOST}:{ORTHANC_WEB_PORT}/system",
            auth=auth,
            # Without a timeout an unresponsive host would hang the session.
            timeout=5,
        )
    except requests.RequestException:
        # Only network-level failures mean "not available"; anything else
        # (e.g. KeyboardInterrupt) should propagate.
        return False
    return response.status_code == 200

def parse_date_from_report(date_string):
    """Convert a report date (DD-MM-YYYY) into DICOM DA form (YYYYMMDD).

    Args:
        date_string: Date text expected to look like ``15-03-2024``.

    Returns:
        The date rendered as ``YYYYMMDD``, or the placeholder ``"20240101"``
        when the text cannot be parsed as a DD-MM-YYYY calendar date.
    """
    fallback = "20240101"
    try:
        # format() on a datetime delegates to strftime with the given spec.
        return format(datetime.strptime(date_string, "%d-%m-%Y"), "%Y%m%d")
    except ValueError:
        # Malformed or impossible date: fall back to the fixed placeholder.
        return fallback

def extract_study_date(report_text):
    """Find the first parenthesised DD-MM-YYYY date in a report.

    Args:
        report_text: Full text of one report.

    Returns:
        The date converted by ``parse_date_from_report`` (YYYYMMDD), or the
        placeholder ``"20240101"`` when no ``(DD-MM-YYYY)`` token is present.
    """
    found = re.search(r'\((\d{2}-\d{2}-\d{4})\)', report_text)
    if found is None:
        # No parenthesised date anywhere in the text: use the placeholder.
        return "20240101"
    return parse_date_from_report(found.group(1))

def create_pdf_from_report(report_text):
    """Render markdown-like report text into a simple letter-size PDF.

    Line handling:
      * ``# ``  prefix  -> 14pt bold heading
      * ``## `` prefix  -> 12pt bold heading
      * ``---`` prefix  -> horizontal rule
      * other non-blank -> 10pt body text, word-wrapped to 400pt
      * blank           -> small vertical gap

    A new page is started whenever the next item would be drawn below the
    bottom margin, including in the middle of a wrapped paragraph.

    Args:
        report_text: The report as a single string with ``\\n`` line breaks.

    Returns:
        bytes: The generated PDF document.
    """
    left_x = 100
    top_y = 750
    bottom_y = 50
    max_text_width = 400

    pdf_buffer = io.BytesIO()
    c = canvas.Canvas(pdf_buffer, pagesize=letter)
    y_position = top_y

    def ensure_room():
        """Start a new page if the cursor has moved below the bottom margin."""
        nonlocal y_position
        if y_position < bottom_y:
            c.showPage()
            y_position = top_y

    def draw_text(text, font_name, font_size, leading):
        """Draw one line of text at the cursor and advance by *leading*."""
        nonlocal y_position
        ensure_room()
        # Set the font on every draw: a page break resets the canvas font.
        c.setFont(font_name, font_size)
        c.drawString(left_x, y_position, text)
        y_position -= leading

    for line in report_text.split('\n'):
        if line.startswith('# '):
            draw_text(line[2:], "Helvetica-Bold", 14, 20)
        elif line.startswith('## '):
            draw_text(line[3:], "Helvetica-Bold", 12, 20)
        elif line.startswith('---'):
            # Separator rule
            ensure_room()
            c.line(left_x, y_position, 500, y_position)
            y_position -= 10
        elif line.strip():
            # Greedy word wrap: keep appending words while the line still fits.
            current_line = ""
            for word in line.split():
                candidate = f"{current_line} {word}" if current_line else word
                if c.stringWidth(candidate, "Helvetica", 10) < max_text_width:
                    current_line = candidate
                else:
                    # Skip the flush when a single over-wide word arrives
                    # first; otherwise an empty line would be emitted.
                    if current_line:
                        draw_text(current_line, "Helvetica", 10, 15)
                    current_line = word
            if current_line:
                draw_text(current_line, "Helvetica", 10, 15)
        else:
            # Empty line
            y_position -= 10

    c.save()
    return pdf_buffer.getvalue()

@pytest.fixture(scope="session")
def upload_pdf_dicom():
    """Build one Encapsulated-PDF DICOM per report and upload each to Orthanc.

    Reads ``tests/synthetic-pet-ct-reports.md`` (path is relative to the
    current working directory), splits it on ``# Rapport <n>:`` headings,
    renders each report to PDF, wraps it in a DICOM dataset and POSTs it to
    Orthanc's ``/instances`` endpoint.

    Returns:
        list[dict]: One entry per uploaded report with the keys
        ``report_number``, ``study_instance_uid``, ``series_instance_uid``,
        ``sop_instance_uid`` and ``description``.

    Raises:
        AssertionError: If Orthanc is unreachable or an upload does not
            return HTTP 200.
    """
    # Ensure Orthanc is ready
    assert wait_for_orthanc(), "Orthanc is not available"

    # Read with an explicit encoding so the result does not depend on the
    # platform default (file is assumed to be UTF-8 -- TODO confirm).
    with open('tests/synthetic-pet-ct-reports.md', 'r', encoding='utf-8') as file:
        markdown_content = file.read()

    # Split the content into individual reports, dropping empty fragments
    reports = [
        chunk
        for chunk in re.split(r'(?=# Rapport \d+:)', markdown_content)
        if chunk.strip()
    ]

    # Base study UID prefix - each report will get a unique study UID
    study_uid_prefix = "1.2.3.4.5.6.7.8.9"
    sop_class_uid = "1.2.840.10008.5.1.4.1.1.104.1"  # Encapsulated PDF Storage

    # Upload target and optional credentials are the same for every report
    instances_url = f"http://{ORTHANC_HOST}:{ORTHANC_WEB_PORT}/instances"
    auth = None
    if ORTHANC_USERNAME and ORTHANC_PASSWORD:
        auth = (ORTHANC_USERNAME, ORTHANC_PASSWORD)

    uploaded_reports = []

    for i, report_text in enumerate(reports, 1):
        # Extract report date
        study_date = extract_study_date(report_text)

        # Create PDF from report
        pdf_data = create_pdf_from_report(report_text)

        # Create a new SOP Instance UID and Series UID for each report.
        # NOTE(review): the SOP suffix (10+i) and series suffix (20+i) start
        # to overlap once there are more than 10 reports; left unchanged in
        # case other tests rely on these exact UIDs.
        sop_instance_uid = f"1.2.3.4.5.6.7.8.9.{10+i}"
        series_instance_uid = f"1.2.3.4.5.6.7.8.9.{20+i}"

        # Create the FileMetaDataset
        file_meta = FileMetaDataset()
        file_meta.MediaStorageSOPClassUID = sop_class_uid
        file_meta.MediaStorageSOPInstanceUID = sop_instance_uid
        file_meta.TransferSyntaxUID = ExplicitVRLittleEndian

        # Create the dataset
        ds = Dataset()
        ds.file_meta = file_meta

        # Patient data - same for all reports
        ds.PatientName = "John Doe"
        ds.PatientID = "DLBCL2024"
        ds.PatientBirthDate = "19700101"
        ds.PatientSex = "O"

        # Study data - unique Study UID for each report
        ds.StudyInstanceUID = f"{study_uid_prefix}.{100+i}"
        ds.StudyDate = study_date
        ds.StudyTime = "120000"
        ds.StudyID = f"DLBCL{i}"

        # Extract title for study description
        title_match = re.search(r'# Rapport \d+: (.*?)[\n\r]', report_text)
        if title_match:
            ds.StudyDescription = f"PET/CT - {title_match.group(1)}"
        else:
            ds.StudyDescription = f"PET/CT Report {i}"

        ds.AccessionNumber = f"ACC{i}2024"

        # Series data
        ds.SeriesInstanceUID = series_instance_uid
        ds.SeriesNumber = i
        ds.Modality = "DOC"  # Document
        ds.SeriesDescription = f"PET/CT Report {i}"

        # Instance data
        ds.SOPInstanceUID = sop_instance_uid
        ds.SOPClassUID = sop_class_uid
        ds.InstanceNumber = 1

        # Add the PDF data
        ds.MIMETypeOfEncapsulatedDocument = "application/pdf"
        ds.EncapsulatedDocument = pdf_data

        # Reserve a temp path, then do all work inside try/finally so the
        # file is removed even if writing the dataset fails.
        with tempfile.NamedTemporaryFile(suffix='.dcm', delete=False) as temp:
            temp_path = temp.name

        try:
            ds.save_as(temp_path, write_like_original=False)

            # Upload via HTTP
            with open(temp_path, 'rb') as f:
                dicom_data = f.read()

            response = requests.post(
                instances_url,
                data=dicom_data,
                auth=auth,  # None when no credentials are configured
                headers={'Content-Type': 'application/dicom'},
                timeout=30,  # avoid hanging the whole session on a dead server
            )

            assert response.status_code == 200, f"Failed to upload PDF DICOM {i}: {response.text}"

            # Store the report info
            uploaded_reports.append({
                "report_number": i,
                "study_instance_uid": ds.StudyInstanceUID,
                "series_instance_uid": series_instance_uid,
                "sop_instance_uid": sop_instance_uid,
                "description": ds.StudyDescription
            })

            print(f"Uploaded report {i}: {ds.StudyDescription}")

        finally:
            # Clean up
            os.unlink(temp_path)

    return uploaded_reports
def test_extract_pdf_text(dicom_client: DicomClient, upload_pdf_dicom):
    """Test extracting text from a PDF encapsulated in DICOM.

    Uses the first report uploaded by the ``upload_pdf_dicom`` fixture and
    checks that ``extract_pdf_text_from_dicom`` reports success and returns
    non-empty text.
    """
    # Get the PDF DICOM identifiers
    pdf_info = upload_pdf_dicom[0]

    # Call the extraction function
    result = dicom_client.extract_pdf_text_from_dicom(
        study_instance_uid=pdf_info["study_instance_uid"],
        series_instance_uid=pdf_info["series_instance_uid"],
        sop_instance_uid=pdf_info["sop_instance_uid"]
    )

    # Check the result
    assert result["success"], f"Text extraction failed: {result['message']}"
    assert result["text_content"], "No text was extracted from the PDF"

    # Best-effort removal of the file the client reports in "file_path".
    file_path = result["file_path"]
    if file_path:
        try:
            os.unlink(file_path)
        except OSError:
            # Already gone or not removable; cleanup failure must not fail
            # the test, but unrelated exceptions should still surface.
            pass

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ChristianHinge/dicom-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.