get-encapsulated-pdf-report-text

Convert an Encapsulated PDF DICOM report into readable text by providing study, series, and SOP instance UIDs.

Instructions

Retrieves and converts an Encapsulated PDF instance to human-readable text. Requires Study, Series, and SOP Instance UIDs from find-encapsulated-pdf-reports. Does not retrieve image data.

Input Schema

Table | JSON Schema

Name	Required	Description
`studyInstanceUid`	Yes	DICOM Study Instance UID (e.g., 1.2.840.113619.2.55.3). Obtain from find-studies or find-encapsulated-pdf-reports.
`seriesInstanceUid`	Yes	DICOM Series Instance UID (e.g., 1.2.840.113619.2.55.3.604688123). Obtain from find-series or find-encapsulated-pdf-reports.
`sopInstanceUid`	Yes	DICOM SOP Instance UID (e.g., 1.2.840.113619.2.55.3.604688123.123.1591781234.469). Obtain from find-instances or find-encapsulated-pdf-reports.

Implementation Reference

src/index.js:336-381 (registration)

Registration of the 'get-encapsulated-pdf-report-text' tool with the MCP server. Defines the tool name, description, schema (studyInstanceUid, seriesInstanceUid, sopInstanceUid), and the async handler that calls getEncapsulatedPdfReportText().

// Registers the 'get-encapsulated-pdf-report-text' tool on the MCP server:
// tool name, description, input schema (study / series / SOP instance UIDs)
// and the async handler that delegates to getEncapsulatedPdfReportText().
server.tool(
  'get-encapsulated-pdf-report-text',
  'Retrieves and converts an Encapsulated PDF instance to human-readable text. Requires Study, Series, and SOP Instance UIDs from find-encapsulated-pdf-reports. Does not retrieve image data.',
  {
    studyInstanceUid: studyUidSchema.describe(
      'DICOM Study Instance UID (e.g., 1.2.840.113619.2.55.3). Obtain from find-studies or find-encapsulated-pdf-reports.'
    ),
    seriesInstanceUid: seriesUidSchema.describe(
      'DICOM Series Instance UID (e.g., 1.2.840.113619.2.55.3.604688123). Obtain from find-series or find-encapsulated-pdf-reports.'
    ),
    sopInstanceUid: sopUidSchema.describe(
      'DICOM SOP Instance UID (e.g., 1.2.840.113619.2.55.3.604688123.123.1591781234.469). Obtain from find-instances or find-encapsulated-pdf-reports.'
    ),
  },
  async (params) => {
    const { studyInstanceUid, seriesInstanceUid, sopInstanceUid } = params;

    // Small local wrapper so every log call is built the same way.
    const log = (level, data) => server.sendLoggingMessage({ level, data });

    let reportText;
    try {
      // Announce what is about to be retrieved.
      log(
        'info',
        `Retrieving encapsulated PDF report text for studyInstanceUid: ${studyInstanceUid}, seriesInstanceUid: ${seriesInstanceUid}, sopInstanceUid: ${sopInstanceUid}`
      );

      // Fetch the instance and convert its encapsulated PDF to text.
      reportText = await getEncapsulatedPdfReportText(
        studyInstanceUid,
        seriesInstanceUid,
        sopInstanceUid,
        process.env
      );

      // Record the successful retrieval.
      log(
        'info',
        `Successfully retrieved encapsulated PDF report text for SOP Instance UID: ${sopInstanceUid}`
      );
    } catch (error) {
      // Any failure above is logged and returned to the client as error content.
      const failure = `Error retrieving encapsulated PDF report text: ${error.message}`;
      log('error', failure);

      return errorContent(failure);
    }

    // textContent() is called outside the try block, so an exception raised by
    // it is not converted into errorContent.
    return textContent(reportText);
  }
);

src/tools/getEncapsulatedPdfReportText.js:21-63 (handler)

Handler function that fetches the instance's metadata from the DICOMweb server, finds the first returned item whose SOP Class UID matches the Encapsulated PDF SOP Class UID, and returns its human-readable text via pdfToText().

/**
 * Fetches the DICOM JSON metadata of a single instance from the DICOMweb
 * server and converts its Encapsulated PDF document to text.
 *
 * @param {string} studyInstanceUid - Study Instance UID placed in the metadata URL.
 * @param {string} seriesInstanceUid - Series Instance UID placed in the metadata URL.
 * @param {string} sopInstanceUid - SOP Instance UID placed in the metadata URL.
 * @param {object} [env=process.env] - Configuration object; `DICOMWEB_HOST` is read
 *   here and the object is forwarded to the auth, signal and PDF helpers.
 * @returns {Promise<string>} The text produced by `pdfToText()` for the first
 *   metadata item whose SOP Class UID (tag 00080016) is the Encapsulated PDF class.
 * @throws {Error} If the metadata request is not OK, if the response is not a
 *   non-empty array, or if no item carries the Encapsulated PDF SOP Class UID.
 */
export async function getEncapsulatedPdfReportText(
  studyInstanceUid,
  seriesInstanceUid,
  sopInstanceUid,
  env = process.env
) {
  // Shared suffix for the two "not found" error messages below.
  const describeUids = () =>
    `[Study Instance UID: ${studyInstanceUid}, Series Instance UID: ${seriesInstanceUid}, SOP Instance UID: ${sopInstanceUid}]`;

  // Request the metadata of the addressed instance.
  const authHeaders = buildAuthHeaders(env);
  const metadataUrl = urlJoin(
    env.DICOMWEB_HOST,
    `/studies/${encodeURIComponent(studyInstanceUid)}/series/${encodeURIComponent(seriesInstanceUid)}/instances/${encodeURIComponent(sopInstanceUid)}/metadata`
  );
  const response = await makeQuery(metadataUrl, {
    headers: authHeaders,
    signal: buildSignal(env),
  });

  if (!response.ok) {
    throw new Error(
      `Get instance metadata request failed with HTTP status ${response.status} [uri: ${scrubUrl(response.url)}]`
    );
  }

  // Anything other than a non-empty array means the instance was not found.
  const instances = await response.json();
  if (!Array.isArray(instances) || instances.length === 0) {
    throw new Error(`Instance not found ${describeUids()}`);
  }

  // Pick the first item tagged with the Encapsulated PDF SOP Class UID.
  const isEncapsulatedPdf = (instance) =>
    instance['00080016']?.Value?.[0] === ENCAPSULATED_PDF_REPORT_SOP_CLASS_UID;
  const pdfInstance = instances.find(isEncapsulatedPdf);

  if (!pdfInstance) {
    throw new Error(`Encapsulated PDF report not found ${describeUids()}`);
  }

  return pdfToText(pdfInstance, env);
}

src/utils/pdfToText.js:106-112 (helper)

Helper function that converts an Encapsulated PDF DICOM instance into plain text. Resolves PDF bytes (inline base64 or BulkDataURI), then uses pdf-parse to extract text.

/**
 * Converts an Encapsulated PDF DICOM instance into plain text.
 *
 * The PDF bytes are obtained through `resolvePdfBytes()` and handed to a
 * `PDFParse` instance, whose `getText()` result supplies the text.
 *
 * @param {object} pdfInstance - DICOM JSON instance containing the encapsulated document.
 * @param {object} [env=process.env] - Configuration object forwarded to `resolvePdfBytes()`.
 * @returns {Promise<string>} The `text` property of the parser's `getText()` result.
 * @throws Propagates any error raised while resolving the bytes or parsing the PDF.
 */
export async function pdfToText(pdfInstance, env = process.env) {
  const pdfBytes = await resolvePdfBytes(pdfInstance, env);
  const parser = new PDFParse({ data: pdfBytes });

  try {
    const result = await parser.getText();
    return result.text;
  } finally {
    // Always release the parser, on success and on failure alike; pdf-parse's
    // documented usage calls destroy() once the parser is no longer needed.
    await parser.destroy();
  }
}

src/utils/pdfToText.js:44-93 (helper)

Helper function to resolve raw PDF bytes from a DICOM JSON instance. Handles InlineBinary (base64) and BulkDataURI (separate URL fetch with multipart support).

/**
 * Resolves the raw PDF bytes of an Encapsulated PDF instance given in DICOM JSON form.
 *
 * The EncapsulatedDocument tag is read from the instance and handled as follows:
 *  - `InlineBinary`: the base64 payload is decoded and returned.
 *  - `BulkDataURI`: the URI is fetched; a `multipart/*` response yields the data
 *    of its first part, any other response is returned as-is.
 *
 * @param {object} pdfInstance - DICOM JSON instance expected to carry the EncapsulatedDocument tag.
 * @param {object} env - Configuration object; `DICOMWEB_HOST` is read for the
 *   same-origin check and the object is forwarded to the auth and signal helpers.
 * @returns {Promise<Buffer>} The PDF bytes. For multipart responses this is the
 *   `data` of the first part returned by `parseMultipart()` (presumably a Buffer — confirm).
 * @throws {Error} If the tag is missing, has neither InlineBinary nor BulkDataURI,
 *   the bulk data request is not OK, or a multipart response has no parts.
 */
async function resolvePdfBytes(pdfInstance, env) {
  const tag = pdfInstance[ENCAPSULATED_DOCUMENT_TAG];
  if (!tag) {
    throw new Error(
      `DICOM instance is missing the EncapsulatedDocument tag (${ENCAPSULATED_DOCUMENT_TAG})`
    );
  }

  if (tag.InlineBinary) {
    return Buffer.from(tag.InlineBinary, 'base64');
  }

  if (tag.BulkDataURI) {
    // Only forward auth credentials to the same origin as DICOMWEB_HOST.
    // A BulkDataURI may legitimately point to separate storage (e.g. a pre-signed
    // cloud storage URL); sending credentials there would leak them to an unintended server.
    const sameOrigin = isSameOrigin(tag.BulkDataURI, env.DICOMWEB_HOST);
    const res = await makeQuery(tag.BulkDataURI, {
      headers: {
        ...(sameOrigin ? buildAuthHeaders(env) : {}),
        // The multipart/related `type` parameter values contain '/', which is not
        // a valid token character, so they are sent as quoted strings (RFC 2387).
        Accept:
          'multipart/related; type="application/octet-stream", multipart/related; type="application/pdf", application/pdf',
      },
      signal: buildSignal(env),
    });
    if (!res.ok) {
      throw new Error(
        `Failed to fetch BulkDataURI for EncapsulatedDocument: HTTP ${res.status} [uri: ${scrubUrl(tag.BulkDataURI)}]`
      );
    }

    const responseBuffer = Buffer.from(await res.arrayBuffer());
    const contentType = res.headers.get('Content-Type') ?? '';
    if (contentType.toLowerCase().startsWith('multipart/')) {
      // Multipart body: unwrap it and use the first part as the document.
      const parts = parseMultipart(responseBuffer, parseBoundary(contentType));
      if (parts.length === 0) {
        throw new Error(
          `BulkDataURI multipart response contained no parts [uri: ${scrubUrl(tag.BulkDataURI)}]`
        );
      }
      return parts[0].data;
    }

    // Single-part body: the response itself is the document.
    return responseBuffer;
  }

  throw new Error(
    `EncapsulatedDocument tag (${ENCAPSULATED_DOCUMENT_TAG}) has neither InlineBinary nor BulkDataURI`
  );
}

src/index.js:38-44 (schema)

Zod schema for sopInstanceUid validation (regex for DICOM UID format, max 64 chars), used to validate the tool's input parameters. studyUidSchema (lines 24-28) and seriesUidSchema (lines 30-36) are also used.

// Zod schema for a SOP Instance UID tool parameter.
// Accepts a string made of one or more groups of digits separated by single
// dots (no leading, trailing or doubled dots) and at most 64 characters long.
// The description set here is a default; callers may replace it with their own
// `.describe(...)` call on the schema.
const sopUidSchema = z
  .string()
  .regex(/^[0-9]+(\.[0-9]+)*$/, 'SOPInstanceUID must be a valid DICOM UID')
  .max(64, 'SOPInstanceUID must not exceed 64 characters')
  .describe(
    'DICOM SOP Instance UID (e.g., 1.2.840.113619.2.55.3.604688123.123.1591781234.469). Obtain from find-instances.'
  );

DICOMweb MCP Server

get-encapsulated-pdf-report-text

Instructions

Input Schema

Implementation Reference

Tool Definition Quality

Other Tools

Latest Blog Posts

MCP directory API