#!/bin/bash
# scripts/download-test-images.sh
# Downloads public domain chest X-ray images from NIH or other public sources
# to replace licensed MIMIC-CXR images in test fixtures.
SET -e
TARGET_DIR="tests/fixtures/sample_medical_images"
mkdir -p "$TARGET_DIR"
echo "Downloading public chest X-ray samples (NIH ChestX-ray14)..."
# Using a few direct links to representative public domain DICOMs or PNGs
# Note: NIH ChestX-ray14 is primarily PNG, but we can use them for vectorization tests.
# For DICOM specific tests, we might use TCIA (The Cancer Imaging Archive) public samples.
# Example: NIH ChestX-ray14 samples (using a subset)
# Since the full dataset is 42GB, we only download a few samples.
# For now, let's provide a script that downloads from a known public repository or S3.
# NIH ChestX-ray14 on AWS: s3://nih-chest-xrays/
# We can use curl to get a few if we have the URLs.
# As a placeholder that works immediately for the user, I'll use some public DICOM samples
# from the DICOM library or similar public domain sources if available.
# Otherwise, I will provide instructions in the README.
echo "This script will download public domain medical images for testing."
echo "Currently targeted: NIH ChestX-ray14 samples."
# Placeholder for actual download commands
# curl -L -o "$TARGET_DIR/sample1.png" "https://example.com/path/to/public/xray.png"
echo "Note: Full NIH dataset is available at: https://nihcc.app.box.com/v/ChestXray-NIHCC"
echo "Or via AWS S3: s3://nih-chest-xrays/"
cat <<EOF > "$TARGET_DIR/README.md"
# Public Medical Image Test Fixtures
This directory is intended for public domain medical images used in testing.
MIMIC-CXR images have been removed to comply with data use agreements.
## Instructions to Populate
To run full integration tests, you can download public domain images:
1. **NIH ChestX-ray14**: https://nihcc.app.box.com/v/ChestXray-NIHCC
2. **TCIA Public Collections**: https://www.cancerimagingarchive.net/
Place your test images in this directory. The image vectorization pipeline
supports DICOM (.dcm), PNG, and JPG formats.
EOF
echo "Done."