RAGStack-Lambda

Overview Schema Related Servers Score Discussions

test_process_zip.py•14.2 KiB

"""Tests for process_zip Lambda function.""" import importlib.util import io import json import sys import zipfile from pathlib import Path from unittest.mock import MagicMock, patch import pytest def _load_process_zip_module(): """Load process_zip module using importlib (avoids 'lambda' keyword issue).""" module_path = Path(__file__).parent.parent.parent.parent / "src/lambda/process_zip/index.py" spec = importlib.util.spec_from_file_location("process_zip_index", module_path) module = importlib.util.module_from_spec(spec) sys.modules["process_zip_index"] = module spec.loader.exec_module(module) return module @pytest.fixture def mock_env(monkeypatch): """Set up environment variables.""" monkeypatch.setenv("TRACKING_TABLE", "test-tracking-table") monkeypatch.setenv("DATA_BUCKET", "test-data-bucket") monkeypatch.setenv("GRAPHQL_ENDPOINT", "https://test-graphql.example.com") monkeypatch.setenv("CAPTION_MODEL_ID", "anthropic.claude-3-haiku-20240307-v1:0") @pytest.fixture def mock_boto3(): """Set up mocked boto3 clients and resources.""" with ( patch("boto3.client") as mock_client, patch("boto3.resource") as mock_resource, ): # Mock S3 client mock_s3 = MagicMock() # Mock head_object to return a valid ContentLength (10MB by default) mock_s3.head_object.return_value = {"ContentLength": 10 * 1024 * 1024} # Mock Bedrock runtime client mock_bedrock = MagicMock() # Mock DynamoDB resource and table mock_dynamodb = MagicMock() mock_table = MagicMock() mock_dynamodb.Table.return_value = mock_table def client_factory(service_name, *args, **kwargs): if service_name == "s3": return mock_s3 if service_name == "bedrock-runtime": return mock_bedrock return MagicMock() def resource_factory(service_name, *args, **kwargs): if service_name == "dynamodb": return mock_dynamodb return MagicMock() mock_client.side_effect = client_factory mock_resource.side_effect = resource_factory yield { "s3": mock_s3, "bedrock": mock_bedrock, "dynamodb": mock_dynamodb, "table": mock_table, } @pytest.fixture def mock_publish(): """Mock publish_image_update function.""" with patch("ragstack_common.appsync.publish_image_update") as mock: yield mock def create_test_zip(files: dict[str, bytes | str]) -> bytes: """Create a test ZIP file in memory.""" buffer = io.BytesIO() with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zf: for filename, content in files.items(): if isinstance(content, str): content = content.encode("utf-8") zf.writestr(filename, content) return buffer.getvalue() class TestProcessZip: """Tests for process_zip Lambda handler.""" def test_process_zip_with_captions_json(self, mock_env, mock_boto3, mock_publish): """Test processing ZIP with captions.json manifest.""" module = _load_process_zip_module() # Create test ZIP with captions captions = { "image1.png": "A beautiful sunset", "image2.jpg": "A mountain landscape", } zip_content = create_test_zip( { "image1.png": b"\x89PNG\r\n\x1a\n" + b"fake image data 1", "image2.jpg": b"\xff\xd8\xff\xe0" + b"fake image data 2", "captions.json": json.dumps(captions), } ) # Mock S3 get_object to return ZIP mock_boto3["s3"].get_object.return_value = {"Body": io.BytesIO(zip_content)} event = { "upload_id": "test-upload-123", "bucket": "test-bucket", "key": "uploads/test-upload-123/archive.zip", "generate_captions": False, } result = module.lambda_handler(event, None) assert result["status"] == "COMPLETED" assert result["total_images"] == 2 assert result["processed_images"] == 2 assert result["failed_images"] == 0 assert len(result["errors"]) == 0 # Verify tracking records were created assert mock_boto3["table"].put_item.call_count == 2 def test_process_zip_without_captions_json(self, mock_env, mock_boto3, mock_publish): """Test processing ZIP without captions.json.""" module = _load_process_zip_module() # Create test ZIP without captions zip_content = create_test_zip( { "photo.png": b"\x89PNG\r\n\x1a\n" + b"fake image data", } ) mock_boto3["s3"].get_object.return_value = {"Body": io.BytesIO(zip_content)} event = { "upload_id": "test-upload-456", "bucket": "test-bucket", "key": "uploads/test-upload-456/archive.zip", "generate_captions": False, } result = module.lambda_handler(event, None) assert result["status"] == "COMPLETED" assert result["total_images"] == 1 assert result["processed_images"] == 1 # Verify image was processed without caption put_call = mock_boto3["table"].put_item.call_args item = put_call[1]["Item"] assert item["caption"] == "" def test_process_zip_with_ai_caption_generation(self, mock_env, mock_boto3, mock_publish): """Test processing ZIP with AI caption generation enabled.""" module = _load_process_zip_module() # Mock Bedrock response mock_boto3["bedrock"].invoke_model.return_value = { "body": io.BytesIO( json.dumps( {"content": [{"type": "text", "text": "AI generated description of image"}]} ).encode() ) } # Create test ZIP zip_content = create_test_zip( { "photo.png": b"\x89PNG\r\n\x1a\n" + b"fake image data", } ) mock_boto3["s3"].get_object.return_value = {"Body": io.BytesIO(zip_content)} event = { "upload_id": "test-upload-789", "bucket": "test-bucket", "key": "uploads/test-upload-789/archive.zip", "generate_captions": True, } result = module.lambda_handler(event, None) assert result["status"] == "COMPLETED" assert result["processed_images"] == 1 # Verify Bedrock was called assert mock_boto3["bedrock"].invoke_model.call_count == 1 # Verify AI caption was applied put_call = mock_boto3["table"].put_item.call_args item = put_call[1]["Item"] assert "AI generated description" in item["caption"] def test_process_zip_combines_user_and_ai_captions(self, mock_env, mock_boto3, mock_publish): """Test that user caption comes first, AI appends.""" module = _load_process_zip_module() # Mock Bedrock response mock_boto3["bedrock"].invoke_model.return_value = { "body": io.BytesIO( json.dumps({"content": [{"type": "text", "text": "AI description"}]}).encode() ) } # Create test ZIP with user caption captions = {"photo.png": "User provided caption"} zip_content = create_test_zip( { "photo.png": b"\x89PNG\r\n\x1a\n" + b"fake image data", "captions.json": json.dumps(captions), } ) mock_boto3["s3"].get_object.return_value = {"Body": io.BytesIO(zip_content)} event = { "upload_id": "test-upload-combo", "bucket": "test-bucket", "key": "uploads/test-upload-combo/archive.zip", "generate_captions": True, } result = module.lambda_handler(event, None) assert result["status"] == "COMPLETED" # Verify combined caption (user first) put_call = mock_boto3["table"].put_item.call_args item = put_call[1]["Item"] assert item["caption"] == "User provided caption. AI description" assert item["user_caption"] == "User provided caption" assert item["ai_caption"] == "AI description" def test_process_zip_skips_non_image_files(self, mock_env, mock_boto3, mock_publish): """Test that non-image files are skipped.""" module = _load_process_zip_module() # Create test ZIP with mixed content zip_content = create_test_zip( { "photo.png": b"\x89PNG\r\n\x1a\n" + b"fake image data", "readme.txt": "This is a text file", "data.csv": "col1,col2\n1,2", "nested/image.jpg": b"\xff\xd8\xff\xe0" + b"nested image", } ) mock_boto3["s3"].get_object.return_value = {"Body": io.BytesIO(zip_content)} event = { "upload_id": "test-upload-mixed", "bucket": "test-bucket", "key": "uploads/test-upload-mixed/archive.zip", "generate_captions": False, } result = module.lambda_handler(event, None) assert result["status"] == "COMPLETED" assert result["total_images"] == 2 # Only PNG and JPG assert result["processed_images"] == 2 def test_process_zip_skips_macosx_folder(self, mock_env, mock_boto3, mock_publish): """Test that __MACOSX folder is skipped.""" module = _load_process_zip_module() # Create test ZIP with macOS resource fork files zip_content = create_test_zip( { "photo.png": b"\x89PNG\r\n\x1a\n" + b"fake image data", "__MACOSX/._photo.png": b"resource fork data", } ) mock_boto3["s3"].get_object.return_value = {"Body": io.BytesIO(zip_content)} event = { "upload_id": "test-upload-macos", "bucket": "test-bucket", "key": "uploads/test-upload-macos/archive.zip", "generate_captions": False, } result = module.lambda_handler(event, None) assert result["status"] == "COMPLETED" assert result["total_images"] == 1 # Only real image def test_process_zip_empty_archive(self, mock_env, mock_boto3, mock_publish): """Test processing empty ZIP file.""" module = _load_process_zip_module() # Create empty test ZIP zip_content = create_test_zip({}) mock_boto3["s3"].get_object.return_value = {"Body": io.BytesIO(zip_content)} event = { "upload_id": "test-upload-empty", "bucket": "test-bucket", "key": "uploads/test-upload-empty/archive.zip", "generate_captions": False, } result = module.lambda_handler(event, None) assert result["status"] == "COMPLETED" assert result["total_images"] == 0 assert result["processed_images"] == 0 def test_process_zip_invalid_zip_file(self, mock_env, mock_boto3): """Test handling invalid ZIP file.""" module = _load_process_zip_module() # Return invalid ZIP data mock_boto3["s3"].get_object.return_value = {"Body": io.BytesIO(b"not a zip file")} event = { "upload_id": "test-upload-invalid", "bucket": "test-bucket", "key": "uploads/test-upload-invalid/archive.zip", "generate_captions": False, } result = module.lambda_handler(event, None) assert result["status"] == "FAILED" assert any("Invalid ZIP file" in e for e in result["errors"]) def test_process_zip_malformed_captions_json(self, mock_env, mock_boto3, mock_publish): """Test handling malformed captions.json.""" module = _load_process_zip_module() # Create test ZIP with invalid JSON zip_content = create_test_zip( { "photo.png": b"\x89PNG\r\n\x1a\n" + b"fake image data", "captions.json": "{ invalid json }", } ) mock_boto3["s3"].get_object.return_value = {"Body": io.BytesIO(zip_content)} event = { "upload_id": "test-upload-badjson", "bucket": "test-bucket", "key": "uploads/test-upload-badjson/archive.zip", "generate_captions": False, } result = module.lambda_handler(event, None) # Should still process images, just log warning about JSON assert result["status"] in ("COMPLETED", "COMPLETED_WITH_ERRORS") assert result["total_images"] == 1 def test_process_zip_missing_required_params(self, mock_env, mock_boto3): """Test error handling for missing parameters.""" module = _load_process_zip_module() event = {"upload_id": "test-123"} # Missing bucket and key with pytest.raises(ValueError, match="bucket.*key.*required"): module.lambda_handler(event, None) class TestHelperFunctions: """Tests for helper functions.""" def test_is_supported_image(self, mock_env, mock_boto3): """Test image extension validation.""" module = _load_process_zip_module() assert module.is_supported_image("photo.png") is True assert module.is_supported_image("photo.PNG") is True assert module.is_supported_image("photo.jpg") is True assert module.is_supported_image("photo.jpeg") is True assert module.is_supported_image("photo.gif") is True assert module.is_supported_image("photo.webp") is True assert module.is_supported_image("photo.txt") is False assert module.is_supported_image("photo.pdf") is False assert module.is_supported_image("photo") is False def test_combine_captions(self, mock_env, mock_boto3): """Test caption combination logic.""" module = _load_process_zip_module() # User only assert module.combine_captions("User caption", None) == "User caption" # AI only assert module.combine_captions(None, "AI caption") == "AI caption" # Both (user first) assert module.combine_captions("User caption", "AI caption") == "User caption. AI caption" # Neither assert module.combine_captions(None, None) == "" # With whitespace assert module.combine_captions(" User ", " AI ") == "User. AI"

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/HatmanStack/RAGStack-Lambda'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

test_process_zip.py•14.2 KiB