RAGStack-Lambda

Overview Schema Related Servers Score Discussions

test_process_image.py•10.9 KiB

"""Unit tests covering the process_image Lambda handler."""

import importlib.util
import json
import sys
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest


def _load_process_image_module():
    """Load the handler source by file path and return the module object.

    The handler sits under a directory named ``lambda``; importlib is used
    to avoid the 'lambda' keyword issue with a regular import statement.
    The module is registered in ``sys.modules`` as ``process_image_index``
    before its code is executed.
    """
    base_dir = Path(__file__).parent.parent.parent.parent
    handler_path = base_dir / "src/lambda/process_image/index.py"
    module_spec = importlib.util.spec_from_file_location("process_image_index", handler_path)
    loaded = importlib.util.module_from_spec(module_spec)
    sys.modules["process_image_index"] = loaded
    module_spec.loader.exec_module(loaded)
    return loaded


def _attach_clients(handler_module, mocks, *names):
    """Replace module attributes with the same-named entries from *mocks*."""
    for name in names:
        setattr(handler_module, name, mocks[name])


@pytest.fixture
def mock_env(monkeypatch):
    """Set the environment variables used by these tests."""
    env_values = {
        "TRACKING_TABLE": "test-tracking-table",
        "GRAPHQL_ENDPOINT": "https://test.appsync.amazonaws.com/graphql",
        "AWS_REGION": "us-east-1",
        "KNOWLEDGE_BASE_ID": "test-kb-id",
        "DATA_SOURCE_ID": "test-ds-id",
        "SYNC_REQUEST_QUEUE_URL": (
            "https://sqs.us-east-1.amazonaws.com/123456789012/test-queue.fifo"
        ),
    }
    for var_name, var_value in env_values.items():
        monkeypatch.setenv(var_name, var_value)


@pytest.fixture
def mock_boto3():
    """Patch ``boto3.client`` and ``boto3.resource``; yield the mocks by name."""
    with patch("boto3.client") as client_patch, patch("boto3.resource") as resource_patch:
        # S3: head_object reports a PNG, get_object serves JSON caption data.
        caption_payload = json.dumps(
            {
                "caption": "Test caption",
                "userCaption": "User caption",
                "aiCaption": "AI caption",
                "filename": "test.png",
            }
        ).encode()
        s3_mock = MagicMock()
        s3_mock.head_object.return_value = {
            "ContentType": "image/png",
            "ContentLength": 12345,
        }
        s3_mock.get_object.return_value = {
            "Body": MagicMock(read=MagicMock(return_value=caption_payload))
        }

        # Bedrock Runtime client (for AI caption generation).
        runtime_mock = MagicMock()

        # Bedrock Agent client (for StartIngestionJob).
        agent_mock = MagicMock()
        agent_mock.start_ingestion_job.return_value = {
            "ingestionJob": {"ingestionJobId": "job-123", "status": "STARTING"}
        }

        # SQS client (for the sync queue).
        sqs_mock = MagicMock()
        sqs_mock.send_message.return_value = {"MessageId": "test-message-id"}

        # DynamoDB resource whose Table(...) returns a pre-populated table mock.
        table_mock = MagicMock()
        table_mock.get_item.return_value = {
            "Item": {
                "document_id": "test-image-id",
                "filename": "test.png",
                "caption": "Test caption",
                "type": "image",
                "status": "PROCESSING",
                "input_s3_uri": "s3://test-bucket/content/test-image-id/test.png",
            }
        }
        dynamodb_mock = MagicMock()
        dynamodb_mock.Table.return_value = table_mock

        known_clients = (
            ("bedrock-runtime", runtime_mock),
            ("bedrock-agent", agent_mock),
            ("s3", s3_mock),
            ("sqs", sqs_mock),
        )

        def client_factory(service, **kwargs):
            # Any service not listed above gets a fresh, unconfigured mock.
            for service_name, client in known_clients:
                if service == service_name:
                    return client
            return MagicMock()

        client_patch.side_effect = client_factory
        resource_patch.return_value = dynamodb_mock

        yield {
            "bedrock_runtime": runtime_mock,
            "bedrock_agent": agent_mock,
            "s3": s3_mock,
            "sqs": sqs_mock,
            "dynamodb": dynamodb_mock,
            "table": table_mock,
        }


class TestProcessImage:
    """Exercises ``lambda_handler`` of the process_image module."""

    def test_process_image_success(self, mock_env, mock_boto3):
        """Happy path: handler returns SYNC_QUEUED, writes to S3, updates the table."""
        handler_module = _load_process_image_module()
        # Point the module-level clients at the fixture's mocks.
        _attach_clients(handler_module, mock_boto3, "s3", "sqs", "dynamodb", "bedrock_agent")

        payload = {
            "image_id": "content/test-image-id/test.png",
            "input_s3_uri": "s3://test-bucket/content/test-image-id/test.png",
        }

        # publish_image_update is replaced for the duration of the call.
        with patch.object(handler_module, "publish_image_update"):
            outcome = handler_module.lambda_handler(payload, None)

        assert outcome["image_id"] == "test-image-id"
        # Status stays SYNC_QUEUED until sync_status_checker confirms KB indexing.
        assert outcome["status"] == "SYNC_QUEUED"

        # At least two S3 writes are expected (caption.txt and image metadata).
        recorded_puts = mock_boto3["s3"].put_object.call_args_list
        assert len(recorded_puts) >= 2

        # The image metadata file is the visual embedding trigger.
        written_keys = [recorded.kwargs.get("Key", "") for recorded in recorded_puts]
        assert any("test.png.metadata.json" in written for written in written_keys)

        # The tracking table must have been updated.
        mock_boto3["table"].update_item.assert_called()

    def test_process_image_not_found_in_table(self, mock_env, mock_boto3):
        """A get_item response without an Item raises ValueError('...not found...')."""
        handler_module = _load_process_image_module()
        _attach_clients(handler_module, mock_boto3, "s3", "dynamodb")

        # Empty response: no "Item" key at all.
        mock_boto3["table"].get_item.return_value = {}

        payload = {
            "image_id": "content/nonexistent-id/test.png",
            "input_s3_uri": "s3://test-bucket/content/nonexistent-id/test.png",
        }

        with pytest.raises(ValueError, match="not found"):
            handler_module.lambda_handler(payload, None)

    def test_process_image_not_image_type(self, mock_env, mock_boto3):
        """A tracking record whose type is not 'image' raises ValueError."""
        handler_module = _load_process_image_module()
        _attach_clients(handler_module, mock_boto3, "s3", "dynamodb")

        # The record is a document rather than an image.
        mock_boto3["table"].get_item.return_value = {
            "Item": {
                "document_id": "test-id",
                "type": "document",
                "status": "UPLOADED",
            }
        }

        payload = {
            "image_id": "content/test-id/test.png",
            "input_s3_uri": "s3://test-bucket/content/test-id/test.png",
        }

        with pytest.raises(ValueError, match="not an image"):
            handler_module.lambda_handler(payload, None)

    def test_process_image_missing_image_id(self, mock_env, mock_boto3):
        """An event without image_id raises ValueError('...required...')."""
        handler_module = _load_process_image_module()

        payload = {"input_s3_uri": "s3://test-bucket/content/test-id/test.png"}

        with pytest.raises(ValueError, match="required"):
            handler_module.lambda_handler(payload, None)

    def test_process_image_missing_s3_uri_in_tracking(self, mock_env, mock_boto3):
        """A tracking record lacking input_s3_uri raises ValueError."""
        handler_module = _load_process_image_module()
        _attach_clients(handler_module, mock_boto3, "s3", "dynamodb")

        # The item deliberately omits input_s3_uri.
        mock_boto3["table"].get_item.return_value = {
            "Item": {
                "document_id": "test-image-id",
                "filename": "test.png",
                "type": "image",
                "status": "PROCESSING",
            }
        }

        payload = {"image_id": "content/test-image-id/test.png"}

        with pytest.raises(ValueError, match="No input_s3_uri"):
            handler_module.lambda_handler(payload, None)

    def test_process_image_s3_file_not_found(self, mock_env, mock_boto3):
        """A 404 ClientError from head_object surfaces as ValueError."""
        from botocore.exceptions import ClientError

        handler_module = _load_process_image_module()
        _attach_clients(handler_module, mock_boto3, "s3", "dynamodb")

        # head_object fails with a 404 error code.
        not_found = ClientError(
            {"Error": {"Code": "404", "Message": "Not Found"}}, "HeadObject"
        )
        mock_boto3["s3"].head_object.side_effect = not_found

        payload = {
            "image_id": "content/test-image-id/test.png",
            "input_s3_uri": "s3://test-bucket/content/test-image-id/test.png",
        }

        with pytest.raises(ValueError, match="not found in S3"):
            handler_module.lambda_handler(payload, None)

    def test_process_image_missing_tracking_table_env(self, monkeypatch):
        """With TRACKING_TABLE unset, the handler raises ValueError naming it."""
        monkeypatch.delenv("TRACKING_TABLE", raising=False)

        handler_module = _load_process_image_module()

        payload = {
            "image_id": "content/test-image-id/test.png",
            "input_s3_uri": "s3://test-bucket/content/test-image-id/test.png",
        }

        with pytest.raises(ValueError, match="TRACKING_TABLE"):
            handler_module.lambda_handler(payload, None)


class TestBuildIngestionText:
    """Exercises the ``build_ingestion_text`` helper."""

    def test_build_ingestion_text_with_all_captions(self, mock_env, mock_boto3):
        """User and AI captions both appear alongside the combined caption."""
        handler_module = _load_process_image_module()

        rendered = handler_module.build_ingestion_text(
            image_id="test-id",
            filename="vacation.png",
            caption="My vacation photo. A sunset over the ocean.",
            metadata={
                "userCaption": "My vacation photo",
                "aiCaption": "A sunset over the ocean",
            },
        )

        expected_fragments = (
            "image_id: test-id",
            "filename: vacation.png",
            "type: image",
            "user_caption: My vacation photo",
            "ai_caption: A sunset over the ocean",
            "My vacation photo. A sunset over the ocean.",
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_build_ingestion_text_caption_only(self, mock_env, mock_boto3):
        """With empty metadata, the caption and image_id are still present."""
        handler_module = _load_process_image_module()

        rendered = handler_module.build_ingestion_text(
            image_id="test-id",
            filename="image.jpg",
            caption="A simple caption",
            metadata={},
        )

        for fragment in ("A simple caption", "image_id: test-id"):
            assert fragment in rendered

    def test_build_ingestion_text_empty_caption(self, mock_env, mock_boto3):
        """An empty caption still yields the frontmatter fields."""
        handler_module = _load_process_image_module()

        rendered = handler_module.build_ingestion_text(
            image_id="test-id",
            filename="image.jpg",
            caption="",
            metadata={},
        )

        # Frontmatter must be emitted even without caption text.
        for fragment in ("image_id: test-id", "type: image"):
            assert fragment in rendered

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/HatmanStack/RAGStack-Lambda'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_process_image.py•10.9 KiB