# image_utils.py (3.81 kB)
from PIL import Image, ImageDraw, ImageFont
import io
import logging
import base64
from typing import Optional
import os
from ..llm.llm_client import LLMClient
logger = logging.getLogger(__name__)
# Helper Function
def _load_label_font(size: int):
    """Return a bold TrueType font for labels, or Pillow's default font if none loads."""
    candidate_paths = (
        # Common Linux location.
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        # macOS fallback.
        "/System/Library/Fonts/Supplemental/Arial Bold.ttf",
    )
    for font_path in candidate_paths:
        try:
            return ImageFont.truetype(font_path, size)
        except OSError:
            # Missing or unreadable font file: try the next candidate.
            continue
    logger.warning("Default font not found, using Pillow's default.")
    return ImageFont.load_default()


def stitch_images(img1: Image.Image, img2: Image.Image, label1="Baseline", label2="Current") -> Optional[Image.Image]:
    """Stitch two equally sized images side by side under a labelled header strip.

    Args:
        img1: Image placed on the left.
        img2: Image placed on the right; must have the same size as ``img1``.
        label1: Caption for the left image, drawn as ``"1: <label1>"``.
        label2: Caption for the right image, drawn as ``"2: <label2>"``.

    Returns:
        A new RGBA image on a white background, twice as wide as one input
        and 30 pixels taller (the label strip), or ``None`` when the two
        inputs differ in size.

    Side effects:
        Tries to write a copy of the result to ``./stitched.png``. Failure
        to write that file is logged and otherwise ignored, so it never
        prevents the stitched image from being returned.
    """
    if img1.size != img2.size:
        logger.error("Cannot stitch images of different sizes.")
        return None

    # Sizes are equal at this point, so one width/height pair describes both.
    width, height = img1.size
    label_height = 30  # Height of the strip reserved above the images for labels.

    stitched_img = Image.new(
        'RGBA',
        (width * 2, height + label_height),
        (255, 255, 255, 255),  # White background
    )

    # Images sit below the label strip, left then right.
    stitched_img.paste(img1, (0, label_height))
    stitched_img.paste(img2, (width, label_height))

    # Labels are best-effort: any drawing failure still yields the unlabelled image.
    try:
        draw = ImageDraw.Draw(stitched_img)
        font = _load_label_font(15)
        text_fill = (0, 0, 0, 255)
        draw.text((10, 5), f"1: {label1}", fill=text_fill, font=font)
        draw.text((width + 10, 5), f"2: {label2}", fill=text_fill, font=font)
    except Exception as e:
        logger.warning("Could not add labels to stitched image: %s", e)

    # Debug copy in the working directory; an unwritable directory must not
    # turn a successful stitch into a failure.
    try:
        stitched_img.save("./stitched.png")
    except OSError as e:
        logger.warning("Could not save debug copy of stitched image: %s", e)

    return stitched_img
    
def compare_images(prompt: str, image_bytes_1: bytes, image_bytes_2: bytes, image_client: LLMClient) -> str:
    """
    Ask the multimodal LLM to compare two images according to ``prompt``.

    Both byte payloads are decoded and converted to RGBA, joined into a
    single side-by-side image with ``stitch_images``, re-encoded as PNG and
    passed to ``image_client.generate_multimodal`` together with ``prompt``.

    Problems that occur before the LLM call (undecodable data, mismatched
    dimensions, stitching failure) are not raised; they are returned as a
    string beginning with "Error:". The LLM call itself is made outside the
    error handling, so whatever it raises propagates to the caller.
    """
    logger.info("Preparing images for stitched comparison...")

    try:
        # Decode in order: baseline first, then current.
        baseline, current = (
            Image.open(io.BytesIO(raw)).convert("RGBA")
            for raw in (image_bytes_1, image_bytes_2)
        )

        if baseline.size != current.size:
            error_msg = f"Visual Comparison Failed: Image dimensions mismatch. Baseline: {baseline.size}, Current: {current.size}."
            logger.error(error_msg)
            return f"Error: {error_msg}"  # Reported to the caller as text, not raised

        combined = stitch_images(baseline, current)
        if not combined:
            return "Error: Failed to stitch images."

        # Serialise the combined picture to PNG bytes for the LLM client.
        png_buffer = io.BytesIO()
        combined.save(png_buffer, format="PNG")
        stitched_image_bytes = png_buffer.getvalue()
        logger.info(f"Images stitched successfully (new size: {combined.size}). Requesting LLM comparison...")
    except Exception as e:
        logger.error(f"Error processing images for stitching: {e}", exc_info=True)
        return f"Error: Image processing failed - {e}"

    return image_client.generate_multimodal(prompt, stitched_image_bytes)