clothing_controller.py•3.97 kB
import base64
import os
import uuid
from fastapi import HTTPException
from PIL import Image
from backend.app.aws.rekognition_wrapper import rekognition
from backend.app.controllers.clothing_tagging import crop_by_bounding_box
from backend.app.controllers.tag_extractor import get_tags_from_clip
from backend.app.schemas.clothing_schemas import (
UploadClothingItemRequest,
UploadClothingItemResponse,
TagRequest,
TagResponse
)
# Upload directory for temporary storage
UPLOAD_DIR = "uploads"
os.makedirs(UPLOAD_DIR, exist_ok=True)
# Minimum confidence for AWS Rekognition detection
MIN_CONFIDENCE = 50
async def handle_upload_clothing_item(payload: UploadClothingItemRequest) -> UploadClothingItemResponse:
"""
Endpoint logic for handling clothing image upload and multi-garment tagging.
Uses AWS Rekognition for detection, and CLIP for tag extraction per garment.
"""
# Step 1: Decode image
try:
image_data = base64.b64decode(payload.image_base64)
filename = payload.filename or f"{uuid.uuid4().hex}.jpg"
image_path = os.path.join(UPLOAD_DIR, filename)
with open(image_path, "wb") as f:
f.write(image_data)
image = Image.open(image_path).convert("RGB")
except Exception as e:
raise HTTPException(status_code=400, detail=f"Invalid image data: {str(e)}")
# Step 2: Run AWS Rekognition
try:
with open(image_path, "rb") as f:
image_bytes = f.read()
response = rekognition.detect_labels(
Image={'Bytes': image_bytes},
MinConfidence=MIN_CONFIDENCE
)
except Exception as e:
raise HTTPException(status_code=500, detail=f"AWS Rekognition error: {str(e)}")
# Step 3: Process detection results
results = []
seen_garment_types = set()
for label in response.get("Labels", []):
for instance in label.get("Instances", []):
box = instance.get("BoundingBox")
if not box:
continue
# Crop region based on bounding box
cropped = crop_by_bounding_box(image, box)
# Tag garment using CLIP
tags_result = get_tags_from_clip(cropped)
g_type = tags_result.get("garment_type", "Unknown")
# Avoid duplicate garment types
if g_type in seen_garment_types:
continue
seen_garment_types.add(g_type)
results.append({
"aws_label": label.get("Name", "Unknown"),
"box": box,
"garment_type": g_type,
"tags": {k: v[:10] for k, v in tags_result.get("tags", {}).items()}
})
# Step 4: Fallback if no Rekognition results
if not results:
tags_result = get_tags_from_clip(image)
results.append({
"aws_label": None,
"box": None,
"garment_type": tags_result.get("garment_type", "Unknown"),
"tags": {k: v[:10] for k, v in tags_result.get("tags", {}).items()}
})
# Step 5: Return structured response
return UploadClothingItemResponse(
id=str(uuid.uuid4()),
filename=filename,
tags={"garments": results}
)
async def handle_tag_request(payload: TagRequest) -> TagResponse:
"""
Optional: a simpler endpoint to return tags from a single image
(without Rekognition detection).
"""
try:
image_data = base64.b64decode(payload.image_base64)
temp_filename = f"temp_{uuid.uuid4().hex}.jpg"
image_path = os.path.join(UPLOAD_DIR, temp_filename)
with open(image_path, "wb") as f:
f.write(image_data)
image = Image.open(image_path).convert("RGB")
tags_result = get_tags_from_clip(image)
return TagResponse(tags=tags_result.get("tags", {}))
except Exception as e:
raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}")