"""Utility functions for image processing and file handling."""
import os
import base64
import hashlib
from pathlib import Path
from typing import Optional, List, Tuple
from io import BytesIO
from datetime import datetime
from PIL import Image
def ensure_directory(path: Path) -> Path:
    """Make sure ``path`` exists as a directory and hand it back.

    Any missing parent directories are created along the way. A directory
    that already exists is left untouched.

    Args:
        path: Directory that should exist after the call.

    Returns:
        The same ``path`` object that was passed in, for call chaining.
    """
    if not path.is_dir():
        # exist_ok still matters here: another process may create the
        # directory between the check above and this call.
        path.mkdir(exist_ok=True, parents=True)
    return path
def generate_filename(
    prefix: str = "asset",
    suffix: str = "",
    extension: str = "png"
) -> str:
    """Generate a timestamped filename.

    The name has the form ``<prefix>_<YYYYmmdd>_<HHMMSS>_<microseconds>``
    followed by ``_<suffix>`` when a suffix is given, then ``.<extension>``.
    The timestamp is local time with microsecond resolution, so two calls
    within the same microsecond produce the same name.

    Args:
        prefix: Leading part of the filename.
        suffix: Optional trailing part inserted before the extension.
        extension: File extension, with or without a leading dot.

    Returns:
        The assembled filename (no directory component).
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    parts = [prefix, timestamp]
    if suffix:
        parts.append(suffix)
    # Accept ".png" as well as "png" so the result never contains "..".
    extension = extension.lstrip(".")
    return f"{'_'.join(parts)}.{extension}"
def image_to_base64(image_bytes: bytes) -> str:
    """Encode raw image bytes as a base64 text string.

    Args:
        image_bytes: Binary image data.

    Returns:
        The standard-alphabet base64 encoding of ``image_bytes`` as ``str``.
    """
    encoded = base64.b64encode(image_bytes)
    return str(encoded, "utf-8")
def base64_to_image(b64_string: str) -> bytes:
    """Decode a base64 text string back into raw image bytes.

    Args:
        b64_string: Base64-encoded image data.

    Returns:
        The decoded binary data.
    """
    decoded = base64.b64decode(b64_string)
    return decoded
def resize_image(
    image_bytes: bytes,
    width: int,
    height: int,
    resample: int = Image.Resampling.NEAREST
) -> bytes:
    """Scale an encoded image to exactly ``width`` x ``height`` pixels.

    Args:
        image_bytes: Encoded source image in any format Pillow can open.
        width: Target width in pixels.
        height: Target height in pixels.
        resample: Pillow resampling filter; nearest-neighbour by default.

    Returns:
        The resized image encoded as PNG bytes.
    """
    source = Image.open(BytesIO(image_bytes))
    target_size = (width, height)
    output = BytesIO()
    source.resize(target_size, resample=resample).save(output, format="PNG")
    return output.getvalue()
def create_spritesheet(
    images: List[bytes],
    columns: int = 4,
    padding: int = 0
) -> bytes:
    """Pack several encoded images into one grid-shaped spritesheet.

    Sprites are placed left-to-right, top-to-bottom. Every cell has the size
    of the first image; the other images are assumed (not checked) to have
    the same size. The sheet is always ``columns`` cells wide, even when
    fewer images are supplied, and unused cells stay fully transparent.

    Args:
        images: Encoded images in any format Pillow can open.
        columns: Number of cells per row; must be at least 1.
        padding: Gap in pixels between neighbouring cells (none at the
            outer border).

    Returns:
        The RGBA spritesheet encoded as PNG bytes.

    Raises:
        ValueError: If ``images`` is empty or ``columns`` is less than 1.
    """
    if not images:
        raise ValueError("No images provided")
    # Reject this up front: 0 would divide by zero below and negative values
    # would produce a sheet with negative dimensions.
    if columns < 1:
        raise ValueError("columns must be at least 1")

    pil_images = [Image.open(BytesIO(data)) for data in images]

    # Cell size comes from the first image (all are assumed to match).
    sprite_width, sprite_height = pil_images[0].size

    rows = (len(pil_images) + columns - 1) // columns  # ceiling division
    stride_x = sprite_width + padding
    stride_y = sprite_height + padding
    # n cells plus (n - 1) gaps == n strides minus one gap.
    sheet_size = (columns * stride_x - padding, rows * stride_y - padding)

    sheet = Image.new("RGBA", sheet_size, (0, 0, 0, 0))
    for index, sprite in enumerate(pil_images):
        row, col = divmod(index, columns)
        sheet.paste(sprite, (col * stride_x, row * stride_y))

    buffer = BytesIO()
    sheet.save(buffer, format="PNG")
    return buffer.getvalue()
def split_spritesheet(
    image_bytes: bytes,
    sprite_width: int,
    sprite_height: int,
    count: Optional[int] = None,
    padding: int = 0
) -> List[bytes]:
    """Cut a grid-shaped spritesheet into individual sprites.

    Cells are read left-to-right, top-to-bottom. Only whole cells are
    returned; leftover pixels at the right or bottom edge are ignored.

    Args:
        image_bytes: Encoded spritesheet in any format Pillow can open.
        sprite_width: Width of one cell in pixels; must be positive.
        sprite_height: Height of one cell in pixels; must be positive.
        count: Maximum number of sprites to return. ``None`` returns every
            whole cell; ``0`` or a negative value returns an empty list.
        padding: Gap in pixels between neighbouring cells, matching the
            ``padding`` argument of ``create_spritesheet``.

    Returns:
        One PNG-encoded ``bytes`` object per sprite, in reading order.

    Raises:
        ValueError: If a sprite dimension is not positive or ``padding`` is
            negative.
    """
    if sprite_width <= 0 or sprite_height <= 0:
        raise ValueError("sprite_width and sprite_height must be positive")
    if padding < 0:
        raise ValueError("padding must not be negative")

    sheet = Image.open(BytesIO(image_bytes))

    stride_x = sprite_width + padding
    stride_y = sprite_height + padding
    # A sheet of n cells has n - 1 gaps, so adding one gap makes the size an
    # exact multiple of the stride.
    columns = (sheet.width + padding) // stride_x
    rows = (sheet.height + padding) // stride_y

    total = columns * rows
    if count is not None:
        # Compare against None explicitly so that count=0 means "no sprites"
        # instead of being mistaken for "no limit".
        total = min(total, max(count, 0))

    sprites = []
    for index in range(total):
        row, col = divmod(index, columns)
        left = col * stride_x
        top = row * stride_y
        tile = sheet.crop((left, top, left + sprite_width, top + sprite_height))
        buffer = BytesIO()
        tile.save(buffer, format="PNG")
        sprites.append(buffer.getvalue())
    return sprites
def remove_background(image_bytes: bytes, threshold: int = 10) -> bytes:
    """Turn near-white pixels fully transparent.

    A pixel counts as near-white when each of its red, green and blue values
    is greater than ``255 - threshold``. Such pixels are replaced by
    transparent white; every other pixel is kept as is.

    Args:
        image_bytes: Encoded source image in any format Pillow can open.
        threshold: How far below 255 a channel may be and still count as
            white.

    Returns:
        The RGBA result encoded as PNG bytes.
    """
    rgba = Image.open(BytesIO(image_bytes)).convert("RGBA")
    cutoff = 255 - threshold
    cleared = (255, 255, 255, 0)
    # All three colour channels exceed the cutoff exactly when the smallest
    # of them does.
    rgba.putdata([
        cleared if min(pixel[0], pixel[1], pixel[2]) > cutoff else pixel
        for pixel in rgba.getdata()
    ])
    output = BytesIO()
    rgba.save(output, format="PNG")
    return output.getvalue()
def add_outline(
    image_bytes: bytes,
    color: Tuple[int, int, int, int] = (0, 0, 0, 255),
    thickness: int = 1
) -> bytes:
    """Draw an outline around the non-transparent region of an image.

    Every fully transparent pixel that lies within ``thickness`` pixels
    (horizontally, vertically or diagonally) of a pixel with non-zero alpha
    is painted with ``color``. The canvas size is unchanged, so the outline
    is clipped at the image border. A ``thickness`` of 0 or less adds no
    outline.

    Args:
        image_bytes: Encoded source image in any format Pillow can open.
        color: RGBA outline colour.
        thickness: Outline width in pixels.

    Returns:
        The RGBA result encoded as PNG bytes.
    """
    img = Image.open(BytesIO(image_bytes)).convert("RGBA")
    width, height = img.size
    outline_color = tuple(color)

    outline = Image.new("RGBA", img.size, (0, 0, 0, 0))
    pixels = img.load()
    outline_pixels = outline.load()

    reach = range(-thickness, thickness + 1)
    for y in range(height):
        for x in range(width):
            if pixels[x, y][3] == 0:
                continue  # only visible pixels cast an outline
            for dy in reach:
                ny = y + dy
                if not 0 <= ny < height:
                    continue
                for dx in reach:
                    nx = x + dx
                    # Paint the transparent neighbour, not the visible pixel
                    # itself: the outline layer ends up underneath the
                    # original, so anything drawn under a visible pixel
                    # would be covered.
                    if 0 <= nx < width and pixels[nx, ny][3] == 0:
                        outline_pixels[nx, ny] = outline_color

    # alpha_composite puts its second argument on top of the first.
    result = Image.alpha_composite(outline, img)
    buffer = BytesIO()
    result.save(buffer, format="PNG")
    return buffer.getvalue()
def validate_path(path: str, allowed_roots: List[str]) -> bool:
    """Report whether ``path`` lies inside one of the allowed directories.

    Both ``path`` and each root are resolved first, so ``..`` segments and
    symlinks are taken into account. A path equal to a root counts as
    inside it.

    Args:
        path: Path to check.
        allowed_roots: Directories the path is permitted to live under.

    Returns:
        ``True`` if the resolved path is a root or a descendant of one,
        otherwise ``False``.
    """
    candidate = Path(path).resolve()
    # The path itself plus every ancestor directory.
    ancestry = {candidate, *candidate.parents}
    return any(Path(root).resolve() in ancestry for root in allowed_roots)
def hash_image(image_bytes: bytes) -> str:
    """Return a short content fingerprint for image data.

    Args:
        image_bytes: Binary image data.

    Returns:
        The first 16 hexadecimal characters of the SHA-256 digest.
    """
    hasher = hashlib.sha256()
    hasher.update(image_bytes)
    full_digest = hasher.hexdigest()
    return full_digest[:16]
def _depth_shape_radius(width: int, height: int) -> int:
    """Return the disc radius used by the "sphere" and "cylinder" shapes.

    The disc is normally inset 10 px from the shorter image side. Images too
    small for that inset use the largest radius that fits (at least 1), so
    the radius is never zero or negative.
    """
    half = min(width, height) // 2
    return half - 10 if half > 10 else max(1, half)


def _depth_array_numpy(np, width: int, height: int, view_type: str, shape: str):
    """Compute depth values as a ``(height, width)`` uint8 NumPy array."""
    depth = np.zeros((height, width), dtype=np.uint8)
    cx, cy = width // 2, height // 2
    y, x = np.ogrid[:height, :width]

    def ellipse(center_x, center_y, radius_x, radius_y):
        """Boolean mask of an axis-aligned filled ellipse."""
        return (
            ((x - center_x) ** 2) / max(1, radius_x ** 2)
            + ((y - center_y) ** 2) / max(1, radius_y ** 2)
        ) <= 1.0

    def raise_to(mask, value):
        """Lift masked pixels to ``value`` without darkening brighter ones."""
        depth[mask] = np.maximum(depth[mask], value)

    if view_type == "topdown":
        if shape == "humanoid":
            # Head, torso, arms and legs at decreasing depth values.
            head_r = int(min(width, height) * 0.10)
            head_cy = int(height * 0.33)
            depth[(x - cx) ** 2 + (y - head_cy) ** 2 <= head_r ** 2] = 235
            raise_to(
                ellipse(cx, int(height * 0.52), int(width * 0.14), int(height * 0.16)),
                200,
            )
            for arm_cx in (int(width * 0.35), int(width * 0.65)):
                raise_to(
                    ellipse(arm_cx, int(height * 0.52), int(width * 0.08), int(height * 0.10)),
                    170,
                )
            for leg_cx in (int(width * 0.46), int(width * 0.54)):
                raise_to(
                    ellipse(leg_cx, int(height * 0.72), int(width * 0.07), int(height * 0.12)),
                    145,
                )
        elif shape == "sphere":
            r = _depth_shape_radius(width, height)
            dist = np.sqrt((x - cx) ** 2 + (y - cy) ** 2)
            mask = dist <= r
            depth[mask] = (255 * (1 - (dist[mask] / r) ** 2)).astype(np.uint8)
        elif shape == "cylinder":
            r = _depth_shape_radius(width, height)
            dist = np.sqrt((x - cx) ** 2 + (y - cy) ** 2)
            mask = dist <= r
            # Bevelled top surface: centre brighter, rim slightly darker.
            depth[mask] = (200 + (1 - np.clip(dist[mask] / r, 0, 1)) * 55).astype(np.uint8)
        elif shape == "box":
            margin = min(width, height) // 8
            x0, y0 = margin, margin
            x1, y1 = width - margin - 1, height - margin - 1
            inside = (x >= x0) & (x <= x1) & (y >= y0) & (y <= y1)
            # Distance to the nearest rectangle edge drives the bevel.
            d = np.minimum(np.minimum(x - x0, x1 - x), np.minimum(y - y0, y1 - y))
            dmax = max(1, int(min(width, height) * 0.12))
            depth_val = 200 + (np.clip(d / dmax, 0, 1) * 55)
            raise_to(inside, depth_val[inside].astype(np.uint8))
        else:
            # "flat" and any unrecognised shape: uniformly close surface,
            # the same result the Pillow-only path produces.
            depth[:, :] = 255
    elif view_type == "side":
        column = (255 * (np.arange(height) / height)).astype(np.uint8)
        depth[:, :] = column[:, np.newaxis]
    elif view_type == "front":
        dist = np.sqrt((x - cx) ** 2 + (y - cy) ** 2)
        # Guard against a 1x1 image, where the centre-to-corner distance is 0.
        max_dist = max(1e-6, float(np.sqrt(cx ** 2 + cy ** 2)))
        depth = np.clip(255 * (1 - dist / max_dist), 0, 255).astype(np.uint8)
    elif view_type == "3/4":
        v_grad = np.arange(height)[:, np.newaxis] / height
        h_grad = np.arange(width)[np.newaxis, :] / width
        depth = (255 * (0.6 * v_grad + 0.4 * h_grad)).astype(np.uint8)
    else:
        depth[:, :] = 255
    return depth


def _depth_image_pil(width: int, height: int, view_type: str, shape: str) -> "Image.Image":
    """Render the depth map as a mode "L" image using Pillow only."""
    from PIL import ImageDraw, ImageOps

    size = (width, height)
    cx, cy = width // 2, height // 2

    def vertical_gradient():
        """Black at the top row fading to white at the bottom row."""
        if hasattr(Image, "linear_gradient"):
            # linear_gradient already runs top-to-bottom; use it unrotated.
            return Image.linear_gradient("L").resize(size)
        g = Image.new("L", size)
        g.putdata([int(255 * (y / max(1, height - 1))) for y in range(height) for _ in range(width)])
        return g

    def horizontal_gradient():
        """Black at the left column fading to white at the right column."""
        if hasattr(Image, "linear_gradient"):
            # A 90 degree counter-clockwise turn moves the black top edge to
            # the left side.
            return Image.linear_gradient("L").rotate(90, expand=True).resize(size)
        g = Image.new("L", size)
        g.putdata([int(255 * (x / max(1, width - 1))) for _ in range(height) for x in range(width)])
        return g

    def radial_gradient():
        """White at the centre fading to black towards the corners."""
        if hasattr(Image, "radial_gradient"):
            return ImageOps.invert(Image.radial_gradient("L").resize(size))
        g = Image.new("L", size)
        px = g.load()
        max_dist = (cx * cx + cy * cy) ** 0.5
        for y in range(height):
            for x in range(width):
                d = ((x - cx) ** 2 + (y - cy) ** 2) ** 0.5
                v = int(255 * (1 - d / max(1e-6, max_dist)))
                px[x, y] = max(0, min(255, v))
        return g

    if view_type == "side":
        return vertical_gradient()
    if view_type == "front":
        return radial_gradient()
    if view_type == "3/4":
        return Image.blend(vertical_gradient(), horizontal_gradient(), 0.4)
    if view_type != "topdown":
        return Image.new("L", size, 255)

    img = Image.new("L", size, 0)
    draw = ImageDraw.Draw(img)

    def disc(center_x, center_y, radius_x, radius_y, **style):
        """Draw an axis-aligned ellipse given its centre and radii."""
        draw.ellipse(
            [center_x - radius_x, center_y - radius_y, center_x + radius_x, center_y + radius_y],
            **style,
        )

    if shape == "humanoid":
        # Draw from darkest to brightest so that, where parts overlap, the
        # brighter (closer) part wins, as in the NumPy implementation.
        for leg_cx in (int(width * 0.46), int(width * 0.54)):
            disc(leg_cx, int(height * 0.72), int(width * 0.07), int(height * 0.12), fill=145)
        for arm_cx in (int(width * 0.35), int(width * 0.65)):
            disc(arm_cx, int(height * 0.52), int(width * 0.08), int(height * 0.10), fill=170)
        disc(cx, int(height * 0.52), int(width * 0.14), int(height * 0.16), fill=200)
        head_r = int(min(width, height) * 0.10)
        disc(cx, int(height * 0.33), head_r, head_r, fill=235)
    elif shape == "box":
        margin = min(width, height) // 8
        x0, y0 = margin, margin
        # Pillow rectangles include their end coordinates.
        x1, y1 = width - margin - 1, height - margin - 1
        draw.rectangle([x0, y0, x1, y1], fill=200)
        # Simple bevel: progressively smaller, brighter rectangle outlines.
        bevel_steps = max(3, min(width, height) // 96)
        for i in range(1, bevel_steps + 1):
            if x0 + i > x1 - i or y0 + i > y1 - i:
                break  # the box is too small for further rings
            draw.rectangle(
                [x0 + i, y0 + i, x1 - i, y1 - i],
                outline=int(200 + 55 * (i / bevel_steps)),
            )
    elif shape == "cylinder":
        r = _depth_shape_radius(width, height)
        disc(cx, cy, r, r, fill=200)
        # Bevel rings near the rim.
        bevel_steps = max(6, min(width, height) // 64)
        for i in range(1, bevel_steps + 1):
            t = i / bevel_steps
            rr = int(r * (1 - 0.15 * t))
            disc(cx, cy, rr, rr, outline=int(200 + 55 * (1 - t)))
    elif shape == "sphere":
        r = _depth_shape_radius(width, height)
        mask = Image.new("L", size, 0)
        ImageDraw.Draw(mask).ellipse([cx - r, cy - r, cx + r, cy + r], fill=255)
        img.paste(radial_gradient(), (0, 0), mask)
    else:
        # "flat" and any unrecognised shape.
        draw.rectangle([0, 0, width, height], fill=255)
    return img


def create_depth_map(
    width: int,
    height: int,
    view_type: str = "topdown",
    shape: str = "flat"
) -> bytes:
    """Create a simple synthetic depth map for ControlNet guidance.

    NumPy is used when it can be imported; otherwise an approximation is
    drawn with Pillow alone. Top-down maps are softened with a Gaussian blur.

    Args:
        width: Image width in pixels; must be positive.
        height: Image height in pixels; must be positive.
        view_type: Camera angle - "topdown", "side", "front" or "3/4".
            Any other value yields a uniformly white map.
        shape: Object shape hint, used only for the "topdown" view -
            "flat", "humanoid", "sphere", "cylinder" or "box". Any other
            value is treated like "flat".

    Returns:
        PNG bytes of an RGB image whose three channels all carry the same
        grey depth value (white=close, black=far).

    Raises:
        ValueError: If ``width`` or ``height`` is not positive.
    """
    if width <= 0 or height <= 0:
        raise ValueError("width and height must be positive")

    # Keep the try body minimal: only the optional import may fail here.
    try:
        import numpy as np
    except ImportError:
        np = None

    if np is not None:
        # A 2-D uint8 array is interpreted by Pillow as mode "L".
        img = Image.fromarray(_depth_array_numpy(np, width, height, view_type, shape))
    else:
        img = _depth_image_pil(width, height, view_type, shape)

    if view_type == "topdown":
        from PIL import ImageFilter
        img = img.filter(ImageFilter.GaussianBlur(radius=max(1, min(width, height) / 256)))

    img = img.convert("RGB")
    buffer = BytesIO()
    img.save(buffer, format="PNG")
    return buffer.getvalue()
def create_canny_edge(
    image_bytes: bytes,
    low_threshold: int = 100,
    high_threshold: int = 200
) -> bytes:
    """Create an edge-detection image for ControlNet.

    OpenCV's Canny detector is used when ``cv2`` and ``numpy`` can be
    imported. Otherwise Pillow's ``FIND_EDGES`` filter is used as a simpler
    fallback, and the two thresholds are ignored.

    Args:
        image_bytes: Encoded input image.
        low_threshold: Canny low threshold (OpenCV path only).
        high_threshold: Canny high threshold (OpenCV path only).

    Returns:
        Single-channel edge image encoded as PNG bytes.

    Raises:
        ValueError: If ``image_bytes`` is empty, or if OpenCV cannot decode
            the input or encode the result.
    """
    if not image_bytes:
        raise ValueError("image_bytes is empty")

    # Keep the try body minimal: only the optional imports may fail here.
    try:
        import cv2
        import numpy as np
    except ImportError:
        cv2 = None

    if cv2 is None:
        # Fallback: simple edge detection with PIL.
        from PIL import ImageFilter
        gray_image = Image.open(BytesIO(image_bytes)).convert("L")
        output = BytesIO()
        gray_image.filter(ImageFilter.FIND_EDGES).save(output, format="PNG")
        return output.getvalue()

    encoded = np.frombuffer(image_bytes, np.uint8)
    img = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    # imdecode signals failure by returning None instead of raising.
    if img is None:
        raise ValueError("image_bytes could not be decoded as an image")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, low_threshold, high_threshold)

    success, png = cv2.imencode(".png", edges)
    if not success:
        raise ValueError("edge image could not be encoded as PNG")
    return png.tobytes()