# gemini_image_tool.py (8.17 kB)
#!/usr/bin/env python3
"""
Gemini 2.5 Flash Image (Nano Banana) Tool
A Python tool for generating and editing images using Google's Gemini 2.5 Flash Image API.
"""
import os
import base64
import json
import argparse
from pathlib import Path
from typing import Optional, List, Dict, Any
import requests
from dotenv import load_dotenv
class GeminiImageTool:
    """Tool for interacting with the Gemini 2.5 Flash Image API.

    Sends a text prompt (optionally accompanied by input images) to the
    ``generateContent`` endpoint and writes any image data found in the
    response to disk.
    """

    BASE_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-image:generateContent"

    # Available aspect ratios
    ASPECT_RATIOS = ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"]

    # (connect, read) timeouts in seconds handed to requests.post so that a
    # stalled connection cannot block the caller forever.
    REQUEST_TIMEOUT = (10, 300)

    # Supported input image extensions (lower-case) and their MIME types
    _MIME_TYPES = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".webp": "image/webp",
        ".gif": "image/gif",
    }

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the Gemini Image Tool

        Args:
            api_key: Google AI API key. If not provided, will look for GEMINI_API_KEY env variable

        Raises:
            ValueError: If no API key was passed and GEMINI_API_KEY is unset or empty
        """
        # Runs before the environment lookup so a key provided via .env is visible
        load_dotenv()
        self.api_key = api_key or os.getenv("GEMINI_API_KEY")
        if not self.api_key:
            raise ValueError(
                "API key required. Set GEMINI_API_KEY environment variable or pass api_key parameter.\n"
                "Get your API key at: https://aistudio.google.com/apikey"
            )

    def _encode_image(self, image_path: str) -> Dict[str, Any]:
        """
        Encode an image file to base64 for API request

        Args:
            image_path: Path to the image file

        Returns:
            Dict with inlineData containing MIME type and base64 data

        Raises:
            FileNotFoundError: If image_path does not exist
            ValueError: If the file extension is not a supported image format
        """
        path = Path(image_path)
        if not path.exists():
            raise FileNotFoundError(f"Image file not found: {image_path}")

        # Determine MIME type from extension (case-insensitive)
        mime_type = self._MIME_TYPES.get(path.suffix.lower())
        if not mime_type:
            raise ValueError(f"Unsupported image format: {path.suffix}")

        image_data = base64.b64encode(path.read_bytes()).decode("utf-8")
        return {
            "inlineData": {
                "mimeType": mime_type,
                "data": image_data,
            }
        }

    def generate_content(
        self,
        prompt: str,
        input_images: Optional[List[str]] = None,
        aspect_ratio: str = "1:1",
        output_modality: Optional[List[str]] = None,
        output_path: str = "output.png"
    ) -> Dict[str, Any]:
        """
        Generate or edit images using Gemini Flash Image

        Args:
            prompt: Text prompt for image generation/editing
            input_images: Optional list of image file paths for editing/composition
            aspect_ratio: Output aspect ratio (default: "1:1")
            output_modality: Response modality, e.g. ["Image"] or ["Text", "Image"] (default: ["Text", "Image"])
            output_path: Path to save the generated image (default: "output.png").
                If the response holds several images, the first is written here and
                later ones get a numeric suffix before the extension
                (e.g. "output_2.png") so none is overwritten.

        Returns:
            Dict containing the API response

        Raises:
            ValueError: If aspect_ratio is not one of ASPECT_RATIOS, or an input
                image has an unsupported extension
            FileNotFoundError: If an input image does not exist
            requests.exceptions.RequestException: If the HTTP request fails; the
                error is printed and then re-raised
        """
        if aspect_ratio not in self.ASPECT_RATIOS:
            raise ValueError(f"Invalid aspect ratio. Must be one of: {', '.join(self.ASPECT_RATIOS)}")
        if output_modality is None:
            output_modality = ["Text", "Image"]

        # Build request payload: the prompt first, then any input images
        parts: List[Dict[str, Any]] = [{"text": prompt}]
        parts.extend(self._encode_image(img_path) for img_path in input_images or [])

        payload = {
            "contents": [{
                "parts": parts
            }],
            "generationConfig": {
                "responseModalities": output_modality,
                "imageConfig": {
                    "aspectRatio": aspect_ratio
                }
            }
        }

        # The key travels in a header rather than the query string so it never
        # becomes part of the URL that requests embeds in HTTP error messages
        # (which are printed below).
        headers = {
            "Content-Type": "application/json",
            "x-goog-api-key": self.api_key,
        }

        try:
            response = requests.post(
                self.BASE_URL,
                headers=headers,
                json=payload,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            result = response.json()
        except requests.exceptions.RequestException as e:
            print(f"API request failed: {e}")
            if getattr(e, "response", None) is not None:
                print(f"Response: {e.response.text}")
            raise

        # Extract and save image(s) if present
        self._save_response_parts(result, output_path)
        return result

    def _save_response_parts(self, result: Dict[str, Any], output_path: str) -> List[str]:
        """
        Save every image part of an API response and print every text part

        Args:
            result: Decoded JSON response from the API
            output_path: Path for the first image; later images get a numeric suffix

        Returns:
            List of the file paths written, in response order
        """
        saved_paths: List[str] = []
        for candidate in result.get("candidates", []):
            for part in candidate.get("content", {}).get("parts", []):
                if "inlineData" in part:
                    target = self._numbered_output_path(output_path, len(saved_paths))
                    self._save_image(part["inlineData"], target)
                    print(f"Image saved to: {target}")
                    saved_paths.append(target)
                elif "text" in part:
                    print(f"Text response: {part['text']}")
        return saved_paths

    @staticmethod
    def _numbered_output_path(output_path: str, index: int) -> str:
        """
        Return the destination for the index-th (0-based) image of a response

        Args:
            output_path: Path requested by the caller
            index: Number of images already saved for this response

        Returns:
            output_path unchanged for index 0, otherwise the same path with
            "_<index + 1>" inserted before the extension
        """
        if index == 0:
            return output_path
        path = Path(output_path)
        return str(path.with_name(f"{path.stem}_{index + 1}{path.suffix}"))

    def _save_image(self, inline_data: Dict[str, str], output_path: str) -> None:
        """
        Save base64 encoded image to file

        Args:
            inline_data: Dict containing mimeType and base64 data
            output_path: Path to save the image
        """
        image_data = base64.b64decode(inline_data["data"])
        with open(output_path, "wb") as f:
            f.write(image_data)
def main():
    """Run the command-line front end of the Gemini Image Tool.

    Parses the command line, sends one generation request through
    GeminiImageTool, and optionally dumps the raw API response as JSON.

    Returns:
        0 when everything succeeded, 1 when any exception was raised (the
        error message is printed before returning)
    """
    usage_examples = """
Examples:
# Generate an image from text
python gemini_image_tool.py "A cat eating a banana in space" -o cat_banana.png
# Edit an existing image
python gemini_image_tool.py "Remove the background" -i photo.jpg -o edited.png
# Compose multiple images
python gemini_image_tool.py "Combine these into a collage" -i img1.jpg -i img2.jpg -o collage.png
# Specify aspect ratio
python gemini_image_tool.py "A landscape photo" -o wide.png --aspect-ratio 16:9
# Image-only output (no text)
python gemini_image_tool.py "A red apple" -o apple.png --image-only
"""
    cli = argparse.ArgumentParser(
        description="Generate and edit images using Gemini 2.5 Flash Image (Nano Banana)",
        # Raw formatter keeps the line breaks of the examples above intact
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    cli.add_argument("prompt", help="Text prompt for image generation/editing")
    cli.add_argument(
        "-i", "--input",
        dest="input_images",
        action="append",
        help="Input image file path (can be specified multiple times)",
    )
    cli.add_argument(
        "-o", "--output",
        default="output.png",
        help="Output image file path (default: output.png)",
    )
    cli.add_argument(
        "-a", "--aspect-ratio",
        default="1:1",
        choices=GeminiImageTool.ASPECT_RATIOS,
        help="Output aspect ratio (default: 1:1)",
    )
    cli.add_argument(
        "--image-only",
        action="store_true",
        help="Request image-only output (no text response)",
    )
    cli.add_argument("--api-key", help="Google AI API key (or set GEMINI_API_KEY env variable)")
    cli.add_argument("--save-json", help="Save full API response to JSON file")
    options = cli.parse_args()

    try:
        client = GeminiImageTool(api_key=options.api_key)
        api_response = client.generate_content(
            prompt=options.prompt,
            input_images=options.input_images,
            aspect_ratio=options.aspect_ratio,
            # --image-only drops the text modality from the request
            output_modality=["Image"] if options.image_only else ["Text", "Image"],
            output_path=options.output,
        )

        json_target = options.save_json
        if json_target:
            with open(json_target, "w") as json_file:
                json.dump(api_response, json_file, indent=2)
            print(f"Full response saved to: {json_target}")

        print("\nSuccess!")
    except Exception as e:
        # CLI boundary: report any failure and turn it into a non-zero status
        print(f"Error: {e}")
        return 1
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status. SystemExit is
    # raised directly because the bare exit() helper is injected by the site
    # module for interactive sessions and is not available under `python -S`.
    raise SystemExit(main())