MCP Screenshot Server

mcp-screenshot-server
src
mcp_screenshot_server

server.py•76.8 KiB

"""MCP Screenshot Server - Main server implementation.""" import argparse import json import math import os import subprocess import sys import tempfile import zipfile from datetime import datetime from pathlib import Path from typing import Annotated, Literal from mcp.server.fastmcp import FastMCP, Image from PIL import Image as PILImage from PIL import ImageColor, ImageDraw, ImageEnhance, ImageFilter, ImageOps from pydantic import BaseModel, Field from .models import ( AnnotationResult, Base64Result, ClipboardResult, ComparisonResult, ConfigureLimitsResult, DeleteResult, ImageInfo, ImageListResult, MemoryStatsResult, PreviewResult, SaveResult, ScreenshotResult, SessionExportResult, SessionImportResult, StepAnnotationResult, UndoCountResult, ) from .storage import ( _image_history, _image_metadata, _image_order, _image_store, evict_if_needed, get_callout_counter, get_font, get_limits, get_next_callout_number, get_total_memory_mb, image_to_base64, remove_image_internal, set_callout_counter, store_image, ) from .storage import ( configure_limits as storage_configure_limits, ) from .storage import ( get_image as storage_get_image, ) from .storage import ( reset_callout_counter as storage_reset_callout_counter, ) # Initialize the MCP server mcp = FastMCP( "Screenshot Server", json_response=True, instructions=""" MCP Screenshot Server - A powerful tool for capturing and annotating screenshots. ## 🎯 RECOMMENDED: Smart Annotation Tools (Use These!) ### annotate - Unified smart annotation with flexible positioning Position formats: named ("top-left", "center", "bottom-right"), percentage ("50%, 30%"), or pixels ("100, 200") Example: annotate(img, "box", "top-left", width=200, color="blue") Example: annotate(img, "text", "center", text="Important!") Example: annotate(img, "arrow", "20%,50%", end_position="80%,50%") ### batch_annotate - Apply multiple annotations in ONE call Pass JSON array: [{"type":"box","position":"top-left"},{"type":"text","position":"center","text":"Hi"}] ### label_regions - Quickly label multiple areas Pass JSON object: {"Header":"top-center", "Sidebar":"center-left", "Main":"center"} ## Capture Tools: - capture_screenshot: Capture full screen, window, or region - load_image: Load an existing image file ## Basic Annotation Tools (for precise pixel control): - add_box, add_line, add_arrow, add_text, add_circle, add_highlight - add_numbered_callout, add_border ## Editing Tools: - blur_region, crop_image, resize_image, rotate_image, flip_image - adjust_brightness, adjust_contrast, add_watermark - undo, get_undo_count ## Export Tools: - save_image, quick_save, copy_to_clipboard, open_in_preview ## Session Tools: - list_images, get_image, duplicate_image, delete_image """, ) # Aliases for backward compatibility with internal function names _store_image = store_image _get_image = storage_get_image _image_to_base64 = image_to_base64 _get_font = get_font # ============================================================================= # Screenshot Capture Tools # ============================================================================= @mcp.tool() def capture_screenshot( mode: Annotated[ Literal["fullscreen", "region", "window"], Field(description="Capture mode: fullscreen, region (interactive selection), or window") ] = "fullscreen", x: Annotated[int | None, Field(description="X coordinate for region capture")] = None, y: Annotated[int | None, Field(description="Y coordinate for region capture")] = None, width: Annotated[int | None, Field(description="Width for region capture")] = None, height: Annotated[int | None, Field(description="Height for region capture")] = None, window_id: Annotated[int | None, Field(description="Window ID for window capture (macOS). Use 'osascript' or 'GetWindowID' to find window IDs.")] = None, ) -> ScreenshotResult: """ Capture a screenshot of the screen, a region, or a specific window. On macOS, this uses the native screencapture command. On other systems, it uses PIL's ImageGrab or pyautogui as fallback. For window capture on macOS, you need the numeric window ID (not the window name). You can find window IDs using: osascript -e 'tell app "System Events" to get id of windows of processes' """ with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: tmp_path = tmp.name try: if sys.platform == "darwin": # macOS - use native screencapture cmd = ["screencapture"] if mode == "region" and all(v is not None for v in [x, y, width, height]): # Capture specific region cmd.extend(["-R", f"{x},{y},{width},{height}"]) elif mode == "region": # Interactive region selection cmd.append("-i") elif mode == "window": if window_id is not None: # Capture specific window by ID (screencapture -l requires integer window ID) cmd.extend(["-l", str(window_id)]) else: # Interactive window selection cmd.append("-w") # fullscreen is default behavior cmd.append(tmp_path) result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) if result.returncode != 0: raise RuntimeError(f"screencapture failed: {result.stderr}") else: # Windows/Linux - use PIL or pyautogui try: from PIL import ImageGrab if mode == "region" and all(v is not None for v in [x, y, width, height]): bbox = (x, y, x + width, y + height) screenshot = ImageGrab.grab(bbox=bbox) else: screenshot = ImageGrab.grab() screenshot.save(tmp_path, "PNG") except Exception as e: raise RuntimeError(f"Screenshot capture failed: {e}") from e # Load and store the image image = PILImage.open(tmp_path) image_id = _store_image(image) return ScreenshotResult( image_id=image_id, width=image.width, height=image.height, message=f"Screenshot captured successfully ({mode} mode)" ) finally: # Cleanup temp file if os.path.exists(tmp_path): os.unlink(tmp_path) @mcp.tool() def load_image( path: Annotated[str, Field(description="Path to the image file to load")] ) -> ScreenshotResult: """Load an existing image file for annotation.""" path = os.path.expanduser(path) if not os.path.exists(path): raise FileNotFoundError(f"Image file not found: {path}") image = PILImage.open(path) image_id = _store_image(image) return ScreenshotResult( image_id=image_id, width=image.width, height=image.height, message=f"Image loaded from {path}" ) # ============================================================================= # Annotation Tools # ============================================================================= @mcp.tool() def add_box( image_id: Annotated[str, Field(description="ID of the image to annotate")], x: Annotated[int, Field(description="X coordinate of the top-left corner")], y: Annotated[int, Field(description="Y coordinate of the top-left corner")], width: Annotated[int, Field(description="Width of the box")], height: Annotated[int, Field(description="Height of the box")], color: Annotated[str, Field(description="Color of the box (e.g., 'red', '#FF0000')")] = "red", line_width: Annotated[int, Field(gt=0, description="Width of the box outline")] = 3, fill: Annotated[str | None, Field(description="Fill color (None for no fill)")] = None, ) -> AnnotationResult: """Draw a rectangle/box on the image.""" image = _get_image(image_id) draw = ImageDraw.Draw(image, "RGBA") # Convert fill color with transparency if needed fill_color = None if fill: if fill.startswith("#") and len(fill) == 7: # Add transparency to hex color fill_color = fill + "80" # 50% opacity else: fill_color = fill draw.rectangle( [x, y, x + width, y + height], outline=color, width=line_width, fill=fill_color ) _store_image(image, image_id) return AnnotationResult( image_id=image_id, message=f"Box added at ({x}, {y}) with size {width}x{height}" ) @mcp.tool() def add_line( image_id: Annotated[str, Field(description="ID of the image to annotate")], x1: Annotated[int, Field(description="X coordinate of the start point")], y1: Annotated[int, Field(description="Y coordinate of the start point")], x2: Annotated[int, Field(description="X coordinate of the end point")], y2: Annotated[int, Field(description="Y coordinate of the end point")], color: Annotated[str, Field(description="Color of the line")] = "red", line_width: Annotated[int, Field(gt=0, description="Width of the line")] = 3, ) -> AnnotationResult: """Draw a line on the image.""" image = _get_image(image_id) draw = ImageDraw.Draw(image) draw.line([(x1, y1), (x2, y2)], fill=color, width=line_width) _store_image(image, image_id) return AnnotationResult( image_id=image_id, message=f"Line drawn from ({x1}, {y1}) to ({x2}, {y2})" ) @mcp.tool() def add_arrow( image_id: Annotated[str, Field(description="ID of the image to annotate")], x1: Annotated[int, Field(description="X coordinate of the arrow start")], y1: Annotated[int, Field(description="Y coordinate of the arrow start")], x2: Annotated[int, Field(description="X coordinate of the arrow end (tip)")], y2: Annotated[int, Field(description="Y coordinate of the arrow end (tip)")], color: Annotated[str, Field(description="Color of the arrow")] = "red", line_width: Annotated[int, Field(gt=0, description="Width of the arrow line")] = 3, head_size: Annotated[int, Field(description="Size of the arrow head")] = 15, ) -> AnnotationResult: """Draw an arrow on the image.""" image = _get_image(image_id) draw = ImageDraw.Draw(image) # Draw the main line draw.line([(x1, y1), (x2, y2)], fill=color, width=line_width) # Calculate arrow head angle = math.atan2(y2 - y1, x2 - x1) angle1 = angle + math.pi * 0.8 angle2 = angle - math.pi * 0.8 # Arrow head points head_x1 = x2 + head_size * math.cos(angle1) head_y1 = y2 + head_size * math.sin(angle1) head_x2 = x2 + head_size * math.cos(angle2) head_y2 = y2 + head_size * math.sin(angle2) # Draw arrow head as filled triangle draw.polygon( [(x2, y2), (head_x1, head_y1), (head_x2, head_y2)], fill=color ) _store_image(image, image_id) return AnnotationResult( image_id=image_id, message=f"Arrow drawn from ({x1}, {y1}) to ({x2}, {y2})" ) @mcp.tool() def add_text( image_id: Annotated[str, Field(description="ID of the image to annotate")], x: Annotated[int, Field(description="X coordinate for the text")], y: Annotated[int, Field(description="Y coordinate for the text")], text: Annotated[str, Field(description="The text to add")], color: Annotated[str, Field(description="Color of the text")] = "red", font_size: Annotated[int, Field(gt=0, description="Size of the font")] = 24, background: Annotated[str | None, Field(description="Background color for the text")] = None, ) -> AnnotationResult: """Add text annotation to the image.""" image = _get_image(image_id) draw = ImageDraw.Draw(image) font = _get_font(font_size) # Draw background if specified if background: bbox = draw.textbbox((x, y), text, font=font) padding = 5 draw.rectangle( [bbox[0] - padding, bbox[1] - padding, bbox[2] + padding, bbox[3] + padding], fill=background ) draw.text((x, y), text, fill=color, font=font) _store_image(image, image_id) return AnnotationResult( image_id=image_id, message=f"Text '{text}' added at ({x}, {y})" ) @mcp.tool() def add_circle( image_id: Annotated[str, Field(description="ID of the image to annotate")], x: Annotated[int, Field(description="X coordinate of the center")], y: Annotated[int, Field(description="Y coordinate of the center")], radius: Annotated[int, Field(description="Radius of the circle")], color: Annotated[str, Field(description="Color of the circle")] = "red", line_width: Annotated[int, Field(gt=0, description="Width of the circle outline")] = 3, fill: Annotated[str | None, Field(description="Fill color (None for no fill)")] = None, ) -> AnnotationResult: """Draw a circle on the image.""" image = _get_image(image_id) draw = ImageDraw.Draw(image, "RGBA") # Calculate bounding box bbox = [x - radius, y - radius, x + radius, y + radius] draw.ellipse(bbox, outline=color, width=line_width, fill=fill) _store_image(image, image_id) return AnnotationResult( image_id=image_id, message=f"Circle added at ({x}, {y}) with radius {radius}" ) @mcp.tool() def add_highlight( image_id: Annotated[str, Field(description="ID of the image to annotate")], x: Annotated[int, Field(description="X coordinate of the top-left corner")], y: Annotated[int, Field(description="Y coordinate of the top-left corner")], width: Annotated[int, Field(description="Width of the highlight area")], height: Annotated[int, Field(description="Height of the highlight area")], color: Annotated[str, Field(description="Color of the highlight")] = "yellow", opacity: Annotated[int, Field(ge=0, le=255, description="Opacity (0-255)")] = 100, ) -> AnnotationResult: """Add a semi-transparent highlight region to the image.""" image = _get_image(image_id).convert("RGBA") # Create overlay overlay = PILImage.new("RGBA", image.size, (0, 0, 0, 0)) draw = ImageDraw.Draw(overlay) # Parse color and add opacity try: rgb = ImageColor.getrgb(color) rgba = (*rgb, opacity) except ValueError: rgba = (255, 255, 0, opacity) # Default to yellow draw.rectangle([x, y, x + width, y + height], fill=rgba) # Composite the images result = PILImage.alpha_composite(image, overlay) result = result.convert("RGB") _store_image(result, image_id) return AnnotationResult( image_id=image_id, message=f"Highlight added at ({x}, {y}) with size {width}x{height}" ) # ============================================================================= # Smart Annotation Tools (Unified & Auto-Positioning) # ============================================================================= # Named position aliases - 5x5 grid for more precision _NAMED_POSITIONS = { # 3x3 basic grid (edges at 5%) "top-left": (0.05, 0.05), "top-center": (0.5, 0.05), "top-right": (0.95, 0.05), "center-left": (0.05, 0.5), "center": (0.5, 0.5), "center-right": (0.95, 0.5), "bottom-left": (0.05, 0.95), "bottom-center": (0.5, 0.95), "bottom-right": (0.95, 0.95), # 5x5 extended grid for more precision "top-left-quarter": (0.25, 0.25), "top-right-quarter": (0.75, 0.25), "bottom-left-quarter": (0.25, 0.75), "bottom-right-quarter": (0.75, 0.75), # Edge midpoints "top-left-edge": (0.0, 0.0), "top-right-edge": (1.0, 0.0), "bottom-left-edge": (0.0, 1.0), "bottom-right-edge": (1.0, 1.0), } # Anchor points for element alignment _ANCHORS = { "top-left": (0.0, 0.0), "top-center": (0.5, 0.0), "top-right": (1.0, 0.0), "center-left": (0.0, 0.5), "center": (0.5, 0.5), "center-right": (1.0, 0.5), "bottom-left": (0.0, 1.0), "bottom-center": (0.5, 1.0), "bottom-right": (1.0, 1.0), } def _parse_position( pos: str | tuple[float, float] | None, image_width: int, image_height: int, element_width: int = 0, element_height: int = 0, anchor: str = "center", offset_x: int = 0, offset_y: int = 0, ) -> tuple[int, int]: """ Parse position from named position, percentage, or pixels with anchor support. Formats: - Named: "top-left", "center", "bottom-right", etc. - Percentage: "50%, 30%" or (0.5, 0.3) - Pixels: "100px, 200px" or just "100, 200" for absolute positioning Anchor determines which part of the element aligns to the position: - "top-left": element's top-left corner at position - "center": element's center at position (default) - "bottom-right": element's bottom-right corner at position """ if pos is None: return (0, 0) # Parse the position to get ratios (0-1) or absolute pixels is_absolute = False if isinstance(pos, tuple): px, py = pos if px > 1 or py > 1: is_absolute = True elif pos in _NAMED_POSITIONS: px, py = _NAMED_POSITIONS[pos] else: # Parse string like "50%, 30%" or "100px, 200px" or "100, 200" parts = [p.strip() for p in pos.split(",")] if len(parts) != 2: raise ValueError(f"Invalid position format: {pos}") def parse_value(v: str, max_val: int) -> tuple[float, bool]: v = v.strip() if v.endswith("%"): return float(v[:-1]) / 100.0, False if v.endswith("px"): return float(v[:-2]), True val = float(v) # If > 1 without %, treat as pixels if val > 1: return val, True return val, False px, abs_x = parse_value(parts[0], image_width) py, abs_y = parse_value(parts[1], image_height) is_absolute = abs_x or abs_y # If one is absolute, convert both to pixels if is_absolute: if not abs_x: px = px * image_width if not abs_y: py = py * image_height # Convert ratios to pixels if is_absolute: x, y = int(px), int(py) else: x = int(px * image_width) y = int(py * image_height) # Apply anchor adjustment anchor_x, anchor_y = _ANCHORS.get(anchor, (0.5, 0.5)) x = x - int(element_width * anchor_x) y = y - int(element_height * anchor_y) # Apply offset x += offset_x y += offset_y # Clamp to image bounds (keep at least part of element visible) x = max(-element_width + 10, min(x, image_width - 10)) y = max(-element_height + 10, min(y, image_height - 10)) return (x, y) def _auto_adjust_position( x: int, y: int, width: int, height: int, image_width: int, image_height: int, padding: int = 10 ) -> tuple[int, int]: """Auto-adjust position to keep annotation within bounds with padding.""" # Ensure annotation stays within image bounds with padding x = max(padding, min(x, image_width - width - padding)) y = max(padding, min(y, image_height - height - padding)) return (x, y) class AnnotationSpec(BaseModel): """Specification for a single annotation.""" type: Literal["box", "circle", "arrow", "text", "highlight", "line", "callout"] position: str = Field(description="Position: 'top-left', 'center', '50%,30%', or '100,200'") text: str | None = Field(default=None, description="Text for text/callout annotations") width: int | None = Field(default=None, description="Width for box/highlight") height: int | None = Field(default=None, description="Height for box/highlight") radius: int | None = Field(default=None, description="Radius for circle") end_position: str | None = Field(default=None, description="End position for arrow/line") color: str = Field(default="red", description="Color") line_width: int = Field(default=3, description="Line width") font_size: int = Field(default=24, description="Font size for text") @mcp.tool() def precise_annotate( image_id: Annotated[str, Field(description="ID of the image to annotate")], annotation_type: Annotated[ Literal["box", "circle", "text", "arrow", "line"], Field(description="Type of annotation") ], x: Annotated[int, Field(description="X coordinate in pixels (from left edge)")], y: Annotated[int, Field(description="Y coordinate in pixels (from top edge)")], text: Annotated[str | None, Field(description="Text content (for text type)")] = None, width: Annotated[int, Field(description="Width in pixels (for box)")] = 100, height: Annotated[int, Field(description="Height in pixels (for box)")] = 50, radius: Annotated[int, Field(description="Radius in pixels (for circle)")] = 30, x2: Annotated[int | None, Field(description="End X coordinate (for arrow/line)")] = None, y2: Annotated[int | None, Field(description="End Y coordinate (for arrow/line)")] = None, color: Annotated[str, Field(description="Color (name or hex)")] = "red", line_width: Annotated[int, Field(gt=0, description="Line/border width")] = 3, font_size: Annotated[int, Field(gt=0, description="Font size for text")] = 24, ) -> AnnotationResult: """ Pixel-perfect annotation tool. Places annotations at EXACT pixel coordinates. No anchor adjustments, no auto-positioning - pure pixel placement. Coordinates: - x, y: Top-left corner for box, center for circle, start for text - x2, y2: End point for arrows/lines Examples: - precise_annotate(img, "box", x=100, y=200, width=150, height=50) - precise_annotate(img, "text", x=100, y=180, text="Label") - precise_annotate(img, "arrow", x=50, y=100, x2=200, y2=100) - precise_annotate(img, "circle", x=300, y=300, radius=40) """ image = _get_image(image_id) draw = ImageDraw.Draw(image, "RGBA") message = "" if annotation_type == "box": draw.rectangle([x, y, x + width, y + height], outline=color, width=line_width) message = f"Box at ({x}, {y}) size {width}x{height}" elif annotation_type == "circle": circle_bbox = [x - radius, y - radius, x + radius, y + radius] draw.ellipse(circle_bbox, outline=color, width=line_width) message = f"Circle at ({x}, {y}) radius {radius}" elif annotation_type == "text": if not text: raise ValueError("text parameter required for text type") font = _get_font(font_size) # Add background for readability text_bbox = draw.textbbox((x, y), text, font=font) padding = 4 draw.rectangle( [text_bbox[0] - padding, text_bbox[1] - padding, text_bbox[2] + padding, text_bbox[3] + padding], fill="white" ) draw.text((x, y), text, fill=color, font=font) message = f"Text '{text}' at ({x}, {y})" elif annotation_type == "arrow": if x2 is None or y2 is None: raise ValueError("x2 and y2 required for arrow type") draw.line([(x, y), (x2, y2)], fill=color, width=line_width) # Draw arrowhead angle = math.atan2(y2 - y, x2 - x) head_size = 15 left_angle = angle + math.pi * 0.85 right_angle = angle - math.pi * 0.85 left_x = x2 + head_size * math.cos(left_angle) left_y = y2 + head_size * math.sin(left_angle) right_x = x2 + head_size * math.cos(right_angle) right_y = y2 + head_size * math.sin(right_angle) draw.polygon([(x2, y2), (left_x, left_y), (right_x, right_y)], fill=color) message = f"Arrow from ({x}, {y}) to ({x2}, {y2})" elif annotation_type == "line": if x2 is None or y2 is None: raise ValueError("x2 and y2 required for line type") draw.line([(x, y), (x2, y2)], fill=color, width=line_width) message = f"Line from ({x}, {y}) to ({x2}, {y2})" _store_image(image, image_id) return AnnotationResult(image_id=image_id, message=message) @mcp.tool() def annotate( image_id: Annotated[str, Field(description="ID of the image to annotate")], annotation_type: Annotated[ Literal["box", "circle", "arrow", "text", "highlight", "line", "callout"], Field(description="Type of annotation") ], position: Annotated[ str, Field(description="Position: named, percentage ('50%,30%'), or pixels ('100px,200px' or '100,200')") ] = "center", text: Annotated[str | None, Field(description="Text content (for text/callout types)")] = None, width: Annotated[int | None, Field(description="Width in pixels")] = None, height: Annotated[int | None, Field(description="Height in pixels")] = None, radius: Annotated[int | None, Field(description="Radius for circles")] = None, end_position: Annotated[str | None, Field(description="End position for arrows/lines")] = None, color: Annotated[str, Field(description="Color")] = "red", line_width: Annotated[int, Field(gt=0, description="Line width")] = 3, font_size: Annotated[int, Field(gt=0, description="Font size for text")] = 24, anchor: Annotated[ str, Field(description="Anchor point: which part of element aligns to position (top-left, center, bottom-right, etc.)") ] = "center", offset_x: Annotated[int, Field(description="Horizontal offset in pixels (positive=right)")] = 0, offset_y: Annotated[int, Field(description="Vertical offset in pixels (positive=down)")] = 0, auto_adjust: Annotated[bool, Field(description="Auto-adjust to stay within bounds")] = True, ) -> AnnotationResult: """ Smart unified annotation tool with flexible positioning and anchor support. Position formats: - Named: "top-left", "center", "bottom-right", "top-left-quarter", etc. - Percentage: "50%, 30%" (from top-left corner) - Pixels: "100px, 200px" or "100, 200" (absolute x, y) Anchor controls which part of the annotation aligns to the position: - "top-left": annotation's top-left corner at position - "center": annotation's center at position (default) - "bottom-right": annotation's bottom-right corner at position Offset allows fine-tuning: offset_x=10 moves 10px right, offset_y=-5 moves 5px up. Examples: - annotate(img, "box", "50%,10%", width=200, height=50, anchor="top-center") - annotate(img, "text", "100px,50px", text="Label", anchor="top-left", offset_x=5) - annotate(img, "callout", "center", text="Note", offset_y=-20) """ image = _get_image(image_id) img_w, img_h = image.size # Calculate default sizes based on image default_width = max(100, img_w // 5) default_height = max(60, img_h // 8) default_radius = max(30, min(img_w, img_h) // 10) # Use provided or default values w = width or default_width h = height or default_height r = radius or default_radius # Parse start position with anchor and offset support x, y = _parse_position( position, img_w, img_h, w, h, anchor=anchor, offset_x=offset_x, offset_y=offset_y ) # Auto-adjust if enabled if auto_adjust: if annotation_type in ["box", "highlight"]: x, y = _auto_adjust_position(x, y, w, h, img_w, img_h) elif annotation_type == "circle": x, y = _auto_adjust_position(x - r, y - r, r * 2, r * 2, img_w, img_h) x, y = x + r, y + r # Convert back to center draw = ImageDraw.Draw(image, "RGBA") message = "" if annotation_type == "box": draw.rectangle([x, y, x + w, y + h], outline=color, width=line_width) message = f"Box at ({x},{y}) size {w}x{h}" elif annotation_type == "circle": circle_bbox = [x - r, y - r, x + r, y + r] draw.ellipse(circle_bbox, outline=color, width=line_width) message = f"Circle at ({x},{y}) radius {r}" elif annotation_type == "text": if not text: text = "Label" # Get font font = _get_font(font_size) # Add background for readability text_bbox = draw.textbbox((x, y), text, font=font) text_w = int(text_bbox[2] - text_bbox[0]) text_h = int(text_bbox[3] - text_bbox[1]) if auto_adjust: x, y = _auto_adjust_position(x, y, text_w + 10, text_h + 6, img_w, img_h) # Draw background draw.rectangle([x - 3, y - 3, x + text_w + 6, y + text_h + 6], fill=(0, 0, 0, 180)) draw.text((x, y), text, fill=color, font=font) message = f"Text '{text}' at ({x},{y})" elif annotation_type == "callout": # Use numbered callout callout_num = get_next_callout_number() num = str(callout_num) callout_r = max(15, font_size // 2 + 5) # Draw circle background draw.ellipse( [x - callout_r, y - callout_r, x + callout_r, y + callout_r], fill=color, outline="white", width=2 ) # Draw number font = _get_font(font_size) num_bbox = draw.textbbox((0, 0), num, font=font) tw, th = num_bbox[2] - num_bbox[0], num_bbox[3] - num_bbox[1] draw.text((x - tw // 2, y - th // 2 - 2), num, fill="white", font=font) # Add label if provided if text: draw.text((x + callout_r + 5, y - th // 2), text, fill=color, font=font) message = f"Callout #{callout_num} at ({x},{y})" + (f" '{text}'" if text else "") elif annotation_type in ["arrow", "line"]: if not end_position: # Default: draw toward center ex, ey = img_w // 2, img_h // 2 else: ex, ey = _parse_position(end_position, img_w, img_h) draw.line([(x, y), (ex, ey)], fill=color, width=line_width) if annotation_type == "arrow": # Draw arrowhead angle = math.atan2(ey - y, ex - x) head_size = line_width * 5 left_x = ex - head_size * math.cos(angle - math.pi / 6) left_y = ey - head_size * math.sin(angle - math.pi / 6) right_x = ex - head_size * math.cos(angle + math.pi / 6) right_y = ey - head_size * math.sin(angle + math.pi / 6) draw.polygon([(ex, ey), (left_x, left_y), (right_x, right_y)], fill=color) message = f"Arrow from ({x},{y}) to ({ex},{ey})" else: message = f"Line from ({x},{y}) to ({ex},{ey})" elif annotation_type == "highlight": try: rgb = ImageColor.getrgb(color) rgba = (*rgb, 100) except ValueError: rgba = (255, 255, 0, 100) overlay = PILImage.new("RGBA", image.size, (0, 0, 0, 0)) overlay_draw = ImageDraw.Draw(overlay) overlay_draw.rectangle([x, y, x + w, y + h], fill=rgba) image = PILImage.alpha_composite(image.convert("RGBA"), overlay) image = image.convert("RGB") message = f"Highlight at ({x},{y}) size {w}x{h}" _store_image(image, image_id) return AnnotationResult(image_id=image_id, message=message) @mcp.tool() def batch_annotate( image_id: Annotated[str, Field(description="ID of the image to annotate")], annotations: Annotated[ str, Field(description="""JSON array of annotations. Each object has: - type: "box"|"circle"|"arrow"|"text"|"highlight"|"line"|"callout" - position: "top-left", "center", "50%,30%", or "100,200" - Optional: text, width, height, radius, end_position, color, line_width, font_size Example: [{"type":"box","position":"top-left","width":100,"height":50,"color":"blue"},{"type":"text","position":"center","text":"Hello"}]""") ], ) -> AnnotationResult: """ Apply multiple annotations in a single call. Pass a JSON array of annotation specs. Each annotation follows the same format as the annotate() tool. Example: [ {"type": "box", "position": "top-left", "width": 200, "height": 100, "color": "blue"}, {"type": "text", "position": "top-left", "text": "Header Area"}, {"type": "arrow", "position": "center-left", "end_position": "center-right", "color": "green"}, {"type": "callout", "position": "bottom-right", "text": "Click here"} ] """ try: specs = json.loads(annotations) except json.JSONDecodeError as e: raise ValueError(f"Invalid JSON: {e}") from e if not isinstance(specs, list): raise ValueError("annotations must be a JSON array") messages = [] for i, spec in enumerate(specs): if not isinstance(spec, dict): raise ValueError(f"Annotation {i} must be an object") ann_type = spec.get("type") if not ann_type: raise ValueError(f"Annotation {i} missing 'type'") result = annotate( image_id=image_id, annotation_type=ann_type, position=spec.get("position", "center"), text=spec.get("text"), width=spec.get("width"), height=spec.get("height"), radius=spec.get("radius"), end_position=spec.get("end_position"), color=spec.get("color", "red"), line_width=spec.get("line_width", 3), font_size=spec.get("font_size", 24), anchor=spec.get("anchor", "center"), offset_x=spec.get("offset_x", 0), offset_y=spec.get("offset_y", 0), auto_adjust=spec.get("auto_adjust", True), ) messages.append(result.message) return AnnotationResult( image_id=image_id, message=f"Applied {len(specs)} annotations: " + "; ".join(messages) ) @mcp.tool() def label_regions( image_id: Annotated[str, Field(description="ID of the image to label")], regions: Annotated[ str, Field(description="""JSON object mapping region names to positions. Example: {"Header": "top-center", "Sidebar": "center-left", "Main Content": "center", "Footer": "bottom-center"}""") ], style: Annotated[ Literal["box", "callout", "text"], Field(description="Style of labels") ] = "callout", color: Annotated[str, Field(description="Color for all labels")] = "red", ) -> AnnotationResult: """ Quickly label multiple regions of an image. Pass a JSON object mapping region names to positions. Example: { "Navigation": "top-left", "Search Bar": "top-center", "User Menu": "top-right", "Sidebar": "center-left", "Main Content": "center", "Footer": "bottom-center" } """ try: region_map = json.loads(regions) except json.JSONDecodeError as e: raise ValueError(f"Invalid JSON: {e}") from e if not isinstance(region_map, dict): raise ValueError("regions must be a JSON object") # Reset callout counter for consistent numbering storage_reset_callout_counter() messages = [] for name, position in region_map.items(): # Apply annotation (result is used implicitly - modifies image in store) annotate( image_id=image_id, annotation_type=style, position=position, text=name, color=color, ) messages.append(f"{name} at {position}") return AnnotationResult( image_id=image_id, message=f"Labeled {len(region_map)} regions: " + ", ".join(messages) ) # ============================================================================= # Composite Annotation Tools # ============================================================================= @mcp.tool() def annotate_step( image_id: Annotated[str, Field(description="ID of the image to annotate")], step_number: Annotated[int | None, Field(description="Step number (auto-increments if None)", ge=1)] = None, target_position: Annotated[ str, Field(description="Position the arrow points to. Named ('center'), percentage ('50%, 30%'), or pixels ('830px, 195px')") ] = "center", callout_position: Annotated[ str | None, Field(description="Position for the callout. If None, auto-placed near target. Named, percentage, or pixels.") ] = None, text: Annotated[str | None, Field(description="Optional text label to display near the callout")] = None, color: Annotated[str, Field(description="Color for callout and arrow")] = "red", callout_size: Annotated[int, Field(description="Size of the callout circle", ge=20, le=100)] = 40, arrow_width: Annotated[int, Field(description="Width of the arrow line", ge=1, le=20)] = 3, ) -> StepAnnotationResult: """ Add a numbered step annotation with callout circle, arrow, and optional text. This is a composite tool that combines a numbered callout with an arrow pointing to a target location. Useful for creating step-by-step tutorials or highlighting UI elements. If callout_position is not specified, it's automatically placed offset from the target. """ if step_number is None: step_number = get_next_callout_number() image = _get_image(image_id) draw = ImageDraw.Draw(image) img_width, img_height = image.size # Parse target position target_x, target_y = _parse_position(target_position, img_width, img_height) # Auto-calculate callout position if not specified if callout_position is None: # Place callout offset from target, preferring top-left quadrant offset = callout_size + 50 callout_x = target_x - offset if target_x > img_width // 2 else target_x + offset callout_y = target_y - offset if target_y > img_height // 2 else target_y + offset # Clamp to image bounds callout_x = max(callout_size, min(img_width - callout_size, callout_x)) callout_y = max(callout_size, min(img_height - callout_size, callout_y)) else: callout_x, callout_y = _parse_position(callout_position, img_width, img_height) # Draw arrow from callout to target angle = math.atan2(target_y - callout_y, target_x - callout_x) arrow_start_x = int(callout_x + (callout_size // 2 + 5) * math.cos(angle)) arrow_start_y = int(callout_y + (callout_size // 2 + 5) * math.sin(angle)) # Draw arrow line draw.line([(arrow_start_x, arrow_start_y), (target_x, target_y)], fill=color, width=arrow_width) # Draw arrow head head_size = 12 angle1 = angle + math.pi * 0.8 angle2 = angle - math.pi * 0.8 head_x1 = target_x + head_size * math.cos(angle1) head_y1 = target_y + head_size * math.sin(angle1) head_x2 = target_x + head_size * math.cos(angle2) head_y2 = target_y + head_size * math.sin(angle2) draw.polygon([(target_x, target_y), (head_x1, head_y1), (head_x2, head_y2)], fill=color) # Draw callout circle radius = callout_size // 2 draw.ellipse( [callout_x - radius, callout_y - radius, callout_x + radius, callout_y + radius], fill=color, outline=color ) # Draw step number font_size = int(callout_size * 0.6) font = _get_font(font_size) number_text = str(step_number) text_bbox = draw.textbbox((0, 0), number_text, font=font) text_w = int(text_bbox[2] - text_bbox[0]) text_h = int(text_bbox[3] - text_bbox[1]) draw.text( (callout_x - text_w // 2, callout_y - text_h // 2 - 2), number_text, fill="white", font=font ) # Draw optional text label if text: label_font = _get_font(16) label_bbox = draw.textbbox((0, 0), text, font=label_font) label_w = int(label_bbox[2] - label_bbox[0]) label_h = int(label_bbox[3] - label_bbox[1]) # Position label below callout label_x = callout_x - label_w // 2 label_y = callout_y + radius + 8 # Draw background for readability padding = 4 draw.rectangle( [label_x - padding, label_y - padding, label_x + label_w + padding, label_y + label_h + padding], fill="white", outline=color ) draw.text((label_x, label_y), text, fill=color, font=label_font) _store_image(image, image_id) return StepAnnotationResult( image_id=image_id, step_number=step_number, callout_position=(callout_x, callout_y), target_position=(target_x, target_y), message=f"Step {step_number} annotation added pointing to ({target_x}, {target_y})" + (f" with label '{text}'" if text else "") ) # ============================================================================= # Advanced Editing Tools # ============================================================================= @mcp.tool() def blur_region( image_id: Annotated[str, Field(description="ID of the image to annotate")], x: Annotated[int, Field(description="X coordinate of the top-left corner")], y: Annotated[int, Field(description="Y coordinate of the top-left corner")], width: Annotated[int, Field(description="Width of the region to blur")], height: Annotated[int, Field(description="Height of the region to blur")], blur_strength: Annotated[int, Field(ge=1, le=50, description="Blur strength (1-50, higher = more blur)")] = 20, pixelate: Annotated[bool, Field(description="Use pixelation instead of blur")] = False, ) -> AnnotationResult: """ Blur or pixelate a region of the image to hide sensitive information. Use this for hiding passwords, email addresses, personal info, etc. """ image = _get_image(image_id) # Crop the region region = image.crop((x, y, x + width, y + height)) if pixelate: # Pixelation: shrink and enlarge pixel_size = max(1, blur_strength // 2) small = region.resize( (max(1, region.width // pixel_size), max(1, region.height // pixel_size)), PILImage.Resampling.NEAREST ) region = small.resize(region.size, PILImage.Resampling.NEAREST) else: # Gaussian blur region = region.filter(ImageFilter.GaussianBlur(radius=blur_strength)) # Paste back image.paste(region, (x, y)) _store_image(image, image_id) effect = "pixelated" if pixelate else "blurred" return AnnotationResult( image_id=image_id, message=f"Region {effect} at ({x}, {y}) with size {width}x{height}" ) @mcp.tool() def crop_image( image_id: Annotated[str, Field(description="ID of the image to crop")], x: Annotated[int, Field(description="X coordinate of the top-left corner")], y: Annotated[int, Field(description="Y coordinate of the top-left corner")], width: Annotated[int, Field(description="Width of the crop area")], height: Annotated[int, Field(description="Height of the crop area")], ) -> ScreenshotResult: """Crop the image to a specific region.""" image = _get_image(image_id) # Validate bounds x = max(0, min(x, image.width)) y = max(0, min(y, image.height)) width = min(width, image.width - x) height = min(height, image.height - y) cropped = image.crop((x, y, x + width, y + height)) _store_image(cropped, image_id) return ScreenshotResult( image_id=image_id, width=cropped.width, height=cropped.height, message=f"Image cropped to {width}x{height} starting at ({x}, {y})" ) @mcp.tool() def resize_image( image_id: Annotated[str, Field(description="ID of the image to resize")], width: Annotated[int | None, Field(description="New width (or None to calculate from height)")] = None, height: Annotated[int | None, Field(description="New height (or None to calculate from width)")] = None, scale: Annotated[float | None, Field(description="Scale factor (e.g., 0.5 for half size)")] = None, maintain_aspect: Annotated[bool, Field(description="Maintain aspect ratio")] = True, ) -> ScreenshotResult: """Resize the image by dimensions or scale factor.""" image = _get_image(image_id) if scale is not None: new_width = int(image.width * scale) new_height = int(image.height * scale) elif width is not None and height is not None and not maintain_aspect: new_width = width new_height = height elif width is not None: new_width = width new_height = int(image.height * (width / image.width)) if maintain_aspect else image.height elif height is not None: new_height = height new_width = int(image.width * (height / image.height)) if maintain_aspect else image.width else: raise ValueError("Must specify width, height, or scale") resized = image.resize((new_width, new_height), PILImage.Resampling.LANCZOS) _store_image(resized, image_id) return ScreenshotResult( image_id=image_id, width=new_width, height=new_height, message=f"Image resized to {new_width}x{new_height}" ) @mcp.tool() def add_numbered_callout( image_id: Annotated[str, Field(description="ID of the image to annotate")], x: Annotated[int, Field(description="X coordinate for the callout center")], y: Annotated[int, Field(description="Y coordinate for the callout center")], number: Annotated[int | None, Field(description="Number to display (auto-increments if None)")] = None, color: Annotated[str, Field(description="Background color of the callout")] = "#ff3333", text_color: Annotated[str, Field(description="Color of the number text")] = "#ffffff", size: Annotated[int, Field(description="Size of the callout circle")] = 40, ) -> AnnotationResult: """ Add a numbered callout (circled number) to the image. Numbers auto-increment if not specified, making it easy to add sequential callouts. """ if number is None: number = get_next_callout_number() image = _get_image(image_id) draw = ImageDraw.Draw(image) # Draw circle background radius = size // 2 draw.ellipse( [x - radius, y - radius, x + radius, y + radius], fill=color, outline=color ) # Draw number callout_font_size = int(size * 0.6) font = _get_font(callout_font_size) text = str(number) bbox = draw.textbbox((0, 0), text, font=font) text_width = bbox[2] - bbox[0] text_height = bbox[3] - bbox[1] text_x = x - text_width // 2 text_y = y - text_height // 2 - 2 # Slight adjustment for visual centering draw.text((text_x, text_y), text, fill=text_color, font=font) _store_image(image, image_id) return AnnotationResult( image_id=image_id, message=f"Callout #{number} added at ({x}, {y})" ) @mcp.tool() def reset_callout_counter() -> DeleteResult: """Reset the auto-increment callout counter to 0.""" storage_reset_callout_counter() return DeleteResult(message="Callout counter reset to 0") @mcp.tool() def add_border( image_id: Annotated[str, Field(description="ID of the image")], width: Annotated[int, Field(description="Border width in pixels")] = 10, color: Annotated[str, Field(description="Border color")] = "#000000", ) -> ScreenshotResult: """Add a border around the entire image.""" image = _get_image(image_id) # Parse color try: border_color = ImageColor.getrgb(color) except ValueError: border_color = (0, 0, 0) bordered = ImageOps.expand(image, border=width, fill=border_color) _store_image(bordered, image_id) return ScreenshotResult( image_id=image_id, width=bordered.width, height=bordered.height, message=f"Added {width}px border" ) @mcp.tool() def undo( image_id: Annotated[str, Field(description="ID of the image to undo")] ) -> AnnotationResult: """Undo the last annotation on an image.""" if image_id not in _image_history or not _image_history[image_id]: raise ValueError(f"No undo history available for image '{image_id}'") # Restore previous state _image_store[image_id] = _image_history[image_id].pop() remaining = len(_image_history[image_id]) return AnnotationResult( image_id=image_id, message=f"Undo successful. {remaining} more undo(s) available." ) @mcp.tool() def get_undo_count( image_id: Annotated[str, Field(description="ID of the image")] ) -> UndoCountResult: """Get the number of available undo operations for an image.""" count = len(_image_history.get(image_id, [])) return UndoCountResult(image_id=image_id, undo_count=count) @mcp.tool() def quick_save( image_id: Annotated[str, Field(description="ID of the image to save")], filename: Annotated[str, Field(description="Filename (without path)")] = "screenshot.png", location: Annotated[ Literal["desktop", "downloads", "documents", "temp"], Field(description="Where to save the file") ] = "desktop", ) -> SaveResult: """ Quick save to common locations (Desktop, Downloads, Documents, or temp). Automatically determines the correct path based on the operating system. """ image = _get_image(image_id) # Determine the save directory (same paths work on macOS, Windows, and Linux) home = Path.home() locations = { "desktop": home / "Desktop", "downloads": home / "Downloads", "documents": home / "Documents", "temp": Path(tempfile.gettempdir()), } save_dir = locations.get(location, locations["desktop"]) save_dir.mkdir(parents=True, exist_ok=True) # Handle filename conflicts save_path = save_dir / filename if save_path.exists(): base = save_path.stem ext = save_path.suffix counter = 1 while save_path.exists(): save_path = save_dir / f"{base}_{counter}{ext}" counter += 1 # Save the image image.save(save_path, "PNG") return SaveResult( path=str(save_path), message=f"Image saved to {save_path}" ) @mcp.tool() def rotate_image( image_id: Annotated[str, Field(description="ID of the image to rotate")], angle: Annotated[ Literal[90, 180, 270], Field(description="Rotation angle: 90 (left), 180, or 270 (right)") ] = 90, ) -> ScreenshotResult: """Rotate the image by 90, 180, or 270 degrees.""" image = _get_image(image_id) # PIL rotates counter-clockwise, so we negate for intuitive behavior if angle == 90: rotated = image.transpose(PILImage.Transpose.ROTATE_90) elif angle == 180: rotated = image.transpose(PILImage.Transpose.ROTATE_180) else: # 270 rotated = image.transpose(PILImage.Transpose.ROTATE_270) _store_image(rotated, image_id) return ScreenshotResult( image_id=image_id, width=rotated.width, height=rotated.height, message=f"Image rotated {angle} degrees" ) @mcp.tool() def flip_image( image_id: Annotated[str, Field(description="ID of the image to flip")], direction: Annotated[ Literal["horizontal", "vertical"], Field(description="Flip direction: horizontal (mirror) or vertical") ] = "horizontal", ) -> ScreenshotResult: """Flip the image horizontally (mirror) or vertically.""" image = _get_image(image_id) if direction == "horizontal": flipped = image.transpose(PILImage.Transpose.FLIP_LEFT_RIGHT) else: flipped = image.transpose(PILImage.Transpose.FLIP_TOP_BOTTOM) _store_image(flipped, image_id) return ScreenshotResult( image_id=image_id, width=flipped.width, height=flipped.height, message=f"Image flipped {direction}ly" ) @mcp.tool() def add_watermark( image_id: Annotated[str, Field(description="ID of the image")], text: Annotated[str, Field(description="Watermark text")], position: Annotated[ Literal["bottom-right", "bottom-left", "top-right", "top-left", "center"], Field(description="Position of the watermark") ] = "bottom-right", opacity: Annotated[int, Field(ge=0, le=255, description="Opacity (0-255)")] = 128, font_size: Annotated[int, Field(gt=0, description="Font size")] = 24, color: Annotated[str, Field(description="Text color")] = "#ffffff", ) -> AnnotationResult: """Add a text watermark to the image.""" image = _get_image(image_id).convert("RGBA") # Create watermark layer watermark = PILImage.new("RGBA", image.size, (0, 0, 0, 0)) draw = ImageDraw.Draw(watermark) # Get font font = _get_font(font_size) # Calculate text size bbox = draw.textbbox((0, 0), text, font=font) text_width = bbox[2] - bbox[0] text_height = bbox[3] - bbox[1] # Calculate position padding = 20 if position == "bottom-right": x = image.width - text_width - padding y = image.height - text_height - padding elif position == "bottom-left": x = padding y = image.height - text_height - padding elif position == "top-right": x = image.width - text_width - padding y = padding elif position == "top-left": x = padding y = padding else: # center x = (image.width - text_width) // 2 y = (image.height - text_height) // 2 # Parse color and add opacity try: rgb = ImageColor.getrgb(color) rgba = (*rgb, opacity) except ValueError: rgba = (255, 255, 255, opacity) draw.text((x, y), text, fill=rgba, font=font) # Composite result = PILImage.alpha_composite(image, watermark) result = result.convert("RGB") _store_image(result, image_id) return AnnotationResult( image_id=image_id, message=f"Watermark '{text}' added at {position}" ) @mcp.tool() def adjust_brightness( image_id: Annotated[str, Field(description="ID of the image")], factor: Annotated[float, Field(description="Brightness factor (0.5=darker, 1.0=unchanged, 1.5=brighter)")] = 1.0, ) -> AnnotationResult: """Adjust image brightness.""" image = _get_image(image_id) enhancer = ImageEnhance.Brightness(image) adjusted = enhancer.enhance(factor) _store_image(adjusted, image_id) return AnnotationResult( image_id=image_id, message=f"Brightness adjusted by factor {factor}" ) @mcp.tool() def adjust_contrast( image_id: Annotated[str, Field(description="ID of the image")], factor: Annotated[float, Field(description="Contrast factor (0.5=less, 1.0=unchanged, 1.5=more)")] = 1.0, ) -> AnnotationResult: """Adjust image contrast.""" image = _get_image(image_id) enhancer = ImageEnhance.Contrast(image) adjusted = enhancer.enhance(factor) _store_image(adjusted, image_id) return AnnotationResult( image_id=image_id, message=f"Contrast adjusted by factor {factor}" ) # ============================================================================= # Image Comparison Tools # ============================================================================= @mcp.tool() def compare_images( image_id_1: Annotated[str, Field(description="ID of the first image")], image_id_2: Annotated[str, Field(description="ID of the second image")], highlight_differences: Annotated[bool, Field(description="Create a diff image highlighting differences")] = True, threshold: Annotated[int, Field(ge=0, le=255, description="Color difference threshold (0-255). Pixels differing by more than this are marked.")] = 10, diff_color: Annotated[str, Field(description="Color to highlight differences")] = "red", ) -> ComparisonResult: """ Compare two images and optionally create a diff image highlighting differences. Returns the percentage of pixels that differ and creates a new image showing the differences if highlight_differences is True. Useful for comparing before/after screenshots, detecting UI changes, or validating visual regression tests. """ img1 = _get_image(image_id_1) img2 = _get_image(image_id_2) # Ensure same size for comparison if img1.size != img2.size: raise ValueError( f"Images must be the same size. Image 1: {img1.size}, Image 2: {img2.size}. " "Use resize_image to match dimensions first." ) # Convert to RGB for comparison img1_rgb = img1.convert("RGB") img2_rgb = img2.convert("RGB") width, height = img1.size total_pixels = width * height diff_pixels = 0 # Create diff image if requested diff_img: PILImage.Image | None = None diff_draw: ImageDraw.ImageDraw | None = None diff_color_rgb: tuple[int, ...] | None = None if highlight_differences: diff_img = img1_rgb.copy() diff_draw = ImageDraw.Draw(diff_img) diff_color_rgb = ImageColor.getrgb(diff_color) # Compare pixels for y in range(height): for x in range(width): # For RGB images, getpixel returns (R, G, B) tuples # Use type: ignore since PIL's type hints are overly broad for converted RGB images p1: tuple[int, int, int] = img1_rgb.getpixel((x, y)) # type: ignore[assignment] p2: tuple[int, int, int] = img2_rgb.getpixel((x, y)) # type: ignore[assignment] # Calculate color difference color_diff = abs(p1[0] - p2[0]) + abs(p1[1] - p2[1]) + abs(p1[2] - p2[2]) if color_diff > threshold * 3: # threshold per channel diff_pixels += 1 if diff_draw is not None and diff_color_rgb is not None: diff_draw.point((x, y), fill=diff_color_rgb) diff_percentage = round((diff_pixels / total_pixels) * 100, 2) identical = diff_pixels == 0 if highlight_differences and diff_img is not None: diff_id = _store_image(diff_img) message = f"Comparison complete. {diff_percentage}% of pixels differ." if not identical: message += f" Diff image saved as '{diff_id}'." else: diff_id = image_id_1 # Return first image ID if no diff created message = f"Comparison complete. {diff_percentage}% of pixels differ." return ComparisonResult( image_id=diff_id, difference_percentage=diff_percentage, identical=identical, message=message, ) @mcp.tool() def create_side_by_side( image_id_1: Annotated[str, Field(description="ID of the first (left) image")], image_id_2: Annotated[str, Field(description="ID of the second (right) image")], label_1: Annotated[str, Field(description="Label for the first image")] = "Before", label_2: Annotated[str, Field(description="Label for the second image")] = "After", gap: Annotated[int, Field(ge=0, le=100, description="Gap between images in pixels")] = 20, label_height: Annotated[int, Field(ge=0, le=100, description="Height of label area (0 to disable labels)")] = 40, background_color: Annotated[str, Field(description="Background color for gap and labels")] = "#f0f0f0", ) -> ScreenshotResult: """ Create a side-by-side comparison image from two images. Useful for visual comparisons, before/after documentation, or A/B testing screenshots. Labels are added above each image if label_height > 0. """ img1 = _get_image(image_id_1) img2 = _get_image(image_id_2) # Calculate dimensions max_height = max(img1.height, img2.height) total_width = img1.width + gap + img2.width total_height = max_height + label_height # Create combined image bg_color = ImageColor.getrgb(background_color) combined = PILImage.new("RGB", (total_width, total_height), bg_color) # Paste images (vertically centered if different heights) y1_offset = label_height + (max_height - img1.height) // 2 y2_offset = label_height + (max_height - img2.height) // 2 combined.paste(img1, (0, y1_offset)) combined.paste(img2, (img1.width + gap, y2_offset)) # Add labels if enabled if label_height > 0: draw = ImageDraw.Draw(combined) font = _get_font(min(24, label_height - 8)) # Label 1 bbox1 = draw.textbbox((0, 0), label_1, font=font) text_w1 = bbox1[2] - bbox1[0] text_x1 = (img1.width - text_w1) // 2 text_y1 = (label_height - (bbox1[3] - bbox1[1])) // 2 draw.text((text_x1, text_y1), label_1, fill="black", font=font) # Label 2 bbox2 = draw.textbbox((0, 0), label_2, font=font) text_w2 = bbox2[2] - bbox2[0] text_x2 = img1.width + gap + (img2.width - text_w2) // 2 text_y2 = (label_height - (bbox2[3] - bbox2[1])) // 2 draw.text((text_x2, text_y2), label_2, fill="black", font=font) result_id = _store_image(combined) return ScreenshotResult( image_id=result_id, width=total_width, height=total_height, message=f"Side-by-side comparison created: '{label_1}' ({img1.width}x{img1.height}) | '{label_2}' ({img2.width}x{img2.height})", ) # ============================================================================= # Image Management Tools # ============================================================================= @mcp.tool() def list_images() -> ImageListResult: """List all images stored in the current session.""" images = [] for image_id, data in _image_store.items(): width, height = _image_metadata.get(image_id, (0, 0)) images.append(ImageInfo( image_id=image_id, width=width, height=height, size_bytes=len(data) )) return ImageListResult(images=images, count=len(images)) @mcp.tool() def get_memory_stats() -> MemoryStatsResult: """Get current memory usage statistics for the image store.""" max_images, max_memory_mb, undo_levels = get_limits() return MemoryStatsResult( image_count=len(_image_store), max_images=max_images, memory_mb=round(get_total_memory_mb(), 2), max_memory_mb=max_memory_mb, undo_levels=undo_levels, ) @mcp.tool() def configure_limits( max_images: Annotated[int | None, Field(description="Maximum number of images to keep (default: 50)", ge=1)] = None, max_memory_mb: Annotated[int | None, Field(description="Maximum memory in MB (default: 500)", ge=10)] = None, undo_levels: Annotated[int | None, Field(description="Maximum undo history per image (default: 10)", ge=1, le=100)] = None, ) -> ConfigureLimitsResult: """Configure memory limits for the image store. Triggers eviction if new limits are lower.""" new_max_images, new_max_memory_mb, new_undo_levels, evicted = storage_configure_limits( max_images, max_memory_mb, undo_levels ) return ConfigureLimitsResult( max_images=new_max_images, max_memory_mb=new_max_memory_mb, undo_levels=new_undo_levels, evicted_count=len(evicted), message=f"Limits updated. Evicted {len(evicted)} images." if evicted else "Limits updated.", ) @mcp.tool() def get_image( image_id: Annotated[str, Field(description="ID of the image to retrieve")] ) -> Image: """Get a specific image by its ID. Returns the image data.""" if image_id not in _image_store: raise ValueError(f"Image '{image_id}' not found. Use list_images to see available images.") return Image(data=_image_store[image_id], format="png") @mcp.tool() def duplicate_image( image_id: Annotated[str, Field(description="ID of the image to duplicate")] ) -> ScreenshotResult: """Create a copy of an existing image.""" image = _get_image(image_id) new_id = _store_image(image.copy()) return ScreenshotResult( image_id=new_id, width=image.width, height=image.height, message=f"Image duplicated from {image_id} to {new_id}" ) @mcp.tool() def delete_image( image_id: Annotated[str, Field(description="ID of the image to delete")] ) -> DeleteResult: """Delete an image from the session.""" if image_id not in _image_store: raise ValueError(f"Image '{image_id}' not found.") remove_image_internal(image_id) return DeleteResult(message=f"Image '{image_id}' deleted successfully") # ============================================================================= # Session Persistence Tools # ============================================================================= @mcp.tool() def export_session( path: Annotated[str, Field(description="Path to save the session archive (will be a .zip file)")], include_history: Annotated[bool, Field(description="Include undo history for each image")] = False, ) -> SessionExportResult: """ Export all images and metadata to a zip archive for later restoration. The archive contains: - All images as PNG files - A manifest.json with image IDs, dimensions, and order - Optionally, undo history for each image Use import_session to restore the session later. """ if not _image_store: raise ValueError("No images to export. Capture or load some images first.") path = os.path.expanduser(path) if not path.endswith(".zip"): path = f"{path}.zip" os.makedirs(os.path.dirname(path) or ".", exist_ok=True) images_list: list[dict[str, object]] = [] manifest: dict[str, object] = { "version": "1.0", "created_at": datetime.now().isoformat(), "image_count": len(_image_store), "images": images_list, "image_order": _image_order.copy(), "callout_counter": get_callout_counter(), } total_bytes = 0 with zipfile.ZipFile(path, "w", zipfile.ZIP_DEFLATED) as zf: for image_id, data in _image_store.items(): # Save image zf.writestr(f"images/{image_id}.png", data) total_bytes += len(data) # Add to manifest width, height = _image_metadata.get(image_id, (0, 0)) image_info = { "id": image_id, "width": width, "height": height, "size_bytes": len(data), } # Include history if requested if include_history and image_id in _image_history: history = _image_history[image_id] for i, hist_data in enumerate(history): zf.writestr(f"history/{image_id}/{i}.png", hist_data) total_bytes += len(hist_data) image_info["history_count"] = len(history) images_list.append(image_info) # Write manifest manifest_json = json.dumps(manifest, indent=2) zf.writestr("manifest.json", manifest_json) total_bytes += len(manifest_json) total_mb = round(total_bytes / (1024 * 1024), 2) return SessionExportResult( path=os.path.abspath(path), image_count=len(_image_store), total_size_mb=total_mb, message=f"Exported {len(_image_store)} images to {os.path.abspath(path)} ({total_mb} MB)", ) @mcp.tool() def import_session( path: Annotated[str, Field(description="Path to the session archive (.zip file)")], merge: Annotated[bool, Field(description="Merge with existing session (True) or replace (False)")] = True, restore_history: Annotated[bool, Field(description="Restore undo history if available")] = True, ) -> SessionImportResult: """ Import a previously exported session from a zip archive. By default, merges with the existing session. Set merge=False to replace all current images with the imported ones. """ path = os.path.expanduser(path) if not os.path.exists(path): raise ValueError(f"Session file not found: {path}") if not merge: # Clear existing session _image_store.clear() _image_history.clear() _image_metadata.clear() _image_order.clear() imported_ids = [] with zipfile.ZipFile(path, "r") as zf: # Read manifest try: manifest_data = zf.read("manifest.json") manifest = json.loads(manifest_data) except KeyError as e: raise ValueError("Invalid session archive: missing manifest.json") from e # Import images for image_info in manifest.get("images", []): image_id = image_info["id"] image_path = f"images/{image_id}.png" try: image_data = zf.read(image_path) except KeyError: continue # Skip missing images # Store image _image_store[image_id] = image_data _image_metadata[image_id] = (image_info.get("width", 0), image_info.get("height", 0)) if image_id not in _image_order: _image_order.append(image_id) imported_ids.append(image_id) # Restore history if available and requested if restore_history and image_info.get("history_count", 0) > 0: _image_history[image_id] = [] for i in range(image_info["history_count"]): hist_path = f"history/{image_id}/{i}.png" try: hist_data = zf.read(hist_path) _image_history[image_id].append(hist_data) except KeyError: break # Stop if history file missing # Restore callout counter if not merging if not merge and "callout_counter" in manifest: set_callout_counter(manifest["callout_counter"]) # Trigger eviction if needed evict_if_needed() return SessionImportResult( image_count=len(imported_ids), image_ids=imported_ids, message=f"Imported {len(imported_ids)} images from {path}", ) # ============================================================================= # Save and Export Tools # ============================================================================= @mcp.tool() def save_image( image_id: Annotated[str, Field(description="ID of the image to save")], path: Annotated[str, Field(description="File path to save the image to")], image_format: Annotated[ Literal["png", "jpg", "jpeg", "bmp", "gif", "webp"], Field(description="Image format") ] = "png", quality: Annotated[int, Field(ge=1, le=100, description="Quality for JPEG (1-100)")] = 95, ) -> SaveResult: """Save an image to disk.""" image = _get_image(image_id) # Expand user path path = os.path.expanduser(path) # Ensure directory exists os.makedirs(os.path.dirname(path) or ".", exist_ok=True) # Add extension if not present if not any(path.lower().endswith(f".{ext}") for ext in ["png", "jpg", "jpeg", "bmp", "gif", "webp"]): path = f"{path}.{image_format}" # Save with appropriate settings save_kwargs = {} pil_format = image_format.upper() # PIL uses 'JPEG' not 'JPG' if pil_format == "JPG": pil_format = "JPEG" if image_format.lower() in ["jpg", "jpeg"]: # Convert RGBA to RGB for JPEG if image.mode == "RGBA": image = image.convert("RGB") save_kwargs["quality"] = quality image.save(path, format=pil_format, **save_kwargs) return SaveResult( path=os.path.abspath(path), message=f"Image saved to {os.path.abspath(path)}" ) @mcp.tool() def copy_to_clipboard( image_id: Annotated[str, Field(description="ID of the image to copy")] ) -> ClipboardResult: """Copy an image to the system clipboard.""" if image_id not in _image_store: raise ValueError(f"Image '{image_id}' not found.") if sys.platform == "darwin": # macOS - use osascript with a temp file with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: tmp.write(_image_store[image_id]) tmp_path = tmp.name try: # Use AppleScript to copy image to clipboard script = f''' set theFile to POSIX file "{tmp_path}" set theImage to read theFile as TIFF picture set the clipboard to theImage ''' subprocess.run(["osascript", "-e", script], check=True, capture_output=True) return ClipboardResult(message="Image copied to clipboard successfully") finally: os.unlink(tmp_path) elif sys.platform == "win32": # Windows - use PowerShell with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: tmp.write(_image_store[image_id]) tmp_path = tmp.name try: ps_script = f''' Add-Type -AssemblyName System.Windows.Forms $image = [System.Drawing.Image]::FromFile("{tmp_path}") [System.Windows.Forms.Clipboard]::SetImage($image) ''' subprocess.run(["powershell", "-Command", ps_script], check=True, capture_output=True) return ClipboardResult(message="Image copied to clipboard successfully") finally: os.unlink(tmp_path) else: # Linux - try xclip or wl-copy with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: tmp.write(_image_store[image_id]) tmp_path = tmp.name try: # Try xclip first (X11) try: subprocess.run( ["xclip", "-selection", "clipboard", "-t", "image/png", "-i", tmp_path], check=True, capture_output=True ) return ClipboardResult(message="Image copied to clipboard successfully (xclip)") except FileNotFoundError: pass # Try wl-copy (Wayland) try: with open(tmp_path, "rb") as f: subprocess.run( ["wl-copy", "-t", "image/png"], stdin=f, check=True, capture_output=True ) return ClipboardResult(message="Image copied to clipboard successfully (wl-copy)") except FileNotFoundError as e: raise RuntimeError( "No clipboard tool found. Install xclip (X11) or wl-copy (Wayland)." ) from e finally: os.unlink(tmp_path) @mcp.tool() def get_image_base64( image_id: Annotated[str, Field(description="ID of the image")] ) -> Base64Result: """Get an image as a base64-encoded string.""" base64_data = _image_to_base64(image_id) return Base64Result( image_id=image_id, data=f"data:image/png;base64,{base64_data}", message="Image encoded as base64" ) @mcp.tool() def open_in_preview( image_id: Annotated[str, Field(description="ID of the image to open")], save_path: Annotated[str | None, Field(description="Optional path to save before opening")] = None, ) -> PreviewResult: """ Open an image in the native Preview app (macOS only). On macOS, this opens the image in Preview.app for viewing and native annotation. On other platforms, it opens with the default image viewer. """ if image_id not in _image_store: raise ValueError(f"Image '{image_id}' not found.") # Determine save path if save_path: file_path = os.path.expanduser(save_path) os.makedirs(os.path.dirname(file_path) or ".", exist_ok=True) else: # Create a temporary file that won't be auto-deleted file_path = os.path.join( tempfile.gettempdir(), f"mcp_screenshot_{image_id}.png" ) # Save the image image = _get_image(image_id) image.save(file_path, "PNG") # Open with native application if sys.platform == "darwin": # macOS - use Preview.app subprocess.run(["open", "-a", "Preview", file_path], check=True) return PreviewResult(message="Image opened in Preview.app", path=file_path) elif sys.platform == "win32": # Windows - use default viewer os.startfile(file_path) return PreviewResult(message="Image opened in default viewer", path=file_path) else: # Linux - use xdg-open subprocess.run(["xdg-open", file_path], check=True) return PreviewResult(message="Image opened in default viewer", path=file_path) @mcp.tool() def open_file_in_preview( path: Annotated[str, Field(description="Path to the image file to open")] ) -> PreviewResult: """ Open an image file directly in the native Preview app (macOS) or default viewer. This doesn't require loading the image into the session first. """ path = os.path.expanduser(path) if not os.path.exists(path): raise FileNotFoundError(f"File not found: {path}") if sys.platform == "darwin": subprocess.run(["open", "-a", "Preview", path], check=True) return PreviewResult(message=f"Opened {path} in Preview.app", path=path) elif sys.platform == "win32": os.startfile(path) return PreviewResult(message=f"Opened {path} in default viewer", path=path) else: subprocess.run(["xdg-open", path], check=True) return PreviewResult(message=f"Opened {path} in default viewer", path=path) # ============================================================================= # Server Entry Point # ============================================================================= def main(): """Run the MCP Screenshot Server.""" parser = argparse.ArgumentParser(description="MCP Screenshot Server") parser.add_argument( "--transport", choices=["stdio", "streamable-http", "sse"], default="stdio", help="Transport to use (default: stdio)" ) parser.add_argument("--host", default="0.0.0.0", help="Host for HTTP transports") parser.add_argument("--port", type=int, default=8000, help="Port for HTTP transports") args = parser.parse_args() if args.transport == "stdio": mcp.run(transport="stdio") else: mcp.settings.host = args.host mcp.settings.port = args.port mcp.run(transport=args.transport) if __name__ == "__main__": main()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/aamar-shahzad/mcp-screenshot-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

server.py•76.8 KiB