#!/usr/bin/env python3
"""
Screenshot UI Analyzer MCP Server
Analyzes app screenshots using OpenAI GPT-5.2 to identify UI/UX issues.
Uses FastMCP for simplified MCP server implementation.
References:
- GPT-5.2 Prompting Guide: https://cookbook.openai.com/examples/gpt-5/gpt-5-2_prompting_guide
- MCP Python SDK: https://github.com/modelcontextprotocol/python-sdk
"""
import asyncio
import base64
import io
import json
import os
import uuid
from datetime import datetime
from pathlib import Path
import httpx
from mcp.server.fastmcp import FastMCP
from PIL import Image
# ============================================================================
# Configuration
# ============================================================================
CONFIG_PATH = Path(__file__).parent / "config.json"
DEFAULT_CONFIG = {
"api_key": "",
"default_reasoning_effort": "high", # none, low, medium, high, xhigh
"default_model": "gpt-5.2",
"max_image_size": 1024, # Max dimension (width or height) in pixels
"jpeg_quality": 85, # JPEG compression quality (1-100)
"fast_mode": False, # Use smaller model for faster/cheaper analysis
"fast_mode_model": "gpt-4o-mini",
"fast_mode_reasoning": "low",
}
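# Example config.json (illustrative values; any omitted key falls back to DEFAULT_CONFIG):
# {
#     "api_key": "sk-...",
#     "default_reasoning_effort": "medium",
#     "max_image_size": 768,
#     "fast_mode": true
# }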
# Batch processing constants
MAX_BATCH_SIZE = 10  # Hard cap on images per batch (batch_analyze clamps to this)
MAX_REPORT_HISTORY = 20
# GPT-5.2 Optimized System Prompt
# Following: Context → Task → Constraints → Output format
SYSTEM_PROMPT = """## Context
You are a senior developer with 20+ years of experience specializing in UI/UX design and mobile/web application development. You have deep expertise in Human Interface Guidelines (iOS), Material Design (Android), and modern web standards.
## Task
Analyze the provided app screenshots and identify UI/UX issues that need fixing. Act as the developer's "eyes" - catch what they might miss.
## Analysis Categories
1. **UI Bugs**: Broken layouts, misaligned elements, overflow, incorrect colors, missing assets
2. **UX Problems**: Confusing navigation, poor hierarchy, accessibility issues, touch targets too small
3. **Logical Errors**: Inconsistent states, impossible combinations, data display errors
4. **Platform Violations**: Deviations from HIG (iOS) or Material Design (Android)
5. **Quick Wins**: Low-effort improvements with high impact
## Constraints
- Be concise: max 2 sentences per issue
- Prioritize by severity: Critical issues first
- Focus on actionable items only
- Skip obvious/trivial issues unless they affect UX
## Output Format
Before answering, verify: Have I checked all UI elements? Are issues properly prioritized?
```
## Summary
[1 sentence overview: X critical, Y major, Z minor issues found]
## Critical Issues (Must Fix)
- [Location]: [Problem] → [Fix]
## Major Issues
- [Location]: [Problem] → [Fix]
## Minor Issues
- [Location]: [Problem] → [Fix]
## Suggestions
- [Enhancement idea]
```"""
# Design Comparison System Prompt
COMPARE_SYSTEM_PROMPT = """## Context
You are a senior UI developer with 20+ years of experience specializing in pixel-perfect implementation and design QA. You excel at spotting visual discrepancies between design mockups and actual implementations.
## Task
Compare the design mockup (FIRST image) with the actual screenshot (SECOND image).
Identify all visual discrepancies and implementation gaps.
## Analysis Categories
1. **Layout Differences**: Spacing, alignment, positioning, margins, padding
2. **Color/Style Mismatches**: Colors, gradients, shadows, borders, opacity
3. **Missing Elements**: Components in design but not in implementation
4. **Extra Elements**: Components not in design but appear in implementation
5. **Typography Differences**: Font family, size, weight, line height, letter spacing
6. **Icon/Image Differences**: Size, style, color, positioning of icons and images
7. **Interactive States**: Hover, focus, active states if visible
## Tolerance Levels
- **Strict**: Flag all differences, even 1px variations
- **Normal**: Flag noticeable differences (2-4px, slight color variations OK)
- **Relaxed**: Only flag significant differences that affect UX
## Constraints
- Score consistency from 0-100 (100 = perfect match)
- Prioritize by impact: critical differences first
- Be specific with measurements where possible
- Skip differences that are clearly due to different device sizes
## Output Format
Before answering, verify: Have I compared all visible elements? Are issues properly categorized?
```
## Consistency Score: XX/100
## Critical Differences (Design Intent Broken)
- [Element]: Design [value] → Actual [value]
## Major Differences (Noticeable)
- [Element]: Design [value] → Actual [value]
## Minor Differences (Acceptable)
- [Element]: Design [value] → Actual [value]
## Recommendation
[1-2 sentence summary of what to fix first]
```"""
# Batch Summary System Prompt
BATCH_SUMMARY_PROMPT = """## Task
Summarize the UI/UX analysis results from multiple screenshot batches.
## Input
You will receive individual batch reports. Synthesize them into a cohesive summary.
## Output Format
```
## Overall Summary
- Total screenshots analyzed: X
- Critical issues: X | Major: X | Minor: X
- Most common problem: [category] (X occurrences)
## Top Priority Fixes
1. [Most critical issue across all batches]
2. [Second most critical]
3. [Third most critical]
## Pattern Analysis
- [Any recurring issues across multiple screens]
## Recommendations
- [Prioritized action items]
```"""
# Report storage with history
last_report: str | None = None
report_history: dict[str, dict] = {} # {id: {report, timestamp, image_count, type}}
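# Note: report storage is in-memory only, so history resets whenever the server process restarts.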
# ============================================================================
# Configuration Helpers
# ============================================================================
def load_config() -> dict:
"""Load configuration from config.json."""
if CONFIG_PATH.exists():
try:
return {**DEFAULT_CONFIG, **json.loads(CONFIG_PATH.read_text())}
except Exception:
pass
    return dict(DEFAULT_CONFIG)  # Return a copy so callers can't mutate the module-level defaults
def save_config(config: dict) -> None:
"""Save configuration to config.json."""
CONFIG_PATH.write_text(json.dumps(config, indent=2, ensure_ascii=False))
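    # Restrict to owner read/write since the file stores the API key (limited effect on Windows)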
CONFIG_PATH.chmod(0o600)
def store_report(report: str, image_count: int, report_type: str = "analysis") -> str:
"""
Store a report in history and return its ID.
Args:
report: The report content
image_count: Number of images analyzed
report_type: Type of report (analysis, batch, compare)
Returns:
Report ID for retrieval
"""
    global last_report  # report_history is only mutated in place, which needs no global declaration
report_id = str(uuid.uuid4())[:8]
last_report = report
report_history[report_id] = {
"report": report,
"timestamp": datetime.now().isoformat(),
"image_count": image_count,
"type": report_type,
}
# Keep only the most recent reports
if len(report_history) > MAX_REPORT_HISTORY:
oldest_id = min(report_history, key=lambda k: report_history[k]["timestamp"])
del report_history[oldest_id]
return report_id
def get_report(report_id: str) -> dict | None:
"""Retrieve a report by ID."""
return report_history.get(report_id)
# ============================================================================
# Image Processing
# ============================================================================
def compress_image(img: Image.Image, max_size: int = 1024, quality: int = 85) -> str:
"""
Compress and resize image, return as base64 JPEG string.
Args:
img: PIL Image object
max_size: Maximum dimension (width or height)
quality: JPEG quality (1-100)
Returns:
Base64-encoded JPEG string
"""
    # Flatten transparency onto a white background (JPEG has no alpha channel)
    if img.mode in ('RGBA', 'LA', 'P'):
        background = Image.new('RGB', img.size, (255, 255, 255))
        if img.mode == 'P':
            img = img.convert('RGBA')
        background.paste(img, mask=img.split()[-1] if img.mode in ('RGBA', 'LA') else None)
img = background
elif img.mode != 'RGB':
img = img.convert('RGB')
# Resize if too large
width, height = img.size
if width > max_size or height > max_size:
if width > height:
new_width = max_size
new_height = int(height * (max_size / width))
else:
new_height = max_size
new_width = int(width * (max_size / height))
img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
# Compress to JPEG
buffer = io.BytesIO()
img.save(buffer, format='JPEG', quality=quality, optimize=True)
buffer.seek(0)
return base64.b64encode(buffer.getvalue()).decode('utf-8')
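# Example (illustrative):
#   b64 = compress_image(Image.open("screenshot.png"))  # -> base64 JPEG, longest side <= 1024px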
# Common image signatures for auto-repair
IMAGE_SIGNATURES = {
b'\x89PNG\r\n\x1a\n': 'PNG',
b'\xff\xd8\xff': 'JPEG',
b'GIF87a': 'GIF',
b'GIF89a': 'GIF',
    b'RIFF': 'WEBP',  # RIFF container (bytes 8-12 are b'WEBP'); also matches WAV/AVI
    b'BM': 'BMP',     # Only two bytes, so prone to chance matches
}
def repair_corrupted_image(data: bytes) -> bytes | None:
"""
Attempt to repair a corrupted image file by finding the actual image data.
Common corruption: text/logs written before image binary data (e.g., Android screencap warnings).
Returns:
Repaired image bytes, or None if no image signature found.
"""
# Check if already valid (starts with image signature)
for sig in IMAGE_SIGNATURES:
if data.startswith(sig):
return data # Already valid
# Try to find image signature in the data
earliest_pos = len(data)
found_sig = None
for sig in IMAGE_SIGNATURES:
pos = data.find(sig)
if pos != -1 and pos < earliest_pos:
earliest_pos = pos
found_sig = sig
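    # Short signatures (b'BM', b'RIFF') can match garbage bytes by chance; that is
    # acceptable here because Image.open() still validates the sliced data downstream.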
if found_sig and earliest_pos < len(data):
# Found image data after some garbage bytes
return data[earliest_pos:]
return None
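# Example of the corruption this handles (illustrative): a screencap piped through a shell
# that prepends log lines, e.g. b"WARNING: linker: ...\n\x89PNG\r\n\x1a\n<png data>".
# The slice makes the buffer start at the PNG signature so Pillow can open it.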
def load_image_from_path(path: str, auto_repair: bool = True) -> Image.Image | None:
"""
Load image from local file path with auto-repair for corrupted files.
Args:
path: File path to the image
auto_repair: Attempt to fix corrupted images (e.g., screencap with text header)
"""
try:
p = Path(path).expanduser().resolve()
if not p.exists() or not p.is_file():
return None
# Try normal load first
try:
return Image.open(p)
except Exception:
if not auto_repair:
return None
# Attempt auto-repair for corrupted files
with open(p, 'rb') as f:
data = f.read()
repaired = repair_corrupted_image(data)
if repaired and repaired != data:
# Successfully found image data - load from memory
return Image.open(io.BytesIO(repaired))
return None
except Exception:
return None
async def load_image_from_url(url: str, auto_repair: bool = True) -> Image.Image | None:
"""Load image from URL with auto-repair for corrupted data."""
try:
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.get(url)
if response.status_code == 200:
data = response.content
try:
return Image.open(io.BytesIO(data))
except Exception:
if not auto_repair:
return None
# Attempt repair
repaired = repair_corrupted_image(data)
if repaired:
return Image.open(io.BytesIO(repaired))
except Exception:
pass
return None
def decode_base64_image(b64: str, auto_repair: bool = True) -> Image.Image | None:
"""Decode base64 string to PIL Image with auto-repair."""
try:
# Remove data URL prefix if present
if b64.startswith('data:'):
b64 = b64.split(',', 1)[1]
data = base64.b64decode(b64)
try:
return Image.open(io.BytesIO(data))
except Exception:
if not auto_repair:
return None
# Attempt repair
repaired = repair_corrupted_image(data)
if repaired:
return Image.open(io.BytesIO(repaired))
except Exception:
pass
return None
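# Example (illustrative): both raw base64 and data URLs are accepted:
#   decode_base64_image("iVBORw0KGgo...")
#   decode_base64_image("data:image/png;base64,iVBORw0KGgo...")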
async def collect_images(
images_base64: list[str] | None,
image_paths: list[str] | None,
image_urls: list[str] | None
) -> tuple[list[str], list[str]]:
"""
Collect images from all sources with CONCURRENT URL loading.
Returns:
Tuple of (base64_images, errors)
"""
config = load_config()
max_size = config.get("max_image_size", 1024)
quality = config.get("jpeg_quality", 85)
result = []
errors = []
# Process base64 images (sync - already in memory)
if images_base64:
for i, b64 in enumerate(images_base64):
img = decode_base64_image(b64)
if img:
result.append(compress_image(img, max_size, quality))
else:
errors.append(f"base64[{i}]: Failed to decode")
# Process file paths (sync - local disk is fast)
if image_paths:
for path in image_paths:
img = load_image_from_path(path)
if img:
result.append(compress_image(img, max_size, quality))
else:
errors.append(f"path: {path} - File not found or invalid")
# Process URLs CONCURRENTLY (async - network is slow)
if image_urls:
tasks = [load_image_from_url(url) for url in image_urls]
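        # Fan out all fetches at once; return_exceptions=True keeps one bad URL from cancelling the rest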
url_results = await asyncio.gather(*tasks, return_exceptions=True)
for url, img_result in zip(image_urls, url_results):
if isinstance(img_result, Exception):
errors.append(f"url: {url} - {type(img_result).__name__}")
elif img_result is None:
errors.append(f"url: {url} - Failed to fetch")
else:
result.append(compress_image(img_result, max_size, quality))
return result, errors
async def collect_images_from_folder(folder_path: str) -> tuple[list[str], list[str], list[str]]:
"""
Collect all images from a folder.
Returns:
Tuple of (base64_images, file_names, errors)
"""
config = load_config()
max_size = config.get("max_image_size", 1024)
quality = config.get("jpeg_quality", 85)
folder = Path(folder_path).expanduser().resolve()
if not folder.exists() or not folder.is_dir():
return [], [], [f"Folder not found: {folder_path}"]
result = []
file_names = []
errors = []
# Find all image files
image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp'}
image_files = sorted([
f for f in folder.iterdir()
if f.is_file() and f.suffix.lower() in image_extensions
])
for img_path in image_files:
img = load_image_from_path(str(img_path))
if img:
result.append(compress_image(img, max_size, quality))
file_names.append(img_path.name)
else:
errors.append(f"Failed to load: {img_path.name}")
return result, file_names, errors
# ============================================================================
# OpenAI API
# ============================================================================
async def analyze_with_gpt52(
images_base64: list[str],
context: dict,
system_prompt: str | None = None,
user_text: str | None = None
) -> dict:
"""
Call OpenAI GPT-5.2 Responses API for screenshot analysis.
Args:
images_base64: List of base64-encoded images
context: Context dict with platform, language, etc.
system_prompt: Custom system prompt (defaults to SYSTEM_PROMPT)
user_text: Custom user text (defaults to standard analysis prompt)
Uses high reasoning effort for thorough analysis (or fast mode if enabled).
Reference: https://platform.openai.com/docs/guides/latest-model
"""
config = load_config()
api_key = config.get("api_key", "")
if not api_key:
return {"error": "API key not configured. Use set_api_key tool first."}
# Determine model and reasoning based on fast mode
if config.get("fast_mode", False):
model = config.get("fast_mode_model", "gpt-4o-mini")
reasoning_effort = config.get("fast_mode_reasoning", "low")
else:
model = config.get("default_model", "gpt-5.2")
reasoning_effort = config.get("default_reasoning_effort", "high")
# Build user content with images
if user_text is None:
user_text = f"""Analyze these {len(images_base64)} screenshot(s).
**Platform**: {context.get('platform', 'unknown')}
**Language**: {context.get('language', 'unknown')}
**App Purpose**: {context.get('app_description', 'Not specified')}
**UI Style**: {context.get('ui_direction', 'Not specified')}
{f"**Focus On**: {', '.join(context.get('focus_areas', []))}" if context.get('focus_areas') else ''}"""
user_content = [{"type": "input_text", "text": user_text}]
# Add images (auto-detect PNG/JPEG from base64 header)
for img in images_base64:
if img.startswith("data:"):
image_url = img
else:
# Detect image type from base64 magic bytes
# PNG starts with iVBOR, JPEG starts with /9j/
if img.startswith("/9j/"):
mime = "image/jpeg"
else:
mime = "image/png" # Default to PNG
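            # Note: compress_image() always emits JPEG, so "/9j/" is the expected prefix in this pipeline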
image_url = f"data:{mime};base64,{img}"
user_content.append({"type": "input_image", "image_url": image_url})
# GPT-5.2 Responses API payload
payload = {
"model": model,
"reasoning": {"effort": reasoning_effort},
"input": [
{"role": "system", "content": system_prompt or SYSTEM_PROMPT},
{"role": "user", "content": user_content}
]
}
try:
async with httpx.AsyncClient(timeout=180.0) as client:
response = await client.post(
"https://api.openai.com/v1/responses",
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
},
json=payload
)
if response.status_code != 200:
return {"error": f"API error {response.status_code}: {response.text[:500]}", "status_code": response.status_code}
return response.json()
except httpx.TimeoutException:
return {"error": "Request timed out (180s). Try fewer screenshots."}
except Exception as e:
return {"error": f"Request failed: {str(e)}"}
async def analyze_with_retry(
images_base64: list[str],
context: dict,
system_prompt: str | None = None,
user_text: str | None = None,
max_retries: int = 3
) -> dict:
"""
Call analyze_with_gpt52 with exponential backoff retry for rate limits.
Retries on 429 (rate limit) and timeout errors.
"""
for attempt in range(max_retries):
response = await analyze_with_gpt52(images_base64, context, system_prompt, user_text)
# Check for retryable errors
if "error" in response:
error_msg = str(response.get("error", ""))
status_code = response.get("status_code", 0)
# Retry on rate limit (429) or server errors (5xx)
if status_code == 429 or (500 <= status_code < 600):
if attempt < max_retries - 1:
wait_time = 2 ** attempt # 1s, 2s, 4s
await asyncio.sleep(wait_time)
continue
# Retry on timeout
if "timed out" in error_msg.lower():
if attempt < max_retries - 1:
await asyncio.sleep(1)
continue
return response
return {"error": f"Max retries ({max_retries}) exceeded"}
def extract_output(api_response: dict) -> str:
"""Extract text output from GPT-5.2 response."""
if "error" in api_response and api_response["error"]:
return f"**Error**: {api_response['error']}"
# GPT-5.2 Responses API: output[].content[].text (type: output_text)
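    # e.g. (abridged; shape assumed by this parser):
    #   {"output": [{"type": "message", "content": [{"type": "output_text", "text": "..."}]}]}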
if output := api_response.get("output"):
if isinstance(output, list):
for item in output:
if item.get("type") == "message":
for content in item.get("content", []):
if content.get("type") == "output_text":
return content.get("text", "")
elif isinstance(output, str):
return output
# Fallback: direct output_text field
if text := api_response.get("output_text"):
return text
return "No output received from API."
# ============================================================================
# MCP Server (FastMCP)
# ============================================================================
mcp = FastMCP("Screenshot UI Analyzer")
@mcp.tool()
async def analyze_screenshots(
platform: str,
image_paths: list[str] | None = None,
image_urls: list[str] | None = None,
images_base64: list[str] | None = None,
language: str = "unknown",
app_description: str = "",
ui_direction: str = "",
focus_areas: list[str] | None = None
) -> str:
"""
Analyze app screenshots for UI/UX issues using GPT-5.2.
Accepts images from multiple sources (paths, URLs, or base64).
Images are automatically compressed to optimize for analysis.
Args:
platform: Target platform (ios, android, web, desktop)
image_paths: Local file paths to screenshots (RECOMMENDED - bypasses token limits)
image_urls: URLs to screenshot images
images_base64: Base64-encoded images (for small images only)
language: Development language (Swift, Kotlin, React, etc.)
app_description: What the app does and who it's for
ui_direction: UI design style or reference
focus_areas: Specific areas to focus on
Returns:
Structured analysis report with issues and fixes
"""
# Check if any images provided
if not any([images_base64, image_paths, image_urls]):
return "Error: No images provided. Use image_paths (recommended), image_urls, or images_base64."
# Collect and compress all images
processed_images, errors = await collect_images(images_base64, image_paths, image_urls)
if not processed_images:
return f"Error: No valid images found.\n\nErrors:\n" + "\n".join(f"- {e}" for e in errors)
context = {
"platform": platform,
"language": language,
"app_description": app_description or "Not specified",
"ui_direction": ui_direction or "Not specified",
"focus_areas": focus_areas or [],
}
# Call GPT-5.2
response = await analyze_with_gpt52(processed_images, context)
# Format report
error_section = ""
if errors:
error_section = f"\n\n**Warnings**: {len(errors)} image(s) failed to load:\n" + "\n".join(f"- {e}" for e in errors)
config = load_config()
model_info = config.get("fast_mode_model") if config.get("fast_mode") else config.get("default_model", "gpt-5.2")
reasoning_info = config.get("fast_mode_reasoning") if config.get("fast_mode") else config.get("default_reasoning_effort", "high")
report = f"""# UI/UX Analysis Report
**Platform**: {platform} | **Language**: {language} | **Screenshots**: {len(processed_images)}{error_section}
---
{extract_output(response)}
---
*Analyzed by {model_info} (reasoning: {reasoning_info})*
"""
report_id = store_report(report, len(processed_images), "analysis")
return f"{report}\n\n📋 Report ID: `{report_id}`"
@mcp.tool()
def get_last_report() -> str:
"""Get the last analysis report."""
return last_report or "No previous report available."
@mcp.tool()
def set_api_key(api_key: str) -> str:
"""
Set the OpenAI API key.
Args:
api_key: Your OpenAI API key (starts with sk-)
"""
    # Project keys ("sk-proj-...") also start with "sk-", so one prefix check covers both
    if not api_key.startswith("sk-"):
        return "Error: Invalid API key format (should start with sk- or sk-proj-)"
config = load_config()
config["api_key"] = api_key
save_config(config)
return "API key configured successfully."
@mcp.tool()
def set_reasoning_effort(effort: str) -> str:
"""
Set the reasoning effort level for GPT-5.2.
Args:
effort: Reasoning level (none, low, medium, high, xhigh)
"""
valid = ["none", "low", "medium", "high", "xhigh"]
if effort not in valid:
return f"Error: Invalid effort. Choose from: {', '.join(valid)}"
config = load_config()
config["default_reasoning_effort"] = effort
save_config(config)
return f"Reasoning effort set to: {effort}"
@mcp.tool()
def get_config() -> str:
"""Get current configuration (API key masked)."""
config = load_config()
# Mask API key
key = config.get("api_key", "")
masked = f"{key[:7]}...{key[-4:]}" if len(key) > 11 else "(not set)"
return json.dumps({
"api_key": masked,
"model": config.get("default_model", "gpt-5.2"),
"reasoning_effort": config.get("default_reasoning_effort", "high"),
"fast_mode": config.get("fast_mode", False),
"fast_mode_model": config.get("fast_mode_model", "gpt-4o-mini"),
}, indent=2)
@mcp.tool()
async def batch_analyze(
platform: str,
folder_path: str | None = None,
image_paths: list[str] | None = None,
batch_size: int = 5,
language: str = "unknown",
app_description: str = "",
generate_summary: bool = True
) -> str:
"""
Batch analyze multiple screenshots with automatic splitting.
Processes images in batches to avoid API timeouts and generates
an overall summary of issues found across all batches.
Args:
platform: Target platform (ios, android, web, desktop)
folder_path: Path to folder containing screenshots (will analyze all images)
image_paths: List of image file paths (alternative to folder_path)
batch_size: Number of images per batch (default 5, max 10)
language: Development language (Swift, Kotlin, React, etc.)
app_description: What the app does and who it's for
generate_summary: Generate overall summary across batches (default True)
Returns:
Combined batch reports with optional summary
"""
# Validate batch size
    batch_size = min(max(batch_size, 1), MAX_BATCH_SIZE)
# Collect images
if folder_path:
all_images, file_names, errors = await collect_images_from_folder(folder_path)
if not all_images:
return f"Error: No valid images in folder.\n\nErrors:\n" + "\n".join(f"- {e}" for e in errors)
elif image_paths:
all_images, errors = await collect_images(None, image_paths, None)
file_names = [Path(p).name for p in image_paths]
if not all_images:
return f"Error: No valid images found.\n\nErrors:\n" + "\n".join(f"- {e}" for e in errors)
else:
return "Error: Provide either folder_path or image_paths."
total_images = len(all_images)
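    # Ceiling division: e.g. 12 images at batch_size 5 -> 3 batches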
num_batches = (total_images + batch_size - 1) // batch_size
context = {
"platform": platform,
"language": language,
"app_description": app_description or "Not specified",
}
batch_reports = []
all_issues = {"critical": 0, "major": 0, "minor": 0}
# Process each batch
for batch_num in range(num_batches):
start_idx = batch_num * batch_size
end_idx = min(start_idx + batch_size, total_images)
batch_images = all_images[start_idx:end_idx]
batch_files = file_names[start_idx:end_idx] if file_names else []
# Analyze batch with retry
response = await analyze_with_retry(batch_images, context)
output = extract_output(response)
        # Count issues (rough keyword tally; section headers also match, hence the "~" in the summary)
output_lower = output.lower()
if "critical" in output_lower:
all_issues["critical"] += output_lower.count("critical")
if "major" in output_lower:
all_issues["major"] += output_lower.count("major")
if "minor" in output_lower:
all_issues["minor"] += output_lower.count("minor")
batch_report = f"""## Batch {batch_num + 1}/{num_batches} (Screenshots {start_idx + 1}-{end_idx})
**Files**: {', '.join(batch_files) if batch_files else f'{len(batch_images)} images'}
{output}
"""
batch_reports.append(batch_report)
# Build final report
config = load_config()
model_info = config.get("fast_mode_model") if config.get("fast_mode") else config.get("default_model", "gpt-5.2")
summary_section = ""
if generate_summary and num_batches > 1:
summary_section = f"""## Overall Summary
- **Total Screenshots**: {total_images}
- **Batches Processed**: {num_batches}
- **Issues Found**: ~{all_issues['critical']} critical, ~{all_issues['major']} major, ~{all_issues['minor']} minor
---
"""
report = f"""# Batch Analysis Report
**Platform**: {platform} | **Language**: {language} | **Total Screenshots**: {total_images}
---
{summary_section}{'---'.join(batch_reports)}
---
*Batch analyzed by {model_info}*
"""
report_id = store_report(report, total_images, "batch")
return f"{report}\n\n📋 Report ID: `{report_id}`"
@mcp.tool()
async def compare_designs(
design_path: str,
screenshot_path: str,
platform: str = "ios",
tolerance: str = "normal"
) -> str:
"""
Compare a design mockup with an actual screenshot.
Identifies visual discrepancies between the intended design and
the actual implementation, scoring consistency from 0-100.
Args:
design_path: Path to the design mockup image
screenshot_path: Path to the actual screenshot
platform: Target platform (ios, android, web, desktop)
tolerance: Comparison strictness (strict, normal, relaxed)
Returns:
Comparison report with consistency score and differences
"""
# Validate tolerance
valid_tolerances = ["strict", "normal", "relaxed"]
if tolerance not in valid_tolerances:
return f"Error: tolerance must be one of: {', '.join(valid_tolerances)}"
# Load both images
design_img = load_image_from_path(design_path)
screenshot_img = load_image_from_path(screenshot_path)
if not design_img:
return f"Error: Could not load design image: {design_path}"
if not screenshot_img:
return f"Error: Could not load screenshot: {screenshot_path}"
config = load_config()
max_size = config.get("max_image_size", 1024)
quality = config.get("jpeg_quality", 85)
# Compress images
design_b64 = compress_image(design_img, max_size, quality)
screenshot_b64 = compress_image(screenshot_img, max_size, quality)
# Build comparison prompt
tolerance_desc = {
"strict": "Flag ALL differences, even 1px variations or slight color shifts.",
"normal": "Flag noticeable differences (2-4px spacing, visible color mismatches).",
"relaxed": "Only flag significant differences that clearly affect UX or break design intent."
}
user_text = f"""Compare these two images:
- FIRST image: Design mockup (the intended design)
- SECOND image: Actual screenshot (the implementation)
**Platform**: {platform}
**Tolerance**: {tolerance.upper()} - {tolerance_desc[tolerance]}
Identify all discrepancies between the design and implementation."""
context = {"platform": platform}
# Call API with comparison prompt
response = await analyze_with_retry(
[design_b64, screenshot_b64],
context,
system_prompt=COMPARE_SYSTEM_PROMPT,
user_text=user_text
)
model_info = config.get("fast_mode_model") if config.get("fast_mode") else config.get("default_model", "gpt-5.2")
report = f"""# Design Comparison Report
**Design**: {Path(design_path).name}
**Screenshot**: {Path(screenshot_path).name}
**Platform**: {platform} | **Tolerance**: {tolerance}
---
{extract_output(response)}
---
*Compared by {model_info}*
"""
report_id = store_report(report, 2, "compare")
return f"{report}\n\n📋 Report ID: `{report_id}`"
@mcp.tool()
def set_fast_mode(enabled: bool) -> str:
"""
Toggle fast mode for quicker, cheaper analysis.
Fast mode uses gpt-4o-mini with low reasoning effort.
Good for initial scans; use standard mode for detailed analysis.
Args:
enabled: True to enable fast mode, False for standard (GPT-5.2)
"""
config = load_config()
config["fast_mode"] = enabled
save_config(config)
if enabled:
return f"✅ Fast mode ENABLED\n- Model: {config.get('fast_mode_model', 'gpt-4o-mini')}\n- Reasoning: {config.get('fast_mode_reasoning', 'low')}\n\n⚡ Faster & cheaper, but less thorough."
else:
return f"✅ Fast mode DISABLED\n- Model: {config.get('default_model', 'gpt-5.2')}\n- Reasoning: {config.get('default_reasoning_effort', 'high')}\n\n🔍 Full precision analysis."
@mcp.tool()
def get_report_history() -> str:
"""
Get history of recent analysis reports.
Returns list of report IDs with timestamps and types.
Use get_report_by_id to retrieve a specific report.
"""
if not report_history:
return "No reports in history."
lines = ["# Report History\n"]
sorted_reports = sorted(
report_history.items(),
key=lambda x: x[1]["timestamp"],
reverse=True
)
for report_id, data in sorted_reports:
ts = data["timestamp"][:16].replace("T", " ") # Format: YYYY-MM-DD HH:MM
report_type = data["type"]
img_count = data["image_count"]
lines.append(f"- `{report_id}` | {ts} | {report_type} | {img_count} images")
lines.append(f"\n*Total: {len(report_history)} reports (max {MAX_REPORT_HISTORY})*")
return "\n".join(lines)
@mcp.tool()
def get_report_by_id(report_id: str) -> str:
"""
Retrieve a specific report by its ID.
Args:
report_id: The 8-character report ID (from get_report_history)
"""
data = get_report(report_id)
if not data:
return f"Error: Report not found: {report_id}\n\nUse get_report_history to see available reports."
return data["report"]
# ============================================================================
# Entry Point
# ============================================================================
if __name__ == "__main__":
mcp.run()
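# FastMCP's run() defaults to the stdio transport, so this server is typically launched
# by an MCP client (e.g. a client config entry like {"command": "python", "args": ["server.py"]}).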