import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
import axios from "axios";
import * as fs from "fs";
import * as path from "path";
import * as os from "os";
import * as dotenv from "dotenv";
// Pull configuration from .env before anything reads process.env.
dotenv.config();

// Google credentials supplied via environment variables.
const API_KEY = process.env.GOOGLE_API_KEY ?? "";
const PROJECT_NUMBER = process.env.GOOGLE_PROJECT_NUMBER ?? "";

// Where generated images land by default: OUTPUT_DIR when set (and non-empty,
// hence `||` rather than `??`), otherwise ./nanobanana-images under the
// current working directory — i.e. inside whatever project Claude Desktop
// currently has open in the IDE.
const DEFAULT_OUTPUT_DIR =
  process.env.OUTPUT_DIR || path.join(process.cwd(), "nanobanana-images");

// The MCP server instance that all tools below are registered on.
const server = new McpServer({
  name: "Nano Banana",
  version: "1.0.0",
});
// Helper to find the best image model
async function getBestImageModel(): Promise<string> {
try {
const listResponse = await axios.get(
`https://generativelanguage.googleapis.com/v1beta/models?key=${API_KEY}`
);
const models = listResponse.data.models.map((m: any) => m.name);
// Prefer imagen-4.0-fast, then imagen-3.0, then any imagen
const preferred = [
'models/imagen-4.0-fast-generate-001',
'models/imagen-3.0-generate-001',
];
for (const pref of preferred) {
if (models.includes(pref)) return pref;
}
// Fallback search
const found = models.find((m: string) => m.includes('imagen') && m.includes('generate'));
if (found) return found;
// Default fallback if listing fails or returns nothing useful but we hope it works
return 'models/imagen-3.0-generate-001';
} catch (error) {
console.error("Error listing models, using default:", error);
return 'models/imagen-3.0-generate-001';
}
}
server.tool(
  "generate_image",
  "Generate an image using Google's Imagen model and automatically save it",
  {
    prompt: z.string().describe("The text prompt to generate an image from"),
    outputDir: z.string().optional().describe("Directory to save the images. If not specified, images are saved to ./nanobanana-images in your current project directory"),
    aspectRatio: z.string().optional().describe("Aspect ratio of the image (e.g., '1:1', '16:9'). Default is '1:1'"),
    numberOfImages: z.number().optional().default(1).describe("Number of images to generate (1-4)"),
  },
  async ({ prompt, outputDir, aspectRatio, numberOfImages }) => {
    const modelName = await getBestImageModel();
    console.error(`Using model: ${modelName}`);

    // Imagen models on the Generative Language API use the ':predict'
    // endpoint with a Vertex-style instances/parameters payload (as opposed
    // to the ':generateContent' endpoint Gemini models use).
    const url = `https://generativelanguage.googleapis.com/v1beta/${modelName}:predict?key=${API_KEY}`;
    const payload = {
      instances: [
        {
          prompt: prompt
        }
      ],
      parameters: {
        sampleCount: numberOfImages || 1,
      }
    };
    // Only send aspectRatio when the caller asked for one — not every
    // Imagen version accepts it in 'parameters'.
    if (aspectRatio) {
      (payload.parameters as any).aspectRatio = aspectRatio;
    }

    try {
      const response = await axios.post(url, payload);
      const predictions = response.data.predictions;
      if (!predictions || predictions.length === 0) {
        return {
          content: [
            { type: "text", text: "No images generated." }
          ]
        };
      }

      // Ensure the output directory exists before writing any files.
      const saveDir = outputDir || DEFAULT_OUTPUT_DIR;
      if (!fs.existsSync(saveDir)) {
        fs.mkdirSync(saveDir, { recursive: true });
      }

      const contentItems: any[] = [];
      const savedFiles: string[] = [];
      // ':' and '.' are stripped because they are invalid in filenames on
      // some platforms; the timestamp also keeps batch filenames unique.
      const timestamp = new Date().toISOString().replace(/[:.]/g, '-');

      for (let i = 0; i < predictions.length; i++) {
        const prediction = predictions[i];
        if (!prediction.bytesBase64Encoded) {
          // Unknown payload shape: surface a truncated dump and move on.
          contentItems.push({ type: "text", text: `Unknown prediction format: ${JSON.stringify(prediction).substring(0, 100)}...` });
          continue;
        }
        const base64Image: string = prediction.bytesBase64Encoded;
        // Bug fix: the original second 'else if' branch was unreachable
        // (the first branch already matched bytesBase64Encoded), so a
        // server-reported mimeType was always ignored. Honor it here.
        const mimeType: string = prediction.mimeType || "image/png";

        // Persist the image, then also return it inline to the client.
        const extension = mimeType.includes('png') ? 'png' : 'jpg';
        const filename = `nanobanana_${timestamp}_${i + 1}.${extension}`;
        const filepath = path.join(saveDir, filename);
        const buffer = Buffer.from(base64Image, 'base64');
        fs.writeFileSync(filepath, buffer);
        savedFiles.push(filepath);

        contentItems.push({
          type: "image",
          data: base64Image,
          mimeType: mimeType
        });
      }

      // Report the count actually saved — predictions with an unknown
      // format are skipped, so predictions.length could overcount.
      const summaryText = `Generated ${savedFiles.length} image(s) using ${modelName}\n\nSaved to:\n${savedFiles.map(f => `- ${f}`).join('\n')}`;
      contentItems.unshift({ type: "text", text: summaryText });
      return {
        content: contentItems
      };
    } catch (error: any) {
      // Prefer the API's error body when present — it carries the
      // actionable message (quota, invalid model, bad parameter, ...).
      const errorMessage = error.response ? JSON.stringify(error.response.data) : error.message;
      console.error("Image generation failed:", errorMessage);
      return {
        content: [
          { type: "text", text: `Error generating image: ${errorMessage}` }
        ],
        isError: true,
      };
    }
  }
);
/**
 * Connect the MCP server to a stdio transport and start serving.
 * Logs to stderr because stdout is reserved for the MCP protocol stream.
 */
async function main(): Promise<void> {
  const transport = new StdioServerTransport();
  await server.connect(transport);
  console.error("Nano Banana MCP Server running on stdio");
}

// Bug fix: the bare main() call left a failed startup as an unhandled
// promise rejection; report it and exit non-zero instead.
main().catch((error: unknown) => {
  console.error("Fatal error starting server:", error);
  process.exit(1);
});