generate_image
Create custom images using Google's Gemini 2.0 Flash AI model. Provide a text prompt and an optional context (e.g., artistic, photorealistic) for enhanced results. Designed for Claude Desktop via the Gemini MCP Server.
Instructions
Generate an image using Google's Gemini 2.0 Flash Experimental model (with learned user preferences)
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| prompt | Yes | Text description of the desired image | |
| context | No | Optional context for intelligent enhancement (e.g., "artistic", "photorealistic", "technical") | |
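
The arguments passed to the tool are a plain object matching the schema above. A minimal example payload follows; the prompt and context values are illustrative only:

```js
// Illustrative arguments for a generate_image call; only "prompt" is required.
const args = {
  prompt: 'A lighthouse on a rocky coastline at sunset',
  context: 'photorealistic',
};
```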
Implementation Reference
- src/tools/image-generation.js:48-116 (handler): The main handler function that performs prompt validation and enhancement, calls the Gemini service to generate the image, saves the base64 image data to a PNG file, learns from the interaction, and returns a formatted text response with the file path.

  ```js
  async execute(args) {
    const prompt = validateNonEmptyString(args.prompt, 'prompt');
    const context = args.context ? validateString(args.context, 'context') : null;

    log(`Generating image: "${prompt}" with context: ${context || 'general'}`, this.name);

    try {
      let enhancedPrompt = prompt;
      if (this.intelligenceSystem.initialized) {
        try {
          enhancedPrompt = await this.intelligenceSystem.enhancePrompt(prompt, context, this.name);
          log('Applied Tool Intelligence enhancement', this.name);
        } catch (err) {
          log(`Tool Intelligence enhancement failed: ${err.message}`, this.name);
        }
      }

      const formattedPrompt = `Create a detailed and high-quality image of: ${enhancedPrompt}`;
      const imageData = await this.geminiService.generateImage('IMAGE_GENERATION', formattedPrompt);

      if (imageData) {
        log('Successfully extracted image data', this.name);
        ensureDirectoryExists(config.OUTPUT_DIR, this.name);

        const timestamp = Date.now();
        const hash = crypto.createHash('md5').update(prompt).digest('hex');
        const imageName = `gemini-${hash}-${timestamp}.png`;
        const imagePath = path.join(config.OUTPUT_DIR, imageName);

        fs.writeFileSync(imagePath, Buffer.from(imageData, 'base64'));
        log(`Image saved to: ${imagePath}`, this.name);

        if (this.intelligenceSystem.initialized) {
          try {
            await this.intelligenceSystem.learnFromInteraction(prompt, enhancedPrompt, `Image generated successfully: ${imagePath}`, context, this.name);
            log('Tool Intelligence learned from interaction', this.name);
          } catch (err) {
            log(`Tool Intelligence learning failed: ${err.message}`, this.name);
          }
        }

        let finalResponse = `✓ Image successfully generated from prompt: "${prompt}"\n\nYou can find the image at: ${imagePath}`; // eslint-disable-line max-len
        if (context && this.intelligenceSystem.initialized) {
          finalResponse += `\n\n---\n_Enhancement applied based on context: ${context}_`;
        }

        return {
          content: [
            {
              type: 'text',
              text: finalResponse,
            },
          ],
        };
      }

      log('No image data found in response', this.name);
      return {
        content: [
          {
            type: 'text',
            text: `Could not generate image for: "${prompt}". No image data was returned by Gemini API.`,
          },
        ],
      };
    } catch (error) {
      log(`Error generating image: ${error.message}`, this.name);
      throw new Error(`Error generating image: ${error.message}`);
    }
  }
  ```
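
  The validation helpers called at the top of the handler are defined elsewhere in the repository. A minimal sketch of the behavior the handler relies on (the bodies below are assumptions, not the actual implementations):

  ```js
  // Sketch of the validation helpers the handler depends on; the real
  // implementations live elsewhere in the repo and may differ in detail.
  function validateString(value, fieldName) {
    if (typeof value !== 'string') {
      throw new Error(`${fieldName} must be a string`);
    }
    return value;
  }

  function validateNonEmptyString(value, fieldName) {
    const str = validateString(value, fieldName);
    if (str.trim().length === 0) {
      throw new Error(`${fieldName} must be a non-empty string`);
    }
    return str;
  }
  ```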
- src/tools/image-generation.js:22-35 (schema): Input schema defining the tool parameters: a required 'prompt' string for the image description and an optional 'context' string for the enhancement style.

  ```js
  {
    type: 'object',
    properties: {
      prompt: {
        type: 'string',
        description: 'Text description of the desired image',
      },
      context: {
        type: 'string',
        description: 'Optional context for intelligent enhancement (e.g., "artistic", "photorealistic", "technical")',
      },
    },
    required: ['prompt'],
  },
  ```
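
  The schema is one property of the tool definition, alongside its name, description, and handler. A rough sketch of that shape (the property names here are assumptions, not copied from the repository; only the constructor arguments and `this.*` usages are confirmed by the code shown in this reference):

  ```js
  // Rough sketch of how the schema plausibly sits inside the tool class.
  class ImageGenerationTool {
    constructor(intelligenceSystem, geminiService) {
      this.name = 'generate_image';
      this.description = "Generate an image using Google's Gemini 2.0 Flash Experimental model";
      this.inputSchema = { /* schema shown above */ };
      this.intelligenceSystem = intelligenceSystem;
      this.geminiService = geminiService;
    }

    async execute(args) { /* handler shown above */ }
  }
  ```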
- src/tools/index.js:78 (registration): Registers a new instance of ImageGenerationTool with the shared intelligenceSystem and geminiService instances in the central tool registry.

  ```js
  registerTool(new ImageGenerationTool(intelligenceSystem, geminiService));
  ```
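
  registerTool itself is not shown in this reference. A minimal sketch of a registry it could be backed by (the shape is an assumption):

  ```js
  // Minimal sketch of a name-keyed tool registry that registerTool could wrap;
  // the actual implementation in src/tools/index.js may differ.
  const toolRegistry = new Map();

  function registerTool(tool) {
    toolRegistry.set(tool.name, tool);
  }

  function getTool(name) {
    return toolRegistry.get(name);
  }
  ```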
- src/gemini/gemini-service.js:58-76 (helper): Supporting method that interfaces with the Google Gemini API to generate image data (base64) from a text prompt using the IMAGE_GENERATION model configuration.

  ```js
  async generateImage(modelType, prompt) {
    try {
      const modelConfig = getGeminiModelConfig(modelType);

      // Pass only the model name to getGenerativeModel
      const model = this.genAI.getGenerativeModel({ model: modelConfig.model });
      const content = formatTextPrompt(prompt); // Image generation also uses text prompt

      // Pass the generationConfig to the generateContent method
      const result = await model.generateContent({
        contents: [{ parts: content }],
        generationConfig: modelConfig.generationConfig,
      });

      log(`Image generation response received from Gemini API for model type: ${modelType}`, 'gemini-service');
      return extractImageData(result.response?.candidates?.[0]);
    } catch (error) {
      log(`Error generating image with Gemini API for model type ${modelType}: ${error.message}`, 'gemini-service');
      throw new Error(`Gemini image generation failed: ${error.message}`);
    }
  }
  ```
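
  extractImageData is a separate helper. Assuming the standard @google/generative-ai response shape, where image output arrives base64-encoded in an inlineData part on the candidate, it might look roughly like this:

  ```js
  // Sketch of what extractImageData could look like; assumes the image bytes
  // arrive base64-encoded in an inlineData part on the candidate. The real
  // helper in the repository may differ.
  function extractImageData(candidate) {
    const parts = candidate?.content?.parts || [];
    const imagePart = parts.find((part) => part.inlineData?.data);
    return imagePart ? imagePart.inlineData.data : null;
  }
  ```

  Returning null here matches the handler's `if (imageData)` check, which falls through to the "No image data found in response" message.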