gemini_analyze_image

Analyze images using Google Gemini's multimodal AI to extract descriptions, answer questions, or process visual content through configurable models and output formats.

Instructions

Use Google Gemini CLI to describe or analyze an image using multimodal capabilities.

Input Schema

Table | JSON Schema

Name	Required	Description
`image`	Yes	Local file path, http(s) URL, or base64-encoded image to analyze.
`prompt`	No	Instruction for Gemini. Defaults to config value.
`model`	No	Gemini model identifier (e.g., gemini-2.0-flash).
`cliPath`	No	Override the Gemini CLI executable path.
`sandbox`	No	Whether to run the CLI with the sandbox flag (-s).
`outputFormat`	No	Request Gemini CLI to return the specified output format.
`extraFlags`	No	Additional CLI flags to append as-is.
`timeoutMs`	No	Maximum time (in milliseconds) to wait for CLI execution.

Implementation Reference

src/tools/geminiImage.ts:61-128 (handler)
The main handler function for the gemini_analyze_image tool. Prepares the image input, constructs the analysis prompt, invokes the Gemini image analysis helper, formats the output with metadata, and handles errors appropriately.
/**
 * Handler for the `gemini_analyze_image` tool.
 *
 * Prepares the image, resolves the prompt, runs the Gemini image analysis
 * helper, and returns the CLI output followed by a short metadata footer
 * (model, image source, duration, and the path for local images).
 *
 * @param args - Tool arguments (image, optional prompt/model/CLI overrides).
 * @returns A single text content item; `isError` is `true` when the CLI
 *   invocation raised a `CommandError`.
 * @throws Re-throws any error that is not a `CommandError`. Errors raised by
 *   `prepareImage` propagate as well.
 */
async execute(args: GeminiArgs) {
  const prepared = await prepareImage(args.image, appConfig);

  // Everything after a successful prepareImage() lives inside the try so
  // that prepared.cleanup() always runs, even if building the attachment
  // reference throws.
  try {
    // A whitespace-only prompt trims to "", which `??` would NOT replace;
    // treat it the same as a missing prompt and fall back to the default.
    const requestedPrompt = args.prompt?.trim();
    const prompt = requestedPrompt
      ? requestedPrompt
      : appConfig.gemini.defaultPrompt ?? "Describe this image.";
    const attachment = createAttachmentReference(prepared.path);

    const result = await runGeminiImageAnalysis({
      prompt,
      model: args.model,
      sandbox: args.sandbox,
      outputFormat: args.outputFormat,
      extraFlags: args.extraFlags,
      timeoutMs: args.timeoutMs ?? appConfig.commandTimeoutMs,
      imageReference: attachment,
      preparedImage: prepared,
      commandOverride: args.cliPath,
    });

    const cleaned = result.stdout || "(Gemini CLI returned no output)";
    const metaLines = [
      `model: ${result.model ?? "default"}`,
      `imageSource: ${prepared.source}`,
      `durationMs: ${result.durationMs}`,
    ];
    // The path is only reported for images whose source is "local".
    if (prepared.source === "local") {
      metaLines.push(`imagePath: ${prepared.path}`);
    }

    return {
      content: [
        {
          type: "text",
          text: ["### Gemini Output", cleaned, "", metaLines.join("\n")].join(
            "\n"
          ),
        },
      ],
      isError: false,
    } as const;
  } catch (error) {
    if (error instanceof CommandError) {
      // Include whichever of stderr/stdout is non-empty after trimming.
      const details = [
        error.message,
        error.result.stderr.trim() && `stderr:\n${error.result.stderr.trim()}`,
        error.result.stdout.trim() && `stdout:\n${error.result.stdout.trim()}`,
      ]
        .filter(Boolean)
        .join("\n\n");

      return {
        content: [
          {
            type: "text",
            text: `Gemini CLI failed:\n${details}`,
          },
        ],
        isError: true,
      };
    }
    throw error;
  } finally {
    await prepared.cleanup();
  }
},
src/tools/geminiImage.ts:9-52 (schema)
Zod schema defining the input validation and descriptions for the gemini_analyze_image tool parameters.
/**
 * Input schema for the `gemini_analyze_image` tool.
 *
 * Only `image` is required. Every string field must be non-empty, and
 * `timeoutMs` must be a positive integer no greater than 600 000.
 */
const geminiSchema = (() => {
  // Fresh non-empty string schema per call so no instance is shared.
  const nonEmptyString = () => z.string().min(1);
  // Upper bound accepted for `timeoutMs`.
  const maxTimeoutMs = 600_000;

  const fields = {
    image: nonEmptyString().describe(
      "Local file path, http(s) URL, or base64-encoded image to analyze."
    ),
    prompt: nonEmptyString()
      .optional()
      .describe("Instruction for Gemini. Defaults to config value."),
    model: nonEmptyString()
      .optional()
      .describe("Gemini model identifier (e.g., gemini-2.0-flash)."),
    cliPath: nonEmptyString()
      .optional()
      .describe("Override the Gemini CLI executable path."),
    sandbox: z
      .boolean()
      .optional()
      .describe("Whether to run the CLI with the sandbox flag (-s)."),
    outputFormat: z
      .enum(["text", "json"])
      .optional()
      .describe("Request Gemini CLI to return the specified output format."),
    extraFlags: z
      .array(nonEmptyString())
      .optional()
      .describe("Additional CLI flags to append as-is."),
    timeoutMs: z
      .number()
      .int()
      .positive()
      .max(maxTimeoutMs)
      .optional()
      .describe("Maximum time (in milliseconds) to wait for CLI execution."),
  };

  return z.object(fields).describe("Invoke Gemini CLI to analyze an image.");
})();
src/tools/geminiImage.ts:56-129 (registration)
Tool registration call that defines the name, description, schema, and handler for gemini_analyze_image.
// Registers the `gemini_analyze_image` tool: name, description, input schema
// and the handler that drives the Gemini CLI.
registerTool({
  name: "gemini_analyze_image",
  description:
    "Use Google Gemini CLI to describe or analyze an image using multimodal capabilities.",
  schema: geminiSchema,
  /**
   * Prepares the image, resolves the prompt, runs the Gemini image analysis
   * helper, and returns the CLI output followed by a short metadata footer.
   *
   * @param args - Tool arguments (image, optional prompt/model/CLI overrides).
   * @returns A single text content item; `isError` is `true` when the CLI
   *   invocation raised a `CommandError`.
   * @throws Re-throws any error that is not a `CommandError`. Errors raised
   *   by `prepareImage` propagate as well.
   */
  async execute(args: GeminiArgs) {
    const prepared = await prepareImage(args.image, appConfig);

    // Everything after a successful prepareImage() lives inside the try so
    // that prepared.cleanup() always runs, even if building the attachment
    // reference throws.
    try {
      // A whitespace-only prompt trims to "", which `??` would NOT replace;
      // treat it the same as a missing prompt and fall back to the default.
      const requestedPrompt = args.prompt?.trim();
      const prompt = requestedPrompt
        ? requestedPrompt
        : appConfig.gemini.defaultPrompt ?? "Describe this image.";
      const attachment = createAttachmentReference(prepared.path);

      const result = await runGeminiImageAnalysis({
        prompt,
        model: args.model,
        sandbox: args.sandbox,
        outputFormat: args.outputFormat,
        extraFlags: args.extraFlags,
        timeoutMs: args.timeoutMs ?? appConfig.commandTimeoutMs,
        imageReference: attachment,
        preparedImage: prepared,
        commandOverride: args.cliPath,
      });

      const cleaned = result.stdout || "(Gemini CLI returned no output)";
      const metaLines = [
        `model: ${result.model ?? "default"}`,
        `imageSource: ${prepared.source}`,
        `durationMs: ${result.durationMs}`,
      ];
      // The path is only reported for images whose source is "local".
      if (prepared.source === "local") {
        metaLines.push(`imagePath: ${prepared.path}`);
      }

      return {
        content: [
          {
            type: "text",
            text: [
              "### Gemini Output",
              cleaned,
              "",
              metaLines.join("\n"),
            ].join("\n"),
          },
        ],
        isError: false,
      } as const;
    } catch (error) {
      if (error instanceof CommandError) {
        // Include whichever of stderr/stdout is non-empty after trimming.
        const details = [
          error.message,
          error.result.stderr.trim() &&
            `stderr:\n${error.result.stderr.trim()}`,
          error.result.stdout.trim() &&
            `stdout:\n${error.result.stdout.trim()}`,
        ]
          .filter(Boolean)
          .join("\n\n");

        return {
          content: [
            {
              type: "text",
              text: `Gemini CLI failed:\n${details}`,
            },
          ],
          isError: true,
        };
      }
      throw error;
    } finally {
      await prepared.cleanup();
    }
  },
});
src/providers/gemini.ts:25-67 (helper)
Helper function that constructs and executes the Gemini CLI command for image analysis, building arguments from options and running the command via runCommand.
/**
 * Builds the Gemini CLI argument list for an image analysis request and runs
 * the command via `runCommand`.
 *
 * Flags are emitted in this order: `-m <model>`, `-o <format>` (omitted when
 * the format is unset or "text"), `-s`, configured extra args, per-call extra
 * flags, and finally `-p <prompt>`. The prompt is the trimmed instruction, a
 * blank line, then the image reference.
 *
 * @param options - Per-call settings; unset model/format/command fall back to
 *   the values in `appConfig.gemini`.
 * @returns Trimmed stdout, the reported duration, and the model that was used
 *   (possibly undefined when neither the call nor the config names one).
 */
export async function runGeminiImageAnalysis(
  options: GeminiImageOptions
): Promise<GeminiSuccessResult> {
  const model = options.model ?? appConfig.gemini.defaultModel;
  const format =
    options.outputFormat ?? appConfig.gemini.defaultOutputFormat;
  const wantsFormatFlag = Boolean(format) && format !== "text";

  const cliArgs: string[] = [
    ...(model ? ["-m", model] : []),
    ...(wantsFormatFlag ? ["-o", format] : []),
    ...(options.sandbox ? ["-s"] : []),
    ...(appConfig.gemini.extraArgs?.length ? appConfig.gemini.extraArgs : []),
    ...(options.extraFlags?.length ? options.extraFlags : []),
    // The prompt always goes last.
    "-p",
    `${options.prompt.trim()}\n\n${options.imageReference}`,
  ];

  const executable = options.commandOverride ?? appConfig.gemini.command;
  const { stdout, durationMs } = await runCommand(executable, cliArgs, {
    timeoutMs: options.timeoutMs,
  });

  return { stdout: stdout.trim(), durationMs, model };
}

MCP Vision Relay

gemini_analyze_image

Instructions

Input Schema

Implementation Reference

Other Tools

Latest Blog Posts

MCP directory API