gemini_analyze_image
Analyze images using Google Gemini's multimodal AI to generate descriptions, extract information, or answer questions about visual content from local files, URLs, or base64 data.
Instructions
Use Google Gemini CLI to describe or analyze an image using multimodal capabilities.
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| image | Yes | Local file path, http(s) URL, or base64-encoded image to analyze. | |
| prompt | No | Instruction for Gemini. Defaults to config value. | |
| model | No | Gemini model identifier (e.g., gemini-2.0-flash). | |
| cliPath | No | Override the Gemini CLI executable path. | |
| sandbox | No | Whether to run the CLI with the sandbox flag (-s). | |
| outputFormat | No | Request Gemini CLI to return the specified output format. | |
| extraFlags | No | Additional CLI flags to append as-is. | |
| timeoutMs | No | Maximum time (in milliseconds) to wait for CLI execution. | |
Implementation Reference
// src/tools/geminiImage.ts:61-128 (handler)
/**
 * Core logic of the gemini_analyze_image tool: prepares the image, resolves
 * the prompt, calls runGeminiImageAnalysis, formats the output with metadata,
 * handles CLI failures, and always cleans up the prepared image.
 *
 * @param args - Tool arguments (image, optional prompt/model/CLI overrides).
 * @returns A single text content item; `isError` is `true` when the Gemini CLI
 *   failed with a `CommandError`, `false` otherwise.
 * @throws Re-throws any error that is not a `CommandError`.
 */
async execute(args: GeminiArgs) {
  const prepared = await prepareImage(args.image, appConfig);

  // Everything after prepareImage runs inside the try so that
  // prepared.cleanup() is reached even if building the attachment throws.
  try {
    // `||` (not `??`) for the caller's prompt: a whitespace-only prompt trims
    // to "" and must fall back to the default instead of being sent empty.
    const prompt =
      args.prompt?.trim() ||
      (appConfig.gemini.defaultPrompt ?? "Describe this image.");
    const attachment = createAttachmentReference(prepared.path);

    const result = await runGeminiImageAnalysis({
      prompt,
      model: args.model,
      sandbox: args.sandbox,
      outputFormat: args.outputFormat,
      extraFlags: args.extraFlags,
      timeoutMs: args.timeoutMs ?? appConfig.commandTimeoutMs,
      imageReference: attachment,
      preparedImage: prepared,
      commandOverride: args.cliPath,
    });

    const cleaned = result.stdout || "(Gemini CLI returned no output)";
    const metaLines = [
      `model: ${result.model ?? "default"}`,
      `imageSource: ${prepared.source}`,
      `durationMs: ${result.durationMs}`,
    ];
    // The path is only reported for images that came from a local file.
    if (prepared.source === "local") {
      metaLines.push(`imagePath: ${prepared.path}`);
    }

    return {
      content: [
        {
          type: "text",
          text: [
            "### Gemini Output",
            cleaned,
            "",
            metaLines.join("\n"),
          ].join("\n"),
        },
      ],
      isError: false,
    } as const;
  } catch (error) {
    if (error instanceof CommandError) {
      // Report the message plus any non-empty stderr/stdout captured from the CLI.
      const stderr = error.result.stderr.trim();
      const stdout = error.result.stdout.trim();
      const details = [
        error.message,
        stderr && `stderr:\n${stderr}`,
        stdout && `stdout:\n${stdout}`,
      ]
        .filter(Boolean)
        .join("\n\n");
      return {
        content: [
          {
            type: "text",
            text: `Gemini CLI failed:\n${details}`,
          },
        ],
        isError: true,
      };
    }
    throw error;
  } finally {
    await prepared.cleanup();
  }
},
// src/tools/geminiImage.ts:9-52 (schema)
// Zod schema defining the input parameters and validation for the
// gemini_analyze_image tool.

/** Base validator shared by every free-text field: a string of length >= 1. */
const nonEmptyText = z.string().min(1);

/** Per-field validators; only `image` is required, all other fields are optional. */
const geminiFields = {
  image: nonEmptyText.describe(
    "Local file path, http(s) URL, or base64-encoded image to analyze."
  ),
  prompt: nonEmptyText
    .optional()
    .describe("Instruction for Gemini. Defaults to config value."),
  model: nonEmptyText
    .optional()
    .describe("Gemini model identifier (e.g., gemini-2.0-flash)."),
  cliPath: nonEmptyText
    .optional()
    .describe("Override the Gemini CLI executable path."),
  sandbox: z
    .boolean()
    .optional()
    .describe("Whether to run the CLI with the sandbox flag (-s)."),
  outputFormat: z
    .enum(["text", "json"])
    .optional()
    .describe("Request Gemini CLI to return the specified output format."),
  extraFlags: z
    .array(nonEmptyText)
    .optional()
    .describe("Additional CLI flags to append as-is."),
  // Positive integer, capped at 600_000 ms (10 minutes).
  timeoutMs: z
    .number()
    .int()
    .positive()
    .max(600_000)
    .optional()
    .describe("Maximum time (in milliseconds) to wait for CLI execution."),
};

const geminiSchema = z
  .object(geminiFields)
  .describe("Invoke Gemini CLI to analyze an image.");
// src/tools/geminiImage.ts:56-129 (registration)
// Tool registration call that defines the name, description, schema, and
// execute handler for gemini_analyze_image.
registerTool({
  name: "gemini_analyze_image",
  description:
    "Use Google Gemini CLI to describe or analyze an image using multimodal capabilities.",
  schema: geminiSchema,
  /**
   * Prepares the image, resolves the prompt, runs the Gemini CLI analysis and
   * formats the result with metadata.
   *
   * @param args - Tool arguments (image, optional prompt/model/CLI overrides).
   * @returns A single text content item; `isError` is `true` when the Gemini
   *   CLI failed with a `CommandError`, `false` otherwise.
   * @throws Re-throws any error that is not a `CommandError`.
   */
  async execute(args: GeminiArgs) {
    const prepared = await prepareImage(args.image, appConfig);

    // Everything after prepareImage runs inside the try so that
    // prepared.cleanup() is reached even if building the attachment throws.
    try {
      // `||` (not `??`) for the caller's prompt: a whitespace-only prompt
      // trims to "" and must fall back to the default instead of being sent
      // empty.
      const prompt =
        args.prompt?.trim() ||
        (appConfig.gemini.defaultPrompt ?? "Describe this image.");
      const attachment = createAttachmentReference(prepared.path);

      const result = await runGeminiImageAnalysis({
        prompt,
        model: args.model,
        sandbox: args.sandbox,
        outputFormat: args.outputFormat,
        extraFlags: args.extraFlags,
        timeoutMs: args.timeoutMs ?? appConfig.commandTimeoutMs,
        imageReference: attachment,
        preparedImage: prepared,
        commandOverride: args.cliPath,
      });

      const cleaned = result.stdout || "(Gemini CLI returned no output)";
      const metaLines = [
        `model: ${result.model ?? "default"}`,
        `imageSource: ${prepared.source}`,
        `durationMs: ${result.durationMs}`,
      ];
      // The path is only reported for images that came from a local file.
      if (prepared.source === "local") {
        metaLines.push(`imagePath: ${prepared.path}`);
      }

      return {
        content: [
          {
            type: "text",
            text: [
              "### Gemini Output",
              cleaned,
              "",
              metaLines.join("\n"),
            ].join("\n"),
          },
        ],
        isError: false,
      } as const;
    } catch (error) {
      if (error instanceof CommandError) {
        // Report the message plus any non-empty stderr/stdout from the CLI.
        const stderr = error.result.stderr.trim();
        const stdout = error.result.stdout.trim();
        const details = [
          error.message,
          stderr && `stderr:\n${stderr}`,
          stdout && `stdout:\n${stdout}`,
        ]
          .filter(Boolean)
          .join("\n\n");
        return {
          content: [
            {
              type: "text",
              text: `Gemini CLI failed:\n${details}`,
            },
          ],
          isError: true,
        };
      }
      throw error;
    } finally {
      await prepared.cleanup();
    }
  },
});
// src/providers/gemini.ts:25-67 (helper)
/**
 * Constructs and executes the Gemini CLI command for image analysis.
 *
 * Flags are emitted in a fixed order: model (`-m`), output format (`-o`,
 * omitted for "text"), sandbox (`-s`), configured extra args, per-call extra
 * flags, and finally the prompt (`-p`) with the image reference appended
 * after a blank line.
 *
 * @param options - Prompt, image reference and optional per-call overrides;
 *   unset model/output format/command fall back to `appConfig.gemini`.
 * @returns Trimmed stdout, the duration reported by `runCommand`, and the
 *   resolved model (undefined when neither the call nor the config sets one).
 */
export async function runGeminiImageAnalysis(
  options: GeminiImageOptions
): Promise<GeminiSuccessResult> {
  const geminiConfig = appConfig.gemini;
  const model = options.model ?? geminiConfig.defaultModel;
  const outputFormat = options.outputFormat ?? geminiConfig.defaultOutputFormat;
  const finalPrompt = `${options.prompt.trim()}\n\n${options.imageReference}`;

  const cliArgs: string[] = [
    ...(model ? ["-m", model] : []),
    // "text" is never passed explicitly; only other formats get an -o flag.
    ...(outputFormat && outputFormat !== "text" ? ["-o", outputFormat] : []),
    ...(options.sandbox ? ["-s"] : []),
    ...(geminiConfig.extraArgs ?? []),
    ...(options.extraFlags ?? []),
    "-p",
    finalPrompt,
  ];

  const { stdout, durationMs } = await runCommand(
    options.commandOverride ?? geminiConfig.command,
    cliArgs,
    { timeoutMs: options.timeoutMs }
  );

  return { stdout: stdout.trim(), durationMs, model };
}