Skip to main content
Glama
geminiImage.ts3.73 kB
import { z } from "zod"; import { appConfig } from "../config/index.js"; import { runGeminiImageAnalysis } from "../providers/gemini.js"; import { prepareImage } from "../utils/file.js"; import { createAttachmentReference } from "../utils/prompt.js"; import { CommandError } from "../utils/cli.js"; import { registerTool } from "./registry.js"; const geminiSchema = z .object({ image: z .string() .min(1) .describe( "Local file path, http(s) URL, or base64-encoded image to analyze." ), prompt: z .string() .min(1) .optional() .describe("Instruction for Gemini. Defaults to config value."), model: z .string() .min(1) .optional() .describe("Gemini model identifier (e.g., gemini-2.0-flash)."), cliPath: z .string() .min(1) .optional() .describe("Override the Gemini CLI executable path."), sandbox: z .boolean() .optional() .describe("Whether to run the CLI with the sandbox flag (-s)."), outputFormat: z .enum(["text", "json"]) .optional() .describe("Request Gemini CLI to return the specified output format."), extraFlags: z .array(z.string().min(1)) .optional() .describe("Additional CLI flags to append as-is."), timeoutMs: z .number() .int() .positive() .max(600_000) .optional() .describe("Maximum time (in milliseconds) to wait for CLI execution."), }) .describe("Invoke Gemini CLI to analyze an image."); type GeminiArgs = z.infer<typeof geminiSchema>; registerTool({ name: "gemini_analyze_image", description: "Use Google Gemini CLI to describe or analyze an image using multimodal capabilities.", schema: geminiSchema, async execute(args: GeminiArgs) { const prepared = await prepareImage(args.image, appConfig); const prompt = args.prompt?.trim() ?? appConfig.gemini.defaultPrompt ?? "Describe this image."; const attachment = createAttachmentReference(prepared.path); try { const result = await runGeminiImageAnalysis({ prompt, model: args.model, sandbox: args.sandbox, outputFormat: args.outputFormat, extraFlags: args.extraFlags, timeoutMs: args.timeoutMs ?? appConfig.commandTimeoutMs, imageReference: attachment, preparedImage: prepared, commandOverride: args.cliPath, }); const cleaned = result.stdout || "(Gemini CLI returned no output)"; const metaLines = [ `model: ${result.model ?? "default"}`, `imageSource: ${prepared.source}`, `durationMs: ${result.durationMs}`, ]; if (prepared.source === "local") { metaLines.push(`imagePath: ${prepared.path}`); } return { content: [ { type: "text", text: [ "### Gemini Output", cleaned, "", metaLines.join("\n"), ].join("\n"), }, ], isError: false, } as const; } catch (error) { if (error instanceof CommandError) { const details = [ error.message, error.result.stderr.trim() && `stderr:\n${error.result.stderr.trim()}`, error.result.stdout.trim() && `stdout:\n${error.result.stdout.trim()}`, ] .filter(Boolean) .join("\n\n"); return { content: [ { type: "text", text: `Gemini CLI failed:\n${details}`, }, ], isError: true, }; } throw error; } finally { await prepared.cleanup(); } }, });

Implementation Reference

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ah-wq/mcp-vision-relay'

If you have feedback or need assistance with the MCP directory API, please join our Discord server