qwen_analyze_image
Analyze images using Qwen's multimodal AI to generate descriptions, answer questions, and extract information from local files, URLs, or base64-encoded images.
Instructions
Use Qwen CLI to describe or analyze an image with its multimodal capabilities.
Input Schema
Table / JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| image | Yes | Local file path, http(s) URL, or base64-encoded image to analyze. | |
| prompt | No | Instruction for Qwen. Defaults to config value. | |
| model | No | Qwen model identifier. | |
| cliPath | No | Override the Qwen CLI executable path. | |
| sandbox | No | Whether to run the CLI with the sandbox flag (-s). | |
| extraFlags | No | Additional CLI flags to append as-is. | |
| timeoutMs | No | Maximum time (in milliseconds) to wait for CLI execution. | |
Implementation Reference
// src/tools/qwenImage.ts:56-123 (handler) — The handler function that orchestrates image preparation, Qwen CLI execution via runQwenImageAnalysis, output formatting, error handling, and cleanup.
/**
 * Analyzes one image with the Qwen CLI and wraps the outcome as a tool result.
 *
 * The trimmed `image` argument is passed to `prepareImage`, the prepared image
 * is handed to `runQwenImageAnalysis`, and `prepared.cleanup()` is awaited in
 * a `finally` clause so it runs whether the CLI call succeeds or fails.
 *
 * @param args - Tool arguments: `image` plus the optional `prompt`, `model`,
 *   `cliPath`, `sandbox`, `extraFlags` and `timeoutMs`.
 * @returns An object with a single text content item. On success the text is
 *   a "### Qwen Output" section followed by metadata lines and `isError` is
 *   `false`. When the CLI fails with a `CommandError`, the text carries the
 *   error message plus any non-empty stderr/stdout and `isError` is `true`.
 * @throws Re-throws any error that is not a `CommandError`.
 */
async execute(args: QwenArgs) {
  const originalInput = args.image.trim();
  const prepared = await prepareImage(originalInput, appConfig);

  // `||` instead of `??`: a whitespace-only prompt trims to "", which is not
  // nullish and would otherwise bypass the configured default and reach the
  // CLI as an empty instruction. The final fallback literal is Chinese for
  // "Please describe the content of this image."
  const prompt =
    args.prompt?.trim() ||
    appConfig.qwen.defaultPrompt ||
    "请描述这张图片的内容。";

  try {
    const result = await runQwenImageAnalysis({
      prompt,
      model: args.model,
      sandbox: args.sandbox,
      extraFlags: args.extraFlags,
      // A per-call timeout takes precedence over the configured one.
      timeoutMs: args.timeoutMs ?? appConfig.commandTimeoutMs,
      originalInput,
      preparedImage: prepared,
      commandOverride: args.cliPath,
    });

    // Empty stdout is replaced by a placeholder so the section is never blank.
    const cleaned = result.stdout || "(Qwen CLI returned no output)";

    const metaLines = [
      `model: ${result.model ?? "default"}`,
      `imageSource: ${prepared.source}`,
      `durationMs: ${result.durationMs}`,
    ];
    // The path is reported only when the image source is "local".
    if (prepared.source === "local") {
      metaLines.push(`imagePath: ${prepared.path}`);
    }

    return {
      content: [
        {
          type: "text",
          text: ["### Qwen Output", cleaned, "", metaLines.join("\n")].join(
            "\n"
          ),
        },
      ],
      isError: false,
    } as const;
  } catch (error) {
    if (error instanceof CommandError) {
      const stderr = error.result.stderr.trim();
      const stdout = error.result.stdout.trim();
      // Empty streams evaluate to "" and are dropped by filter(Boolean).
      const details = [
        error.message,
        stderr && `stderr:\n${stderr}`,
        stdout && `stdout:\n${stdout}`,
      ]
        .filter(Boolean)
        .join("\n\n");

      return {
        content: [
          {
            type: "text",
            text: `Qwen CLI failed:\n${details}`,
          },
        ],
        isError: true,
      };
    }
    throw error;
  } finally {
    await prepared.cleanup();
  }
},
// src/tools/qwenImage.ts:8-47 (schema) — Zod schema defining the input parameters and validation for the qwen_analyze_image tool.
/**
 * Input schema for the `qwen_analyze_image` tool.
 *
 * Only `image` is required. Every string field must be non-empty, each entry
 * of `extraFlags` must be a non-empty string, and `timeoutMs` must be a
 * positive integer no greater than 600,000 (ten minutes).
 */
const qwenSchema = (() => {
  // Fresh non-empty string validator for each field.
  const nonEmptyText = () => z.string().min(1);
  // Optional non-empty string carrying the given description.
  const optionalText = (description: string) =>
    nonEmptyText().optional().describe(description);

  const image = nonEmptyText().describe(
    "Local file path, http(s) URL, or base64-encoded image to analyze."
  );
  const sandbox = z
    .boolean()
    .optional()
    .describe("Whether to run the CLI with the sandbox flag (-s).");
  const extraFlags = z
    .array(nonEmptyText())
    .optional()
    .describe("Additional CLI flags to append as-is.");
  const timeoutMs = z
    .number()
    .int()
    .positive()
    .max(600_000)
    .optional()
    .describe("Maximum time (in milliseconds) to wait for CLI execution.");

  return z
    .object({
      image,
      prompt: optionalText("Instruction for Qwen. Defaults to config value."),
      model: optionalText("Qwen model identifier."),
      cliPath: optionalText("Override the Qwen CLI executable path."),
      sandbox,
      extraFlags,
      timeoutMs,
    })
    .describe("Invoke Qwen CLI to analyze an image.");
})();
- src/tools/qwenImage.ts:51-55 (registration) Registration of the tool with name, description, schema reference, and handler reference.
registerTool({ name: "qwen_analyze_image", description: "Use Qwen CLI to describe or analyze an image with its multimodal capabilities.", schema: qwenSchema,
// src/providers/qwen.ts:25-74 (helper) — Helper function that constructs the data URL for the image, builds CLI arguments, runs the Qwen command, and returns stdout, model, timing.
/**
 * Runs the Qwen CLI against a prepared image and returns its trimmed output.
 *
 * The image is referenced inside the prompt text: an http(s) original input
 * is passed through unchanged, anything else is embedded as a base64 data URL
 * built from the prepared file.
 *
 * CLI arguments are assembled in this order: `-m <model>` (when a model is
 * set on the call or in the config), `-s` (when `sandbox` is truthy), the
 * configured `appConfig.qwen.extraArgs`, the per-call `extraFlags`, and
 * finally `-p <prompt>`.
 *
 * @param options - Prompt, image, and CLI settings for this invocation.
 * @returns Trimmed stdout, the duration reported by `runCommand`, the model
 *   that was selected (possibly undefined), and the exact prompt sent.
 */
export async function runQwenImageAnalysis(
  options: QwenImageOptions
): Promise<QwenSuccessResult> {
  let imageReference: string;
  if (/^https?:\/\//i.test(options.originalInput)) {
    // Remote images are referenced by URL, so the prepared file is not read;
    // reading and base64-encoding it here would be discarded work.
    imageReference = options.originalInput;
  } else {
    const buffer = await fs.readFile(options.preparedImage.path);
    // NOTE(review): the second argument looks like the fallback MIME type
    // used when none can be guessed from the path — confirm against
    // guessImageMimeType.
    const mimeType = guessImageMimeType(
      options.preparedImage.path,
      "image/jpeg"
    );
    imageReference = `data:${mimeType};base64,${buffer.toString("base64")}`;
  }

  const cliArgs: string[] = [];

  const model = options.model ?? appConfig.qwen.defaultModel;
  if (model) {
    cliArgs.push("-m", model);
  }
  if (options.sandbox) {
    cliArgs.push("-s");
  }
  if (appConfig.qwen.extraArgs?.length) {
    cliArgs.push(...appConfig.qwen.extraArgs);
  }
  if (options.extraFlags?.length) {
    cliArgs.push(...options.extraFlags);
  }

  // The image reference is appended to the prompt, separated by a blank line.
  const finalPrompt = `${options.prompt.trim()}\n\n${imageReference}`;
  cliArgs.push("-p", finalPrompt);

  const result = await runCommand(
    options.commandOverride ?? appConfig.qwen.command,
    cliArgs,
    {
      timeoutMs: options.timeoutMs,
    }
  );

  return {
    stdout: result.stdout.trim(),
    durationMs: result.durationMs,
    model,
    promptUsed: finalPrompt,
  };
}