visualize-detection-result
Draw bounding boxes and labels on images to visualize object detection results from the DINO-X Image Detection MCP Server.
Instructions
Visualize detection results by drawing bounding boxes and labels on the original image. Images are saved to the directory specified by IMAGE_STORAGE_DIRECTORY environment variable.
Input Schema
Table / JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| imageFileUri | Yes | URI of the input image. Preferred for remote or local files. Must start with 'https://' or 'file://'. | |
| detections | Yes | Array of detection results with name and bbox information. | |
| fontSize | No | Font size for labels (default: 24) | |
| boxThickness | No | Thickness of bounding box lines (default: 4) | |
| showLabels | No | Whether to show category labels (default: true) | |
Implementation Reference
- src/servers/stdio-server.ts:295-339 (handler)MCP tool handler function for 'visualize-detection-result'. Validates inputs, calls the visualizeDetections helper to draw bounding boxes and labels on the image, saves the result, and returns the file path.

// Handler for the 'visualize-detection-result' tool. Re-validates the input,
// delegates drawing to visualizeDetections(), and reports either the saved
// file path or the failure reason as tool text output (never throws).
async (args) => {
  try {
    const { imageFileUri, detections, fontSize, boxThickness, showLabels } = args;

    // Guard clause: both the image URI and a detections array are mandatory.
    if (!imageFileUri || !Array.isArray(detections)) {
      return {
        content: [{ type: 'text', text: 'Image file URI and detections array are required' }],
      };
    }

    // Storage directory falls back to the platform default when unconfigured.
    const savedPath = await visualizeDetections(
      imageFileUri,
      detections,
      { fontSize, boxThickness, showLabels },
      this.config.imageStorageDirectory || getDefaultImageStorageDirectory()
    );

    return {
      content: [{ type: 'text', text: `Visualization saved to: ${savedPath}` }],
    };
  } catch (error) {
    // Report errors in-band so the MCP client sees a readable message.
    const reason = error instanceof Error ? error.message : String(error);
    return {
      content: [{ type: 'text', text: `Failed to visualize detections: ${reason}` }],
    };
  }
}
- src/utils/index.ts:154-272 (helper)Core helper function that implements the visualization logic: loads image, generates SVG overlays with bounding boxes and labels using category-specific colors, composites with Sharp, and saves the annotated image.export async function visualizeDetections( imageUri: string, detections: DetectionResult[], options: VisualizationOptions = {}, storageDir: string ): Promise<string> { let imageBuffer: Buffer; let imagePath = ''; if (imageUri.startsWith('file://')) { imagePath = decodeURIComponent(fileURLToPath(imageUri)); if (!fs.existsSync(imagePath)) { throw new Error('Image file not found: ' + imagePath); } imageBuffer = fs.readFileSync(imagePath); } else if (imageUri.startsWith('https://')) { imagePath = imageUri; // For HTTPS URLs, we need to fetch the image const response = await fetch(imageUri); imageBuffer = Buffer.from(await response.arrayBuffer()); } else { throw new Error('Invalid image file URI. Please use a valid file:// or https:// scheme.'); } // Get image metadata const imageInfo = await sharp(imageBuffer).metadata(); const { width = 0, height = 0 } = imageInfo; const { fontSize = 24, boxThickness = 4, colors = DEFAULT_COLORS, showLabels = true, } = options; const fontFamily = getPlatformFont(); // Create category to color mapping const categoryColorMap = new Map<string, string>(); let colorIndex = 0; // Build SVG overlay with all annotations const svgElements: string[] = []; for (const detection of detections) { if (!categoryColorMap.has(detection.name)) { categoryColorMap.set(detection.name, colors[colorIndex % colors.length]); colorIndex++; } const color = categoryColorMap.get(detection.name)!; const { bbox } = detection; const rectWidth = bbox.xmax - bbox.xmin; const rectHeight = bbox.ymax - bbox.ymin; svgElements.push(`<rect x="${bbox.xmin}" y="${bbox.ymin}" width="${rectWidth}" height="${rectHeight}" fill="none" stroke="${color}" stroke-width="${boxThickness}" shape-rendering="crispEdges"/>`); if (showLabels) { 
const label = detection.name; const textWidth = estimateTextWidth(label, fontSize); const textHeight = fontSize; const padding = 4; let labelY = bbox.ymin - textHeight - padding * 2; if (labelY < 0) { labelY = bbox.ymin; } const bgX = bbox.xmin; const bgY = labelY; const bgWidth = textWidth + padding * 2; const bgHeight = textHeight + padding * 2; // Draw label background svgElements.push(`<rect x="${bgX}" y="${bgY}" width="${bgWidth}" height="${bgHeight}" fill="${color}"/>`); // Draw label text const textX = bgX + padding; const textY = bgY + padding + textHeight * 0.8; // Escape special characters in label text for XML const escapedLabel = label.replace(/&/g, '&').replace(/</g, '<').replace(/>/g, '>'); svgElements.push(`<text x="${textX}" y="${textY}" fill="white" font-size="${fontSize}" font-family='${fontFamily}' text-rendering="optimizeLegibility" dominant-baseline="alphabetic">${escapedLabel}</text>`); } } // Generate complete SVG overlay const svgOverlay = `<svg width="${width}" height="${height}" xmlns="http://www.w3.org/2000/svg"> <defs> <style> text { font-weight: normal; text-rendering: optimizeLegibility; } rect { shape-rendering: crispEdges; } </style> </defs> ${svgElements.join('\n ')} </svg>`; // Generate output path const outputPath = generateOutputPath(imagePath, storageDir); // Composite SVG overlay onto image using Sharp await sharp(imageBuffer) .composite([ { input: Buffer.from(svgOverlay), top: 0, left: 0 } ]) .png({ quality: 100, compressionLevel: 6 }) .toFile(outputPath); return outputPath; }
- src/servers/stdio-server.ts:275-341 (registration)Registration of the 'visualize-detection-result' tool in the stdio MCP server, including schema, description, and handler attachment.

// Registers the 'visualize-detection-result' tool with the MCP server:
// pulls name/description from the shared ToolConfigs table, declares the zod
// input schema, and attaches the async handler that delegates to
// visualizeDetections(). Errors are reported as tool text output, not thrown.
private registerVisualizeDetectionResultTool(): void {
  const { name, description } = ToolConfigs[Tool.VISUALIZE_DETECTION_RESULT];
  this.server.tool(
    name,
    description,
    // zod input schema: imageFileUri and detections are required; the
    // remaining fields are optional rendering knobs.
    {
      imageFileUri: z.string().describe("URI of the input image. Preferred for remote or local files. Must start with 'https://' or 'file://'."),
      detections: z.array(z.object({
        name: z.string().describe("Object category name"),
        bbox: z.object({
          xmin: z.number(),
          ymin: z.number(),
          xmax: z.number(),
          ymax: z.number()
        }).describe("Bounding box coordinates")
      })).describe("Array of detection results with name and bbox information."),
      fontSize: z.number().optional().describe("Font size for labels (default: 24)"),
      boxThickness: z.number().optional().describe("Thickness of bounding box lines (default: 4)"),
      showLabels: z.boolean().optional().describe("Whether to show category labels (default: true)")
    },
    async (args) => {
      try {
        const { imageFileUri, detections, fontSize, boxThickness, showLabels } = args;
        // Defensive re-validation on top of the zod schema.
        if (!imageFileUri || !detections || !Array.isArray(detections)) {
          return {
            content: [
              {
                type: 'text',
                text: 'Image file URI and detections array are required',
              },
            ],
          };
        }
        // Delegate drawing/saving to the shared helper; the storage
        // directory falls back to the default when not configured.
        const visualizedImagePath = await visualizeDetections(
          imageFileUri,
          detections,
          { fontSize, boxThickness, showLabels },
          this.config.imageStorageDirectory || getDefaultImageStorageDirectory()
        );
        return {
          content: [
            { type: "text", text: `Visualization saved to: ${visualizedImagePath}` },
          ]
        };
      } catch (error) {
        // Surface failures in-band so the MCP client gets a readable message.
        return {
          content: [
            {
              type: 'text',
              text: `Failed to visualize detections: ${error instanceof Error ? error.message : String(error)}`,
            },
          ],
        };
      }
    }
  )
}
- src/constants/tool.ts:28-31 (schema)Tool configuration including name and description used for registration.

// ToolConfigs entry for 'visualize-detection-result': the display name and
// description consumed by the server during tool registration.
[Tool.VISUALIZE_DETECTION_RESULT]: {
  name: Tool.VISUALIZE_DETECTION_RESULT,
  description: "Visualize detection results by drawing bounding boxes and labels on the original image. Images are saved to the directory specified by IMAGE_STORAGE_DIRECTORY environment variable.",
},
- src/utils/index.ts:90-107 (schema)Type definitions for detection results and visualization options used by the handler and helper.

/** A single detected object: category name plus its bounding box. */
export interface DetectionResult {
  // Category name; also used as the label text and the color-map key.
  name: string;
  // Box corners: (xmin, ymin) top-left, (xmax, ymax) bottom-right — presumably
  // pixel coordinates of the source image; confirm against the detector output.
  bbox: {
    xmin: number;
    ymin: number;
    xmax: number;
    ymax: number;
  };
  description?: string;
}

/** Rendering options accepted by visualizeDetections(). */
export interface VisualizationOptions {
  fontSize?: number;        // label font size (default 24)
  boxThickness?: number;    // bounding-box stroke width (default 4)
  colors?: string[];        // palette cycled per category (default DEFAULT_COLORS)
  showLabels?: boolean;     // draw category labels (default true)
  showConfidence?: boolean; // NOTE(review): not read by visualizeDetections as shown here
}