
Generate image

generate_image

Generate images from a text prompt using OpenAI's GPT Image and DALL·E models. Customize model, size, quality, and number of images. Saves results to disk and returns file paths.

Instructions

Create one or more images from a text prompt using OpenAI's image models (gpt-image-1.5, gpt-image-1, gpt-image-1-mini, dall-e-3, dall-e-2). Images are saved to disk and file paths are returned.

Input Schema

All parameters except prompt are optional.

  • prompt (required): Text description of the image to generate.
  • model: Model to use. Defaults to env DALLE_DEFAULT_MODEL or gpt-image-1.5.
  • size: Image dimensions. Allowed values depend on the model (see list_models).
  • quality: GPT Image: auto|low|medium|high. DALL·E 3: standard|hd. DALL·E 2: standard.
  • n: Number of images to generate. DALL·E 3 supports only 1.
  • background: GPT Image only. Use 'transparent' with png/webp for alpha channel output.
  • output_format: GPT Image only. Output file format.
  • output_compression: GPT Image only. Compression % for jpeg/webp (0-100).
  • moderation: GPT Image only. Content moderation strictness.
  • style: DALL·E 3 only. Vivid = hyper-real/dramatic, natural = more muted.
  • user: End-user identifier for OpenAI abuse monitoring.
  • output_dir: Absolute directory to save generated images. Defaults to $DALLE_OUTPUT_DIR or ~/dalle-mcp-output.
  • filename_prefix: Prefix used when naming saved files (alphanumeric/underscore/dash).
  • return_image_content: If true, return the generated images as MCP image content blocks in addition to saving them to disk. Adds significant tokens. Default: false.
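Taken together, a typical argument object for this tool might look like the following sketch; every value here is an illustrative choice, not a server default:

```typescript
// Illustrative arguments for a generate_image call. Two transparent PNGs
// from a GPT Image model, saved with a custom filename prefix.
const args = {
  prompt: "A watercolor fox in a snowy forest",
  model: "gpt-image-1",
  size: "1024x1024",
  quality: "high",
  n: 2,
  background: "transparent",
  output_format: "png",
  filename_prefix: "fox",
};

console.log(JSON.stringify(args));
```

Note that background, output_format, and output_compression are silently ignored for DALL·E models, as the handler below shows.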

Implementation Reference

  • Registration and handler for the generate_image tool. Defines the tool on the MCP server with input schema and async callback that calls OpenAI image generation API and saves results to disk.
    export function registerGenerateTool(server: McpServer): void {
      server.registerTool(
        "generate_image",
        {
          title: "Generate image",
          description:
            "Create one or more images from a text prompt using OpenAI's image models (gpt-image-1.5, gpt-image-1, gpt-image-1-mini, dall-e-3, dall-e-2). Images are saved to disk and file paths are returned.",
          inputSchema,
        },
        async (args) => {
          try {
            const model = (args.model ?? defaultModel()) as Model;
            const info = MODELS[model];
    
            const params: ImageGenerateParams = {
              model,
              prompt: args.prompt,
            };
            if (args.size) params.size = args.size as ImageGenerateParams["size"];
            if (args.quality) params.quality = args.quality as ImageGenerateParams["quality"];
            if (args.n !== undefined) params.n = args.n;
            if (args.user) params.user = args.user;
    
            if (info.family === "gpt-image") {
              if (args.background) params.background = args.background;
              if (args.output_format) params.output_format = args.output_format;
              if (args.output_compression !== undefined) params.output_compression = args.output_compression;
              if (args.moderation) params.moderation = args.moderation;
            } else {
              // dall-e-* support response_format; gpt-image does not.
              params.response_format = "b64_json";
              if (info.family === "dall-e-3" && args.style) {
                params.style = args.style;
              }
            }
    
            const client = getOpenAI();
            const response = await client.images.generate(params);
            const items = response.data ?? [];
            if (items.length === 0) {
              return errorContent(new Error("OpenAI returned no images."));
            }
    
            const outDir = resolveOutputDir(args.output_dir);
            const seed = `${Date.now()}_${args.prompt}`;
            const saved = await Promise.all(
              items.map(async (item, i) => {
                const extracted = await extractImage(item, response.output_format ?? args.output_format ?? null);
                return saveImage(extracted, outDir, args.filename_prefix, seed, i);
              }),
            );
    
            const lines: string[] = [
              `Generated ${saved.length} image${saved.length === 1 ? "" : "s"} with ${model}.`,
              `Saved to: ${outDir}`,
              "",
              ...saved.map((s, i) => {
                const parts = [`  [${i}] ${s.path} (${s.mime}, ${s.bytes} bytes)`];
                if (s.revisedPrompt) parts.push(`      revised_prompt: ${s.revisedPrompt}`);
                return parts.join("\n");
              }),
            ];
            if (response.usage) {
              lines.push("", `Usage: ${JSON.stringify(response.usage)}`);
            }
    
            return {
              content: buildContent(lines.join("\n"), saved, args.return_image_content === true),
            };
          } catch (err) {
            return errorContent(err);
          }
        },
      );
    }
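The handler leans on a few helpers this page does not show (errorContent, extractImage, buildContent). As one example, a minimal buildContent consistent with its call site might look like the sketch below; the SavedImage shape and content-block fields are assumptions inferred from the surrounding code, not the project's actual implementation:

```typescript
// Sketch of a buildContent helper matching the call site above (assumed shape).
type SavedImage = { path: string; mime: string; bytes: number; buffer: Buffer };

type ContentBlock =
  | { type: "text"; text: string }
  | { type: "image"; data: string; mimeType: string };

function buildContent(
  summary: string,
  saved: SavedImage[],
  includeImages: boolean,
): ContentBlock[] {
  // Always lead with the text summary (paths, sizes, usage).
  const content: ContentBlock[] = [{ type: "text", text: summary }];
  if (includeImages) {
    for (const s of saved) {
      // MCP image blocks carry base64-encoded data plus a MIME type.
      content.push({ type: "image", data: s.buffer.toString("base64"), mimeType: s.mime });
    }
  }
  return content;
}
```

This also makes the return_image_content trade-off concrete: the base64 payload is what "adds significant tokens" when the flag is true.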
  • Input schema (Zod definitions) for the generate_image tool, defining all accepted parameters with descriptions and validation.
    const inputSchema = {
      prompt: z.string().min(1).max(32000).describe("Text description of the image to generate."),
      model: z
        .enum(["gpt-image-1.5", "gpt-image-1", "gpt-image-1-mini", "dall-e-3", "dall-e-2"])
        .optional()
        .describe("Model to use. Defaults to env DALLE_DEFAULT_MODEL or gpt-image-1.5."),
      size: z
        .enum([
          "auto",
          "1024x1024",
          "1536x1024",
          "1024x1536",
          "1792x1024",
          "1024x1792",
          "512x512",
          "256x256",
        ])
        .optional()
        .describe("Image dimensions. Allowed values depend on the model (see list_models)."),
      quality: z
        .enum(["auto", "low", "medium", "high", "standard", "hd"])
        .optional()
        .describe(
          "GPT Image: auto|low|medium|high. DALL·E 3: standard|hd. DALL·E 2: standard.",
        ),
      n: z
        .number()
        .int()
        .min(1)
        .max(10)
        .optional()
        .describe("Number of images to generate. DALL·E 3 supports only 1."),
      background: z
        .enum(["transparent", "opaque", "auto"])
        .optional()
        .describe("GPT Image only. Use 'transparent' with png/webp for alpha channel output."),
      output_format: z
        .enum(["png", "jpeg", "webp"])
        .optional()
        .describe("GPT Image only. Output file format."),
      output_compression: z
        .number()
        .int()
        .min(0)
        .max(100)
        .optional()
        .describe("GPT Image only. Compression % for jpeg/webp (0-100)."),
      moderation: z
        .enum(["auto", "low"])
        .optional()
        .describe("GPT Image only. Content moderation strictness."),
      style: z
        .enum(["vivid", "natural"])
        .optional()
        .describe("DALL·E 3 only. Vivid = hyper-real/dramatic, natural = more muted."),
      user: z.string().optional().describe("End-user identifier for OpenAI abuse monitoring."),
      output_dir: z
        .string()
        .optional()
        .describe("Absolute directory to save generated images. Defaults to $DALLE_OUTPUT_DIR or ~/dalle-mcp-output."),
      filename_prefix: z
        .string()
        .optional()
        .describe("Prefix used when naming saved files (alphanumeric/underscore/dash)."),
      return_image_content: z
        .boolean()
        .optional()
        .describe(
          "If true, return the generated images as MCP image content blocks in addition to saving them to disk. Adds significant tokens. Default: false.",
        ),
    };
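The size enum above is the union of every model's sizes, so the per-model constraint ("see list_models") must be enforced elsewhere. A sketch of such a check, using per-model lists that follow OpenAI's published sizes (the exact lists list_models reports are an assumption):

```typescript
// Per-model size allow-lists; the schema's enum is the union of these.
const ALLOWED_SIZES: Record<string, readonly string[]> = {
  "gpt-image-1": ["auto", "1024x1024", "1536x1024", "1024x1536"],
  "dall-e-3": ["1024x1024", "1792x1024", "1024x1792"],
  "dall-e-2": ["256x256", "512x512", "1024x1024"],
};

function sizeAllowed(model: string, size: string): boolean {
  return (ALLOWED_SIZES[model] ?? []).includes(size);
}
```

A request that passes schema validation can therefore still fail at the API if, say, a DALL·E 2 call asks for 1792x1024.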
  • src/server.ts:7-25 (registration)
    Server creation where registerGenerateTool is called to register the generate_image tool on the MCP server.
    export function createServer(): McpServer {
      const server = new McpServer(
        {
          name: "dalle-mcp",
          version: "0.1.0",
        },
        {
          instructions:
            "OpenAI image generation server. Call list_models first if you're unsure which model supports the size/quality/feature you want. generate_image creates from a prompt; edit_image modifies existing images (optionally with a mask); create_variation generates dall-e-2 variations. Images are saved to disk; pass return_image_content=true to also receive them as MCP image blocks.",
        },
      );
    
      registerModelsTool(server);
      registerGenerateTool(server);
      registerEditTool(server);
      registerVariationTool(server);
    
      return server;
    }
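createServer still needs a transport to serve requests. A minimal entry point, assuming the standard @modelcontextprotocol/sdk stdio transport and this project's src/server.ts layout, might look like:

```typescript
// Hypothetical src/index.ts wiring createServer() to stdio.
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { createServer } from "./server.js";

async function main(): Promise<void> {
  const server = createServer();
  // Serve MCP requests over stdin/stdout, as MCP clients typically expect.
  await server.connect(new StdioServerTransport());
}

main().catch((err) => {
  console.error(err);
  process.exit(1);
});
```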
  • Helper that maps output_format string to ImageMime type, used when extracting images from OpenAI responses.
    export function mimeFromFormat(format: string | undefined | null): ImageMime {
      switch (format) {
        case "jpeg":
          return "image/jpeg";
        case "webp":
          return "image/webp";
        case "png":
        default:
          return "image/png";
      }
    }
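Its counterpart extFromMime, referenced by saveImage below but not shown on this page, would be the inverse mapping. A minimal sketch (the exact extensions it returns are an assumption):

```typescript
// Assumed inverse of mimeFromFormat: MIME type to file extension.
type ImageMime = "image/png" | "image/jpeg" | "image/webp";

function extFromMime(mime: ImageMime): string {
  switch (mime) {
    case "image/jpeg":
      return "jpg";
    case "image/webp":
      return "webp";
    default:
      return "png";
  }
}
```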
  • Helper that saves an extracted image buffer to disk, used by the generate_image handler to persist output files.
    export async function saveImage(
      extracted: ExtractedImage,
      outputDir: string,
      prefix: string | undefined,
      seed: string,
      index: number,
    ): Promise<SavedImage> {
      await ensureDir(outputDir);
      const ext = extFromMime(extracted.mime);
      const name = buildFilename(prefix, seed, index, ext);
      const path = join(outputDir, name);
      await writeFile(path, extracted.buffer);
      return {
        path,
        mime: extracted.mime,
        bytes: extracted.buffer.byteLength,
        buffer: extracted.buffer,
        revisedPrompt: extracted.revisedPrompt,
      };
    }
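buildFilename is also not shown here. Given that saveImage receives a seed of the form `${Date.now()}_${prompt}` plus an index, one plausible sketch hashes the seed so repeated prompts do not collide; the hashing scheme and "image" fallback prefix are assumptions about intent, not the project's actual code:

```typescript
import { createHash } from "node:crypto";

// Hypothetical buildFilename: sanitized prefix + short seed hash + index.
function buildFilename(
  prefix: string | undefined,
  seed: string,
  index: number,
  ext: string,
): string {
  const hash = createHash("sha256").update(seed).digest("hex").slice(0, 8);
  // Keep only alphanumeric/underscore/dash, per the filename_prefix docs.
  const safePrefix = (prefix ?? "image").replace(/[^A-Za-z0-9_-]/g, "_");
  return `${safePrefix}_${hash}_${index}.${ext}`;
}
```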
Behavior: 3/5

Does the description disclose side effects, auth requirements, rate limits, or destructive behavior?

Description mentions images are saved to disk and file paths returned, but lacks details on API calls, costs, or side effects. With no annotations, more behavioral context would be beneficial.

Agents need to know what a tool does to the world before calling it. Descriptions should go beyond structured annotations to explain consequences.

Conciseness: 4/5

Is the description appropriately sized, front-loaded, and free of redundancy?

A single sentence, concise and front-loaded. No wasted words, though it could be slightly more structured.

Shorter descriptions cost fewer tokens and are easier for agents to parse. Every sentence should earn its place.

Completeness: 2/5

Given the tool's complexity, does the description cover enough for an agent to succeed on first attempt?

Given 14 parameters, no output schema, and zero annotations, the description is too minimal. Does not explain return format sufficiently (e.g., optional image content), error handling, or directory behavior.

Complex tools with many parameters or behaviors need more documentation. Simple tools need less. This dimension scales expectations accordingly.

Parameters: 3/5

Does the description clarify parameter syntax, constraints, interactions, or defaults beyond what the schema provides?

Schema coverage is 100%, so baseline is 3. Description adds no additional parameter information beyond what schema provides.

Input schemas describe structure but not intent. Descriptions should explain non-obvious parameter relationships and valid value ranges.

Purpose: 5/5

Does the description clearly state what the tool does and how it differs from similar tools?

Description clearly states 'Create one or more images from a text prompt' and lists specific models, distinguishing it from siblings like create_variation (variations) and edit_image (edits).

Agents choose between tools based on descriptions. A clear purpose with a specific verb and resource helps agents select the right tool.

Usage Guidelines: 2/5

Does the description explain when to use this tool, when not to, or what alternatives exist?

No explicit guidance on when to use this tool versus alternatives like create_variation or edit_image; the agent must infer from context.

Agents often have multiple tools that could apply. Explicit usage guidance like "use X instead of Y when Z" prevents misuse.
