send_media
Send images, videos, documents, and audio files to a WhatsApp contact or group using file paths, URLs, or base64 content. Supports optional captions and voice notes.
Instructions
Send media (image, video, document, audio) via WhatsApp.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| recipient_jid | Yes | The recipient JID (e.g., 123456789@s.whatsapp.net or 123456789-12345678@g.us) | |
| media_path | No | Absolute path to the local media file | |
| media_url | No | URL of the media file | |
| media_content | No | Base64 encoded media content | |
| mime_type | No | MIME type of the media_content (required if using media_content) | |
| filename | No | Filename for the media (recommended if using media_content) | |
| caption | No | Optional caption for the media | |
| as_audio_message | No | Send audio specifically as a voice note (requires ffmpeg for conversion if not opus/ogg) | |
| idempotency_key | No | Optional idempotency key. Repeating the same send_media request with the same key returns the original result instead of sending again. | |
| include_full_data | No | Whether to include the full base64 data in the response | |
Implementation Reference
- src/tools/media.ts:77-281 (handler) The main tool handler function for 'send_media'. Accepts recipient_jid, media_path, media_url, media_content, mime_type, filename, caption, as_audio_message, idempotency_key, and include_full_data. Determines input type (path/url/base64), handles audio conversion if as_audio_message is true, delegates to whatsappService.sendMedia or sendMediaFromBase64, and returns the result.
/**
 * Handler for the `send_media` tool.
 *
 * The media source is chosen in priority order: `media_path`, then
 * `media_url`, then `media_content` (base64, which also requires
 * `mime_type`). If none is provided an error result is returned.
 *
 * When `as_audio_message` is true, URL and base64 inputs are first written to
 * a temporary file, and any file whose path does not end in ".ogg" is passed
 * through `AudioUtils.convertToOpusOggTemp` before being sent. Every temporary
 * file created here is removed in a `finally` block, so nothing is left behind
 * when conversion or sending fails.
 *
 * @returns A text content item holding the JSON-encoded send summary, or an
 *   error result (`isError: true`) carrying a human-readable message.
 */
async ({
  recipient_jid,
  media_path,
  media_url,
  media_content,
  mime_type,
  filename,
  caption,
  as_audio_message,
  idempotency_key,
  include_full_data = false,
}): Promise<CallToolResult> => {
  let input: string | null = null;
  let inputType: "path" | "url" | "base64" | null = null;

  if (media_path) {
    input = media_path;
    inputType = "path";
  } else if (media_url) {
    input = media_url;
    inputType = "url";
  } else if (media_content) {
    if (!mime_type) {
      return {
        content: [
          {
            type: "text",
            text: "mime_type is required when using media_content",
          },
        ],
        isError: true,
      };
    }
    input = media_content;
    inputType = "base64";
  }

  if (!input || !inputType) {
    return {
      content: [
        {
          type: "text",
          text: "One of media_path, media_url, or media_content must be provided",
        },
      ],
      isError: true,
    };
  }

  // Paths of temporary files created while preparing a voice note. They are
  // removed in `finally`, whether the send succeeds or throws.
  const tempFiles: string[] = [];

  try {
    // Hash of the caller-visible request parameters; passed to the service
    // together with the idempotency key.
    const requestFingerprint = crypto
      .createHash("sha256")
      .update(
        JSON.stringify({
          recipient_jid,
          media_path: media_path || null,
          media_url: media_url || null,
          media_content: media_content || null,
          mime_type: mime_type || null,
          filename: filename || null,
          caption: caption || null,
          as_audio_message,
        }),
      )
      .digest("hex");

    let sentMessage: any;
    let finalMediaPath = media_path;

    if (as_audio_message) {
      let audioPath: string;

      if (inputType === "path") {
        audioPath = input;
      } else {
        // URL and base64 inputs are materialised on disk because both the
        // converter and sendMedia are given a file path.
        let buffer: Buffer;
        if (inputType === "url") {
          const resp = await axios.get(input, {
            responseType: "arraybuffer",
          });
          buffer = Buffer.from(resp.data);
        } else {
          buffer = Buffer.from(input, "base64");
        }
        const detected = await fileTypeFromBuffer(buffer);
        const ext = detected?.ext || "bin";
        // The random suffix keeps concurrent requests issued within the same
        // millisecond from writing to the same temporary file.
        const uniqueSuffix = crypto.randomBytes(6).toString("hex");
        audioPath = path.join(
          os.tmpdir(),
          `whatsapp_audio_${Date.now()}_${uniqueSuffix}.${ext}`,
        );
        // Registered before writing so a partially written file is cleaned up.
        tempFiles.push(audioPath);
        fs.writeFileSync(audioPath, buffer);
      }

      if (!audioPath.endsWith(".ogg")) {
        const convertedPath =
          await AudioUtils.convertToOpusOggTemp(audioPath);
        tempFiles.push(convertedPath);
        audioPath = convertedPath;
        // NOTE: this path is reported as `filePathUsed`, but the converted
        // file is deleted before the response is returned.
        finalMediaPath = convertedPath;
      }

      sentMessage = await whatsappService.sendMedia(
        recipient_jid,
        audioPath,
        caption,
        true,
        {
          idempotencyKey: idempotency_key,
          requestFingerprint,
        },
      );
    } else if (inputType === "base64") {
      sentMessage = await whatsappService.sendMediaFromBase64(
        recipient_jid,
        input,
        // mime_type was verified above when media_content was selected.
        mime_type!,
        filename,
        caption,
        false,
        {
          idempotencyKey: idempotency_key,
          requestFingerprint,
        },
      );
    } else {
      sentMessage = await whatsappService.sendMedia(
        recipient_jid,
        input,
        caption,
        false,
        {
          idempotencyKey: idempotency_key,
          requestFingerprint,
        },
      );
      if (inputType === "path") {
        finalMediaPath = input;
      }
    }

    const messageId =
      sentMessage?.key?.remoteJid && sentMessage?.key?.id
        ? `${sentMessage.key.remoteJid}:${sentMessage.key.id}`
        : undefined;

    const result: Record<string, unknown> = {
      success: true,
      message: `Media (${as_audio_message ? "audio message" : "file"}) sent successfully.`,
      messageId: messageId || "unknown",
      timestamp: Number(sentMessage?.messageTimestamp || Date.now() / 1000),
      filePathUsed: finalMediaPath,
      deduplicated: Boolean(sentMessage?.__deduplicated),
      idempotencyKey: idempotency_key || null,
    };

    // Full media data is echoed back only for base64 and local-path inputs;
    // URL inputs are never included.
    if (include_full_data && inputType === "base64") {
      result.mediaData = media_content;
      result.mimeType = mime_type;
    } else if (
      include_full_data &&
      inputType === "path" &&
      input &&
      fs.existsSync(input)
    ) {
      const buffer = fs.readFileSync(input);
      result.mediaData = buffer.toString("base64");
      const detectedType = await fileTypeFromBuffer(buffer);
      result.mimeType = detectedType?.mime || "application/octet-stream";
    }

    return {
      content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
    };
  } catch (error: unknown) {
    log.error(`Error in send_media tool to ${recipient_jid}:`, error);
    // Thrown values are not guaranteed to be Error instances, so only read
    // `.message` when the value actually has one.
    const reason =
      typeof error === "object" && error !== null && "message" in error
        ? String((error as { message: unknown }).message)
        : String(error);
    return {
      content: [
        {
          type: "text",
          text: `Error sending media to ${recipient_jid}: ${reason}`,
        },
      ],
      isError: true,
    };
  } finally {
    // Best-effort cleanup: a failure to delete a temporary file is logged and
    // must not turn an already-sent message into an error result.
    for (const tempFile of tempFiles) {
      try {
        if (fs.existsSync(tempFile)) {
          fs.unlinkSync(tempFile);
        }
      } catch (cleanupError: unknown) {
        log.error(`Failed to remove temporary file ${tempFile}:`, cleanupError);
      }
    }
  }
},
- src/tools/media.ts:28-76 (schema)Zod schema defining the input parameters for the send_media tool: recipient_jid (required), media_path, media_url, media_content, mime_type, filename, caption, as_audio_message (default false), idempotency_key, and include_full_data (default false).
{ recipient_jid: z .string() .describe( "The recipient JID (e.g., 123456789@s.whatsapp.net or 123456789-12345678@g.us)", ), media_path: z .string() .optional() .describe("Absolute path to the local media file"), media_url: z.string().url().optional().describe("URL of the media file"), media_content: z .string() .optional() .describe("Base64 encoded media content"), mime_type: z .string() .optional() .describe( "MIME type of the media_content (required if using media_content)", ), filename: z .string() .optional() .describe( "Filename for the media (recommended if using media_content)", ), caption: z.string().optional().describe("Optional caption for the media"), as_audio_message: z .boolean() .optional() .default(false) .describe( "Send audio specifically as a voice note (requires ffmpeg for conversion if not opus/ogg)", ), idempotency_key: z .string() .min(1) .max(200) .optional() .describe( "Optional idempotency key. Repeating the same send_media request with the same key returns the original result instead of sending again.", ), include_full_data: z .boolean() .optional() .default(false) .describe("Whether to include the full base64 data in the response"), }, - src/tools/media.ts:19-282 (registration)The registerMediaTools function registers the 'send_media' tool (and 'download_media') on the MCP server. Called from server.ts line 247.
export function registerMediaTools( server: McpServer, whatsappService: WhatsAppService, ): void { log.info("Registering media tools..."); server.tool( "send_media", "Send media (image, video, document, audio) via WhatsApp.", { recipient_jid: z .string() .describe( "The recipient JID (e.g., 123456789@s.whatsapp.net or 123456789-12345678@g.us)", ), media_path: z .string() .optional() .describe("Absolute path to the local media file"), media_url: z.string().url().optional().describe("URL of the media file"), media_content: z .string() .optional() .describe("Base64 encoded media content"), mime_type: z .string() .optional() .describe( "MIME type of the media_content (required if using media_content)", ), filename: z .string() .optional() .describe( "Filename for the media (recommended if using media_content)", ), caption: z.string().optional().describe("Optional caption for the media"), as_audio_message: z .boolean() .optional() .default(false) .describe( "Send audio specifically as a voice note (requires ffmpeg for conversion if not opus/ogg)", ), idempotency_key: z .string() .min(1) .max(200) .optional() .describe( "Optional idempotency key. 
Repeating the same send_media request with the same key returns the original result instead of sending again.", ), include_full_data: z .boolean() .optional() .default(false) .describe("Whether to include the full base64 data in the response"), }, async ({ recipient_jid, media_path, media_url, media_content, mime_type, filename, caption, as_audio_message, idempotency_key, include_full_data = false, }): Promise<CallToolResult> => { let input: string | null = null; let inputType: "path" | "url" | "base64" | null = null; if (media_path) { input = media_path; inputType = "path"; } else if (media_url) { input = media_url; inputType = "url"; } else if (media_content) { if (!mime_type) { return { content: [ { type: "text", text: "mime_type is required when using media_content", }, ], isError: true, }; } input = media_content; inputType = "base64"; } if (!input || !inputType) { return { content: [ { type: "text", text: "One of media_path, media_url, or media_content must be provided", }, ], isError: true, }; } try { const requestFingerprint = crypto .createHash("sha256") .update( JSON.stringify({ recipient_jid, media_path: media_path || null, media_url: media_url || null, media_content: media_content || null, mime_type: mime_type || null, filename: filename || null, caption: caption || null, as_audio_message, }), ) .digest("hex"); let sentMessage: any; let finalMediaPath = media_path; if (as_audio_message) { let audioPath: string; let tempFilePath: string | null = null; let needsCleanup = false; if (inputType === "path") { audioPath = input; } else if (inputType === "url") { const resp = await axios.get(input, { responseType: "arraybuffer", }); const buffer = Buffer.from(resp.data); const detected = await fileTypeFromBuffer(buffer); const ext = detected?.ext || "bin"; tempFilePath = path.join( os.tmpdir(), `whatsapp_audio_${Date.now()}.${ext}`, ); fs.writeFileSync(tempFilePath, buffer); audioPath = tempFilePath; needsCleanup = true; } else { const buffer = 
Buffer.from(input, "base64"); const detected = await fileTypeFromBuffer(buffer); const ext = detected?.ext || "bin"; tempFilePath = path.join( os.tmpdir(), `whatsapp_audio_${Date.now()}.${ext}`, ); fs.writeFileSync(tempFilePath, buffer); audioPath = tempFilePath; needsCleanup = true; } if (!audioPath.endsWith(".ogg")) { const convertedPath = await AudioUtils.convertToOpusOggTemp(audioPath); if (needsCleanup && tempFilePath && fs.existsSync(tempFilePath)) { fs.unlinkSync(tempFilePath); } tempFilePath = convertedPath; needsCleanup = true; audioPath = convertedPath; finalMediaPath = audioPath; } sentMessage = await whatsappService.sendMedia( recipient_jid, audioPath, caption, true, { idempotencyKey: idempotency_key, requestFingerprint, }, ); if (needsCleanup && tempFilePath && fs.existsSync(tempFilePath)) { fs.unlinkSync(tempFilePath); } } else { if (inputType === "base64") { sentMessage = await whatsappService.sendMediaFromBase64( recipient_jid, input, mime_type!, filename, caption, false, { idempotencyKey: idempotency_key, requestFingerprint, }, ); } else { sentMessage = await whatsappService.sendMedia( recipient_jid, input, caption, false, { idempotencyKey: idempotency_key, requestFingerprint, }, ); if (inputType === "path") { finalMediaPath = input; } } } const messageId = sentMessage?.key?.remoteJid && sentMessage?.key?.id ? `${sentMessage.key.remoteJid}:${sentMessage.key.id}` : undefined; const result: any = { success: true, message: `Media (${as_audio_message ? 
"audio message" : "file"}) sent successfully.`, messageId: messageId || "unknown", timestamp: Number(sentMessage?.messageTimestamp || Date.now() / 1000), filePathUsed: finalMediaPath, deduplicated: Boolean(sentMessage?.__deduplicated), idempotencyKey: idempotency_key || null, }; if (include_full_data && inputType === "base64") { result.mediaData = media_content; result.mimeType = mime_type; } else if ( include_full_data && inputType === "path" && input && fs.existsSync(input) ) { const buffer = fs.readFileSync(input); result.mediaData = buffer.toString("base64"); const detectedType = await fileTypeFromBuffer(buffer); result.mimeType = detectedType?.mime || "application/octet-stream"; } return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }], }; } catch (error: any) { log.error(`Error in send_media tool to ${recipient_jid}:`, error); return { content: [ { type: "text", text: `Error sending media to ${recipient_jid}: ${error.message}`, }, ], isError: true, }; } }, ); - src/server.ts:77-82 (registration)Execution metadata hints for the 'send_media' tool: readOnlyHint: false, idempotentHint: true, destructiveHint: false, openWorldHint: true.
send_media: { readOnlyHint: false, idempotentHint: true, destructiveHint: false, openWorldHint: true, }, - src/services/whatsapp.ts:2352-2448 (helper)The sendMedia method on WhatsAppService handles the actual sending of media files (from local path or URL) via the WhatsApp socket. It reads/downloads the file into a buffer, detects mime type, builds the media message, and supports idempotency.
/**
 * Sends a media file to a chat, loading it from a local path or an http(s) URL.
 *
 * The MIME type is sniffed from the file contents and falls back to
 * "application/octet-stream" when the type is not recognised. The file name is
 * the basename of a local path, or `file.<ext>` for a URL whose type was
 * detected.
 *
 * @param jid Recipient JID; resolved through `resolveLookupJid` before sending.
 * @param input Local file path, or a URL starting with "http://" or "https://".
 * @param caption Optional caption forwarded to `buildMediaMessage`.
 * @param asAudioMessage Forwarded to `buildMediaMessage`.
 * @param options When `idempotencyKey` is set, the send goes through
 *   `executeIdempotentOperation`; `requestFingerprint` overrides the
 *   fingerprint otherwise derived from the request.
 * @returns Whatever the socket's `sendMessage` (or the idempotent wrapper)
 *   resolves to.
 * @throws If the download or the file read fails, or the send itself rejects.
 */
async sendMedia(
  jid: string,
  input: string,
  caption?: string,
  asAudioMessage = false,
  options?: { idempotencyKey?: string | null; requestFingerprint?: string },
): Promise<any> {
  const isRemote =
    input.startsWith("http://") || input.startsWith("https://");

  let buffer: Buffer;
  if (isRemote) {
    const resp = await axios.get(input, { responseType: "arraybuffer" });
    buffer = Buffer.from(resp.data);
  } else {
    buffer = fs.readFileSync(input);
  }

  const detected = await fileTypeFromBuffer(buffer);
  const mimetype = detected?.mime ?? "application/octet-stream";

  let filename: string | undefined;
  if (!isRemote) {
    // A local file keeps its own name even when content sniffing does not
    // recognise it, so such files are not sent nameless.
    filename = path.basename(input);
  } else if (detected) {
    filename = `file.${detected.ext}`;
  }

  const content = await this.buildMediaMessage(
    buffer,
    mimetype,
    filename,
    caption,
    asAudioMessage,
  );
  const normalized = this.resolveLookupJid(jid);
  const send = () => this.getSocket().sendMessage(normalized, content);

  if (options?.idempotencyKey) {
    return await this.executeIdempotentOperation(
      "send_media",
      options.requestFingerprint ||
        this.buildRequestFingerprint(
          normalized,
          JSON.stringify({
            input,
            caption: caption || null,
            asAudioMessage,
            mimetype,
            filename: filename || null,
          }),
        ),
      send,
      { idempotencyKey: options.idempotencyKey, scopeJid: normalized },
    );
  }
  return await send();
}

/**
 * Sends media supplied as a base64 string to a chat.
 *
 * Unlike `sendMedia`, the MIME type is taken from the caller rather than
 * detected from the content.
 *
 * @param jid Recipient JID; resolved through `resolveLookupJid` before sending.
 * @param base64 Base64-encoded media payload.
 * @param mimeType MIME type passed to `buildMediaMessage`.
 * @param filename Optional file name passed to `buildMediaMessage`.
 * @param caption Optional caption passed to `buildMediaMessage`.
 * @param asAudioMessage Forwarded to `buildMediaMessage`.
 * @param options When `idempotencyKey` is set, the send goes through
 *   `executeIdempotentOperation`; `requestFingerprint` overrides the
 *   fingerprint otherwise derived from the request.
 * @returns Whatever the socket's `sendMessage` (or the idempotent wrapper)
 *   resolves to.
 */
async sendMediaFromBase64(
  jid: string,
  base64: string,
  mimeType: string,
  filename?: string,
  caption?: string,
  asAudioMessage = false,
  options?: { idempotencyKey?: string | null; requestFingerprint?: string },
): Promise<any> {
  const buffer = Buffer.from(base64, "base64");
  const content = await this.buildMediaMessage(
    buffer,
    mimeType,
    filename,
    caption,
    asAudioMessage,
  );
  const normalized = this.resolveLookupJid(jid);
  const send = () => this.getSocket().sendMessage(normalized, content);

  if (options?.idempotencyKey) {
    return await this.executeIdempotentOperation(
      "send_media",
      options.requestFingerprint ||
        this.buildRequestFingerprint(
          normalized,
          JSON.stringify({
            base64,
            mimeType,
            filename: filename || null,
            caption: caption || null,
            asAudioMessage,
          }),
        ),
      send,
      { idempotencyKey: options.idempotencyKey, scopeJid: normalized },
    );
  }
  return await send();
}