extract_images

Instructions

Extract and list images from a DOCX file

Input Schema

Table | JSON Schema

Name	Required	Description	Default
`file_path`	Yes	Path to the .docx file
`output_dir`	No	Directory to save extracted images (optional)

Implementation Reference

src/index.ts:221-320 (registration)
Registration of the 'extract_images' tool using server.tool, including name, description, input schema, and inline handler function.
/**
 * Registers the 'extract_images' tool.
 *
 * Input:
 *  - file_path: path to the .docx file (resolved against the current working directory).
 *  - output_dir: optional directory; when given, each image is written there and
 *    reported by file path, otherwise each image is reported as a base64 data URI.
 *
 * Result: a single text content item holding a JSON summary
 * (`total_images`, `images`, `output_directory`, `messages`). Any failure is
 * reported as a text item prefixed with "Error extracting images:" and `isError: true`.
 */
server.tool(
  'extract_images',
  'Extract and list images from a DOCX file',
  {
    file_path: z.string().describe('Path to the .docx file'),
    output_dir: z
      .string()
      .optional()
      .describe('Directory to save extracted images (optional)'),
  },
  async ({ file_path, output_dir }) => {
    // Undo HTML attribute escaping on values read back from the generated markup.
    // `&amp;` is decoded last so that `&amp;lt;` becomes `&lt;` rather than `<`.
    // NOTE(review): assumes mammoth escapes only & " < > in attributes — confirm
    // against the installed mammoth version.
    const decodeAttribute = (value: string): string =>
      value
        .replace(/&quot;/g, '"')
        .replace(/&lt;/g, '<')
        .replace(/&gt;/g, '>')
        .replace(/&amp;/g, '&')

    // Turn a MIME type into a file extension that is safe to put in a file name:
    // 'image/svg+xml' -> 'svg', 'image/png' -> 'png', missing/odd values -> 'bin'.
    // The content type comes from the document itself, so it is not trusted.
    const extensionFor = (contentType: unknown): string => {
      const subtype =
        (typeof contentType === 'string' ? contentType : '').split('/')[1] || ''
      const base = subtype.split(/[+;]/)[0] || ''
      return base.replace(/[^a-z0-9-]/gi, '') || 'bin'
    }

    try {
      const absolutePath = path.resolve(file_path)
      if (!fs.existsSync(absolutePath)) {
        throw new Error(`File not found: ${absolutePath}`)
      }

      // Resolved once; an empty or missing output_dir selects base64 embedding.
      const outputPath = output_dir ? path.resolve(output_dir) : undefined

      const options = {
        convertImage: mammoth.images.imgElement(function (image: any) {
          return image.read().then(function (imageBuffer: Buffer) {
            if (outputPath === undefined) {
              // NOTE(review): `size` is returned here but the <img> parsing below
              // only reads `src` and `alt` — confirm whether it should be reported.
              return {
                src: `data:${image.contentType};base64,${imageBuffer.toString(
                  'base64'
                )}`,
                alt: image.altText || 'Embedded image',
                size: imageBuffer.length,
              }
            }

            // With `recursive: true` this succeeds when the directory already
            // exists, so no separate existence check (and no race) is needed.
            fs.mkdirSync(outputPath, { recursive: true })

            // slice(2, 11) keeps the same nine characters substr(2, 9) did.
            const uniqueSuffix = Math.random().toString(36).slice(2, 11)
            const imagePath = path.join(
              outputPath,
              `image_${Date.now()}_${uniqueSuffix}.${extensionFor(
                image.contentType
              )}`
            )
            fs.writeFileSync(imagePath, imageBuffer)
            return {
              src: imagePath,
              alt: image.altText || 'Extracted image',
            }
          })
        }),
      }

      const result = await mammoth.convertToHtml(
        { path: absolutePath },
        options
      )

      // List every <img> tag of the generated HTML with its decoded src/alt.
      const images = (result.value.match(/<img[^>]*>/gi) || []).map(
        (img: string) => {
          const srcMatch = img.match(/src="([^"]*)"/)
          const altMatch = img.match(/alt="([^"]*)"/)
          const src = srcMatch ? decodeAttribute(srcMatch[1]) : ''
          return {
            src,
            alt: altMatch ? decodeAttribute(altMatch[1]) : '',
            is_base64: src.startsWith('data:'),
          }
        }
      )

      return {
        content: [
          {
            type: 'text',
            text: JSON.stringify(
              {
                total_images: images.length,
                images: images,
                output_directory: output_dir || 'Images embedded as base64',
                messages: result.messages,
              },
              null,
              2
            ),
          },
        ],
      }
    } catch (error) {
      // Non-Error throwables have no `.message`; stringify them instead.
      const reason = error instanceof Error ? error.message : String(error)
      return {
        content: [
          {
            type: 'text',
            text: `Error extracting images: ${reason}`,
          },
        ],
        isError: true,
      }
    }
  }
)
src/index.ts:224-230 (schema)
Input schema for the 'extract_images' tool, defining file_path (required string) and optional output_dir (string). Uses Zod for validation.
// Input schema excerpt for the 'extract_images' tool, built with Zod (`z`).
// - file_path: string with no `.optional()`, described as the path to the .docx file.
// - output_dir: optional string, described as the directory to save extracted images.
{ file_path: z.string().describe('Path to the .docx file'), output_dir: z .string() .optional() .describe('Directory to save extracted images (optional)'), },
src/index.ts:231-319 (handler)
The handler logic for 'extract_images'. Converts the DOCX to HTML using mammoth with a custom image converter that either saves each image to disk or embeds it as a base64 data URI. It then parses the resulting HTML to list all img tags and returns a JSON summary with the image details.
/**
 * Handler for the 'extract_images' tool.
 *
 * Converts the .docx at `file_path` to HTML with mammoth, using an image
 * converter that writes each image into `output_dir` when it is given and
 * otherwise embeds it as a base64 data URI. The <img> tags of the generated
 * HTML are then listed in a JSON summary returned as a single text item.
 * Any failure is returned as a text item prefixed with
 * "Error extracting images:" together with `isError: true`.
 */
async ({ file_path, output_dir }) => {
  // Undo HTML attribute escaping on values read back from the generated markup.
  // `&amp;` is decoded last so that `&amp;lt;` becomes `&lt;` rather than `<`.
  // NOTE(review): assumes mammoth escapes only & " < > in attributes — confirm
  // against the installed mammoth version.
  const decodeAttribute = (value: string): string =>
    value
      .replace(/&quot;/g, '"')
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&amp;/g, '&')

  // Turn a MIME type into a file extension that is safe to put in a file name:
  // 'image/svg+xml' -> 'svg', 'image/png' -> 'png', missing/odd values -> 'bin'.
  // The content type comes from the document itself, so it is not trusted.
  const extensionFor = (contentType: unknown): string => {
    const subtype =
      (typeof contentType === 'string' ? contentType : '').split('/')[1] || ''
    const base = subtype.split(/[+;]/)[0] || ''
    return base.replace(/[^a-z0-9-]/gi, '') || 'bin'
  }

  try {
    const absolutePath = path.resolve(file_path)
    if (!fs.existsSync(absolutePath)) {
      throw new Error(`File not found: ${absolutePath}`)
    }

    // Resolved once; an empty or missing output_dir selects base64 embedding.
    const outputPath = output_dir ? path.resolve(output_dir) : undefined

    const options = {
      convertImage: mammoth.images.imgElement(function (image: any) {
        return image.read().then(function (imageBuffer: Buffer) {
          if (outputPath === undefined) {
            // NOTE(review): `size` is returned here but the <img> parsing below
            // only reads `src` and `alt` — confirm whether it should be reported.
            return {
              src: `data:${image.contentType};base64,${imageBuffer.toString(
                'base64'
              )}`,
              alt: image.altText || 'Embedded image',
              size: imageBuffer.length,
            }
          }

          // With `recursive: true` this succeeds when the directory already
          // exists, so no separate existence check (and no race) is needed.
          fs.mkdirSync(outputPath, { recursive: true })

          // slice(2, 11) keeps the same nine characters substr(2, 9) did.
          const uniqueSuffix = Math.random().toString(36).slice(2, 11)
          const imagePath = path.join(
            outputPath,
            `image_${Date.now()}_${uniqueSuffix}.${extensionFor(
              image.contentType
            )}`
          )
          fs.writeFileSync(imagePath, imageBuffer)
          return {
            src: imagePath,
            alt: image.altText || 'Extracted image',
          }
        })
      }),
    }

    const result = await mammoth.convertToHtml(
      { path: absolutePath },
      options
    )

    // List every <img> tag of the generated HTML with its decoded src/alt.
    const images = (result.value.match(/<img[^>]*>/gi) || []).map(
      (img: string) => {
        const srcMatch = img.match(/src="([^"]*)"/)
        const altMatch = img.match(/alt="([^"]*)"/)
        const src = srcMatch ? decodeAttribute(srcMatch[1]) : ''
        return {
          src,
          alt: altMatch ? decodeAttribute(altMatch[1]) : '',
          is_base64: src.startsWith('data:'),
        }
      }
    )

    return {
      content: [
        {
          type: 'text',
          text: JSON.stringify(
            {
              total_images: images.length,
              images: images,
              output_directory: output_dir || 'Images embedded as base64',
              messages: result.messages,
            },
            null,
            2
          ),
        },
      ],
    }
  } catch (error) {
    // Non-Error throwables have no `.message`; stringify them instead.
    const reason = error instanceof Error ? error.message : String(error)
    return {
      content: [
        {
          type: 'text',
          text: `Error extracting images: ${reason}`,
        },
      ],
      isError: true,
    }
  }
}

DOCX MCP Server

Instructions

Input Schema

Implementation Reference

Other Tools

Related Tools

Latest Blog Posts

MCP directory API