extract_images
Extract and save images from DOCX files to process document content efficiently.
Instructions
Extract and list images from a DOCX file
Input Schema
Table / JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| file_path | Yes | Path to the .docx file | |
| output_dir | No | Directory to save extracted images (optional) | |
Implementation Reference
- src/index.ts:231-319 (handler) The handler function implements the core logic for extracting images from DOCX files using the mammoth library. It converts the DOCX to HTML with custom image handling to either save images to a specified directory or embed them as base64 data URLs. It then parses the HTML to list all images with their sources and alt text.
  ```ts
  async ({ file_path, output_dir }) => {
    try {
      const absolutePath = path.resolve(file_path)

      if (!fs.existsSync(absolutePath)) {
        throw new Error(`File not found: ${absolutePath}`)
      }

      const options = {
        convertImage: mammoth.images.imgElement(function (image: any) {
          if (output_dir) {
            const outputPath = path.resolve(output_dir)
            if (!fs.existsSync(outputPath)) {
              fs.mkdirSync(outputPath, { recursive: true })
            }

            const imagePath = path.join(
              outputPath,
              `image_${Date.now()}_${Math.random().toString(36).substr(2, 9)}.${
                image.contentType.split('/')[1]
              }`
            )

            return image.read().then(function (imageBuffer: Buffer) {
              fs.writeFileSync(imagePath, imageBuffer)
              return {
                src: imagePath,
                alt: image.altText || 'Extracted image',
              }
            })
          } else {
            return image.read().then(function (imageBuffer: Buffer) {
              return {
                src: `data:${image.contentType};base64,${imageBuffer.toString(
                  'base64'
                )}`,
                alt: image.altText || 'Embedded image',
                size: imageBuffer.length,
              }
            })
          }
        }),
      }

      const result = await mammoth.convertToHtml(
        { path: absolutePath },
        options
      )

      const images = (result.value.match(/<img[^>]*>/gi) || []).map(
        (img: string) => {
          const srcMatch = img.match(/src="([^"]*)"/)
          const altMatch = img.match(/alt="([^"]*)"/)
          return {
            src: srcMatch ? srcMatch[1] : '',
            alt: altMatch ? altMatch[1] : '',
            is_base64: srcMatch ? srcMatch[1].startsWith('data:') : false,
          }
        }
      )

      return {
        content: [
          {
            type: 'text',
            text: JSON.stringify(
              {
                total_images: images.length,
                images: images,
                output_directory: output_dir || 'Images embedded as base64',
                messages: result.messages,
              },
              null,
              2
            ),
          },
        ],
      }
    } catch (error) {
      return {
        content: [
          {
            type: 'text',
            text: `Error extracting images: ${(error as Error).message}`,
          },
        ],
        isError: true,
      }
    }
  }
  ```
- src/index.ts:224-230 (schema) Zod schema defining the input parameters for the extract_images tool: required file_path and optional output_dir.
  ```ts
  {
    file_path: z.string().describe('Path to the .docx file'),
    output_dir: z
      .string()
      .optional()
      .describe('Directory to save extracted images (optional)'),
  },
  ```
- src/index.ts:221-320 (registration) The registration of the extract_images tool using McpServer's server.tool method, specifying name, description, input schema, and handler function.
  ```ts
  server.tool(
    'extract_images',
    'Extract and list images from a DOCX file',
    {
      file_path: z.string().describe('Path to the .docx file'),
      output_dir: z
        .string()
        .optional()
        .describe('Directory to save extracted images (optional)'),
    },
    async ({ file_path, output_dir }) => {
      try {
        const absolutePath = path.resolve(file_path)

        if (!fs.existsSync(absolutePath)) {
          throw new Error(`File not found: ${absolutePath}`)
        }

        const options = {
          convertImage: mammoth.images.imgElement(function (image: any) {
            if (output_dir) {
              const outputPath = path.resolve(output_dir)
              if (!fs.existsSync(outputPath)) {
                fs.mkdirSync(outputPath, { recursive: true })
              }

              const imagePath = path.join(
                outputPath,
                `image_${Date.now()}_${Math.random().toString(36).substr(2, 9)}.${
                  image.contentType.split('/')[1]
                }`
              )

              return image.read().then(function (imageBuffer: Buffer) {
                fs.writeFileSync(imagePath, imageBuffer)
                return {
                  src: imagePath,
                  alt: image.altText || 'Extracted image',
                }
              })
            } else {
              return image.read().then(function (imageBuffer: Buffer) {
                return {
                  src: `data:${image.contentType};base64,${imageBuffer.toString(
                    'base64'
                  )}`,
                  alt: image.altText || 'Embedded image',
                  size: imageBuffer.length,
                }
              })
            }
          }),
        }

        const result = await mammoth.convertToHtml(
          { path: absolutePath },
          options
        )

        const images = (result.value.match(/<img[^>]*>/gi) || []).map(
          (img: string) => {
            const srcMatch = img.match(/src="([^"]*)"/)
            const altMatch = img.match(/alt="([^"]*)"/)
            return {
              src: srcMatch ? srcMatch[1] : '',
              alt: altMatch ? altMatch[1] : '',
              is_base64: srcMatch ? srcMatch[1].startsWith('data:') : false,
            }
          }
        )

        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify(
                {
                  total_images: images.length,
                  images: images,
                  output_directory: output_dir || 'Images embedded as base64',
                  messages: result.messages,
                },
                null,
                2
              ),
            },
          ],
        }
      } catch (error) {
        return {
          content: [
            {
              type: 'text',
              text: `Error extracting images: ${(error as Error).message}`,
            },
          ],
          isError: true,
        }
      }
    }
  )
  ```