extract_images
Extract and save images from a DOCX file by specifying the file path. Optionally define an output directory to store the extracted images, simplifying image retrieval from Word documents.
Instructions
Extract and list images from a DOCX file
Input Schema
Table / JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| file_path | Yes | Path to the .docx file | |
| output_dir | No | Directory to save extracted images (optional) | |
Implementation Reference
- src/index.ts:221-320 (registration): Registration of the 'extract_images' tool using server.tool, including name, description, input schema, and inline handler function.

  ```ts
  server.tool(
    'extract_images',
    'Extract and list images from a DOCX file',
    {
      file_path: z.string().describe('Path to the .docx file'),
      output_dir: z
        .string()
        .optional()
        .describe('Directory to save extracted images (optional)'),
    },
    async ({ file_path, output_dir }) => {
      try {
        const absolutePath = path.resolve(file_path)

        if (!fs.existsSync(absolutePath)) {
          throw new Error(`File not found: ${absolutePath}`)
        }

        const options = {
          convertImage: mammoth.images.imgElement(function (image: any) {
            if (output_dir) {
              const outputPath = path.resolve(output_dir)
              if (!fs.existsSync(outputPath)) {
                fs.mkdirSync(outputPath, { recursive: true })
              }

              const imagePath = path.join(
                outputPath,
                `image_${Date.now()}_${Math.random().toString(36).substr(2, 9)}.${
                  image.contentType.split('/')[1]
                }`
              )

              return image.read().then(function (imageBuffer: Buffer) {
                fs.writeFileSync(imagePath, imageBuffer)
                return {
                  src: imagePath,
                  alt: image.altText || 'Extracted image',
                }
              })
            } else {
              return image.read().then(function (imageBuffer: Buffer) {
                return {
                  src: `data:${image.contentType};base64,${imageBuffer.toString(
                    'base64'
                  )}`,
                  alt: image.altText || 'Embedded image',
                  size: imageBuffer.length,
                }
              })
            }
          }),
        }

        const result = await mammoth.convertToHtml(
          { path: absolutePath },
          options
        )

        const images = (result.value.match(/<img[^>]*>/gi) || []).map(
          (img: string) => {
            const srcMatch = img.match(/src="([^"]*)"/)
            const altMatch = img.match(/alt="([^"]*)"/)
            return {
              src: srcMatch ? srcMatch[1] : '',
              alt: altMatch ? altMatch[1] : '',
              is_base64: srcMatch ? srcMatch[1].startsWith('data:') : false,
            }
          }
        )

        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify(
                {
                  total_images: images.length,
                  images: images,
                  output_directory: output_dir || 'Images embedded as base64',
                  messages: result.messages,
                },
                null,
                2
              ),
            },
          ],
        }
      } catch (error) {
        return {
          content: [
            {
              type: 'text',
              text: `Error extracting images: ${(error as Error).message}`,
            },
          ],
          isError: true,
        }
      }
    }
  )
  ```
- src/index.ts:224-230 (schema): Input schema for the 'extract_images' tool, defining file_path (required string) and optional output_dir (string). Uses Zod for validation.

  ```ts
  {
    file_path: z.string().describe('Path to the .docx file'),
    output_dir: z
      .string()
      .optional()
      .describe('Directory to save extracted images (optional)'),
  },
  ```
- src/index.ts:231-319 (handler): The handler logic for 'extract_images'. Converts DOCX to HTML using mammoth with a custom image converter to either save images to disk or embed them as base64. Parses the HTML to list all img tags and returns a JSON summary with image details.

  ```ts
  async ({ file_path, output_dir }) => {
    try {
      const absolutePath = path.resolve(file_path)

      if (!fs.existsSync(absolutePath)) {
        throw new Error(`File not found: ${absolutePath}`)
      }

      const options = {
        convertImage: mammoth.images.imgElement(function (image: any) {
          if (output_dir) {
            const outputPath = path.resolve(output_dir)
            if (!fs.existsSync(outputPath)) {
              fs.mkdirSync(outputPath, { recursive: true })
            }

            const imagePath = path.join(
              outputPath,
              `image_${Date.now()}_${Math.random().toString(36).substr(2, 9)}.${
                image.contentType.split('/')[1]
              }`
            )

            return image.read().then(function (imageBuffer: Buffer) {
              fs.writeFileSync(imagePath, imageBuffer)
              return {
                src: imagePath,
                alt: image.altText || 'Extracted image',
              }
            })
          } else {
            return image.read().then(function (imageBuffer: Buffer) {
              return {
                src: `data:${image.contentType};base64,${imageBuffer.toString(
                  'base64'
                )}`,
                alt: image.altText || 'Embedded image',
                size: imageBuffer.length,
              }
            })
          }
        }),
      }

      const result = await mammoth.convertToHtml(
        { path: absolutePath },
        options
      )

      const images = (result.value.match(/<img[^>]*>/gi) || []).map(
        (img: string) => {
          const srcMatch = img.match(/src="([^"]*)"/)
          const altMatch = img.match(/alt="([^"]*)"/)
          return {
            src: srcMatch ? srcMatch[1] : '',
            alt: altMatch ? altMatch[1] : '',
            is_base64: srcMatch ? srcMatch[1].startsWith('data:') : false,
          }
        }
      )

      return {
        content: [
          {
            type: 'text',
            text: JSON.stringify(
              {
                total_images: images.length,
                images: images,
                output_directory: output_dir || 'Images embedded as base64',
                messages: result.messages,
              },
              null,
              2
            ),
          },
        ],
      }
    } catch (error) {
      return {
        content: [
          {
            type: 'text',
            text: `Error extracting images: ${(error as Error).message}`,
          },
        ],
        isError: true,
      }
    }
  }
  ```