Skip to main content
Glama

ClaudeHopper

by Arborist-ai
extractor.ts (5.23 kB)
/**
 * Image extraction utilities for construction drawings
 *
 * This module provides tools for extracting images from PDF documents,
 * with a focus on technical drawings and construction plans.
 */
import * as fs from 'fs';
import * as path from 'path';
import * as child_process from 'child_process';
import { PDFDocument } from 'pdf-lib';

/** File names matching this pattern are reported as extracted images. */
const IMAGE_FILE_PATTERN = /\.(jpg|jpeg|png)$/i;

/**
 * Run an external command and wait for it to finish.
 *
 * @param command Executable to spawn
 * @param args Arguments passed to the executable
 * @param acceptedExitCodes Exit codes that count as success (defaults to `[0]`)
 * @returns Promise that resolves on an accepted exit code and rejects if the
 *          process cannot be started or exits with any other code
 */
function runCommand(
  command: string,
  args: readonly string[],
  acceptedExitCodes: readonly number[] = [0]
): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    // The child's output is never read here, so discard it instead of piping
    // it: an unread pipe can fill up and block the child indefinitely.
    const child = child_process.spawn(command, [...args], { stdio: 'ignore' });

    // A failed spawn (e.g. the executable is not installed) is reported via
    // the 'error' event. Without a listener Node throws it as an uncaught
    // exception, which would bypass every try/catch in this module.
    child.on('error', reject);

    child.on('close', (code) => {
      // `code` is null when the child was terminated by a signal.
      if (code !== null && acceptedExitCodes.includes(code)) {
        resolve();
      } else {
        reject(new Error(`${command} exited with code ${code}`));
      }
    });
  });
}

/**
 * List the image files (jpg/jpeg/png) directly inside a directory.
 *
 * @param dir Directory to scan
 * @returns Paths of the matching files, each joined onto `dir`
 */
async function listImageFiles(dir: string): Promise<string[]> {
  const entries = await fs.promises.readdir(dir);
  return entries
    .filter((entry) => IMAGE_FILE_PATTERN.test(entry))
    .map((entry) => path.join(dir, entry));
}

/**
 * Extract images from PDF files
 *
 * Images are written to `<outputDir>/<pdf base name>/`. Embedded images are
 * extracted with `pdfimages`; if that fails for any reason (including the tool
 * not being installed), whole pages are rendered with `pdftoppm` instead.
 * Failures are logged and reported as an empty array rather than thrown.
 *
 * @param pdfPath Path to the PDF file
 * @param outputDir Directory to save extracted images
 * @param resolution DPI resolution, used by the `pdftoppm` page-rendering fallback
 * @returns Array of paths to extracted images (empty if extraction failed)
 */
export async function extractImagesFromPdf(pdfPath: string, outputDir: string, resolution: number = 300): Promise<string[]> {
  try {
    // Create output directory if it doesn't exist
    await fs.promises.mkdir(outputDir, { recursive: true });

    // Each PDF gets its own subdirectory, named after the file without '.pdf'
    const baseName = path.basename(pdfPath, '.pdf');
    const pdfImagesDir = path.join(outputDir, baseName);
    await fs.promises.mkdir(pdfImagesDir, { recursive: true });

    console.log(`Extracting images from ${pdfPath} to ${pdfImagesDir}...`);

    // Parse the PDF up front: a missing or unparseable file fails here and is
    // reported by the outer catch before any external tool is started.
    const pdfBytes = await fs.promises.readFile(pdfPath);
    await PDFDocument.load(pdfBytes);

    try {
      // Preferred method: pull embedded images out with pdfimages
      await runCommand('pdfimages', [
        '-j', // Output JPEG images
        '-p', // Include page number in image filename
        pdfPath, // Input PDF
        path.join(pdfImagesDir, 'image') // Output path prefix
      ]);

      return await listImageFiles(pdfImagesDir);
    } catch (error) {
      console.error(`Error using pdfimages, falling back to alternative method:`, error);

      try {
        // Fallback method: render whole pages with pdftoppm
        await runCommand('pdftoppm', [
          '-jpeg', // Output JPEG images
          '-r', resolution.toString(), // Resolution
          pdfPath, // Input PDF
          path.join(pdfImagesDir, 'page') // Output path prefix
        ]);

        return await listImageFiles(pdfImagesDir);
      } catch (fallbackError) {
        console.error(`Fallback extraction also failed:`, fallbackError);
        console.error(`Image extraction fallback not successful. Please install pdfimages or pdftoppm.`);
        return [];
      }
    }
  } catch (error) {
    console.error(`Error extracting images from ${pdfPath}:`, error);
    return [];
  }
}

/**
 * Check if pdfimages utility is available on the system
 * @returns Promise resolving to boolean indicating availability
 */
export async function isPdfImagesAvailable(): Promise<boolean> {
  try {
    // pdfimages might return 1 for version info, so accept both 0 and 1
    await runCommand('pdfimages', ['-v'], [0, 1]);
    return true;
  } catch {
    // Covers both "could not be started" and an unexpected exit code
    return false;
  }
}

/**
 * Check if pdftoppm utility is available on the system
 * @returns Promise resolving to boolean indicating availability
 */
export async function isPdftoppmAvailable(): Promise<boolean> {
  try {
    // pdftoppm might return 1 for version info, so accept both 0 and 1
    await runCommand('pdftoppm', ['-v'], [0, 1]);
    return true;
  } catch {
    // Covers both "could not be started" and an unexpected exit code
    return false;
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Arborist-ai/ClaudeHopper'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.