html_extract_resources
Extract images, videos, and links from an HTML file and save them to a specified directory.
Instructions
Extract all resources (images, videos, links) from HTML
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| inputPath | Yes | Path to the input HTML file | |
| outputDir | Yes | Directory where resources should be saved | |
Implementation Reference
- src/tools/htmlTools.ts:72-90 (schema) The Tool schema definition for html_extract_resources, defining its name, description, and input schema (inputPath, outputDir).
/**
 * HTML resource extraction tool.
 *
 * Tool descriptor for `html_extract_resources`: declares the tool name, its
 * human-readable description, and a JSON-schema style `inputSchema` with two
 * required string arguments:
 *  - `inputPath`: path to the input HTML file
 *  - `outputDir`: directory where resources should be saved
 */
export const HTML_EXTRACT_RESOURCES_TOOL: Tool = {
  name: "html_extract_resources",
  description: "Extract all resources (images, videos, links) from HTML",
  inputSchema: {
    type: "object",
    properties: {
      inputPath: {
        type: "string",
        description: "Path to the input HTML file",
      },
      outputDir: {
        type: "string",
        description: "Directory where resources should be saved",
      },
    },
    // Both arguments are mandatory; there are no defaults.
    required: ["inputPath", "outputDir"],
  },
};
// - src/tools/htmlTools.ts:244-297 (handler)The handler function extractHtmlResources that implements the resource extraction logic: reads HTML, extracts images/videos/links, and saves to JSON.
/**
 * HTML resource extraction implementation.
 *
 * Reads the HTML file at `inputPath`, collects the URLs of its images
 * (`<img>`), links (`<a>`) and videos (`<video src>` and `<video><source>`),
 * and writes them as pretty-printed JSON to
 * `<outputDir>/resources_<uniqueId>.json`.
 *
 * The function never throws: any failure (unreadable input, uncreatable or
 * unwritable output directory, ...) is logged to stderr and reported through
 * the returned object.
 *
 * @param inputPath - Path to the input HTML file.
 * @param outputDir - Directory for the JSON report; created (including
 *   missing parents) when it does not exist yet.
 * @returns `{ success: true, data }` with a message naming the written file,
 *   or `{ success: false, error }` with the failure message.
 */
export async function extractHtmlResources(
  inputPath: string,
  outputDir: string
): Promise<
  | { success: true; data: string; error?: undefined }
  | { success: false; error: string; data?: undefined }
> {
  try {
    console.error(`Starting resource extraction...`);
    console.error(`Input file: ${inputPath}`);
    console.error(`Output directory: ${outputDir}`);

    // Ensure the output directory exists. A recursive mkdir succeeds when the
    // directory is already present, so no separate existence check is needed
    // (and there is no window between "check" and "create").
    await fs.mkdir(outputDir, { recursive: true });
    console.error(`Output directory ready: ${outputDir}`);

    const uniqueId = generateUniqueId();
    const htmlContent = await fs.readFile(inputPath, "utf-8");
    const { document } = new JSDOM(htmlContent).window;

    // An element that lacks the URL attribute reports an empty string for the
    // reflected property; such entries carry no information, so drop them.
    const withoutEmpty = (urls: string[]): string[] =>
      urls.filter((url) => url !== "");

    // Extract resources
    const resources = {
      images: withoutEmpty(
        Array.from(document.querySelectorAll("img"), (img) => img.src)
      ),
      links: withoutEmpty(
        Array.from(document.querySelectorAll("a"), (anchor) => anchor.href)
      ),
      // A video may name its media directly (`<video src>`) or through child
      // `<source>` elements; cover both forms.
      videos: withoutEmpty(
        Array.from(
          document.querySelectorAll<HTMLVideoElement | HTMLSourceElement>(
            "video[src], video source"
          ),
          (media) => media.src
        )
      ),
    };

    const outputPath = path.join(outputDir, `resources_${uniqueId}.json`);
    await fs.writeFile(outputPath, JSON.stringify(resources, null, 2));
    console.error(`Written resources to ${outputPath}`);

    return {
      success: true,
      data: `Successfully extracted resources: ${outputPath}`,
    };
  } catch (error) {
    console.error(`Error in extractHtmlResources:`, error);
    return {
      success: false,
      error: error instanceof Error ? error.message : "Unknown error",
    };
  }
}
// - src/tools/_index.ts:1-32 (registration)The tool registration/index file that imports HTML_EXTRACT_RESOURCES_TOOL and includes it in the exported tools array.
import { DOCUMENT_READER_TOOL } from "./documentReader.js"; import { DOCX_TO_HTML_TOOL, DOCX_TO_PDF_TOOL } from "./docxTools.js"; import { EXCEL_READ_TOOL } from "./excelTools.js"; import { FORMAT_CONVERTER_TOOL } from "./formatConverterPlus.js"; import { HTML_CLEAN_TOOL, HTML_EXTRACT_RESOURCES_TOOL, HTML_FORMAT_TOOL, HTML_TO_MARKDOWN_TOOL, HTML_TO_TEXT_TOOL } from "./htmlTools.js"; import { PDF_MERGE_TOOL, PDF_SPLIT_TOOL } from "./pdfTools.js"; import { TEXT_DIFF_TOOL, TEXT_ENCODING_CONVERT_TOOL, TEXT_FORMAT_TOOL, TEXT_SPLIT_TOOL } from "./txtTools.js"; export const tools = [DOCUMENT_READER_TOOL, PDF_MERGE_TOOL, PDF_SPLIT_TOOL, DOCX_TO_PDF_TOOL, DOCX_TO_HTML_TOOL, HTML_CLEAN_TOOL, HTML_TO_TEXT_TOOL, HTML_TO_MARKDOWN_TOOL, HTML_EXTRACT_RESOURCES_TOOL, HTML_FORMAT_TOOL, TEXT_DIFF_TOOL, TEXT_SPLIT_TOOL, TEXT_FORMAT_TOOL, TEXT_ENCODING_CONVERT_TOOL, EXCEL_READ_TOOL, FORMAT_CONVERTER_TOOL]; export * from "./documentReader.js"; export * from "./docxTools.js"; export * from "./excelTools.js"; export * from "./formatConverterPlus.js"; export * from "./htmlTools.js"; export * from "./pdfTools.js"; export * from "./txtTools.js"; - src/index.ts:204-220 (registration)The request handler in src/index.ts that routes the 'html_extract_resources' tool call to the extractHtmlResources handler function.
// Dispatch branch for the `html_extract_resources` tool: validates the
// arguments, runs the extraction, and wraps the outcome in a text response.
if (name === "html_extract_resources") {
  // `args` comes from the client request, so check it at runtime instead of
  // trusting a type assertion: a missing argument object or a non-string
  // value would otherwise surface later as an obscure failure.
  const { inputPath, outputDir } = (args ?? {}) as {
    inputPath?: unknown;
    outputDir?: unknown;
  };
  if (typeof inputPath !== "string" || typeof outputDir !== "string") {
    return {
      content: [
        {
          type: "text",
          text: "Error: inputPath and outputDir are required and must be strings",
        },
      ],
      isError: true,
    };
  }

  const result = await extractHtmlResources(inputPath, outputDir);

  // Failure: surface the handler's error message to the client.
  if (!result.success) {
    return {
      content: [{ type: "text", text: `Error: ${result.error}` }],
      isError: true,
    };
  }

  // Success: format the handler's message with the shared response helper.
  return {
    content: [{ type: "text", text: fileOperationResponse(result.data) }],
    isError: false,
  };
}