Skip to main content
Glama

@pokutuna/mcp-chrome-tabs

by pokutuna
content-extraction-levels.ts7.2 kB
#!/usr/bin/env tsx /** * Content Extraction Levels Verification Script * * Issue#9 の準備実装として、defuddle で取得できるコンテンツのバリエーションを検証する * 3つの異なる抽出レベルでのコンテンツを比較する: * - auto: defuddle による自動抽出 (現在の動作) * - text: DOM の textContent による生テキスト抽出 * - html: 完全な HTML ソース */ import { Defuddle } from "defuddle/node"; import { JSDOM } from "jsdom"; interface ContentResult { level: "auto" | "text" | "html"; content: string; size: number; preview: string; } /** * 異なる抽出レベルでコンテンツを処理する */ async function extractContentWithLevels( html: string, url: string ): Promise<ContentResult[]> { const results: ContentResult[] = []; // Level: auto (defuddle) try { const defuddleResult = await Defuddle(html, url, { markdown: true }); const autoContent = defuddleResult?.content || ""; results.push({ level: "auto", content: autoContent, size: autoContent.length, preview: autoContent, // 全文表示 }); } catch (error) { const errorMessage = `ERROR: ${error instanceof Error ? error.message : String(error)}`; results.push({ level: "auto", content: errorMessage, size: 0, preview: errorMessage, }); } // Level: text (textContent) try { const dom = new JSDOM(html); const textContent = dom.window.document.body?.textContent || ""; results.push({ level: "text", content: textContent, size: textContent.length, preview: textContent, // 全文表示 }); } catch (error) { const errorMessage = `ERROR: ${error instanceof Error ? error.message : String(error)}`; results.push({ level: "text", content: errorMessage, size: 0, preview: errorMessage, }); } // Level: html (full HTML) const htmlContent = html; results.push({ level: "html", content: htmlContent, size: htmlContent.length, preview: htmlContent.slice(0, 200) + (htmlContent.length > 200 ? "..." : ""), // preview のみ }); return results; } /** * 現在のアクティブタブからコンテンツを取得する (AppleScript 経由) */ async function getCurrentTabContent( applicationName: string = "Google Chrome" ): Promise<{ title: string; url: string; html: string } | null> { try { const { executeAppleScript, separator } = await import( "../src/browser/osascript.js" ); const sep = separator(); // AppleScript でアクティブタブのコンテンツを直接取得 const appleScript = ` try tell application "${applicationName}" repeat with w in windows tell w set t to tab (active tab index) if URL of t is not "about:blank" then tell t set tabTitle to title set tabURL to URL set tabContent to execute javascript "document.documentElement.outerHTML" return tabTitle & "${sep}" & tabURL & "${sep}" & tabContent end tell end if end tell end repeat error "No active tab found" end tell on error errMsg return "ERROR" & "${sep}" & errMsg end try `; const result = await executeAppleScript(appleScript); if (result.startsWith(`ERROR${sep}`)) { throw new Error(result.split(sep)[1]); } const parts = result.split(sep).map((part) => part.trim()); if (parts.length < 3) { throw new Error("Failed to read the tab content"); } const [title, url, html] = parts; return { title, url, html }; } catch (error) { console.error("❌ Error getting current tab content:", error); return null; } } /** * 結果を表示する */ function displayResults(sampleName: string, results: ContentResult[]): void { console.log(`\n━━━ ${sampleName} ━━━`); results.forEach((result) => { console.log(`\n📄 Level: ${result.level.toUpperCase()}`); console.log(`📏 Size: ${result.size} characters`); if (result.level === "html") { console.log(`👀 Preview:`); console.log(result.preview); } else { console.log(`📄 Content:`); console.log(result.preview); // auto と text は全文 } console.log(`${"─".repeat(50)}`); }); // サイズ比較 const autoSize = results.find((r) => r.level === "auto")?.size || 0; const textSize = results.find((r) => r.level === "text")?.size || 0; const htmlSize = results.find((r) => r.level === "html")?.size || 0; console.log(`\n📊 Size Comparison:`); console.log( ` auto: ${autoSize} chars (${((autoSize / htmlSize) * 100).toFixed(1)}% of HTML)` ); console.log( ` text: ${textSize} chars (${((textSize / htmlSize) * 100).toFixed(1)}% of HTML)` ); console.log(` html: ${htmlSize} chars (100.0% of HTML)`); } /** * メイン実行関数 */ async function main(): Promise<void> { console.log("🔍 Content Extraction Levels Verification"); console.log("Testing different content extraction approaches for issue#9\n"); // 現在のタブからコンテンツを取得 console.log("📱 Getting content from current tab..."); const currentTab = await getCurrentTabContent(); if (!currentTab) { console.error("❌ Could not get current tab content."); console.error("Please make sure:"); console.error(" 1. Chrome is running with an active tab"); console.error( " 2. 'Allow JavaScript from Apple Events' is enabled in Chrome" ); console.error(" 3. The tab is not suspended or blank"); process.exit(1); } console.log(`🌐 Current tab: ${currentTab.title}`); console.log(`🔗 URL: ${currentTab.url}\n`); try { const results = await extractContentWithLevels( currentTab.html, currentTab.url ); displayResults("Current Tab", results); } catch (error) { console.error("❌ Error processing current tab:", error); process.exit(1); } console.log("\n✨ Verification complete!"); console.log("\n💡 Key Observations:"); console.log( " - 'auto' level provides clean, readable content but may miss some information" ); console.log( " - 'text' level gives raw text without formatting, useful for fallback" ); console.log( " - 'html' level provides complete content but requires client-side processing" ); console.log("\n📋 Next Steps:"); console.log( " 1. Implement content_level parameter in read_tab_content tool" ); console.log(" 2. Add proper error handling for each extraction method"); console.log(" 3. Consider HTML sanitization for security"); console.log(" 4. Update tool schema and documentation"); } // スクリプトとして直接実行された場合のみ main を呼び出す if (import.meta.url === `file://${process.argv[1]}`) { main().catch((error) => { console.error("❌ Script failed:", error); process.exit(1); }); } export { extractContentWithLevels, getCurrentTabContent };

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/pokutuna/mcp-chrome-tabs'

If you have feedback or need assistance with the MCP directory API, please join our Discord server