Skip to main content
Glama

webshot

Capture screenshots of web pages with customizable dimensions, device types, and image formats. Save screenshots to specified file paths for documentation, testing, or visual reference purposes.

Instructions

生成网页截图

Input Schema

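Table | JSON Schema

| Name | Required | Description | Default |
| --- | --- | --- | --- |
| device | No | 截图设备类型 | desktop |
| dpi_scale | No | DPI 缩放比例 | 2 |
| format | No | 截图文件格式 | png |
| height | No | 浏览器窗口高度,0表示全页面截图 | 768 |
| output | Yes | 截图文件保存路径,比如: /path/to/screenshot.png | |
| quality | No | 图片质量(仅对 jpeg 和 webp 有效) | 100 |
| url | Yes | 要截图的网页 URL,比如: https://www.baidu.com | |
| width | No | 浏览器窗口宽度 | 1280 |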

Implementation Reference

  • Registers the 'webshot' tool using @server.list_tools() decorator, including its schema and description.
    @server.list_tools()
    async def list_tools() -> list[Tool]:
        """Return the tools registered on this server.

        Only one tool, ``webshot``, is advertised. Its input schema requires
        ``url`` and ``output``; every other property is optional and carries
        a default value.
        """
        # Per-parameter JSON Schema fragments, kept in a local so the Tool
        # construction below stays short.
        schema_properties = {
            "url": {
                "type": "string",
                "description": "要截图的网页 URL,比如: https://www.baidu.com",
            },
            "output": {
                "type": "string",
                "description": "截图文件保存路径,比如: /path/to/screenshot.png",
            },
            "width": {
                "type": "integer",
                "description": "浏览器窗口宽度",
                "default": 1280,
            },
            "height": {
                "type": "integer",
                "description": "浏览器窗口高度,0表示全页面截图",
                "default": 768,
            },
            "dpi_scale": {
                "type": "number",
                "description": "DPI 缩放比例",
                "default": 2,
            },
            "device": {
                "type": "string",
                "enum": ["desktop", "mobile", "tablet"],
                "description": "截图设备类型",
                "default": "desktop",
            },
            "format": {
                "type": "string",
                "enum": ["png", "jpeg", "webp"],
                "description": "截图文件格式",
                "default": "png",
            },
            "quality": {
                "type": "integer",
                "minimum": 0,
                "maximum": 100,
                "description": "图片质量(仅对 jpeg 和 webp 有效)",
                "default": 100,
            },
        }

        webshot_tool = Tool(
            name="webshot",
            description="生成网页截图",
            inputSchema={
                "type": "object",
                "properties": schema_properties,
                "required": ["url", "output"],
            },
        )
        return [webshot_tool]
  • Main handler function for the webshot tool. Validates inputs, handles retries, and orchestrates the screenshot process by calling _take_screenshot_attempt.
    async def take_screenshot(
        url: str,
        output: str,
        width: int = 1280,
        height: int = 768,
        dpi_scale: float = 2,
        device: str = "desktop",
        format: str = "png",
        quality: int = 100,
        max_retries: int = 3
    ) -> Dict[str, str]:
        """Capture a screenshot of ``url`` into ``output``, retrying on failure.

        All arguments are validated before anything touches the filesystem.
        The parent directory of ``output`` is then created if needed, and
        ``_take_screenshot_attempt`` is called up to ``max_retries`` times
        with a one-second pause between failed attempts.

        Args:
            url: Page address; must start with ``http://`` or ``https://``.
            output: Destination file path for the screenshot.
            width: Browser viewport width, forwarded to the attempt helper.
            height: Browser viewport height, forwarded to the attempt helper
                (the tool schema documents 0 as "full page").
            dpi_scale: Device scale factor, forwarded to the attempt helper.
            device: Device type, forwarded to the attempt helper.
            format: Output image format: ``png``, ``jpeg`` or ``webp``.
            quality: Image quality in the range 0-100.
            max_retries: Total number of attempts; must be at least 1.

        Returns:
            The dictionary returned by the first successful attempt.

        Raises:
            ValueError: If ``url``, ``format``, ``quality`` or ``max_retries``
                is invalid.
            Exception: The error raised by the final attempt when every
                attempt fails.
        """
        # Validate input parameters.
        if not url.startswith(("http://", "https://")):
            raise ValueError("URL 必须以 http:// 或 https:// 开头")
        if format not in ("png", "jpeg", "webp"):
            raise ValueError("格式必须是 png、jpeg 或 webp")
        if quality < 0 or quality > 100:
            raise ValueError("质量必须在 0-100 之间")
        # Without this guard the retry loop would never run and the function
        # would fall through to raising ``None`` (an opaque TypeError).
        if max_retries < 1:
            raise ValueError("max_retries 必须大于等于 1")

        # Make sure the output directory exists.
        output_path = Path(output)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Retry loop: return on the first success, re-raise the last failure.
        for attempt in range(1, max_retries + 1):
            try:
                return await _take_screenshot_attempt(
                    url, output_path, width, height, dpi_scale,
                    device, format, quality
                )
            except Exception as e:
                logger.warning(f"截图尝试 {attempt}/{max_retries} 失败: {e}")
                if attempt == max_retries:
                    logger.error("所有截图尝试都失败了")
                    raise
                # Wait one second before the next attempt.
                await asyncio.sleep(1)
  • Core implementation of screenshot taking: launches browser (system Chrome/Edge preferred), sets up stealth mode, blocks trackers, caches resources, navigates, scrolls for lazy load, captures screenshot, processes image.
    async def _take_screenshot_attempt(
        url: str,
        output_path: Path,
        width: int,
        height: int,
        dpi_scale: float,
        device: str,
        format: str,
        quality: int
    ) -> Dict[str, str]:
        """Run a single screenshot attempt.

        Steps, in order: obtain a browser from ``_create_browser``, create a
        context (a Playwright device preset for non-desktop devices when one
        is available, otherwise a custom viewport), install the stealth
        script and the request route handler, navigate to ``url``, wait for
        load / network idle on a best-effort basis, scroll via
        ``_smart_scroll_page``, capture a PNG to a temporary file and hand it
        to ``_process_final_image`` to produce ``output_path``.

        Args:
            url: Page to capture.
            output_path: Final image path; the temporary PNG is written next
                to it with the suffix ``.temp.png``.
            width: Requested viewport width.
            height: Requested viewport height; 0 selects a full-page capture.
            dpi_scale: Requested device scale factor.
            device: Device type; any value other than ``"desktop"`` is looked
                up in ``DEVICE_MAPPING``.
            format: Output format passed to ``_process_final_image``.
            quality: Output quality passed to ``_process_final_image``.

        Returns:
            ``{"status": "success", "message": ...}`` on success.

        Raises:
            Exception: Any error from navigation, capture or image
                processing is logged and re-raised. The browser is always
                closed and the temporary PNG is always removed.
        """
        async with async_playwright() as p:
            # Launch the browser (the helper is expected to prefer a system
            # browser, per the original comment). The second element of the
            # returned pair is not used here.
            browser, _ = await _create_browser(p)

            try:
                # Dimensions actually in effect; overridden below when a
                # device preset is used.
                actual_width = width
                actual_height = height
                actual_dpi_scale = dpi_scale

                # Resolve a built-in Playwright device preset, if any.
                device_name = None
                device_config = None
                if device != "desktop" and device in DEVICE_MAPPING:
                    device_name = DEVICE_MAPPING[device]
                    if device_name in p.devices:
                        device_config = p.devices[device_name]

                if device_config is not None:
                    context = await browser.new_context(**device_config)
                else:
                    # Desktop, unknown device, or preset missing: fall back
                    # to a custom viewport.
                    context = await browser.new_context(
                        viewport={"width": width, "height": height},
                        device_scale_factor=dpi_scale
                    )
                await _add_stealth_script(context)
                page = await context.new_page()

                if device_config is not None:
                    # Use the preset's real dimensions so later processing
                    # does not distort the image.
                    if 'viewport' in device_config:
                        actual_width = device_config['viewport']['width']
                        # Keep the full-page setting when height == 0.
                        actual_height = device_config['viewport']['height'] if height != 0 else 0
                    if 'device_scale_factor' in device_config:
                        actual_dpi_scale = device_config['device_scale_factor']
                    logger.info(f"使用设备 {device_name},实际尺寸: {actual_width}x{actual_height if actual_height > 0 else '全页面'}, DPI: {actual_dpi_scale}")

                # Route every request through the filter / cache handler.
                await page.route("**/*", _handle_resource_cache)
                logger.info("已启用请求过滤和缓存机制")

                # 60-second default timeouts.
                page.set_default_timeout(60000)
                page.set_default_navigation_timeout(60000)

                # Navigate and load the page.
                logger.info(f"开始导航到页面: {url}")
                await page.goto(url, wait_until='domcontentloaded')
                logger.info("页面导航完成,等待基础加载")

                # Step 1: wait for the load event (best effort).
                try:
                    await page.wait_for_load_state('load', timeout=20000)
                    logger.info("页面基础加载完成")
                except Exception as e:
                    logger.warning(f"基础加载超时,继续执行: {str(e)}")

                # Step 2: wait for network idle with a short timeout.
                try:
                    await page.wait_for_load_state('networkidle', timeout=8000)
                    logger.info("网络空闲状态达成")
                except Exception as e:
                    logger.warning(f"网络空闲超时,继续执行: {str(e)}")

                # Step 3: scroll to trigger lazy-loaded content.
                logger.info("开始智能滚动以触发lazy load")
                await _smart_scroll_page(page, height)

                # Step 4: adaptive height (full-page mode).
                if height == 0:
                    logger.info("自适应高度模式,重新获取页面高度")
                    try:
                        # Re-read the page height after scrolling.
                        page_height = await page.evaluate('() => document.documentElement.scrollHeight')
                        logger.info(f"滚动后页面实际高度: {page_height}")

                        # Resize the viewport to the page height, keeping the
                        # effective device width.
                        await page.set_viewport_size({"width": actual_width, "height": page_height})
                        logger.info("视口大小调整完成")

                        # One more short network-idle wait. Only ordinary
                        # errors are ignored so task cancellation still
                        # propagates.
                        try:
                            await page.wait_for_load_state('networkidle', timeout=3000)
                            logger.info("最终网络空闲确认")
                        except Exception:
                            logger.info("最终网络空闲超时,继续截图")

                    except Exception as e:
                        logger.warning(f"自适应高度处理警告: {str(e)}")

                # Step 5: final network-idle wait.
                try:
                    await page.wait_for_load_state('networkidle', timeout=5000)
                    logger.info("最终网络空闲状态达成")
                except Exception as e:
                    logger.warning(f"最终网络空闲超时,继续执行: {str(e)}")

                # Step 6: scroll back to the top so the capture starts there.
                logger.info("滚动回页面顶部,准备截图")
                try:
                    await page.evaluate('() => window.scrollTo(0, 0)')
                    # Give the scroll a moment to settle.
                    await page.wait_for_timeout(500)
                    logger.info("页面已滚动回顶部")
                except Exception as e:
                    logger.warning(f"滚动回顶部失败: {str(e)}")

                # Playwright always writes a PNG here; format conversion,
                # quality and resizing are handled afterwards.
                temp_png_path = output_path.with_suffix('.temp.png')

                screenshot_options = {
                    "path": str(temp_png_path),
                    "type": "png",
                    "timeout": 30000  # screenshot timeout
                }

                # Full-page capture.
                if height == 0:
                    screenshot_options["full_page"] = True

                try:
                    # Take the screenshot (always PNG).
                    await page.screenshot(**screenshot_options)

                    # Produce the final output; the device type lets the
                    # helper treat desktop and mobile differently.
                    await _process_final_image(
                        temp_png_path, output_path,
                        actual_width, actual_height, actual_dpi_scale,
                        format, quality, device
                    )
                finally:
                    # Remove the temporary PNG on failure as well as on
                    # success, so failed attempts leave nothing behind.
                    temp_png_path.unlink(missing_ok=True)

                return {
                    "status": "success",
                    "message": f"截图已成功保存至 {output_path}"
                }

            except Exception as e:
                logger.error(f"截图过程中发生错误: {e}")
                raise
            finally:
                await browser.close()
  • Input schema definition for the webshot tool, specifying parameters like url (required), output (required), width, height, device, format, etc.
    Tool( name="webshot", description="生成网页截图", inputSchema={ "type": "object", "properties": { "url": { "type": "string", "description": "要截图的网页 URL,比如: https://www.baidu.com" }, "output": { "type": "string", "description": "截图文件保存路径,比如: /path/to/screenshot.png" }, "width": { "type": "integer", "description": "浏览器窗口宽度", "default": 1280 }, "height": { "type": "integer", "description": "浏览器窗口高度,0表示全页面截图", "default": 768 }, "dpi_scale": { "type": "number", "description": "DPI 缩放比例", "default": 2 }, "device": { "type": "string", "enum": ["desktop", "mobile", "tablet"], "description": "截图设备类型", "default": "desktop" }, "format": { "type": "string", "enum": ["png", "jpeg", "webp"], "description": "截图文件格式", "default": "png" }, "quality": { "type": "integer", "minimum": 0, "maximum": 100, "description": "图片质量(仅对 jpeg 和 webp 有效)", "default": 100 } }, "required": ["url", "output"] } ) ]
  • Configuration for blocking tracking domains, ads, and analytics services during page load to improve privacy and speed.
    # Hosts blocked by exact match only (just these specific host names).
    BLOCKED_EXACT_DOMAINS = {
        # Google Analytics & Ads
        'google-analytics.com',
        'www.google-analytics.com',
        'ssl.google-analytics.com',
        'analytics.google.com',
        'googletagmanager.com',
        'www.googletagmanager.com',
        'googletagservices.com',
        'googletag.com',
        'googleadservices.com',
        'googlesyndication.com',  # also listed in BLOCKED_FULL_DOMAINS
        'stats.g.doubleclick.net',
        'googleads.g.doubleclick.net',
        'securepubads.g.doubleclick.net',

        # Microsoft Clarity
        'clarity.ms',
        'c.clarity.ms',
        'www.clarity.ms',

        # Facebook tracking
        'connect.facebook.net',

        # Baidu statistics (exact hosts, to avoid blocking regular Baidu services)
        'hm.baidu.com',
        'hmcdn.baidu.com',
        'tongji.baidu.com',

        # CNZZ statistics
        'c.cnzz.com',
        'w.cnzz.com',
        's4.cnzz.com',
        'cnzz.mmstat.com',

        # 51LA statistics
        'js.users.51.la',
        'v6-web.51.la',

        # Specific subdomains of other analytics services
        'static.hotjar.com',
        'script.hotjar.com',
        'cdn.mxpnl.com',
        'api.mixpanel.com',
        'cdn.segment.com',
        'api.segment.io',
        'api.amplitude.com',
        'cdn.amplitude.com',
        'fs.fullstory.com',
        'edge.fullstory.com',
        'cdn.mouseflow.com',
        'script.crazyegg.com',
        'pixel.quantserve.com',
        'sb.scorecardresearch.com',
        'widgets.outbrain.com',
        'cdn.taboola.com',
        'assets.growingio.com',
        'api.growingio.com',
        'static.sensorsdata.cn',
        'sdk.talkingdata.com',
        'sdk.jpush.cn',
    }

    # Domains blocked entirely (the domain itself and all of its subdomains).
    BLOCKED_FULL_DOMAINS = {
        # Dedicated ad / tracking domains (safe to block completely)
        'doubleclick.net',
        'googlesyndication.com',
        'amazon-adsystem.com',
        'facebook.net',
        'fbcdn.net',
        'hotjar.com',
        'mixpanel.com',
        'segment.com',
        'amplitude.com',
        'fullstory.com',
        'mouseflow.com',
        'crazyegg.com',
        'quantserve.com',
        'scorecardresearch.com',
        'outbrain.com',
        'taboola.com',

        # Dedicated statistics domains
        'cnzz.com',
        '51.la',
        'umeng.com',
        'growingio.com',
        'sensorsdata.cn',
        'talkingdata.com',
        'jpush.cn',
    }

    # URL path patterns to block.
    BLOCKED_PATTERNS = {
        # Google tags, analytics and ads
        '/gtag/',
        '/analytics/',
        '/ga.js',
        '/analytics.js',
        '/gtm.js',
        '/googletag/',
        '/adsense/',
        '/doubleclick/',
        '/googlesyndication/',
        '/googleadservices/',

        # Other analytics vendors
        '/clarity.js',
        '/hotjar',
        '/mixpanel',
        '/segment',
        '/amplitude',

        # Facebook
        '/facebook.net/',
        '/fbevents.js',
        '/fbpixel',
        '/connect.facebook.net/',

        # Chinese statistics services
        '/hm.js',
        '/tongji',
        '/cnzz',
        '/umeng',
        '/growingio',
        '/sensorsdata',

        # Ad networks
        '/outbrain/',
        '/taboola/',
        '/amazon-adsystem/',
    }

Other Tools

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/bingal/webshot-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.