screenshot
Capture webpage screenshots with labeled interactive elements for visual web navigation and automated testing.
Instructions
Take a screenshot of the current page with labeled elements
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| No arguments | | | |
Implementation Reference
- src/atlas_browser_mcp/server.py:60-67 (registration): Tool registration for 'screenshot' with name, description, and inputSchema (no parameters required)
```python
Tool(
    name="screenshot",
    description="Take a screenshot of the current page with labeled elements",
    inputSchema={
        "type": "object",
        "properties": {}
    }
),
```
- src/atlas_browser_mcp/server.py:147-151 (handler): Handler that routes 'screenshot' tool calls to browser.execute with action='observe'
```python
elif name == "screenshot":
    result = await asyncio.to_thread(
        browser.execute,
        action="observe"
    )
```
- src/atlas_browser_mcp/browser.py:301-357 (handler): Core implementation of screenshot functionality via _observe() method: injects Set-of-Mark labels, captures screenshot, returns labeled elements and base64 image
```python
def _observe(self, **_) -> BrowserResult:
    """Get visual observation of current page"""
    if self._page is None:
        return BrowserResult(success=False, error="No page open. Use navigate first.")

    try:
        self._page.wait_for_timeout(500)

        elements = self._page.evaluate(self.SOM_INJECT_SCRIPT)

        self._element_map = {}
        for el in elements:
            self._element_map[el['id']] = {
                'x': el['x'],
                'y': el['y'],
                'width': el['width'],
                'height': el['height'],
                'tag': el['tag'],
                'type': el['type'],
                'text': el['text']
            }

        screenshot_bytes = self._page.screenshot(
            type="jpeg",
            quality=self.SCREENSHOT_QUALITY
        )
        screenshot_base64 = base64.b64encode(screenshot_bytes).decode('utf-8')

        elements_for_llm = []
        for el in elements:
            element_info = {
                'id': el['id'],
                'tag': el['tag'],
            }
            if el['text']:
                element_info['text'] = el['text']
            if el['type']:
                element_info['type'] = el['type']
            elements_for_llm.append(element_info)

        return BrowserResult(
            success=True,
            data={
                'url': self._page.url,
                'title': self._page.title(),
                'screenshot': screenshot_base64,
                'elements': elements_for_llm,
                'element_count': len(elements)
            },
            metadata={'has_image': True}
        )

    except Exception as e:
        return BrowserResult(
            success=False,
            error=f"Observation failed: {str(e)}"
        )
```
- SOM_INJECT_SCRIPT: JavaScript that injects numeric labels [N] onto interactive elements (links, buttons, inputs, etc.) for visual identification
```python
SOM_INJECT_SCRIPT = """
() => {
    // Remove old labels
    document.querySelectorAll('.atlas-som-label').forEach(el => el.remove());

    const selectors = [
        'a[href]',
        'button',
        'input:not([type="hidden"])',
        'select',
        'textarea',
        '[role="button"]',
        '[role="link"]',
        '[role="checkbox"]',
        '[role="menuitem"]',
        '[onclick]',
        '[tabindex]:not([tabindex="-1"])'
    ];

    const elements = [];
    let labelId = 0;

    function markElements(doc, offsetX = 0, offsetY = 0) {
        if (!doc) return;

        selectors.forEach(selector => {
            try {
                doc.querySelectorAll(selector).forEach(el => {
                    const rect = el.getBoundingClientRect();
                    const style = window.getComputedStyle(el);

                    if (
                        rect.width <= 0 ||
                        rect.height <= 0 ||
                        style.visibility === 'hidden' ||
                        style.display === 'none' ||
                        parseFloat(style.opacity) === 0
                    ) {
                        return;
                    }

                    const viewportWidth = window.innerWidth;
                    const viewportHeight = window.innerHeight;
                    if (
                        rect.right < 0 ||
                        rect.bottom < 0 ||
                        rect.left > viewportWidth ||
                        rect.top > viewportHeight
                    ) {
                        return;
                    }

                    const label = document.createElement('div');
                    label.className = 'atlas-som-label';
                    label.textContent = labelId;
                    label.style.cssText = `
                        position: fixed !important;
                        left: ${rect.left + offsetX}px !important;
                        top: ${rect.top + offsetY}px !important;
                        background: #FFFF00 !important;
                        color: #000000 !important;
                        border: 2px solid #FF0000 !important;
                        font-size: 12px !important;
                        font-weight: bold !important;
                        font-family: monospace !important;
                        padding: 1px 4px !important;
                        z-index: 2147483647 !important;
                        pointer-events: none !important;
                        border-radius: 3px !important;
                        line-height: 1.2 !important;
                    `;
                    document.body.appendChild(label);

                    let text = '';
                    if (el.tagName === 'INPUT') {
                        text = el.placeholder || el.value || el.name || '';
                    } else if (el.tagName === 'SELECT') {
                        text = el.options[el.selectedIndex]?.text || '';
                    } else {
                        text = el.innerText || el.textContent || el.getAttribute('aria-label') || '';
                    }
                    text = text.trim().substring(0, 50);

                    elements.push({
                        id: labelId,
                        x: Math.round(rect.left + rect.width / 2 + offsetX),
                        y: Math.round(rect.top + rect.height / 2 + offsetY),
                        width: Math.round(rect.width),
                        height: Math.round(rect.height),
                        tag: el.tagName.toLowerCase(),
                        type: el.type || '',
                        text: text
                    });

                    labelId++;
                });
            } catch (e) {}
        });

        try {
            doc.querySelectorAll('iframe').forEach(iframe => {
                try {
                    const iframeRect = iframe.getBoundingClientRect();
                    const iframeDoc = iframe.contentDocument || iframe.contentWindow?.document;
                    if (iframeDoc) {
                        markElements(
                            iframeDoc,
                            offsetX + iframeRect.left,
                            offsetY + iframeRect.top
                        );
                    }
                } catch (e) {}
            });
        } catch (e) {}
    }

    markElements(document);
    return elements;
}
"""
```
- Input schema definition for screenshot tool (empty object - no parameters required)
```python
inputSchema={
    "type": "object",
    "properties": {}
}
```