Farnsworth

Overview Schema Related Servers Score Discussions

visual_debugging.py•3.76 KiB

"""
Farnsworth Visual Debugging - Screenshot & Diagram Analysis.

"Let me look at that with my special eyes!"

This module implements:
1. UI Element Recognition (Buttons, Inputs, Errors)
2. Diagram Understanding (Architecture flows)
3. Visual Debugging (Correlating visual errors with logs)
"""

import asyncio
import re
from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any

from loguru import logger

from farnsworth.integration.vision import VisionModule, VisionTask, ImageInput

# Error signatures searched for in OCR text. Compiled once at import time so
# that each screenshot analysis does not rebuild the list or re-resolve the
# patterns. Matching is case-insensitive.
_ERROR_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"Error:.*",
        r"Exception:.*",
        r"Failed to.*",
        r"404 Not Found",
        r"500 Internal Server Error",
    )
)

# Exclusive bounds on the length of an OCR line that is treated as a diagram
# label: anything shorter is likely noise, anything longer is likely prose.
_MIN_LABEL_LEN = 2
_MAX_LABEL_LEN = 30


@dataclass
class UIElement:
    """A single UI element found in a screenshot."""

    type: str  # button, input, text, error_banner
    text: str
    location: Dict[str, int]  # bbox {x, y, w, h} (Simulated or extracted)
    confidence: float


@dataclass
class DiagramNode:
    """A node of a parsed diagram, with the ids of the nodes it connects to."""

    id: str
    label: str
    type: str  # box, cylinder, cloud
    connections: List[str] = field(default_factory=list)


@dataclass
class VisualAnalysisResult:
    """Aggregated output of a screenshot or diagram analysis."""

    ui_elements: List[UIElement] = field(default_factory=list)
    diagram_nodes: List[DiagramNode] = field(default_factory=list)
    detected_errors: List[str] = field(default_factory=list)
    summary: str = ""


def _find_error_messages(text: str) -> List[str]:
    """Return the distinct error messages matched in *text*.

    Results are grouped by pattern (in ``_ERROR_PATTERNS`` order) and, within
    a pattern, appear in the order they occur in the text.
    """
    messages: List[str] = []
    seen = set()
    for pattern in _ERROR_PATTERNS:
        for raw_match in pattern.findall(text):
            # ``.*`` stops at "\n" but not at "\r", so text with CRLF line
            # endings would otherwise leave a trailing "\r" on the message.
            message = raw_match.rstrip()
            if message in seen:
                # The same line captured twice adds noise, not information.
                continue
            seen.add(message)
            messages.append(message)
    return messages


def _extract_diagram_nodes(text: str) -> List[DiagramNode]:
    """Turn label-sized lines of *text* into diagram nodes with unique ids."""
    nodes: List[DiagramNode] = []
    used_ids = set()
    for raw_line in text.split("\n"):
        label = raw_line.strip()
        if not _MIN_LABEL_LEN < len(label) < _MAX_LABEL_LEN:
            continue

        base_id = label.lower().replace(" ", "_")
        # A label may legitimately appear more than once in a diagram; add a
        # numeric suffix to repeats so every node keeps a distinct id.
        node_id = base_id
        suffix = 2
        while node_id in used_ids:
            node_id = f"{base_id}_{suffix}"
            suffix += 1
        used_ids.add(node_id)

        nodes.append(DiagramNode(id=node_id, label=label, type="unknown"))
    return nodes


class VisualDebugger:
    """
    Expert system for visual debugging.
    Integrates OCR, Object Detection (simulated via VQA), and Error Pattern Matching.
    """

    def __init__(self, vision_module: VisionModule):
        """Store the vision backend used for OCR and captioning."""
        self.vision = vision_module

    async def analyze_screenshot(self, image_source: Any) -> VisualAnalysisResult:
        """
        Analyze a screenshot for UI elements and errors.

        Args:
            image_source: Image handed unchanged to the vision module.

        Returns:
            A result whose ``detected_errors`` holds the distinct error
            messages found in the OCR text and whose ``summary`` holds the
            image caption (``"No description"`` when the caption is empty).
            ``ui_elements`` is left empty: element detection is not
            implemented yet.
        """
        result = VisualAnalysisResult()

        # 1. OCR for text extraction
        ocr_res = await self.vision.extract_text(image_source)
        full_text = ocr_res.text or ""

        # 2. Heuristic Error Detection
        result.detected_errors.extend(_find_error_messages(full_text))

        # 3. VQA for UI understanding (if BLIP is available)
        # We ask specific questions to identify layout
        ui_desc = await self.vision.caption(image_source)
        result.summary = ui_desc.caption or "No description"

        # Simulate UI Element detection via VQA
        # In a real system with LLaVA, we'd ask "List distinct UI buttons"
        return result

    async def parse_diagram(self, image_source: Any) -> VisualAnalysisResult:
        """
        Parse an architecture diagram or flowchart.

        Args:
            image_source: Image handed unchanged to the vision module.

        Returns:
            A result whose ``summary`` holds the image caption (empty string
            when there is none) and whose ``diagram_nodes`` holds one node per
            label-sized OCR line. Every node has a unique ``id``, type
            ``"unknown"`` and no connections: edge detection is not
            implemented yet.
        """
        result = VisualAnalysisResult()

        # 1. Caption to get general idea
        caption = await self.vision.caption(image_source)
        result.summary = caption.caption or ""

        # 2. VQA to find connections (Simulated advanced behavior)
        # "What connects to the Database?"
        # For now, we rely on OCR and heuristic graph construction
        ocr_res = await self.vision.extract_text(image_source)

        # Heuristic: Text blocks close to arrows (hard to do without BBox)
        # We'll just list potential nodes found in text
        result.diagram_nodes.extend(_extract_diagram_nodes(ocr_res.text or ""))
        return result


# Factory for easy usage
def create_visual_debugger(device="auto") -> VisualDebugger:
    """Build a ``VisualDebugger`` backed by a new ``VisionModule``.

    Args:
        device: Forwarded to ``VisionModule(device=...)``; defaults to
            ``"auto"``.
    """
    vm = VisionModule(device=device)
    return VisualDebugger(vm)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/timowhite88/Farnsworth'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

visual_debugging.py•3.76 KiB