hover_detection.py
""" Pixel-based hover detection for ChatGPT sidebar. Uses classical CV approach (no vision LLMs needed): 1. Capture sidebar screenshot 2. Divide into row bands 3. Measure background brightness deviation from normal (249) 4. Highlighted row has largest deviation (~239-246 vs 249) Achieves 100% accuracy on test images. """ import numpy as np from PIL import Image, ImageGrab from typing import Optional, Tuple, List, Dict, Any import io class SidebarHoverDetector: """Detects which menu item is highlighted in ChatGPT sidebar using pixel analysis.""" # ChatGPT light theme colors NORMAL_BG: int = 249 # Normal background brightness HOVER_BG_MIN: int = 235 # Hover background is darker HOVER_BG_MAX: int = 247 def __init__(self, row_height: int = 35, top_skip: int = 35, bottom_skip: int = 40, deviation_threshold: float = 2.0) -> None: """ Initialize detector. Args: row_height: Approximate height of each menu item in pixels top_skip: Pixels to skip from top (header area) bottom_skip: Pixels to skip from bottom deviation_threshold: Minimum deviation from normal bg to detect hover """ self.row_height: int = row_height self.top_skip: int = top_skip self.bottom_skip: int = bottom_skip self.deviation_threshold: float = deviation_threshold def capture_sidebar(self, hwnd: int) -> Image.Image: """Capture sidebar region of ChatGPT window.""" import win32gui rect: Tuple[int, int, int, int] = win32gui.GetWindowRect(hwnd) window_width: int = rect[2] - rect[0] sidebar_width: int = int(window_width * 0.28) capture_rect: Tuple[int, int, int, int] = (rect[0], rect[1], rect[0] + sidebar_width, rect[3]) return ImageGrab.grab(bbox=capture_rect) def analyze_rows(self, sidebar_img: Image.Image) -> List[Dict[str, Any]]: """Analyze brightness of each row in the sidebar.""" arr: np.ndarray = np.array(sidebar_img) height: int width: int height, width = arr.shape[:2] # Convert to grayscale using luminance formula gray = np.dot(arr[..., :3], [0.299, 0.587, 0.114]) rows = [] y = self.top_skip row_idx = 0 while y + self.row_height <= height - self.bottom_skip: # Sample row, avoiding edges region = gray[y:y+self.row_height, 10:width-10] # Get only background pixels (bright pixels > 200) bg_pixels = region[region > 200] if len(bg_pixels) > 100: bg_mean = np.mean(bg_pixels) deviation = self.NORMAL_BG - bg_mean rows.append({ 'idx': row_idx, 'y_start': y, 'y_end': y + self.row_height, 'y_center': y + self.row_height // 2, 'bg_mean': bg_mean, 'deviation': deviation, }) y += self.row_height row_idx += 1 return rows def find_highlighted_row(self, sidebar_img: Image.Image) -> Optional[dict]: """ Find the highlighted (hovered) row in the sidebar. Returns: dict with row info if found, None otherwise """ rows = self.analyze_rows(sidebar_img) if not rows: return None # Find row with maximum deviation from normal background deviations = [r['deviation'] for r in rows] max_dev = max(deviations) if max_dev < self.deviation_threshold: return None max_idx = deviations.index(max_dev) return rows[max_idx] def find_highlighted_y_percent(self, sidebar_img: Image.Image) -> Optional[float]: """ Find the y position of highlighted row as percentage of image height. Returns: Float 0-100 representing y position, or None if no highlight found """ row = self.find_highlighted_row(sidebar_img) if row is None: return None img_height = sidebar_img.size[1] return 100 * row['y_center'] / img_height def detect_hover_position(self, hwnd: int) -> Optional[Tuple[int, int]]: """ Detect the screen coordinates of the highlighted row center. 
Args: hwnd: Window handle Returns: (x, y) screen coordinates of highlighted row center, or None """ import win32gui sidebar_img = self.capture_sidebar(hwnd) row = self.find_highlighted_row(sidebar_img) if row is None: return None # Convert to screen coordinates rect = win32gui.GetWindowRect(hwnd) window_width = rect[2] - rect[0] sidebar_width = int(window_width * 0.28) x = rect[0] + sidebar_width // 2 y = rect[1] + row['y_center'] return (x, y) # Convenience function for quick detection def detect_highlighted_item(hwnd: int) -> Optional[dict]: """ Detect which sidebar item is highlighted. Returns dict with: - y_percent: Position as percentage (0-100) - screen_coords: (x, y) screen coordinates - row_info: Full row analysis data """ detector = SidebarHoverDetector() sidebar_img = detector.capture_sidebar(hwnd) row = detector.find_highlighted_row(sidebar_img) if row is None: return None import win32gui rect = win32gui.GetWindowRect(hwnd) window_width = rect[2] - rect[0] sidebar_width = int(window_width * 0.28) return { 'y_percent': 100 * row['y_center'] / sidebar_img.size[1], 'screen_coords': (rect[0] + sidebar_width // 2, rect[1] + row['y_center']), 'row_info': row, } if __name__ == '__main__': # Quick test import sys sys.path.insert(0, 'src/drivers/win') from driver import ChatGPTDesktopDriver print("Testing SidebarHoverDetector...") driver = ChatGPTDesktopDriver() if not driver.find_chatgpt_window(): print("ChatGPT not found!") sys.exit(1) hwnd = driver._cached_handle result = detect_highlighted_item(hwnd) if result: print(f"Highlighted at: {result['y_percent']:.1f}%") print(f"Screen coords: {result['screen_coords']}") print(f"Row info: {result['row_info']}") else: print("No highlight detected")
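The module's quick test above requires a live ChatGPT window and win32gui. The snippet below is a minimal sketch of the row-deviation idea on a synthetic sidebar image, so the detector can be exercised without Windows at all. The image size (400x300), the hovered band index, and the hover brightness of 241 are assumptions chosen to match the constants in the file (NORMAL_BG = 249, hover ~239-246); it assumes hover_detection.py is importable from the current directory.

# Synthetic smoke test for SidebarHoverDetector (illustrative, not part of the original file).
import numpy as np
from PIL import Image

from hover_detection import SidebarHoverDetector

detector = SidebarHoverDetector()

# 400x300 "sidebar" filled with the normal background brightness (249),
# with one 35 px band darkened to a typical hover brightness (241).
arr = np.full((400, 300, 3), 249, dtype=np.uint8)
hover_y = detector.top_skip + 3 * detector.row_height  # fourth band (idx 3)
arr[hover_y:hover_y + detector.row_height, :, :] = 241

img = Image.fromarray(arr)
row = detector.find_highlighted_row(img)
print(row['idx'], row['deviation'])                         # -> 3, ~8.0
print(f"{detector.find_highlighted_y_percent(img):.1f}%")   # -> roughly 39%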
