Skip to main content
Glama

Android Mobile MCP

by erichung9060
main.py (9.66 kB)
import io
import json
import re
import xml.etree.ElementTree as ET

import uiautomator2 as u2
from fastmcp import FastMCP
from fastmcp.utilities.types import Image

mcp = FastMCP("Android Mobile MCP Server")

# Connected uiautomator2 device handle; stays None until mobile_init() succeeds.
device = None

# Centre points (x, y) of every element reported by the most recent UI dump.
# mobile_click() only accepts coordinates that are members of this set.
ui_coords = set()

# Returned by every tool that is invoked before mobile_init() has connected.
DEVICE_NOT_INITIALIZED_ERROR = (
    "Error: Device not initialized. Please call mobile_init() first to "
    "establish connection with Android device."
)

# Package-name patterns treated as system components. Compiled once at import
# time because is_system_app() runs for every installed package.
_SYSTEM_APP_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"^com\.android\.systemui",
        r"^com\.android\.providers\.",
        r"^com\.android\.internal\.",
        r"^com\.android\.cellbroadcast",
        r"^com\.android\.phone",
        r"^com\.android\.bluetooth",
        r"^com\.google\.android\.overlay",
        r"^com\.google\.mainline",
        r"^com\.google\.android\.ext",
        r"\.auto_generated_rro_",
        r"^android$",
    )
)


def parse_bounds(bounds_str):
    """Parse a bounds attribute such as ``"[0,0][100,200]"``.

    Args:
        bounds_str: Bounds text made of four comma/bracket separated integers.

    Returns:
        A dict with the integer centre point under ``"x"`` / ``"y"`` and the
        raw corners under ``"bounds"`` as ``[x1, y1, x2, y2]``, or ``None``
        when the value is empty or cannot be parsed.
    """
    if not bounds_str:
        return None
    try:
        # "[x1,y1][x2,y2]" -> "x1,y1,x2,y2," -> first four comma fields.
        fields = bounds_str.replace('[', '').replace(']', ',').split(',')
        x1, y1, x2, y2 = map(int, fields[:4])
    except (AttributeError, TypeError, ValueError):
        # Non-string input, too few fields, or non-numeric fields.
        return None
    return {
        "x": (x1 + x2) // 2,
        "y": (y1 + y2) // 2,
        "bounds": [x1, y1, x2, y2],
    }


def get_children_texts(element):
    """Return the distinct, non-empty ``text`` values of all descendants.

    Texts are whitespace-stripped and kept in document order; ``element``
    itself is not included.
    """
    descendants = element.iter()
    next(descendants, None)  # iter() yields the element itself first; skip it
    stripped = (node.get('text', '').strip() for node in descendants)
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(text for text in stripped if text))


def extract_ui_elements(element):
    """Recursively convert an XML hierarchy node into a list of element dicts.

    A node is reported only if it is focusable or carries text, a content
    description or a hint; otherwise its reportable descendants are returned
    in its place. Every reported node with parsable bounds has its centre
    point recorded in the module-level ``ui_coords`` set.

    Args:
        element: An ``xml.etree.ElementTree.Element`` from the hierarchy dump.

    Returns:
        A list of dicts with keys ``text``, ``class``, ``coordinates`` and,
        when present, ``resource_id`` and ``children``.
    """
    resource_id = element.get('resource-id', '')
    text = element.get('text', '').strip()
    content_desc = element.get('content-desc', '').strip()
    hint = element.get('hint', '').strip()
    bounds = parse_bounds(element.get('bounds', ''))

    focusable = element.get('focusable', 'false').lower() == 'true'
    has_text = bool(text or content_desc or hint)

    children = []
    for child in element:
        children.extend(extract_ui_elements(child))

    # Pure layout containers are flattened away: hand back their children.
    if not (focusable or has_text):
        return children

    display_text = text or content_desc or hint
    if focusable and not display_text:
        # A focusable wrapper without its own label borrows its descendants'.
        display_text = ' '.join(get_children_texts(element)).strip()

    coordinates = {"x": bounds["x"], "y": bounds["y"]} if bounds else None
    element_info = {
        "text": display_text,
        "class": element.get('class', ''),
        "coordinates": coordinates,
    }

    # Only clickable centres are registered; a node without parsable bounds
    # is still reported but must not crash the whole dump.
    if bounds:
        ui_coords.add((bounds["x"], bounds["y"]))

    if resource_id:
        element_info["resource_id"] = resource_id

    # Drop children that merely repeat this node (same text, same centre).
    distinct_children = [
        child
        for child in children
        if not (
            child.get("text", "") == display_text
            and child.get("coordinates") == coordinates
        )
    ]
    if distinct_children:
        element_info["children"] = distinct_children

    return [element_info]


@mcp.tool()
def mobile_init() -> str:
    """Initialize the Android device connection.

    Must be called before using any other mobile tools.
    """
    global device
    try:
        device = u2.connect()
        return "Device initialized successfully"
    except Exception as e:
        device = None
        return f"Error initializing device: {str(e)}"


@mcp.tool()
def mobile_dump_ui() -> str:
    """Get UI elements from Android screen as JSON with hierarchical structure.

    Returns a JSON structure where elements contain their child elements, showing parent-child relationships.
    Only includes focusable elements or elements with text/content_desc/hint attributes.
    """
    if device is None:
        return DEVICE_NOT_INITIALIZED_ERROR
    return _mobile_dump_ui()


def _refresh_ui_elements():
    """Dump the current hierarchy, rebuild ``ui_coords`` and return the tree.

    Raises:
        Exception: Whatever ``device.dump_hierarchy()`` or XML parsing raises
            (for example ``xml.etree.ElementTree.ParseError``).
    """
    xml_content = device.dump_hierarchy()
    root = ET.fromstring(xml_content)
    # Clear only after a successful parse so a failed dump keeps the old set.
    ui_coords.clear()
    return extract_ui_elements(root)


def _mobile_dump_ui():
    """Return the current UI tree as a JSON string, or an error message."""
    try:
        # json.dumps (not str()) so the result is the JSON the tool promises.
        return json.dumps(_refresh_ui_elements(), ensure_ascii=False)
    except Exception as e:
        return f"Error processing XML: {str(e)}"


@mcp.tool()
def mobile_click(x: int, y: int) -> str:
    """Click on a specific coordinate on the Android screen.

    Args:
        x: X coordinate to click
        y: Y coordinate to click
    """
    if device is None:
        return DEVICE_NOT_INITIALIZED_ERROR

    try:
        # Refresh first so the click is validated against the live screen; a
        # failed refresh is reported as an error instead of validating
        # against stale coordinates.
        _refresh_ui_elements()
        if (x, y) not in ui_coords:
            return "Invalid elements coordinates. Please use mobile_dump_ui to get the latest UI state first."
        device.click(x, y)
        return f"Successfully clicked on coordinate ({x}, {y})"
    except Exception as e:
        return f"Error clicking coordinate ({x}, {y}): {str(e)}"


@mcp.tool()
def mobile_type(text: str, submit: bool = False) -> str:
    """Input text into the currently focused text field on Android.

    Args:
        text: The text to input
        submit: Whether to submit text (press Enter key) after typing
    """
    if device is None:
        return DEVICE_NOT_INITIALIZED_ERROR

    try:
        device.send_keys(text)
        if submit:
            device.press("enter")
            return f"Successfully input text: {text} and pressed Enter"
        return f"Successfully input text: {text}"
    except Exception as e:
        return f"Error inputting text: {str(e)}"


@mcp.tool()
def mobile_key_press(button: str) -> str:
    """Press a physical or virtual button on the Android device.

    Args:
        button: Button name (BACK, HOME, RECENT, ENTER)
    """
    if device is None:
        return DEVICE_NOT_INITIALIZED_ERROR

    button_map = {
        "BACK": "back",
        "HOME": "home",
        "RECENT": "recent",
        "ENTER": "enter",
    }
    # Unknown names are passed through lower-cased rather than rejected.
    key = button_map.get(button.upper(), button.lower())

    try:
        device.press(key)
        return f"Successfully pressed {button} button"
    except Exception as e:
        return f"Error pressing {button} button: {str(e)}"


@mcp.tool()
def mobile_swipe(start_x: int, start_y: int, end_x: int, end_y: int, duration: float = 0.5) -> str:
    """Perform a swipe gesture on the Android screen.

    Args:
        start_x: Starting X coordinate
        start_y: Starting Y coordinate
        end_x: Ending X coordinate
        end_y: Ending Y coordinate
        duration: Duration of swipe in seconds (default: 0.5)
    """
    if device is None:
        return DEVICE_NOT_INITIALIZED_ERROR

    try:
        # The shell command takes its duration argument in milliseconds.
        duration_ms = int(duration * 1000)
        cmd = f"input swipe {start_x} {start_y} {end_x} {end_y} {duration_ms}"
        device.shell(cmd)
        return f"Successfully swiped from ({start_x}, {start_y}) to ({end_x}, {end_y}) in {duration}s"
    except Exception as e:
        return f"Error swiping: {str(e)}"


def is_system_app(package):
    """Return True when ``package`` matches any of the system-app patterns."""
    return any(pattern.search(package) for pattern in _SYSTEM_APP_PATTERNS)


def is_launchable_app(package):
    """Return True when ``package`` is not a system app and resolves to an activity.

    The check runs ``cmd package resolve-activity --brief`` on the device and
    looks for a ``/`` in the output; any failure counts as not launchable.
    """
    if is_system_app(package):
        return False

    try:
        response = device.shell(f"cmd package resolve-activity --brief {package}")
        # presumably a resolved component prints as "package/activity" - confirm
        return "/" in response.output
    except Exception:
        return False


@mcp.tool()
def mobile_list_apps() -> str:
    """List the launchable (non-system) applications installed on the Android device.

    Returns a JSON array of package names.
    """
    if device is None:
        return DEVICE_NOT_INITIALIZED_ERROR

    try:
        apps = device.app_list()
        launchable_apps = [pkg for pkg in apps if is_launchable_app(pkg)]
        return json.dumps(launchable_apps, ensure_ascii=False, indent=2)
    except Exception as e:
        return f"Error listing apps: {str(e)}"


@mcp.tool()
def mobile_launch_app(package_name: str) -> str:
    """Launch an application by its package name.

    Args:
        package_name: The package name of the app to launch (e.g., 'com.android.chrome')
    """
    if device is None:
        return DEVICE_NOT_INITIALIZED_ERROR

    try:
        # Only packages the device itself reports as installed are started.
        apps = device.app_list()
        if package_name not in apps:
            return f"App {package_name} is not in the list of installed apps. Please use mobile_list_apps to get the current app list."

        device.app_start(package_name)
        return f"Successfully launched app: {package_name}"
    except Exception as e:
        return f"Error launching app {package_name}: {str(e)}"


@mcp.tool()
def mobile_take_screenshot() -> Image:
    """Take a screenshot of the current Android screen.

    Returns an image object that can be viewed by the LLM.
    """
    # NOTE(review): the error paths below return a str although the function
    # is annotated as returning Image - confirm how FastMCP handles this.
    if device is None:
        return DEVICE_NOT_INITIALIZED_ERROR

    try:
        screenshot = device.screenshot()
        buf = io.BytesIO()
        screenshot.save(buf, format="PNG")
        return Image(data=buf.getvalue(), format="png")
    except Exception as e:
        return f"Error taking screenshot: {str(e)}"


def main():
    """Run the MCP server."""
    mcp.run()


if __name__ == "__main__":
    main()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/erichung9060/Android-Mobile-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.