ViperMCP

codex.prompt•14.5 KiB

import math class ImagePatch: """A Python class containing a crop of an image centered around a particular object, as well as relevant information. Attributes ---------- cropped_image : array_like An array-like of the cropped image taken from the original image. left, lower, right, upper : int An int describing the position of the (left/lower/right/upper) border of the crop's bounding box in the original image. alpha : array_like An array-like dictating the transparency of the image. Methods ------- find(object_name: str, mask: bool = False)->List[ImagePatch] Returns a list of new ImagePatch objects containing crops of the image centered around any objects found in the image matching the object_name. Optionally, return a masked version of the crop. exists(object_name: str)->bool Returns True if the object specified by object_name is found in the image, and False otherwise. verify_property(property: str)->bool Returns True if the property is met, and False otherwise. best_text_match(option_list: List[str], prefix: str)->str Returns the string that best matches the image. simple_query(question: str=None)->str Returns the answer to a basic question asked about the image. If no question is provided, returns the answer to "What is this?". llm_query(question: str, long_answer: bool = True)->str References a large language model (e.g., GPT) to produce a response to the given question. Default is short-form answers, can be made long-form responses with the long_answer flag. compute_depth()->float Returns the median depth of the masked image crop. crop(left: int, upper: int, right: int, lower: int)->ImagePatch Returns a new ImagePatch object containing a crop of the image at the given coordinates. """ def __init__(self, image, left: int = None, upper: int = None, right: int = None, lower: int = None): """Initializes an ImagePatch object by cropping the image at the given coordinates and stores the coordinates as attributes. 
If no coordinates are provided, the image is left unmodified, and the coordinates are set to the dimensions of the image. Parameters ------- image : array_like An array-like of the original image. left, upper, right, lower : int An int describing the position of the (left/upper/right/lower) border of the crop's bounding box in the original image. """ if left is None and right is None and upper is None and lower is None: self.cropped_image = image self.left = 0 self.upper = 0 self.right = image.shape[2] # width self.lower = image.shape[1] # height else: self.cropped_image = image[:, upper:lower, left:right] self.left = left + parent_left self.upper = upper + parent_upper self.right = right + parent_left self.lower = lower + parent_upper self.height = self.cropped_image.shape[1] self.width = self.cropped_image.shape[2] self.horizontal_center = (self.left + self.right) / 2 self.vertical_center = (self.lower + self.upper) / 2 self.bbox = (self.left, self.upper, self.right, self.lower) def find(self, object_name: str, mask:bool=False) -> List[ImagePatch]: """Returns a list of ImagePatch objects matching object_name contained in the crop if any are found. Otherwise, returns an empty list. Parameters ---------- object_name : str the name of the object to be found Returns ------- List[ImagePatch] a list of ImagePatch objects matching object_name contained in the crop Examples -------- >>> # return the foo >>> def execute_command(image) -> List[ImagePatch]: >>> image_patch = ImagePatch(image) >>> foo_patches = image_patch.find("foo") >>> return foo_patches """ return find(self.cropped_image, object_name, mask) def exists(self, object_name: str) -> bool: """Returns True if the object specified by object_name is found in the image, and False otherwise. Parameters ------- object_name : str A string describing the name of the object to be found in the image. Examples ------- >>> # Are there both foos and garply bars in the photo? 
>>> def execute_command(image)->str: >>> image_patch = ImagePatch(image) >>> is_foo = image_patch.exists("foo") >>> is_garply_bar = image_patch.exists("garply bar") >>> return bool_to_yesno(is_foo and is_garply_bar) """ return len(self.find(object_name)) > 0 def verify_property(self, object_name: str, visual_property: str) -> bool: """Returns True if the object possesses the visual property, and False otherwise. Differs from 'exists' in that it presupposes the existence of the object specified by object_name, instead checking whether the object possesses the property. Parameters ------- object_name : str A string describing the name of the object to be found in the image. visual_property : str A string describing the simple visual property (e.g., color, shape, material) to be checked. Examples ------- >>> # Do the letters have blue color? >>> def execute_command(image) -> str: >>> image_patch = ImagePatch(image) >>> letters_patches = image_patch.find("letters") >>> # Question assumes only one letter patch >>> return bool_to_yesno(letters_patches[0].verify_property("letters", "blue")) """ return verify_property(self.cropped_image, object_name, property) def best_text_match(self, option_list: List[str], prefix: str=None) -> str: """Returns the string that best matches the image. Parameters ------- option_list : str A list with the names of the different options prefix : str A string with the prefixes to append to the options Examples ------- >>> # Is the foo gold or white? >>> def execute_command(image)->str: >>> image_patch = ImagePatch(image) >>> foo_patches = image_patch.find("foo") >>> # Question assumes one foo patch >>> return foo_patches[0].best_text_match(["gold", "white"]) """ return best_text_match(self.cropped_image, option_list, prefix) def simple_query(self, question: str = None) -> str: """Returns the answer to a basic question asked about the image. If no question is provided, returns the answer to "What is this?". 
The questions are about basic perception, and are not meant to be used for complex reasoning or external knowledge. Parameters ------- question : str A string describing the question to be asked. Examples ------- >>> # Which kind of baz is not fredding? >>> def execute_command(image) -> str: >>> image_patch = ImagePatch(image) >>> baz_patches = image_patch.find("baz") >>> for baz_patch in baz_patches: >>> if not baz_patch.verify_property("baz", "fredding"): >>> return baz_patch.simple_query("What is this baz?") >>> # What color is the foo? >>> def execute_command(image) -> str: >>> image_patch = ImagePatch(image) >>> foo_patches = image_patch.find("foo") >>> foo_patch = foo_patches[0] >>> return foo_patch.simple_query("What is the color?") >>> # Is the second bar from the left quuxy? >>> def execute_command(image) -> str: >>> image_patch = ImagePatch(image) >>> bar_patches = image_patch.find("bar") >>> bar_patches.sort(key=lambda x: x.horizontal_center) >>> bar_patch = bar_patches[1] >>> return bar_patch.simple_query("Is the bar quuxy?") """ return simple_query(self.cropped_image, question) def compute_depth(self): """Returns the median depth of the image crop Parameters ---------- Returns ------- float the median depth of the image crop Examples -------- >>> # the bar furthest away >>> def execute_command(image)->ImagePatch: >>> image_patch = ImagePatch(image) >>> bar_patches = image_patch.find("bar", mask=True) >>> bar_patches.sort(key=lambda bar: bar.compute_depth()) >>> return bar_patches[-1] """ depth_map = compute_depth(self.cropped_image) return depth_map.median() def crop(self, left: int, upper: int, right: int, lower: int) -> ImagePatch: """Returns a new ImagePatch cropped from the current ImagePatch. Parameters ------- left, lower, right, upper : int The (left/upper/right/upper)most pixel of the cropped image. 
------- """ return ImagePatch(self.cropped_image, left, upper, right, lower) def overlaps_with(self, left, upper, right, lower): """Returns True if a crop with the given coordinates overlaps with this one, else False. Parameters ---------- left, lower, right, upper : int the (left/lower/right/upper) border of the crop to be checked Returns ------- bool True if a crop with the given coordinates overlaps with this one, else False Examples -------- >>> # black foo on top of the qux >>> def execute_command(image) -> ImagePatch: >>> image_patch = ImagePatch(image) >>> qux_patches = image_patch.find("qux") >>> qux_patch = qux_patches[0] >>> foo_patches = image_patch.find("black foo") >>> for foo in foo_patches: >>> if foo.vertical_center > qux_patch.vertical_center >>> return foo """ return self.left <= right and self.right >= left and self.lower >= upper and self.upper <= lower def llm_query(self, question: str, long_answer: bool = True) -> str: '''Answers a text question using GPT-4. The input question is always a formatted string with a variable in it. Parameters ---------- question: str the text question to ask. Must not contain any reference to 'the image' or 'the photo', etc. long_answer: bool whether to return a short answer or a long answer. Short answers are one or at most two words, very concise. Long answers are longer, and may be paragraphs and explanations. Defalt is True (so long answer). Examples -------- >>> # What is the city this building is in? >>> def execute_command(image) -> str: >>> image_patch = ImagePatch(image) >>> building_patches = image_patch.find("building") >>> building_patch = building_patches[0] >>> building_name = building_patch.simple_query("What is the name of the building?") >>> return building_patch.llm_query(f"What city is {building_name} in?", long_answer=False) >>> # Who invented this object? 
>>> def execute_command(image) -> str: >>> image_patch = ImagePatch(image) >>> object_patches = image_patch.find("object") >>> object_patch = object_patches[0] >>> object_name = object_patch.simple_query("What is the name of the object?") >>> return object_patch.llm_query(f"Who invented {object_name}?", long_answer=False) >>> # Explain the history behind this object. >>> def execute_command(image) -> str: >>> image_patch = ImagePatch(image) >>> object_patches = image_patch.find("object") >>> object_patch = object_patches[0] >>> object_name = object_patch.simple_query("What is the name of the object?") >>> return object_patch.llm_query(f"What is the history behind {object_name}?", long_answer=True) ''' return llm_query(question, long_answer) def best_image_match(list_patches: List[ImagePatch], content: List[str], return_index=False) -> Union[ImagePatch, int]: """Returns the patch most likely to contain the content. Parameters ---------- list_patches : List[ImagePatch] content : List[str] the object of interest return_index : bool if True, returns the index of the patch most likely to contain the object Returns ------- int Patch most likely to contain the object """ return best_image_match(list_patches, content, return_index) def distance(patch_a: ImagePatch, patch_b: ImagePatch) -> float: """ Returns the distance between the edges of two ImagePatches. If the patches overlap, it returns a negative distance corresponding to the negative intersection over union. 
Parameters ---------- patch_a : ImagePatch patch_b : ImagePatch Examples -------- # Return the qux that is closest to the foo >>> def execute_command(image): >>> image_patch = ImagePatch(image) >>> qux_patches = image_patch.find('qux') >>> foo_patches = image_patch.find('foo') >>> foo_patch = foo_patches[0] >>> qux_patches.sort(key=lambda x: distance(x, foo_patch)) >>> return qux_patches[0] """ return distance(patch_a, patch_b) def bool_to_yesno(bool_answer: bool) -> str: return "yes" if bool_answer else "no" def coerce_to_numeric(string): """ This function takes a string as input and returns a float after removing any non-numeric characters. If the input string contains a range (e.g. "10-15"), it returns the first value in the range. """ return coerce_to_numeric(string) INSERT_ACTION_HERE Consider the following guidelines: - Assume that you have always been given an image to check for the existence of unknowns. - Use base Python (comparison, sorting) for basic logical operations, left/right/up/down, math, etc. - Use the llm_query function to access external information and answer informational questions not concerning the image. - Respond only in code. Do not produce a docstring or a content window wrapper like ```python...```. INSERT_PROMPT_HERE

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ryansherby/ViperMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

codex.prompt•14.5 KiB