"""
This implementation is based on the OpenAI types, while removing all the parts that are not needed for Browser Use.
"""
# region - Content parts
from typing import Literal, Union
from openai import BaseModel
def _truncate(text: str, max_length: int = 50) -> str:
"""Truncate text to max_length characters, adding ellipsis if truncated."""
if len(text) <= max_length:
return text
return text[: max_length - 3] + '...'
def _format_image_url(url: str, max_length: int = 50) -> str:
"""Format image URL for display, truncating if necessary."""
if url.startswith('data:'):
# Base64 image
media_type = url.split(';')[0].split(':')[1] if ';' in url else 'image'
return f'<base64 {media_type}>'
else:
# Regular URL
return _truncate(url, max_length)
class ContentPartTextParam(BaseModel):
    # A plain-text content part of a message.
    text: str
    type: Literal['text'] = 'text'

    def __str__(self) -> str:
        """Return ``'Text: '`` followed by the text shortened by ``_truncate``."""
        return f'Text: {_truncate(self.text)}'

    def __repr__(self) -> str:
        """Return a constructor-like representation with a shortened text.

        The text is passed through ``repr()`` before truncation so it is
        quoted and escaped, matching ``ContentPartRefusalParam.__repr__``.
        """
        return f'ContentPartTextParam(text={_truncate(repr(self.text))})'
class ContentPartRefusalParam(BaseModel):
    # A content part carrying a refusal message.
    refusal: str
    type: Literal['refusal'] = 'refusal'

    def __str__(self) -> str:
        """Return ``'Refusal: '`` followed by the refusal shortened by ``_truncate``."""
        shortened = _truncate(self.refusal)
        return f'Refusal: {shortened}'

    def __repr__(self) -> str:
        """Return a constructor-like representation with the quoted refusal truncated."""
        quoted = repr(self.refusal)
        return f'ContentPartRefusalParam(refusal={_truncate(quoted, 50)})'
# Image MIME types accepted by ``ImageURL.media_type``.
SupportedImageMediaType = Literal['image/jpeg', 'image/png', 'image/gif', 'image/webp']
class ImageURL(BaseModel):
    # An image reference: its URL (or data URL), requested detail level and media type.
    url: str
    """Either a URL of the image or the base64 encoded image data."""

    detail: Literal['auto', 'low', 'high'] = 'auto'
    """Specifies the detail level of the image.

    Learn more in the
    [Vision guide](https://platform.openai.com/docs/guides/vision#low-or-high-fidelity-image-understanding).
    """

    # needed for Anthropic
    media_type: SupportedImageMediaType = 'image/png'

    def __str__(self) -> str:
        """Return a one-line summary with the URL formatted by ``_format_image_url``."""
        shown_url = _format_image_url(self.url)
        return f'🖼️ Image[{self.media_type}, detail={self.detail}]: {shown_url}'

    def __repr__(self) -> str:
        """Return a constructor-like representation; the URL is formatted with a limit of 30."""
        short_url = _format_image_url(self.url, 30)
        return f'ImageURL(url={short_url!r}, detail={self.detail!r}, media_type={self.media_type!r})'
class ContentPartImageParam(BaseModel):
    # An image content part wrapping an ``ImageURL``.
    image_url: ImageURL
    type: Literal['image_url'] = 'image_url'

    def __str__(self) -> str:
        """Delegate to the string form of the wrapped ``ImageURL``."""
        image = self.image_url
        return str(image)

    def __repr__(self) -> str:
        """Return a constructor-like representation built from the image's own repr."""
        return f'ContentPartImageParam(image_url={self.image_url!r})'
class Function(BaseModel):
    # The function half of a tool call: its name and raw argument string.
    arguments: str
    """
    The arguments to call the function with, as generated by the model in JSON
    format. Note that the model does not always generate valid JSON, and may
    hallucinate parameters not defined by your function schema. Validate the
    arguments in your code before calling your function.
    """

    name: str
    """The name of the function to call."""

    def __str__(self) -> str:
        """Return ``name(arguments)`` with the arguments truncated to 80 characters."""
        return f'{self.name}({_truncate(self.arguments, 80)})'

    def __repr__(self) -> str:
        """Return a constructor-like representation with the quoted arguments truncated to 50."""
        quoted_args = repr(self.arguments)
        shortened = _truncate(quoted_args, 50)
        return f'Function(name={self.name!r}, arguments={shortened})'
class ToolCall(BaseModel):
    # A single tool invocation: an ID plus the function that was called.
    id: str
    """The ID of the tool call."""

    function: Function
    """The function that the model called."""

    type: Literal['function'] = 'function'
    """The type of the tool. Currently, only `function` is supported."""

    def __str__(self) -> str:
        """Return ``ToolCall[<id>]: `` followed by the string form of the function."""
        call_text = str(self.function)
        return f'ToolCall[{self.id}]: {call_text}'

    def __repr__(self) -> str:
        """Return a constructor-like representation of the ID and function."""
        return f'ToolCall(id={self.id!r}, function={self.function!r})'
# endregion
# region - Message types
# Shared fields for the message classes; ``UserMessage``, ``SystemMessage`` and
# ``AssistantMessage`` each narrow ``role`` to a single literal value.
class _MessageBase(BaseModel):
    """Base class for all message types"""

    # Author of the message; one of the three supported roles.
    role: Literal['user', 'system', 'assistant']

    # Caching is opt-in: the default is False.
    cache: bool = False
    """Whether to cache this message. This is only applicable when using Anthropic models.
    """
class UserMessage(_MessageBase):
    # A message authored by the user; content is a string or text/image parts.
    role: Literal['user'] = 'user'
    """The role of the message author, in this case `user`."""

    content: str | list[ContentPartTextParam | ContentPartImageParam]
    """The contents of the user message."""

    name: str | None = None
    """An optional name for the participant.

    Provides the model information to differentiate between participants of the same
    role.
    """

    @property
    def text(self) -> str:
        """
        Return the textual content of the message.

        String content is returned as-is. For a list of content parts, the text
        of every part whose type is 'text' is joined with newlines; other parts
        (such as images) are skipped.
        """
        content = self.content
        if isinstance(content, str):
            return content
        if not isinstance(content, list):
            return ''
        return '\n'.join(part.text for part in content if part.type == 'text')

    def __str__(self) -> str:
        """Return ``UserMessage(content=...)`` using the plain text of the message."""
        body = self.text
        return f'UserMessage(content={body})'

    def __repr__(self) -> str:
        """Return ``UserMessage(content=...)`` using the repr of the message text."""
        return f'UserMessage(content={self.text!r})'
class SystemMessage(_MessageBase):
    # A system message; content is a string or a list of text parts.
    role: Literal['system'] = 'system'
    """The role of the message author, in this case `system`."""

    content: str | list[ContentPartTextParam]
    """The contents of the system message."""

    name: str | None = None

    @property
    def text(self) -> str:
        """
        Return the textual content of the message.

        String content is returned as-is. For a list of content parts, the text
        of every part whose type is 'text' is joined with newlines.
        """
        content = self.content
        if isinstance(content, str):
            return content
        if not isinstance(content, list):
            return ''
        return '\n'.join(part.text for part in content if part.type == 'text')

    def __str__(self) -> str:
        """Return ``SystemMessage(content=...)`` using the plain text of the message."""
        body = self.text
        return f'SystemMessage(content={body})'

    def __repr__(self) -> str:
        """Return ``SystemMessage(content=...)`` using the repr of the message text."""
        return f'SystemMessage(content={self.text!r})'
class AssistantMessage(_MessageBase):
    # A message authored by the assistant, optionally carrying tool calls.
    role: Literal['assistant'] = 'assistant'
    """The role of the message author, in this case `assistant`."""

    content: str | list[ContentPartTextParam | ContentPartRefusalParam] | None
    """The contents of the assistant message."""

    name: str | None = None

    refusal: str | None = None
    """The refusal message by the assistant."""

    tool_calls: list[ToolCall] = []
    """The tool calls generated by the model, such as function calls."""

    @property
    def text(self) -> str:
        """
        Return the textual content of the message.

        String content is returned as-is. For a list of content parts, text
        parts contribute their text and refusal parts contribute
        '[Refusal] <refusal>'; the pieces are concatenated without a separator.
        Any other content (including None) yields an empty string.
        """
        content = self.content
        if isinstance(content, str):
            return content
        if not isinstance(content, list):
            return ''

        fragments = []
        for part in content:
            if part.type == 'text':
                fragments.append(part.text)
            elif part.type == 'refusal':
                fragments.append(f'[Refusal] {part.refusal}')
        return ''.join(fragments)

    def __str__(self) -> str:
        """Return ``AssistantMessage(content=...)`` using the plain text of the message."""
        body = self.text
        return f'AssistantMessage(content={body})'

    def __repr__(self) -> str:
        """Return ``AssistantMessage(content=...)`` using the repr of the message text."""
        return f'AssistantMessage(content={self.text!r})'
# Union of the three concrete message classes: user, system and assistant.
BaseMessage = Union[UserMessage, SystemMessage, AssistantMessage]
# endregion