import json
from typing import overload
from langchain_core.messages import ( # pyright: ignore
AIMessage,
HumanMessage,
SystemMessage,
)
from langchain_core.messages import ( # pyright: ignore
ToolCall as LangChainToolCall,
)
from langchain_core.messages.base import BaseMessage as LangChainBaseMessage # pyright: ignore
from browser_use.llm.messages import (
AssistantMessage,
BaseMessage,
ContentPartImageParam,
ContentPartRefusalParam,
ContentPartTextParam,
ToolCall,
UserMessage,
)
from browser_use.llm.messages import (
SystemMessage as BrowserUseSystemMessage,
)
class LangChainMessageSerializer:
    """Serializer that converts browser-use message types into LangChain message types.

    All methods are static; the class only serves as a namespace. The public
    entry points are ``serialize`` (one message) and ``serialize_messages``
    (a list of messages).
    """

    @staticmethod
    def _serialize_user_content(
        content: str | list[ContentPartTextParam | ContentPartImageParam],
    ) -> str | list[str | dict]:
        """Convert user message content for LangChain compatibility.

        Args:
            content: Either plain text or a list of text / image content parts.

        Returns:
            The string unchanged, or a list with one dict per text or image
            part. Parts whose ``type`` is neither ``'text'`` nor
            ``'image_url'`` are skipped.
        """
        if isinstance(content, str):
            return content

        serialized_parts: list[str | dict] = []
        for part in content:
            if part.type == 'text':
                serialized_parts.append({'type': 'text', 'text': part.text})
            elif part.type == 'image_url':
                # LangChain format for images
                serialized_parts.append(
                    {
                        'type': 'image_url',
                        'image_url': {'url': part.image_url.url, 'detail': part.image_url.detail},
                    }
                )
        return serialized_parts

    @staticmethod
    def _serialize_system_content(
        content: str | list[ContentPartTextParam],
    ) -> str:
        """Convert system message content to a text string for LangChain compatibility.

        Args:
            content: Either plain text or a list of content parts.

        Returns:
            The string unchanged, or the text of every ``'text'`` part joined
            with newlines (an empty string when there are no text parts).
        """
        if isinstance(content, str):
            return content
        return '\n'.join(part.text for part in content if part.type == 'text')

    @staticmethod
    def _serialize_assistant_content(
        content: str | list[ContentPartTextParam | ContentPartRefusalParam] | None,
    ) -> str:
        """Convert assistant message content to a text string for LangChain compatibility.

        Args:
            content: Plain text, a list of text / refusal parts, or ``None``.

        Returns:
            An empty string for ``None``, the string unchanged, or the text of
            every ``'text'`` part joined with newlines.
        """
        if content is None:
            return ''
        if isinstance(content, str):
            return content
        # Only text parts contribute to the output; refusal parts are dropped.
        return '\n'.join(part.text for part in content if part.type == 'text')

    @staticmethod
    def _serialize_tool_call(tool_call: ToolCall) -> LangChainToolCall:
        """Convert a browser-use ToolCall to a LangChain ToolCall.

        The browser-use tool call carries its arguments as a string, while the
        LangChain tool call takes them as a dict.

        Args:
            tool_call: The browser-use tool call to convert.

        Returns:
            A LangChain tool call with the same name and id. ``args`` is the
            parsed JSON object when the argument string decodes to one;
            otherwise the raw string is wrapped as ``{'arguments': <raw>}``.
        """
        raw_arguments = tool_call.function.arguments
        try:
            parsed = json.loads(raw_arguments)
        except json.JSONDecodeError:
            parsed = None

        # json.loads may legitimately return a list, string, number, bool or
        # None. Only a JSON object is usable as ``args``, so anything else gets
        # the same wrapping as unparseable input.
        if isinstance(parsed, dict):
            args_dict = parsed
        else:
            args_dict = {'arguments': raw_arguments}

        return LangChainToolCall(
            name=tool_call.function.name,
            args=args_dict,
            id=tool_call.id,
        )

    # region - Serialize overloads
    @overload
    @staticmethod
    def serialize(message: UserMessage) -> HumanMessage: ...

    @overload
    @staticmethod
    def serialize(message: BrowserUseSystemMessage) -> SystemMessage: ...

    @overload
    @staticmethod
    def serialize(message: AssistantMessage) -> AIMessage: ...

    # endregion

    @staticmethod
    def serialize(message: BaseMessage) -> LangChainBaseMessage:
        """Serialize a browser-use message to a LangChain message.

        Args:
            message: A browser-use user, system, or assistant message.

        Returns:
            ``HumanMessage`` for a user message, ``SystemMessage`` for a system
            message, and ``AIMessage`` for an assistant message. The message
            ``name`` is carried over in every case.

        Raises:
            ValueError: If ``message`` is none of the three supported types.
        """
        if isinstance(message, UserMessage):
            content = LangChainMessageSerializer._serialize_user_content(message.content)
            return HumanMessage(content=content, name=message.name)

        elif isinstance(message, BrowserUseSystemMessage):
            content = LangChainMessageSerializer._serialize_system_content(message.content)
            return SystemMessage(content=content, name=message.name)

        elif isinstance(message, AssistantMessage):
            # Handle content
            content = LangChainMessageSerializer._serialize_assistant_content(message.content)

            # For simplicity, we'll ignore tool calls in LangChain integration
            # as requested by the user
            return AIMessage(
                content=content,
                name=message.name,
            )

        else:
            raise ValueError(f'Unknown message type: {type(message)}')

    @staticmethod
    def serialize_messages(messages: list[BaseMessage]) -> list[LangChainBaseMessage]:
        """Serialize a list of browser-use messages to LangChain messages.

        Args:
            messages: The browser-use messages to convert, in order.

        Returns:
            A new list holding the result of ``serialize`` for each input
            message, in the same order.

        Raises:
            ValueError: If any message is of an unsupported type.
        """
        return [LangChainMessageSerializer.serialize(m) for m in messages]