action_generation.py•7.08 kB
#!/usr/bin/env python3
"""
Function to generate structured function metadata from user action descriptions.
"""
import asyncio
import json
import os
import re
from pathlib import Path
from dotenv import load_dotenv
from dedalus_labs import AsyncDedalus, DedalusRunner
from pydantic import BaseModel
from typing import List, Dict, Any
from lib import extract_json
# Load environment variables from the `.env` file that sits one directory
# above the directory containing this module. This runs at import time.
# NOTE(review): presumably this supplies the credentials AsyncDedalus needs -
# confirm against the deployment setup.
env_path = Path(__file__).parent.parent / '.env'
load_dotenv(dotenv_path=env_path)
class FunctionParameter(BaseModel):
    """One named, typed value in a generated function specification.

    The same shape is used for a function's inputs and for its outputs
    (see the ``parameters`` and ``returns`` fields of ``FunctionMetadata``).
    """

    # Identifier of the parameter or return value.
    name: str
    # Type name as text; the generation prompt asks for Python type names.
    type: str
    # Human-readable explanation of what the value is for.
    description: str
class FunctionMetadata(BaseModel):
    """Structured description of a function derived from a user action.

    Built in ``action_agent`` from the JSON object returned by the model.
    """

    # Name of the function; the prompt requests snake_case.
    function_name: str
    # Inputs the function accepts.
    parameters: List[FunctionParameter]
    # Values the function produces.
    returns: List[FunctionParameter]
    # Free-text explanation of what the function should do.
    description: str
class State(BaseModel):
    """Open-ended container handed from one agent function to the next.

    No fields are declared up front. The agent functions in this module read
    and attach values as extra attributes: ``action_description``,
    ``website_url``, ``function_metadata``, ``test_inputs`` and
    ``task_description``.
    """

    # Pydantic v2 style configuration. This replaces the nested `class Config`,
    # which is deprecated in v2 (the version this file targets, since it uses
    # `model_copy`). extra="allow" lets additional fields be added dynamically.
    model_config = {"extra": "allow"}
async def action_agent(state: State) -> State:
    """Ask the model for a function specification matching a user action.

    Args:
        state: Current state; ``action_description`` and ``website_url``
            are read from it.

    Returns:
        A deep copy of ``state`` with ``function_metadata`` (a
        ``FunctionMetadata`` instance) attached. The input state itself is
        not modified.

    Raises:
        json.JSONDecodeError: If the extracted text is not valid JSON.
    """
    dedalus_runner = DedalusRunner(AsyncDedalus())
    action_description = state.action_description
    website_url = state.website_url

    # The prompt body stays flush-left so the text sent to the model carries
    # no extra indentation.
    prompt = f"""You are a helpful assistant that generates function metadata for a user action.
So, assume that the user wants to interact with the following website: {website_url}, and the user is asking you to generate a function that can be used to interact with this website.
It's up to you to figure out the function name, parameters, and return values. .
User Action: {action_description}
Generate a function specification with:
1. A clear, snake_case function name
2. A list of parameters with name, type, and description. Bias towards less parameters.
3. A list of return values with name, type, and description. Bias towards less return values.
4. A detailed description of what the function should do. More detailed is better.
Respond with ONLY a JSON object with this exact structure:
{{
"function_name": "<snake_case_name>",
"parameters": [
{{
"name": "<param_name>",
"type": "<python_type>",
"description": "<what this parameter does>"
}}
],
"returns": [
{{
"name": "<return_name>",
"type": "<python_type>",
"description": "<what this return value represents>"
}}
],
"description": "<detailed description of what the function does>"
}}
Be specific and use appropriate Python types (str, int, float, bool, List, Dict, etc.)."""

    response = await dedalus_runner.run(
        input=prompt,
        model="openai/gpt-5",
        stream=False
    )

    # extract_json comes from the project's `lib`; presumably it isolates the
    # JSON payload from the surrounding model text - confirm in lib.
    raw_spec = json.loads(extract_json(response.final_output))
    metadata = FunctionMetadata(**raw_spec)

    # Attach the result to a copy so the caller's state stays untouched.
    next_state = state.model_copy(deep=True)
    next_state.function_metadata = metadata
    return next_state
async def input_agent(state: State) -> State:
    """Ask the model for realistic test inputs for the specified function.

    Args:
        state: Current state; ``function_metadata`` and ``website_url``
            are read from it.

    Returns:
        A deep copy of ``state`` with ``test_inputs`` attached, taken from
        the ``"test_inputs"`` key of the model's JSON reply. The input state
        itself is not modified.

    Raises:
        json.JSONDecodeError: If the extracted text is not valid JSON.
        KeyError: If the reply has no ``"test_inputs"`` key.
    """
    dedalus_runner = DedalusRunner(AsyncDedalus())
    metadata = state.function_metadata
    website_url = state.website_url

    # One bullet line per parameter, or a placeholder when there are none.
    if metadata.parameters:
        params_detail = "\n".join(
            f" - {p.name} ({p.type}): {p.description}"
            for p in metadata.parameters
        )
    else:
        params_detail = " None"

    # The prompt body stays flush-left so the text sent to the model carries
    # no extra indentation.
    prompt = f"""You are an expert at generating realistic test inputs for browser automation.
Given the following function specification for a website automation task, generate 3-5 realistic test inputs.
WEBSITE: {website_url}
FUNCTION: {metadata.function_name}
DESCRIPTION: {metadata.description}
PARAMETERS:
{params_detail}
Generate realistic test inputs that would be used to test this function. Consider:
1. Common use cases
2. Edge cases
3. Different types of searches or queries that make sense for this website
IMPORTANT: Return ONLY valid JSON. Do not use Python string operations or code. Use actual string values.
Respond with ONLY a JSON object with this structure:
{{
"test_inputs": [
{{"param_name": "value1"}},
{{"param_name": "value2"}},
{{"param_name": "value3"}}
]
}}
Each test input should be a dictionary mapping parameter names to realistic string values."""

    response = await dedalus_runner.run(
        input=prompt,
        model="openai/gpt-5",
        stream=False
    )

    # extract_json comes from the project's `lib`; presumably it isolates the
    # JSON payload from the surrounding model text - confirm in lib.
    payload = json.loads(extract_json(response.final_output))

    # Attach the result to a copy so the caller's state stays untouched.
    next_state = state.model_copy(deep=True)
    next_state.test_inputs = payload['test_inputs']
    return next_state
async def task_agent(state: State) -> State:
    """Ask the model to write a task description for a browser agent.

    Args:
        state: Current state; ``function_metadata`` and ``website_url``
            are read from it.

    Returns:
        A deep copy of ``state`` with ``task_description`` attached: the
        model's final output with surrounding whitespace stripped. The input
        state itself is not modified.
    """
    dedalus_runner = DedalusRunner(AsyncDedalus())
    metadata = state.function_metadata
    website_url = state.website_url

    # One bullet line per parameter, or a placeholder when there are none.
    if metadata.parameters:
        params_detail = "\n".join(
            f" - {p.name} ({p.type}): {p.description}"
            for p in metadata.parameters
        )
    else:
        params_detail = " None"

    # Same layout for the expected return values.
    if metadata.returns:
        returns_detail = "\n".join(
            f" - {r.name} ({r.type}): {r.description}"
            for r in metadata.returns
        )
    else:
        returns_detail = " None"

    # The prompt body stays flush-left so the text sent to the model carries
    # no extra indentation.
    prompt = f"""You are an expert at writing detailed, actionable task descriptions for browser automation agents.
Given the following function specification, create a comprehensive task description that a browser automation agent can execute.
WEBSITE: {website_url}
FUNCTION SPECIFICATION:
- Name: {metadata.function_name}
- Description: {metadata.description}
PARAMETERS (inputs to use):
{params_detail}
EXPECTED RETURNS (what to extract/return):
{returns_detail}
Write a detailed task description for a browser automation agent that:
1. Clearly explains the objective and what needs to be accomplished on the website
2. Specifies exactly what data to extract.
3. Details what the final output should look like based on the expected returns
Note: there is no API available to the browser agent. it is only able to interact with the website through the browser.
Be specific. Write as if instructing a human assistant who needs clear guidance. Only return the task description, no other text.
Do not include any actions to perform on the website. You don't know what the website looks like.
TASK DESCRIPTION:"""

    response = await dedalus_runner.run(
        input=prompt,
        model="openai/gpt-5",
        stream=False
    )

    # Attach the result to a copy so the caller's state stays untouched.
    next_state = state.model_copy(deep=True)
    next_state.task_description = response.final_output.strip()
    return next_state