We provide all the information about MCP servers via our MCP API.
curl -X GET 'https://glama.ai/api/mcp/v1/servers/89jobrien/mcp-joecc'
If you have feedback or need assistance with the MCP directory API, please join our Discord server.
"""Reward functions for OpenPipe ART training.
Provides reward signals for training based on:
1. Task completion metrics from todo status transitions
2. RULER-based LLM judging
3. Combined weighted rewards
"""
from __future__ import annotations
from typing import TYPE_CHECKING, Any
from loguru import logger
if TYPE_CHECKING:
import art
def task_completion_reward(
    _trajectory: art.Trajectory,
    completion_signals: dict[str, Any] | None = None,
) -> float:
    """Calculate reward based on task completion signals.

    Uses status transitions and completion metrics to determine
    how well the model performed the task. The result is a weighted
    average over whichever signals are present, so it always stays
    in [0, 1].

    Args:
        _trajectory: The trajectory (reserved for future metadata extraction).
        completion_signals: Optional dict with completion metrics:
            - task_completed: bool - whether task was marked done
            - status_progression: list - status transitions made
            - time_to_completion: float - time in seconds
            - error_count: int - number of errors encountered

    Returns:
        Reward score between 0.0 and 1.0.
    """
    if not completion_signals:
        # No completion signals - use neutral reward
        return 0.5
    reward = 0.0
    weights_sum = 0.0
    # Task completed is most important (half of the total weight)
    if "task_completed" in completion_signals:
        weight = 0.5
        reward += weight * (1.0 if completion_signals["task_completed"] else 0.0)
        weights_sum += weight
    # Proper status progression
    if "status_progression" in completion_signals:
        weight = 0.2
        progression = completion_signals["status_progression"]
        # Reward clean progressions (todo -> in_progress -> done) fully;
        # partial credit for reaching "done" or at least "in_progress".
        expected = ["todo", "in_progress", "done"]
        if progression == expected:
            reward += weight * 1.0
        elif "done" in progression:
            reward += weight * 0.7
        elif "in_progress" in progression:
            reward += weight * 0.3
        weights_sum += weight
    # Penalize errors
    if "error_count" in completion_signals:
        weight = 0.2
        # Clamp at 0 so a (buggy) negative count cannot inflate the reward.
        error_count = max(0, completion_signals["error_count"])
        # No errors = full reward; each error costs 0.2, floored at 0.
        error_penalty = max(0.0, 1.0 - (error_count * 0.2))
        reward += weight * error_penalty
        weights_sum += weight
    # Reward faster completion (if time metric available)
    if "time_to_completion" in completion_signals:
        weight = 0.1
        # Clamp at 0 so clock skew / negative durations cannot push the
        # time reward above 1.0 (the overall score is documented as [0, 1]).
        time_sec = max(0.0, completion_signals["time_to_completion"])
        # Linear decay over a 5-minute (300 s) baseline: instant completion
        # earns the full time reward, >= 300 s earns none.
        time_reward = max(0.0, 1.0 - (time_sec / 300.0))
        reward += weight * time_reward
        weights_sum += weight
    # Normalize by weights used; neutral 0.5 if no recognized signal was given.
    return reward / weights_sum if weights_sum > 0 else 0.5
async def ruler_reward(
    trajectory: art.Trajectory,
    ruler_model: str = "openrouter/openai/gpt-4o-mini",
    _rubric: dict[str, Any] | None = None,
) -> float:
    """Score a trajectory using RULER (LLM-as-judge).

    RULER compares multiple responses and ranks them based on
    how well they accomplish the task.

    Args:
        trajectory: The trajectory to score.
        ruler_model: Model to use for judging.
        _rubric: Optional custom rubric (reserved for future custom scoring).

    Returns:
        Reward score from RULER, or 0.0 if every attempt fails.
    """
    import art
    from art.rewards import ruler_score_group

    # RULER scores trajectory groups, so wrap the single trajectory.
    group = art.TrajectoryGroup(trajectories=[trajectory])
    max_retries = 5
    for attempt in range(1, max_retries + 1):
        try:
            scored = await ruler_score_group(group, ruler_model, debug=False)
        except Exception as exc:
            # Transient judge failures (rate limits, network) — retry.
            logger.warning(f"RULER scoring attempt {attempt} failed: {exc}")
            continue
        # An empty result is treated like a failure and retried.
        if scored and scored.trajectories:
            return scored.trajectories[0].reward
    logger.error("Failed to get RULER score after max retries")
    return 0.0
def combined_reward(
    trajectory: art.Trajectory,
    completion_signals: dict[str, Any] | None = None,
    ruler_score: float | None = None,
    weights: dict[str, float] | None = None,
) -> float:
    """Calculate a weighted combination of reward signals.

    Combines task completion metrics with RULER scoring for
    a comprehensive reward signal. Only components with a positive
    weight (and, for RULER, an available score) contribute; the
    result is normalized by the weight actually used.

    Args:
        trajectory: The trajectory to score.
        completion_signals: Task completion metrics.
        ruler_score: Pre-computed RULER score (if available).
        weights: Custom weights for combining rewards:
            - task_completion: weight for completion reward (default 0.3)
            - ruler: weight for RULER reward (default 0.7)

    Returns:
        Combined reward score between 0.0 and 1.0.
    """
    # Falsy (None or empty) weights fall back to the defaults.
    active_weights = weights or {"task_completion": 0.3, "ruler": 0.7}

    numerator = 0.0
    denominator = 0.0

    # Task completion component.
    tc_weight = active_weights.get("task_completion", 0)
    if tc_weight > 0:
        numerator += tc_weight * task_completion_reward(trajectory, completion_signals)
        denominator += tc_weight

    # RULER component — skipped when no score was computed.
    ruler_weight = active_weights.get("ruler", 0)
    if ruler_weight > 0 and ruler_score is not None:
        numerator += ruler_weight * ruler_score
        denominator += ruler_weight

    # Neutral reward if nothing contributed.
    return numerator / denominator if denominator > 0 else 0.5
def extract_completion_signals_from_todo(
    todo_before: dict[str, Any],
    todo_after: dict[str, Any],
) -> dict[str, Any]:
    """Extract completion signals from todo state changes.

    Analyzes the before/after state of a todo to generate
    completion signals for reward calculation.

    Args:
        todo_before: Todo state before agent action.
        todo_after: Todo state after agent action.

    Returns:
        Dict with completion signals (task_completed, status_progression,
        error_count).
    """
    # Missing statuses default to "todo" for the progression.
    before_status = todo_before.get("status", "todo")
    after_status = todo_after.get("status", "todo")
    return {
        # Task counts as completed only when the final status is "done".
        "task_completed": todo_after.get("status") == "done",
        "status_progression": [before_status, after_status],
        # Any truthy "error" field counts as a single error.
        "error_count": 1 if todo_after.get("error") else 0,
    }