/**
* Shared Agent Core — single source of truth for CLI + server agent
*
* Pure TypeScript, no runtime-specific APIs (no Deno.env, no process.env, no fs).
* Both the CLI (Node.js) and server (Fly container) import from here.
*/
import { getProvider } from "./constants.js";
// ============================================================================
// TYPES
// ============================================================================
/** Verdict from LoopDetector.recordCall: whether a tool call may proceed. */
export interface LoopCheckResult {
  /** True when the call must not be executed (loop / repeated failure detected). */
  blocked: boolean;
  /** Model-facing explanation; present only when blocked. */
  reason?: string;
}
/** Verdict from LoopDetector.endTurn: whether the agent should give up. */
export interface BailCheckResult {
  /** True once too many consecutive turns have had errors. */
  shouldBail: boolean;
  /** Model-facing instruction to stop; present only when shouldBail. */
  message?: string;
}
/** Token and cost accounting for an API call (see estimateCostUsd). */
export interface CostInfo {
  inputTokens: number;
  outputTokens: number;
  /** Estimated cost in US dollars. */
  costUsd: number;
  /** Prompt-cache creation (write) token count. */
  cacheCreation: number;
  /** Prompt-cache read token count. */
  cacheRead: number;
}
/** Beta flags + context_management payload produced by getContextManagement. */
export interface ContextManagementConfig {
  /** Anthropic beta header values to send with the request. */
  betas: string[];
  /** context_management request body: a list of edit strategies. */
  config: { edits: Array<Record<string, unknown>> };
}
// ============================================================================
// MODEL-AWARE CONTEXT MANAGEMENT
// ============================================================================
/**
 * Returns Anthropic beta flags and context_management config for the given model.
 * - Opus 4.6: compact at 120K + clear at 80K/keep 3
 * - All other Claude models: clear at 80K/keep 3 only
 * - Non-Anthropic models (Gemini, OpenAI): no betas, no context management
 */
export function getContextManagement(model: string): ContextManagementConfig {
  const provider = getProvider(model);

  // Non-Anthropic providers take neither beta headers nor context_management.
  if (["gemini", "openai"].includes(provider)) {
    return { betas: [], config: { edits: [] } };
  }

  const betas: string[] = ["context-management-2025-06-27"];
  const edits: Array<Record<string, unknown>> = [];

  // Server-side compaction is gated on the exact Opus 4.6 identifier. Keep the
  // match narrow: broadening it to "opus-4" could hand future Opus variants a
  // compact_20260112 config they don't support, producing invalid requests.
  if (model.includes("opus-4-6")) {
    betas.push("compact-2026-01-12");
    edits.push({
      type: "compact_20260112",
      trigger: { type: "input_tokens", value: 120_000 },
    });
  }

  // Every Claude model clears old tool uses: once input crosses 80K tokens,
  // keep only the 3 most recent tool uses.
  edits.push({
    type: "clear_tool_uses_20250919",
    trigger: { type: "input_tokens", value: 80_000 },
    keep: { type: "tool_uses", value: 3 },
  });

  return { betas, config: { edits } };
}
/**
 * Model-aware max output tokens.
 * Agent config max_tokens takes priority but is capped at model maximum.
 */
const MODEL_MAX_OUTPUT_TOKENS: Record<string, number> = {
  // Gemini family
  "gemini-3-pro-preview": 65536,
  "gemini-3-flash-preview": 65536,
  "gemini-2.5-pro": 65536,
  "gemini-2.5-flash": 65536,
  "gemini-2.5-flash-lite": 65536,
  // OpenAI — GPT-5 family: 128K max output, o-series: 100K
  "gpt-5": 128000,
  "gpt-5-mini": 128000,
  "gpt-5-nano": 128000,
  "o3": 100000,
  "o4-mini": 100000,
  "gpt-4o": 16384,
};

/**
 * Resolve the effective max_tokens for a request.
 *
 * @param model model ID (unknown models fall back to a conservative 16384)
 * @param agentMax optional agent-configured max; a falsy value (0/undefined)
 *                 means "not set" and the model maximum is used
 */
export function getMaxOutputTokens(model: string, agentMax?: number): number {
  const modelMax = MODEL_MAX_OUTPUT_TOKENS[model] ?? 16384;
  if (agentMax) {
    // Agent config wins, but never above what the model can actually emit.
    return Math.min(agentMax, modelMax);
  }
  return modelMax;
}
// ============================================================================
// MULTI-BREAKPOINT PROMPT CACHING
// ============================================================================
/**
 * Add prompt cache breakpoints to tools and messages.
 * Uses 2 of 4 allowed breakpoints:
 * - Last tool definition
 * - Turn boundary (second-to-last message)
 * System prompt caching is handled by the caller.
 *
 * Inputs are not mutated; shallow copies with cache_control added are returned.
 */
export function addPromptCaching(
  tools: Array<Record<string, unknown>>,
  messages: Array<Record<string, unknown>>
): { tools: Array<Record<string, unknown>>; messages: Array<Record<string, unknown>> } {
  // Breakpoint 1: cache_control on the last tool definition (if any).
  const cachedTools: Array<Record<string, unknown>> =
    tools.length > 0
      ? [...tools.slice(0, -1), { ...tools[tools.length - 1], cache_control: { type: "ephemeral" } }]
      : [...tools];

  // Breakpoint 2: cache_control on the second-to-last message, so the cached
  // prefix covers everything up to the previous turn boundary.
  const cachedMessages = [...messages];
  if (cachedMessages.length >= 2) {
    const idx = cachedMessages.length - 2;
    const msg = cachedMessages[idx];
    const content = msg.content;
    if (typeof content === "string") {
      // String content must become a block array to carry cache_control.
      cachedMessages[idx] = {
        ...msg,
        content: [{ type: "text", text: content, cache_control: { type: "ephemeral" } }],
      };
    } else if (Array.isArray(content) && content.length > 0) {
      // FIX: guard against empty content arrays. Previously an empty array
      // fell into this branch and `blocks[blocks.length - 1]` wrote to index
      // -1, corrupting the array with a bogus "-1" property spread from
      // undefined. Empty-content messages are now left untouched.
      const blocks = [...content];
      blocks[blocks.length - 1] = { ...blocks[blocks.length - 1], cache_control: { type: "ephemeral" } };
      cachedMessages[idx] = { ...msg, content: blocks };
    }
  }
  return { tools: cachedTools, messages: cachedMessages };
}
// ============================================================================
// LOOP DETECTION
// ============================================================================
/** djb2 string hash — fast, deterministic, no dependencies */
function djb2Hash(str: string): string {
  let hash = 5381;
  for (let i = 0; i < str.length; i++) {
    // hash * 33 + char, truncated to 32 bits
    hash = ((hash << 5) + hash + str.charCodeAt(i)) & 0xffffffff;
  }
  return hash.toString(36);
}
/**
 * Detects unproductive agent behavior: identical tool calls repeated within a
 * window, consecutive failures of one tool, error-heavy turns, and sessions
 * whose turns keep failing.
 *
 * Lifecycle:
 *  - recordCall() before each tool execution (blocked => skip the call)
 *  - recordResult() after each execution
 *  - endTurn() when the model's turn completes (may recommend bailing)
 *  - resetTurn() before the next turn; reset() for a new session
 */
export class LoopDetector {
  /** Recent allowed calls, for identical-call detection (cleared per turn). */
  private history: { name: string; inputHash: string }[] = [];
  /** Per-tool consecutive failure counts; a success clears (cleared per turn). */
  private consecutiveErrors = new Map<string, number>();
  /** Number of failures in the current turn. */
  private turnErrors = 0;
  /** Whether any tool failed in the current turn. */
  private turnHadErrors = false;
  /** Per-tool failure counts across the whole session. */
  private sessionErrors = new Map<string, number>();
  /** Hashes of exact (tool, input) pairs that have failed (session-wide). */
  private failedStrategies = new Set<string>();
  private consecutiveFailedTurns = 0;
  private totalSessionErrors = 0;

  static IDENTICAL_CALL_LIMIT = 4;
  static CONSECUTIVE_ERROR_LIMIT = 3;
  static TURN_ERROR_LIMIT = 5;
  static WINDOW = 20;
  static SESSION_TOOL_ERROR_LIMIT = 10;
  static CONSECUTIVE_FAILED_TURN_LIMIT = 3;

  /**
   * Hash a (tool name, input) pair as a JSON tuple.
   *
   * FIX: the previous scheme hashed `JSON.stringify({ name, ...input })`, so
   * an input field literally called "name" (common in tool schemas) clobbered
   * the tool name — different tools invoked with the same input hashed
   * identically, producing false "failed strategy" blocks across tools.
   * Hashing the tuple [name, input] keeps the two namespaces separate.
   * (Hashes are internal state only, so the scheme change is safe.)
   */
  private hashCall(name: string, input: Record<string, unknown>): string {
    return djb2Hash(JSON.stringify([name, input]));
  }

  /**
   * Check whether a tool call should be blocked; if allowed, record it in the
   * identical-call window. Checks run from most to least specific.
   */
  recordCall(name: string, input: Record<string, unknown>): LoopCheckResult {
    const inputHash = this.hashCall(name, input);
    // 1. This exact (tool, input) pair already failed earlier in the session.
    if (this.failedStrategies.has(inputHash)) {
      return {
        blocked: true,
        reason: `Blocked: this exact "${name}" call failed in a previous turn. Try a fundamentally different approach.`,
      };
    }
    // 2. The tool has failed too many times over the whole session.
    const sessionErrorCount = this.sessionErrors.get(name) || 0;
    if (sessionErrorCount >= LoopDetector.SESSION_TOOL_ERROR_LIMIT) {
      return {
        blocked: true,
        reason: `Tool "${name}" has failed ${sessionErrorCount} times this session. Stop using this tool and try a different approach.`,
      };
    }
    // 3. Too many errors this turn, across all tools.
    if (this.turnErrors >= LoopDetector.TURN_ERROR_LIMIT) {
      return {
        blocked: true,
        reason: `${this.turnErrors} errors this turn. Stop and re-assess your approach.`,
      };
    }
    // 4. This tool failed several times in a row without a success.
    const errorCount = this.consecutiveErrors.get(name) || 0;
    if (errorCount >= LoopDetector.CONSECUTIVE_ERROR_LIMIT) {
      return {
        blocked: true,
        reason: `Tool "${name}" blocked: failed ${errorCount} times consecutively. Try a different approach.`,
      };
    }
    // 5. Identical call repeated too often within the recent window.
    const windowSlice = this.history.slice(-LoopDetector.WINDOW);
    const identicalCount = windowSlice.filter((h) => h.inputHash === inputHash).length;
    if (identicalCount >= LoopDetector.IDENTICAL_CALL_LIMIT) {
      return {
        blocked: true,
        reason: `Tool "${name}" blocked: identical call made ${identicalCount} times. Try different parameters.`,
      };
    }
    // Allowed: record it. Cap history at 2x the window to bound memory.
    this.history.push({ name, inputHash });
    if (this.history.length > LoopDetector.WINDOW * 2) {
      this.history = this.history.slice(-LoopDetector.WINDOW);
    }
    return { blocked: false };
  }

  /**
   * Record a tool call's outcome. Pass `input` on failure so the exact
   * (tool, input) combination is remembered as a failed strategy.
   */
  recordResult(name: string, success: boolean, input?: Record<string, unknown>): void {
    if (success) {
      this.consecutiveErrors.delete(name);
      return;
    }
    this.consecutiveErrors.set(name, (this.consecutiveErrors.get(name) || 0) + 1);
    this.turnErrors++;
    this.turnHadErrors = true;
    this.sessionErrors.set(name, (this.sessionErrors.get(name) || 0) + 1);
    this.totalSessionErrors++;
    if (input) {
      this.failedStrategies.add(this.hashCall(name, input));
      // Bound memory: once over 200 entries, keep only the newest 100.
      // (Sets iterate in insertion order, so slice(-100) keeps the newest.)
      if (this.failedStrategies.size > 200) {
        const arr = Array.from(this.failedStrategies);
        this.failedStrategies = new Set(arr.slice(-100));
      }
    }
  }

  /** Evaluate the finished turn; recommends bailing after repeated failed turns. */
  endTurn(): BailCheckResult {
    if (this.turnHadErrors) {
      this.consecutiveFailedTurns++;
    } else {
      this.consecutiveFailedTurns = 0;
    }
    if (this.consecutiveFailedTurns >= LoopDetector.CONSECUTIVE_FAILED_TURN_LIMIT) {
      return {
        shouldBail: true,
        message: `You have had errors in ${this.consecutiveFailedTurns} consecutive turns (${this.totalSessionErrors} total errors). Your approach is not working. STOP and explain to the user what's failing.`,
      };
    }
    return { shouldBail: false };
  }

  /** Clear per-turn state only (history, consecutive errors, turn counters). */
  resetTurn(): void {
    this.history = [];
    this.consecutiveErrors.clear();
    this.turnErrors = 0;
    this.turnHadErrors = false;
  }

  /** Clear all state, per-turn and per-session. */
  reset(): void {
    this.resetTurn();
    this.sessionErrors.clear();
    this.failedStrategies.clear();
    this.consecutiveFailedTurns = 0;
    this.totalSessionErrors = 0;
  }

  /** Session-level stats for logging/telemetry. */
  getSessionStats(): { totalErrors: number; failedStrategies: number; consecutiveFailedTurns: number } {
    return {
      totalErrors: this.totalSessionErrors,
      failedStrategies: this.failedStrategies.size,
      consecutiveFailedTurns: this.consecutiveFailedTurns,
    };
  }
}
// ============================================================================
// EXTENDED THINKING
// ============================================================================
/** Extended-thinking request configuration (produced by getThinkingConfig). */
export interface ThinkingConfig {
  /** "adaptive": model-managed budget (Opus 4.6); "enabled": fixed budget; "disabled": off. */
  type: "enabled" | "disabled" | "adaptive";
  /** Thinking token budget; set only for type "enabled" (must be < max_tokens). */
  budget_tokens?: number;
}
/**
* Returns the thinking configuration and required beta string for the given model.
* - Opus 4.6: adaptive thinking (no budget needed)
* - Sonnet/Haiku: enabled with 10000 token budget
* - budget_tokens must be strictly < max_tokens
*/
export function getThinkingConfig(model: string, enabled: boolean): {
thinking: ThinkingConfig;
beta: string;
} {
if (!enabled) {
return { thinking: { type: "disabled" }, beta: "" };
}
const provider = getProvider(model);
// Gemini models: thinking is always-on for 2.5+/3.x — signal pass-through
if (provider === "gemini") {
return { thinking: { type: "enabled" }, beta: "" };
}
// OpenAI models: reasoning models (o-series) have built-in reasoning, GPT models don't support thinking
if (provider === "openai") {
const isReasoning = /^o\d/.test(model);
return { thinking: { type: isReasoning ? "enabled" : "disabled" }, beta: "" };
}
if (model.includes("opus-4-6")) {
return {
thinking: { type: "adaptive" },
beta: "adaptive-thinking-2026-01-28",
};
}
// Sonnet/Haiku: fixed budget
return {
thinking: { type: "enabled", budget_tokens: 10_000 },
beta: "interleaved-thinking-2025-05-14",
};
}
// ============================================================================
// COST TRACKING
// ============================================================================
/** Per-model pricing in USD per 1M tokens. thinkingPer1M defaults to outputPer1M. */
export const MODEL_PRICING: Record<string, { inputPer1M: number; outputPer1M: number; thinkingPer1M?: number }> = {
  // Anthropic direct
  "claude-sonnet-4-20250514": { inputPer1M: 3.0, outputPer1M: 15.0, thinkingPer1M: 15.0 },
  "claude-opus-4-6": { inputPer1M: 5.0, outputPer1M: 25.0, thinkingPer1M: 25.0 },
  "claude-haiku-4-5-20251001": { inputPer1M: 1.0, outputPer1M: 5.0, thinkingPer1M: 5.0 },
  // Bedrock (same Claude pricing)
  "us.anthropic.claude-sonnet-4-20250514-v1:0": { inputPer1M: 3.0, outputPer1M: 15.0, thinkingPer1M: 15.0 },
  "us.anthropic.claude-sonnet-4-5-20250929-v1:0": { inputPer1M: 3.0, outputPer1M: 15.0, thinkingPer1M: 15.0 },
  "us.anthropic.claude-haiku-4-5-20251001-v1:0": { inputPer1M: 1.0, outputPer1M: 5.0, thinkingPer1M: 5.0 },
  // Gemini (thinking tokens are cheaper than output)
  "gemini-3-pro-preview": { inputPer1M: 1.25, outputPer1M: 10.0, thinkingPer1M: 2.50 },
  "gemini-3-flash-preview": { inputPer1M: 0.15, outputPer1M: 0.60, thinkingPer1M: 0.15 },
  "gemini-2.5-pro": { inputPer1M: 1.25, outputPer1M: 10.0, thinkingPer1M: 2.50 },
  "gemini-2.5-flash": { inputPer1M: 0.15, outputPer1M: 0.60, thinkingPer1M: 0.15 },
  "gemini-2.5-flash-lite": { inputPer1M: 0.075, outputPer1M: 0.30, thinkingPer1M: 0.075 },
  // OpenAI
  "gpt-5": { inputPer1M: 1.25, outputPer1M: 10.0 },
  "gpt-5-mini": { inputPer1M: 0.25, outputPer1M: 2.0 },
  "gpt-5-nano": { inputPer1M: 0.05, outputPer1M: 0.40 },
  "o3": { inputPer1M: 2.0, outputPer1M: 8.0, thinkingPer1M: 8.0 },
  "o4-mini": { inputPer1M: 1.10, outputPer1M: 4.40, thinkingPer1M: 4.40 },
  "gpt-4o": { inputPer1M: 2.50, outputPer1M: 10.0 },
};
/**
 * Estimate USD cost for one API call.
 *
 * Pricing resolution: exact model-ID match first, then the LONGEST pricing
 * key that is a prefix of the model ID, then Sonnet pricing as a fallback.
 * FIX: the previous lookup used `.find()` over keys in insertion order, so a
 * dated snapshot like "gpt-5-mini-2025-01" matched the shorter "gpt-5" key
 * first and was billed at gpt-5 rates instead of gpt-5-mini rates.
 *
 * @param inputTokens total input tokens (cached tokens assumed included)
 * @param outputTokens output tokens
 * @param model model ID used for pricing and provider lookup
 * @param thinkingTokens billed at thinkingPer1M when defined, else output rate
 * @param cacheReadTokens prompt-cache reads (discounted per provider)
 * @param cacheCreationTokens prompt-cache writes (Anthropic 25% surcharge)
 */
export function estimateCostUsd(
  inputTokens: number,
  outputTokens: number,
  model: string,
  thinkingTokens = 0,
  cacheReadTokens = 0,
  cacheCreationTokens = 0,
): number {
  let pricing = MODEL_PRICING[model];
  if (!pricing) {
    const prefixKey = Object.keys(MODEL_PRICING)
      .filter((k) => model.startsWith(k))
      .sort((a, b) => b.length - a.length)[0]; // longest prefix wins
    pricing = prefixKey ? MODEL_PRICING[prefixKey] : MODEL_PRICING["claude-sonnet-4-20250514"];
  }
  const inputRate = pricing.inputPer1M;
  const thinkingRate = pricing.thinkingPer1M || pricing.outputPer1M;
  // Base cost
  let cost =
    (inputTokens / 1_000_000) * inputRate +
    (outputTokens / 1_000_000) * pricing.outputPer1M +
    (thinkingTokens / 1_000_000) * thinkingRate;
  // Cache pricing — subtract savings for cached tokens
  // Anthropic/Bedrock: cache reads 90% cheaper, cache creation 25% surcharge
  // OpenAI: cache reads 50% cheaper, no creation surcharge
  // Gemini: cache reads 75% cheaper
  if (cacheReadTokens > 0 || cacheCreationTokens > 0) {
    const provider = getProvider(model);
    if (provider === "anthropic" || provider === "bedrock") {
      cost -= (cacheReadTokens / 1_000_000) * inputRate * 0.9;
      cost += (cacheCreationTokens / 1_000_000) * inputRate * 0.25;
    } else if (provider === "openai") {
      cost -= (cacheReadTokens / 1_000_000) * inputRate * 0.5;
    } else if (provider === "gemini") {
      cost -= (cacheReadTokens / 1_000_000) * inputRate * 0.75;
    }
  }
  // Guard against pathological inputs (e.g. cache reads exceeding billed input)
  // driving the estimate negative.
  return Math.max(0, cost);
}
// ============================================================================
// RETRY LOGIC
// ============================================================================
/**
 * Heuristic for transient API failures worth retrying: rate limiting (429),
 * server errors (500/503), Anthropic "overloaded" (529), and common
 * overload / rate-limit / timeout message text.
 */
export function isRetryableError(err: unknown): boolean {
  const e = err as { status?: unknown; statusCode?: unknown; message?: unknown };
  const status = e?.status || e?.statusCode;
  const retryableStatuses = [429, 500, 503, 529];
  if (retryableStatuses.includes(status as number)) {
    return true;
  }
  const msg = String(e?.message || "").toLowerCase();
  return ["overloaded", "rate limit", "timeout"].some((needle) => msg.includes(needle));
}
// ============================================================================
// TOOL RESULT TRUNCATION
// ============================================================================
/**
 * Truncate an over-long tool result, appending a note with the original size.
 * Note: the returned string may exceed maxChars by the length of the suffix.
 */
export function truncateToolResult(content: string, maxChars: number): string {
  if (content.length <= maxChars) {
    return content;
  }
  const note = `\n\n... (truncated — ${content.length.toLocaleString()} chars total)`;
  return content.slice(0, maxChars) + note;
}
/**
 * Max characters allowed in a tool result, from the agent's context config.
 * Falls back to 20,000 when unset (or set to a falsy value such as 0).
 */
export function getMaxToolResultChars(contextConfig?: { max_tool_result_chars?: number } | null): number {
  const configured = contextConfig?.max_tool_result_chars;
  return configured || 20_000;
}
// ============================================================================
// UTILITY — sanitize errors (strip API keys, passwords)
// ============================================================================
/**
 * Stringify an error and redact secrets (API keys, passwords) before it is
 * logged or surfaced. Also strips stack-trace frames and caps the result at
 * 500 characters.
 */
export function sanitizeError(err: unknown): string {
  const msg = String(err);
  return msg
    // OpenAI/Anthropic-style "sk-…" keys. FIX: anchored at a word boundary so
    // ordinary tokens containing "sk-" (e.g. "task-123") are not mangled into
    // "ta" + "sk-***" as they were previously.
    .replace(/\bsk-[a-zA-Z0-9_-]+/g, "sk-***")
    // Google API keys
    .replace(/AIzaSy[a-zA-Z0-9_-]+/g, "AIzaSy***")
    // AWS access key IDs
    .replace(/AKIA[A-Z0-9]{16}/g, "AKIA***")
    // Generic key=/key: assignments with 20+ chars of key-like material
    .replace(/key[=:]\s*["']?[a-zA-Z0-9_-]{20,}["']?/gi, "key=***")
    .replace(/password[=:]\s*["']?[^\s"']+["']?/gi, "password=***")
    // Drop stack-trace frames ("\n    at fn (file:line)")
    .replace(/\n\s+at\s+.*/g, "")
    .substring(0, 500);
}