/**
* Shared Agent Core — single source of truth for CLI + server agent
*
* Pure TypeScript, no runtime-specific APIs (no Deno.env, no process.env, no fs).
* Both the CLI (Node.js) and server (Fly container) import from here.
*/
import { getProvider } from "./constants.js";
// ============================================================================
// TYPES
// ============================================================================
/** Verdict from LoopDetector.recordCall: whether a tool call may proceed. */
export interface LoopCheckResult {
  /** True when the call must not be executed (loop / repeated failure detected). */
  blocked: boolean;
  /** Model-facing explanation; present only when blocked. */
  reason?: string;
}
/** Verdict from LoopDetector.endTurn: whether the agent should give up. */
export interface BailCheckResult {
  /** True once too many consecutive turns have had errors. */
  shouldBail: boolean;
  /** Model-facing instruction to stop; present only when shouldBail. */
  message?: string;
}
/** Token and cost accounting for an API call (see estimateCostUsd). */
export interface CostInfo {
  inputTokens: number;
  outputTokens: number;
  /** Estimated cost in US dollars. */
  costUsd: number;
  /** Prompt-cache creation (write) token count. */
  cacheCreation: number;
  /** Prompt-cache read token count. */
  cacheRead: number;
}
/** Beta flags + context_management payload produced by getContextManagement. */
export interface ContextManagementConfig {
  /** Anthropic beta header values to send with the request. */
  betas: string[];
  /** context_management request body: a list of edit strategies. */
  config: { edits: Array<Record<string, unknown>> };
}
// ============================================================================
// MODEL-AWARE CONTEXT MANAGEMENT
// ============================================================================
/**
 * Returns Anthropic beta flags and context_management config for the given model.
 * - Opus 4.6: compact at 120K + clear at 80K/keep 3
 * - All other Claude models: clear at 80K/keep 3 only
 * - Non-Anthropic models (Gemini, OpenAI): no betas, no context management
 */
export function getContextManagement(model: string): ContextManagementConfig {
  const provider = getProvider(model);

  // Non-Anthropic providers take neither beta headers nor context_management.
  if (["gemini", "openai"].includes(provider)) {
    return { betas: [], config: { edits: [] } };
  }

  const betas: string[] = ["context-management-2025-06-27"];
  const edits: Array<Record<string, unknown>> = [];

  // Server-side compaction is gated on the exact Opus 4.6 identifier. Keep the
  // match narrow: broadening it to "opus-4" could hand future Opus variants a
  // compact_20260112 config they don't support, producing invalid requests.
  if (model.includes("opus-4-6")) {
    betas.push("compact-2026-01-12");
    edits.push({
      type: "compact_20260112",
      trigger: { type: "input_tokens", value: 120_000 },
    });
  }

  // Every Claude model clears old tool uses: once input crosses 80K tokens,
  // keep only the 3 most recent tool uses.
  edits.push({
    type: "clear_tool_uses_20250919",
    trigger: { type: "input_tokens", value: 80_000 },
    keep: { type: "tool_uses", value: 3 },
  });

  return { betas, config: { edits } };
}
/**
 * Model-aware max output tokens.
 * Agent config max_tokens takes priority but is capped at model maximum.
 */
const MODEL_MAX_OUTPUT_TOKENS: Record<string, number> = {
  // Gemini family
  "gemini-3-pro-preview": 65536,
  "gemini-3-flash-preview": 65536,
  "gemini-2.5-pro": 65536,
  "gemini-2.5-flash": 65536,
  "gemini-2.5-flash-lite": 65536,
  // OpenAI — GPT-5 family: 128K max output, o-series: 100K
  "gpt-5": 128000,
  "gpt-5-mini": 128000,
  "gpt-5-nano": 128000,
  "o3": 100000,
  "o4-mini": 100000,
  "gpt-4o": 16384,
};

/**
 * Resolve the effective max_tokens for a request.
 *
 * @param model model ID (unknown models fall back to a conservative 16384)
 * @param agentMax optional agent-configured max; a falsy value (0/undefined)
 *                 means "not set" and the model maximum is used
 */
export function getMaxOutputTokens(model: string, agentMax?: number): number {
  const modelMax = MODEL_MAX_OUTPUT_TOKENS[model] ?? 16384;
  if (agentMax) {
    // Agent config wins, but never above what the model can actually emit.
    return Math.min(agentMax, modelMax);
  }
  return modelMax;
}
// ============================================================================
// MULTI-BREAKPOINT PROMPT CACHING
// ============================================================================
/**
 * Add prompt cache breakpoints to tools and messages.
 * Uses 2 of 4 allowed breakpoints:
 * - Last tool definition
 * - Turn boundary (second-to-last message)
 * System prompt caching is handled by the caller.
 *
 * Inputs are not mutated; shallow copies with cache_control added are returned.
 */
export function addPromptCaching(
  tools: Array<Record<string, unknown>>,
  messages: Array<Record<string, unknown>>
): { tools: Array<Record<string, unknown>>; messages: Array<Record<string, unknown>> } {
  // Breakpoint 1: cache_control on the last tool definition (if any).
  const cachedTools: Array<Record<string, unknown>> =
    tools.length > 0
      ? [...tools.slice(0, -1), { ...tools[tools.length - 1], cache_control: { type: "ephemeral" } }]
      : [...tools];

  // Breakpoint 2: cache_control on the second-to-last message, so the cached
  // prefix covers everything up to the previous turn boundary.
  const cachedMessages = [...messages];
  if (cachedMessages.length >= 2) {
    const idx = cachedMessages.length - 2;
    const msg = cachedMessages[idx];
    const content = msg.content;
    if (typeof content === "string") {
      // String content must become a block array to carry cache_control.
      cachedMessages[idx] = {
        ...msg,
        content: [{ type: "text", text: content, cache_control: { type: "ephemeral" } }],
      };
    } else if (Array.isArray(content) && content.length > 0) {
      // FIX: guard against empty content arrays. Previously an empty array
      // fell into this branch and `blocks[blocks.length - 1]` wrote to index
      // -1, corrupting the array with a bogus "-1" property spread from
      // undefined. Empty-content messages are now left untouched.
      const blocks = [...content];
      blocks[blocks.length - 1] = { ...blocks[blocks.length - 1], cache_control: { type: "ephemeral" } };
      cachedMessages[idx] = { ...msg, content: blocks };
    }
  }
  return { tools: cachedTools, messages: cachedMessages };
}
// ============================================================================
// LOOP DETECTION
// ============================================================================
/** djb2 string hash — fast, deterministic, no dependencies */
function djb2Hash(str: string): string {
  let hash = 5381;
  for (let i = 0; i < str.length; i++) {
    // hash * 33 + char, truncated to 32 bits
    hash = ((hash << 5) + hash + str.charCodeAt(i)) & 0xffffffff;
  }
  return hash.toString(36);
}
/**
 * Detects unproductive agent behavior: identical tool calls repeated within a
 * window, consecutive failures of one tool, error-heavy turns, and sessions
 * whose turns keep failing.
 *
 * Lifecycle:
 *  - recordCall() before each tool execution (blocked => skip the call)
 *  - recordResult() after each execution
 *  - endTurn() when the model's turn completes (may recommend bailing)
 *  - resetTurn() before the next turn; reset() for a new session
 */
export class LoopDetector {
  /** Recent allowed calls, for identical-call detection (cleared per turn). */
  private history: { name: string; inputHash: string }[] = [];
  /** Per-tool consecutive failure counts; a success clears (cleared per turn). */
  private consecutiveErrors = new Map<string, number>();
  /** Number of failures in the current turn. */
  private turnErrors = 0;
  /** Whether any tool failed in the current turn. */
  private turnHadErrors = false;
  /** Per-tool failure counts across the whole session. */
  private sessionErrors = new Map<string, number>();
  /** Hashes of exact (tool, input) pairs that have failed (session-wide). */
  private failedStrategies = new Set<string>();
  private consecutiveFailedTurns = 0;
  private totalSessionErrors = 0;

  static IDENTICAL_CALL_LIMIT = 4;
  static CONSECUTIVE_ERROR_LIMIT = 3;
  static TURN_ERROR_LIMIT = 5;
  static WINDOW = 20;
  static SESSION_TOOL_ERROR_LIMIT = 10;
  static CONSECUTIVE_FAILED_TURN_LIMIT = 3;

  /**
   * Hash a (tool name, input) pair as a JSON tuple.
   *
   * FIX: the previous scheme hashed `JSON.stringify({ name, ...input })`, so
   * an input field literally called "name" (common in tool schemas) clobbered
   * the tool name — different tools invoked with the same input hashed
   * identically, producing false "failed strategy" blocks across tools.
   * Hashing the tuple [name, input] keeps the two namespaces separate.
   * (Hashes are internal state only, so the scheme change is safe.)
   */
  private hashCall(name: string, input: Record<string, unknown>): string {
    return djb2Hash(JSON.stringify([name, input]));
  }

  /**
   * Check whether a tool call should be blocked; if allowed, record it in the
   * identical-call window. Checks run from most to least specific.
   */
  recordCall(name: string, input: Record<string, unknown>): LoopCheckResult {
    const inputHash = this.hashCall(name, input);
    // 1. This exact (tool, input) pair already failed earlier in the session.
    if (this.failedStrategies.has(inputHash)) {
      return {
        blocked: true,
        reason: `Blocked: this exact "${name}" call failed in a previous turn. Try a fundamentally different approach.`,
      };
    }
    // 2. The tool has failed too many times over the whole session.
    const sessionErrorCount = this.sessionErrors.get(name) || 0;
    if (sessionErrorCount >= LoopDetector.SESSION_TOOL_ERROR_LIMIT) {
      return {
        blocked: true,
        reason: `Tool "${name}" has failed ${sessionErrorCount} times this session. Stop using this tool and try a different approach.`,
      };
    }
    // 3. Too many errors this turn, across all tools.
    if (this.turnErrors >= LoopDetector.TURN_ERROR_LIMIT) {
      return {
        blocked: true,
        reason: `${this.turnErrors} errors this turn. Stop and re-assess your approach.`,
      };
    }
    // 4. This tool failed several times in a row without a success.
    const errorCount = this.consecutiveErrors.get(name) || 0;
    if (errorCount >= LoopDetector.CONSECUTIVE_ERROR_LIMIT) {
      return {
        blocked: true,
        reason: `Tool "${name}" blocked: failed ${errorCount} times consecutively. Try a different approach.`,
      };
    }
    // 5. Identical call repeated too often within the recent window.
    const windowSlice = this.history.slice(-LoopDetector.WINDOW);
    const identicalCount = windowSlice.filter((h) => h.inputHash === inputHash).length;
    if (identicalCount >= LoopDetector.IDENTICAL_CALL_LIMIT) {
      return {
        blocked: true,
        reason: `Tool "${name}" blocked: identical call made ${identicalCount} times. Try different parameters.`,
      };
    }
    // Allowed: record it. Cap history at 2x the window to bound memory.
    this.history.push({ name, inputHash });
    if (this.history.length > LoopDetector.WINDOW * 2) {
      this.history = this.history.slice(-LoopDetector.WINDOW);
    }
    return { blocked: false };
  }

  /**
   * Record a tool call's outcome. Pass `input` on failure so the exact
   * (tool, input) combination is remembered as a failed strategy.
   */
  recordResult(name: string, success: boolean, input?: Record<string, unknown>): void {
    if (success) {
      this.consecutiveErrors.delete(name);
      return;
    }
    this.consecutiveErrors.set(name, (this.consecutiveErrors.get(name) || 0) + 1);
    this.turnErrors++;
    this.turnHadErrors = true;
    this.sessionErrors.set(name, (this.sessionErrors.get(name) || 0) + 1);
    this.totalSessionErrors++;
    if (input) {
      this.failedStrategies.add(this.hashCall(name, input));
      // Bound memory: once over 200 entries, keep only the newest 100.
      // (Sets iterate in insertion order, so slice(-100) keeps the newest.)
      if (this.failedStrategies.size > 200) {
        const arr = Array.from(this.failedStrategies);
        this.failedStrategies = new Set(arr.slice(-100));
      }
    }
  }

  /** Evaluate the finished turn; recommends bailing after repeated failed turns. */
  endTurn(): BailCheckResult {
    if (this.turnHadErrors) {
      this.consecutiveFailedTurns++;
    } else {
      this.consecutiveFailedTurns = 0;
    }
    if (this.consecutiveFailedTurns >= LoopDetector.CONSECUTIVE_FAILED_TURN_LIMIT) {
      return {
        shouldBail: true,
        message: `You have had errors in ${this.consecutiveFailedTurns} consecutive turns (${this.totalSessionErrors} total errors). Your approach is not working. STOP and explain to the user what's failing.`,
      };
    }
    return { shouldBail: false };
  }

  /** Clear per-turn state only (history, consecutive errors, turn counters). */
  resetTurn(): void {
    this.history = [];
    this.consecutiveErrors.clear();
    this.turnErrors = 0;
    this.turnHadErrors = false;
  }

  /** Clear all state, per-turn and per-session. */
  reset(): void {
    this.resetTurn();
    this.sessionErrors.clear();
    this.failedStrategies.clear();
    this.consecutiveFailedTurns = 0;
    this.totalSessionErrors = 0;
  }

  /** Session-level stats for logging/telemetry. */
  getSessionStats(): { totalErrors: number; failedStrategies: number; consecutiveFailedTurns: number } {
    return {
      totalErrors: this.totalSessionErrors,
      failedStrategies: this.failedStrategies.size,
      consecutiveFailedTurns: this.consecutiveFailedTurns,
    };
  }
}
// ============================================================================
// EXTENDED THINKING
// ============================================================================
/** Extended-thinking request configuration (produced by getThinkingConfig). */
export interface ThinkingConfig {
  /** "adaptive": model-managed budget (Opus 4.6); "enabled": fixed budget; "disabled": off. */
  type: "enabled" | "disabled" | "adaptive";
  /** Thinking token budget; set only for type "enabled" (must be < max_tokens). */
  budget_tokens?: number;
}
/**
* Returns the thinking configuration and required beta string for the given model.
* - Opus 4.6: adaptive thinking (no budget needed)
* - Sonnet/Haiku: enabled with 10000 token budget
* - budget_tokens must be strictly < max_tokens
*/
export function getThinkingConfig(model: string, enabled: boolean): {
thinking: ThinkingConfig;
beta: string;
} {
if (!enabled) {
return { thinking: { type: "disabled" }, beta: "" };
}
const provider = getProvider(model);
// Gemini models: thinking is always-on for 2.5+/3.x — signal pass-through
if (provider === "gemini") {
return { thinking: { type: "enabled" }, beta: "" };
}
// OpenAI models: reasoning models (o-series) have built-in reasoning, GPT models don't support thinking
if (provider === "openai") {
const isReasoning = /^o\d/.test(model);
return { thinking: { type: isReasoning ? "enabled" : "disabled" }, beta: "" };
}
if (model.includes("opus-4-6")) {
return {
thinking: { type: "adaptive" },
beta: "adaptive-thinking-2026-01-28",
};
}
// Sonnet/Haiku: fixed budget
return {
thinking: { type: "enabled", budget_tokens: 10_000 },
beta: "interleaved-thinking-2025-05-14",
};
}
// ============================================================================
// COST TRACKING
// ============================================================================
/** Per-model pricing in USD per 1M tokens. thinkingPer1M defaults to outputPer1M. */
export const MODEL_PRICING: Record<string, { inputPer1M: number; outputPer1M: number; thinkingPer1M?: number }> = {
  // Anthropic direct
  "claude-sonnet-4-20250514": { inputPer1M: 3.0, outputPer1M: 15.0, thinkingPer1M: 15.0 },
  "claude-opus-4-6": { inputPer1M: 5.0, outputPer1M: 25.0, thinkingPer1M: 25.0 },
  "claude-haiku-4-5-20251001": { inputPer1M: 1.0, outputPer1M: 5.0, thinkingPer1M: 5.0 },
  // Bedrock (same Claude pricing)
  "us.anthropic.claude-sonnet-4-20250514-v1:0": { inputPer1M: 3.0, outputPer1M: 15.0, thinkingPer1M: 15.0 },
  "us.anthropic.claude-sonnet-4-5-20250929-v1:0": { inputPer1M: 3.0, outputPer1M: 15.0, thinkingPer1M: 15.0 },
  "us.anthropic.claude-haiku-4-5-20251001-v1:0": { inputPer1M: 1.0, outputPer1M: 5.0, thinkingPer1M: 5.0 },
  // Gemini (thinking tokens are cheaper than output)
  "gemini-3-pro-preview": { inputPer1M: 1.25, outputPer1M: 10.0, thinkingPer1M: 2.50 },
  "gemini-3-flash-preview": { inputPer1M: 0.15, outputPer1M: 0.60, thinkingPer1M: 0.15 },
  "gemini-2.5-pro": { inputPer1M: 1.25, outputPer1M: 10.0, thinkingPer1M: 2.50 },
  "gemini-2.5-flash": { inputPer1M: 0.15, outputPer1M: 0.60, thinkingPer1M: 0.15 },
  "gemini-2.5-flash-lite": { inputPer1M: 0.075, outputPer1M: 0.30, thinkingPer1M: 0.075 },
  // OpenAI
  "gpt-5": { inputPer1M: 1.25, outputPer1M: 10.0 },
  "gpt-5-mini": { inputPer1M: 0.25, outputPer1M: 2.0 },
  "gpt-5-nano": { inputPer1M: 0.05, outputPer1M: 0.40 },
  "o3": { inputPer1M: 2.0, outputPer1M: 8.0, thinkingPer1M: 8.0 },
  "o4-mini": { inputPer1M: 1.10, outputPer1M: 4.40, thinkingPer1M: 4.40 },
  "gpt-4o": { inputPer1M: 2.50, outputPer1M: 10.0 },
};
/**
 * Estimate USD cost for one API call.
 *
 * Pricing resolution: exact model-ID match first, then the LONGEST pricing
 * key that is a prefix of the model ID, then Sonnet pricing as a fallback.
 * FIX: the previous lookup used `.find()` over keys in insertion order, so a
 * dated snapshot like "gpt-5-mini-2025-01" matched the shorter "gpt-5" key
 * first and was billed at gpt-5 rates instead of gpt-5-mini rates.
 *
 * @param inputTokens total input tokens (cached tokens assumed included)
 * @param outputTokens output tokens
 * @param model model ID used for pricing and provider lookup
 * @param thinkingTokens billed at thinkingPer1M when defined, else output rate
 * @param cacheReadTokens prompt-cache reads (discounted per provider)
 * @param cacheCreationTokens prompt-cache writes (Anthropic 25% surcharge)
 */
export function estimateCostUsd(
  inputTokens: number,
  outputTokens: number,
  model: string,
  thinkingTokens = 0,
  cacheReadTokens = 0,
  cacheCreationTokens = 0,
): number {
  let pricing = MODEL_PRICING[model];
  if (!pricing) {
    const prefixKey = Object.keys(MODEL_PRICING)
      .filter((k) => model.startsWith(k))
      .sort((a, b) => b.length - a.length)[0]; // longest prefix wins
    pricing = prefixKey ? MODEL_PRICING[prefixKey] : MODEL_PRICING["claude-sonnet-4-20250514"];
  }
  const inputRate = pricing.inputPer1M;
  const thinkingRate = pricing.thinkingPer1M || pricing.outputPer1M;
  // Base cost
  let cost =
    (inputTokens / 1_000_000) * inputRate +
    (outputTokens / 1_000_000) * pricing.outputPer1M +
    (thinkingTokens / 1_000_000) * thinkingRate;
  // Cache pricing — subtract savings for cached tokens
  // Anthropic/Bedrock: cache reads 90% cheaper, cache creation 25% surcharge
  // OpenAI: cache reads 50% cheaper, no creation surcharge
  // Gemini: cache reads 75% cheaper
  if (cacheReadTokens > 0 || cacheCreationTokens > 0) {
    const provider = getProvider(model);
    if (provider === "anthropic" || provider === "bedrock") {
      cost -= (cacheReadTokens / 1_000_000) * inputRate * 0.9;
      cost += (cacheCreationTokens / 1_000_000) * inputRate * 0.25;
    } else if (provider === "openai") {
      cost -= (cacheReadTokens / 1_000_000) * inputRate * 0.5;
    } else if (provider === "gemini") {
      cost -= (cacheReadTokens / 1_000_000) * inputRate * 0.75;
    }
  }
  // Guard against pathological inputs (e.g. cache reads exceeding billed input)
  // driving the estimate negative.
  return Math.max(0, cost);
}
// ============================================================================
// RETRY LOGIC
// ============================================================================
/**
 * Heuristic for transient API failures worth retrying: rate limiting (429),
 * server errors (500/503), Anthropic "overloaded" (529), and common
 * overload / rate-limit / timeout message text.
 */
export function isRetryableError(err: unknown): boolean {
  const e = err as { status?: unknown; statusCode?: unknown; message?: unknown };
  const status = e?.status || e?.statusCode;
  const retryableStatuses = [429, 500, 503, 529];
  if (retryableStatuses.includes(status as number)) {
    return true;
  }
  const msg = String(e?.message || "").toLowerCase();
  return ["overloaded", "rate limit", "timeout"].some((needle) => msg.includes(needle));
}
// ============================================================================
// TOOL RESULT TRUNCATION
// ============================================================================
/**
 * Truncate an over-long tool result, appending a note with the original size.
 * Note: the returned string may exceed maxChars by the length of the suffix.
 */
export function truncateToolResult(content: string, maxChars: number): string {
  if (content.length <= maxChars) {
    return content;
  }
  const note = `\n\n... (truncated — ${content.length.toLocaleString()} chars total)`;
  return content.slice(0, maxChars) + note;
}
/**
 * Max characters allowed in a tool result, from the agent's context config.
 * Falls back to 20,000 when unset (or set to a falsy value such as 0).
 */
export function getMaxToolResultChars(contextConfig?: { max_tool_result_chars?: number } | null): number {
  const configured = contextConfig?.max_tool_result_chars;
  return configured || 20_000;
}
// ============================================================================
// UTILITY — sanitize errors (strip API keys, passwords)
// ============================================================================
/**
 * Stringify an error and redact secrets (API keys, passwords) before it is
 * logged or surfaced. Also strips stack-trace frames and caps the result at
 * 500 characters.
 */
export function sanitizeError(err: unknown): string {
  const msg = String(err);
  return msg
    // OpenAI/Anthropic-style "sk-…" keys. FIX: anchored at a word boundary so
    // ordinary tokens containing "sk-" (e.g. "task-123") are not mangled into
    // "ta" + "sk-***" as they were previously.
    .replace(/\bsk-[a-zA-Z0-9_-]+/g, "sk-***")
    // Google API keys
    .replace(/AIzaSy[a-zA-Z0-9_-]+/g, "AIzaSy***")
    // AWS access key IDs
    .replace(/AKIA[A-Z0-9]{16}/g, "AKIA***")
    // Generic key=/key: assignments with 20+ chars of key-like material
    .replace(/key[=:]\s*["']?[a-zA-Z0-9_-]{20,}["']?/gi, "key=***")
    .replace(/password[=:]\s*["']?[^\s"']+["']?/gi, "password=***")
    // Drop stack-trace frames ("\n    at fn (file:line)")
    .replace(/\n\s+at\s+.*/g, "")
    .substring(0, 500);
}