/**
* Unified API Client — ONE server proxy caller with retry and config builder.
*
* Replaces proxy+direct call paths in agent-loop, subagent, teammate.
* CLI-only: always calls through server proxy (no direct Anthropic SDK usage).
*/
import {
getContextManagement,
getMaxOutputTokens,
getThinkingConfig,
isRetryableError,
addPromptCaching,
} from "./agent-core.js";
import { getProvider } from "./constants.js";
import type { ContextProfile, APIRequestConfig } from "./types.js";
// ============================================================================
// CONSTANTS
// ============================================================================
// Number of retries after the initial attempt (so up to 4 attempts total).
const MAX_RETRIES = 3;
// Base delay for exponential backoff: 1s, then 2s, then 4s across retries.
const RETRY_BASE_DELAY_MS = 1000;
// ============================================================================
// API REQUEST CONFIG BUILDER
// ============================================================================
/**
 * Build all Anthropic API config from a simple profile.
 *
 * Profiles:
 * - 'main': clear at 80K/keep 3, compact at 120K (Opus only)
 * - 'subagent': clear at 60K/keep 2, no compaction, 8192 max tokens
 * - 'teammate': clear at 80K/keep 3, no compaction
 *
 * @param opts.model - Model identifier; provider is derived via getProvider().
 * @param opts.contextProfile - 'main' | 'subagent' | 'teammate'.
 * @param opts.thinkingEnabled - Enables extended thinking (default false).
 * @param opts.maxOutputTokens - Optional override for the profile's max output tokens.
 * @returns Betas, context-management edits, thinking param, and max tokens.
 */
export function buildAPIRequest(opts: {
  model: string;
  contextProfile: ContextProfile;
  thinkingEnabled?: boolean;
  maxOutputTokens?: number;
}): APIRequestConfig {
  const { model, contextProfile, thinkingEnabled = false, maxOutputTokens } = opts;
  // Non-Anthropic models (Gemini, OpenAI, etc.) don't support Anthropic betas
  // or server-side context management.
  const provider = getProvider(model);
  const isAnthropicModel = provider === "anthropic" || provider === "bedrock";
  let betas: string[] = isAnthropicModel ? ["context-management-2025-06-27"] : [];
  let edits: Array<Record<string, unknown>> = [];
  switch (contextProfile) {
    // 'main' and 'teammate' share the model-derived context-management config.
    case "main":
    case "teammate": {
      const ctxMgmt = getContextManagement(model);
      betas = [...ctxMgmt.betas];
      edits = ctxMgmt.config.edits;
      break;
    }
    case "subagent":
      if (isAnthropicModel) {
        // Tighter budget for subagents: clear tool uses at 60K input tokens,
        // keeping only the 2 most recent.
        edits = [
          {
            type: "clear_tool_uses_20250919",
            trigger: { type: "input_tokens", value: 60_000 },
            keep: { type: "tool_uses", value: 2 },
          },
        ];
      }
      break;
  }
  // Thinking config. FIX: the thinking beta was previously pushed for every
  // provider, contradicting the non-Anthropic rule above; gate it the same way
  // as the thinking param below.
  const thinkingCfg = getThinkingConfig(model, thinkingEnabled);
  if (isAnthropicModel && thinkingCfg.beta) betas.push(thinkingCfg.beta);
  // Max tokens: subagents default to 8192 unless explicitly overridden.
  const profileMaxTokens = contextProfile === "subagent" ? 8192 : undefined;
  const maxTokens = getMaxOutputTokens(model, maxOutputTokens ?? profileMaxTokens);
  // Build thinking param (ensure budget < maxTokens).
  // Only include for Anthropic/Bedrock — Gemini/OpenAI handle thinking internally.
  let thinking: { type: string; budget_tokens?: number } | undefined;
  if (isAnthropicModel && thinkingCfg.thinking.type !== "disabled") {
    thinking = thinkingCfg.thinking.budget_tokens
      ? {
          ...thinkingCfg.thinking,
          budget_tokens: Math.min(thinkingCfg.thinking.budget_tokens, maxTokens - 1),
        }
      : thinkingCfg.thinking;
  }
  return {
    betas,
    contextManagement: { edits },
    thinking,
    maxTokens,
  };
}
// ============================================================================
// SERVER PROXY CALLER
// ============================================================================
/** Input to callServerProxy: endpoint, auth, request payload, and retry hooks. */
export interface CallServerProxyConfig {
  /** Server proxy endpoint; request is POSTed here. */
  proxyUrl: string;
  /** Bearer token sent in the Authorization header. */
  token: string;
  /** Model identifier; may be switched to fallbackModel on the final retry. */
  model: string;
  /** System prompt blocks. */
  system: Array<Record<string, unknown>>;
  /** Conversation messages. */
  messages: Array<Record<string, unknown>>;
  /** Tool definitions. */
  tools: Array<Record<string, unknown>>;
  /** Betas/context-management/thinking/max-tokens built by buildAPIRequest(). */
  apiConfig: APIRequestConfig;
  /** External abort signal; an abort is never retried. */
  signal?: AbortSignal;
  /** Per-attempt timeout in milliseconds. */
  timeoutMs?: number;
  /** Model to switch to before the last retry attempt. */
  fallbackModel?: string;
  storeId?: string; // Required for non-Anthropic providers (credential resolution)
  /** Invoked when the fallback model is substituted. */
  onFallback?: (fromModel: string, toModel: string) => void;
  /** Invoked before each backoff delay with the 1-based attempt number. */
  onRetry?: (attempt: number, maxRetries: number, error: string) => void;
}
/**
 * Call the server proxy endpoint and return the raw SSE stream.
 *
 * Retries with exponential backoff (1s/2s/4s) on retryable errors (per
 * isRetryableError, e.g. 429/500/529); before the final attempt, switches to
 * `fallbackModel` when one is configured. Caller-initiated aborts are never
 * retried.
 *
 * @returns The raw response body stream on success.
 * @throws The last error after retries are exhausted, or immediately on a
 *   non-retryable error or external abort.
 */
export async function callServerProxy(
  config: CallServerProxyConfig,
): Promise<ReadableStream<Uint8Array>> {
  const { proxyUrl, token, signal, timeoutMs } = config;
  // Track the model locally instead of mutating the caller's config object.
  let model = config.model;
  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
    try {
      // Rebuild the body each attempt so a fallback-model switch takes effect.
      const body: Record<string, unknown> = {
        mode: "proxy",
        messages: config.messages,
        system: config.system,
        tools: config.tools,
        model,
        max_tokens: config.apiConfig.maxTokens,
        stream: true,
        betas: config.apiConfig.betas,
        ...(config.apiConfig.contextManagement?.edits?.length
          ? { context_management: config.apiConfig.contextManagement }
          : {}),
      };
      if (config.apiConfig.thinking) {
        body.thinking = config.apiConfig.thinking;
      }
      if (config.storeId) {
        body.store_id = config.storeId;
      }
      const fetchOpts: RequestInit = {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "Authorization": `Bearer ${token}`,
        },
        body: JSON.stringify(body),
        signal,
      };
      // Apply timeout if specified. FIX: previously the timeout was silently
      // ignored when an external signal was also provided; now the external
      // abort is forwarded into the timeout controller so either can cancel.
      let timeout: ReturnType<typeof setTimeout> | undefined;
      if (timeoutMs) {
        const controller = new AbortController();
        timeout = setTimeout(() => controller.abort(), timeoutMs);
        if (signal) {
          if (signal.aborted) controller.abort();
          else signal.addEventListener("abort", () => controller.abort(), { once: true });
        }
        fetchOpts.signal = controller.signal;
      }
      try {
        const response = await fetch(proxyUrl, fetchOpts);
        if (response.ok && response.body) {
          return response.body;
        }
        // Non-2xx: surface the proxy's error text with the status attached so
        // isRetryableError can classify it.
        const errorBody = await response.text();
        throw Object.assign(
          new Error(`Proxy error (${response.status}): ${errorBody}`),
          { status: response.status },
        );
      } finally {
        if (timeout) clearTimeout(timeout);
      }
    } catch (err: unknown) {
      if (signal?.aborted) throw err; // caller-initiated abort: never retry
      if (attempt < MAX_RETRIES && isRetryableError(err)) {
        const errMsg = err instanceof Error ? err.message : String(err);
        config.onRetry?.(attempt + 1, MAX_RETRIES, errMsg);
        const delay = RETRY_BASE_DELAY_MS * Math.pow(2, attempt);
        await new Promise((resolve) => setTimeout(resolve, delay));
        // Switch to the fallback model for the final attempt.
        if (attempt === MAX_RETRIES - 1 && config.fallbackModel) {
          const fromModel = model;
          model = config.fallbackModel;
          config.onFallback?.(fromModel, model);
        }
        continue;
      }
      throw err;
    }
  }
  throw new Error("Failed to get response after retries");
}
// ============================================================================
// HELPERS — system prompt + caching setup
// ============================================================================
/**
 * Build system blocks with prompt caching.
 * The system prompt gets an ephemeral cache_control marker when caching is
 * enabled; the optional dynamic cost context is appended after it (past the
 * cache breakpoint) so it never invalidates the cached prefix.
 */
export function buildSystemBlocks(
  systemPrompt: string,
  costContext?: string,
  enableCaching = true,
): Array<Record<string, unknown>> {
  const promptBlock: Record<string, unknown> = enableCaching
    ? { type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } }
    : { type: "text", text: systemPrompt };
  const blocks: Array<Record<string, unknown>> = [promptBlock];
  if (costContext) {
    blocks.push({ type: "text", text: costContext });
  }
  return blocks;
}
/**
 * Prepare messages and tools with prompt caching.
 * Skips cache_control injection for Gemini and OpenAI models (Gemini relies
 * on implicit caching); everything else goes through addPromptCaching.
 */
export function prepareWithCaching(
  tools: Array<Record<string, unknown>>,
  messages: Array<Record<string, unknown>>,
  model?: string,
): { tools: Array<Record<string, unknown>>; messages: Array<Record<string, unknown>> } {
  if (!model) {
    return addPromptCaching(tools, messages);
  }
  const provider = getProvider(model);
  const skipInjection = provider === "gemini" || provider === "openai";
  return skipInjection ? { tools, messages } : addPromptCaching(tools, messages);
}
// ============================================================================
// CLIENT-SIDE CONTEXT TRIMMING (Gemini / OpenAI)
// ============================================================================
/**
 * Shared trimming implementation (FIX: was duplicated verbatim between the
 * Gemini and OpenAI variants, which differed only in default threshold).
 *
 * When estimated input tokens meet or exceed `threshold`, replaces the
 * contents of the oldest tool_result blocks with "[trimmed]", leaving the most
 * recent `keepRecent` results intact. Input messages are not mutated; affected
 * messages and blocks are shallow-copied. Returns the input array unchanged
 * when under threshold or when there is nothing to trim.
 */
function trimToolResults(
  messages: Array<Record<string, unknown>>,
  estimatedTokens: number,
  threshold: number,
  keepRecent: number,
): Array<Record<string, unknown>> {
  if (estimatedTokens < threshold) return messages;
  // Count tool_result blocks to decide how many of the oldest to trim.
  let toolResultCount = 0;
  for (const msg of messages) {
    if (Array.isArray(msg.content)) {
      for (const block of msg.content as Array<Record<string, unknown>>) {
        if (block.type === "tool_result") toolResultCount++;
      }
    }
  }
  if (toolResultCount <= keepRecent) return messages;
  const trimCount = toolResultCount - keepRecent;
  let trimmed = 0;
  return messages.map((msg) => {
    if (!Array.isArray(msg.content)) return msg;
    const content = (msg.content as Array<Record<string, unknown>>).map((block) => {
      // Messages iterate oldest-first, so the first trimCount results found
      // are the oldest ones.
      if (block.type === "tool_result" && trimmed < trimCount) {
        trimmed++;
        if (typeof block.content === "string") {
          return { ...block, content: "[trimmed]" };
        } else if (Array.isArray(block.content)) {
          return { ...block, content: [{ type: "text", text: "[trimmed]" }] };
        }
      }
      return block;
    });
    return { ...msg, content };
  });
}
/**
 * Client-side context trimming for Gemini (no server-side context management).
 * When estimated input tokens exceed threshold, replaces old tool result contents
 * with "[trimmed]" to stay within Gemini's 1M context window.
 */
export function trimGeminiContext(
  messages: Array<Record<string, unknown>>,
  estimatedTokens: number,
  threshold = 800_000,
  keepRecent = 5,
): Array<Record<string, unknown>> {
  return trimToolResults(messages, estimatedTokens, threshold, keepRecent);
}
/**
 * Client-side context trimming for OpenAI (no server-side context management).
 * When estimated input tokens exceed threshold, replaces old tool result contents
 * with "[trimmed]" to stay within OpenAI's 200K context window.
 */
export function trimOpenAIContext(
  messages: Array<Record<string, unknown>>,
  estimatedTokens: number,
  threshold = 150_000,
  keepRecent = 5,
): Array<Record<string, unknown>> {
  return trimToolResults(messages, estimatedTokens, threshold, keepRecent);
}