import { OpenAI } from "openai"
import { IEmbedder, EmbeddingResponse, EmbedderInfo } from "../interfaces/embedder"
import {
MAX_BATCH_TOKENS,
MAX_ITEM_TOKENS,
MAX_BATCH_RETRIES as MAX_RETRIES,
INITIAL_RETRY_DELAY_MS as INITIAL_DELAY_MS,
} from "../constants"
import { getDefaultModelId, getModelQueryPrefix } from "../../shared/embeddingModels"
import { withValidationErrorHandling, HttpError, formatEmbeddingError } from "../shared/validation-helpers"
import { handleOpenAIError } from "../shared/openai-error-handler"
import { Mutex } from "async-mutex"
interface EmbeddingItem {
embedding: string | number[]
[key: string]: any
}
interface OpenRouterEmbeddingResponse {
data: EmbeddingItem[]
usage?: {
prompt_tokens?: number
total_tokens?: number
}
}
/**
* OpenRouter implementation of the embedder interface with batching and rate limiting.
* OpenRouter provides an OpenAI-compatible API that gives access to hundreds of models
* through a single endpoint, automatically handling fallbacks and cost optimization.
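*
* @example
* // Illustrative usage sketch; `apiKey` stands in for a real OpenRouter API key
* const embedder = new OpenRouterEmbedder(apiKey)
* const { embeddings, usage } = await embedder.createEmbeddings(["hello world"])
* console.log(`Embedded ${embeddings.length} texts using ${usage.totalTokens} tokens`)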
*/
export class OpenRouterEmbedder implements IEmbedder {
private embeddingsClient: OpenAI
private readonly defaultModelId: string
private readonly apiKey: string
private readonly maxItemTokens: number
private readonly baseUrl: string = "https://openrouter.ai/api/v1"
private readonly _optimalBatchSize: number
// Global rate limiting state shared across all instances
private static globalRateLimitState = {
isRateLimited: false,
rateLimitResetTime: 0,
consecutiveRateLimitErrors: 0,
lastRateLimitError: 0,
// Mutex to ensure thread-safe access to rate limit state
mutex: new Mutex(),
}
/**
* Creates a new OpenRouter embedder
* @param apiKey The API key for authentication
* @param modelId Optional model identifier (defaults to "openai/text-embedding-3-large")
* @param maxItemTokens Optional maximum tokens per item (defaults to MAX_ITEM_TOKENS)
* @param options Optional settings, e.g. openrouterBatchSize to override the default batch size of 60
*/
constructor(apiKey: string, modelId?: string, maxItemTokens?: number, options?: { openrouterBatchSize?: number }) {
if (!apiKey) {
throw new Error("API key is required for OpenRouter embedder")
}
this.apiKey = apiKey
// Wrap OpenAI client creation to handle invalid API key characters
try {
this.embeddingsClient = new OpenAI({
baseURL: this.baseUrl,
apiKey: apiKey,
defaultHeaders: {
"HTTP-Referer": "https://github.com/RooCodeInc/Roo-Code",
"X-Title": "Roo Code",
},
})
} catch (error) {
// Use the error handler to transform ByteString conversion errors
throw handleOpenAIError(error, "OpenRouter")
}
this.defaultModelId = modelId || getDefaultModelId("openrouter")
this.maxItemTokens = maxItemTokens || MAX_ITEM_TOKENS
// Initialize optimal batch size for OpenRouter (can be customized via options)
this._optimalBatchSize = options?.openrouterBatchSize || 60
}
/**
* Creates embeddings for the given texts with batching and rate limiting
* @param texts Array of text strings to embed
* @param model Optional model identifier
* @returns Promise resolving to embedding response
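* @example
* // Illustrative sketch; `embedder` is assumed to be an already constructed OpenRouterEmbedder
* const { embeddings, usage } = await embedder.createEmbeddings(["function add(a, b) { return a + b }"])
* console.log(embeddings[0].length, usage.totalTokens)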
*/
async createEmbeddings(texts: string[], model?: string): Promise<EmbeddingResponse> {
const modelToUse = model || this.defaultModelId
// Apply model-specific query prefix if required
const queryPrefix = getModelQueryPrefix("openrouter", modelToUse)
const processedTexts = queryPrefix
? texts.map((text, index) => {
// Prevent double-prefixing
if (text.startsWith(queryPrefix)) {
return text
}
const prefixedText = `${queryPrefix}${text}`
const estimatedTokens = Math.ceil(prefixedText.length / 4) // rough estimate: ~4 characters per token
if (estimatedTokens > MAX_ITEM_TOKENS) {
console.warn(
`Text at index ${index} with prefix exceeds token limit (${estimatedTokens} > ${MAX_ITEM_TOKENS}). Using original text.`,
)
// Return original text if adding prefix would exceed limit
return text
}
return prefixedText
})
: texts
const allEmbeddings: number[][] = []
const usage = { promptTokens: 0, totalTokens: 0 }
const remainingTexts = [...processedTexts]
while (remainingTexts.length > 0) {
const currentBatch: string[] = []
let currentBatchTokens = 0
const processedIndices: number[] = []
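// Greedily fill the current batch until the next text would push it past MAX_BATCH_TOKENS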
for (let i = 0; i < remainingTexts.length; i++) {
const text = remainingTexts[i]
const itemTokens = Math.ceil(text.length / 4)
if (itemTokens > this.maxItemTokens) {
console.warn(
`Text at index ${i} exceeds token limit (${itemTokens} > ${this.maxItemTokens}). Skipping.`,
)
processedIndices.push(i)
continue
}
if (currentBatchTokens + itemTokens <= MAX_BATCH_TOKENS) {
currentBatch.push(text)
currentBatchTokens += itemTokens
processedIndices.push(i)
} else {
break
}
}
// Remove processed items from remainingTexts (in reverse order to maintain correct indices)
for (let i = processedIndices.length - 1; i >= 0; i--) {
remainingTexts.splice(processedIndices[i], 1)
}
if (currentBatch.length > 0) {
const batchResult = await this._embedBatchWithRetries(currentBatch, modelToUse)
allEmbeddings.push(...batchResult.embeddings)
usage.promptTokens += batchResult.usage.promptTokens
usage.totalTokens += batchResult.usage.totalTokens
}
}
return { embeddings: allEmbeddings, usage }
}
/**
* Helper method to handle batch embedding with retries and exponential backoff
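* Retries up to MAX_RETRIES times on HTTP 429 responses, backing off exponentially and honoring the shared
* global rate-limit window; non-rate-limit errors are thrown immediately.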
* @param batchTexts Array of texts to embed in this batch
* @param model Model identifier to use
* @returns Promise resolving to embeddings and usage statistics
*/
private async _embedBatchWithRetries(
batchTexts: string[],
model: string,
): Promise<{ embeddings: number[][]; usage: { promptTokens: number; totalTokens: number } }> {
for (let attempts = 0; attempts < MAX_RETRIES; attempts++) {
// Check global rate limit before attempting request
await this.waitForGlobalRateLimit()
try {
const response = (await this.embeddingsClient.embeddings.create({
input: batchTexts,
model: model,
// OpenAI package (as of v4.78.1) has a parsing issue that truncates embedding dimensions to 256
// when processing numeric arrays, which breaks compatibility with models using larger dimensions.
// By requesting base64 encoding, we bypass the package's parser and handle decoding ourselves.
encoding_format: "base64",
})) as OpenRouterEmbeddingResponse
// Convert base64 embeddings to float32 arrays
const processedEmbeddings = response.data.map((item: EmbeddingItem) => {
if (typeof item.embedding === "string") {
const buffer = Buffer.from(item.embedding, "base64")
// Create Float32Array view over the buffer
const float32Array = new Float32Array(buffer.buffer, buffer.byteOffset, buffer.byteLength / 4)
return {
...item,
embedding: Array.from(float32Array),
}
}
return item
})
// Replace the original data with processed embeddings
response.data = processedEmbeddings
const embeddings = response.data.map((item) => item.embedding as number[])
return {
embeddings: embeddings,
usage: {
promptTokens: response.usage?.prompt_tokens || 0,
totalTokens: response.usage?.total_tokens || 0,
},
}
} catch (error) {
const hasMoreAttempts = attempts < MAX_RETRIES - 1
// Check if it's a rate limit error
const httpError = error as HttpError
if (httpError?.status === 429) {
// Update global rate limit state
await this.updateGlobalRateLimitState(httpError)
if (hasMoreAttempts) {
// Calculate delay based on global rate limit state
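// The wait doubles with each attempt (INITIAL_DELAY_MS * 2^attempt) and never undercuts the shared global window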
const baseDelay = INITIAL_DELAY_MS * Math.pow(2, attempts)
const globalDelay = await this.getGlobalRateLimitDelay()
const delayMs = Math.max(baseDelay, globalDelay)
console.warn(
`Rate limit hit. Retrying in ${delayMs}ms (attempt ${attempts + 1}/${MAX_RETRIES})`,
)
await new Promise((resolve) => setTimeout(resolve, delayMs))
continue
}
}
// Log the error for debugging
console.error(`OpenRouter embedder error (attempt ${attempts + 1}/${MAX_RETRIES}):`, error)
// Format and throw the error
throw formatEmbeddingError(error, MAX_RETRIES)
}
}
throw new Error(`Failed to generate embeddings after ${MAX_RETRIES} attempts`)
}
/**
* Validates the OpenRouter embedder configuration by testing API connectivity
* @returns Promise resolving to validation result with success status and optional error message
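* @example
* // Illustrative sketch; `embedder` is assumed to be an already constructed OpenRouterEmbedder
* const { valid, error } = await embedder.validateConfiguration()
* if (!valid) {
*     console.error(`OpenRouter embedder misconfigured: ${error}`)
* }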
*/
async validateConfiguration(): Promise<{ valid: boolean; error?: string }> {
return withValidationErrorHandling(async () => {
// Test with a minimal embedding request
const testTexts = ["test"]
const modelToUse = this.defaultModelId
const response = (await this.embeddingsClient.embeddings.create({
input: testTexts,
model: modelToUse,
encoding_format: "base64",
})) as OpenRouterEmbeddingResponse
// Check if we got a valid response
if (!response?.data || response.data.length === 0) {
return {
valid: false,
error: "Invalid response from OpenRouter API",
}
}
return { valid: true }
}, "openrouter")
}
/**
* Returns information about this embedder
*/
get embedderInfo(): EmbedderInfo {
return {
name: "openrouter",
}
}
/**
* Gets the optimal batch size for this OpenRouter embedder (60 items unless overridden via the constructor options)
*/
get optimalBatchSize(): number {
return this._optimalBatchSize
}
/**
* Waits out any active global rate limit, releasing the mutex before sleeping so other callers are not blocked
*/
private async waitForGlobalRateLimit(): Promise<void> {
const release = await OpenRouterEmbedder.globalRateLimitState.mutex.acquire()
let mutexReleased = false
try {
const state = OpenRouterEmbedder.globalRateLimitState
if (state.isRateLimited && state.rateLimitResetTime > Date.now()) {
const waitTime = state.rateLimitResetTime - Date.now()
// Silent wait - no logging to prevent flooding
release()
mutexReleased = true
await new Promise((resolve) => setTimeout(resolve, waitTime))
return
}
// Reset rate limit if time has passed
if (state.isRateLimited && state.rateLimitResetTime <= Date.now()) {
state.isRateLimited = false
state.consecutiveRateLimitErrors = 0
}
} finally {
// Only release if we haven't already
if (!mutexReleased) {
release()
}
}
}
/**
* Updates global rate limit state when a 429 error occurs
*/
private async updateGlobalRateLimitState(error: HttpError): Promise<void> {
const release = await OpenRouterEmbedder.globalRateLimitState.mutex.acquire()
try {
const state = OpenRouterEmbedder.globalRateLimitState
const now = Date.now()
// Increment consecutive rate limit errors
if (now - state.lastRateLimitError < 60000) {
// Within 1 minute
state.consecutiveRateLimitErrors++
} else {
state.consecutiveRateLimitErrors = 1
}
state.lastRateLimitError = now
// Calculate exponential backoff based on consecutive errors
const baseDelay = 5000 // 5 seconds base
const maxDelay = 300000 // 5 minutes max
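// e.g. 1 consecutive error -> 5s, 2 -> 10s, 3 -> 20s, doubling up to the 5 minute cap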
const exponentialDelay = Math.min(baseDelay * Math.pow(2, state.consecutiveRateLimitErrors - 1), maxDelay)
// Set global rate limit
state.isRateLimited = true
state.rateLimitResetTime = now + exponentialDelay
// Silent rate limit activation - no logging to prevent flooding
} finally {
release()
}
}
/**
* Gets the current global rate limit delay
*/
private async getGlobalRateLimitDelay(): Promise<number> {
const release = await OpenRouterEmbedder.globalRateLimitState.mutex.acquire()
try {
const state = OpenRouterEmbedder.globalRateLimitState
if (state.isRateLimited && state.rateLimitResetTime > Date.now()) {
return state.rateLimitResetTime - Date.now()
}
return 0
} finally {
release()
}
}
}