Skip to main content
Glama
search.py8.34 kB
import logging
from typing import Any

import openai

from ..backend import RELACE_PROVIDER, OpenAIChatClient
from ..backend.openai_backend import _env_bool
from ..config import RelaceConfig
from ..config.settings import (
    RELACE_SEARCH_BASE_URL,
    RELACE_SEARCH_MODEL,
    SEARCH_TIMEOUT_SECONDS,
)

logger = logging.getLogger(__name__)


def _strip_tool_strict(tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return a copy of *tools* with the ``strict`` key removed from each
    tool's ``function`` schema.

    Shallow-copies each tool dict (and its ``function`` sub-dict, when that
    sub-dict contains ``strict``) so the caller's list is never mutated.
    """
    stripped: list[dict[str, Any]] = []
    for tool in tools:
        tool_copy = dict(tool)
        func = tool_copy.get("function")
        if isinstance(func, dict) and "strict" in func:
            func_copy = dict(func)
            func_copy.pop("strict", None)
            tool_copy["function"] = func_copy
        stripped.append(tool_copy)
    return stripped


class SearchLLMClient:
    """Search client for Fast Agentic Search.

    Supports Relace and OpenAI-compatible providers (OpenAI, OpenRouter,
    Cerebras, etc.).

    Environment variables:
        RELACE_SEARCH_PROVIDER: Provider name (default: relace)
        RELACE_SEARCH_ENDPOINT: API base URL
        RELACE_SEARCH_MODEL: Model name
        RELACE_SEARCH_API_KEY: API key (or use provider-specific key)
        RELACE_SEARCH_PARALLEL_TOOL_CALLS: Enable parallel tool calls
            (default: true)
        RELACE_SEARCH_TOOL_STRICT: Include strict field in tool schemas
            (default: true)

    Note: When using OpenAI-compatible providers with strict=true,
    parallel_tool_calls is automatically disabled to comply with OpenAI
    Structured Outputs limitations.
    """

    def __init__(self, config: RelaceConfig) -> None:
        self._chat_client = OpenAIChatClient(
            config,
            provider_env="RELACE_SEARCH_PROVIDER",
            base_url_env="RELACE_SEARCH_ENDPOINT",
            model_env="RELACE_SEARCH_MODEL",
            default_base_url=RELACE_SEARCH_BASE_URL,
            default_model=RELACE_SEARCH_MODEL,
            timeout_seconds=SEARCH_TIMEOUT_SECONDS,
        )
        # Sticky compatibility flags: each one is flipped on permanently once a
        # request is rejected and the compat retry succeeds (see
        # _retry_compat_on_schema_error), so subsequent calls skip the
        # offending parameters up front.
        self._disable_relace_sampling = False
        self._disable_parallel_tool_calls = False
        # NOTE(review): the class docstring advertises RELACE_SEARCH_TOOL_STRICT,
        # but this module never reads that env var — strict stripping is only
        # enabled here after a compat retry. Confirm where (or whether) the env
        # var is actually honored.
        self._strip_tool_strict = False

    def chat(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]],
        trace_id: str = "unknown",
    ) -> dict[str, Any]:
        """Send one tool-enabled chat-completions request and return the raw
        response payload.

        On a schema rejection (400/422) a one-time compatibility retry is
        attempted; all ``openai`` errors are re-raised as ``RuntimeError``
        with a provider-specific message.
        """
        # Apply the sticky flag set by a previous successful compat retry.
        if self._strip_tool_strict:
            tools = _strip_tool_strict(tools)
        # Relace-only sampling extras (top_k / repetition_penalty) are sent
        # only when the provider is Relace and they have not been disabled.
        include_relace_sampling = (
            self._chat_client.api_compat == RELACE_PROVIDER
            and not self._disable_relace_sampling
        )
        include_parallel_tool_calls = (
            _env_bool("RELACE_SEARCH_PARALLEL_TOOL_CALLS", default=True)
            and not self._disable_parallel_tool_calls
        )
        # OpenAI Structured Outputs is incompatible with parallel_tool_calls:
        # when an OpenAI-compatible provider is in use and any tool carries
        # strict=true, disable parallel_tool_calls automatically.
        if self._chat_client.api_compat != RELACE_PROVIDER and include_parallel_tool_calls:
            tool_has_strict = any(
                isinstance(t, dict)
                and isinstance(t.get("function"), dict)
                and t.get("function", {}).get("strict")
                for t in tools
            )
            if tool_has_strict:
                logger.warning(
                    "[%s] OpenAI Structured Outputs does not support parallel_tool_calls "
                    "with strict=true. Disabling parallel_tool_calls for compatibility. "
                    "Set RELACE_SEARCH_PARALLEL_TOOL_CALLS=0 to suppress this warning.",
                    trace_id,
                )
                include_parallel_tool_calls = False
        extra_body: dict[str, Any] = {
            "tools": tools,
            "tool_choice": "auto",
            "top_p": 0.95,
        }
        if include_relace_sampling:
            extra_body["top_k"] = 100
            extra_body["repetition_penalty"] = 1.0
        if include_parallel_tool_calls:
            extra_body["parallel_tool_calls"] = True
        try:
            data, _latency_ms = self._chat_client.chat_completions(
                messages=messages,
                temperature=1.0,
                extra_body=extra_body,
                trace_id=trace_id,
            )
            return data
        except (openai.BadRequestError, openai.UnprocessableEntityError) as exc:
            # Schema rejection: try once more with a stripped-down
            # compatibility payload before giving up.
            try:
                retried = self._retry_compat_on_schema_error(
                    exc,
                    messages=messages,
                    tools=tools,
                    trace_id=trace_id,
                    include_relace_sampling=include_relace_sampling,
                    include_parallel_tool_calls=include_parallel_tool_calls,
                )
            except openai.APIError as retry_exc:
                name = self._chat_client.provider_display_name
                raise RuntimeError(
                    f"{name} Search API request failed after compatibility retry: {retry_exc}"
                ) from retry_exc
            if retried is not None:
                return retried
            # No retry was applicable (nothing to strip/disable).
            name = self._chat_client.provider_display_name
            raise RuntimeError(f"{name} Search API request schema rejected: {exc}") from exc
        except openai.AuthenticationError as exc:
            name = self._chat_client.provider_display_name
            raise RuntimeError(f"{name} Search API authentication error: {exc}") from exc
        except openai.RateLimitError as exc:
            name = self._chat_client.provider_display_name
            raise RuntimeError(f"{name} Search API rate limit: {exc}") from exc
        except openai.APITimeoutError as exc:
            name = self._chat_client.provider_display_name
            raise RuntimeError(
                f"{name} Search API request timed out after {SEARCH_TIMEOUT_SECONDS}s."
            ) from exc
        except openai.APIConnectionError as exc:
            name = self._chat_client.provider_display_name
            raise RuntimeError(f"Failed to connect to {name} Search API: {exc}") from exc
        except openai.APIStatusError as exc:
            # Catch-all for remaining HTTP status errors; 404 gets a hint
            # about the most common misconfiguration (full URL in endpoint).
            name = self._chat_client.provider_display_name
            if exc.status_code == 404:
                raise RuntimeError(
                    f"{name} Search API returned 404. If using an OpenAI-compatible endpoint, "
                    "set RELACE_SEARCH_ENDPOINT to the provider base URL (do not include "
                    "`/chat/completions`)."
                ) from exc
            raise RuntimeError(
                f"{name} Search API error (status={exc.status_code}): {exc}"
            ) from exc

    def _retry_compat_on_schema_error(
        self,
        exc: openai.APIStatusError,
        *,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]],
        trace_id: str,
        include_relace_sampling: bool,
        include_parallel_tool_calls: bool,
    ) -> dict[str, Any] | None:
        """Retry a schema-rejected request with a maximally compatible payload.

        Drops Relace sampling extras, parallel_tool_calls, and the ``strict``
        tool fields. Returns the response data on success, or ``None`` when the
        original request had none of those features (i.e. nothing to retry).
        On success, sets the corresponding sticky flags so future requests
        skip the rejected features from the start. May raise ``openai.APIError``
        if the retry itself fails.
        """
        had_strict = any(
            isinstance(t, dict)
            and isinstance(t.get("function"), dict)
            and "strict" in t.get("function", {})
            for t in tools
        )
        # If nothing compat-sensitive was in the request, retrying the same
        # payload would fail identically — signal "no retry" to the caller.
        if not include_relace_sampling and not include_parallel_tool_calls and not had_strict:
            return None
        stripped_tools = _strip_tool_strict(tools)
        compat_body: dict[str, Any] = {
            "tools": stripped_tools,
            "tool_choice": "auto",
            "top_p": 0.95,
        }
        logger.warning(
            "[%s] Request rejected (status=%s, provider=%s, api_compat=%s). Retrying with "
            "compatibility payload (no relace sampling params, no parallel_tool_calls, "
            "no strict tool fields). Error: %s",
            trace_id,
            exc.status_code,
            self._chat_client.provider,
            self._chat_client.api_compat,
            exc,
        )
        data, _latency_ms = self._chat_client.chat_completions(
            messages=messages,
            temperature=1.0,
            extra_body=compat_body,
            trace_id=f"{trace_id}:compat",
        )
        # The compat payload worked: persist the lesson so later calls do not
        # re-trigger the rejection (only flags for features actually dropped).
        if include_relace_sampling:
            self._disable_relace_sampling = True
        if include_parallel_tool_calls:
            self._disable_parallel_tool_calls = True
        if had_strict:
            self._strip_tool_strict = True
        return data

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/possible055/relace-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server