// mcp-handlers.test.ts (12.2 kB)
import { describe, it, expect } from "bun:test";
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import {
ListToolsRequestSchema,
CallToolRequestSchema,
} from "@modelcontextprotocol/sdk/types.js";
describe("MCP Protocol Handlers", () => {
let server: Server;
describe("ListTools Handler", () => {
  /**
   * Registers a ListTools handler on a fresh server, then checks that the
   * `tools` capability is present and that the handler's payload actually
   * advertises the `consult_oracle` tool with a required `prompt` input.
   */
  it("should return consult_oracle tool in tools list", async () => {
    server = new Server({
      name: "test-oracle-mcp",
      version: "0.1.0",
    });
    server.registerCapabilities({
      tools: {},
    });

    const baseDescription = "Consult the oracle (gpt-5.1-codex-mini) via codex CLI";
    const reasoningPart = " with medium-level reasoning";
    const capabilityPart = ". The oracle provides expert reasoning and analysis for complex problem-solving.";
    const usagePart =
      " Use when: (1) planning complex tasks with multiple tradeoffs, (2) you are <=90% confident in your approach, (3) you need analysis of architectural decisions or design patterns.";

    // Kept as a named function so the test can invoke it directly: registering
    // it on the server alone never runs it, which would leave the tool list
    // unverified.
    const listToolsHandler = async () => ({
      tools: [
        {
          name: "consult_oracle",
          description: baseDescription + reasoningPart + capabilityPart + usagePart,
          inputSchema: {
            // `as const` keeps the literal type once the object is no longer
            // contextually typed by setRequestHandler.
            type: "object" as const,
            properties: {
              prompt: {
                type: "string",
                description: "The question or problem to consult the oracle about. Be specific about context, constraints, and what decision or analysis you need.",
              },
            },
            required: ["prompt"],
          },
        },
      ],
    });
    server.setRequestHandler(ListToolsRequestSchema, listToolsHandler);

    // Reads the private `_capabilities` field, hence the `any` cast.
    // NOTE(review): switch to a public accessor if the SDK exposes one.
    expect((server as any)._capabilities?.tools).toBeDefined();

    const response = await listToolsHandler();
    expect(response.tools.map((tool) => tool.name)).toContain("consult_oracle");
    expect(response.tools[0].description).toContain("gpt-5.1-codex-mini");
    expect(response.tools[0].inputSchema.required).toContain("prompt");
  });

  /**
   * Assembles the tool description from its parts and checks that the model,
   * command, reasoning level and usage guidance all appear in the result.
   */
  it("should include tool description with model name", async () => {
    const model = "gpt-5.1-codex-mini";
    const command = "codex";
    const baseDescription = `Consult the oracle (${model}) via ${command} CLI`;
    const reasoning = "medium";
    // The reasoning clause is only appended when a reasoning level is set.
    const reasoningPart = reasoning
      ? ` with ${reasoning}-level reasoning`
      : "";
    const capabilityPart = ". The oracle provides expert reasoning and analysis for complex problem-solving.";
    const usagePart =
      " Use when: (1) planning complex tasks with multiple tradeoffs, (2) you are <=90% confident in your approach, (3) you need analysis of architectural decisions or design patterns.";
    const fullDescription = baseDescription + reasoningPart + capabilityPart + usagePart;

    expect(fullDescription).toContain(model);
    expect(fullDescription).toContain(command);
    expect(fullDescription).toContain(reasoning);
    expect(fullDescription).toContain("planning complex tasks");
    expect(fullDescription).toContain("architectural decisions");
  });

  /** Checks the shape of the input schema: a required string `prompt`. */
  it("should include input schema with prompt property", async () => {
    const schema = {
      type: "object",
      properties: {
        prompt: {
          type: "string",
          description: "The prompt to send to the oracle",
        },
      },
      required: ["prompt"],
    };

    expect(schema.properties.prompt).toBeDefined();
    expect(schema.properties.prompt.type).toBe("string");
    expect(schema.required).toContain("prompt");
  });
});
// Shape checks for CallTool handling. These tests build the expected values
// inline; they do not invoke a registered handler.
describe("CallTool Handler", () => {
  /** The only tool name recognised is `consult_oracle`. */
  it("should handle consult_oracle tool calls", async () => {
    const toolName = "consult_oracle";
    const isOracle = toolName === "consult_oracle";
    expect(isOracle).toBe(true);
  });

  /** The prompt is read from the `prompt` key of the loosely typed arguments. */
  it("should extract prompt from arguments", async () => {
    // Declared (not asserted) as a loose record, mirroring untyped tool arguments.
    const args: Record<string, unknown> = { prompt: "test prompt" };
    const prompt = args.prompt as string;
    expect(prompt).toBe("test prompt");
  });

  /** A successful call wraps stdout in a single text content item. */
  it("should return success response with content", async () => {
    const stdout = "oracle response";
    const response = {
      content: [
        {
          type: "text",
          text: stdout,
        },
      ],
    };
    expect(response.content[0].type).toBe("text");
    expect(response.content[0].text).toBe(stdout);
  });

  /** An unrecognised tool name yields an error response naming the tool. */
  it("should return error response for unknown tool", async () => {
    const toolName = "unknown_tool";
    const response = {
      content: [
        {
          type: "text",
          text: `Unknown tool: ${toolName}`,
        },
      ],
      isError: true,
    };
    expect(response.isError).toBe(true);
    expect(response.content[0].text).toContain("Unknown tool");
  });

  /** A failed invocation is reported with the "Error consulting oracle" prefix. */
  it("should handle errors from oracle invocation", async () => {
    const errorMessage = "Command failed";
    const response = {
      content: [
        {
          type: "text",
          text: `Error consulting oracle: ${errorMessage}`,
        },
      ],
      isError: true,
    };
    expect(response.isError).toBe(true);
    expect(response.content[0].text).toContain("Error consulting oracle");
  });
});
// Verifies how thrown values are turned into an error response message.
describe("Error Handling", () => {
  /** Uses `.message` for Error instances and `String(...)` for anything else. */
  const describeFailure = (failure: unknown): string => {
    if (failure instanceof Error) {
      return failure.message;
    }
    return String(failure);
  };

  /** Builds the error-shaped response carrying the given failure detail. */
  const buildErrorResponse = (detail: string) => ({
    content: [
      {
        type: "text",
        text: `Error consulting oracle: ${detail}`,
      },
    ],
    isError: true,
  });

  it("should handle Error objects in responses", async () => {
    const thrown = new Error("Test error");
    const result = buildErrorResponse(describeFailure(thrown));
    expect(result.content[0].text).toContain("Test error");
  });

  it("should handle non-Error objects gracefully", async () => {
    const thrown: unknown = "String error";
    const result = buildErrorResponse(describeFailure(thrown));
    expect(result.content[0].text).toContain("String error");
  });
});
// Verifies the shape of success and error responses.
describe("Response Format", () => {
  /** Wraps a string in a response holding a single text content item. */
  const textResponse = (body: string) => ({
    content: [
      {
        type: "text",
        text: body,
      },
    ],
  });

  it("should always use text type for content", async () => {
    const [firstItem] = textResponse("response text").content;
    expect(firstItem.type).toBe("text");
  });

  it("should not include isError flag on success", async () => {
    const success = textResponse("success");
    // Viewed as a loose record because the success shape has no `isError` key.
    const { isError } = success as Record<string, unknown>;
    expect(isError).toBeUndefined();
  });

  it("should include isError: true on errors", async () => {
    const failure = { ...textResponse("error message"), isError: true };
    expect(failure.isError).toBe(true);
  });

  it("should preserve multiline content in responses", async () => {
    const stdout = "line 1\nline 2\nline 3";
    const { text } = textResponse(stdout).content[0];
    for (const fragment of ["\n", "line 1", "line 3"]) {
      expect(text).toContain(fragment);
    }
  });
});
// Shape checks for the oracle fallback chain: iteration order, success and
// aggregated-failure responses, and the supported configuration layouts.
// These tests build their data inline; no oracle is actually invoked.
describe("Fallback Chain Handling", () => {
  /** Iterating the configured oracles visits each one once, in order. */
  it("should attempt next oracle on first failure", async () => {
    const oracles = [
      { model: "gpt-5.1", command: "codex" },
      { model: "gemini-2.5-pro", command: "gemini" },
    ];

    // Record the order in which oracles are visited; asserting once on the
    // collected list avoids conditional expectations inside the loop.
    const attemptedModels: string[] = [];
    for (const oracle of oracles) {
      attemptedModels.push(oracle.model);
    }

    expect(attemptedModels).toEqual(["gpt-5.1", "gemini-2.5-pro"]);
    expect(attemptedModels.length).toBe(2);
  });

  /** A successful response carries the text and no `isError` flag. */
  it("should return first successful oracle response", async () => {
    const successResponse = {
      content: [
        {
          type: "text",
          text: "Success from oracle 2",
        },
      ],
    };
    expect(successResponse.content[0].text).toBe("Success from oracle 2");
    expect((successResponse as Record<string, unknown>).isError).toBeUndefined();
  });

  /** The aggregated failure lists every per-oracle error on its own line. */
  it("should format error message with all failed oracles", async () => {
    const errors = [
      "Oracle 1 (gpt-5.1): Connection failed",
      "Oracle 2 (gemini-2.5-pro): Timeout",
      "Oracle 3 (opus): Rate limited",
    ];
    const errorResponse = {
      content: [
        {
          type: "text",
          text: `All oracles failed:\n${errors.join("\n")}`,
        },
      ],
      isError: true,
    };
    expect(errorResponse.isError).toBe(true);
    expect(errorResponse.content[0].text).toContain("All oracles failed");
    for (const error of errors) {
      expect(errorResponse.content[0].text).toContain(error);
    }
  });

  /** Each per-oracle error names the 1-based oracle index and the model. */
  it("should include oracle index in error message", async () => {
    const errorMsg = "Oracle 2 (gemini-2.5-pro): Service unavailable";
    expect(errorMsg).toContain("Oracle 2");
    expect(errorMsg).toMatch(/Oracle \d+/);
    expect(errorMsg).toContain("gemini-2.5-pro");
  });

  /** Configuration layout with a single `model` per oracle. */
  it("should support multiple oracle configurations with single models", async () => {
    const oracleConfigs = [
      { model: "gpt-5.1-codex-mini", reasoning: "medium", command: "codex" },
      { model: "gemini-2.5-pro", command: "gemini" },
      { model: "opus", command: "claude" },
    ];
    expect(oracleConfigs.length).toBe(3);
    expect(oracleConfigs[0].model).toBe("gpt-5.1-codex-mini");
    expect(oracleConfigs[1].model).toBe("gemini-2.5-pro");
    expect(oracleConfigs[2].model).toBe("opus");
  });

  /** Configuration layout with a `models` list per oracle. */
  it("should support multiple oracle configurations with multiple models each", async () => {
    const oracleConfigs = [
      {
        models: ["gpt-5.1", "gpt-5.1-codex-max"],
        reasoning: "medium",
        command: "codex",
      },
      {
        models: ["gemini-2.5-pro", "gemini-1.5-pro"],
        command: "gemini",
      },
      { models: ["opus", "sonnet"], command: "claude" },
    ];
    expect(oracleConfigs.length).toBe(3);
    expect(oracleConfigs[0].models).toEqual([
      "gpt-5.1",
      "gpt-5.1-codex-max",
    ]);
    expect(oracleConfigs[1].models).toEqual([
      "gemini-2.5-pro",
      "gemini-1.5-pro",
    ]);
    expect(oracleConfigs[2].models).toEqual(["opus", "sonnet"]);
  });

  /** Per-model errors include both the oracle index and the model index. */
  it("should support fallback through multiple models in oracle", async () => {
    const errors = [
      "Oracle 1, model 1 (gpt-5.1): Connection timeout",
      "Oracle 1, model 2 (gpt-5.1-codex-max): Rate limited",
      "Oracle 2, model 1 (gemini-2.5-pro): Service unavailable",
    ];
    const errorResponse = {
      content: [
        {
          type: "text",
          text: `All oracles failed:\n${errors.join("\n")}`,
        },
      ],
      isError: true,
    };
    expect(errorResponse.isError).toBe(true);
    expect(errorResponse.content[0].text).toContain("Oracle 1, model 1");
    expect(errorResponse.content[0].text).toContain("Oracle 1, model 2");
    expect(errorResponse.content[0].text).toContain("Oracle 2, model 1");
    expect(errorResponse.content[0].text).toContain("gpt-5.1");
    expect(errorResponse.content[0].text).toContain("gpt-5.1-codex-max");
    expect(errorResponse.content[0].text).toContain("gemini-2.5-pro");
  });
});
});