HydraMCP

server.ts•13.4 KiB

/**
 * HydraMCP Server — the core that registers tools and handles requests.
 *
 * Architecture:
 * 1. MCP SDK handles the JSON-RPC protocol over stdio
 * 2. We register 8 tools: list_models, ask_model, compare_models, consensus,
 *    synthesize, session_recap, analyze_file, smart_read
 * 3. Each tool validates input with Zod, calls the provider, formats output
 * 4. The provider is injected — today it's CLIProxyAPI, tomorrow it could be anything
 */
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { Provider } from "./providers/provider.js";
import { askModelSchema, askModel } from "./tools/ask-model.js";
import { compareModelsSchema, compareModels } from "./tools/compare-models.js";
import { consensusSchema, consensus } from "./tools/consensus.js";
import { synthesizeSchema, synthesize } from "./tools/synthesize.js";
import { sessionRecapSchema, sessionRecap } from "./tools/session-recap.js";
import { analyzeFileSchema, analyzeFile } from "./tools/analyze-file.js";
import { smartReadSchema, smartRead } from "./tools/smart-read.js";
import { logger } from "./utils/logger.js";

/**
 * Result shape returned by every tool handler in this file: one text item,
 * plus `isError: true` on failure.
 *
 * Declared as a type alias (not an interface) so it stays assignable to
 * SDK result types that carry an index signature.
 */
type TextToolResult = {
  content: { type: "text"; text: string }[];
  isError?: boolean;
};

/**
 * Fields of the session summary that the list_models footer reads.
 * NOTE(review): inferred from how the values are used below (compared with
 * `> 0` and interpolated into text) — confirm against the provider that
 * implements getSessionSummary.
 */
type SessionSummary = {
  totalQueries: number;
  cacheHits: number;
  cacheTokensSaved: number;
  totalFailures: number;
};

/** A provider that additionally exposes per-session statistics. */
type SessionAwareProvider = Provider & { getSessionSummary(): SessionSummary };

/** True when the provider object has a `getSessionSummary` member. */
function isSessionAware(provider: Provider): provider is SessionAwareProvider {
  return "getSessionSummary" in provider;
}

/**
 * Runs a tool body and converts its outcome into a tool result.
 *
 * @param toolName - Tool name, used as the prefix of the error log line.
 * @param produce - Async body that resolves to the text to return.
 * @returns The produced text as a success result, or — if `produce` throws —
 *          an `isError` result whose text is `Error: <message>`. Never rejects
 *          for errors raised inside `produce`.
 */
async function runTool(
  toolName: string,
  produce: () => Promise<string>
): Promise<TextToolResult> {
  try {
    const text = await produce();
    return { content: [{ type: "text", text }] };
  } catch (err) {
    // Thrown values are not guaranteed to be Error instances.
    const message = err instanceof Error ? err.message : String(err);
    logger.error(`${toolName} failed: ${message}`);
    return {
      content: [{ type: "text", text: `Error: ${message}` }],
      isError: true,
    };
  }
}

/**
 * Builds the "Session" footer lines appended to the list_models output.
 *
 * @returns An empty array when the provider has no session summary or no
 *          queries have been recorded; otherwise a rule line followed by a
 *          one-line summary.
 */
function sessionFooterLines(provider: Provider): string[] {
  if (!isSessionAware(provider)) {
    return [];
  }

  const summary = provider.getSessionSummary();
  if (!(summary.totalQueries > 0)) {
    return [];
  }

  const parts = [`${summary.totalQueries} queries`];
  if (summary.cacheHits > 0) {
    parts.push(`${summary.cacheHits} cache hits (~${summary.cacheTokensSaved} tokens saved)`);
  }
  if (summary.totalFailures > 0) {
    parts.push(`${summary.totalFailures} failures`);
  }
  return ["---", `**Session:** ${parts.join(" | ")}`];
}

/**
 * Creates the HydraMCP server and registers all tools against the given provider.
 *
 * Every handler logs the request, delegates to its tool implementation, and
 * returns a single text content item. Failures inside a tool are logged and
 * returned as `isError` results instead of being thrown.
 *
 * @param provider - Model provider that every tool queries.
 * @returns The configured McpServer (not yet connected to a transport here).
 */
export function createServer(provider: Provider): McpServer {
  const server = new McpServer({
    name: "HydraMCP",
    version: "0.1.0",
  });

  // --- list_models ---
  server.tool(
    "list_models",
    "List all available models across all providers. Run this first to see what you can query.",
    {},
    async () => {
      logger.info("list_models: fetching from all providers");
      return runTool("list_models", async () => {
        const models = await provider.listModels();
        if (models.length === 0) {
          return "No models available. Make sure CLIProxyAPI or Ollama is running.";
        }

        // Group model ids by provider, preserving first-seen provider order.
        const grouped = new Map<string, string[]>();
        for (const m of models) {
          const list = grouped.get(m.provider) ?? [];
          list.push(m.id);
          grouped.set(m.provider, list);
        }

        const lines: string[] = [`## Available Models (${models.length} total)`, ""];
        for (const [prov, ids] of grouped) {
          lines.push(`### ${prov}`);
          for (const id of ids) {
            lines.push(`- \`${id}\``);
          }
          lines.push("");
        }

        // Session stats footer (only when the provider exposes a summary)
        lines.push(...sessionFooterLines(provider));

        return lines.join("\n");
      });
    }
  );

  // --- ask_model ---
  server.tool(
    "ask_model",
    `Query any AI model with a prompt. Returns the model's response with metadata. OUTPUT: Markdown with the model's response, latency, and token usage. If max_response_tokens is set and compression occurred, includes distillation metadata (original tokens, compressed tokens, compressor model, compressor latency). Shows "Saved: X tokens (Y% smaller)" when compression is active. Shows "(cached)" when response is served from cache. WHEN TO USE: When you need another model's perspective, analysis, or capabilities. Set max_response_tokens to control how much of your context window this response consumes — the response will be distilled by a fast model to fit the budget while preserving code, file paths, errors, and actionable details. Set include_raw=true to see both compressed and original responses for quality verification. 
FAILURE MODES: - "Model query failed (4xx/5xx)" → The model or provider is unavailable. Try a different model or check that CLIProxyAPI/Ollama is running. - "circuit breaker open" → The model failed too many times recently. Try a different model or wait for automatic recovery. - Compression silently skipped → If the compressor model is unavailable or the response already fits the budget, the raw response is returned unchanged. This is not an error.`,
    askModelSchema.shape,
    async (input) => {
      logger.info(`ask_model: querying ${input.model}`);
      return runTool("ask_model", () => askModel(provider, input));
    }
  );

  // --- compare_models ---
  server.tool(
    "compare_models",
    "Query 2-5 models in parallel with the same prompt. Returns side-by-side comparison with latency and token metrics.",
    compareModelsSchema.shape,
    async (input) => {
      logger.info(`compare_models: querying ${input.models.join(", ")}`);
      return runTool("compare_models", () => compareModels(provider, input));
    }
  );

  // --- consensus ---
  server.tool(
    "consensus",
    "Query 3-7 models and aggregate responses using voting strategy (majority/supermajority/unanimous). Returns consensus answer with confidence score.",
    consensusSchema.shape,
    async (input) => {
      logger.info(
        `consensus: polling ${input.models.length} models (${input.strategy})`
      );
      return runTool("consensus", () => consensus(provider, input));
    }
  );

  // --- synthesize ---
  server.tool(
    "synthesize",
    "Query 2-5 models in parallel, then combine their best ideas into one answer. Returns a synthesized response that's better than any single model.",
    synthesizeSchema.shape,
    async (input) => {
      logger.info(
        `synthesize: querying ${input.models.length} models, synthesizer: ${input.synthesizer_model ?? "auto"}`
      );
      return runTool("synthesize", () => synthesize(provider, input));
    }
  );

  // --- session_recap ---
  server.tool(
    "session_recap",
    `Read previous Claude Code sessions from disk and generate a smart-sized recap using a large-context model. Claude never sees the raw session data — only the distilled summary. OUTPUT: Returns markdown starting with "## Session Recap" containing sections: Project State, What Was Built, Key Decisions, Errors Resolved, Unfinished/In Progress, File Map. Empty sections are omitted. Output size is auto-calculated (1K-30K tokens) based on session density. WHEN TO USE: At the start of a new session when the user asks to restore context, recall previous work, or continue where they left off. 
FAILURE MODES: - "No recent project detected" + list of available projects → Retry with an explicit project path from the list. - "Project directory not found" + available projects → The project path was misspelled or encoded wrong. Retry with a path from the available list. - "No session files found" → The project directory exists but has no sessions. Try a different project. - "No models available" → CLIProxyAPI or Ollama is not running. Tell the user to start their model provider. - "Session Recap Failed" with error details → Both summarization passes failed. Retry with fewer sessions (sessions=1) or a different model. - "Triage Only" heading → Partial success. The triage pass worked but the full recap failed. The output still contains useful structured data. Do not retry.`,
    sessionRecapSchema.shape,
    async (input) => {
      logger.info(`session_recap: recapping ${input.sessions} sessions`);
      return runTool("session_recap", () => sessionRecap(provider, input));
    }
  );

  // --- analyze_file ---
  server.tool(
    "analyze_file",
    `Offload file analysis to a worker model. The file is read server-side — it never enters your context window. You send a file path and a question, and get back only the analysis. OUTPUT: Markdown with the model's analysis of the file, including file metadata (path, lines, chars), latency, and token usage. If max_response_tokens is set and compression occurred, includes distillation metadata (original tokens, compressed tokens, compressor model, compressor latency). WHEN TO USE: When you need to analyze, review, or search a file but want to avoid reading it yourself. Especially valuable for large files (1000+ lines) where reading would consume significant context. 
The file is sent to a large-context model (Gemini 1M) that can process the entire file at once. FAILURE MODES: - "File not found" → The path is wrong. Retry with the correct absolute path. - "Binary file detected" → Only text files are supported. Do not retry with this file. - "File too large" → The file exceeds 800K chars. Try analyzing a specific section or ask the user to split the file. - "No models available" → CLIProxyAPI or Ollama is not running. Tell the user to start their model provider. - "Model query failed" → Try a different model or check provider status with list_models.`,
    analyzeFileSchema.shape,
    async (input) => {
      logger.info(`analyze_file: ${input.file_path}`);
      return runTool("analyze_file", () => analyzeFile(provider, input));
    }
  );

  // --- smart_read ---
  server.tool(
    "smart_read",
    `Surgical code extraction from files. Returns ONLY relevant code sections with line numbers — not analysis. OUTPUT: Markdown with extracted code sections (verbatim, with line numbers), minimal annotations, file metadata, latency, token usage. Shows "Context saved" metric. Unlike analyze_file which returns prose analysis, smart_read returns actual code you can act on directly. WHEN TO USE: When you need to read a file but only care about specific sections. Use instead of the Read tool when you have a specific intent like "find the auth logic", "show error handling", "extract the database schema". Especially valuable for large files (1000+ lines) where reading the whole file wastes context tokens. For general questions about a file, use analyze_file instead. FAILURE MODES: - "File not found" → The path is wrong. Retry with the correct absolute path. 
- "Binary file detected" → Only text files are supported. Do not retry with this file. - "File too large" → The file exceeds 800K chars. Try a specific section. - "No models available" → CLIProxyAPI or Ollama is not running. Tell the user to start their model provider. - "No relevant sections found" → Try a broader query, or use analyze_file for general analysis. - "Model query failed" → Try a different model or check provider status with list_models.`,
    smartReadSchema.shape,
    async (input) => {
      logger.info(`smart_read: ${input.file_path}`);
      return runTool("smart_read", () => smartRead(provider, input));
    }
  );

  logger.info(`HydraMCP server created with provider: ${provider.name}`);
  return server;
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Pickle-Pixel/HydraMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

server.ts•13.4 KiB