/** Rough metadata inferred from a model name (all fields are best-effort). */
export interface ParsedModelInfo {
family: string;
size: string;
quantization: string;
variant: string;
version: string;
isCloud: boolean;
}
/** Defaults suggested for a consult call when the caller doesn't set them. */
export interface SuggestedConsultSettings {
temperature: number;
timeoutMs: number;
reasoning: string;
}
function clamp(n: number, min: number, max: number): number {
return Math.max(min, Math.min(max, n));
}
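/**
 * True for Ollama-style cloud model names, e.g. "gpt-oss:cloud" or
 * "deepseek-v3-cloud" (illustrative names, not an exhaustive list).
 */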
function looksLikeCloudModel(name: string): boolean {
return Boolean(name) && (name.includes(':cloud') || name.includes('-cloud'));
}
/**
* Best-effort model name parsing.
*
* This is intentionally heuristic (mirrors the StandardLlama “autonomy” diff idea):
* infer rough family/size/quantization/variant/version from the model name so we can
* pick sane default settings when the caller doesn't specify them.
*/
export function parseModelName(modelName: string): ParsedModelInfo {
const lowerName = (modelName || '').toLowerCase();
const info: ParsedModelInfo = {
family: 'unknown',
size: 'unknown',
quantization: 'unknown',
variant: 'unknown',
version: 'unknown',
isCloud: looksLikeCloudModel(modelName || ''),
};
  // Code-specialized families first: their names embed the base family name
  // (e.g. "codellama" contains "llama"), so this order keeps them distinct.
  if (
    lowerName.includes('codellama') ||
    lowerName.includes('code-llama') ||
    lowerName.includes('codeqwen')
  ) {
    info.family = 'codellama';
  } else if (lowerName.includes('gpt')) {
    info.family = 'gpt';
  } else if (lowerName.includes('llama')) {
    info.family = 'llama';
  } else if (lowerName.includes('mistral')) {
    info.family = 'mistral';
  } else if (
    lowerName.includes('qwen') ||
    lowerName.includes('deepseek') ||
    lowerName.includes('qwq')
  ) {
    info.family = 'qwen';
  } else if (lowerName.includes('gemma')) {
    info.family = 'gemma';
  } else if (lowerName.includes('phi')) {
    info.family = 'phi';
  } else if (lowerName.includes('claude') || lowerName.includes('anthropic')) {
    info.family = 'claude';
  } else if (lowerName.includes('falcon')) {
    info.family = 'falcon';
  } else if (lowerName.includes('starcoder') || lowerName.includes('starchat')) {
    info.family = 'starcoder';
  }
  // Size: accept "7b", "1.5b", "70m", etc.
  const sizeMatch = (modelName || '').match(/(\d+(?:\.\d+)?)\s*([kmbt])\b/i);
  if (sizeMatch) {
    const num = parseFloat(sizeMatch[1]);
    const unit = sizeMatch[2].toLowerCase();
    // Normalize to "B" (billions of parameters): 1k = 1e3 params → 1e-6 B,
    // 1m = 1e6 → 1e-3 B, 1t = 1e12 → 1e3 B.
    if (unit === 'k') {
      info.size = `${num / 1_000_000}B`;
    } else if (unit === 'm') {
      info.size = `${num / 1000}B`;
    } else if (unit === 't') {
      info.size = `${num * 1000}B`;
    } else {
      info.size = `${num}B`;
    }
  }
  // Quantization: Q2..Q8 family, e.g. Q4_0, Q4_K_M, Q5_K_S, Q3_K_L
  const quantMatch = (modelName || '').match(/(q[2-8](?:_[kmslf01]+)*)/i);
  if (quantMatch) info.quantization = quantMatch[1].toUpperCase();
  // Version: "v3", "0.2", etc. The lookahead rejects digits that run into a
  // letter, so size tokens like "7b" are not misread as versions.
  const versionMatch = (modelName || '').match(/(?:^|[^a-z0-9])v?(\d+(?:\.\d+)?)(?=$|[^a-z0-9])/i);
  if (versionMatch) info.version = versionMatch[1];
// Variant
if (
lowerName.includes('instruct') ||
lowerName.includes('chat') ||
lowerName.includes('conversational')
) {
info.variant = 'instruct';
} else if (lowerName.includes('base') || lowerName.includes('pretrained')) {
info.variant = 'base';
} else if (
lowerName.includes('code') ||
lowerName.includes('coder') ||
lowerName.includes('programming')
) {
info.variant = 'code';
} else if (lowerName.includes('math') || lowerName.includes('mathematics')) {
info.variant = 'math';
} else if (lowerName.includes('vision') || lowerName.includes('visual')) {
info.variant = 'vision';
}
return info;
}
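// Illustration of the heuristics above (hypothetical model names; the exact
// output is best-effort, not a contract):
//
//   parseModelName('codellama:13b-instruct-q4_K_M')
//   // → { family: 'codellama', size: '13B', quantization: 'Q4_K_M',
//   //     variant: 'instruct', version: 'unknown', isCloud: false }
//
//   parseModelName('mistral-7b-instruct-v0.2')
//   // → { family: 'mistral', size: '7B', quantization: 'unknown',
//   //     variant: 'instruct', version: '0.2', isCloud: false }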
function sizeToNumberBillions(size: string): number | null {
if (!size || size === 'unknown') return null;
const m = size.match(/(\d+(?:\.\d+)?)/);
if (!m) return null;
const n = parseFloat(m[1]);
return Number.isFinite(n) ? n : null;
}
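// e.g. sizeToNumberBillions('7B') → 7, sizeToNumberBillions('0.5B') → 0.5,
// sizeToNumberBillions('unknown') → null.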
/**
* Suggest defaults for this MCP server (temperature + timeout).
*
* Note: This server currently only sends `temperature` and uses request-level timeouts.
* We don't attempt to set Ollama options like context window / max tokens here.
*/
export function suggestConsultSettings(args: {
modelName: string;
prompt: string;
hasSystemPrompt?: boolean;
baseTimeoutMs: number;
}): SuggestedConsultSettings {
const parsed = parseModelName(args.modelName);
const promptLen = (args.prompt || '').length;
const sizeB = sizeToNumberBillions(parsed.size);
let temperature = 0.7;
const reasons: string[] = [];
  if (
    parsed.variant === 'code' ||
    parsed.family === 'codellama' ||
    parsed.family === 'starcoder'
  ) {
temperature = 0.3;
reasons.push('code model → lower temperature');
} else if (parsed.variant === 'math') {
temperature = 0.2;
reasons.push('math model → very low temperature');
} else if (sizeB !== null && sizeB <= 3) {
temperature = 0.8;
reasons.push('small model → slightly higher temperature');
} else if (sizeB !== null && sizeB > 70) {
temperature = 0.6;
reasons.push('very large model → slightly lower temperature');
}
temperature = clamp(temperature, 0, 2);
let timeoutMs = args.baseTimeoutMs;
  // Test the larger threshold first so very long prompts get the 2× multiplier.
  if (promptLen > 3000) {
    timeoutMs = Math.round(timeoutMs * 2.0);
    reasons.push('very long prompt → higher timeout');
  } else if (promptLen > 1200) {
    timeoutMs = Math.round(timeoutMs * 1.5);
    reasons.push('long prompt → higher timeout');
  }
if (args.hasSystemPrompt) {
timeoutMs = Math.round(timeoutMs * 1.5);
reasons.push('system prompt → higher timeout');
}
if (sizeB !== null && sizeB >= 30) {
timeoutMs = Math.round(timeoutMs * 1.5);
reasons.push('large model → higher timeout');
}
if (parsed.isCloud) {
timeoutMs = Math.round(timeoutMs * 1.2);
reasons.push('cloud model → higher timeout');
}
timeoutMs = clamp(timeoutMs, 1000, 600_000);
const reasoning =
reasons.length > 0
? `Auto settings applied (${reasons.join(', ')}).`
: 'Auto settings applied (no special heuristics triggered).';
return { temperature, timeoutMs, reasoning };
}
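// Sketch of a call site (names and numbers are illustrative):
//
//   const auto = suggestConsultSettings({
//     modelName: 'codellama:13b',
//     prompt: userPrompt,
//     hasSystemPrompt: true,
//     baseTimeoutMs: 30_000,
//   });
//   // → temperature 0.3 (code model) and, for a short prompt,
//   //   timeoutMs 45_000 (30s × 1.5 for the system prompt).
/**
 * Whether auto settings should apply: an explicit per-call `auto_settings`
 * boolean wins; otherwise fall back to the MCP_AUTO_MODEL_SETTINGS /
 * MCP_AUTO_SETTINGS environment variables.
 */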
export function shouldAutoModelSettings(params: Record<string, unknown>): boolean {
const localFlag = params.auto_settings;
if (typeof localFlag === 'boolean') return localFlag;
const env = (process.env.MCP_AUTO_MODEL_SETTINGS || process.env.MCP_AUTO_SETTINGS || '')
.trim()
.toLowerCase();
if (!env) return false;
return env === '1' || env === 'true' || env === 'yes' || env === 'on';
}
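// Resolution order illustrated (values are examples):
//
//   shouldAutoModelSettings({ auto_settings: false }); // → false (flag wins)
//   // With MCP_AUTO_MODEL_SETTINGS=yes exported and no per-call flag:
//   shouldAutoModelSettings({});                       // → true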