generate
Run a one-shot text completion on a local Ollama model. Returns full response text, timing, and tokens per second for non-streaming queries.
Instructions
Run a one-shot text completion against a local model (non-streaming). Returns the full response text plus timing and tokens/second.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| model | Yes | Model name (e.g. "llama3.1:8b"). | |
| prompt | Yes | Prompt text. | |
| system | No | Optional system prompt. | |
| options | No | Ollama sampling/decoding options — e.g. {"temperature": 0.7, "num_predict": 100, "top_p": 0.9}. | |
Implementation Reference
- server.js:179-208 (handler) The 'generate' tool handler function. Validates required args (model, prompt), builds request body with optional system/options, calls Ollama /api/generate endpoint, and returns formatted response with timing/token metrics.
async function generate(args) { const badModel = requireString(args, 'model'); if (badModel) return errorResult(badModel); const badPrompt = requireString(args, 'prompt'); if (badPrompt) return errorResult(badPrompt); const body = { model: args.model, prompt: args.prompt, stream: false, }; if (args.system && typeof args.system === 'string') body.system = args.system; if (args.options && typeof args.options === 'object') body.options = args.options; const r = await httpRequest('POST', '/api/generate', body); if (r.error) return errorResult(r.error); const d = r.data || {}; return textResult({ model: d.model || args.model, response: d.response || '', done_reason: d.done_reason || null, eval_count: d.eval_count || null, eval_duration_ms: d.eval_duration ? Math.round(d.eval_duration / 1e6) : null, prompt_eval_count: d.prompt_eval_count || null, total_duration_ms: d.total_duration ? Math.round(d.total_duration / 1e6) : null, tokens_per_second: d.eval_count && d.eval_duration ? Math.round((d.eval_count / (d.eval_duration / 1e9)) * 100) / 100 : null, }); } - server.js:307-326 (schema)Input schema registration for the 'generate' tool. Defines required params (model, prompt) and optional (system, options) with descriptions, used in the tools/list response for MCP discovery.
{ name: 'generate', description: 'Run a one-shot text completion against a local model (non-streaming). Returns the full response text plus timing and tokens/second.', annotations: { title: 'Generate text', readOnlyHint: false, destructiveHint: false, openWorldHint: true }, inputSchema: { type: 'object', properties: { model: { type: 'string', description: 'Model name (e.g. "llama3.1:8b").' }, prompt: { type: 'string', description: 'Prompt text.' }, system: { type: 'string', description: 'Optional system prompt.' }, options: { type: 'object', description: 'Ollama sampling/decoding options — e.g. {"temperature": 0.7, "num_predict": 100, "top_p": 0.9}.', additionalProperties: true, }, }, required: ['model', 'prompt'], additionalProperties: false, }, }, - server.js:385-394 (registration)HANDLERS map that registers the 'generate' function under the 'generate' key, used by the JSON-RPC dispatch to route tool calls.
const HANDLERS = { ollama_status: ollamaStatus, list_models: listModels, list_running: listRunning, show_model: showModel, generate: generate, chat: chat, pull_model: pullModel, delete_model: deleteModel, }; - server.js:57-107 (helper)httpRequest helper utility used by the generate handler to make HTTP POST calls to Ollama's /api/generate endpoint. Handles timeouts, errors, and JSON parsing.
function httpRequest(method, path, body) { return new Promise((resolve) => { let url; try { url = new URL(path, OLLAMA_URL); } catch (e) { resolve({ error: `invalid URL: ${e.message}` }); return; } const lib = url.protocol === 'https:' ? https : http; const opts = { method, hostname: url.hostname, port: url.port || (url.protocol === 'https:' ? 443 : 80), path: url.pathname + url.search, headers: { 'accept': 'application/json' }, }; let bodyBuf = null; if (body !== undefined) { bodyBuf = Buffer.from(JSON.stringify(body), 'utf8'); opts.headers['content-type'] = 'application/json'; opts.headers['content-length'] = bodyBuf.length; } const req = lib.request(opts, (res) => { let chunks = Buffer.alloc(0); res.on('data', (d) => { chunks = Buffer.concat([chunks, d]); }); res.on('end', () => { const text = chunks.toString('utf8'); if (res.statusCode >= 400) { resolve({ status: res.statusCode, error: `HTTP ${res.statusCode}: ${text.slice(0, 500)}` }); return; } // Some endpoints return text/plain (e.g. GET /); try JSON first, fall back to text. try { resolve({ status: res.statusCode, data: JSON.parse(text) }); } catch (_) { resolve({ status: res.statusCode, data: null, text }); } }); }); req.setTimeout(REQUEST_TIMEOUT_MS, () => { req.destroy(new Error(`request timed out after ${REQUEST_TIMEOUT_MS}ms`)); }); req.on('error', (e) => { // Give a friendly connection-refused message. const msg = /ECONNREFUSED|ENOTFOUND/.test(e.code || e.message) ? `cannot reach Ollama at ${OLLAMA_URL} — is the server running? Start it with \`ollama serve\` or open the Ollama app.` : e.message; resolve({ error: msg }); }); if (bodyBuf) req.write(bodyBuf); req.end(); }); } - server.js:109-114 (helper)requireString helper used by generate to validate that 'model' and 'prompt' args are non-empty strings.
function requireString(args, field) { if (typeof args[field] !== 'string' || !args[field].trim()) { return `${field} is required (non-empty string)`; } return null; }