list_running
Lists Ollama models currently loaded in VRAM, showing size, VRAM usage, and expiry. Returns empty list when idle.
Instructions
List models currently loaded into VRAM with their size, VRAM footprint, and expiry timestamp. Empty list means Ollama is idle.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| No arguments | | | |
Implementation Reference
- server.js:146-157 (handler) The handler function for the 'list_running' tool. Calls Ollama's /api/ps endpoint and returns a list of running models with their name, size, VRAM footprint, expiry, and digest.
async function listRunning() { const r = await httpRequest('GET', '/api/ps'); if (r.error) return errorResult(r.error); const models = (r.data?.models || []).map((m) => ({ name: m.name, size_bytes: m.size, size_vram_bytes: m.size_vram, expires_at: m.expires_at, digest: m.digest, })); return textResult({ count: models.length, models }); } - server.js:289-293 (schema) The tool registration schema for 'list_running' defining its name, description, annotations (read-only hint), and inputSchema (empty object, no parameters).
name: 'list_running', description: 'List models currently loaded into VRAM with their size, VRAM footprint, and expiry timestamp. Empty list means Ollama is idle.', annotations: { title: 'List running models', readOnlyHint: true, destructiveHint: false, openWorldHint: false }, inputSchema: { type: 'object', properties: {}, additionalProperties: false }, }, - server.js:385-394 (registration) The HANDLERS map that maps tool name 'list_running' to the listRunning function, used by the JSON-RPC dispatch to route tools/call requests.
const HANDLERS = { ollama_status: ollamaStatus, list_models: listModels, list_running: listRunning, show_model: showModel, generate: generate, chat: chat, pull_model: pullModel, delete_model: deleteModel, }; - server.js:57-107 (helper) The httpRequest helper function used by listRunning to make the GET request to Ollama's /api/ps endpoint.
/**
 * Send an HTTP(S) request to the Ollama server configured in OLLAMA_URL and
 * decode the reply.
 *
 * Expected failures (bad URL, unserializable body, connection errors,
 * timeouts, interrupted responses, HTTP >= 400) are reported through the
 * resolved value's `error` field instead of rejecting, so callers can simply
 * branch on `r.error`.
 *
 * @param {string} method - HTTP verb, e.g. 'GET' or 'POST'.
 * @param {string} path - Path (and optional query string) resolved against OLLAMA_URL.
 * @param {*} [body] - Optional payload; JSON-encoded and sent when not `undefined`.
 * @returns {Promise<{status?: number, data?: *, text?: string, error?: string}>}
 *   - `{ status, data }` when the reply body parses as JSON;
 *   - `{ status, data: null, text }` when the reply body is not JSON;
 *   - `{ status, error }` for HTTP >= 400 or an interrupted/truncated reply;
 *   - `{ error }` when the request failed before any response arrived.
 */
function httpRequest(method, path, body) {
  return new Promise((resolve) => {
    let url;
    try {
      url = new URL(path, OLLAMA_URL);
    } catch (e) {
      resolve({ error: `invalid URL: ${e.message}` });
      return;
    }

    const isHttps = url.protocol === 'https:';
    const lib = isHttps ? https : http;
    const opts = {
      method,
      hostname: url.hostname,
      port: url.port || (isHttps ? 443 : 80),
      path: url.pathname + url.search,
      headers: { 'accept': 'application/json' },
    };

    let bodyBuf = null;
    if (body !== undefined) {
      // JSON.stringify throws on cycles/BigInt and yields undefined for
      // functions/symbols. Report both as an error result; a throw here would
      // otherwise reject the promise, unlike every other failure path.
      try {
        const json = JSON.stringify(body);
        if (json === undefined) {
          throw new TypeError('value is not JSON-serializable');
        }
        bodyBuf = Buffer.from(json, 'utf8');
      } catch (e) {
        resolve({ error: `cannot serialize request body: ${e.message}` });
        return;
      }
      opts.headers['content-type'] = 'application/json';
      opts.headers['content-length'] = bodyBuf.length;
    }

    const req = lib.request(opts, (res) => {
      // Collect the pieces and concatenate once at the end; re-concatenating
      // on every 'data' event copies the accumulated body each time.
      const chunks = [];
      res.on('data', (d) => {
        chunks.push(d);
      });
      // Without an 'error' listener a connection dropped mid-body can go
      // unreported, leaving this promise pending forever.
      res.on('error', (e) => {
        resolve({ status: res.statusCode, error: `response interrupted: ${e.message}` });
      });
      res.on('end', () => {
        // 'end' can still fire after a premature close; `complete` tells us
        // whether the whole message was actually received.
        if (!res.complete) {
          resolve({
            status: res.statusCode,
            error: 'connection closed before the full response was received',
          });
          return;
        }
        const text = Buffer.concat(chunks).toString('utf8');
        if (res.statusCode >= 400) {
          resolve({ status: res.statusCode, error: `HTTP ${res.statusCode}: ${text.slice(0, 500)}` });
          return;
        }
        // Some endpoints return text/plain (e.g. GET /); try JSON first, fall back to text.
        try {
          resolve({ status: res.statusCode, data: JSON.parse(text) });
        } catch (_) {
          resolve({ status: res.statusCode, data: null, text });
        }
      });
    });

    // Destroying the request with an error routes the timeout through the
    // 'error' handler below.
    req.setTimeout(REQUEST_TIMEOUT_MS, () => {
      req.destroy(new Error(`request timed out after ${REQUEST_TIMEOUT_MS}ms`));
    });

    req.on('error', (e) => {
      // Give a friendly connection-refused message.
      const msg = /ECONNREFUSED|ENOTFOUND/.test(e.code || e.message)
        ? `cannot reach Ollama at ${OLLAMA_URL} — is the server running? Start it with \`ollama serve\` or open the Ollama app.`
        : e.message;
      resolve({ error: msg });
    });

    if (bodyBuf) req.write(bodyBuf);
    req.end();
  });
}