analyze_video
Analyze or transcribe video files (mp4, mpeg, mov, webm) from local paths, URLs, or base64 data using multimodal models. Default: google/gemini-2.5-flash.
Instructions
Analyze or transcribe a video file using a multimodal model. Accepts mp4, mpeg, mov, or webm from a local file path, HTTP(S) URL, or base64 data URL. Default model: google/gemini-2.5-flash.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| video_path | Yes | File path, HTTP(S) URL, or base64 data URL. Supported formats: mp4, mpeg, mov, webm. | |
| question | No | Question or instruction about the video (default: describe). | |
| model | No | Override the model ID. | |
Implementation Reference
- Main handler function that accepts a video path, question, and optional model; prepares video data (base64-encoded), sends it via the OpenAI SDK using OpenRouter's video_url multimodal extension, and returns the text response.
// Handles the analyze_video tool call: validates input, loads the video as
// base64, submits it through OpenRouter's `video_url` multimodal extension,
// and returns the model's textual answer (or a structured tool error).
export async function handleAnalyzeVideo(
  request: { params: { arguments: AnalyzeVideoToolRequest } },
  openai: OpenAI,
  defaultModel?: string,
) {
  const { video_path, question, model } = request.params.arguments ?? {
    video_path: '',
  };
  if (!video_path) {
    return toolError(ErrorCode.INVALID_INPUT, 'video_path is required.');
  }

  // Model resolution order: explicit argument > env override > server default > fallback.
  const resolvedModel =
    model ||
    process.env.OPENROUTER_DEFAULT_VIDEO_MODEL ||
    defaultModel ||
    FALLBACK_DEFAULT_MODEL;

  // Load and base64-encode the video; map preparation failures onto the
  // closest tool error code by inspecting the error message text.
  let videoData;
  try {
    videoData = await prepareVideoData(video_path);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    if (message.includes('Blocked host')) {
      return toolErrorFrom(ErrorCode.UPSTREAM_REFUSED, err);
    }
    if (message.includes('too large')) {
      return toolErrorFrom(ErrorCode.RESOURCE_TOO_LARGE, err);
    }
    if (message.includes('Unsupported') || message.includes('not a video')) {
      return toolErrorFrom(ErrorCode.UNSUPPORTED_FORMAT, err);
    }
    return toolErrorFrom(ErrorCode.INVALID_INPUT, err);
  }

  // Fall back to a generic description prompt when no question is given.
  const prompt = question || 'Describe what happens in this video, step by step.';

  let completion: ChatCompletion;
  try {
    logger.debug('analyze_video.submit', {
      model: resolvedModel,
      format: videoData.format,
      size_bytes: videoData.sizeBytes,
    });
    completion = await openai.chat.completions.create({
      model: resolvedModel,
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: prompt },
            {
              // The `video_url` content type is an OpenRouter extension; the
              // OpenAI SDK's typings don't know about it yet. See:
              // https://openrouter.ai/docs/guides/overview/multimodal/videos
              type: 'video_url',
              video_url: {
                url: `data:${videoData.mediaType};base64,${videoData.data}`,
              },
            },
          ],
        },
      ] as unknown as ChatCompletionMessageParam[],
    });
  } catch (err) {
    logger.warn('analyze_video.error', {
      err: err instanceof Error ? err.message : String(err),
    });
    return classifyUpstreamError(err);
  }

  const extracted = extractCompletionText(completion);

  // A reasoning cutoff (model stopped mid-thought) is surfaced as its own result.
  const cutoff = detectReasoningCutoff(extracted);
  if (cutoff) return cutoff;

  if (!extracted.text) {
    return toolError(ErrorCode.INTERNAL, 'Video model returned no textual content.', {
      finish_reason: extracted.finishReason,
    });
  }

  return {
    content: [{ type: 'text' as const, text: extracted.text }],
    _meta: {
      finish_reason: extracted.finishReason,
      ...(toUsageMeta(extracted.usage) ?? {}),
    },
  };
}
- Input schema for the analyze_video tool: video_path (required), question (optional), model (optional).
/**
 * Arguments accepted by the analyze_video tool.
 */
export interface AnalyzeVideoToolRequest {
  /** File path, HTTP(S) URL, or base64 data URL of the video (required). */
  video_path: string;
  /** Question or instruction about the video; a default prompt is used when omitted. */
  question?: string;
  /** Overrides the model ID; otherwise env/server defaults apply. */
  model?: string;
}
- src/tool-handlers.ts:188-213 (registration) Tool registration in the MCP server: defines the tool name, description, annotations, and inputSchema for analyze_video.
{
  name: 'analyze_video',
  description:
    'Analyze or transcribe a video file using a multimodal model. Accepts mp4, mpeg, mov, or webm from a local file path, HTTP(S) URL, or base64 data URL. Default model: google/gemini-2.5-flash.',
  annotations: {
    // Analysis never mutates anything; repeated calls may return different text.
    readOnlyHint: true,
    destructiveHint: false,
    idempotentHint: false,
  },
  inputSchema: {
    type: 'object',
    properties: {
      video_path: {
        type: 'string',
        description:
          'File path, HTTP(S) URL, or base64 data URL. Supported formats: mp4, mpeg, mov, webm.',
      },
      question: {
        type: 'string',
        description: 'Question or instruction about the video (default: describe).',
      },
      model: { type: 'string', description: 'Override the model ID.' },
    },
    required: ['video_path'],
  },
},
- src/tool-handlers.ts:487-492 (registration) Request handler that routes the 'analyze_video' tool call to handleAnalyzeVideo with wrapped arguments.
// Dispatch arm for the analyze_video tool: wraps the raw (possibly undefined)
// arguments and forwards the shared OpenAI client plus the server-level default model.
case 'analyze_video': return handleAnalyzeVideo( wrapToolArgs(args as AnalyzeVideoToolRequest | undefined), this.openai, this.defaultModel, ); - Helper function that prepares video data from any source (data URL, HTTP URL, or local file) by fetching/reading, detecting format, and base64-encoding.
export async function prepareVideoData(source: string): Promise<VideoData> { // --- data URL --- if (source.startsWith('data:')) { const parsed = parseBase64DataUrl(source); if (!parsed) throw new Error('Invalid video data URL'); if (!parsed.mediaType.startsWith('video/')) { throw new Error(`Data URL is not a video/* MIME: ${parsed.mediaType}`); } const format = mimeSubtypeToFormat(parsed.mediaType.slice(6)); if (!format) { throw new Error( `Unsupported video format from MIME: ${parsed.mediaType}. Supported: ${SUPPORTED_VIDEO_FORMATS.join(', ')}`, ); } const approxBytes = Math.ceil((parsed.base64.length * 3) / 4); if (approxBytes > getMaxDataUrlBytes()) throw new Error('Video data URL too large'); return { data: parsed.base64, format, mediaType: getVideoMimeType(format), sizeBytes: approxBytes, }; } // --- HTTP(S) URL --- if (source.startsWith('http://') || source.startsWith('https://')) { const { buffer, contentType } = await fetchHttpResource(source, { timeoutMs: getFetchTimeoutMs(), maxBytes: getMaxDownloadBytes(), maxRedirects: getMaxRedirects(), }); const urlPath = new URL(source).pathname; const format = detectVideoFormat(buffer) ?? getVideoFormat(urlPath) ?? formatFromContentType(contentType); if (!format) { throw new Error( `Could not determine video format from ${source}. Supported: ${SUPPORTED_VIDEO_FORMATS.join(', ')}`, ); } return { data: buffer.toString('base64'), format, mediaType: getVideoMimeType(format), sizeBytes: buffer.length, }; } // --- local file --- const buffer = await fs.readFile(source); const format = detectVideoFormat(buffer) ?? getVideoFormat(source); if (!format) { throw new Error( `Unsupported video format for file: ${source}. Supported: ${SUPPORTED_VIDEO_FORMATS.join(', ')}`, ); } return { data: buffer.toString('base64'), format, mediaType: getVideoMimeType(format), sizeBytes: buffer.length, }; }