#!/usr/bin/env node
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import {
CallToolRequestSchema,
ListToolsRequestSchema,
ErrorCode,
McpError,
Tool,
CallToolResult,
} from "@modelcontextprotocol/sdk/types.js";
import { VeoClient, StartVideoGenerationRequest, ExtendVideoRequest, ReferenceImage } from './veo-client.js';
import * as fs from 'fs';
// Define available tools
const TOOLS: Tool[] = [
{
name: "upload_image",
description: "Upload an image to Google Files API for use as reference, first frame, or last frame in video generation. Returns a fileUri that can be reused for 48 hours. This is the most token-efficient way to pass images to video generation.",
inputSchema: {
type: "object",
properties: {
source: {
type: "string",
enum: ["url", "file_path"],
description: "Source type: 'url' to download from web, 'file_path' for local file"
},
url: {
type: "string",
description: "URL to download (if source='url')"
},
filePath: {
type: "string",
description: "Local file path (if source='file_path')"
},
displayName: {
type: "string",
description: "Optional display name for the uploaded file"
}
},
required: ["source"]
}
},
{
name: "start_video_generation",
description: "Start a Veo 3.1 video generation job. This returns an operation ID immediately - use get_video_job to poll for completion. Supports text-to-video, reference images (up to 3), and first/last frame interpolation.",
inputSchema: {
type: "object",
properties: {
prompt: {
type: "string",
description: "Text description of the video to generate"
},
model: {
type: "string",
enum: ["veo-3.1-generate-preview", "veo-3.1-fast-generate-preview"],
description: "Model to use: veo-3.1-generate-preview (quality, $0.75/sec) or veo-3.1-fast-generate-preview (speed, $0.10/sec). Default: fast"
},
durationSeconds: {
type: "number",
enum: [4, 6, 8],
description: "Video duration: 4, 6, or 8 seconds (default: 8)"
},
aspectRatio: {
type: "string",
enum: ["16:9", "9:16"],
description: "Aspect ratio (default: 16:9). Note: 9:16 may not work with reference images."
},
resolution: {
type: "string",
enum: ["720p", "1080p"],
description: "Video resolution (default: 1080p)"
},
seed: {
type: "number",
description: "Optional seed for reproducible generation"
},
sampleCount: {
type: "number",
description: "Number of videos to generate (1-4, default: 1)"
},
generateAudio: {
type: "boolean",
description: "Whether to generate synchronized audio (default: false). Costs 2x more."
},
referenceImages: {
type: "array",
description: "Up to 3 reference images for visual guidance. Each can be URL, file path, or fileUri.",
items: {
type: "object",
properties: {
source: {
type: "string",
enum: ["url", "file_uri", "file_path", "inline"],
description: "Source type"
},
url: { type: "string" },
fileUri: { type: "string", description: "Pre-uploaded fileUri from upload_image" },
filePath: { type: "string" },
mimeType: { type: "string" },
base64: { type: "string" }
},
required: ["source"]
}
},
firstFrame: {
type: "object",
description: "First frame for interpolation (must also provide lastFrame)",
properties: {
source: { type: "string", enum: ["url", "file_uri", "file_path", "inline"] },
url: { type: "string" },
fileUri: { type: "string" },
filePath: { type: "string" },
mimeType: { type: "string" },
base64: { type: "string" }
},
required: ["source"]
},
lastFrame: {
type: "object",
description: "Last frame for interpolation (must also provide firstFrame)",
properties: {
source: { type: "string", enum: ["url", "file_uri", "file_path", "inline"] },
url: { type: "string" },
fileUri: { type: "string" },
filePath: { type: "string" },
mimeType: { type: "string" },
base64: { type: "string" }
},
required: ["source"]
},
negativePrompt: {
type: "string",
description: "Optional: Things to avoid in the video"
},
resizeMode: {
type: "string",
enum: ["pad", "crop"],
description: "How to fit reference images (default: pad)"
}
},
required: ["prompt"]
}
},
{
name: "get_video_job",
description: "Check the status of a video generation job. Returns status and video URLs when complete. Videos are returned as download URLs - you can save them locally or share the links.",
inputSchema: {
type: "object",
properties: {
operationName: {
type: "string",
description: "Operation name from start_video_generation"
}
},
required: ["operationName"]
}
},
{
name: "extend_video",
description: "Extend a Veo-generated video by additional seconds. Input video must be from a previous Veo generation (not an arbitrary video).",
inputSchema: {
type: "object",
properties: {
videoFileUri: {
type: "string",
description: "FileUri of the Veo-generated video to extend"
},
additionalSeconds: {
type: "number",
description: "Number of seconds to add (typically 7 for 8s extension)"
},
prompt: {
type: "string",
description: "Optional: Continuation prompt describing what should happen next"
},
model: {
type: "string",
enum: ["veo-3.1-generate-preview", "veo-3.1-fast-generate-preview"],
description: "Model to use (default: fast)"
},
seed: {
type: "number",
description: "Optional seed for reproducibility"
}
},
required: ["videoFileUri", "additionalSeconds"]
}
},
{
name: "start_batch_video_generation",
description: "Start multiple video generation jobs with controlled concurrency. Returns operation names for all jobs. Use this to generate multiple videos efficiently while respecting rate limits.",
inputSchema: {
type: "object",
properties: {
jobs: {
type: "array",
description: "Array of video generation requests",
items: {
type: "object",
properties: {
key: {
type: "string",
description: "Unique identifier for this job"
},
request: {
type: "object",
description: "Same parameters as start_video_generation"
}
},
required: ["key", "request"]
}
},
concurrency: {
type: "number",
description: "Max concurrent requests (default: 3, recommend <= 5 to avoid rate limits)"
}
},
required: ["jobs"]
}
},
{
name: "estimate_veo_cost",
description: "Estimate the cost in USD for a video generation request before starting it. Helps plan budgets and batch sizes.",
inputSchema: {
type: "object",
properties: {
model: {
type: "string",
enum: ["veo-3.1-generate-preview", "veo-3.1-fast-generate-preview"],
description: "Model to use"
},
durationSeconds: {
type: "number",
enum: [4, 6, 8],
description: "Video duration"
},
sampleCount: {
type: "number",
description: "Number of videos (default: 1)"
},
generateAudio: {
type: "boolean",
description: "Whether to generate audio (default: false)"
}
},
required: ["model", "durationSeconds"]
}
}
];
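// Illustrative example arguments for the tools above. Values are placeholders
// (not real assets, fileUris, or operation names); the authoritative input
// shapes are the inputSchemas defined in TOOLS.
//
//   upload_image:
//     { "source": "url", "url": "https://example.com/cat.png", "displayName": "cat reference" }
//
//   start_video_generation:
//     {
//       "prompt": "A cat surfing a wave at sunset",
//       "model": "veo-3.1-fast-generate-preview",
//       "durationSeconds": 8,
//       "aspectRatio": "16:9",
//       "referenceImages": [{ "source": "file_uri", "fileUri": "<fileUri from upload_image>" }]
//     }
//
//   get_video_job:
//     { "operationName": "<operationName returned by start_video_generation>" }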
class VeoMcpServer {
private veoClient: VeoClient;
private server: Server;
constructor() {
const apiKey = process.env.GEMINI_API_KEY;
if (!apiKey) {
console.error('Error: GEMINI_API_KEY environment variable is required');
console.error('Get your API key at: https://aistudio.google.com/app/apikey');
process.exit(1);
}
// Get project ID and location from env or use defaults
const projectId = process.env.GOOGLE_CLOUD_PROJECT || 'gen-lang-client-0329190003';
const location = process.env.GOOGLE_CLOUD_LOCATION || 'us-central1';
this.veoClient = new VeoClient(apiKey, projectId, location);
this.server = new Server(
{
name: "veo-mcp-server",
version: "0.1.0",
},
{
capabilities: {
tools: {},
},
}
);
this.setupHandlers();
this.setupErrorHandling();
}
private setupHandlers(): void {
this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
tools: TOOLS
}));
this.server.setRequestHandler(CallToolRequestSchema, async (request) =>
this.handleToolCall(request.params.name, request.params.arguments ?? {})
);
}
private async handleToolCall(name: string, args: any): Promise<CallToolResult> {
try {
switch (name) {
case "upload_image": {
const { source, url, filePath, displayName } = args;
let uploadResult;
if (source === 'file_path' && filePath) {
if (!fs.existsSync(filePath)) {
throw new McpError(ErrorCode.InvalidParams, `File not found: ${filePath}`);
}
uploadResult = await this.veoClient.uploadImage(filePath, displayName);
} else if (source === 'url' && url) {
// Download the remote image and upload it to the Files API via the
// client's internal resolver (a private method, hence the any-cast below)
const ref: ReferenceImage = { source: 'url', url };
const resolved = await (this.veoClient as any).resolveReferenceImage(ref);
return {
content: [{
type: "text",
text: JSON.stringify({
success: true,
fileUri: resolved.fileUri,
mimeType: resolved.mimeType,
note: "File will expire in 48 hours"
}, null, 2),
}],
};
} else {
throw new McpError(ErrorCode.InvalidParams, "Invalid source or missing url/filePath");
}
return {
content: [{
type: "text",
text: JSON.stringify({
success: true,
file: {
uri: uploadResult.file.uri,
name: uploadResult.file.name,
displayName: uploadResult.file.displayName,
mimeType: uploadResult.file.mimeType,
sizeBytes: uploadResult.file.sizeBytes,
expirationTime: uploadResult.file.expirationTime
}
}, null, 2),
}],
};
}
case "start_video_generation": {
const request: StartVideoGenerationRequest = {
prompt: args.prompt,
model: args.model,
durationSeconds: args.durationSeconds,
aspectRatio: args.aspectRatio,
resolution: args.resolution,
seed: args.seed,
sampleCount: args.sampleCount,
generateAudio: args.generateAudio,
referenceImages: args.referenceImages,
firstFrame: args.firstFrame,
lastFrame: args.lastFrame,
negativePrompt: args.negativePrompt,
resizeMode: args.resizeMode,
};
const result = await this.veoClient.startVideoGeneration(request);
return {
content: [{
type: "text",
text: JSON.stringify({
success: true,
operationName: result.operationName,
done: false,
message: "Video generation started. Use get_video_job to poll for completion.",
estimatedTime: "30-120 seconds depending on complexity"
}, null, 2),
}],
};
}
case "get_video_job": {
const { operationName } = args;
const status = await this.veoClient.getVideoJob(operationName);
return {
content: [{
type: "text",
text: JSON.stringify(status, null, 2),
}],
};
}
case "extend_video": {
const request: ExtendVideoRequest = {
videoFileUri: args.videoFileUri,
additionalSeconds: args.additionalSeconds,
prompt: args.prompt,
model: args.model,
seed: args.seed,
};
const result = await this.veoClient.extendVideo(request);
return {
content: [{
type: "text",
text: JSON.stringify({
success: true,
operationName: result.operationName,
done: false,
message: "Video extension started. Use get_video_job to poll for completion."
}, null, 2),
}],
};
}
case "start_batch_video_generation": {
const { jobs, concurrency = 3 } = args;
const operations: Array<{ key: string; operationName: string; error?: string }> = [];
// Process jobs with bounded concurrency: keep at most `concurrency` requests
// in flight; each promise removes itself from `active` when it settles.
const queue = [...jobs];
const active: Promise<void>[] = [];
while (queue.length > 0 || active.length > 0) {
while (active.length < Math.max(1, concurrency) && queue.length > 0) {
const job = queue.shift()!;
const promise: Promise<void> = (async () => {
try {
const result = await this.veoClient.startVideoGeneration(job.request);
operations.push({
key: job.key,
operationName: result.operationName,
});
} catch (error) {
operations.push({
key: job.key,
operationName: '',
error: error instanceof Error ? error.message : String(error),
});
}
})().finally(() => {
// Self-removal guarantees the outer loop eventually drains `active`
const index = active.indexOf(promise);
if (index !== -1) active.splice(index, 1);
});
active.push(promise);
}
// Wait for at least one in-flight job to settle before scheduling more
if (active.length > 0) {
await Promise.race(active);
}
}
return {
content: [{
type: "text",
text: JSON.stringify({
success: true,
batchId: `batch_${Date.now()}`,
totalJobs: jobs.length,
operations,
message: "Use get_video_job for each operationName to check status"
}, null, 2),
}],
};
}
case "estimate_veo_cost": {
const request: StartVideoGenerationRequest = {
prompt: '',
model: args.model,
durationSeconds: args.durationSeconds,
sampleCount: args.sampleCount,
generateAudio: args.generateAudio,
};
const estimate = this.veoClient.estimateCost(request);
return {
content: [{
type: "text",
text: JSON.stringify(estimate, null, 2),
}],
};
}
default:
throw new McpError(
ErrorCode.MethodNotFound,
`Unknown tool: ${name}`
);
}
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
process.stderr.write(`Tool execution error: ${errorMessage}\n`);
return {
content: [{
type: "text",
text: `Error: ${errorMessage}`,
}],
isError: true,
};
}
}
private setupErrorHandling(): void {
this.server.onerror = (error) => {
process.stderr.write(`[MCP Error] ${error}\n`);
};
process.on('SIGINT', async () => {
await this.server.close();
process.exit(0);
});
process.on('uncaughtException', (error) => {
process.stderr.write(`[Uncaught Exception] ${error.message}\n`);
process.exit(1);
});
process.on('unhandledRejection', (reason) => {
process.stderr.write(`[Unhandled Rejection] ${reason}\n`);
});
}
async start(): Promise<void> {
const transport = new StdioServerTransport();
await this.server.connect(transport);
process.stderr.write('🎬 Veo 3.1 MCP server is running\n');
}
}
// Main execution
async function main() {
const server = new VeoMcpServer();
await server.start();
}
main().catch((error) => {
const message = error instanceof Error ? error.message : String(error);
process.stderr.write(`Fatal server error: ${message}\n`);
process.exit(1);
});
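// Example client registration (illustrative only; adjust the command and path
// to your build output). This assumes an MCP client that launches stdio
// servers from a JSON config, e.g. Claude Desktop's claude_desktop_config.json:
//
//   {
//     "mcpServers": {
//       "veo": {
//         "command": "node",
//         "args": ["/path/to/veo-mcp-server/dist/index.js"],
//         "env": { "GEMINI_API_KEY": "<your key>" }
//       }
//     }
//   }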