import axios, { AxiosInstance } from 'axios';
import FormData from 'form-data';
import * as fs from 'fs';
import * as path from 'path';
import * as crypto from 'crypto';
export interface UploadImageResponse {
file: {
name: string;
displayName?: string;
mimeType: string;
sizeBytes: string;
createTime: string;
updateTime: string;
expirationTime: string;
sha256Hash: string;
uri: string;
state: string;
};
}
export interface ReferenceImage {
source: 'url' | 'file_uri' | 'file_path' | 'inline';
url?: string;
fileUri?: string;
filePath?: string;
mimeType?: string;
base64?: string;
}
export interface StartVideoGenerationRequest {
model?: 'veo-3.1-generate-preview' | 'veo-3.1-fast-generate-preview';
prompt: string;
durationSeconds?: 4 | 6 | 8;
aspectRatio?: '16:9' | '9:16';
resolution?: '720p' | '1080p';
seed?: number;
sampleCount?: number;
generateAudio?: boolean;
referenceImages?: ReferenceImage[];
firstFrame?: ReferenceImage;
lastFrame?: ReferenceImage;
negativePrompt?: string;
resizeMode?: 'pad' | 'crop';
}
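// Example StartVideoGenerationRequest literal (all values below are illustrative):
// {
//   prompt: 'A paper boat drifting down a rain-soaked street at dusk',
//   durationSeconds: 6,
//   aspectRatio: '9:16',
//   resolution: '720p',
//   generateAudio: true,
//   negativePrompt: 'text, watermark',
// }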
export interface VideoJobStatus {
done: boolean;
status: string;
operationName: string;
videos?: Array<{
videoUri: string;
mimeType: string;
durationSeconds?: number;
resolution?: string;
}>;
error?: any;
}
export interface ExtendVideoRequest {
model?: 'veo-3.1-generate-preview' | 'veo-3.1-fast-generate-preview';
videoFileUri: string;
additionalSeconds: number;
prompt?: string;
seed?: number;
}
export interface CostEstimate {
estimatedCostUsd: number;
unitPricePerSec: number;
secondsBilled: number;
breakdown: string;
}
export class VeoClient {
private apiKey: string;
private baseUrl: string;
private filesBaseUrl: string;
private projectId: string;
private location: string;
private axios: AxiosInstance;
private fileCache: Map<string, { uri: string; uploadedAt: number }>; // hash -> {uri, timestamp}
constructor(apiKey: string, projectId?: string, location?: string) {
this.apiKey = apiKey;
this.projectId = projectId || 'gen-lang-client-0329190003';
this.location = location || 'us-central1';
// Use Gemini API endpoints (Veo 3.1 is available via Gemini API since Oct 2025)
this.baseUrl = 'https://generativelanguage.googleapis.com/v1beta';
this.filesBaseUrl = 'https://generativelanguage.googleapis.com/upload/v1beta';
this.fileCache = new Map();
this.axios = axios.create({
headers: {
'x-goog-api-key': this.apiKey,
},
});
}
/**
* Upload an image to Files API (for references, first/last frames)
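   *
   * Minimal usage sketch (the API key variable, path, and display name are illustrative):
   * @example
   * const client = new VeoClient(process.env.GEMINI_API_KEY!);
   * const uploaded = await client.uploadImage('./frames/first.png', 'first-frame');
   * console.log(uploaded.file.uri); // reuse via a ReferenceImage with source 'file_uri'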
*/
async uploadImage(filePath: string, displayName?: string): Promise<UploadImageResponse> {
try {
      process.stderr.write(`📤 Uploading image: ${filePath}\n`);
const fileBuffer = fs.readFileSync(filePath);
const fileName = path.basename(filePath);
const mimeType = this.getMimeType(filePath);
const formData = new FormData();
const metadata = {
file: {
displayName: displayName || fileName,
}
};
formData.append('metadata', JSON.stringify(metadata), {
contentType: 'application/json',
});
formData.append('file', fileBuffer, {
filename: fileName,
contentType: mimeType,
});
const response = await axios.post<UploadImageResponse>(
`${this.filesBaseUrl}/files`,
formData,
{
headers: {
...formData.getHeaders(),
'x-goog-api-key': this.apiKey,
},
}
);
      process.stderr.write(`✅ Image uploaded: ${response.data.file.uri}\n`);
return response.data;
} catch (error) {
if (axios.isAxiosError(error)) {
const errorMsg = error.response?.data?.error?.message || error.message;
throw new Error(`Image upload failed: ${errorMsg}`);
}
throw error;
}
}
/**
* Resolve a reference image to fileUri (token-efficient, cached)
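   *
   * Accepted ReferenceImage shapes, sketched with illustrative values:
   * @example
   * // Remote URL (downloaded, hashed, then uploaded to the Files API)
   * { source: 'url', url: 'https://example.com/ref.png' }
   * // Existing Files API URI (used as-is, no re-upload)
   * { source: 'file_uri', fileUri: 'https://generativelanguage.googleapis.com/v1beta/files/abc123', mimeType: 'image/png' }
   * // Local file path
   * { source: 'file_path', filePath: './refs/style.png' }
   * // Inline base64 payload
   * { source: 'inline', base64: '<base64-encoded bytes>', mimeType: 'image/jpeg' }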
*/
private async resolveReferenceImage(ref: ReferenceImage): Promise<{ mimeType: string; fileUri: string }> {
// Already a fileUri
if (ref.source === 'file_uri' && ref.fileUri) {
return {
mimeType: ref.mimeType || 'image/png',
fileUri: ref.fileUri,
};
}
let bytes: Buffer | null = null;
let mimeType = ref.mimeType || 'image/png';
// Download from URL
if (ref.source === 'url' && ref.url) {
      process.stderr.write(`📥 Downloading reference from URL: ${ref.url}\n`);
const response = await axios.get(ref.url, { responseType: 'arraybuffer' });
mimeType = response.headers['content-type'] || mimeType;
bytes = Buffer.from(response.data);
}
// Read from file path
else if (ref.source === 'file_path' && ref.filePath) {
      process.stderr.write(`📁 Reading reference from file: ${ref.filePath}\n`);
bytes = fs.readFileSync(ref.filePath);
mimeType = this.getMimeType(ref.filePath);
}
// Inline base64
else if (ref.source === 'inline' && ref.base64) {
      process.stderr.write(`📄 Using inline reference (base64)\n`);
bytes = Buffer.from(ref.base64, 'base64');
}
if (!bytes) {
throw new Error('Invalid reference image: no valid source provided');
}
// Check cache by content hash
const hash = crypto.createHash('sha256').update(bytes).digest('hex');
const cached = this.fileCache.get(hash);
// Cache valid for 48 hours
if (cached && (Date.now() - cached.uploadedAt) < 48 * 60 * 60 * 1000) {
      process.stderr.write(`💾 Using cached fileUri: ${cached.uri}\n`);
return { mimeType, fileUri: cached.uri };
}
// Upload to Files API
    process.stderr.write(`⬆️ Uploading ${(bytes.length / 1024).toFixed(2)} KB to Files API...\n`);
const fileUri = await this.uploadBytes(bytes, mimeType);
// Cache for future use
this.fileCache.set(hash, { uri: fileUri, uploadedAt: Date.now() });
return { mimeType, fileUri };
}
/**
* Upload raw bytes to Files API
*/
private async uploadBytes(bytes: Buffer, mimeType: string): Promise<string> {
try {
const response = await axios.post(
`${this.filesBaseUrl}/files`,
bytes,
{
headers: {
'x-goog-api-key': this.apiKey,
'Content-Type': mimeType,
},
}
);
const uri = response.data.file?.uri;
if (!uri) {
throw new Error('Files API upload failed: no URI returned');
}
return uri;
} catch (error) {
if (axios.isAxiosError(error)) {
throw new Error(`Upload failed: ${error.response?.data?.error?.message || error.message}`);
}
throw error;
}
}
/**
* Start Veo 3.1 video generation (returns operation name for polling)
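   *
   * Minimal usage sketch (prompt and settings are illustrative; call from an async context):
   * @example
   * const client = new VeoClient(process.env.GEMINI_API_KEY!);
   * const { operationName } = await client.startVideoGeneration({
   *   prompt: 'A hummingbird hovering over a red flower, macro shot, slow motion',
   *   durationSeconds: 8,
   *   aspectRatio: '16:9',
   *   resolution: '1080p',
   *   generateAudio: true,
   * });
   * // Poll with getVideoJob(operationName) until done === true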
*/
async startVideoGeneration(request: StartVideoGenerationRequest): Promise<{ operationName: string; done: boolean }> {
try {
      process.stderr.write(`\n🎬 Starting Veo 3.1 video generation...\n`);
process.stderr.write(` Prompt: ${request.prompt}\n`);
// Validate inputs
this.validateVideoRequest(request);
const model = request.model || 'veo-3.1-fast-generate-preview';
// Resolve reference images
const referenceImages: any[] = [];
if (request.referenceImages && request.referenceImages.length > 0) {
        process.stderr.write(`📸 Processing ${request.referenceImages.length} reference image(s)...\n`);
for (const ref of request.referenceImages) {
const resolved = await this.resolveReferenceImage(ref);
referenceImages.push({
fileData: {
mimeType: resolved.mimeType,
fileUri: resolved.fileUri,
}
});
}
}
// Resolve first/last frames
let firstFrame: any = null;
let lastFrame: any = null;
if (request.firstFrame && request.lastFrame) {
        process.stderr.write(`🖼️ Processing first & last frames for interpolation...\n`);
const firstResolved = await this.resolveReferenceImage(request.firstFrame);
const lastResolved = await this.resolveReferenceImage(request.lastFrame);
firstFrame = {
fileData: {
mimeType: firstResolved.mimeType,
fileUri: firstResolved.fileUri,
}
};
lastFrame = {
fileData: {
mimeType: lastResolved.mimeType,
fileUri: lastResolved.fileUri,
}
};
}
// Build video config
const videoConfig: any = {
durationSeconds: request.durationSeconds || 8,
aspectRatio: request.aspectRatio || '16:9',
resolution: request.resolution || '1080p',
sampleCount: request.sampleCount || 1,
generateAudio: request.generateAudio || false,
};
if (request.seed !== undefined) {
videoConfig.seed = request.seed;
}
if (request.resizeMode) {
videoConfig.resizeMode = request.resizeMode;
}
// Build request body
const body: any = {
prompt: request.prompt,
videoConfig,
};
if (referenceImages.length > 0) {
body.referenceImages = referenceImages;
}
if (firstFrame) body.firstFrame = firstFrame;
if (lastFrame) body.lastFrame = lastFrame;
if (request.negativePrompt) body.negativePrompt = request.negativePrompt;
      process.stderr.write(`📡 Calling Veo API: ${model}\n`);
// Call Veo API via Gemini API using predictLongRunning endpoint
// Format: instances array + parameters object (as per official docs)
const requestBody: any = {
instances: [{
prompt: body.prompt,
...(body.referenceImages && { referenceImages: body.referenceImages }),
...(body.firstFrame && { firstFrame: body.firstFrame }),
...(body.lastFrame && { lastFrame: body.lastFrame }),
}]
};
// Parameters go in a separate parameters object
const parameters: any = {};
if (body.videoConfig) {
if (body.videoConfig.aspectRatio) parameters.aspectRatio = body.videoConfig.aspectRatio;
if (body.videoConfig.resolution) parameters.resolution = body.videoConfig.resolution;
if (body.videoConfig.durationSeconds) parameters.durationSeconds = body.videoConfig.durationSeconds;
if (body.videoConfig.seed !== undefined) parameters.seed = body.videoConfig.seed;
if (body.videoConfig.sampleCount) parameters.sampleCount = body.videoConfig.sampleCount;
if (body.videoConfig.resizeMode) parameters.resizeMode = body.videoConfig.resizeMode;
}
if (body.negativePrompt) parameters.negativePrompt = body.negativePrompt;
if (Object.keys(parameters).length > 0) {
requestBody.parameters = parameters;
}
const response = await this.axios.post(
`${this.baseUrl}/models/${model}:predictLongRunning`,
requestBody
);
const operationName = response.data.name;
      process.stderr.write(`✅ Video generation started!\n`);
process.stderr.write(` Operation: ${operationName}\n`);
process.stderr.write(` Status: Processing (use get_video_job to poll)\n`);
return {
operationName,
done: false,
};
} catch (error) {
if (axios.isAxiosError(error)) {
const errorMsg = error.response?.data?.error?.message || error.message;
        process.stderr.write(`❌ Generation failed: ${errorMsg}\n`);
if (error.response?.data) {
process.stderr.write(`Full error: ${JSON.stringify(error.response.data, null, 2)}\n`);
}
throw new Error(`Video generation failed: ${errorMsg}`);
}
throw error;
}
}
/**
* Get video job status (poll operation)
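   *
   * Simple polling sketch (the 10-second interval is an arbitrary choice):
   * @example
   * let job = await client.getVideoJob(operationName);
   * while (!job.done) {
   *   await new Promise((resolve) => setTimeout(resolve, 10_000));
   *   job = await client.getVideoJob(operationName);
   * }
   * if (job.status === 'SUCCEEDED') console.log(job.videos?.[0]?.videoUri);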
*/
async getVideoJob(operationName: string): Promise<VideoJobStatus> {
try {
      process.stderr.write(`🔍 Checking job status: ${operationName}\n`);
const response = await this.axios.get(
`${this.baseUrl}/${operationName}`
);
const data = response.data;
if (!data.done) {
const status = data.metadata?.state || 'RUNNING';
        process.stderr.write(`⏳ Status: ${status}\n`);
return {
done: false,
status,
operationName,
};
}
// Check for errors
if (data.error) {
        process.stderr.write(`❌ Job failed: ${data.error.message}\n`);
return {
done: true,
status: 'FAILED',
operationName,
error: data.error,
};
}
// Extract videos - REST API format is different from SDK
// REST: response.generateVideoResponse.generatedSamples[].video.uri
const generatedSamples = data.response?.generateVideoResponse?.generatedSamples || [];
const videos = generatedSamples.map((sample: any) => ({
videoUri: sample.video?.uri || '',
mimeType: sample.video?.mimeType || 'video/mp4',
durationSeconds: sample.video?.durationSeconds,
resolution: sample.video?.resolution,
}));
      process.stderr.write(`✅ Job complete! ${videos.length} video(s) generated\n`);
videos.forEach((v: any, i: number) => {
process.stderr.write(` Video ${i + 1}: ${v.videoUri}\n`);
});
return {
done: true,
status: 'SUCCEEDED',
operationName,
videos,
};
} catch (error) {
if (axios.isAxiosError(error)) {
throw new Error(`Failed to get job status: ${error.response?.data?.error?.message || error.message}`);
}
throw error;
}
}
/**
* Extend a Veo-generated video
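   *
   * Usage sketch (the file URI is illustrative; it must reference a Veo-generated video):
   * @example
   * const { operationName } = await client.extendVideo({
   *   videoFileUri: 'https://generativelanguage.googleapis.com/v1beta/files/xyz789',
   *   additionalSeconds: 4,
   *   prompt: 'The camera slowly pulls back to reveal the whole garden',
   * });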
*/
async extendVideo(request: ExtendVideoRequest): Promise<{ operationName: string; done: boolean }> {
try {
      process.stderr.write(`\n🎬 Extending video...\n`);
const model = request.model || 'veo-3.1-fast-generate-preview';
const body: any = {
videoFileUri: request.videoFileUri,
prompt: request.prompt || 'Continue the video',
videoConfig: {
durationSeconds: request.additionalSeconds,
},
};
if (request.seed !== undefined) {
body.videoConfig.seed = request.seed;
}
      process.stderr.write(`📡 Calling Veo extension API: ${model}\n`);
const requestBody: any = {
instances: [{
videoFileUri: body.videoFileUri,
prompt: body.prompt,
}]
};
// Parameters go in a separate parameters object
if (body.videoConfig) {
const parameters: any = {};
if (body.videoConfig.durationSeconds) parameters.durationSeconds = body.videoConfig.durationSeconds;
if (body.videoConfig.seed !== undefined) parameters.seed = body.videoConfig.seed;
requestBody.parameters = parameters;
}
const response = await this.axios.post(
`${this.baseUrl}/models/${model}:predictLongRunning`,
requestBody
);
const operationName = response.data.name;
      process.stderr.write(`✅ Video extension started!\n`);
process.stderr.write(` Operation: ${operationName}\n`);
return {
operationName,
done: false,
};
} catch (error) {
if (axios.isAxiosError(error)) {
throw new Error(`Video extension failed: ${error.response?.data?.error?.message || error.message}`);
}
throw error;
}
}
/**
* Estimate cost for a video generation request
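   *
   * Example using the per-second rates hard-coded below (fast model default):
   * @example
   * const estimate = client.estimateCost({
   *   prompt: 'A storm rolling in over the ocean',
   *   durationSeconds: 8,
   *   sampleCount: 2,
   * });
   * // $0.10/sec × 8s × 2 sample(s) = $1.60
   * console.log(estimate.breakdown);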
*/
estimateCost(request: StartVideoGenerationRequest): CostEstimate {
const model = request.model || 'veo-3.1-fast-generate-preview';
const durationSeconds = request.durationSeconds || 8;
const sampleCount = request.sampleCount || 1;
const generateAudio = request.generateAudio || false;
// Pricing per second (from Google docs: $0.75/sec for quality, includes audio)
let unitPricePerSec: number;
if (model === 'veo-3.1-generate-preview') {
unitPricePerSec = 0.75; // Includes audio
} else { // veo-3.1-fast-generate-preview
unitPricePerSec = 0.10; // Video + audio
}
const secondsBilled = durationSeconds * sampleCount;
const estimatedCostUsd = unitPricePerSec * secondsBilled;
    const breakdown = `${model} (${generateAudio ? 'with audio' : 'video only'}): $${unitPricePerSec}/sec × ${durationSeconds}s × ${sampleCount} sample(s) = $${estimatedCostUsd.toFixed(2)}`;
return {
estimatedCostUsd,
unitPricePerSec,
secondsBilled,
breakdown,
};
}
/**
* Validate video generation request
*/
private validateVideoRequest(request: StartVideoGenerationRequest): void {
// Duration validation
if (request.durationSeconds && ![4, 6, 8].includes(request.durationSeconds)) {
throw new Error('durationSeconds must be 4, 6, or 8');
}
// Reference image count
if (request.referenceImages && request.referenceImages.length > 3) {
throw new Error('Maximum 3 reference images allowed');
}
// Sample count
if (request.sampleCount && request.sampleCount > 4) {
throw new Error('sampleCount must be <= 4');
}
// First/last frame validation
if ((request.firstFrame && !request.lastFrame) || (!request.firstFrame && request.lastFrame)) {
throw new Error('firstFrame and lastFrame must both be present or both absent');
}
// Aspect ratio validation for reference mode
if (request.referenceImages && request.referenceImages.length > 0 && request.aspectRatio === '9:16') {
      process.stderr.write(`⚠️ Warning: 9:16 aspect ratio may not be supported with reference images\n`);
}
}
/**
* Get MIME type from file extension
*/
private getMimeType(filePath: string): string {
const ext = path.extname(filePath).toLowerCase();
const mimeTypes: Record<string, string> = {
'.jpg': 'image/jpeg',
'.jpeg': 'image/jpeg',
'.png': 'image/png',
'.gif': 'image/gif',
'.webp': 'image/webp',
'.bmp': 'image/bmp',
};
return mimeTypes[ext] || 'image/jpeg';
}
}
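/*
 * End-to-end usage sketch, assuming an async context and a valid Gemini API key
 * (prompts, paths, and the polling interval below are illustrative):
 *
 *   const client = new VeoClient(process.env.GEMINI_API_KEY!);
 *   console.log(client.estimateCost({ prompt: 'Sunrise over sand dunes' }).breakdown);
 *
 *   const { operationName } = await client.startVideoGeneration({
 *     prompt: 'Sunrise over sand dunes, drone flyover, warm light',
 *     durationSeconds: 8,
 *     referenceImages: [{ source: 'file_path', filePath: './refs/dunes.jpg' }],
 *   });
 *
 *   let job = await client.getVideoJob(operationName);
 *   while (!job.done) {
 *     await new Promise((resolve) => setTimeout(resolve, 15_000));
 *     job = await client.getVideoJob(operationName);
 *   }
 *   if (job.videos?.length) console.log('Video ready:', job.videos[0].videoUri);
 */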