/**
* Copyright 2025 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import {
ActionMetadata,
GenerateResponseData,
Operation,
modelActionMetadata,
z,
} from 'genkit';
import {
BackgroundModelAction,
modelRef,
type GenerateRequest,
type ModelInfo,
type ModelReference,
} from 'genkit/model';
import { backgroundModel as pluginBackgroundModel } from 'genkit/plugin';
import { veoCheckOperation, veoPredict } from './client.js';
import {
ClientOptions,
GoogleAIPluginOptions,
Model,
VeoOperation,
VeoParameters,
VeoPredictRequest,
} from './types.js';
import {
calculateApiKey,
checkModelName,
extractText,
extractVeoImage,
extractVersion,
modelName,
} from './utils.js';
/**
* See https://ai.google.dev/gemini-api/docs/video
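*
* Example config (a sketch; the values shown are illustrative, not exhaustive):
*
* ```ts
* const config: VeoConfig = {
*   aspectRatio: '16:9',
*   durationSeconds: 8,
*   personGeneration: 'allow_adult',
*   enhancePrompt: false,
* };
* ```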
*/
export const VeoConfigSchema = z
.object({
// NOTE: The documentation lists a numberOfVideos parameter for choosing the
// number of output videos, but this setting does not appear to work.
negativePrompt: z.string().optional(),
aspectRatio: z
.enum(['9:16', '16:9'])
.describe('Desired aspect ratio of the output video.')
.optional(),
personGeneration: z
.enum(['dont_allow', 'allow_adult', 'allow_all'])
.describe(
'Control if/how images of people will be generated by the model.'
)
.optional(),
durationSeconds: z
.number()
.step(1)
.min(5)
.max(8)
.describe('Length of each output video in seconds, between 5 and 8.')
.optional(),
enhancePrompt: z
.boolean()
.describe('Enable or disable the prompt rewriter. Enabled by default.')
.optional(),
})
.passthrough();
export type VeoConfigSchemaType = typeof VeoConfigSchema;
export type VeoConfig = z.infer<VeoConfigSchemaType>;
// This contains all the Veo config schema types
type ConfigSchemaType = VeoConfigSchemaType;
function commonRef(
name: string,
info?: ModelInfo,
configSchema: ConfigSchemaType = VeoConfigSchema
): ModelReference<ConfigSchemaType> {
return modelRef({
name: `googleai/${name}`,
configSchema,
info:
info ??
({
supports: {
media: true,
multiturn: false,
tools: false,
systemRole: false,
output: ['media'],
longRunning: true,
},
} as ModelInfo), // TODO(ifielker): Remove this cast if we fix longRunning
});
}
const GENERIC_MODEL = commonRef('veo');
const KNOWN_MODELS = {
'veo-3.0-generate-preview': commonRef('veo-3.0-generate-preview'),
'veo-3.0-fast-generate-preview': commonRef('veo-3.0-fast-generate-preview'),
'veo-2.0-generate-001': commonRef('veo-2.0-generate-001'),
} as const;
export type KnownModels = keyof typeof KNOWN_MODELS; // For autocomplete
export type VeoModelName = `veo-${string}`;
export function isVeoModelName(value?: string): value is VeoModelName {
return !!value?.startsWith('veo-');
}
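/**
 * Returns a ModelReference for an arbitrary Veo model version, using the
 * generic Veo model info.
 *
 * Example (a sketch; the version string and config values are illustrative):
 *
 * ```ts
 * const veo2 = model('veo-2.0-generate-001', { aspectRatio: '16:9' });
 * // veo2.name === 'googleai/veo-2.0-generate-001'
 * ```
 */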
export function model(
version: string,
config: VeoConfig = {}
): ModelReference<ConfigSchemaType> {
const name = checkModelName(version);
return modelRef({
name: `googleai/${name}`,
config,
configSchema: VeoConfigSchema,
info: { ...GENERIC_MODEL.info },
});
}
// Takes the full list of available models, keeps only current
// (non-deprecated) Veo models, and returns modelActionMetadata for each.
export function listActions(models: Model[]): ActionMetadata[] {
return (
models
.filter(
(m) =>
m.supportedGenerationMethods.includes('predictLongRunning') &&
isVeoModelName(modelName(m.name))
)
// Filter out deprecated
.filter((m) => !m.description || !m.description.includes('deprecated'))
.map((m) => {
const ref = model(m.name);
return modelActionMetadata({
name: ref.name,
info: ref.info,
configSchema: ref.configSchema,
});
})
);
}
export function listKnownModels(options?: GoogleAIPluginOptions) {
return Object.keys(KNOWN_MODELS).map((name: string) =>
defineModel(name, options)
);
}
/**
* Defines a new GoogleAI Veo model.
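*
* Example (a sketch; assumes calculateApiKey can resolve an API key from the
* plugin options or the environment; the env var name shown is illustrative):
*
* ```ts
* const veoModel = defineModel('veo-3.0-generate-preview', {
*   apiKey: process.env.GEMINI_API_KEY,
* });
* ```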
*/
export function defineModel(
name: string,
pluginOptions?: GoogleAIPluginOptions
): BackgroundModelAction<VeoConfigSchemaType> {
const ref = model(name);
const clientOptions: ClientOptions = {
apiVersion: pluginOptions?.apiVersion,
baseUrl: pluginOptions?.baseUrl,
};
return pluginBackgroundModel({
name: ref.name,
...ref.info,
configSchema: ref.configSchema,
async start(request) {
const apiKey = calculateApiKey(pluginOptions?.apiKey, undefined);
const veoPredictRequest: VeoPredictRequest = {
instances: [
{
prompt: extractText(request),
image: extractVeoImage(request),
},
],
parameters: toVeoParameters(request),
};
const response = await veoPredict(
apiKey,
extractVersion(ref),
veoPredictRequest,
clientOptions
);
return fromVeoOperation(response);
},
async check(operation) {
const apiKey = calculateApiKey(pluginOptions?.apiKey, undefined);
const response = await veoCheckOperation(
apiKey,
operation.id,
clientOptions
);
return fromVeoOperation(response);
},
});
}
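// Converts the Genkit request config into the Veo `parameters` payload.
// Worked example (a sketch; the config keys shown are illustrative): a config
// of
//   { aspectRatio: '16:9', durationSeconds: 8, version: 'veo-2.0-generate-001' }
// becomes
//   { aspectRatio: '16:9', durationSeconds: 8 }
// because apiKey and version are consumed elsewhere and never sent to the
// endpoint.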
function toVeoParameters(
request: GenerateRequest<VeoConfigSchemaType>
): VeoParameters {
const out = {
...request?.config,
};
for (const k in out) {
// Strip only null values: undefined is already dropped by JSON.stringify,
// and false must survive so enhancePrompt can be explicitly disabled.
if (out[k] === null) delete out[k];
}
// apiKey is not part of the request parameters sent to the endpoint;
// it is extracted and used separately.
delete out.apiKey;
// version was only used to select the model; it is not a Veo parameter.
delete out.version;
return out;
}
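// Maps a raw Veo long-running operation onto a Genkit Operation. Worked
// example (a sketch; the operation name and URI are illustrative): a completed
// operation of the form
//   { name: 'operations/123', done: true, response: { generateVideoResponse:
//     { generatedSamples: [{ video: { uri: 'https://...' } }] } } }
// becomes
//   { id: 'operations/123', done: true, output: { finishReason: 'stop', ... } }
// with one media content part per generated sample.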
function fromVeoOperation(
apiOp: VeoOperation
): Operation<GenerateResponseData> {
const res = { id: apiOp.name } as Operation<GenerateResponseData>;
if (apiOp.done !== undefined) {
res.done = apiOp.done;
}
if (apiOp.error) {
res.error = { message: apiOp.error.message };
}
if (
apiOp.response &&
apiOp.response.generateVideoResponse &&
apiOp.response.generateVideoResponse.generatedSamples
) {
res.output = {
finishReason: 'stop',
raw: apiOp.response,
message: {
role: 'model',
content: apiOp.response.generateVideoResponse.generatedSamples.map(
(s) => {
return {
media: {
url: s.video.uri,
},
};
}
),
},
};
}
return res;
}
export const TEST_ONLY = {
toVeoParameters,
fromVeoOperation,
GENERIC_MODEL,
KNOWN_MODELS,
};