import { z } from 'zod';
import {
  CONSTRAINTS,
  VOICES,
  type VoiceId,
  type AspectRatio,
  type StyleType,
  type Emotion,
  type AudioFormat,
  type SampleRate,
  type Bitrate,
  type SoundEffect
} from './constants.js';
// Base schemas
const filePathSchema = z.string().min(1, 'File path is required');
const positiveIntSchema = z.number().int().positive();
// Helper functions for generating descriptions
const getSoundEffectsDescription = () => {
  const descriptions = {
    'spacious_echo': 'spacious_echo (空旷回音)',
    'auditorium_echo': 'auditorium_echo (礼堂广播)',
    'lofi_telephone': 'lofi_telephone (电话失真)',
    'robotic': 'robotic (机械音)'
  };
  return `Sound effects. Options: ${CONSTRAINTS.TTS.SOUND_EFFECTS.map(effect => descriptions[effect] || effect).join(', ')}. Only one sound effect can be used per request`;
};
// Image generation schema
export const imageGenerationSchema = z.object({
  prompt: z.string()
    .min(1, 'Prompt is required')
    .max(CONSTRAINTS.IMAGE.PROMPT_MAX_LENGTH, `Prompt must not exceed ${CONSTRAINTS.IMAGE.PROMPT_MAX_LENGTH} characters`),
  outputFile: filePathSchema.describe('Absolute path for generated image'),
  aspectRatio: z.enum(CONSTRAINTS.IMAGE.ASPECT_RATIOS as readonly [AspectRatio, ...AspectRatio[]])
    .default('1:1' as AspectRatio)
    .describe(`Aspect ratio for the image. Options: ${CONSTRAINTS.IMAGE.ASPECT_RATIOS.join(', ')}`),
  customSize: z.object({
    width: z.number()
      .min(CONSTRAINTS.IMAGE.MIN_DIMENSION)
      .max(CONSTRAINTS.IMAGE.MAX_DIMENSION)
      .multipleOf(CONSTRAINTS.IMAGE.DIMENSION_STEP),
    height: z.number()
      .min(CONSTRAINTS.IMAGE.MIN_DIMENSION)
      .max(CONSTRAINTS.IMAGE.MAX_DIMENSION)
      .multipleOf(CONSTRAINTS.IMAGE.DIMENSION_STEP)
  }).optional().describe('Custom image dimensions (width x height in pixels). Range: 512-2048, must be multiples of 8. Total resolution should stay under 2M pixels. Only supported with image-01 model (cannot be used with style parameter). When both customSize and aspectRatio are set, aspectRatio takes precedence'),
  seed: positiveIntSchema.optional().describe('Random seed for reproducible results'),
  subjectReference: z.string().optional().describe('File path to a portrait image for maintaining facial characteristics in generated images. Only supported with image-01 model (cannot be used with style parameter). Provide a clear frontal face photo for best results. Supports local file paths and URLs. Max 10MB, formats: jpg, jpeg, png'),
  style: z.object({
    style_type: z.enum(CONSTRAINTS.IMAGE.STYLE_TYPES as readonly [StyleType, ...StyleType[]])
      .describe(`Art style type. Options: ${CONSTRAINTS.IMAGE.STYLE_TYPES.join(', ')}`),
    style_weight: z.number()
      .min(CONSTRAINTS.IMAGE.STYLE_WEIGHT_MIN, 'Style weight must be greater than 0')
      .max(CONSTRAINTS.IMAGE.STYLE_WEIGHT_MAX, 'Style weight must not exceed 1')
      .default(0.8)
      .describe('Style control weight (0-1]. Higher values apply stronger style effects. Default: 0.8')
  }).optional().describe('Art style control settings. Uses image-01-live model which does not support customSize or subjectReference parameters. Cannot be combined with customSize or subjectReference'),
});
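// Example usage (illustrative sketch; the prompt and output path below are
// hypothetical sample values, not defaults of this module):
//
//   const result = imageGenerationSchema.safeParse({
//     prompt: 'A minimalist poster of a paper crane',
//     outputFile: '/tmp/crane.png'
//   });
//   if (result.success) {
//     // Omitted optional fields are filled with defaults, e.g. aspectRatio === '1:1'
//     console.log(result.data.aspectRatio);
//   } else {
//     console.error(result.error.issues); // per-field Zod issues
//   }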
// Text-to-speech schema
export const textToSpeechSchema = z.object({
  text: z.string()
    .min(1, 'Text is required')
    .max(CONSTRAINTS.TTS.TEXT_MAX_LENGTH, `Text must not exceed ${CONSTRAINTS.TTS.TEXT_MAX_LENGTH} characters`)
    .describe(`Text to convert to speech. Max ${CONSTRAINTS.TTS.TEXT_MAX_LENGTH} characters. Use newlines for paragraph breaks. For custom pauses, insert <#x#> where x is seconds (0.01-99.99, max 2 decimals). Pause markers must be between pronounceable text and cannot be consecutive`),
  outputFile: filePathSchema.describe('Absolute path for audio file'),
  highQuality: z.boolean()
    .default(false)
    .describe('Use high-quality model (speech-02-hd) for audiobooks/premium content. Default: false (uses faster speech-02-turbo)'),
  voiceId: z.enum(Object.keys(VOICES) as [VoiceId, ...VoiceId[]])
    .default('female-shaonv' as VoiceId)
    .describe(`Voice ID for speech generation. Available voices: ${Object.keys(VOICES).map(id => `${id} (${VOICES[id as VoiceId]?.name || id})`).join(', ')}`),
  speed: z.number()
    .min(CONSTRAINTS.TTS.SPEED_MIN)
    .max(CONSTRAINTS.TTS.SPEED_MAX)
    .default(1.0)
    .describe(`Speech speed multiplier (${CONSTRAINTS.TTS.SPEED_MIN}-${CONSTRAINTS.TTS.SPEED_MAX}). Higher values = faster speech`),
  volume: z.number()
    .min(CONSTRAINTS.TTS.VOLUME_MIN)
    .max(CONSTRAINTS.TTS.VOLUME_MAX)
    .default(1.0)
    .describe(`Audio volume level (${CONSTRAINTS.TTS.VOLUME_MIN}-${CONSTRAINTS.TTS.VOLUME_MAX}). Higher values = louder audio`),
  pitch: z.number()
    .min(CONSTRAINTS.TTS.PITCH_MIN)
    .max(CONSTRAINTS.TTS.PITCH_MAX)
    .default(0)
    .describe(`Pitch adjustment in semitones (${CONSTRAINTS.TTS.PITCH_MIN} to ${CONSTRAINTS.TTS.PITCH_MAX}). Negative = lower pitch, Positive = higher pitch`),
  emotion: z.enum(CONSTRAINTS.TTS.EMOTIONS as readonly [Emotion, ...Emotion[]])
    .default('neutral' as Emotion)
    .describe(`Emotional tone of the speech. Options: ${CONSTRAINTS.TTS.EMOTIONS.join(', ')}`),
  format: z.enum(CONSTRAINTS.TTS.FORMATS as readonly [AudioFormat, ...AudioFormat[]])
    .default('mp3' as AudioFormat)
    .describe(`Output audio format. Options: ${CONSTRAINTS.TTS.FORMATS.join(', ')}`),
  sampleRate: z.enum(CONSTRAINTS.TTS.SAMPLE_RATES as readonly [SampleRate, ...SampleRate[]])
    .default('32000' as SampleRate)
    .describe(`Audio sample rate in Hz. Options: ${CONSTRAINTS.TTS.SAMPLE_RATES.join(', ')}`),
  bitrate: z.enum(CONSTRAINTS.TTS.BITRATES as readonly [Bitrate, ...Bitrate[]])
    .default('128000' as Bitrate)
    .describe(`Audio bitrate in bps. Options: ${CONSTRAINTS.TTS.BITRATES.join(', ')}`),
  languageBoost: z.string().default('auto').describe('Enhance recognition for specific languages/dialects. Options: Chinese, Chinese,Yue, English, Arabic, Russian, Spanish, French, Portuguese, German, Turkish, Dutch, Ukrainian, Vietnamese, Indonesian, Japanese, Italian, Korean, Thai, Polish, Romanian, Greek, Czech, Finnish, Hindi, Bulgarian, Danish, Hebrew, Malay, Persian, Slovak, Swedish, Croatian, Filipino, Hungarian, Norwegian, Slovenian, Catalan, Nynorsk, Tamil, Afrikaans, auto. Use "auto" for automatic detection'),
  intensity: z.number()
    .int()
    .min(CONSTRAINTS.TTS.VOICE_MODIFY_INTENSITY_MIN)
    .max(CONSTRAINTS.TTS.VOICE_MODIFY_INTENSITY_MAX)
    .optional()
    .describe('Voice intensity adjustment (-100 to 100). Values closer to -100 make the voice more robust; values closer to 100 make it softer'),
  timbre: z.number()
    .int()
    .min(CONSTRAINTS.TTS.VOICE_MODIFY_TIMBRE_MIN)
    .max(CONSTRAINTS.TTS.VOICE_MODIFY_TIMBRE_MAX)
    .optional()
    .describe('Voice timbre adjustment (-100 to 100). Values closer to -100 make the voice more mellow; values closer to 100 make it crisper'),
  sound_effects: z.enum(CONSTRAINTS.TTS.SOUND_EFFECTS as readonly [SoundEffect, ...SoundEffect[]])
    .optional()
    .describe(getSoundEffectsDescription())
});
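// Example usage (illustrative sketch; the text and output path are hypothetical).
// <#1.5#> inserts a 1.5-second pause between the two sentences, per the pause
// marker syntax documented on `text` above:
//
//   const tts = textToSpeechSchema.parse({
//     text: 'Welcome back. <#1.5#> Let us pick up where we left off.',
//     outputFile: '/tmp/intro.mp3'
//   });
//   // Defaults are applied: tts.voiceId === 'female-shaonv', tts.speed === 1,
//   // tts.format === 'mp3', tts.sampleRate === '32000', tts.bitrate === '128000'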
// Task barrier schema
export const taskBarrierSchema = z.object({});
// Type definitions for parsed schemas
export type ImageGenerationParams = z.infer<typeof imageGenerationSchema>;
export type TextToSpeechParams = z.infer<typeof textToSpeechSchema>;
export type TaskBarrierParams = z.infer<typeof taskBarrierSchema>;
// MCP Tool Schemas (for registerTool API)
export const imageGenerationToolSchema = {
  type: "object",
  properties: {
    prompt: {
      type: "string",
      description: `Image generation prompt (max ${CONSTRAINTS.IMAGE.PROMPT_MAX_LENGTH} characters)`,
      maxLength: CONSTRAINTS.IMAGE.PROMPT_MAX_LENGTH
    },
    outputFile: {
      type: "string",
      description: "Absolute path for generated image file"
    },
    aspectRatio: {
      type: "string",
      enum: [...CONSTRAINTS.IMAGE.ASPECT_RATIOS],
      default: "1:1",
      description: `Aspect ratio for the image. Options: ${CONSTRAINTS.IMAGE.ASPECT_RATIOS.join(', ')}`
    },
    customSize: {
      type: "object",
      properties: {
        width: { type: "number", minimum: CONSTRAINTS.IMAGE.MIN_DIMENSION, maximum: CONSTRAINTS.IMAGE.MAX_DIMENSION, multipleOf: CONSTRAINTS.IMAGE.DIMENSION_STEP },
        height: { type: "number", minimum: CONSTRAINTS.IMAGE.MIN_DIMENSION, maximum: CONSTRAINTS.IMAGE.MAX_DIMENSION, multipleOf: CONSTRAINTS.IMAGE.DIMENSION_STEP }
      },
      required: ["width", "height"],
      description: "Custom image dimensions (width x height in pixels). Range: 512-2048, must be multiples of 8. Total resolution should stay under 2M pixels. Only supported with image-01 model (cannot be used with style parameter). When both customSize and aspectRatio are set, aspectRatio takes precedence"
    },
    seed: {
      type: "number",
      description: "Random seed for reproducible results"
    },
    subjectReference: {
      type: "string",
      description: "File path to a portrait image for maintaining facial characteristics in generated images. Only supported with image-01 model (cannot be used with style parameter). Provide a clear frontal face photo for best results. Supports local file paths and URLs. Max 10MB, formats: jpg, jpeg, png"
    },
    style: {
      type: "object",
      properties: {
        style_type: {
          type: "string",
          enum: [...CONSTRAINTS.IMAGE.STYLE_TYPES],
          description: `Art style type. Options: ${CONSTRAINTS.IMAGE.STYLE_TYPES.join(', ')}`
        },
        style_weight: {
          type: "number",
          exclusiveMinimum: 0,
          maximum: CONSTRAINTS.IMAGE.STYLE_WEIGHT_MAX,
          default: 0.8,
          description: "Style control weight (0-1]. Higher values apply stronger style effects. Default: 0.8"
        }
      },
      required: ["style_type"],
      description: "Art style control settings. Uses image-01-live model which does not support customSize or subjectReference parameters. Cannot be combined with customSize or subjectReference"
    }
  },
  required: ["prompt", "outputFile"]
} as const;
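// Sketch: MCP advertises each tool to clients with a JSON Schema `inputSchema`,
// so this object can be passed through as-is when listing tools. The tool name
// and description below are hypothetical, and the exact registration call
// depends on the MCP SDK version in use:
//
//   const imageTool = {
//     name: 'generate_image',
//     description: 'Generate an image from a text prompt',
//     inputSchema: imageGenerationToolSchema
//   };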
export const textToSpeechToolSchema = {
  type: "object",
  properties: {
    text: {
      type: "string",
      description: `Text to convert to speech. Max ${CONSTRAINTS.TTS.TEXT_MAX_LENGTH} characters. Use newlines for paragraph breaks. For custom pauses, insert <#x#> where x is seconds (0.01-99.99, max 2 decimals). Pause markers must be between pronounceable text and cannot be consecutive`,
      maxLength: CONSTRAINTS.TTS.TEXT_MAX_LENGTH,
      minLength: 1
    },
    outputFile: {
      type: "string",
      description: "Absolute path for audio file"
    },
    highQuality: {
      type: "boolean",
      default: false,
      description: "Use high-quality model (speech-02-hd) for audiobooks/premium content. Default: false (uses faster speech-02-turbo)"
    },
    voiceId: {
      type: "string",
      enum: Object.keys(VOICES),
      default: "female-shaonv",
      description: `Voice ID for speech generation. Available voices: ${Object.keys(VOICES).map(id => `${id} (${VOICES[id as VoiceId]?.name || id})`).join(', ')}`
    },
    speed: {
      type: "number",
      minimum: CONSTRAINTS.TTS.SPEED_MIN,
      maximum: CONSTRAINTS.TTS.SPEED_MAX,
      default: 1.0,
      description: `Speech speed multiplier (${CONSTRAINTS.TTS.SPEED_MIN}-${CONSTRAINTS.TTS.SPEED_MAX}). Higher values = faster speech`
    },
    volume: {
      type: "number",
      minimum: CONSTRAINTS.TTS.VOLUME_MIN,
      maximum: CONSTRAINTS.TTS.VOLUME_MAX,
      default: 1.0,
      description: `Audio volume level (${CONSTRAINTS.TTS.VOLUME_MIN}-${CONSTRAINTS.TTS.VOLUME_MAX}). Higher values = louder audio`
    },
    pitch: {
      type: "number",
      minimum: CONSTRAINTS.TTS.PITCH_MIN,
      maximum: CONSTRAINTS.TTS.PITCH_MAX,
      default: 0,
      description: `Pitch adjustment in semitones (${CONSTRAINTS.TTS.PITCH_MIN} to ${CONSTRAINTS.TTS.PITCH_MAX}). Negative = lower pitch, Positive = higher pitch`
    },
    emotion: {
      type: "string",
      enum: [...CONSTRAINTS.TTS.EMOTIONS],
      default: "neutral",
      description: `Emotional tone of the speech. Options: ${CONSTRAINTS.TTS.EMOTIONS.join(', ')}`
    },
    format: {
      type: "string",
      enum: [...CONSTRAINTS.TTS.FORMATS],
      default: "mp3",
      description: `Output audio format. Options: ${CONSTRAINTS.TTS.FORMATS.join(', ')}`
    },
    sampleRate: {
      type: "string",
      enum: [...CONSTRAINTS.TTS.SAMPLE_RATES],
      default: "32000",
      description: `Audio sample rate in Hz. Options: ${CONSTRAINTS.TTS.SAMPLE_RATES.join(', ')}`
    },
    bitrate: {
      type: "string",
      enum: [...CONSTRAINTS.TTS.BITRATES],
      default: "128000",
      description: `Audio bitrate in bps. Options: ${CONSTRAINTS.TTS.BITRATES.join(', ')}`
    },
    languageBoost: {
      type: "string",
      default: "auto",
      description: "Enhance recognition for specific languages/dialects. Options: Chinese, Chinese,Yue, English, Arabic, Russian, Spanish, French, Portuguese, German, Turkish, Dutch, Ukrainian, Vietnamese, Indonesian, Japanese, Italian, Korean, Thai, Polish, Romanian, Greek, Czech, Finnish, Hindi, Bulgarian, Danish, Hebrew, Malay, Persian, Slovak, Swedish, Croatian, Filipino, Hungarian, Norwegian, Slovenian, Catalan, Nynorsk, Tamil, Afrikaans, auto. Use 'auto' for automatic detection"
    },
    intensity: {
      type: "number",
      minimum: CONSTRAINTS.TTS.VOICE_MODIFY_INTENSITY_MIN,
      maximum: CONSTRAINTS.TTS.VOICE_MODIFY_INTENSITY_MAX,
      description: "Voice intensity adjustment (-100 to 100). Values closer to -100 make the voice more robust; values closer to 100 make it softer"
    },
    timbre: {
      type: "number",
      minimum: CONSTRAINTS.TTS.VOICE_MODIFY_TIMBRE_MIN,
      maximum: CONSTRAINTS.TTS.VOICE_MODIFY_TIMBRE_MAX,
      description: "Voice timbre adjustment (-100 to 100). Values closer to -100 make the voice more mellow; values closer to 100 make it crisper"
    },
    sound_effects: {
      type: "string",
      enum: [...CONSTRAINTS.TTS.SOUND_EFFECTS],
      description: getSoundEffectsDescription()
    }
  },
  required: ["text", "outputFile"]
} as const;
export const taskBarrierToolSchema = {
  type: "object",
  properties: {}
} as const;
// Validation helper functions
function formatZodError(error: z.ZodError): string {
  const messages = error.errors.map(e => `${e.path.join('.')}: ${e.message}`);
  return `Validation failed: ${messages.join(', ')}`;
}
export function validateImageParams(params: unknown): ImageGenerationParams {
  try {
    const parsed = imageGenerationSchema.parse(params);
    // Manual validation for incompatible parameter combinations
    const hasStyle = !!parsed.style;
    const hasCustomSize = !!parsed.customSize;
    const hasSubjectReference = !!parsed.subjectReference;
    if (hasStyle && hasCustomSize) {
      throw new Error('Style parameter (image-01-live model) cannot be combined with customSize (image-01 model feature)');
    }
    if (hasStyle && hasSubjectReference) {
      throw new Error('Style parameter (image-01-live model) cannot be combined with subjectReference (image-01 model feature)');
    }
    return parsed;
  } catch (error) {
    if (error instanceof z.ZodError) {
      throw new Error(formatZodError(error));
    }
    throw error;
  }
}
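// Example (illustrative sketch; the sample values are hypothetical). Beyond
// schema errors, validateImageParams rejects incompatible parameter combinations:
//
//   // Valid: custom dimensions (image-01 model), multiples of 8 within 512-2048
//   const params = validateImageParams({
//     prompt: 'A watercolor lighthouse at dusk',
//     outputFile: '/tmp/lighthouse.png',
//     customSize: { width: 1024, height: 768 }
//   });
//   // Throws: style (image-01-live) cannot be combined with customSize (image-01)
//   validateImageParams({ ...params, style: { style_type: 'cartoon' } });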
export function validateTTSParams(params: unknown): TextToSpeechParams {
  try {
    return textToSpeechSchema.parse(params);
  } catch (error) {
    if (error instanceof z.ZodError) {
      throw new Error(formatZodError(error));
    }
    throw error;
  }
}
export function validateTaskBarrierParams(params: unknown): TaskBarrierParams {
  try {
    return taskBarrierSchema.parse(params);
  } catch (error) {
    if (error instanceof z.ZodError) {
      throw new Error(formatZodError(error));
    }
    throw error;
  }
}