respondAudio

Generate and play audio responses from text prompts using customizable voice options within the MCPollinations Multimodal MCP Server.

Instructions

Generate an audio response to a text prompt and play it through the system

Input Schema

  • prompt (required): The text prompt to respond to with audio.
  • voice (optional, default "alloy"): Voice to use for audio generation. Available options: "alloy", "echo", "fable", "onyx", "nova", "shimmer", "coral", "verse", "ballad", "ash", "sage", "amuch", "dan".
  • seed (optional, default random): Seed for reproducible results.
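As an illustration, a client might invoke the tool with arguments shaped like the following. The prompt, voice, and seed values here are made up for the example; only prompt is required by the schema:

```javascript
// Illustrative arguments for a respondAudio call. Only "prompt" is
// required; "voice" and "seed" fall back to their defaults when omitted.
const args = {
  prompt: 'Summarize the plot of Hamlet in one sentence.',
  voice: 'nova', // one of the thirteen listed voices; defaults to "alloy"
  seed: 42       // fixed seed for reproducible output; random when omitted
};

// A minimal client-side check mirroring the schema's "required" list.
const missing = ['prompt'].filter((key) => !(key in args));
if (missing.length > 0) {
  throw new Error(`Missing required argument(s): ${missing.join(', ')}`);
}
```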

Implementation Reference

  • Core respondAudio function: fetches audio from Pollinations API using text-to-speech endpoint, converts to base64, returns data and metadata.
    export async function respondAudio(prompt, voice = "alloy", seed, voiceInstructions, authConfig = null) {
      if (!prompt || typeof prompt !== 'string') {
        throw new Error('Prompt is required and must be a string');
      }
    
      // Build the query parameters
      const queryParams = new URLSearchParams();
      queryParams.append('model', 'openai-audio'); // Required for audio generation
      queryParams.append('voice', voice);
      if (seed !== undefined) queryParams.append('seed', seed);
    
      // Construct the URL
      let finalPrompt = prompt;
    
      // Add voice instructions if provided
      if (voiceInstructions) {
        finalPrompt = `${voiceInstructions}\n\n${prompt}`;
      }
    
      const encodedPrompt = encodeURIComponent(finalPrompt);
      const baseUrl = 'https://text.pollinations.ai';
      let url = `${baseUrl}/${encodedPrompt}`;
    
      // Add query parameters
      const queryString = queryParams.toString();
      url += `?${queryString}`;
    
      try {
        // Prepare fetch options with optional auth headers
        const fetchOptions = {};
        if (authConfig) {
          fetchOptions.headers = {};
          if (authConfig.token) {
            fetchOptions.headers['Authorization'] = `Bearer ${authConfig.token}`;
          }
          if (authConfig.referrer) {
            fetchOptions.headers['Referer'] = authConfig.referrer;
          }
        }
    
        // Fetch the audio from the URL
        const response = await fetch(url, fetchOptions);
    
        if (!response.ok) {
          throw new Error(`Failed to generate audio: ${response.statusText}`);
        }
    
        // Get the audio data as an ArrayBuffer
        const audioBuffer = await response.arrayBuffer();
    
        // Convert the ArrayBuffer to a base64 string
        const base64Data = Buffer.from(audioBuffer).toString('base64');
    
        // Determine the mime type from the response headers or default to audio/mpeg
        const contentType = response.headers.get('content-type') || 'audio/mpeg';
    
        return {
          data: base64Data,
          mimeType: contentType,
          metadata: {
            prompt,
            voice,
            model: 'openai-audio',
            seed,
            voiceInstructions
          }
        };
      } catch (error) {
        log('Error generating audio:', error);
        throw error;
      }
    }
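The URL construction inside the function above can be exercised in isolation. The sketch below assumes the same behavior; buildAudioUrl is a hypothetical helper name, since the real function inlines this logic before fetching:

```javascript
// Rebuilds the request URL the same way respondAudio does: prepend any
// voice instructions to the prompt, URL-encode the result as the path
// segment, and attach model/voice/seed as query parameters.
function buildAudioUrl(prompt, voice = 'alloy', seed, voiceInstructions) {
  const queryParams = new URLSearchParams();
  queryParams.append('model', 'openai-audio'); // required for audio generation
  queryParams.append('voice', voice);
  if (seed !== undefined) queryParams.append('seed', seed);

  const finalPrompt = voiceInstructions
    ? `${voiceInstructions}\n\n${prompt}`
    : prompt;

  return `https://text.pollinations.ai/${encodeURIComponent(finalPrompt)}?${queryParams.toString()}`;
}
```

Keeping the URL assembly pure like this makes the encoding behavior easy to unit-test without touching the network.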
  • MCP server CallToolRequest handler for respondAudio: calls core function, saves/plays audio file, returns metadata response.
    } else if (name === 'respondAudio') {
      try {
        const { prompt, voice = defaultConfig.audio.voice, seed, voiceInstructions } = args;
        const result = await respondAudio(prompt, voice, seed, voiceInstructions, finalAuthConfig);
    
        // Save audio to a temporary file
        const tempDir = os.tmpdir();
        const tempFilePath = path.join(tempDir, `pollinations-audio-${Date.now()}.mp3`);
    
        // Decode base64 and write to file
        fs.writeFileSync(tempFilePath, Buffer.from(result.data, 'base64'));
    
        // Play the audio file
        audioPlayer.play(tempFilePath, (err) => {
          if (err) log('Error playing audio:', err);
    
          // Clean up the temporary file after playing
          try {
            fs.unlinkSync(tempFilePath);
          } catch (cleanupErr) {
            log('Error cleaning up temp file:', cleanupErr);
          }
        });
    
        return {
          content: [
            {
              type: 'text',
              text: `Audio has been played.\n\nAudio metadata: ${JSON.stringify(result.metadata, null, 2)}`
            }
          ]
        };
      } catch (error) {
        return {
          content: [
            { type: 'text', text: `Error generating audio: ${error.message}` }
          ],
          isError: true
        };
      }
    } else if (name === 'listImageModels') {
  • Input schema for respondAudio tool defining parameters: prompt (required), voice, seed.
    export const respondAudioSchema = {
      name: 'respondAudio',
      description: 'Generate an audio response to a text prompt and play it through the system',
      inputSchema: {
        type: 'object',
        properties: {
          prompt: {
            type: 'string',
            description: 'The text prompt to respond to with audio'
          },
          voice: {
            type: 'string',
            description: 'Voice to use for audio generation (default: "alloy"). Available options: "alloy", "echo", "fable", "onyx", "nova", "shimmer", "coral", "verse", "ballad", "ash", "sage", "amuch", "dan"'
          },
          seed: {
            type: 'number',
            description: 'Seed for reproducible results (default: random)'
          }
        },
        required: ['prompt']
      }
    };
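Since the schema marks only prompt as required, a pre-call check against it could look like the sketch below. validateArgs is a hypothetical helper, not part of MCPollinations; the inputSchema object reproduces the shape exported above:

```javascript
// Checks an arguments object against a JSON-Schema-style "required" list.
function validateArgs(inputSchema, args) {
  const missing = (inputSchema.required || []).filter((key) => !(key in args));
  return { valid: missing.length === 0, missing };
}

// The shape used by respondAudioSchema.inputSchema, reduced to what the
// check needs.
const inputSchema = {
  type: 'object',
  properties: {
    prompt: { type: 'string' },
    voice: { type: 'string' },
    seed: { type: 'number' }
  },
  required: ['prompt']
};
```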
  • MCP server registration of all tools including respondAudio via getAllToolSchemas() in ListToolsRequestHandler.
    server.setRequestHandler(ListToolsRequestSchema, async () => ({
      tools: getAllToolSchemas()
    }));
  • src/index.js:9-23 (registration)
    Import and re-export of respondAudio function for use across the codebase.
    import { respondAudio, listAudioVoices } from './services/audioService.js';
    import { respondText, listTextModels } from './services/textService.js';
    
    
    // Export all service functions
    export {
      // Image services
      generateImageUrl,
      generateImage,
      editImage,
      generateImageFromReference,
      listImageModels,
    
      // Audio services
      respondAudio,
      listAudioVoices,

      // Text services
      respondText,
      listTextModels
    };

Behavior 2/5

Does the description disclose side effects, auth requirements, rate limits, or destructive behavior?

With no annotations provided, the description carries the full documentation burden but offers minimal behavioral detail. It states that the tool generates and plays audio, but doesn't disclose latency, audio format, duration limits, system requirements, or error conditions. For a tool with no annotations, this leaves significant gaps in understanding its behavior.

Agents need to know what a tool does to the world before calling it. Descriptions should go beyond structured annotations to explain consequences.

Conciseness 5/5

Is the description appropriately sized, front-loaded, and free of redundancy?

The description is a single, efficient sentence that directly states the tool's function without unnecessary words. It's front-loaded with the core action and resource, making it easy to parse quickly.

Shorter descriptions cost fewer tokens and are easier for agents to parse. Every sentence should earn its place.

Completeness 2/5

Given the tool's complexity, does the description cover enough for an agent to succeed on first attempt?

Given no annotations, no output schema, and three parameters, the description is insufficiently complete. It doesn't explain what the audio output entails (e.g., format, length), potential side effects like system audio playback, or how errors might manifest. For a generative tool with behavioral implications, more context is needed.

Complex tools with many parameters or behaviors need more documentation. Simple tools need less. This dimension scales expectations accordingly.

Parameters 3/5

Does the description clarify parameter syntax, constraints, interactions, or defaults beyond what the schema provides?

Schema description coverage is 100%, so the schema fully documents all three parameters (prompt, voice, seed). The description adds no parameter-specific information beyond what's in the schema, maintaining the baseline score of 3 for high schema coverage.

Input schemas describe structure but not intent. Descriptions should explain non-obvious parameter relationships and valid value ranges.

Purpose 4/5

Does the description clearly state what the tool does and how it differs from similar tools?

The description clearly states the action ('Generate an audio response') and the resource ('to a text prompt'), specifying it will 'play it through the system'. It distinguishes from sibling tools like respondText (text vs audio) and listAudioVoices (list vs generate), but doesn't explicitly contrast with all siblings like image tools.

Agents choose between tools based on descriptions. A clear purpose with a specific verb and resource helps agents select the right tool.

Usage Guidelines 2/5

Does the description explain when to use this tool, when not to, or what alternatives exist?

No guidance on when to use this tool versus alternatives is provided. The description doesn't mention when to choose respondAudio over respondText for responses, or how it relates to other audio/image tools. Usage context is implied but not stated.

Agents often have multiple tools that could apply. Explicit usage guidance like "use X instead of Y when Z" prevents misuse.
