/**
* Pollinations Audio Service
*
* Functions and schemas for interacting with the Pollinations Audio API
*/
import { createMCPResponse, createTextContent, buildUrl } from '../utils/coreUtils.js';
import { z } from 'zod';
// Constants
// Base URL for the Pollinations text/audio API.
// Can be overridden via the POLLINATIONS_API_URL environment variable.
const AUDIO_API_BASE_URL = process.env.POLLINATIONS_API_URL || 'https://text.pollinations.ai';
/**
* Generates an audio response to a text prompt using the Pollinations Text API
*
* @param {Object} params - The parameters for audio generation
* @param {string} params.prompt - The text prompt to respond to with audio
* @param {string} [params.voice="nova"] - Voice to use for audio generation
* @param {string} [params.format="mp3"] - Format of the audio (mp3, wav, etc.)
* @param {string} [params.voiceInstructions] - Additional instructions for voice character/style
* @param {Object} [params.audioPlayer] - Optional audio player for terminal playback
* @param {string} [params.tempDir] - Optional temporary directory for audio playback
* @returns {Promise<Object>} - MCP response object with the audio data
*/
async function respondAudio(params) {
    // Note: callers may also pass audioPlayer / tempDir / _isRestCall; they are
    // accepted for backward compatibility but intentionally ignored here.
    const { prompt, voice = "nova", format = "mp3", voiceInstructions } = params;
    if (!prompt || typeof prompt !== 'string') {
        throw new Error('Prompt is required and must be a string');
    }
    // Optional API token from environment variables
    const apiToken = process.env.POLLINATIONS_API_TOKEN;
    // Prepare the query parameters
    const queryParams = {
        model: 'openai-audio',
        voice,
        format
    };
    // Add token to query params if available
    if (apiToken) {
        queryParams.token = apiToken;
    }
    // Add referrer for authentication
    queryParams.referrer = 'chat.code-x.my';
    // Prepend voice instructions to the prompt when provided
    const finalPrompt = voiceInstructions
        ? `${voiceInstructions}\n\n${prompt}`
        : prompt;
    // Build the URL using the utility function
    const url = buildUrl(AUDIO_API_BASE_URL, encodeURIComponent(finalPrompt), queryParams);
    // Redact the API token so the secret never reaches the logs
    const loggableUrl = apiToken ? url.split(apiToken).join('***') : url;
    console.log(`Audio URL generated: ${loggableUrl}`);
    // Return just the URL for better web UI compatibility (no markdown bloat)
    return createMCPResponse([
        createTextContent(url)
    ]);
}
/**
* Generates speech from text with a verbatim instruction
*
* @param {Object} params - The parameters for speech generation
* @param {string} params.text - The text to speak verbatim
* @param {string} [params.voice="nova"] - Voice to use for audio generation
* @param {string} [params.format="mp3"] - Format of the audio (mp3, wav, etc.)
* @param {string} [params.voiceInstructions] - Additional instructions for voice character/style
* @param {Object} [params.audioPlayer] - Optional audio player for terminal playback
* @param {string} [params.tempDir] - Optional temporary directory for audio playback
* @returns {Promise<Object>} - MCP response object with the audio data
*/
async function sayText(params) {
    // Note: callers may also pass audioPlayer / tempDir / _isRestCall; they are
    // accepted for backward compatibility but intentionally ignored here.
    const { text, voice = "nova", format = "mp3", voiceInstructions } = params;
    if (!text || typeof text !== 'string') {
        throw new Error('Text is required and must be a string');
    }
    // Optional API token from environment variables
    const apiToken = process.env.POLLINATIONS_API_TOKEN;
    // Prepare the query parameters
    const queryParams = {
        model: 'openai-audio',
        voice,
        format
    };
    // Add token to query params if available
    if (apiToken) {
        queryParams.token = apiToken;
    }
    // Add referrer for authentication
    queryParams.referrer = 'chat.code-x.my';
    // Prepare the prompt with the verbatim instruction, prepending any
    // voice instructions when provided
    const verbatimPrompt = `Say verbatim: ${text}`;
    const finalPrompt = voiceInstructions
        ? `${voiceInstructions}\n\n${verbatimPrompt}`
        : verbatimPrompt;
    // Build the URL using the utility function
    const url = buildUrl(AUDIO_API_BASE_URL, encodeURIComponent(finalPrompt), queryParams);
    // Redact the API token so the secret never reaches the logs
    const loggableUrl = apiToken ? url.split(apiToken).join('***') : url;
    console.log(`Audio URL generated: ${loggableUrl}`);
    // Return just the URL for better web UI compatibility (no markdown bloat)
    return createMCPResponse([
        createTextContent(url)
    ]);
}
/**
* List available audio voices from Pollinations API
*
* @param {Object} params - The parameters for listing audio voices
* @returns {Promise<Object>} - MCP response object with the list of available voice options
*/
async function listAudioVoices(params) {
    // Fallback list used when the API is unreachable or returns an unexpected shape
    const defaultVoices = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'];
    try {
        // Get API token from environment variables
        const apiToken = process.env.POLLINATIONS_API_TOKEN;
        // Prepare query parameters with token if available
        const queryParams = {};
        if (apiToken) {
            queryParams.token = apiToken;
        }
        // Add referrer for authentication
        queryParams.referrer = 'chat.code-x.my';
        const url = buildUrl(AUDIO_API_BASE_URL, 'models', queryParams);
        // Prepare headers for API authentication
        const headers = {
            'User-Agent': 'Pollinations-MCP/1.0',
            'Accept': 'application/json'
        };
        // Add Authorization header if token is available
        if (apiToken) {
            headers['Authorization'] = `Bearer ${apiToken}`;
        }
        // Log with the token redacted; never log the headers themselves,
        // since they may carry the bearer token
        const loggableUrl = apiToken ? url.split(apiToken).join('***') : url;
        console.log(`Making request to: ${loggableUrl}`);
        const response = await fetch(url, {
            method: 'GET',
            headers
        });
        console.log(`Response status: ${response.status} ${response.statusText}`);
        if (!response.ok) {
            const errorText = await response.text().catch(() => 'No error details available');
            console.error(`API Error Response: ${errorText}`);
            throw new Error(`Failed to list models: ${response.status} ${response.statusText} - ${errorText}`);
        }
        const models = await response.json();
        // Guard against non-array payloads before searching for the audio model,
        // then extract its voice list if present
        const audioModel = Array.isArray(models)
            ? models.find(model => model.name === 'openai-audio')
            : undefined;
        const voices = (audioModel && Array.isArray(audioModel.voices))
            ? audioModel.voices
            : defaultVoices;
        // Return the response in MCP format using utility functions
        return createMCPResponse([
            createTextContent(voices, true)
        ]);
    } catch (error) {
        console.error('Error listing audio voices:', error);
        // Return default voices if there's an error
        return createMCPResponse([
            createTextContent(defaultVoices, true)
        ]);
    }
}
/**
* Export tools as complete arrays ready to be passed to server.tool()
*/
/**
 * Tool definitions, each shaped as [name, description, zod schema, handler],
 * ready to be spread into server.tool().
 */
export const audioTools = [
    [
        'respondAudio',
        'Generate an audio response to a text prompt',
        {
            prompt: z.string().describe('The text prompt to respond to with audio'),
            voice: z.string().optional().describe('Voice to use for audio generation (default: "nova")'),
            format: z.string().optional().describe('Format of the audio (mp3, wav, etc.)'),
            voiceInstructions: z.string().optional().describe('Additional instructions for voice character/style (e.g., "Speak with enthusiasm" or "Use a calm tone")'),
        },
        respondAudio,
    ],
    [
        'sayText',
        'Generate speech that says the provided text verbatim',
        {
            text: z.string().describe('The text to speak verbatim'),
            voice: z.string().optional().describe('Voice to use for audio generation (default: "nova")'),
            format: z.string().optional().describe('Format of the audio (mp3, wav, etc.)'),
            voiceInstructions: z.string().optional().describe('Additional instructions for voice character/style (e.g., "Speak with enthusiasm" or "Use a calm tone")'),
        },
        sayText,
    ],
    [
        'audio/speech',
        'Generate speech that says the provided text verbatim (OpenAI-compatible endpoint)',
        {
            input: z.string().describe('The text to speak verbatim'),
            voice: z.string().optional().describe('Voice to use for audio generation (default: "nova")'),
            response_format: z.string().optional().describe('Format of the audio (mp3, wav, etc.)'),
            voiceInstructions: z.string().optional().describe('Additional instructions for voice character/style (e.g., "Speak with enthusiasm" or "Use a calm tone")'),
        },
        // Adapter: translate OpenAI-style parameter names onto sayText's contract
        async ({ input, voice, response_format, format, voiceInstructions, _isRestCall }) =>
            sayText({
                text: input,
                voice,
                format: response_format || format,
                voiceInstructions,
                _isRestCall,
            }),
    ],
    [
        'listAudioVoices',
        'List available audio voices',
        {},
        listAudioVoices,
    ],
];