# create_flowspeech
Convert text or URL content into speech episodes with AI-enhanced grammar correction or direct processing modes.
## Instructions
Create a FlowSpeech episode by converting text or URL content to speech. Supports smart mode (AI-enhanced, fixes grammar) and direct mode (no modifications). This tool will automatically poll until generation is complete.
## Input Schema
(Shown as a table; see the Zod definition below for the equivalent JSON Schema.)
| Name | Required | Description | Default |
|---|---|---|---|
| sourceType | Yes | Source type: text or url | |
| sourceContent | Yes | Source content (text or URL) | |
| speakerId | Yes | Speaker name or ID. Use speaker name from get_speakers tool output (the "name" field, not speakerId). Full speaker ID also supported. | |
| language | No | Language code (e.g., "zh" for Chinese, "en" for English). Default: zh | |
| mode | No | Generation mode: "smart" (AI-enhanced, fixes grammar) or "direct" (no modifications) | smart |
## Implementation Reference
- source/tools/flowspeech.ts:65-171 (handler)Executes the create_flowspeech tool: resolves speaker, prepares request data, calls the client API, polls status until complete using pollUntilComplete, handles errors, and formats the output with formatFlowspeechEpisode.async execute(args, {log}: {log: any}) { try { // Resolve speaker name/ID to actual speaker ID log.info('Resolving speaker identifier', { input: args.speakerId, language: args.language, }); const resolvedSpeakers = await client.resolveSpeakers([args.speakerId]); const speakers = [{speakerId: resolvedSpeakers[0]?.speakerId}]; const sources: FlowspeechSource[] = [ { type: args.sourceType, content: args.sourceContent, }, ]; // Use provided language or infer from resolved speaker const allSpeakers = await client.getCachedSpeakers(); const resolvedSpeaker = allSpeakers.find( (s) => s.speakerId === resolvedSpeakers[0]?.speakerId, ); const language = args.language ?? resolvedSpeaker?.language ?? 'zh'; log.info('Creating FlowSpeech episode', { sourceType: args.sourceType, contentLength: args.sourceContent.length, speakerId: resolvedSpeakers[0]?.speakerId, language, mode: args.mode, }); const requestData: CreateFlowspeechRequest = { sources, speakers, mode: args.mode, language, }; const submitResponse = await client.flowspeech.createFlowspeech(requestData); if (submitResponse.code !== 0) { return `Failed to submit task: ${submitResponse.message ?? 'Unknown error'}`; } const episodeId = submitResponse.data?.episodeId; if (!episodeId) { return 'Failed to submit task: No episodeId returned'; } log.info(`FlowSpeech task submitted successfully`, {episodeId}); const result = await pollUntilComplete( async () => { const statusResponse = await client.flowspeech.getFlowspeechStatus(episodeId); if (statusResponse.code !== 0) { throw new Error( statusResponse.message ?? 
'Failed to query status', ); } if (!statusResponse.data) { throw new Error('No episode data returned'); } return statusResponse.data; }, { pollInterval: 5000, maxRetries: 120, onProgress(status, retry) { log.debug(`FlowSpeech generation status: ${status}`, { episodeId, retry: `${retry}/120`, }); }, }, ); if (!result.success) { if (result.error) { log.error('FlowSpeech generation failed', { episodeId, error: result.error, }); return `FlowSpeech generation failed: ${result.error}`; } log.warn('FlowSpeech generation timeout', { episodeId, lastStatus: result.lastStatus, }); return `FlowSpeech generation timeout\nLast status: ${result.lastStatus}\nEpisode ID: ${episodeId}`; } const episode = result.data!; log.info('FlowSpeech generation completed', {episodeId}); return formatFlowspeechEpisode(episode); } catch (error) { const errorMessage = formatError(error); log.error('Failed to create FlowSpeech', {error: errorMessage}); return `Failed to create FlowSpeech: ${errorMessage}`; } },
- `source/tools/flowspeech.ts:38-60` (schema) — Zod input schema definition for the tool parameters: `sourceType`, `sourceContent`, `speakerId`, `language`, `mode`.

  ```typescript
  parameters: z.object({
    sourceType: z.enum(['text', 'url']).describe('Source type: text or url'),
    sourceContent: z.string().min(1).describe('Source content (text or URL)'),
    speakerId: z
      .string()
      .min(1)
      .describe(
        'Speaker name or ID. Use speaker name from get_speakers tool output (the "name" field, not speakerId). Full speaker ID also supported.',
      ),
    language: z
      .string()
      .optional()
      .describe(
        'Language code (e.g., "zh" for Chinese, "en" for English). Default: zh',
      ),
    mode: z
      .enum(['smart', 'direct'])
      .default('smart')
      .describe(
        'Generation mode: "smart" (AI-enhanced, fixes grammar) or "direct" (no modifications)',
      ),
  }),
  annotations: {
    // … excerpt truncated here; full annotations object shown in the registration excerpt below is in source.
  ```
- `source/tools/flowspeech.ts:35-172` (registration) — Registers the create_flowspeech tool with the FastMCP server via `server.addTool`, including description, annotations, and the execute handler.

  ```typescript
  name: 'create_flowspeech',
  description:
    'Create a FlowSpeech episode by converting text or URL content to speech. Supports smart mode (AI-enhanced, fixes grammar) and direct mode (no modifications). This tool will automatically poll until generation is complete.',
  parameters: z.object({
    sourceType: z.enum(['text', 'url']).describe('Source type: text or url'),
    sourceContent: z.string().min(1).describe('Source content (text or URL)'),
    speakerId: z
      .string()
      .min(1)
      .describe(
        'Speaker name or ID. Use speaker name from get_speakers tool output (the "name" field, not speakerId). Full speaker ID also supported.',
      ),
    language: z
      .string()
      .optional()
      .describe(
        'Language code (e.g., "zh" for Chinese, "en" for English). Default: zh',
      ),
    mode: z
      .enum(['smart', 'direct'])
      .default('smart')
      .describe(
        'Generation mode: "smart" (AI-enhanced, fixes grammar) or "direct" (no modifications)',
      ),
  }),
  annotations: {
    title: 'Create FlowSpeech',
    openWorldHint: true,
    readOnlyHint: false,
  },
  async execute(args, {log}: {log: any}) {
    try {
      // Resolve speaker name/ID to actual speaker ID
      log.info('Resolving speaker identifier', {
        input: args.speakerId,
        language: args.language,
      });
      const resolvedSpeakers = await client.resolveSpeakers([args.speakerId]);
      const speakers = [{speakerId: resolvedSpeakers[0]?.speakerId}];
      const sources: FlowspeechSource[] = [
        {
          type: args.sourceType,
          content: args.sourceContent,
        },
      ];
      // Use provided language or infer from resolved speaker
      const allSpeakers = await client.getCachedSpeakers();
      const resolvedSpeaker = allSpeakers.find(
        (s) => s.speakerId === resolvedSpeakers[0]?.speakerId,
      );
      const language = args.language ?? resolvedSpeaker?.language ?? 'zh';
      log.info('Creating FlowSpeech episode', {
        sourceType: args.sourceType,
        contentLength: args.sourceContent.length,
        speakerId: resolvedSpeakers[0]?.speakerId,
        language,
        mode: args.mode,
      });
      const requestData: CreateFlowspeechRequest = {
        sources,
        speakers,
        mode: args.mode,
        language,
      };
      const submitResponse = await client.flowspeech.createFlowspeech(requestData);
      if (submitResponse.code !== 0) {
        return `Failed to submit task: ${submitResponse.message ?? 'Unknown error'}`;
      }
      const episodeId = submitResponse.data?.episodeId;
      if (!episodeId) {
        return 'Failed to submit task: No episodeId returned';
      }
      log.info(`FlowSpeech task submitted successfully`, {episodeId});
      const result = await pollUntilComplete(
        async () => {
          const statusResponse = await client.flowspeech.getFlowspeechStatus(episodeId);
          if (statusResponse.code !== 0) {
            throw new Error(statusResponse.message ?? 'Failed to query status');
          }
          if (!statusResponse.data) {
            throw new Error('No episode data returned');
          }
          return statusResponse.data;
        },
        {
          pollInterval: 5000,
          maxRetries: 120,
          onProgress(status, retry) {
            log.debug(`FlowSpeech generation status: ${status}`, {
              episodeId,
              retry: `${retry}/120`,
            });
          },
        },
      );
      if (!result.success) {
        if (result.error) {
          log.error('FlowSpeech generation failed', {
            episodeId,
            error: result.error,
          });
          return `FlowSpeech generation failed: ${result.error}`;
        }
        log.warn('FlowSpeech generation timeout', {
          episodeId,
          lastStatus: result.lastStatus,
        });
        return `FlowSpeech generation timeout\nLast status: ${result.lastStatus}\nEpisode ID: ${episodeId}`;
      }
      const episode = result.data!;
      log.info('FlowSpeech generation completed', {episodeId});
      return formatFlowspeechEpisode(episode);
    } catch (error) {
      const errorMessage = formatError(error);
      log.error('Failed to create FlowSpeech', {error: errorMessage});
      return `Failed to create FlowSpeech: ${errorMessage}`;
    }
  },
  });
  ```
- `source/types/flowspeech.ts:25-35` (schema) — TypeScript type definitions for `CreateFlowspeechRequest` (lines 25-30) and `CreateFlowspeechResponse` (lines 32-35) used by the client and tool.

  ```typescript
  export type CreateFlowspeechRequest = {
    sources: FlowspeechSource[];
    speakers: Array<{speakerId?: string}>;
    language?: string;
    mode?: 'smart' | 'direct';
  };

  export type CreateFlowspeechResponse = {
    episodeId: string;
  };
  ```
- `source/client/flowspeech.ts:25-32` (helper) — `FlowspeechClient.createFlowspeech` method: sends a POST request to the `/v1/flow-speech/episodes` API endpoint with the request data.

  ```typescript
  async createFlowspeech(
    data: CreateFlowspeechRequest,
  ): Promise<ApiResponse<CreateFlowspeechResponse>> {
    const response = await this.axiosInstance.post<
      ApiResponse<CreateFlowspeechResponse>
    >('/v1/flow-speech/episodes', data);
    return response.data;
  }
  ```