gaudio_sync_lyrics
Synchronize lyrics with audio by uploading files to create a job, polling for results, and receiving timestamped lyrics in CSV and JSON reports.
Instructions
All-in-one lyrics sync: upload audio + text files → create gts_lyrics_line_v1 job → poll → return CSV (timestamp, lyric_text, confidence_score) + JSON report URLs. Text file requirements: .txt format, UTF-8, minimum 2 lines, max 60 characters per line. Audio limit: 1GB / 10 minutes.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| audioFilePath | No | Path to local audio file. Either audioFilePath or audioUploadId required. | |
| audioUploadId | No | Existing audio uploadId to reuse. | |
| textFilePath | No | Path to local .txt lyrics file. Either textFilePath or textUploadId required. | |
| textUploadId | No | Existing text uploadId to reuse. | |
| language | Yes | Language of the lyrics: en (English), ko (Korean), ja (Japanese), zh-cn (Chinese Simplified) | |
| pollInterval | No | Polling interval in seconds (default: 10) | 10 |
Implementation Reference
- src/tools/sync-lyrics.ts:6-116 (handler)Main handler for gaudio_sync_lyrics tool. Orchestrates uploading audio + text files, creating a gts_lyrics_line_v1 job, polling for completion, and returning the result.
/**
 * Registers the `gaudio_sync_lyrics` tool on the MCP server.
 *
 * The tool handler runs four steps in order:
 *   1. upload the audio file unless an existing `audioUploadId` was supplied,
 *   2. upload the lyrics text file unless an existing `textUploadId` was supplied,
 *   3. create a `gts_lyrics_line_v1` job from the two upload IDs and the language,
 *   4. poll the job (at most 30 attempts) and report the outcome.
 *
 * Progress messages and a JSON summary of the job are joined with newlines
 * and returned as a single text content item.
 *
 * @param server MCP server the tool is registered on.
 * @param client Gaudio API client used for uploads and job creation.
 */
export function registerSyncLyrics(server: McpServer, client: GaudioClient) {
  server.tool(
    "gaudio_sync_lyrics",
    "All-in-one lyrics sync: upload audio + text files → create gts_lyrics_line_v1 job → poll → return CSV (timestamp, lyric_text, confidence_score) + JSON report URLs. Text file requirements: .txt format, UTF-8, minimum 2 lines, max 60 characters per line. Audio limit: 1GB / 10 minutes.",
    {
      audioFilePath: z
        .string()
        .optional()
        .describe("Path to local audio file. Either audioFilePath or audioUploadId required."),
      audioUploadId: z
        .string()
        .optional()
        .describe("Existing audio uploadId to reuse."),
      textFilePath: z
        .string()
        .optional()
        .describe("Path to local .txt lyrics file. Either textFilePath or textUploadId required."),
      textUploadId: z
        .string()
        .optional()
        .describe("Existing text uploadId to reuse."),
      language: z
        .enum(["en", "ko", "ja", "zh-cn"])
        .describe("Language of the lyrics: en (English), ko (Korean), ja (Japanese), zh-cn (Chinese Simplified)"),
      pollInterval: z
        .number()
        .optional()
        .default(10)
        .describe("Polling interval in seconds (default: 10)"),
    },
    async ({
      audioFilePath,
      audioUploadId,
      textFilePath,
      textUploadId,
      language,
      pollInterval,
    }) => {
      // Validate both input pairs before doing any work, so a missing lyrics
      // source is reported before the audio file is uploaded.
      if (!audioFilePath && !audioUploadId) {
        return {
          content: [{ type: "text" as const, text: "Either audioFilePath or audioUploadId is required." }],
          isError: true,
        };
      }
      if (!textFilePath && !textUploadId) {
        return {
          content: [{ type: "text" as const, text: "Either textFilePath or textUploadId is required." }],
          isError: true,
        };
      }

      const messages: string[] = [];
      const log = (msg: string) => messages.push(msg);

      // Step 1: Upload audio if needed
      let resolvedAudioId = audioUploadId;
      if (!resolvedAudioId) {
        log("오디오 파일 업로드 중...");
        // audioFilePath is non-empty here: the guard above rejected the case
        // where both audioFilePath and audioUploadId are missing.
        const res = await client.uploadFile(audioFilePath!);
        resolvedAudioId = res.uploadId;
        // Single-line message, matching the text-upload message below.
        log(`오디오 업로드 완료. uploadId: ${resolvedAudioId}`);
      }

      // Step 2: Upload text if needed
      let resolvedTextId = textUploadId;
      if (!resolvedTextId) {
        log("텍스트 파일 업로드 중...");
        // textFilePath is non-empty here for the same reason as above.
        const res = await client.uploadFile(textFilePath!);
        resolvedTextId = res.uploadId;
        log(`텍스트 업로드 완료. uploadId: ${resolvedTextId}`);
      }

      // Step 3: Create job
      const model = "gts_lyrics_line_v1";
      const { jobId } = await client.createJob(model, {
        audioUploadId: resolvedAudioId,
        textUploadId: resolvedTextId,
        language,
      });
      log(`Job 생성 완료. jobId: ${jobId}`);

      // Step 4: Poll (pollInterval is given in seconds, pollJob expects milliseconds)
      const intervalMs = (pollInterval ?? 10) * 1000;
      const result = await pollJob(client, model, jobId, intervalMs, 30, log);

      const output: Record<string, unknown> = {
        jobId: result.jobId,
        status: result.status,
        model,
        audioUploadId: resolvedAudioId,
        textUploadId: resolvedTextId,
        language,
      };
      // Optional fields are only included when the poll result carries them.
      if (result.downloadUrl) output.downloadUrl = result.downloadUrl;
      if (result.expireAt) output.expireAt = result.expireAt;
      if (result.errorMessage) output.errorMessage = result.errorMessage;

      messages.push(JSON.stringify(output, null, 2));

      return {
        content: [
          {
            type: "text" as const,
            text: messages.join("\n"),
          },
        ],
        // A failed job is a tool error, consistent with the validation
        // failures above. A polling timeout is not flagged: the job may
        // still complete and can be checked later by jobId.
        ...(result.status === "failed" ? { isError: true } : {}),
      };
    },
  );
}
- src/tools/sync-lyrics.ts:10-35 (schema)Input schema definition for gaudio_sync_lyrics using Zod. Defines optional audioFilePath/audioUploadId, optional textFilePath/textUploadId, required language enum, and optional pollInterval.
// Zod input schema for the gaudio_sync_lyrics tool (excerpt: the object literal only).
// Every field except `language` is marked .optional(); `pollInterval` additionally defaults to 10.
// `language` is restricted to the four listed codes via z.enum.
// The "either a file path or an upload ID" rule is stated only in the field descriptions;
// this schema itself does not enforce it.
{ audioFilePath: z .string() .optional() .describe("Path to local audio file. Either audioFilePath or audioUploadId required."), audioUploadId: z .string() .optional() .describe("Existing audio uploadId to reuse."), textFilePath: z .string() .optional() .describe("Path to local .txt lyrics file. Either textFilePath or textUploadId required."), textUploadId: z .string() .optional() .describe("Existing text uploadId to reuse."), language: z .enum(["en", "ko", "ja", "zh-cn"]) .describe("Language of the lyrics: en (English), ko (Korean), ja (Japanese), zh-cn (Chinese Simplified)"), pollInterval: z .number() .optional() .default(10) .describe("Polling interval in seconds (default: 10)"), }, - src/index.ts:11-33 (registration)Registration of the gaudio_sync_lyrics tool in the MCP server. Imported and called alongside other tool registrations.
import { registerSyncLyrics } from "./tools/sync-lyrics.js";
import { registerGetKeyInfo } from "./tools/get-key-info.js";

// The API key is read from the environment; startup aborts with exit code 1
// when it is missing or empty.
const apiKey = process.env.GAUDIO_API_KEY;
if (!apiKey) {
  console.error("GAUDIO_API_KEY environment variable is required.");
  process.exit(1);
}

const server = new McpServer({
  name: "com.gaudiolab/mcp-developers",
  version: "1.0.0",
});
const client = new GaudioClient(apiKey);

// registerListModels is the only registration that does not take the API client.
registerListModels(server);

// Tools that need the API client, registered in this order.
for (const registerTool of [
  registerUploadFile,
  registerCreateJob,
  registerGetJob,
  registerSeparateAudio,
  registerSyncLyrics,
  registerGetKeyInfo,
]) {
  registerTool(server, client);
}
- src/utils/polling.ts:12-69 (helper)Polling utility used by gaudio_sync_lyrics to wait for job completion. Polls the job status up to 30 times with configurable interval.
/**
 * Polls a job until it succeeds, fails, or the attempt budget is exhausted.
 *
 * Each attempt calls `client.getJob(model, jobId)` and inspects
 * `resultData.status`:
 *   - "success": resolves with the download URL(s) and expiry from the response,
 *   - "failed": resolves with the response's error message (or "Job failed"),
 *   - anything else: reports progress and waits `intervalMs` before retrying.
 *
 * A `GaudioApiError` thrown by `getJob` is converted into a "failed" result;
 * any other error is rethrown to the caller.
 *
 * @param client Gaudio API client used to query the job.
 * @param model Model name the job was created under.
 * @param jobId Identifier of the job to poll.
 * @param intervalMs Delay between attempts in milliseconds (default 10 000).
 * @param maxAttempts Maximum number of status checks (default 30).
 * @param onProgress Optional callback receiving human-readable progress messages.
 * @returns The final poll result; status is "polling_timeout" when no terminal
 *          status was observed within `maxAttempts` checks.
 */
export async function pollJob(
  client: GaudioClient,
  model: string,
  jobId: string,
  intervalMs: number = 10_000,
  maxAttempts: number = 30,
  onProgress?: (message: string) => void,
): Promise<PollResult> {
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    let result;
    try {
      result = await client.getJob(model, jobId);
    } catch (err) {
      // API-level errors end polling with a "failed" result; anything else
      // (programming errors, unexpected exceptions) propagates.
      if (err instanceof GaudioApiError) {
        return {
          jobId,
          status: "failed",
          errorMessage: err.message,
        };
      }
      throw err;
    }

    const status = result.resultData?.status as string;

    if (status === "success") {
      onProgress?.("처리 완료!");
      return {
        jobId,
        status: "success",
        downloadUrl: result.resultData?.downloadUrl as Record<string, unknown>,
        expireAt: result.resultData?.expireAt as string,
      };
    }

    if (status === "failed") {
      return {
        jobId,
        status: "failed",
        errorMessage:
          (result.resultData?.errorMessage as string) ?? "Job failed",
      };
    }

    if (attempt === 0) {
      onProgress?.("처리 대기 중...");
    } else {
      onProgress?.(`처리 중... (${attempt + 1}/${maxAttempts})`);
    }

    // Only wait when another status check will follow. Sleeping after the
    // final attempt would delay the timeout result by a full interval
    // without performing any further check.
    if (attempt < maxAttempts - 1) {
      await new Promise((resolve) => setTimeout(resolve, intervalMs));
    }
  }

  return {
    jobId,
    status: "polling_timeout",
    errorMessage: `${maxAttempts}회 폴링 후에도 미완료. gaudio_get_job으로 나중에 확인하세요. jobId: ${jobId}`,
  };
}
- src/api/client.ts:14-178 (helper)API client with uploadFile, createJob, and getJob methods used by gaudio_sync_lyrics handler.
/**
 * HTTP client for the Gaudio API.
 *
 * JSON endpoints go through the private `request` helper, which sends the
 * API key in the `x-ga-apikey` header, retries on HTTP 429, and throws
 * `GaudioApiError` for non-OK HTTP statuses and for responses whose
 * `resultCode` is not 1000. File uploads use the multipart flow:
 * create → PUT each chunk to its pre-signed URL → complete.
 */
export class GaudioClient {
  private readonly apiKey: string;

  /** @param apiKey API key sent with every JSON request. */
  constructor(apiKey: string) {
    this.apiKey = apiKey;
  }

  /**
   * Sends a JSON request to `BASE_URL + path` and returns the parsed response.
   *
   * @param method HTTP method.
   * @param path Path appended to `BASE_URL`.
   * @param body Optional payload, serialized with `JSON.stringify` when truthy.
   * @throws GaudioApiError on HTTP 429 after `MAX_RETRIES` retries, on any
   *         other non-OK status, or when `resultCode` is not 1000.
   */
  private async request(
    method: string,
    path: string,
    body?: unknown,
  ): Promise<ApiResponse> {
    const url = `${BASE_URL}${path}`;

    for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
      const response = await fetch(url, {
        method,
        headers: {
          "x-ga-apikey": this.apiKey,
          "Content-Type": "application/json",
        },
        body: body ? JSON.stringify(body) : undefined,
      });

      if (response.status === 429) {
        // Rate limited: wait and retry until the retry budget is used up.
        if (attempt < MAX_RETRIES) {
          await new Promise((r) => setTimeout(r, RATE_LIMIT_WAIT_MS));
          continue;
        }
        throw new GaudioApiError(getHttpErrorMessage(429), 429);
      }

      if (!response.ok) {
        throw new GaudioApiError(
          getHttpErrorMessage(response.status),
          response.status,
        );
      }

      const data = (await response.json()) as ApiResponse;

      // The API reports application-level failures with HTTP 200 and a
      // resultCode other than 1000.
      if (data.resultCode !== 1000) {
        throw new GaudioApiError(
          getResultCodeMessage(data.resultCode),
          200,
          data.resultCode,
        );
      }

      return data;
    }

    // Every loop iteration returns, throws, or continues with a further
    // iteration available; this satisfies the return-type check.
    throw new GaudioApiError("Max retries exceeded");
  }

  /**
   * Starts a multipart upload.
   *
   * @param fileName Name of the file being uploaded.
   * @param fileSize Size of the file in bytes.
   * @returns The upload ID, the chunk size to use, and one pre-signed URL per chunk.
   */
  async uploadCreate(
    fileName: string,
    fileSize: number,
  ): Promise<{
    uploadId: string;
    chunkSize: number;
    preSignedUrl: string[];
  }> {
    const res = await this.request("POST", "/files/upload-multipart/create", {
      fileName,
      fileSize,
    });
    return res.resultData as {
      uploadId: string;
      chunkSize: number;
      preSignedUrl: string[];
    };
  }

  /**
   * PUTs one chunk to its pre-signed URL.
   *
   * @param presignedUrl Destination URL for this chunk.
   * @param chunk Bytes to upload.
   * @param contentType Value for the `Content-Type` header.
   * @returns The response's `ETag` header with double quotes removed.
   * @throws GaudioApiError when the response is not OK or has no `ETag` header.
   */
  async uploadChunk(
    presignedUrl: string,
    chunk: Uint8Array,
    contentType: string,
  ): Promise<string> {
    const response = await fetch(presignedUrl, {
      method: "PUT",
      headers: { "Content-Type": contentType },
      body: chunk as unknown as BodyInit,
    });

    if (!response.ok) {
      throw new GaudioApiError(
        `Chunk upload failed: ${response.status}`,
        response.status,
      );
    }

    const etag = response.headers.get("ETag");
    if (!etag) {
      throw new GaudioApiError("ETag header missing from chunk upload response");
    }
    return etag.replace(/"/g, "");
  }

  /**
   * Finalizes a multipart upload.
   *
   * @param uploadId ID returned by `uploadCreate`.
   * @param parts ETag and 1-based part number of every uploaded chunk.
   */
  async uploadComplete(
    uploadId: string,
    parts: { awsETag: string; partNumber: number }[],
  ): Promise<void> {
    await this.request("POST", "/files/upload-multipart/complete", {
      uploadId,
      parts,
    });
  }

  /**
   * Uploads a local file through the multipart flow and returns its upload ID.
   *
   * The file is read into memory synchronously in full, then sliced into
   * `chunkSize` pieces, one per pre-signed URL, uploaded sequentially.
   *
   * @param filePath Path of the local file to upload.
   * @returns The upload ID of the completed upload.
   */
  async uploadFile(filePath: string): Promise<{ uploadId: string }> {
    const stat = statSync(filePath);
    const fileName = basename(filePath);
    const fileSize = stat.size;
    const fileBuffer = readFileSync(filePath);

    // Content type is chosen from the lower-cased file extension; unknown
    // extensions fall back to application/octet-stream.
    const ext = fileName.split(".").pop()?.toLowerCase() ?? "";
    const contentTypeMap: Record<string, string> = {
      wav: "audio/wav",
      flac: "audio/flac",
      mp3: "audio/mpeg",
      m4a: "audio/mp4",
      mov: "video/quicktime",
      mp4: "video/mp4",
      txt: "text/plain",
    };
    const contentType = contentTypeMap[ext] ?? "application/octet-stream";

    const { uploadId, chunkSize, preSignedUrl } = await this.uploadCreate(
      fileName,
      fileSize,
    );

    // Chunks are uploaded in URL order, so `parts` is built already sorted
    // by ascending partNumber and needs no separate sort.
    const parts: { awsETag: string; partNumber: number }[] = [];
    for (let i = 0; i < preSignedUrl.length; i++) {
      const start = i * chunkSize;
      const end = Math.min(start + chunkSize, fileSize);
      const chunk = fileBuffer.subarray(start, end);
      const etag = await this.uploadChunk(
        preSignedUrl[i],
        new Uint8Array(chunk.buffer, chunk.byteOffset, chunk.byteLength),
        contentType,
      );
      parts.push({ awsETag: etag, partNumber: i + 1 });
    }

    await this.uploadComplete(uploadId, parts);

    return { uploadId };
  }

  /**
   * Creates a job for the given model.
   *
   * @param model Model name; used as a URL path segment.
   * @param params Job parameters sent as the JSON body.
   * @returns The ID of the created job.
   * @throws GaudioApiError when the response does not contain a non-empty
   *         string `jobId`, so callers never poll with an undefined ID.
   */
  async createJob(
    model: string,
    params: Record<string, unknown>,
  ): Promise<{ jobId: string }> {
    const res = await this.request(
      "POST",
      `/${encodeURIComponent(model)}/jobs`,
      params,
    );
    const jobId: unknown = res.resultData?.jobId;
    if (typeof jobId !== "string" || jobId === "") {
      throw new GaudioApiError("Job creation response did not include a jobId");
    }
    return { jobId };
  }

  /**
   * Fetches the current state of a job.
   *
   * `model` and `jobId` are percent-encoded so that characters such as "/"
   * or ".." inside them cannot change which endpoint is requested.
   *
   * @param model Model name the job was created under.
   * @param jobId Identifier of the job.
   */
  async getJob(model: string, jobId: string): Promise<ApiResponse> {
    return this.request(
      "GET",
      `/${encodeURIComponent(model)}/jobs/${encodeURIComponent(jobId)}`,
    );
  }

  /** Fetches information about the API key in use. */
  async getKeyInfo(): Promise<ApiResponse> {
    return this.request("GET", "/key/info");
  }
}