Skip to main content
Glama

Voice Recognition MCP Service

by yangsenessa
voice_service.py (10.9 kB)
import os
import time
import base64
import io
import re
from typing import Dict, Any, Optional, Union

import requests


class VoiceService:
    """Client that posts audio or text to a remote HTTP API.

    Every public request method returns a dictionary shaped like a
    JSON-RPC 2.0 message (``jsonrpc`` / ``output`` / ``id``). Failures are
    never raised to the caller; they are reported as an ``output`` of type
    ``"error"`` instead.
    """

    # Captures the content of every ``<|...|>`` tag in a label string.
    _LABEL_PATTERN = re.compile(r'<\|(.*?)\|>')

    # Recognised tag -> key of the parsed result that it fills.
    _LABEL_MAPPING = {
        "en": "lan",
        "EMO_UNKNOWN": "emo",
        "Speech": "type",
        "woitn": "speaker",
    }

    def __init__(
        self,
        api_url: Optional[str] = None,
        api_key: Optional[str] = None,
        timeout: Optional[float] = None,
    ):
        """Configure the service.

        Args:
            api_url: Endpoint to post to; falls back to the ``API_URL``
                environment variable.
            api_key: Bearer token; falls back to the ``API_KEY``
                environment variable.
            timeout: Seconds passed to ``requests.post`` as ``timeout``.
                ``None`` (the default) keeps the previous behaviour of
                waiting without a limit.

        Raises:
            ValueError: If the URL or the key is missing from both the
                arguments and the environment.
        """
        self.name = "Voice Recognition Service"
        self.version = "1.0.0"
        self.author = "AIO-2030"
        self.github = "https://github.com/AIO-2030/mcp_voice_identify"

        # Initialize API configuration
        self.api_url = api_url or os.getenv("API_URL")
        self.api_key = api_key or os.getenv("API_KEY")
        self.timeout = timeout

        if not self.api_url or not self.api_key:
            raise ValueError("API_URL and API_KEY must be provided either through constructor or environment variables")

    def parse_label_result(self, label_result: str) -> Dict[str, str]:
        """Parse the ``<|...|>`` tags of a label string into a dictionary.

        Recognised tags (see ``_LABEL_MAPPING``) are stored lower-cased
        under their mapped key. Any other tag that is not empty or a single
        space is stored under ``"text"``; a later one overwrites an earlier
        one. Keys that are never filled keep ``"unknown"`` (``""`` for
        ``"text"``).

        NOTE(review): only content *inside* ``<|...|>`` is captured, so
        free text that follows the tags never reaches ``"text"`` -- confirm
        this is intended.
        """
        result = {
            "lan": "unknown",
            "emo": "unknown",
            "type": "unknown",
            "speaker": "unknown",
            "text": "",
        }

        for match in self._LABEL_PATTERN.findall(label_result):
            if match in self._LABEL_MAPPING:
                result[self._LABEL_MAPPING[match]] = match.lower()
            elif match not in ("", " "):
                result["text"] = match

        return result

    def restructure_response(self, response: Dict[str, Any]) -> Dict[str, Any]:
        """Wrap an API reply in a JSON-RPC 2.0 style envelope.

        A reply without a ``"label_result"`` key is returned unchanged.
        Otherwise the label is parsed and exposed as ``output.metadata``;
        ``id`` is taken from the reply or, if absent, from the current time
        in milliseconds.
        """
        if "label_result" not in response:
            return response

        parsed_label = self.parse_label_result(response["label_result"])
        return {
            "jsonrpc": "2.0",
            "output": {
                "type": "voice",
                "message": response.get("message", "Voice processed successfully"),
                "text": response.get("results", ""),
                "metadata": {
                    "language": parsed_label["lan"],
                    "emotion": parsed_label["emo"],
                    "audio_type": parsed_label["type"],
                    "speaker": parsed_label["speaker"],
                    "raw_text": parsed_label["text"],
                },
            },
            "id": response.get("id", int(time.time() * 1000)),
        }

    @staticmethod
    def _string_arg_schema(arg_name: str, description: str) -> Dict[str, Any]:
        """Build a fresh input schema with one required string argument."""
        return {
            "type": "object",
            "properties": {
                arg_name: {
                    "type": "string",
                    "description": description,
                }
            },
            "required": [arg_name],
        }

    def get_help_info(self, include_mcp: bool = True) -> Dict[str, Any]:
        """Return a description of the service and its methods.

        Args:
            include_mcp: When true, also advertise the ``mcp`` transport,
                the ``tools_list`` / ``tools_call`` methods, and the
                ``prompts`` and ``resources`` sections.

        Returns:
            A newly built dictionary on every call, so callers may mutate it.
        """
        schema = self._string_arg_schema
        help_info = {
            "type": "voice_service",
            "description": "This service provides voice recognition and text extraction services",
            "author": self.author,
            "version": self.version,
            "github": self.github,
            "transport": ["stdio"],
            "methods": [
                {
                    "name": "help",
                    "description": "Show this help information.",
                },
                {
                    "name": "identify_voice",
                    "description": "Identify voice from file",
                    "inputSchema": schema("file_path", "Voice file path"),
                },
                {
                    "name": "identify_voice_base64",
                    "description": "Identify voice from base64 encoded data",
                    "inputSchema": schema("base64_data", "Base64 encoded voice data"),
                },
                {
                    "name": "extract_text",
                    "description": "Extract text",
                    "inputSchema": schema("text", "Text to extract"),
                },
            ],
        }

        if include_mcp:
            help_info["transport"].append("mcp")
            help_info["methods"].extend([
                {
                    "name": "tools_list",
                    "description": "List all available tools",
                },
                {
                    "name": "tools_call",
                    "description": "Call a tool",
                    "inputSchema": {
                        "type": "object",
                        "properties": {
                            "name": {
                                "type": "string",
                                "description": "Tool name",
                            },
                            "arguments": {
                                "type": "object",
                                "description": "Tool arguments",
                            },
                        },
                        "required": ["name"],
                    },
                },
            ])
            # NOTE(review): prompts/resources are treated as MCP-only here;
            # confirm they were not meant to appear in the stdio help too.
            help_info["prompts"] = [
                {
                    "name": "voice_recognition_prompt",
                    "description": "Create a voice recognition prompt template",
                    "inputSchema": schema("file_path", "Voice file path"),
                }
            ]
            help_info["resources"] = [
                {
                    "name": "voice_resource",
                    "description": "Provide voice file content as a resource",
                    "inputSchema": schema("file_path", "Voice file path"),
                }
            ]

        return help_info

    def help(self) -> Dict[str, Any]:
        """Return the non-MCP help information in JSON-RPC 2.0 format."""
        return {
            "jsonrpc": "2.0",
            "output": self.get_help_info(include_mcp=False),
            "id": int(time.time() * 1000),
        }

    @staticmethod
    def _error_response(exc: Exception, response: Optional[Any] = None) -> Dict[str, Any]:
        """Build the error envelope for *exc*.

        ``error_code`` is the HTTP status of *response* when a response was
        received, otherwise 500.
        """
        # Compare with None explicitly: a requests.Response is falsy for
        # 4xx/5xx statuses, which are exactly the ones reported here.
        status = response.status_code if response is not None else 500
        return {
            "jsonrpc": "2.0",
            "output": {
                "type": "error",
                "message": str(exc),
                "error_code": status,
            },
            "id": int(time.time() * 1000),
        }

    def _post_and_restructure(self, **request_kwargs: Any) -> Dict[str, Any]:
        """POST to the API and return the restructured reply or an error envelope.

        ``request_kwargs`` are forwarded to ``requests.post`` together with
        the configured URL and timeout.
        """
        response = None
        try:
            response = requests.post(
                self.api_url,
                timeout=self.timeout,
                **request_kwargs
            )
            response.raise_for_status()
            return self.restructure_response(response.json())
        except Exception as e:
            # Boundary handler: every failure becomes an error envelope.
            return self._error_response(e, response)

    def _upload_headers(self) -> Dict[str, str]:
        """Headers sent with the file-upload requests."""
        return {
            'Authorization': f'Bearer {self.api_key}',
            'accept': 'application/json',
        }

    def identify_voice(self, file_path: str) -> Dict[str, Any]:
        """Identify voice from file.

        The file is opened in binary mode and uploaded as the ``file``
        multipart field. A file that cannot be opened yields an error
        envelope with ``error_code`` 500.
        """
        try:
            with open(file_path, "rb") as f:
                return self._post_and_restructure(
                    headers=self._upload_headers(),
                    files={'file': f},
                )
        except Exception as e:
            # Only reached when opening/closing the file fails; request
            # errors are already handled inside the helper.
            return self._error_response(e)

    def identify_voice_base64(self, base64_data: str) -> Dict[str, Any]:
        """Identify voice from base64 encoded data.

        The decoded bytes are uploaded as the ``file`` multipart field
        under the file name ``audio.wav``. Data that cannot be decoded
        yields an error envelope with ``error_code`` 500.
        """
        try:
            # Convert base64 to file-like object
            file_obj = io.BytesIO(base64.b64decode(base64_data))
            file_obj.name = 'audio.wav'  # Set a filename
        except Exception as e:
            return self._error_response(e)

        return self._post_and_restructure(
            headers=self._upload_headers(),
            files={'file': file_obj},
        )

    def extract_text(self, text: str) -> Dict[str, Any]:
        """Extract text by posting ``{"text": text}`` as JSON to the API."""
        return self._post_and_restructure(
            headers={"Authorization": f"Bearer {self.api_key}"},
            json={"text": text},
        )

    def voice_recognition_prompt(self, file_path: str) -> str:
        """Create a voice recognition prompt template"""
        return f"Please process this voice file: {file_path}"

    def voice_resource(self, file_path: str) -> Union[str, Dict[str, str]]:
        """Provide voice file content as a resource.

        Returns:
            The file content as a base64 string, or ``{"error": message}``
            when the file cannot be read.
        """
        try:
            with open(file_path, "rb") as f:
                return base64.b64encode(f.read()).decode()
        except Exception as e:
            return {"error": str(e)}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/yangsenessa/mcp_voice_identify'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.