// voice.proto
/*
* Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
* http://github.com/fonoster/fonoster
*
* This file is part of Fonoster
*
* Licensed under the MIT License (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* https://opensource.org/licenses/MIT
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
syntax = "proto3";
package fonoster.voice.v1beta2;
import "google/protobuf/struct.proto";
// Voice is the voice service. It exposes a single bidirectional-streaming RPC
// over which a whole session is driven.
service Voice {
  // Opens a voice session. The caller streams VoiceInStream messages and
  // receives VoiceOutStream messages on the same RPC.
  rpc CreateSession(stream VoiceInStream) returns (stream VoiceOutStream);
}
// CreateSessionRequest describes the session being created. It travels as the
// `request` member of VoiceInStream on the CreateSession RPC.
message CreateSessionRequest {
  // The direction of the call.
  enum CallDirection {
    // Zero value; an unset call_direction reads as FROM_PSTN.
    FROM_PSTN = 0;
    TO_PSTN = 1;
    INTRA_NETWORK = 2;
  }

  // The account making the request on behalf of the caller.
  string access_key_id = 1;

  // Reference to an application living in the platform.
  string app_ref = 2;

  // gRPC endpoint for the voice service.
  string endpoint = 3;

  // PSTN number or SIP URI associated with the session.
  string ingress_number = 4;

  // The name of the caller.
  string caller_name = 5;

  // The number of the phone the call is coming from.
  string caller_number = 6;

  // The session ID generated by the Media Server.
  string session_ref = 7;

  // Token used to validate the provenance of the request. If app_ref is
  // provided, the token must include the app_ref as a claim.
  string session_token = 8;

  // The direction of the call.
  CallDirection call_direction = 9;

  // Metadata associated with the session.
  map<string, string> metadata = 10;
}
// VerbRequest is the common request for verbs that need nothing beyond the
// session itself. VoiceOutStream uses it for the answer and hangup requests.
message VerbRequest {
  // The session reference the verb applies to.
  string session_ref = 1;
}
// VerbResponse is the common response for verbs that return nothing beyond
// the session itself. VoiceInStream uses it for several of its members
// (answer, hangup, mute, unmute, and others).
message VerbResponse {
  // The session reference the response belongs to.
  string session_ref = 1;
}
// PlayRequest asks for a sound to be played.
message PlayRequest {
  // The session reference generated by the Media Server.
  string session_ref = 1;

  // The URL of the sound to play.
  string url = 2;
}
// PlayResponse is the response to a play request. VoiceInStream also uses it
// as the response to a say request.
message PlayResponse {
  // The session reference generated by the Media Server.
  string session_ref = 1;

  // The playback reference generated by the Media Server.
  string playback_ref = 2;
}
// PlaybackControlRequest asks for an action to be applied to a playback. It
// travels as the `playback_control_request` member of VoiceOutStream.
message PlaybackControlRequest {
  // The action to take.
  enum Action {
    // Zero value; an unset action reads as STOP.
    STOP = 0;
    RESTART = 1;
    PAUSE = 2;
    UNPAUSE = 3;
    FORWARD = 4;
  }

  // The session reference generated by the Media Server.
  string session_ref = 1;

  // The playback reference generated by the Media Server.
  string playback_ref = 2;

  // The action to take on the playback.
  Action action = 3;
}
// PlayDtmfRequest asks for DTMF digits to be played. It travels as the
// `play_dtmf_request` member of VoiceOutStream.
message PlayDtmfRequest {
  // The session reference generated by the Media Server.
  string session_ref = 1;

  // The digits to play.
  string digits = 2;
}
// MuteRequest asks to mute or unmute the call. VoiceOutStream uses this same
// message for both its mute and unmute requests.
message MuteRequest {
  // The direction to mute.
  enum MuteDirection {
    // Zero value; an unset direction reads as IN.
    IN = 0;
    OUT = 1;
    BOTH = 2;
  }

  // The session reference generated by the Media Server.
  string session_ref = 1;

  // The direction the mute (or unmute) applies to.
  MuteDirection direction = 2;
}
// GatherRequest asks to gather speech or DTMF from the call.
message GatherRequest {
  // The source of the gather.
  enum GatherSource {
    // Zero value; an unset source reads as SPEECH.
    SPEECH = 0;
    DTMF = 1;
    SPEECH_AND_DTMF = 2;
  }

  // The session reference generated by the Media Server.
  string session_ref = 1;

  // Where the input is gathered from.
  GatherSource source = 2;

  // Optional key that finishes the gather.
  string finish_on_key = 3;

  // Optional timeout, in seconds.
  int32 timeout = 4;

  // Optional number of digits to gather.
  int32 max_digits = 5;
}
// GatherResponse is the response to a gather request.
message GatherResponse {
  // The session reference generated by the Media Server.
  string session_ref = 1;

  // What was gathered: either speech or digits, never both.
  oneof content {
    // The gathered speech.
    string speech = 2;
    // The gathered digits.
    string digits = 3;
  }

  // Response time of the gather.
  // NOTE(review): the unit and the measured interval are not stated in this
  // file; confirm before relying on them.
  int32 response_time = 4;
}
// RecordRequest asks to initiate a recording.
message RecordRequest {
  // The session reference generated by the Media Server.
  string session_ref = 1;

  // The maximum duration of the recording, in seconds.
  int32 max_duration = 2;

  // The maximum silence before ending the recording, in seconds.
  int32 max_silence = 3;

  // Whether to play a beep before recording.
  bool beep = 4;

  // Key that finishes the recording.
  string finish_on_key = 5;
}
// RecordResponse is the response to a record request.
message RecordResponse {
  // The format of the recording.
  enum RecordFormat {
    // Only value defined; also the zero value an unset format reads as.
    WAV = 0;
  }

  // The session reference generated by the Media Server.
  string session_ref = 1;

  // The name of the recording.
  string name = 2;

  // The duration of the recording.
  // NOTE(review): unit is not stated here; presumably seconds, matching
  // RecordRequest.max_duration. Confirm.
  int32 duration = 3;

  // The format of the recording.
  RecordFormat format = 4;
}
// SayRequest asks for a text to be said (TTS).
message SayRequest {
  // The session reference generated by the Media Server.
  string session_ref = 1;

  // The text to say.
  string text = 2;

  // Playback reference to assign to this request.
  string playback_ref = 3;

  // TTS options, as free-form structured data.
  google.protobuf.Struct options = 4;
}
// DialRequest asks to dial a number.
message DialRequest {
  // Record direction.
  enum RecordDirection {
    // Zero value; an unset record_direction reads as IN.
    IN = 0;
    OUT = 1;
    BOTH = 2;
  }

  // The session reference generated by the Media Server.
  string session_ref = 1;

  // The number to dial.
  string destination = 2;

  // The caller ID to use (reserved for future use).
  string caller_id = 3;

  // The timeout, in seconds.
  int32 timeout = 4;

  // Record direction for the dialed call.
  RecordDirection record_direction = 5;
}
// DialStatus reports the progress of a dial request.
message DialStatus {
  // The status of the dial.
  enum Status {
    // Zero value; an unset status reads as TRYING.
    TRYING = 0;
    CANCEL = 1;
    ANSWER = 2;
    BUSY = 3;
    PROGRESS = 4;
    NOANSWER = 5;
    // Maps from Asterisk's CHANUNAVAIL and CONGESTION.
    FAILED = 6;
  }

  // The session reference generated by the Media Server.
  string session_ref = 1;

  // The current status of the dial.
  Status status = 2;
}
// StartStreamRequest asks to start an audio stream.
message StartStreamRequest {
  // The direction of the stream.
  enum StreamDirection {
    // Zero value; an unset direction reads as IN.
    IN = 0;
    OUT = 1;
    BOTH = 2;
  }

  // Output format.
  enum StreamAudioFormat {
    // Only value defined; also the zero value an unset format reads as.
    WAV = 0;
  }

  // The session reference generated by the Media Server.
  string session_ref = 1;

  // The direction of the stream.
  StreamDirection direction = 2;

  // Output format of the stream.
  StreamAudioFormat format = 3;
}
// StartStreamResponse is the response to a start stream request.
message StartStreamResponse {
  // The session reference generated by the Media Server.
  string session_ref = 1;

  // The stream reference generated by the Media Server.
  string stream_ref = 2;
}
// StopStreamRequest asks to stop an audio stream.
message StopStreamRequest {
  // The session reference generated by the Media Server.
  string session_ref = 1;

  // The stream reference generated by the Media Server; identifies the stream
  // to stop.
  string stream_ref = 2;
}
// StreamPayload is the message exchanged on the bidirectional streams. It is
// a member of both VoiceInStream and VoiceOutStream.
message StreamPayload {
  // The message type.
  enum StreamMessageType {
    // Zero value; an unset type reads as AUDIO_IN.
    AUDIO_IN = 0;
    AUDIO_OUT = 1;
    ERROR = 2;
  }

  // The session reference generated by the Media Server.
  string session_ref = 1;

  // The stream reference generated by the Media Server.
  string stream_ref = 2;

  // The type of this message.
  StreamMessageType type = 3;

  // The audio data.
  bytes data = 4;

  // Error code, if any.
  // TODO: decide whether to use the same error codes as gRPC.
  string code = 5;

  // Error message, if any.
  string message = 6;
}
// StartStreamGatherRequest asks to start a stream gather.
message StartStreamGatherRequest {
  // The source of the gather.
  enum StreamGatherSource {
    // Zero value; an unset source reads as SPEECH.
    SPEECH = 0;
    DTMF = 1;
    SPEECH_AND_DTMF = 2;
  }

  // The session reference generated by the Media Server.
  string session_ref = 1;

  // Where the input is gathered from.
  StreamGatherSource source = 2;
}
// StopStreamGatherRequest asks to stop a stream gather.
message StopStreamGatherRequest {
  // The session reference generated by the Media Server.
  string session_ref = 1;
}
// StreamGatherPayload carries the data produced by a stream gather.
message StreamGatherPayload {
  // The session reference generated by the Media Server.
  string session_ref = 1;

  // What was gathered: either speech or a single digit, never both.
  oneof content {
    // The gathered speech.
    string speech = 2;
    // The gathered digit.
    string digit = 3;
  }

  // Response time of the gather.
  // NOTE(review): the unit and the measured interval are not stated in this
  // file; confirm before relying on them.
  int32 response_time = 4;
}
// VoiceInStream is the input stream for the voice service. Each message
// carries at most one of: the initial session request, a response to a verb
// request, a dial status update, or a stream payload.
message VoiceInStream {
  oneof content {
    // Initial request to create a new session.
    CreateSessionRequest request = 1;

    // Response to an answer request.
    VerbResponse answer_response = 2;

    // Response to a play request.
    PlayResponse play_response = 3;

    // Response to a hangup request.
    VerbResponse hangup_response = 4;

    // Response to a mute request.
    VerbResponse mute_response = 5;

    // Response to an unmute request.
    VerbResponse unmute_response = 6;

    // Response to a play DTMF request.
    VerbResponse play_dtmf_response = 7;

    // Response to a gather request.
    GatherResponse gather_response = 8;

    // Response to a playback control request.
    VerbResponse playback_control_response = 9;

    // Response to a say request (it shares the same response as play).
    PlayResponse say_response = 10;

    // Response to a record request.
    RecordResponse record_response = 11;

    // Status of a dial request's progress.
    DialStatus dial_status = 12;

    // Response to a start stream request.
    StartStreamResponse start_stream_response = 13;

    // Message for the bidirectional streams.
    StreamPayload stream_payload = 14;

    // Response to a start stream gather request.
    VerbResponse start_stream_gather_response = 15;

    // Response to a stop stream gather request.
    VerbResponse stop_stream_gather_response = 16;

    // Message with the payload of a stream gather.
    StreamGatherPayload stream_gather_payload = 17;
  }
}
// VoiceOutStream is the output stream for the voice service. Each message
// carries at most one verb request or stream payload.
message VoiceOutStream {
  // Field numbers 8 and 13 are not assigned to any member of the oneof below.
  // Reserving them makes protoc reject any new field that tries to use them;
  // it does not change the wire format or the generated API.
  // NOTE(review): confirm these gaps are retired numbers rather than slots
  // deliberately held for planned fields.
  reserved 8, 13;

  oneof content {
    // Request to accept the call.
    VerbRequest answer_request = 1;

    // Request to play a sound.
    PlayRequest play_request = 2;

    // Request to hang up the call.
    VerbRequest hangup_request = 3;

    // Request to mute the call.
    MuteRequest mute_request = 4;

    // Request to unmute the call (same message type as mute_request).
    MuteRequest unmute_request = 5;

    // Request to play DTMF.
    PlayDtmfRequest play_dtmf_request = 6;

    // Request to gather speech or DTMF.
    GatherRequest gather_request = 7;

    // Request to control a playback.
    PlaybackControlRequest playback_control_request = 9;

    // Request to say a text.
    SayRequest say_request = 10;

    // Request to record.
    RecordRequest record_request = 11;

    // Request to dial a number.
    DialRequest dial_request = 12;

    // Request to start an audio stream.
    StartStreamRequest start_stream_request = 14;

    // Request to stop an audio stream.
    StopStreamRequest stop_stream_request = 15;

    // Message for the bidirectional streams.
    StreamPayload stream_payload = 16;

    // Request to start a stream gather.
    StartStreamGatherRequest start_stream_gather_request = 17;

    // Request to stop a stream gather.
    StopStreamGatherRequest stop_stream_gather_request = 18;
  }
}