@@ -19,7 +19,6 @@ package google.cloud.speech.v1p1beta1;
19
19
import "google/api/annotations.proto" ;
20
20
import "google/api/client.proto" ;
21
21
import "google/api/field_behavior.proto" ;
22
- import "google/api/resource.proto" ;
23
22
import "google/cloud/speech/v1p1beta1/resource.proto" ;
24
23
import "google/longrunning/operations.proto" ;
25
24
import "google/protobuf/any.proto" ;
@@ -37,8 +36,7 @@ option objc_class_prefix = "GCS";
37
36
// Service that implements Google Cloud Speech API.
38
37
service Speech {
39
38
option (google.api.default_host ) = "speech.googleapis.com" ;
40
- option (google.api.oauth_scopes ) =
41
- "https://www.googleapis.com/auth/cloud-platform" ;
39
+ option (google.api.oauth_scopes ) = "https://www.googleapis.com/auth/cloud-platform" ;
42
40
43
41
// Performs synchronous speech recognition: receive results after all audio
44
42
// has been sent and processed.
@@ -56,8 +54,7 @@ service Speech {
56
54
// a `LongRunningRecognizeResponse` message.
57
55
// For more information on asynchronous speech recognition, see the
58
56
// [how-to](https://cloud.google.com/speech-to-text/docs/async-recognize).
59
- rpc LongRunningRecognize (LongRunningRecognizeRequest )
60
- returns (google .longrunning .Operation ) {
57
+ rpc LongRunningRecognize (LongRunningRecognizeRequest ) returns (google .longrunning .Operation ) {
61
58
option (google.api.http ) = {
62
59
post : "/v1p1beta1/speech:longrunningrecognize"
63
60
body : "*"
@@ -71,8 +68,8 @@ service Speech {
71
68
72
69
// Performs bidirectional streaming speech recognition: receive results while
73
70
// sending audio. This method is only available via the gRPC API (not REST).
74
- rpc StreamingRecognize (stream StreamingRecognizeRequest )
75
- returns ( stream StreamingRecognizeResponse ) { }
71
+ rpc StreamingRecognize (stream StreamingRecognizeRequest ) returns ( stream StreamingRecognizeResponse ) {
72
+ }
76
73
}
77
74
78
75
// The top-level message sent by the client for the `Recognize` method.
@@ -94,6 +91,19 @@ message LongRunningRecognizeRequest {
94
91
95
92
// Required. The audio data to be recognized.
96
93
RecognitionAudio audio = 2 [(google.api.field_behavior ) = REQUIRED ];
94
+
95
+ // Optional. Specifies an optional destination for the recognition results.
96
+ TranscriptOutputConfig output_config = 4 [(google.api.field_behavior ) = OPTIONAL ];
97
+ }
98
+
99
+ // Specifies an optional destination for the recognition results.
100
+ message TranscriptOutputConfig {
101
+ oneof output_type {
102
+ // Specifies a Cloud Storage URI for the recognition results. Must be
103
+ // specified in the format: `gs://bucket_name/object_name`, and the bucket
104
+ // must already exist.
105
+ string gcs_uri = 1 ;
106
+ }
97
107
}
98
108
99
109
// The top-level message sent by the client for the `StreamingRecognize` method.
@@ -171,7 +181,7 @@ message RecognitionConfig {
171
181
// a lossless encoding (`FLAC` or `LINEAR16`). The accuracy of the speech
172
182
// recognition can be reduced if lossy codecs are used to capture or transmit
173
183
// audio, particularly if background noise is present. Lossy codecs include
174
- // `MULAW`, `AMR`, `AMR_WB`, `OGG_OPUS`, `SPEEX_WITH_HEADER_BYTE`, and `MP3`.
184
+ // `MULAW`, `AMR`, `AMR_WB`, `OGG_OPUS`, `SPEEX_WITH_HEADER_BYTE`, and `MP3`.
175
185
//
176
186
// The `FLAC` and `WAV` audio file formats include a header that describes the
177
187
// included audio content. You can request recognition for `WAV` files that
@@ -182,8 +192,7 @@ message RecognitionConfig {
182
192
// an `AudioEncoding` when you send `FLAC` or `WAV` audio, the
183
193
// encoding configuration must match the encoding described in the audio
184
194
// header; otherwise the request returns an
185
- // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
186
- // code.
195
+ // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
187
196
enum AudioEncoding {
188
197
// Not specified.
189
198
ENCODING_UNSPECIFIED = 0 ;
@@ -237,8 +246,7 @@ message RecognitionConfig {
237
246
238
247
// Encoding of audio data sent in all `RecognitionAudio` messages.
239
248
// This field is optional for `FLAC` and `WAV` audio files and required
240
- // for all other audio formats. For details, see
241
- // [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
249
+ // for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
242
250
AudioEncoding encoding = 1 ;
243
251
244
252
// Sample rate in Hertz of the audio data sent in all
@@ -247,8 +255,7 @@ message RecognitionConfig {
247
255
// source to 16000 Hz. If that's not possible, use the native sample rate of
248
256
// the audio source (instead of re-sampling).
249
257
// This field is optional for FLAC and WAV audio files, but is
250
- // required for all other audio formats. For details, see
251
- // [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
258
+ // required for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
252
259
int32 sample_rate_hertz = 2 ;
253
260
254
261
// The number of channels in the input audio data.
@@ -424,8 +431,10 @@ message SpeakerDiarizationConfig {
424
431
int32 max_speaker_count = 3 ;
425
432
426
433
// Output only. Unused.
427
- int32 speaker_tag = 5
428
- [deprecated = true , (google.api.field_behavior ) = OUTPUT_ONLY ];
434
+ int32 speaker_tag = 5 [
435
+ deprecated = true ,
436
+ (google.api.field_behavior ) = OUTPUT_ONLY
437
+ ];
429
438
}
430
439
431
440
// Description of audio data to be recognized.
@@ -589,8 +598,8 @@ message SpeechContext {
589
598
590
599
// Contains audio data in the encoding specified in the `RecognitionConfig`.
591
600
// Either `content` or `uri` must be supplied. Supplying both or neither
592
- // returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
593
- // See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
601
+ // returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
602
+ // [content limits](https://cloud.google.com/speech-to-text/quotas#content).
594
603
message RecognitionAudio {
595
604
// The audio source, which is either inline content or a Google Cloud
596
605
// Storage uri.
@@ -605,9 +614,8 @@ message RecognitionAudio {
605
614
// Currently, only Google Cloud Storage URIs are
606
615
// supported, which must be specified in the following format:
607
616
// `gs://bucket_name/object_name` (other URI formats return
608
- // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
609
- // For more information, see [Request
610
- // URIs](https://cloud.google.com/storage/docs/reference-uris).
617
+ // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
618
+ // [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
611
619
string uri = 2 ;
612
620
}
613
621
}
@@ -630,6 +638,12 @@ message LongRunningRecognizeResponse {
630
638
// Sequential list of transcription results corresponding to
631
639
// sequential portions of audio.
632
640
repeated SpeechRecognitionResult results = 2 ;
641
+
642
+ // Original output config if present in the request.
643
+ TranscriptOutputConfig output_config = 6 ;
644
+
645
+ // If the transcript output fails, this field contains the relevant error.
646
+ google.rpc.Status output_error = 7 ;
633
647
}
634
648
635
649
// Describes the progress of a long-running `LongRunningRecognize` call. It is
@@ -646,9 +660,12 @@ message LongRunningRecognizeMetadata {
646
660
// Time of the most recent processing update.
647
661
google.protobuf.Timestamp last_update_time = 3 ;
648
662
649
- // Output only. The URI of the audio file being transcribed. Empty if the
650
- // audio was sent as byte content.
663
+ // Output only. The URI of the audio file being transcribed. Empty if the audio was sent
664
+ // as byte content.
651
665
string uri = 4 [(google.api.field_behavior ) = OUTPUT_ONLY ];
666
+
667
+ // Output only. A copy of the TranscriptOutputConfig if it was set in the request.
668
+ TranscriptOutputConfig output_config = 5 [(google.api.field_behavior ) = OUTPUT_ONLY ];
652
669
}
653
670
654
671
// `StreamingRecognizeResponse` is the only message returned to the client by
@@ -762,9 +779,9 @@ message StreamingRecognitionResult {
762
779
// For audio_channel_count = N, its output values can range from '1' to 'N'.
763
780
int32 channel_tag = 5 ;
764
781
765
- // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
766
- // language tag of the language in this result. This language code was
767
- // detected to have the most likelihood of being spoken in the audio.
782
+ // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
783
+ // of the language in this result. This language code was detected to have
784
+ // the most likelihood of being spoken in the audio.
768
785
string language_code = 6 [(google.api.field_behavior ) = OUTPUT_ONLY ];
769
786
}
770
787
@@ -781,9 +798,9 @@ message SpeechRecognitionResult {
781
798
// For audio_channel_count = N, its output values can range from '1' to 'N'.
782
799
int32 channel_tag = 2 ;
783
800
784
- // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
785
- // language tag of the language in this result. This language code was
786
- // detected to have the most likelihood of being spoken in the audio.
801
+ // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
802
+ // of the language in this result. This language code was detected to have
803
+ // the most likelihood of being spoken in the audio.
787
804
string language_code = 5 [(google.api.field_behavior ) = OUTPUT_ONLY ];
788
805
}
789
806
0 commit comments