Skip to content
This repository was archived by the owner on Apr 20, 2024. It is now read-only.

Commit 5974564

Browse files
feat: Support output transcript to Google Cloud Storage for LongRunningRecognize (#128)
* chore: upgrade gapic-generator-python to 0.42.2 PiperOrigin-RevId: 361662015 Source-Author: Google APIs <[email protected]> Source-Date: Mon Mar 8 14:47:18 2021 -0800 Source-Repo: googleapis/googleapis Source-Sha: 28a591963253d52ce3a25a918cafbdd9928de8cf Source-Link: googleapis/googleapis@28a5919 * feat: Support output transcript to GCS for LongRunningRecognize. PiperOrigin-RevId: 362294447 Source-Author: Google APIs <[email protected]> Source-Date: Thu Mar 11 08:07:37 2021 -0800 Source-Repo: googleapis/googleapis Source-Sha: b6ebac16c3aecb798d4f25443d96df2f42a965ca Source-Link: googleapis/googleapis@b6ebac1 * feat: Support output transcript to GCS for LongRunningRecognize. PiperOrigin-RevId: 362934100 Source-Author: Google APIs <[email protected]> Source-Date: Mon Mar 15 07:18:03 2021 -0700 Source-Repo: googleapis/googleapis Source-Sha: 72326861be446be27d53af95c87e6e313367c371 Source-Link: googleapis/googleapis@7232686
1 parent a3bfd74 commit 5974564

File tree

11 files changed

+469
-101
lines changed

11 files changed

+469
-101
lines changed

google/cloud/speech_v1/types/__init__.py

+22-22
Original file line numberDiff line numberDiff line change
@@ -16,41 +16,41 @@
1616
#
1717

1818
from .cloud_speech import (
19-
RecognizeRequest,
19+
LongRunningRecognizeMetadata,
2020
LongRunningRecognizeRequest,
21-
StreamingRecognizeRequest,
22-
StreamingRecognitionConfig,
21+
LongRunningRecognizeResponse,
22+
RecognitionAudio,
2323
RecognitionConfig,
24-
SpeakerDiarizationConfig,
2524
RecognitionMetadata,
26-
SpeechContext,
27-
RecognitionAudio,
25+
RecognizeRequest,
2826
RecognizeResponse,
29-
LongRunningRecognizeResponse,
30-
LongRunningRecognizeMetadata,
31-
StreamingRecognizeResponse,
32-
StreamingRecognitionResult,
33-
SpeechRecognitionResult,
27+
SpeakerDiarizationConfig,
28+
SpeechContext,
3429
SpeechRecognitionAlternative,
30+
SpeechRecognitionResult,
31+
StreamingRecognitionConfig,
32+
StreamingRecognitionResult,
33+
StreamingRecognizeRequest,
34+
StreamingRecognizeResponse,
3535
WordInfo,
3636
)
3737

3838
__all__ = (
39-
"RecognizeRequest",
39+
"LongRunningRecognizeMetadata",
4040
"LongRunningRecognizeRequest",
41-
"StreamingRecognizeRequest",
42-
"StreamingRecognitionConfig",
41+
"LongRunningRecognizeResponse",
42+
"RecognitionAudio",
4343
"RecognitionConfig",
44-
"SpeakerDiarizationConfig",
4544
"RecognitionMetadata",
46-
"SpeechContext",
47-
"RecognitionAudio",
45+
"RecognizeRequest",
4846
"RecognizeResponse",
49-
"LongRunningRecognizeResponse",
50-
"LongRunningRecognizeMetadata",
51-
"StreamingRecognizeResponse",
52-
"StreamingRecognitionResult",
53-
"SpeechRecognitionResult",
47+
"SpeakerDiarizationConfig",
48+
"SpeechContext",
5449
"SpeechRecognitionAlternative",
50+
"SpeechRecognitionResult",
51+
"StreamingRecognitionConfig",
52+
"StreamingRecognitionResult",
53+
"StreamingRecognizeRequest",
54+
"StreamingRecognizeResponse",
5555
"WordInfo",
5656
)

google/cloud/speech_v1p1beta1/__init__.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from .types.cloud_speech import StreamingRecognitionResult
3434
from .types.cloud_speech import StreamingRecognizeRequest
3535
from .types.cloud_speech import StreamingRecognizeResponse
36+
from .types.cloud_speech import TranscriptOutputConfig
3637
from .types.cloud_speech import WordInfo
3738
from .types.cloud_speech_adaptation import CreateCustomClassRequest
3839
from .types.cloud_speech_adaptation import CreatePhraseSetRequest
@@ -59,7 +60,6 @@ class SpeechClient(SpeechHelpers, SpeechClient):
5960

6061

6162
__all__ = (
62-
"AdaptationClient",
6363
"CreateCustomClassRequest",
6464
"CreatePhraseSetRequest",
6565
"CustomClass",
@@ -82,15 +82,17 @@ class SpeechClient(SpeechHelpers, SpeechClient):
8282
"RecognizeResponse",
8383
"SpeakerDiarizationConfig",
8484
"SpeechAdaptation",
85+
"SpeechClient",
8586
"SpeechContext",
8687
"SpeechRecognitionAlternative",
8788
"SpeechRecognitionResult",
8889
"StreamingRecognitionConfig",
8990
"StreamingRecognitionResult",
9091
"StreamingRecognizeRequest",
9192
"StreamingRecognizeResponse",
93+
"TranscriptOutputConfig",
9294
"UpdateCustomClassRequest",
9395
"UpdatePhraseSetRequest",
9496
"WordInfo",
95-
"SpeechClient",
97+
"AdaptationClient",
9698
)

google/cloud/speech_v1p1beta1/proto/cloud_speech.proto

+46-29
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package google.cloud.speech.v1p1beta1;
1919
import "google/api/annotations.proto";
2020
import "google/api/client.proto";
2121
import "google/api/field_behavior.proto";
22-
import "google/api/resource.proto";
2322
import "google/cloud/speech/v1p1beta1/resource.proto";
2423
import "google/longrunning/operations.proto";
2524
import "google/protobuf/any.proto";
@@ -37,8 +36,7 @@ option objc_class_prefix = "GCS";
3736
// Service that implements Google Cloud Speech API.
3837
service Speech {
3938
option (google.api.default_host) = "speech.googleapis.com";
40-
option (google.api.oauth_scopes) =
41-
"https://www.googleapis.com/auth/cloud-platform";
39+
option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
4240

4341
// Performs synchronous speech recognition: receive results after all audio
4442
// has been sent and processed.
@@ -56,8 +54,7 @@ service Speech {
5654
// a `LongRunningRecognizeResponse` message.
5755
// For more information on asynchronous speech recognition, see the
5856
// [how-to](https://cloud.google.com/speech-to-text/docs/async-recognize).
59-
rpc LongRunningRecognize(LongRunningRecognizeRequest)
60-
returns (google.longrunning.Operation) {
57+
rpc LongRunningRecognize(LongRunningRecognizeRequest) returns (google.longrunning.Operation) {
6158
option (google.api.http) = {
6259
post: "/v1p1beta1/speech:longrunningrecognize"
6360
body: "*"
@@ -71,8 +68,8 @@ service Speech {
7168

7269
// Performs bidirectional streaming speech recognition: receive results while
7370
// sending audio. This method is only available via the gRPC API (not REST).
74-
rpc StreamingRecognize(stream StreamingRecognizeRequest)
75-
returns (stream StreamingRecognizeResponse) {}
71+
rpc StreamingRecognize(stream StreamingRecognizeRequest) returns (stream StreamingRecognizeResponse) {
72+
}
7673
}
7774

7875
// The top-level message sent by the client for the `Recognize` method.
@@ -94,6 +91,19 @@ message LongRunningRecognizeRequest {
9491

9592
// Required. The audio data to be recognized.
9693
RecognitionAudio audio = 2 [(google.api.field_behavior) = REQUIRED];
94+
95+
// Optional. Specifies an optional destination for the recognition results.
96+
TranscriptOutputConfig output_config = 4 [(google.api.field_behavior) = OPTIONAL];
97+
}
98+
99+
// Specifies an optional destination for the recognition results.
100+
message TranscriptOutputConfig {
101+
oneof output_type {
102+
// Specifies a Cloud Storage URI for the recognition results. Must be
103+
// specified in the format: `gs://bucket_name/object_name`, and the bucket
104+
// must already exist.
105+
string gcs_uri = 1;
106+
}
97107
}
98108

99109
// The top-level message sent by the client for the `StreamingRecognize` method.
@@ -171,7 +181,7 @@ message RecognitionConfig {
171181
// a lossless encoding (`FLAC` or `LINEAR16`). The accuracy of the speech
172182
// recognition can be reduced if lossy codecs are used to capture or transmit
173183
// audio, particularly if background noise is present. Lossy codecs include
174-
// `MULAW`, `AMR`, `AMR_WB`, `OGG_OPUS`, `SPEEX_WITH_HEADER_BYTE`, and `MP3`.
184+
// `MULAW`, `AMR`, `AMR_WB`, `OGG_OPUS`, `SPEEX_WITH_HEADER_BYTE`, `MP3`.
175185
//
176186
// The `FLAC` and `WAV` audio file formats include a header that describes the
177187
// included audio content. You can request recognition for `WAV` files that
@@ -182,8 +192,7 @@ message RecognitionConfig {
182192
// an `AudioEncoding` when you send `FLAC` or `WAV` audio, the
183193
// encoding configuration must match the encoding described in the audio
184194
// header; otherwise the request returns an
185-
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
186-
// code.
195+
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
187196
enum AudioEncoding {
188197
// Not specified.
189198
ENCODING_UNSPECIFIED = 0;
@@ -237,8 +246,7 @@ message RecognitionConfig {
237246

238247
// Encoding of audio data sent in all `RecognitionAudio` messages.
239248
// This field is optional for `FLAC` and `WAV` audio files and required
240-
// for all other audio formats. For details, see
241-
// [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
249+
// for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
242250
AudioEncoding encoding = 1;
243251

244252
// Sample rate in Hertz of the audio data sent in all
@@ -247,8 +255,7 @@ message RecognitionConfig {
247255
// source to 16000 Hz. If that's not possible, use the native sample rate of
248256
// the audio source (instead of re-sampling).
249257
// This field is optional for FLAC and WAV audio files, but is
250-
// required for all other audio formats. For details, see
251-
// [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
258+
// required for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
252259
int32 sample_rate_hertz = 2;
253260

254261
// The number of channels in the input audio data.
@@ -424,8 +431,10 @@ message SpeakerDiarizationConfig {
424431
int32 max_speaker_count = 3;
425432

426433
// Output only. Unused.
427-
int32 speaker_tag = 5
428-
[deprecated = true, (google.api.field_behavior) = OUTPUT_ONLY];
434+
int32 speaker_tag = 5 [
435+
deprecated = true,
436+
(google.api.field_behavior) = OUTPUT_ONLY
437+
];
429438
}
430439

431440
// Description of audio data to be recognized.
@@ -589,8 +598,8 @@ message SpeechContext {
589598

590599
// Contains audio data in the encoding specified in the `RecognitionConfig`.
591600
// Either `content` or `uri` must be supplied. Supplying both or neither
592-
// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
593-
// See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
601+
// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
602+
// [content limits](https://cloud.google.com/speech-to-text/quotas#content).
594603
message RecognitionAudio {
595604
// The audio source, which is either inline content or a Google Cloud
596605
// Storage uri.
@@ -605,9 +614,8 @@ message RecognitionAudio {
605614
// Currently, only Google Cloud Storage URIs are
606615
// supported, which must be specified in the following format:
607616
// `gs://bucket_name/object_name` (other URI formats return
608-
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
609-
// For more information, see [Request
610-
// URIs](https://cloud.google.com/storage/docs/reference-uris).
617+
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
618+
// [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
611619
string uri = 2;
612620
}
613621
}
@@ -630,6 +638,12 @@ message LongRunningRecognizeResponse {
630638
// Sequential list of transcription results corresponding to
631639
// sequential portions of audio.
632640
repeated SpeechRecognitionResult results = 2;
641+
642+
// Original output config if present in the request.
643+
TranscriptOutputConfig output_config = 6;
644+
645+
// If the transcript output fails this field contains the relevant error.
646+
google.rpc.Status output_error = 7;
633647
}
634648

635649
// Describes the progress of a long-running `LongRunningRecognize` call. It is
@@ -646,9 +660,12 @@ message LongRunningRecognizeMetadata {
646660
// Time of the most recent processing update.
647661
google.protobuf.Timestamp last_update_time = 3;
648662

649-
// Output only. The URI of the audio file being transcribed. Empty if the
650-
// audio was sent as byte content.
663+
// Output only. The URI of the audio file being transcribed. Empty if the audio was sent
664+
// as byte content.
651665
string uri = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
666+
667+
// Output only. A copy of the TranscriptOutputConfig if it was set in the request.
668+
TranscriptOutputConfig output_config = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
652669
}
653670

654671
// `StreamingRecognizeResponse` is the only message returned to the client by
@@ -762,9 +779,9 @@ message StreamingRecognitionResult {
762779
// For audio_channel_count = N, its output values can range from '1' to 'N'.
763780
int32 channel_tag = 5;
764781

765-
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
766-
// language tag of the language in this result. This language code was
767-
// detected to have the most likelihood of being spoken in the audio.
782+
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
783+
// of the language in this result. This language code was detected to have
784+
// the most likelihood of being spoken in the audio.
768785
string language_code = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
769786
}
770787

@@ -781,9 +798,9 @@ message SpeechRecognitionResult {
781798
// For audio_channel_count = N, its output values can range from '1' to 'N'.
782799
int32 channel_tag = 2;
783800

784-
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
785-
// language tag of the language in this result. This language code was
786-
// detected to have the most likelihood of being spoken in the audio.
801+
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
802+
// of the language in this result. This language code was detected to have
803+
// the most likelihood of being spoken in the audio.
787804
string language_code = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
788805
}
789806

google/cloud/speech_v1p1beta1/proto/cloud_speech_adaptation.proto

+2
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ service Adaptation {
6969
patch: "/v1p1beta1/{phrase_set.name=projects/*/locations/*/phraseSets/*}"
7070
body: "phrase_set"
7171
};
72+
option (google.api.method_signature) = "phrase_set,update_mask";
7273
}
7374

7475
// Delete a phrase set.
@@ -110,6 +111,7 @@ service Adaptation {
110111
patch: "/v1p1beta1/{custom_class.name=projects/*/locations/*/customClasses/*}"
111112
body: "custom_class"
112113
};
114+
option (google.api.method_signature) = "custom_class,update_mask";
113115
}
114116

115117
// Delete a custom class.

0 commit comments

Comments
 (0)