Skip to content
This repository was archived by the owner on Apr 20, 2024. It is now read-only.

Commit 07b5203

Browse files
feat: add total_billed_time response field (#224)
Committer: @cherba
PiperOrigin-RevId: 389755489
Source-Link: googleapis/googleapis@10185d0
Source-Link: https://github.com/googleapis/googleapis-gen/commit/27d21b1b5a0ca1ec55013da57c30c3ac1ac35449
1 parent f3ede39 commit 07b5203

File tree

4 files changed

+53
-20
lines changed

4 files changed

+53
-20
lines changed

google/cloud/speech_v1/services/speech/async_client.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
from google.api_core import operation # type: ignore
3939
from google.api_core import operation_async # type: ignore
4040
from google.cloud.speech_v1.types import cloud_speech
41+
from google.protobuf import duration_pb2 # type: ignore
4142
from google.rpc import status_pb2 # type: ignore
4243
from .transports.base import SpeechTransport, DEFAULT_CLIENT_INFO
4344
from .transports.grpc_asyncio import SpeechGrpcAsyncIOTransport
@@ -379,7 +380,7 @@ def streaming_recognize(
379380
single_utterance is set to false, then no messages
380381
are streamed back to the client.
381382
382-
Here's an example of a series of ten
383+
Here's an example of a series of
383384
StreamingRecognizeResponses that might be returned
384385
while processing audio:
385386

google/cloud/speech_v1/services/speech/client.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
from google.api_core import operation # type: ignore
4444
from google.api_core import operation_async # type: ignore
4545
from google.cloud.speech_v1.types import cloud_speech
46+
from google.protobuf import duration_pb2 # type: ignore
4647
from google.rpc import status_pb2 # type: ignore
4748
from .transports.base import SpeechTransport, DEFAULT_CLIENT_INFO
4849
from .transports.grpc import SpeechGrpcTransport
@@ -553,7 +554,7 @@ def streaming_recognize(
553554
single_utterance is set to false, then no messages
554555
are streamed back to the client.
555556
556-
Here's an example of a series of ten
557+
Here's an example of a series of
557558
StreamingRecognizeResponses that might be returned
558559
while processing audio:
559560

google/cloud/speech_v1/types/cloud_speech.py

+48-18
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,17 @@ class StreamingRecognitionConfig(proto.Message):
138138
``END_OF_SINGLE_UTTERANCE`` event and cease recognition. It
139139
will return no more than one ``StreamingRecognitionResult``
140140
with the ``is_final`` flag set to ``true``.
141+
142+
The ``single_utterance`` field can only be used with
143+
specified models, otherwise an error is thrown. The
144+
``model`` field in [``RecognitionConfig``][] must be set to:
145+
146+
- ``command_and_search``
147+
- ``phone_call`` AND additional field
148+
``useEnhanced``\ =\ ``true``
149+
- The ``model`` field is left undefined. In this case the
150+
API auto-selects a model based on any other parameters
151+
that you set in ``RecognitionConfig``.
141152
interim_results (bool):
142153
If ``true``, interim results (tentative hypotheses) may be
143154
returned as they become available (these interim results are
@@ -214,7 +225,7 @@ class RecognitionConfig(proto.Message):
214225
[SpeechContext][google.cloud.speech.v1.SpeechContext]. A
215226
means to provide context to assist the speech recognition.
216227
For more information, see `speech
217-
adaptation <https://cloud.google.com/speech-to-text/docs/context-strength>`__.
228+
adaptation <https://cloud.google.com/speech-to-text/docs/adaptation>`__.
218229
enable_word_time_offsets (bool):
219230
If ``true``, the top result includes a list of words and the
220231
start and end time offsets (timestamps) for those words. If
@@ -226,11 +237,7 @@ class RecognitionConfig(proto.Message):
226237
available in select languages. Setting this for
227238
requests in other languages has no effect at
228239
all. The default 'false' value does not add
229-
punctuation to result hypotheses. Note: This is
230-
currently offered as an experimental service,
231-
complimentary to all users. In the future this
232-
may be exclusively available as a premium
233-
feature.
240+
punctuation to result hypotheses.
234241
diarization_config (google.cloud.speech_v1.types.SpeakerDiarizationConfig):
235242
Config to enable speaker diarization and set
236243
additional parameters to make diarization better
@@ -270,7 +277,7 @@ class RecognitionConfig(proto.Message):
270277
</tr>
271278
<tr>
272279
<td><code>video</code></td>
273-
<td>Best for audio that originated from from video or includes multiple
280+
<td>Best for audio that originated from video or includes multiple
274281
speakers. Ideally the audio is recorded at a 16khz or greater
275282
sampling rate. This is a premium model that costs more than the
276283
standard rate.</td>
@@ -306,7 +313,7 @@ class AudioEncoding(proto.Enum):
306313
The accuracy of the speech recognition can be reduced if lossy
307314
codecs are used to capture or transmit audio, particularly if
308315
background noise is present. Lossy codecs include ``MULAW``,
309-
``AMR``, ``AMR_WB``, ``OGG_OPUS``, ``SPEEX_WITH_HEADER_BYTE``, and
316+
``AMR``, ``AMR_WB``, ``OGG_OPUS``, ``SPEEX_WITH_HEADER_BYTE``,
310317
``MP3``.
311318
312319
The ``FLAC`` and ``WAV`` audio file formats include a header that
@@ -370,7 +377,7 @@ class SpeakerDiarizationConfig(proto.Message):
370377
automatically determine the correct number of
371378
speakers. If not set, the default value is 6.
372379
speaker_tag (int):
373-
Unused.
380+
Output only. Unused.
374381
"""
375382

376383
enable_speaker_diarization = proto.Field(proto.BOOL, number=1,)
@@ -531,11 +538,17 @@ class RecognizeResponse(proto.Message):
531538
results (Sequence[google.cloud.speech_v1.types.SpeechRecognitionResult]):
532539
Sequential list of transcription results
533540
corresponding to sequential portions of audio.
541+
total_billed_time (google.protobuf.duration_pb2.Duration):
542+
When available, billed audio seconds for the
543+
corresponding request.
534544
"""
535545

536546
results = proto.RepeatedField(
537547
proto.MESSAGE, number=2, message="SpeechRecognitionResult",
538548
)
549+
total_billed_time = proto.Field(
550+
proto.MESSAGE, number=3, message=duration_pb2.Duration,
551+
)
539552

540553

541554
class LongRunningRecognizeResponse(proto.Message):
@@ -550,11 +563,17 @@ class LongRunningRecognizeResponse(proto.Message):
550563
results (Sequence[google.cloud.speech_v1.types.SpeechRecognitionResult]):
551564
Sequential list of transcription results
552565
corresponding to sequential portions of audio.
566+
total_billed_time (google.protobuf.duration_pb2.Duration):
567+
When available, billed audio seconds for the
568+
corresponding request.
553569
"""
554570

555571
results = proto.RepeatedField(
556572
proto.MESSAGE, number=2, message="SpeechRecognitionResult",
557573
)
574+
total_billed_time = proto.Field(
575+
proto.MESSAGE, number=3, message=duration_pb2.Duration,
576+
)
558577

559578

560579
class LongRunningRecognizeMetadata(proto.Message):
@@ -572,13 +591,18 @@ class LongRunningRecognizeMetadata(proto.Message):
572591
Time when the request was received.
573592
last_update_time (google.protobuf.timestamp_pb2.Timestamp):
574593
Time of the most recent processing update.
594+
uri (str):
595+
Output only. The URI of the audio file being
596+
transcribed. Empty if the audio was sent as byte
597+
content.
575598
"""
576599

577600
progress_percent = proto.Field(proto.INT32, number=1,)
578601
start_time = proto.Field(proto.MESSAGE, number=2, message=timestamp_pb2.Timestamp,)
579602
last_update_time = proto.Field(
580603
proto.MESSAGE, number=3, message=timestamp_pb2.Timestamp,
581604
)
605+
uri = proto.Field(proto.STRING, number=4,)
582606

583607

584608
class StreamingRecognizeResponse(proto.Message):
@@ -588,9 +612,8 @@ class StreamingRecognizeResponse(proto.Message):
588612
client. If there is no recognizable audio, and ``single_utterance``
589613
is set to false, then no messages are streamed back to the client.
590614
591-
Here's an example of a series of ten
592-
``StreamingRecognizeResponse``\ s that might be returned while
593-
processing audio:
615+
Here's an example of a series of ``StreamingRecognizeResponse``\ s
616+
that might be returned while processing audio:
594617
595618
1. results { alternatives { transcript: "tube" } stability: 0.01 }
596619
@@ -648,6 +671,10 @@ class StreamingRecognizeResponse(proto.Message):
648671
``is_final=false`` results (the interim results).
649672
speech_event_type (google.cloud.speech_v1.types.StreamingRecognizeResponse.SpeechEventType):
650673
Indicates the type of speech event.
674+
total_billed_time (google.protobuf.duration_pb2.Duration):
675+
When available, billed audio seconds for the
676+
stream. Set only if this is the last response in
677+
the stream.
651678
"""
652679

653680
class SpeechEventType(proto.Enum):
@@ -660,6 +687,9 @@ class SpeechEventType(proto.Enum):
660687
proto.MESSAGE, number=2, message="StreamingRecognitionResult",
661688
)
662689
speech_event_type = proto.Field(proto.ENUM, number=4, enum=SpeechEventType,)
690+
total_billed_time = proto.Field(
691+
proto.MESSAGE, number=5, message=duration_pb2.Duration,
692+
)
663693

664694

665695
class StreamingRecognitionResult(proto.Message):
@@ -784,12 +814,12 @@ class WordInfo(proto.Message):
784814
The word corresponding to this set of
785815
information.
786816
speaker_tag (int):
787-
A distinct integer value is assigned for every speaker
788-
within the audio. This field specifies which one of those
789-
speakers was detected to have spoken this word. Value ranges
790-
from '1' to diarization_speaker_count. speaker_tag is set if
791-
enable_speaker_diarization = 'true' and only in the top
792-
alternative.
817+
Output only. A distinct integer value is assigned for every
818+
speaker within the audio. This field specifies which one of
819+
those speakers was detected to have spoken this word. Value
820+
ranges from '1' to diarization_speaker_count. speaker_tag is
821+
set if enable_speaker_diarization = 'true' and only in the
822+
top alternative.
793823
"""
794824

795825
start_time = proto.Field(proto.MESSAGE, number=1, message=duration_pb2.Duration,)

tests/unit/gapic/speech_v1/test_speech.py

+1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
from google.cloud.speech_v1.types import cloud_speech
4242
from google.longrunning import operations_pb2
4343
from google.oauth2 import service_account
44+
from google.protobuf import duration_pb2 # type: ignore
4445
from google.rpc import status_pb2 # type: ignore
4546
import google.auth
4647

0 commit comments

Comments
 (0)