blob: 6b005c4aac9bb393e2bdecfb26783e9c633cc826 [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
[email protected]25efde562013-01-24 18:36:055#ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
6#define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
[email protected]c63f2b72011-07-07 05:25:007
8#include <queue>
9#include <set>
10#include <string>
[email protected]7a06d282013-05-03 04:39:3311#include <vector>
[email protected]c63f2b72011-07-07 05:25:0012
[email protected]7226b33c2011-08-18 08:44:2213#include "base/memory/scoped_ptr.h"
[email protected]c63f2b72011-07-07 05:25:0014#include "base/memory/singleton.h"
[email protected]71b03cd52013-12-19 18:18:2015#include "base/memory/weak_ptr.h"
[email protected]761fa4702013-07-02 15:25:1516#include "url/gurl.h"
[email protected]c63f2b72011-07-07 05:25:0017
// Forward declarations for types that this header only refers to by pointer
// or reference; their full definitions are not needed here.
class Utterance;
class TtsPlatformImpl;

namespace base {
class Value;
}

namespace content {
class BrowserContext;
}
28
// Events sent back from the TTS engine indicating the progress.
// Enumerators take their implicit values (0, 1, 2, ...) in declaration
// order, so new event types should be appended rather than inserted.
enum TtsEventType {
  TTS_EVENT_START,
  TTS_EVENT_END,
  TTS_EVENT_WORD,
  TTS_EVENT_SENTENCE,
  TTS_EVENT_MARKER,
  TTS_EVENT_INTERRUPTED,
  TTS_EVENT_CANCELLED,
  TTS_EVENT_ERROR,
  TTS_EVENT_PAUSE,
  TTS_EVENT_RESUME
};
42
// Gender attribute of a voice. TTS_GENDER_NONE is the first enumerator and
// therefore has the value 0.
enum TtsGenderType {
  TTS_GENDER_NONE,
  TTS_GENDER_MALE,
  TTS_GENDER_FEMALE
};
48
[email protected]7a06d282013-05-03 04:39:3349// Returns true if this event type is one that indicates an utterance
50// is finished and can be destroyed.
51bool IsFinalTtsEventType(TtsEventType event_type);
[email protected]c63f2b72011-07-07 05:25:0052
// The continuous parameters that apply to a given utterance.
struct UtteranceContinuousParameters {
  // Defined out of line; the initial values of the fields are set there.
  UtteranceContinuousParameters();

  // Speaking rate.
  double rate;
  // Voice pitch.
  double pitch;
  // Output volume.
  double volume;
};
61
[email protected]7a06d282013-05-03 04:39:3362// Information about one voice.
63struct VoiceData {
64 VoiceData();
65 ~VoiceData();
66
67 std::string name;
68 std::string lang;
[email protected]56f6f9dc2013-05-11 18:51:4369 TtsGenderType gender;
[email protected]7a06d282013-05-03 04:39:3370 std::string extension_id;
[email protected]56f6f9dc2013-05-11 18:51:4371 std::set<TtsEventType> events;
72
[email protected]78127e62013-11-01 16:44:5773 // If true, the synthesis engine is a remote network resource.
74 // It may be higher latency and may incur bandwidth costs.
75 bool remote;
76
[email protected]56f6f9dc2013-05-11 18:51:4377 // If true, this is implemented by this platform's subclass of
78 // TtsPlatformImpl. If false, this is implemented by an extension.
79 bool native;
80 std::string native_voice_identifier;
[email protected]7a06d282013-05-03 04:39:3381};
82
[email protected]3ecaffe92014-07-12 12:44:0983// Interface that delegates TTS requests to user-installed extensions.
84class TtsEngineDelegate {
85 public:
86 virtual ~TtsEngineDelegate() {}
87
88 // Return a list of all available voices registered.
[email protected]35c11842014-08-13 10:04:2789 virtual void GetVoices(content::BrowserContext* browser_context,
[email protected]3ecaffe92014-07-12 12:44:0990 std::vector<VoiceData>* out_voices) = 0;
91
92 // Speak the given utterance by sending an event to the given TTS engine.
93 virtual void Speak(Utterance* utterance, const VoiceData& voice) = 0;
94
95 // Stop speaking the given utterance by sending an event to the target
96 // associated with this utterance.
97 virtual void Stop(Utterance* utterance) = 0;
98
99 // Pause in the middle of speaking this utterance.
100 virtual void Pause(Utterance* utterance) = 0;
101
102 // Resume speaking this utterance.
103 virtual void Resume(Utterance* utterance) = 0;
[email protected]2372952c2014-08-02 05:56:00104
105 // Load the built-in component extension for ChromeOS.
[email protected]35c11842014-08-13 10:04:27106 virtual bool LoadBuiltInTtsExtension(
107 content::BrowserContext* browser_context) = 0;
[email protected]3ecaffe92014-07-12 12:44:09108};
109
[email protected]7a06d282013-05-03 04:39:33110// Class that wants to receive events on utterances.
111class UtteranceEventDelegate {
112 public:
113 virtual ~UtteranceEventDelegate() {}
114 virtual void OnTtsEvent(Utterance* utterance,
115 TtsEventType event_type,
116 int char_index,
117 const std::string& error_message) = 0;
118};
[email protected]c63f2b72011-07-07 05:25:00119
// Class that wants to be notified when the set of
// voices has changed.
class VoicesChangedDelegate {
 public:
  virtual ~VoicesChangedDelegate() {}

  // Notification hook; implementations override this to react to a change
  // in the set of voices.
  virtual void OnVoicesChanged() = 0;
};
127
[email protected]c63f2b72011-07-07 05:25:00128// One speech utterance.
129class Utterance {
130 public:
131 // Construct an utterance given a profile and a completion task to call
132 // when the utterance is done speaking. Before speaking this utterance,
133 // its other parameters like text, rate, pitch, etc. should all be set.
[email protected]35c11842014-08-13 10:04:27134 explicit Utterance(content::BrowserContext* browser_context);
[email protected]7a5f2702014-02-07 00:29:12135 ~Utterance();
[email protected]c63f2b72011-07-07 05:25:00136
137 // Sends an event to the delegate. If the event type is TTS_EVENT_END
138 // or TTS_EVENT_ERROR, deletes the utterance. If |char_index| is -1,
139 // uses the last good value.
140 void OnTtsEvent(TtsEventType event_type,
141 int char_index,
142 const std::string& error_message);
143
144 // Finish an utterance without sending an event to the delegate.
145 void Finish();
146
147 // Getters and setters for the text to speak and other speech options.
148 void set_text(const std::string& text) { text_ = text; }
149 const std::string& text() const { return text_; }
150
[email protected]f3a1c642011-07-12 19:15:03151 void set_options(const base::Value* options);
152 const base::Value* options() const { return options_.get(); }
[email protected]c63f2b72011-07-07 05:25:00153
[email protected]c63f2b72011-07-07 05:25:00154 void set_src_id(int src_id) { src_id_ = src_id; }
155 int src_id() { return src_id_; }
156
157 void set_src_url(const GURL& src_url) { src_url_ = src_url; }
158 const GURL& src_url() { return src_url_; }
159
160 void set_voice_name(const std::string& voice_name) {
161 voice_name_ = voice_name;
162 }
163 const std::string& voice_name() const { return voice_name_; }
164
165 void set_lang(const std::string& lang) {
166 lang_ = lang;
167 }
168 const std::string& lang() const { return lang_; }
169
[email protected]56f6f9dc2013-05-11 18:51:43170 void set_gender(TtsGenderType gender) {
[email protected]c63f2b72011-07-07 05:25:00171 gender_ = gender;
172 }
[email protected]56f6f9dc2013-05-11 18:51:43173 TtsGenderType gender() const { return gender_; }
[email protected]c63f2b72011-07-07 05:25:00174
djmix.kim61eb0c22015-03-30 17:38:38175 void set_continuous_parameters(const double rate,
176 const double pitch,
177 const double volume) {
178 continuous_parameters_.rate = rate;
179 continuous_parameters_.pitch = pitch;
180 continuous_parameters_.volume = volume;
[email protected]c63f2b72011-07-07 05:25:00181 }
182 const UtteranceContinuousParameters& continuous_parameters() {
183 return continuous_parameters_;
184 }
185
186 void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; }
187 bool can_enqueue() const { return can_enqueue_; }
188
[email protected]56f6f9dc2013-05-11 18:51:43189 void set_required_event_types(const std::set<TtsEventType>& types) {
[email protected]c63f2b72011-07-07 05:25:00190 required_event_types_ = types;
191 }
[email protected]56f6f9dc2013-05-11 18:51:43192 const std::set<TtsEventType>& required_event_types() const {
[email protected]c63f2b72011-07-07 05:25:00193 return required_event_types_;
194 }
195
[email protected]56f6f9dc2013-05-11 18:51:43196 void set_desired_event_types(const std::set<TtsEventType>& types) {
[email protected]c63f2b72011-07-07 05:25:00197 desired_event_types_ = types;
198 }
[email protected]56f6f9dc2013-05-11 18:51:43199 const std::set<TtsEventType>& desired_event_types() const {
[email protected]c63f2b72011-07-07 05:25:00200 return desired_event_types_;
201 }
202
203 const std::string& extension_id() const { return extension_id_; }
204 void set_extension_id(const std::string& extension_id) {
205 extension_id_ = extension_id;
206 }
207
[email protected]71b03cd52013-12-19 18:18:20208 UtteranceEventDelegate* event_delegate() const {
dmazzoni8880b2792014-11-06 08:44:55209 return event_delegate_;
[email protected]71b03cd52013-12-19 18:18:20210 }
dmazzoni8880b2792014-11-06 08:44:55211 void set_event_delegate(UtteranceEventDelegate* event_delegate) {
[email protected]7a06d282013-05-03 04:39:33212 event_delegate_ = event_delegate;
213 }
214
[email protected]c63f2b72011-07-07 05:25:00215 // Getters and setters for internal state.
[email protected]35c11842014-08-13 10:04:27216 content::BrowserContext* browser_context() const { return browser_context_; }
[email protected]c63f2b72011-07-07 05:25:00217 int id() const { return id_; }
218 bool finished() const { return finished_; }
219
220 private:
[email protected]35c11842014-08-13 10:04:27221 // The BrowserContext that initiated this utterance.
222 content::BrowserContext* browser_context_;
[email protected]c63f2b72011-07-07 05:25:00223
224 // The extension ID of the extension providing TTS for this utterance, or
225 // empty if native TTS is being used.
226 std::string extension_id_;
227
228 // The unique ID of this utterance, used to associate callback functions
229 // with utterances.
230 int id_;
231
232 // The id of the next utterance, so we can associate requests with
233 // responses.
234 static int next_utterance_id_;
235
236 // The text to speak.
237 std::string text_;
238
239 // The full options arg passed to tts.speak, which may include fields
240 // other than the ones we explicitly parse, below.
[email protected]f3a1c642011-07-12 19:15:03241 scoped_ptr<base::Value> options_;
[email protected]c63f2b72011-07-07 05:25:00242
[email protected]c63f2b72011-07-07 05:25:00243 // The source extension's ID of this utterance, so that it can associate
244 // events with the appropriate callback.
245 int src_id_;
246
247 // The URL of the page where the source extension called speak.
248 GURL src_url_;
249
[email protected]7a06d282013-05-03 04:39:33250 // The delegate to be called when an utterance event is fired.
dmazzoni8880b2792014-11-06 08:44:55251 UtteranceEventDelegate* event_delegate_;
[email protected]7a06d282013-05-03 04:39:33252
[email protected]c63f2b72011-07-07 05:25:00253 // The parsed options.
254 std::string voice_name_;
255 std::string lang_;
[email protected]56f6f9dc2013-05-11 18:51:43256 TtsGenderType gender_;
[email protected]c63f2b72011-07-07 05:25:00257 UtteranceContinuousParameters continuous_parameters_;
258 bool can_enqueue_;
[email protected]56f6f9dc2013-05-11 18:51:43259 std::set<TtsEventType> required_event_types_;
260 std::set<TtsEventType> desired_event_types_;
[email protected]c63f2b72011-07-07 05:25:00261
262 // The index of the current char being spoken.
263 int char_index_;
264
265 // True if this utterance received an event indicating it's done.
266 bool finished_;
267};
268
[email protected]c63f2b72011-07-07 05:25:00269// Singleton class that manages text-to-speech for the TTS and TTS engine
270// extension APIs, maintaining a queue of pending utterances and keeping
271// track of all state.
[email protected]25efde562013-01-24 18:36:05272class TtsController {
[email protected]c63f2b72011-07-07 05:25:00273 public:
274 // Get the single instance of this class.
[email protected]25efde562013-01-24 18:36:05275 static TtsController* GetInstance();
[email protected]c63f2b72011-07-07 05:25:00276
277 // Returns true if we're currently speaking an utterance.
[email protected]fd968ebf2014-07-24 10:28:48278 virtual bool IsSpeaking() = 0;
[email protected]c63f2b72011-07-07 05:25:00279
280 // Speak the given utterance. If the utterance's can_enqueue flag is true
281 // and another utterance is in progress, adds it to the end of the queue.
282 // Otherwise, interrupts any current utterance and speaks this one
283 // immediately.
[email protected]fd968ebf2014-07-24 10:28:48284 virtual void SpeakOrEnqueue(Utterance* utterance) = 0;
[email protected]c63f2b72011-07-07 05:25:00285
[email protected]5537ddf2013-05-30 15:17:21286 // Stop all utterances and flush the queue. Implies leaving pause mode
287 // as well.
[email protected]fd968ebf2014-07-24 10:28:48288 virtual void Stop() = 0;
[email protected]c63f2b72011-07-07 05:25:00289
[email protected]5537ddf2013-05-30 15:17:21290 // Pause the speech queue. Some engines may support pausing in the middle
291 // of an utterance.
[email protected]fd968ebf2014-07-24 10:28:48292 virtual void Pause() = 0;
[email protected]5537ddf2013-05-30 15:17:21293
294 // Resume speaking.
[email protected]fd968ebf2014-07-24 10:28:48295 virtual void Resume() = 0;
[email protected]5537ddf2013-05-30 15:17:21296
[email protected]c63f2b72011-07-07 05:25:00297 // Handle events received from the speech engine. Events are forwarded to
298 // the callback function, and in addition, completion and error events
299 // trigger finishing the current utterance and starting the next one, if
300 // any.
[email protected]fd968ebf2014-07-24 10:28:48301 virtual void OnTtsEvent(int utterance_id,
[email protected]35c11842014-08-13 10:04:27302 TtsEventType event_type,
303 int char_index,
304 const std::string& error_message) = 0;
[email protected]c63f2b72011-07-07 05:25:00305
306 // Return a list of all available voices, including the native voice,
307 // if supported, and all voices registered by extensions.
[email protected]35c11842014-08-13 10:04:27308 virtual void GetVoices(content::BrowserContext* browser_context,
[email protected]fd968ebf2014-07-24 10:28:48309 std::vector<VoiceData>* out_voices) = 0;
[email protected]c63f2b72011-07-07 05:25:00310
[email protected]0a1091952013-05-20 16:21:54311 // Called by the extension system or platform implementation when the
312 // list of voices may have changed and should be re-queried.
[email protected]fd968ebf2014-07-24 10:28:48313 virtual void VoicesChanged() = 0;
[email protected]0a1091952013-05-20 16:21:54314
315 // Add a delegate that wants to be notified when the set of voices changes.
[email protected]fd968ebf2014-07-24 10:28:48316 virtual void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) = 0;
[email protected]0a1091952013-05-20 16:21:54317
318 // Remove delegate that wants to be notified when the set of voices changes.
[email protected]fd968ebf2014-07-24 10:28:48319 virtual void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate) = 0;
[email protected]0a1091952013-05-20 16:21:54320
dmazzoni8880b2792014-11-06 08:44:55321 // Remove delegate that wants to be notified when an utterance fires an event.
322 // Note: this cancels speech from any utterance with this delegate, and
323 // removes any utterances with this delegate from the queue.
324 virtual void RemoveUtteranceEventDelegate(UtteranceEventDelegate* delegate)
325 = 0;
326
[email protected]3ecaffe92014-07-12 12:44:09327 // Set the delegate that processes TTS requests with user-installed
328 // extensions.
[email protected]fd968ebf2014-07-24 10:28:48329 virtual void SetTtsEngineDelegate(TtsEngineDelegate* delegate) = 0;
[email protected]3ecaffe92014-07-12 12:44:09330
[email protected]2372952c2014-08-02 05:56:00331 // Get the delegate that processes TTS requests with user-installed
332 // extensions.
333 virtual TtsEngineDelegate* GetTtsEngineDelegate() = 0;
334
[email protected]c63f2b72011-07-07 05:25:00335 // For unit testing.
[email protected]fd968ebf2014-07-24 10:28:48336 virtual void SetPlatformImpl(TtsPlatformImpl* platform_impl) = 0;
337 virtual int QueueSize() = 0;
[email protected]c63f2b72011-07-07 05:25:00338
[email protected]92da891b2011-11-16 16:55:12339 protected:
[email protected]fd968ebf2014-07-24 10:28:48340 virtual ~TtsController() {}
[email protected]c63f2b72011-07-07 05:25:00341};
342
[email protected]25efde562013-01-24 18:36:05343#endif // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_