Blame - chrome/browser/speech/tts_controller.h - chromium/src.git

blob: 6b005c4aac9bb393e2bdecfb26783e9c633cc826 [file] [log] [blame]

[email protected]	516b5bd	2012-03-24 07:01:36	[diff] [blame]	1	// Copyright (c) 2012 The Chromium Authors. All rights reserved.
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
[email protected]	25efde56	2013-01-24 18:36:05	[diff] [blame]	5	#ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
				6	#define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	7
				8	#include <queue>
				9	#include <set>
				10	#include <string>
[email protected]	7a06d28	2013-05-03 04:39:33	[diff] [blame]	11	#include <vector>
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	12
[email protected]	7226b33c	2011-08-18 08:44:22	[diff] [blame]	13	#include "base/memory/scoped_ptr.h"
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	14	#include "base/memory/singleton.h"
[email protected]	71b03cd5	2013-12-19 18:18:20	[diff] [blame]	15	#include "base/memory/weak_ptr.h"
[email protected]	761fa470	2013-07-02 15:25:15	[diff] [blame]	16	#include "url/gurl.h"
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	17
[email protected]	7a06d28	2013-05-03 04:39:33	[diff] [blame]	18	class Utterance;
[email protected]	25efde56	2013-01-24 18:36:05	[diff] [blame]	19	class TtsPlatformImpl;
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	20
[email protected]	f3a1c64	2011-07-12 19:15:03	[diff] [blame]	21	namespace base {
[email protected]	f3a1c64	2011-07-12 19:15:03	[diff] [blame]	22	class Value;
				23	}
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	24
[email protected]	35c1184	2014-08-13 10:04:27	[diff] [blame]	25	namespace content {
				26	class BrowserContext;
				27	}
				28
// Events sent back from the TTS engine indicating the progress.
// Enumerators carry no explicit values, so they are numbered consecutively
// from 0 in declaration order; keep new entries at the end if that numbering
// matters to any consumer.
enum TtsEventType {
  TTS_EVENT_START,
  TTS_EVENT_END,
  TTS_EVENT_WORD,
  TTS_EVENT_SENTENCE,
  TTS_EVENT_MARKER,
  TTS_EVENT_INTERRUPTED,
  TTS_EVENT_CANCELLED,
  TTS_EVENT_ERROR,
  TTS_EVENT_PAUSE,
  TTS_EVENT_RESUME
};
				42
// Gender attribute of a voice (see VoiceData::gender) or requested for an
// utterance (see Utterance::gender()). TTS_GENDER_NONE is the first
// enumerator and therefore has the value 0.
enum TtsGenderType {
  TTS_GENDER_NONE,
  TTS_GENDER_MALE,
  TTS_GENDER_FEMALE
};
				48
[email protected]	7a06d28	2013-05-03 04:39:33	[diff] [blame]	49	// Returns true if this event type is one that indicates an utterance
				50	// is finished and can be destroyed.
				51	bool IsFinalTtsEventType(TtsEventType event_type);
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	52
				53	// The continuous parameters that apply to a given utterance.
				54	struct UtteranceContinuousParameters {
				55	UtteranceContinuousParameters();
				56
				57	double rate;
				58	double pitch;
				59	double volume;
				60	};
				61
[email protected]	7a06d28	2013-05-03 04:39:33	[diff] [blame]	62	// Information about one voice.
				63	struct VoiceData {
				64	VoiceData();
				65	~VoiceData();
				66
				67	std::string name;
				68	std::string lang;
[email protected]	56f6f9dc	2013-05-11 18:51:43	[diff] [blame]	69	TtsGenderType gender;
[email protected]	7a06d28	2013-05-03 04:39:33	[diff] [blame]	70	std::string extension_id;
[email protected]	56f6f9dc	2013-05-11 18:51:43	[diff] [blame]	71	std::set<TtsEventType> events;
				72
[email protected]	78127e6	2013-11-01 16:44:57	[diff] [blame]	73	// If true, the synthesis engine is a remote network resource.
				74	// It may be higher latency and may incur bandwidth costs.
				75	bool remote;
				76
[email protected]	56f6f9dc	2013-05-11 18:51:43	[diff] [blame]	77	// If true, this is implemented by this platform's subclass of
				78	// TtsPlatformImpl. If false, this is implemented by an extension.
				79	bool native;
				80	std::string native_voice_identifier;
[email protected]	7a06d28	2013-05-03 04:39:33	[diff] [blame]	81	};
				82
[email protected]	3ecaffe9	2014-07-12 12:44:09	[diff] [blame]	83	// Interface that delegates TTS requests to user-installed extensions.
				84	class TtsEngineDelegate {
				85	public:
				86	virtual ~TtsEngineDelegate() {}
				87
				88	// Return a list of all available voices registered.
[email protected]	35c1184	2014-08-13 10:04:27	[diff] [blame]	89	virtual void GetVoices(content::BrowserContext* browser_context,
[email protected]	3ecaffe9	2014-07-12 12:44:09	[diff] [blame]	90	std::vector<VoiceData>* out_voices) = 0;
				91
				92	// Speak the given utterance by sending an event to the given TTS engine.
				93	virtual void Speak(Utterance* utterance, const VoiceData& voice) = 0;
				94
				95	// Stop speaking the given utterance by sending an event to the target
				96	// associated with this utterance.
				97	virtual void Stop(Utterance* utterance) = 0;
				98
				99	// Pause in the middle of speaking this utterance.
				100	virtual void Pause(Utterance* utterance) = 0;
				101
				102	// Resume speaking this utterance.
				103	virtual void Resume(Utterance* utterance) = 0;
[email protected]	2372952c	2014-08-02 05:56:00	[diff] [blame]	104
				105	// Load the built-in component extension for ChromeOS.
[email protected]	35c1184	2014-08-13 10:04:27	[diff] [blame]	106	virtual bool LoadBuiltInTtsExtension(
				107	content::BrowserContext* browser_context) = 0;
[email protected]	3ecaffe9	2014-07-12 12:44:09	[diff] [blame]	108	};
				109
[email protected]	7a06d28	2013-05-03 04:39:33	[diff] [blame]	110	// Class that wants to receive events on utterances.
				111	class UtteranceEventDelegate {
				112	public:
				113	virtual ~UtteranceEventDelegate() {}
				114	virtual void OnTtsEvent(Utterance* utterance,
				115	TtsEventType event_type,
				116	int char_index,
				117	const std::string& error_message) = 0;
				118	};
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	119
// Class that wants to be notified when the set of
// voices has changed.
class VoicesChangedDelegate {
 public:
  virtual ~VoicesChangedDelegate() {}

  // Notification hook; takes no arguments, so implementers must re-query
  // the voice list themselves if they need the new contents.
  virtual void OnVoicesChanged() = 0;
};
				127
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	128	// One speech utterance.
				129	class Utterance {
				130	public:
				131	// Construct an utterance given a profile and a completion task to call
				132	// when the utterance is done speaking. Before speaking this utterance,
				133	// its other parameters like text, rate, pitch, etc. should all be set.
[email protected]	35c1184	2014-08-13 10:04:27	[diff] [blame]	134	explicit Utterance(content::BrowserContext* browser_context);
[email protected]	7a5f270	2014-02-07 00:29:12	[diff] [blame]	135	~Utterance();
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	136
				137	// Sends an event to the delegate. If the event type is TTS_EVENT_END
				138	// or TTS_EVENT_ERROR, deletes the utterance. If \|char_index\| is -1,
				139	// uses the last good value.
				140	void OnTtsEvent(TtsEventType event_type,
				141	int char_index,
				142	const std::string& error_message);
				143
				144	// Finish an utterance without sending an event to the delegate.
				145	void Finish();
				146
				147	// Getters and setters for the text to speak and other speech options.
				148	void set_text(const std::string& text) { text_ = text; }
				149	const std::string& text() const { return text_; }
				150
[email protected]	f3a1c64	2011-07-12 19:15:03	[diff] [blame]	151	void set_options(const base::Value* options);
				152	const base::Value* options() const { return options_.get(); }
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	153
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	154	void set_src_id(int src_id) { src_id_ = src_id; }
				155	int src_id() { return src_id_; }
				156
				157	void set_src_url(const GURL& src_url) { src_url_ = src_url; }
				158	const GURL& src_url() { return src_url_; }
				159
				160	void set_voice_name(const std::string& voice_name) {
				161	voice_name_ = voice_name;
				162	}
				163	const std::string& voice_name() const { return voice_name_; }
				164
				165	void set_lang(const std::string& lang) {
				166	lang_ = lang;
				167	}
				168	const std::string& lang() const { return lang_; }
				169
[email protected]	56f6f9dc	2013-05-11 18:51:43	[diff] [blame]	170	void set_gender(TtsGenderType gender) {
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	171	gender_ = gender;
				172	}
[email protected]	56f6f9dc	2013-05-11 18:51:43	[diff] [blame]	173	TtsGenderType gender() const { return gender_; }
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	174
djmix.kim	61eb0c2	2015-03-30 17:38:38	[diff] [blame]	175	void set_continuous_parameters(const double rate,
				176	const double pitch,
				177	const double volume) {
				178	continuous_parameters_.rate = rate;
				179	continuous_parameters_.pitch = pitch;
				180	continuous_parameters_.volume = volume;
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	181	}
				182	const UtteranceContinuousParameters& continuous_parameters() {
				183	return continuous_parameters_;
				184	}
				185
				186	void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; }
				187	bool can_enqueue() const { return can_enqueue_; }
				188
[email protected]	56f6f9dc	2013-05-11 18:51:43	[diff] [blame]	189	void set_required_event_types(const std::set<TtsEventType>& types) {
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	190	required_event_types_ = types;
				191	}
[email protected]	56f6f9dc	2013-05-11 18:51:43	[diff] [blame]	192	const std::set<TtsEventType>& required_event_types() const {
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	193	return required_event_types_;
				194	}
				195
[email protected]	56f6f9dc	2013-05-11 18:51:43	[diff] [blame]	196	void set_desired_event_types(const std::set<TtsEventType>& types) {
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	197	desired_event_types_ = types;
				198	}
[email protected]	56f6f9dc	2013-05-11 18:51:43	[diff] [blame]	199	const std::set<TtsEventType>& desired_event_types() const {
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	200	return desired_event_types_;
				201	}
				202
				203	const std::string& extension_id() const { return extension_id_; }
				204	void set_extension_id(const std::string& extension_id) {
				205	extension_id_ = extension_id;
				206	}
				207
[email protected]	71b03cd5	2013-12-19 18:18:20	[diff] [blame]	208	UtteranceEventDelegate* event_delegate() const {
dmazzoni	8880b279	2014-11-06 08:44:55	[diff] [blame]	209	return event_delegate_;
[email protected]	71b03cd5	2013-12-19 18:18:20	[diff] [blame]	210	}
dmazzoni	8880b279	2014-11-06 08:44:55	[diff] [blame]	211	void set_event_delegate(UtteranceEventDelegate* event_delegate) {
[email protected]	7a06d28	2013-05-03 04:39:33	[diff] [blame]	212	event_delegate_ = event_delegate;
				213	}
				214
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	215	// Getters and setters for internal state.
[email protected]	35c1184	2014-08-13 10:04:27	[diff] [blame]	216	content::BrowserContext* browser_context() const { return browser_context_; }
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	217	int id() const { return id_; }
				218	bool finished() const { return finished_; }
				219
				220	private:
[email protected]	35c1184	2014-08-13 10:04:27	[diff] [blame]	221	// The BrowserContext that initiated this utterance.
				222	content::BrowserContext* browser_context_;
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	223
				224	// The extension ID of the extension providing TTS for this utterance, or
				225	// empty if native TTS is being used.
				226	std::string extension_id_;
				227
				228	// The unique ID of this utterance, used to associate callback functions
				229	// with utterances.
				230	int id_;
				231
				232	// The id of the next utterance, so we can associate requests with
				233	// responses.
				234	static int next_utterance_id_;
				235
				236	// The text to speak.
				237	std::string text_;
				238
				239	// The full options arg passed to tts.speak, which may include fields
				240	// other than the ones we explicitly parse, below.
[email protected]	f3a1c64	2011-07-12 19:15:03	[diff] [blame]	241	scoped_ptr<base::Value> options_;
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	242
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	243	// The source extension's ID of this utterance, so that it can associate
				244	// events with the appropriate callback.
				245	int src_id_;
				246
				247	// The URL of the page where the source extension called speak.
				248	GURL src_url_;
				249
[email protected]	7a06d28	2013-05-03 04:39:33	[diff] [blame]	250	// The delegate to be called when an utterance event is fired.
dmazzoni	8880b279	2014-11-06 08:44:55	[diff] [blame]	251	UtteranceEventDelegate* event_delegate_;
[email protected]	7a06d28	2013-05-03 04:39:33	[diff] [blame]	252
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	253	// The parsed options.
				254	std::string voice_name_;
				255	std::string lang_;
[email protected]	56f6f9dc	2013-05-11 18:51:43	[diff] [blame]	256	TtsGenderType gender_;
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	257	UtteranceContinuousParameters continuous_parameters_;
				258	bool can_enqueue_;
[email protected]	56f6f9dc	2013-05-11 18:51:43	[diff] [blame]	259	std::set<TtsEventType> required_event_types_;
				260	std::set<TtsEventType> desired_event_types_;
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	261
				262	// The index of the current char being spoken.
				263	int char_index_;
				264
				265	// True if this utterance received an event indicating it's done.
				266	bool finished_;
				267	};
				268
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	269	// Singleton class that manages text-to-speech for the TTS and TTS engine
				270	// extension APIs, maintaining a queue of pending utterances and keeping
				271	// track of all state.
[email protected]	25efde56	2013-01-24 18:36:05	[diff] [blame]	272	class TtsController {
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	273	public:
				274	// Get the single instance of this class.
[email protected]	25efde56	2013-01-24 18:36:05	[diff] [blame]	275	static TtsController* GetInstance();
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	276
				277	// Returns true if we're currently speaking an utterance.
[email protected]	fd968ebf	2014-07-24 10:28:48	[diff] [blame]	278	virtual bool IsSpeaking() = 0;
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	279
				280	// Speak the given utterance. If the utterance's can_enqueue flag is true
				281	// and another utterance is in progress, adds it to the end of the queue.
				282	// Otherwise, interrupts any current utterance and speaks this one
				283	// immediately.
[email protected]	fd968ebf	2014-07-24 10:28:48	[diff] [blame]	284	virtual void SpeakOrEnqueue(Utterance* utterance) = 0;
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	285
[email protected]	5537ddf	2013-05-30 15:17:21	[diff] [blame]	286	// Stop all utterances and flush the queue. Implies leaving pause mode
				287	// as well.
[email protected]	fd968ebf	2014-07-24 10:28:48	[diff] [blame]	288	virtual void Stop() = 0;
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	289
[email protected]	5537ddf	2013-05-30 15:17:21	[diff] [blame]	290	// Pause the speech queue. Some engines may support pausing in the middle
				291	// of an utterance.
[email protected]	fd968ebf	2014-07-24 10:28:48	[diff] [blame]	292	virtual void Pause() = 0;
[email protected]	5537ddf	2013-05-30 15:17:21	[diff] [blame]	293
				294	// Resume speaking.
[email protected]	fd968ebf	2014-07-24 10:28:48	[diff] [blame]	295	virtual void Resume() = 0;
[email protected]	5537ddf	2013-05-30 15:17:21	[diff] [blame]	296
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	297	// Handle events received from the speech engine. Events are forwarded to
				298	// the callback function, and in addition, completion and error events
				299	// trigger finishing the current utterance and starting the next one, if
				300	// any.
[email protected]	fd968ebf	2014-07-24 10:28:48	[diff] [blame]	301	virtual void OnTtsEvent(int utterance_id,
[email protected]	35c1184	2014-08-13 10:04:27	[diff] [blame]	302	TtsEventType event_type,
				303	int char_index,
				304	const std::string& error_message) = 0;
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	305
				306	// Return a list of all available voices, including the native voice,
				307	// if supported, and all voices registered by extensions.
[email protected]	35c1184	2014-08-13 10:04:27	[diff] [blame]	308	virtual void GetVoices(content::BrowserContext* browser_context,
[email protected]	fd968ebf	2014-07-24 10:28:48	[diff] [blame]	309	std::vector<VoiceData>* out_voices) = 0;
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	310
[email protected]	0a109195	2013-05-20 16:21:54	[diff] [blame]	311	// Called by the extension system or platform implementation when the
				312	// list of voices may have changed and should be re-queried.
[email protected]	fd968ebf	2014-07-24 10:28:48	[diff] [blame]	313	virtual void VoicesChanged() = 0;
[email protected]	0a109195	2013-05-20 16:21:54	[diff] [blame]	314
				315	// Add a delegate that wants to be notified when the set of voices changes.
[email protected]	fd968ebf	2014-07-24 10:28:48	[diff] [blame]	316	virtual void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) = 0;
[email protected]	0a109195	2013-05-20 16:21:54	[diff] [blame]	317
				318	// Remove delegate that wants to be notified when the set of voices changes.
[email protected]	fd968ebf	2014-07-24 10:28:48	[diff] [blame]	319	virtual void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate) = 0;
[email protected]	0a109195	2013-05-20 16:21:54	[diff] [blame]	320
dmazzoni	8880b279	2014-11-06 08:44:55	[diff] [blame]	321	// Remove delegate that wants to be notified when an utterance fires an event.
				322	// Note: this cancels speech from any utterance with this delegate, and
				323	// removes any utterances with this delegate from the queue.
				324	virtual void RemoveUtteranceEventDelegate(UtteranceEventDelegate* delegate)
				325	= 0;
				326
[email protected]	3ecaffe9	2014-07-12 12:44:09	[diff] [blame]	327	// Set the delegate that processes TTS requests with user-installed
				328	// extensions.
[email protected]	fd968ebf	2014-07-24 10:28:48	[diff] [blame]	329	virtual void SetTtsEngineDelegate(TtsEngineDelegate* delegate) = 0;
[email protected]	3ecaffe9	2014-07-12 12:44:09	[diff] [blame]	330
[email protected]	2372952c	2014-08-02 05:56:00	[diff] [blame]	331	// Get the delegate that processes TTS requests with user-installed
				332	// extensions.
				333	virtual TtsEngineDelegate* GetTtsEngineDelegate() = 0;
				334
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	335	// For unit testing.
[email protected]	fd968ebf	2014-07-24 10:28:48	[diff] [blame]	336	virtual void SetPlatformImpl(TtsPlatformImpl* platform_impl) = 0;
				337	virtual int QueueSize() = 0;
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	338
[email protected]	92da891b	2011-11-16 16:55:12	[diff] [blame]	339	protected:
[email protected]	fd968ebf	2014-07-24 10:28:48	[diff] [blame]	340	virtual ~TtsController() {}
[email protected]	c63f2b7	2011-07-07 05:25:00	[diff] [blame]	341	};
				342
[email protected]	25efde56	2013-01-24 18:36:05	[diff] [blame]	343	#endif // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_