diff options
author | primiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-04-25 20:20:18 +0000 |
---|---|---|
committer | primiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-04-25 20:20:18 +0000 |
commit | b450e9092544c11b225690a1e459ffe7e955cec4 (patch) | |
tree | 7838b6a28c94cf2a6c0c3a051b20d3690cde0a85 /content/browser/speech | |
parent | 2e526f05f8190a04df5105985b935c34a2acf7cf (diff) | |
download | chromium_src-b450e9092544c11b225690a1e459ffe7e955cec4.zip chromium_src-b450e9092544c11b225690a1e459ffe7e955cec4.tar.gz chromium_src-b450e9092544c11b225690a1e459ffe7e955cec4.tar.bz2 |
Speech refactoring: Reimplemented SpeechRecognitionManagerImpl as an FSM. (CL1.7)
BUG=116954
TEST=none.
Review URL: http://codereview.chromium.org/9972011
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@133967 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'content/browser/speech')
5 files changed, 816 insertions, 522 deletions
diff --git a/content/browser/speech/input_tag_speech_dispatcher_host.cc b/content/browser/speech/input_tag_speech_dispatcher_host.cc index 29eebb64..4831ecd 100644 --- a/content/browser/speech/input_tag_speech_dispatcher_host.cc +++ b/content/browser/speech/input_tag_speech_dispatcher_host.cc @@ -4,113 +4,31 @@ #include "content/browser/speech/input_tag_speech_dispatcher_host.h" +#include "base/bind.h" #include "base/lazy_instance.h" #include "content/browser/speech/speech_recognition_manager_impl.h" #include "content/browser/speech/speech_recognizer_impl.h" #include "content/common/speech_recognition_messages.h" #include "content/public/browser/speech_recognition_preferences.h" +#include "content/public/browser/speech_recognition_session_config.h" +#include "content/public/browser/speech_recognition_session_context.h" using content::BrowserThread; +using content::SpeechRecognitionSessionConfig; +using content::SpeechRecognitionSessionContext; -namespace speech { - -//----------------------------- Sessions ----------------------------- - -// TODO(primiano) Remove session handling from here in the next CL. The manager -// shall be the only one in charge of keeping all the context information for -// all recognition sessions. - -// A singleton class to map the tuple -// (render-process-id, render-view-id, requestid) to a single ID which is passed -// through rest of the speech code. -class InputTagSpeechDispatcherHost::Sessions { - public: - // Creates a new ID for a given tuple. - int CreateId(int render_process_id, int render_view_id, int request_id); - - // Returns the ID for a tuple assuming the ID was created earlier. - int GetId(int render_process_id, int render_view_id, int request_id); - - // Removes the ID and associated tuple from the map. - void RemoveId(int id); - - // Getters for the various tuple elements for the given ID. 
- int render_process_id(int id); - int render_view_id(int id); - int request_id(int id); - - private: - struct SessionInfo { - int render_process_id; - int render_view_id; - int request_id; - }; - friend struct base::DefaultLazyInstanceTraits<Sessions>; - - Sessions(); - - std::map<int, SessionInfo> sessions_; - int next_id_; -}; - -static base::LazyInstance<InputTagSpeechDispatcherHost::Sessions> - g_sessions = LAZY_INSTANCE_INITIALIZER; - -InputTagSpeechDispatcherHost::Sessions::Sessions() - : next_id_(1) { -} - -int InputTagSpeechDispatcherHost::Sessions::GetId(int render_process_id, - int render_view_id, - int request_id) { - for (std::map<int, SessionInfo>::iterator it = sessions_.begin(); - it != sessions_.end(); it++) { - const SessionInfo& item = it->second; - if (item.render_process_id == render_process_id && - item.render_view_id == render_view_id && - item.request_id == request_id) { - return it->first; - } - } - - // Not finding an entry here is valid since a cancel/stop may have been issued - // by the renderer and before it received our response the user may have - // clicked the button to stop again. The caller of this method should take - // care of this case. 
- return 0; -} - -int InputTagSpeechDispatcherHost::Sessions::CreateId(int render_process_id, - int render_view_id, - int request_id) { - SessionInfo info; - info.render_process_id = render_process_id; - info.render_view_id = render_view_id; - info.request_id = request_id; - sessions_[next_id_] = info; - return next_id_++; -} - -void InputTagSpeechDispatcherHost::Sessions::RemoveId(int id) { - sessions_.erase(id); -} - -int InputTagSpeechDispatcherHost::Sessions::render_process_id( - int id) { - return sessions_[id].render_process_id; +namespace { +bool IsSameContext(int render_process_id, + int render_view_id, + int render_request_id, + const SpeechRecognitionSessionContext& context) { + return context.render_process_id == render_process_id && + context.render_view_id == render_view_id && + context.render_request_id == render_request_id; } +} // namespace -int InputTagSpeechDispatcherHost::Sessions::render_view_id( - int id) { - return sessions_[id].render_view_id; -} - -int InputTagSpeechDispatcherHost::Sessions::request_id(int id) { - return sessions_[id].request_id; -} - -//----------------------- InputTagSpeechDispatcherHost ---------------------- - +namespace speech { SpeechRecognitionManagerImpl* InputTagSpeechDispatcherHost::manager_; void InputTagSpeechDispatcherHost::set_manager( @@ -120,11 +38,11 @@ void InputTagSpeechDispatcherHost::set_manager( InputTagSpeechDispatcherHost::InputTagSpeechDispatcherHost( int render_process_id, - net::URLRequestContextGetter* context_getter, + net::URLRequestContextGetter* url_request_context_getter, content::SpeechRecognitionPreferences* recognition_preferences) : render_process_id_(render_process_id), may_have_pending_requests_(false), - context_getter_(context_getter), + url_request_context_getter_(url_request_context_getter), recognition_preferences_(recognition_preferences) { // This is initialized by Browser. Do not add any non-trivial // initialization here, instead do it lazily when required (e.g. 
see the @@ -138,7 +56,7 @@ InputTagSpeechDispatcherHost::~InputTagSpeechDispatcherHost() { // we don't end up creating the speech input manager for web pages which don't // use speech input. if (may_have_pending_requests_) - manager()->CancelAllRequestsWithDelegate(this); + manager()->AbortAllSessionsForListener(this); } SpeechRecognitionManagerImpl* InputTagSpeechDispatcherHost::manager() { @@ -173,69 +91,100 @@ bool InputTagSpeechDispatcherHost::OnMessageReceived( void InputTagSpeechDispatcherHost::OnStartRecognition( const InputTagSpeechHostMsg_StartRecognition_Params ¶ms) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); - int session_id = g_sessions.Get().CreateId( - render_process_id_, params.render_view_id, params.request_id); - manager()->StartRecognition(this, session_id, - render_process_id_, - params.render_view_id, params.element_rect, - params.language, params.grammar, - params.origin_url, - context_getter_.get(), - recognition_preferences_.get()); + + SpeechRecognitionSessionContext context; + context.render_process_id = render_process_id_; + context.render_view_id = params.render_view_id; + context.render_request_id = params.request_id; + context.element_rect = params.element_rect; + + SpeechRecognitionSessionConfig config; + config.language = params.language; + config.grammar = params.grammar; + config.origin_url = params.origin_url; + config.initial_context = context; + config.url_request_context_getter = url_request_context_getter_.get(); + config.filter_profanities = recognition_preferences_->FilterProfanities(); + + int session_id = manager()->CreateSession(config, this); + if (session_id == content::SpeechRecognitionManager::kSessionIDInvalid) + return; + + manager()->StartSession(session_id); } void InputTagSpeechDispatcherHost::OnCancelRecognition(int render_view_id, int request_id) { - int session_id = g_sessions.Get().GetId( - render_process_id_, render_view_id, request_id); - if (session_id) { - 
manager()->CancelRecognition(session_id); - // Request sequence ended so remove mapping. - g_sessions.Get().RemoveId(session_id); - } + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + int session_id = manager()->LookupSessionByContext( + base::Bind(&IsSameContext, + render_process_id_, + render_view_id, + request_id)); + if (session_id != content::SpeechRecognitionManager::kSessionIDInvalid) + manager()->AbortSession(session_id); } void InputTagSpeechDispatcherHost::OnStopRecording(int render_view_id, int request_id) { - int session_id = g_sessions.Get().GetId( - render_process_id_, render_view_id, request_id); - if (session_id) - manager()->StopRecording(session_id); -} - -void InputTagSpeechDispatcherHost::SetRecognitionResult( - int session_id, const content::SpeechRecognitionResult& result) { - VLOG(1) << "InputTagSpeechDispatcherHost::SetRecognitionResult enter"; DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); - int session_render_view_id = g_sessions.Get().render_view_id(session_id); - int session_request_id = g_sessions.Get().request_id(session_id); - Send(new InputTagSpeechMsg_SetRecognitionResult(session_render_view_id, - session_request_id, - result)); - VLOG(1) << "InputTagSpeechDispatcherHost::SetRecognitionResult exit"; + int session_id = manager()->LookupSessionByContext( + base::Bind(&IsSameContext, + render_process_id_, + render_view_id, + request_id)); + DCHECK_NE(session_id, content::SpeechRecognitionManager::kSessionIDInvalid); + manager()->StopAudioCaptureForSession(session_id); +} + +// -------- SpeechRecognitionEventListener interface implementation ----------- +void InputTagSpeechDispatcherHost::OnRecognitionResult( + int session_id, const content::SpeechRecognitionResult& result) { + VLOG(1) << "InputTagSpeechDispatcherHost::OnRecognitionResult enter"; + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + + const SpeechRecognitionSessionContext& context = + manager()->GetSessionContext(session_id); + + Send(new 
InputTagSpeechMsg_SetRecognitionResult( + context.render_view_id, + context.render_request_id, + result)); + VLOG(1) << "InputTagSpeechDispatcherHost::OnRecognitionResult exit"; } -void InputTagSpeechDispatcherHost::DidCompleteRecording(int session_id) { - VLOG(1) << "InputTagSpeechDispatcherHost::DidCompleteRecording enter"; +void InputTagSpeechDispatcherHost::OnAudioEnd(int session_id) { + VLOG(1) << "InputTagSpeechDispatcherHost::OnAudioEnd enter"; DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); - int session_render_view_id = g_sessions.Get().render_view_id(session_id); - int session_request_id = g_sessions.Get().request_id(session_id); - Send(new InputTagSpeechMsg_RecordingComplete(session_render_view_id, - session_request_id)); - VLOG(1) << "InputTagSpeechDispatcherHost::DidCompleteRecording exit"; + + const SpeechRecognitionSessionContext& context = + manager()->GetSessionContext(session_id); + + Send(new InputTagSpeechMsg_RecordingComplete(context.render_view_id, + context.render_request_id)); + VLOG(1) << "InputTagSpeechDispatcherHost::OnAudioEnd exit"; } -void InputTagSpeechDispatcherHost::DidCompleteRecognition(int session_id) { - VLOG(1) << "InputTagSpeechDispatcherHost::DidCompleteRecognition enter"; +void InputTagSpeechDispatcherHost::OnRecognitionEnd(int session_id) { + VLOG(1) << "InputTagSpeechDispatcherHost::OnRecognitionEnd enter"; DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); - int session_render_view_id = - g_sessions.Get().render_view_id(session_id); - int session_request_id = g_sessions.Get().request_id(session_id); - Send(new InputTagSpeechMsg_RecognitionComplete(session_render_view_id, - session_request_id)); - // Request sequence ended, so remove mapping. 
- g_sessions.Get().RemoveId(session_id); - VLOG(1) << "InputTagSpeechDispatcherHost::DidCompleteRecognition exit"; -} + const SpeechRecognitionSessionContext& context = + manager()->GetSessionContext(session_id); + Send(new InputTagSpeechMsg_RecognitionComplete(context.render_view_id, + context.render_request_id)); + VLOG(1) << "InputTagSpeechDispatcherHost::OnRecognitionEnd exit"; +} + +// The events below are currently not used by x-webkit-speech implementation. +void InputTagSpeechDispatcherHost::OnRecognitionStart(int session_id) {} +void InputTagSpeechDispatcherHost::OnAudioStart(int session_id) {} +void InputTagSpeechDispatcherHost::OnSoundStart(int session_id) {} +void InputTagSpeechDispatcherHost::OnSoundEnd(int session_id) {} +void InputTagSpeechDispatcherHost::OnRecognitionError( + int session_id, const content::SpeechRecognitionError& error) {} +void InputTagSpeechDispatcherHost::OnAudioLevelsChange( + int session_id, float volume, float noise_volume) {} +void InputTagSpeechDispatcherHost::OnEnvironmentEstimationComplete( + int session_id) {} } // namespace speech diff --git a/content/browser/speech/input_tag_speech_dispatcher_host.h b/content/browser/speech/input_tag_speech_dispatcher_host.h index 95bd252..6a7358b 100644 --- a/content/browser/speech/input_tag_speech_dispatcher_host.h +++ b/content/browser/speech/input_tag_speech_dispatcher_host.h @@ -8,6 +8,7 @@ #include "base/memory/scoped_ptr.h" #include "content/common/content_export.h" #include "content/public/browser/browser_message_filter.h" +#include "content/public/browser/speech_recognition_event_listener.h" #include "net/url_request/url_request_context_getter.h" struct InputTagSpeechHostMsg_StartRecognition_Params; @@ -17,32 +18,37 @@ class SpeechRecognitionPreferences; struct SpeechRecognitionResult; } -namespace media { -class AudioManager; -} - namespace speech { class SpeechRecognitionManagerImpl; // InputTagSpeechDispatcherHost is a delegate for Speech API messages used by -// 
RenderMessageFilter. -// It's the complement of InputTagSpeechDispatcher (owned by RenderView). +// RenderMessageFilter. Basically it acts as a proxy, relaying the events coming +// from the SpeechRecognitionManager to IPC messages (and vice versa). +// It's the complement of SpeechRecognitionDispatcher (owned by RenderView). class CONTENT_EXPORT InputTagSpeechDispatcherHost - : public content::BrowserMessageFilter { + : public content::BrowserMessageFilter, + public content::SpeechRecognitionEventListener { public: - class Sessions; - InputTagSpeechDispatcherHost( int render_process_id, - net::URLRequestContextGetter* context_getter, + net::URLRequestContextGetter* url_request_context_getter, content::SpeechRecognitionPreferences* recognition_preferences); - // Methods called by SpeechRecognitionManagerImpl. - void SetRecognitionResult(int session_id, - const content::SpeechRecognitionResult& result); - void DidCompleteRecording(int session_id); - void DidCompleteRecognition(int session_id); + // SpeechRecognitionEventListener methods. + virtual void OnRecognitionStart(int session_id) OVERRIDE; + virtual void OnAudioStart(int session_id) OVERRIDE; + virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE; + virtual void OnSoundStart(int session_id) OVERRIDE; + virtual void OnSoundEnd(int session_id) OVERRIDE; + virtual void OnAudioEnd(int session_id) OVERRIDE; + virtual void OnRecognitionEnd(int session_id) OVERRIDE; + virtual void OnRecognitionResult( + int session_id, const content::SpeechRecognitionResult& result) OVERRIDE; + virtual void OnRecognitionError( + int session_id, const content::SpeechRecognitionError& error) OVERRIDE; + virtual void OnAudioLevelsChange( + int session_id, float volume, float noise_volume) OVERRIDE; // content::BrowserMessageFilter implementation. 
virtual bool OnMessageReceived(const IPC::Message& message, @@ -66,7 +72,7 @@ class CONTENT_EXPORT InputTagSpeechDispatcherHost int render_process_id_; bool may_have_pending_requests_; // Set if we received any speech IPC request - scoped_refptr<net::URLRequestContextGetter> context_getter_; + scoped_refptr<net::URLRequestContextGetter> url_request_context_getter_; scoped_refptr<content::SpeechRecognitionPreferences> recognition_preferences_; static SpeechRecognitionManagerImpl* manager_; diff --git a/content/browser/speech/speech_recognition_browsertest.cc b/content/browser/speech/speech_recognition_browsertest.cc index ea8f95e..e1bd7b6 100644 --- a/content/browser/speech/speech_recognition_browsertest.cc +++ b/content/browser/speech/speech_recognition_browsertest.cc @@ -5,6 +5,7 @@ #include "base/bind.h" #include "base/command_line.h" #include "base/file_path.h" +#include "base/memory/scoped_ptr.h" #include "base/string_number_conversions.h" #include "base/synchronization/waitable_event.h" #include "base/utf_string_conversions.h" @@ -16,11 +17,15 @@ #include "content/browser/speech/speech_recognition_manager_impl.h" #include "content/browser/web_contents/web_contents_impl.h" #include "content/public/browser/notification_types.h" +#include "content/public/browser/speech_recognition_session_config.h" +#include "content/public/browser/speech_recognition_session_context.h" #include "content/public/common/content_switches.h" #include "content/public/common/speech_recognition_error.h" #include "content/public/common/speech_recognition_result.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebInputEvent.h" +using content::SpeechRecognitionEventListener; +using content::SpeechRecognitionSessionContext; using content::NavigationController; using content::WebContents; @@ -36,7 +41,7 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManagerImpl { public: FakeSpeechRecognitionManager() : session_id_(0), - delegate_(NULL), + listener_(NULL), 
did_cancel_all_(false), should_send_fake_response_(true), recognition_started_event_(false, false) { @@ -63,23 +68,24 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManagerImpl { } // SpeechRecognitionManager methods. - virtual void StartRecognition( - InputTagSpeechDispatcherHost* delegate, - int session_id, - int render_process_id, - int render_view_id, - const gfx::Rect& element_rect, - const std::string& language, - const std::string& grammar, - const std::string& origin_url, - net::URLRequestContextGetter* context_getter, - content::SpeechRecognitionPreferences* recognition_prefs) OVERRIDE { - VLOG(1) << "StartRecognition invoked."; + virtual int CreateSession( + const content::SpeechRecognitionSessionConfig& config, + SpeechRecognitionEventListener* event_listener) OVERRIDE { + VLOG(1) << "FAKE CreateSession invoked."; EXPECT_EQ(0, session_id_); - EXPECT_EQ(NULL, delegate_); - session_id_ = session_id; - delegate_ = delegate; - grammar_ = grammar; + EXPECT_EQ(NULL, listener_); + listener_ = event_listener; + grammar_ = config.grammar; + session_ctx_ = config.initial_context; + session_id_ = 1; + return session_id_; + } + + virtual void StartSession(int session_id) OVERRIDE { + VLOG(1) << "FAKE StartSession invoked."; + EXPECT_EQ(session_id, session_id_); + EXPECT_TRUE(listener_ != NULL); + if (should_send_fake_response_) { // Give the fake result in a short while. 
MessageLoop::current()->PostTask(FROM_HERE, base::Bind( @@ -93,45 +99,69 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManagerImpl { } recognition_started_event_.Signal(); } - virtual void CancelRecognition(int session_id) OVERRIDE { - VLOG(1) << "CancelRecognition invoked."; + + virtual void AbortSession(int session_id) OVERRIDE { + VLOG(1) << "FAKE AbortSession invoked."; EXPECT_EQ(session_id_, session_id); session_id_ = 0; - delegate_ = NULL; + listener_ = NULL; } - virtual void StopRecording(int session_id) OVERRIDE { + + virtual void StopAudioCaptureForSession(int session_id) OVERRIDE { VLOG(1) << "StopRecording invoked."; EXPECT_EQ(session_id_, session_id); // Nothing to do here since we aren't really recording. } - virtual void CancelAllRequestsWithDelegate( - InputTagSpeechDispatcherHost* delegate) OVERRIDE { + + virtual void AbortAllSessionsForListener( + content::SpeechRecognitionEventListener* listener) OVERRIDE { VLOG(1) << "CancelAllRequestsWithDelegate invoked."; - // delegate_ is set to NULL if a fake result was received (see below), so - // check that delegate_ matches the incoming parameter only when there is + // listener_ is set to NULL if a fake result was received (see below), so + // check that listener_ matches the incoming parameter only when there is // no fake result sent. 
- EXPECT_TRUE(should_send_fake_response_ || delegate_ == delegate); + EXPECT_TRUE(should_send_fake_response_ || listener_ == listener); did_cancel_all_ = true; } + virtual void SendSessionToBackground(int session_id) OVERRIDE {} + virtual bool HasAudioInputDevices() OVERRIDE { return true; } + virtual bool IsCapturingAudio() OVERRIDE { return true; } + virtual string16 GetAudioInputDeviceModel() OVERRIDE { return string16(); } + virtual void ShowAudioInputSettings() OVERRIDE {} + + virtual int LookupSessionByContext( + base::Callback<bool( + const content::SpeechRecognitionSessionContext&)> matcher) + const OVERRIDE { + bool matched = matcher.Run(session_ctx_); + return matched ? session_id_ : 0; + } + + virtual content::SpeechRecognitionSessionContext GetSessionContext( + int session_id) const OVERRIDE { + EXPECT_EQ(session_id, session_id_); + return session_ctx_; + } + private: void SetFakeRecognitionResult() { if (session_id_) { // Do a check in case we were cancelled.. VLOG(1) << "Setting fake recognition result."; - delegate_->DidCompleteRecording(session_id_); + listener_->OnAudioEnd(session_id_); content::SpeechRecognitionResult results; results.hypotheses.push_back(content::SpeechRecognitionHypothesis( ASCIIToUTF16(kTestResult), 1.0)); - delegate_->SetRecognitionResult(session_id_, results); - delegate_->DidCompleteRecognition(session_id_); + listener_->OnRecognitionResult(session_id_, results); + listener_->OnRecognitionEnd(session_id_); session_id_ = 0; - delegate_ = NULL; + listener_ = NULL; VLOG(1) << "Finished setting fake recognition result."; } } int session_id_; - InputTagSpeechDispatcherHost* delegate_; + SpeechRecognitionEventListener* listener_; + SpeechRecognitionSessionContext session_ctx_; std::string grammar_; bool did_cancel_all_; bool should_send_fake_response_; diff --git a/content/browser/speech/speech_recognition_manager_impl.cc b/content/browser/speech/speech_recognition_manager_impl.cc index ed567e1..da2737c 100644 --- 
a/content/browser/speech/speech_recognition_manager_impl.cc +++ b/content/browser/speech/speech_recognition_manager_impl.cc @@ -5,361 +5,622 @@ #include "content/browser/speech/speech_recognition_manager_impl.h" #include "base/bind.h" +#include "base/memory/singleton.h" #include "content/browser/browser_main_loop.h" -#include "content/browser/renderer_host/render_view_host_impl.h" -#include "content/browser/speech/input_tag_speech_dispatcher_host.h" +#include "content/browser/speech/google_one_shot_remote_engine.h" +#include "content/browser/speech/speech_recognition_engine.h" +#include "content/browser/speech/speech_recognizer_impl.h" #include "content/public/browser/browser_thread.h" #include "content/public/browser/content_browser_client.h" -#include "content/public/browser/speech_recognizer.h" -#include "content/public/browser/render_view_host_delegate.h" #include "content/public/browser/resource_context.h" +#include "content/public/browser/speech_recognition_event_listener.h" #include "content/public/browser/speech_recognition_manager_delegate.h" -#include "content/public/browser/speech_recognition_preferences.h" -#include "content/public/common/view_type.h" +#include "content/public/browser/speech_recognition_session_config.h" +#include "content/public/browser/speech_recognition_session_context.h" +#include "content/public/common/speech_recognition_result.h" #include "media/audio/audio_manager.h" +using base::Callback; +using base::Unretained; using content::BrowserMainLoop; using content::BrowserThread; -using content::RenderViewHostImpl; +using content::SpeechRecognitionError; +using content::SpeechRecognitionEventListener; using content::SpeechRecognitionManager; -using content::SpeechRecognitionManagerDelegate; +using content::SpeechRecognitionResult; +using content::SpeechRecognitionSessionContext; +using content::SpeechRecognitionSessionConfig; + +namespace content { +const int SpeechRecognitionManager::kSessionIDInvalid = 0; SpeechRecognitionManager* 
SpeechRecognitionManager::GetInstance() { return speech::SpeechRecognitionManagerImpl::GetInstance(); } +} // namespace content namespace speech { -struct SpeechRecognitionManagerImpl::SpeechRecognitionParams { - SpeechRecognitionParams( - InputTagSpeechDispatcherHost* delegate, - int session_id, - int render_process_id, - int render_view_id, - const gfx::Rect& element_rect, - const std::string& language, - const std::string& grammar, - const std::string& origin_url, - net::URLRequestContextGetter* context_getter, - content::SpeechRecognitionPreferences* recognition_prefs) - : delegate(delegate), - session_id(session_id), - render_process_id(render_process_id), - render_view_id(render_view_id), - element_rect(element_rect), - language(language), - grammar(grammar), - origin_url(origin_url), - context_getter(context_getter), - recognition_prefs(recognition_prefs) { - } - - InputTagSpeechDispatcherHost* delegate; - int session_id; - int render_process_id; - int render_view_id; - gfx::Rect element_rect; - std::string language; - std::string grammar; - std::string origin_url; - net::URLRequestContextGetter* context_getter; - content::SpeechRecognitionPreferences* recognition_prefs; -}; - SpeechRecognitionManagerImpl* SpeechRecognitionManagerImpl::GetInstance() { return Singleton<SpeechRecognitionManagerImpl>::get(); } SpeechRecognitionManagerImpl::SpeechRecognitionManagerImpl() - : can_report_metrics_(false), - recording_session_id_(0) { - delegate_.reset(content::GetContentClient()->browser()-> - GetSpeechRecognitionManagerDelegate()); + : interactive_session_id_(kSessionIDInvalid), + last_session_id_(kSessionIDInvalid), + is_dispatching_event_(false) { + delegate_ = content::GetContentClient()->browser()-> + GetSpeechRecognitionManagerDelegate(); } SpeechRecognitionManagerImpl::~SpeechRecognitionManagerImpl() { - while (requests_.begin() != requests_.end()) - CancelRecognition(requests_.begin()->first); + // Recognition sessions will be aborted by the corresponding 
destructors. + sessions_.clear(); } -bool SpeechRecognitionManagerImpl::HasAudioInputDevices() { - return BrowserMainLoop::GetAudioManager()->HasAudioInputDevices(); -} +int SpeechRecognitionManagerImpl::CreateSession( + const SpeechRecognitionSessionConfig& config, + SpeechRecognitionEventListener* event_listener) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); -bool SpeechRecognitionManagerImpl::IsCapturingAudio() { - return BrowserMainLoop::GetAudioManager()->IsRecordingInProcess(); -} + const int session_id = GetNextSessionID(); + DCHECK(!SessionExists(session_id)); + // Set-up the new session. + Session& session = sessions_[session_id]; + session.id = session_id; + session.event_listener = event_listener; + session.context = config.initial_context; + + std::string hardware_info; + bool can_report_metrics = false; + if (delegate_) + delegate_->GetDiagnosticInformation(&can_report_metrics, &hardware_info); + + GoogleOneShotRemoteEngineConfig remote_engine_config; + remote_engine_config.language = config.language; + remote_engine_config.grammar = config.grammar; + remote_engine_config.audio_sample_rate = + SpeechRecognizerImpl::kAudioSampleRate; + remote_engine_config.audio_num_bits_per_sample = + SpeechRecognizerImpl::kNumBitsPerAudioSample; + remote_engine_config.filter_profanities = config.filter_profanities; + remote_engine_config.hardware_info = hardware_info; + remote_engine_config.origin_url = can_report_metrics ? 
config.origin_url : ""; + + GoogleOneShotRemoteEngine* google_remote_engine = + new GoogleOneShotRemoteEngine(config.url_request_context_getter); + google_remote_engine->SetConfig(remote_engine_config); + + session.recognizer = new SpeechRecognizerImpl(this, + session_id, + google_remote_engine); + return session_id; +} + +void SpeechRecognitionManagerImpl::StartSession(int session_id) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + DCHECK(SessionExists(session_id)); -string16 SpeechRecognitionManagerImpl::GetAudioInputDeviceModel() { - return BrowserMainLoop::GetAudioManager()->GetAudioInputDeviceModel(); -} + // If there is another interactive session, send it to background. + if (interactive_session_id_ != kSessionIDInvalid && + interactive_session_id_ != session_id) { + SendSessionToBackground(interactive_session_id_); + } -bool SpeechRecognitionManagerImpl::HasPendingRequest(int session_id) const { - return requests_.find(session_id) != requests_.end(); + if (delegate_) + delegate_->CheckRecognitionIsAllowed( + session_id, + base::Bind(&SpeechRecognitionManagerImpl::RecognitionAllowedCallback, + base::Unretained(this))); } -InputTagSpeechDispatcherHost* SpeechRecognitionManagerImpl::GetDelegate( - int session_id) const { - return requests_.find(session_id)->second.delegate; +void SpeechRecognitionManagerImpl::RecognitionAllowedCallback(int session_id, + bool is_allowed) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + DCHECK(SessionExists(session_id)); + if (is_allowed) { + BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, + base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, + Unretained(this), session_id, FSMEventArgs(EVENT_START))); + } else { + sessions_.erase(session_id); + } } -void SpeechRecognitionManagerImpl::ShowAudioInputSettings() { - // Since AudioManager::ShowAudioInputSettings can potentially launch external - // processes, do that in the FILE thread to not block the calling threads. 
- if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) { - BrowserThread::PostTask( - BrowserThread::FILE, FROM_HERE, - base::Bind(&SpeechRecognitionManagerImpl::ShowAudioInputSettings, - base::Unretained(this))); - return; - } +void SpeechRecognitionManagerImpl::AbortSession(int session_id) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + DCHECK(SessionExists(session_id)); - media::AudioManager* audio_manager = BrowserMainLoop::GetAudioManager(); - DCHECK(audio_manager->CanShowAudioInputSettings()); - if (audio_manager->CanShowAudioInputSettings()) - audio_manager->ShowAudioInputSettings(); + BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, + base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), + session_id, FSMEventArgs(EVENT_ABORT))); } -void SpeechRecognitionManagerImpl::StartRecognition( - InputTagSpeechDispatcherHost* delegate, - int session_id, - int render_process_id, - int render_view_id, - const gfx::Rect& element_rect, - const std::string& language, - const std::string& grammar, - const std::string& origin_url, - net::URLRequestContextGetter* context_getter, - content::SpeechRecognitionPreferences* recognition_prefs) { +void SpeechRecognitionManagerImpl::StopAudioCaptureForSession(int session_id) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); - BrowserThread::PostTask( - BrowserThread::UI, FROM_HERE, - base::Bind( - &SpeechRecognitionManagerImpl::CheckRenderViewTypeAndStartRecognition, - base::Unretained(this), - SpeechRecognitionParams( - delegate, session_id, render_process_id, render_view_id, - element_rect, language, grammar, origin_url, context_getter, - recognition_prefs))); -} - -void SpeechRecognitionManagerImpl::CheckRenderViewTypeAndStartRecognition( - const SpeechRecognitionParams& params) { - DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); - - RenderViewHostImpl* render_view_host = RenderViewHostImpl::FromID( - params.render_process_id, params.render_view_id); - if (!render_view_host || 
!render_view_host->GetDelegate()) - return; + DCHECK(SessionExists(session_id)); - // For host delegates other than VIEW_TYPE_WEB_CONTENTS we can't reliably show - // a popup, including the speech input bubble. In these cases for privacy - // reasons we don't want to start recording if the user can't be properly - // notified. An example of this is trying to show the speech input bubble - // within an extension popup: http://crbug.com/92083. In these situations the - // speech input extension API should be used instead. - if (render_view_host->GetDelegate()->GetRenderViewType() == - content::VIEW_TYPE_WEB_CONTENTS) { - BrowserThread::PostTask( - BrowserThread::IO, FROM_HERE, - base::Bind(&SpeechRecognitionManagerImpl::ProceedStartingRecognition, - base::Unretained(this), params)); - } + BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, + base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), + session_id, FSMEventArgs(EVENT_STOP_CAPTURE))); } -void SpeechRecognitionManagerImpl::ProceedStartingRecognition( - const SpeechRecognitionParams& params) { +void SpeechRecognitionManagerImpl::SendSessionToBackground(int session_id) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); - DCHECK(!HasPendingRequest(params.session_id)); + DCHECK(SessionExists(session_id)); - if (delegate_.get()) { - delegate_->ShowRecognitionRequested( - params.session_id, params.render_process_id, params.render_view_id, - params.element_rect); - delegate_->GetRequestInfo(&can_report_metrics_, &request_info_); - } + BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, + base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), + session_id, FSMEventArgs(EVENT_SET_BACKGROUND))); +} + +// Here begins the SpeechRecognitionEventListener interface implementation, +// which will simply relay the events to the proper listener registered for the +// particular session (most likely InputTagSpeechDispatcherHost) and intercept +// some of them to provide UI 
notifications. - Request* request = &requests_[params.session_id]; - request->delegate = params.delegate; - request->recognizer = content::SpeechRecognizer::Create( - this, params.session_id, params.language, params.grammar, - params.context_getter, params.recognition_prefs->FilterProfanities(), - request_info_, can_report_metrics_ ? params.origin_url : ""); - request->is_active = false; +void SpeechRecognitionManagerImpl::OnRecognitionStart(int session_id) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + if (!SessionExists(session_id)) + return; - StartRecognitionForRequest(params.session_id); + DCHECK_EQ(interactive_session_id_, session_id); + if (delegate_) + delegate_->ShowWarmUp(session_id); + GetListener(session_id)->OnRecognitionStart(session_id); } -void SpeechRecognitionManagerImpl::StartRecognitionForRequest(int session_id) { - SpeechRecognizerMap::iterator request = requests_.find(session_id); - if (request == requests_.end()) { - NOTREACHED(); +void SpeechRecognitionManagerImpl::OnAudioStart(int session_id) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + if (!SessionExists(session_id)) return; - } - - // We should not currently be recording for the session. - CHECK(recording_session_id_ != session_id); - // If we are currently recording audio for another session, abort it cleanly. 
- if (recording_session_id_) - CancelRecognitionAndInformDelegate(recording_session_id_); - recording_session_id_ = session_id; - requests_[session_id].is_active = true; - requests_[session_id].recognizer->StartRecognition(); - if (delegate_.get()) - delegate_->ShowWarmUp(session_id); + DCHECK_EQ(interactive_session_id_, session_id); + if (delegate_) + delegate_->ShowRecording(session_id); + GetListener(session_id)->OnAudioStart(session_id); } -void SpeechRecognitionManagerImpl::CancelRecognitionForRequest(int session_id) { - // Ignore if the session id was not in our active recognizers list because the - // user might have clicked more than once, or recognition could have been - // ended due to other reasons before the user click was processed. - if (!HasPendingRequest(session_id)) +void SpeechRecognitionManagerImpl::OnEnvironmentEstimationComplete( + int session_id) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + if (!SessionExists(session_id)) return; - CancelRecognitionAndInformDelegate(session_id); + DCHECK_EQ(interactive_session_id_, session_id); + GetListener(session_id)->OnEnvironmentEstimationComplete(session_id); } -void SpeechRecognitionManagerImpl::FocusLostForRequest(int session_id) { - // See above comment. - if (!HasPendingRequest(session_id)) +void SpeechRecognitionManagerImpl::OnSoundStart(int session_id) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + if (!SessionExists(session_id)) return; - // If this is an ongoing recording or if we were displaying an error message - // to the user, abort it since user has switched focus. Otherwise - // recognition has started and keep that going so user can start speaking to - // another element while this gets the results in parallel. 
- if (recording_session_id_ == session_id || !requests_[session_id].is_active) - CancelRecognitionAndInformDelegate(session_id); -} - -void SpeechRecognitionManagerImpl::CancelRecognition(int session_id) { - DCHECK(HasPendingRequest(session_id)); - if (requests_[session_id].is_active) - requests_[session_id].recognizer->AbortRecognition(); - requests_.erase(session_id); - if (recording_session_id_ == session_id) - recording_session_id_ = 0; - if (delegate_.get()) - delegate_->DoClose(session_id); -} - -void SpeechRecognitionManagerImpl::CancelAllRequestsWithDelegate( - InputTagSpeechDispatcherHost* delegate) { - SpeechRecognizerMap::iterator it = requests_.begin(); - while (it != requests_.end()) { - if (it->second.delegate == delegate) { - CancelRecognition(it->first); - // This map will have very few elements so it is simpler to restart. - it = requests_.begin(); - } else { - ++it; - } - } + DCHECK_EQ(interactive_session_id_, session_id); + GetListener(session_id)->OnSoundStart(session_id); } -void SpeechRecognitionManagerImpl::StopRecording(int session_id) { - // No pending requests on extension popups. - if (!HasPendingRequest(session_id)) +void SpeechRecognitionManagerImpl::OnSoundEnd(int session_id) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + if (!SessionExists(session_id)) return; - requests_[session_id].recognizer->StopAudioCapture(); + GetListener(session_id)->OnSoundEnd(session_id); } -// -------- SpeechRecognitionEventListener interface implementation. --------- +void SpeechRecognitionManagerImpl::OnAudioEnd(int session_id) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + if (!SessionExists(session_id)) + return; + + // OnAudioEnd can also be raised after an abort request, when the session is + // not interactive anymore. 
+ if (interactive_session_id_ == session_id && delegate_) + delegate_->ShowRecognizing(session_id); + + GetListener(session_id)->OnAudioEnd(session_id); +} void SpeechRecognitionManagerImpl::OnRecognitionResult( int session_id, const content::SpeechRecognitionResult& result) { - DCHECK(HasPendingRequest(session_id)); - GetDelegate(session_id)->SetRecognitionResult(session_id, result); + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + if (!SessionExists(session_id)) + return; + + GetListener(session_id)->OnRecognitionResult(session_id, result); + FSMEventArgs event_args(EVENT_RECOGNITION_RESULT); + BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, + base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), + session_id, event_args)); } -void SpeechRecognitionManagerImpl::OnAudioEnd(int session_id) { - if (recording_session_id_ != session_id) +void SpeechRecognitionManagerImpl::OnRecognitionError( + int session_id, const content::SpeechRecognitionError& error) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + if (!SessionExists(session_id)) return; - DCHECK_EQ(recording_session_id_, session_id); - DCHECK(HasPendingRequest(session_id)); - if (!requests_[session_id].is_active) + + GetListener(session_id)->OnRecognitionError(session_id, error); + FSMEventArgs event_args(EVENT_RECOGNITION_ERROR); + event_args.speech_error = error; + BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, + base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), + session_id, event_args)); +} + +void SpeechRecognitionManagerImpl::OnAudioLevelsChange( + int session_id, float volume, float noise_volume) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + if (!SessionExists(session_id)) return; - recording_session_id_ = 0; - GetDelegate(session_id)->DidCompleteRecording(session_id); - if (delegate_.get()) - delegate_->ShowRecognizing(session_id); + + if (delegate_) + delegate_->ShowInputVolume(session_id, volume, 
noise_volume); + + GetListener(session_id)->OnAudioLevelsChange(session_id, volume, + noise_volume); } void SpeechRecognitionManagerImpl::OnRecognitionEnd(int session_id) { - if (!HasPendingRequest(session_id) || !requests_[session_id].is_active) + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + if (!SessionExists(session_id)) return; - GetDelegate(session_id)->DidCompleteRecognition(session_id); - requests_.erase(session_id); - if (delegate_.get()) - delegate_->DoClose(session_id); + + GetListener(session_id)->OnRecognitionEnd(session_id); + BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, + base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), + session_id, FSMEventArgs(EVENT_RECOGNITION_ENDED))); } -void SpeechRecognitionManagerImpl::OnSoundStart(int session_id) { +// TODO(primiano) After CL2: if we see that both InputTagDispatcherHost and +// SpeechRecognitionDispatcherHost do the same lookup operations, implement the +// lookup method directly here. +int SpeechRecognitionManagerImpl::LookupSessionByContext( + Callback<bool(const SpeechRecognitionSessionContext&)> matcher) const { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + SessionsTable::const_iterator iter; + // Note: the callback (matcher) must NEVER perform non-const calls on us. 
+ for(iter = sessions_.begin(); iter != sessions_.end(); ++iter) { + const int session_id = iter->first; + const Session& session = iter->second; + bool matches = matcher.Run(session.context); + if (matches) + return session_id; + } + return kSessionIDInvalid; } -void SpeechRecognitionManagerImpl::OnSoundEnd(int session_id) { +SpeechRecognitionSessionContext +SpeechRecognitionManagerImpl::GetSessionContext(int session_id) const { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + SessionsTable::const_iterator iter = sessions_.find(session_id); + DCHECK(iter != sessions_.end()); + return iter->second.context; } -void SpeechRecognitionManagerImpl::OnRecognitionError( - int session_id, const content::SpeechRecognitionError& error) { - DCHECK(HasPendingRequest(session_id)); - if (session_id == recording_session_id_) - recording_session_id_ = 0; - requests_[session_id].is_active = false; - if (delegate_.get()) { - if (error.code == content::SPEECH_RECOGNITION_ERROR_AUDIO && - error.details == content::SPEECH_AUDIO_ERROR_DETAILS_NO_MIC) { - delegate_->ShowMicError(session_id, - SpeechRecognitionManagerDelegate::MIC_ERROR_NO_DEVICE_AVAILABLE); - } else if (error.code == content::SPEECH_RECOGNITION_ERROR_AUDIO && - error.details == content::SPEECH_AUDIO_ERROR_DETAILS_IN_USE) { - delegate_->ShowMicError(session_id, - SpeechRecognitionManagerDelegate::MIC_ERROR_DEVICE_IN_USE); - } else { - delegate_->ShowRecognizerError(session_id, error.code); - } +void SpeechRecognitionManagerImpl::AbortAllSessionsForListener( + SpeechRecognitionEventListener* listener) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + + // AbortSession is asynchronous and the session will not be removed from the + // collection while we are iterating over it. 
+ for (SessionsTable::iterator it = sessions_.begin(); it != sessions_.end(); + ++it) { + if (it->second.event_listener == listener) + AbortSession(it->first); } } -void SpeechRecognitionManagerImpl::OnAudioStart(int session_id) { - DCHECK(HasPendingRequest(session_id)); - DCHECK_EQ(recording_session_id_, session_id); - if (delegate_.get()) - delegate_->ShowRecording(session_id); +// ----------------------- Core FSM implementation --------------------------- +void SpeechRecognitionManagerImpl::DispatchEvent(int session_id, + FSMEventArgs event_args) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + if (!SessionExists(session_id)) + return; + + Session& session = sessions_[session_id]; + DCHECK_LE(session.state, STATE_MAX_VALUE); + DCHECK_LE(event_args.event, EVENT_MAX_VALUE); + + // Event dispatching must be sequential, otherwise it will break all the rules + // and the assumptions of the finite state automata model. + DCHECK(!is_dispatching_event_); + is_dispatching_event_ = true; + + // Pedantic preconditions consistency checks. + if (session.state == STATE_INTERACTIVE) + DCHECK_EQ(interactive_session_id_, session_id); + + if (session.state == STATE_BACKGROUND || + session.state == STATE_WAITING_FOR_DELETION) { + DCHECK_NE(interactive_session_id_, session_id); + } + + session.state = ExecuteTransitionAndGetNextState(session, event_args); + + is_dispatching_event_ = false; +} + +// This FSM handles the evolution of each session, from the viewpoint of the +// interaction with the user (that may be either the browser end-user which +// interacts with UI bubbles, or JS developer intracting with JS methods). +// All the events received by the SpeechRecognizerImpl instances (one for each +// session) are always routed to the SpeechRecognitionEventListener(s) +// regardless the choices taken in this FSM. 
+SpeechRecognitionManagerImpl::FSMState +SpeechRecognitionManagerImpl::ExecuteTransitionAndGetNextState( + Session& session, const FSMEventArgs& event_args) { + // Some notes for the code below: + // - A session can be deleted only if it is not active, thus only if it ended + // spontaneously or we issued a prior SessionAbort. In these cases, we must + // wait for a RECOGNITION_ENDED event (which is guaranteed to come always at + // last by the SpeechRecognizer) in order to free resources gracefully. + // - Use SessionDelete only when absolutely sure that the recognizer is not + // active. Prefer SessionAbort, which will do it gracefully, otherwise. + // - Since this class methods are publicly exported, START, ABORT, + // STOP_CAPTURE and SET_BACKGROUND events can arrive in every moment from + // the outside wild wolrd, even if they make no sense. + const FSMEvent event = event_args.event; + switch (session.state) { + case STATE_IDLE: + // Session has just been created or had an error while interactive. + switch (event) { + case EVENT_START: + return SessionStart(session, event_args); + case EVENT_ABORT: + case EVENT_SET_BACKGROUND: + return SessionAbort(session, event_args); + case EVENT_STOP_CAPTURE: + case EVENT_RECOGNITION_ENDED: + // In case of error, we come back in this state before receiving the + // OnRecognitionEnd event, thus EVENT_RECOGNITION_ENDED is feasible. + return DoNothing(session, event_args); + case EVENT_RECOGNITION_RESULT: + case EVENT_RECOGNITION_ERROR: + return NotFeasible(session, event_args); + } + break; + case STATE_INTERACTIVE: + // The recognizer can be either capturing audio or waiting for a result. + switch (event) { + case EVENT_RECOGNITION_RESULT: + // TODO(primiano) Valid only in single shot mode. Review in next CLs. 
+ return SessionSetBackground(session, event_args); + case EVENT_SET_BACKGROUND: + return SessionAbortIfCapturingAudioOrBackground(session, event_args); + case EVENT_STOP_CAPTURE: + return SessionStopAudioCapture(session, event_args); + case EVENT_ABORT: + return SessionAbort(session, event_args); + case EVENT_RECOGNITION_ERROR: + return SessionReportError(session, event_args); + case EVENT_RECOGNITION_ENDED: + // If we're still interactive it means that no result was received + // in the meanwhile (otherwise we'd have been sent to background). + return SessionReportNoMatch(session, event_args); + case EVENT_START: + return DoNothing(session, event_args); + } + break; + case STATE_BACKGROUND: + switch (event) { + case EVENT_ABORT: + return SessionAbort(session, event_args); + case EVENT_RECOGNITION_ENDED: + return SessionDelete(session, event_args); + case EVENT_START: + case EVENT_STOP_CAPTURE: + case EVENT_RECOGNITION_RESULT: + case EVENT_RECOGNITION_ERROR: + return DoNothing(session, event_args); + case EVENT_SET_BACKGROUND: + return NotFeasible(session, event_args); + } + break; + case STATE_WAITING_FOR_DELETION: + switch (event) { + case EVENT_RECOGNITION_ENDED: + return SessionDelete(session, event_args); + case EVENT_ABORT: + case EVENT_START: + case EVENT_STOP_CAPTURE: + case EVENT_SET_BACKGROUND: + case EVENT_RECOGNITION_RESULT: + case EVENT_RECOGNITION_ERROR: + return DoNothing(session, event_args); + } + break; + } + return NotFeasible(session, event_args); +} + +// ----------- Contract for all the FSM evolution functions below ------------- +// - Are guaranteed to be executed in the IO thread; +// - Are guaranteed to be not reentrant (themselves and each other); +// - event_args members are guaranteed to be stable during the call; + +SpeechRecognitionManagerImpl::FSMState +SpeechRecognitionManagerImpl::SessionStart(Session& session, + const FSMEventArgs& event_args) { + if (interactive_session_id_ != kSessionIDInvalid && delegate_) + 
delegate_->DoClose(interactive_session_id_); + interactive_session_id_ = session.id; + if (delegate_) + delegate_->ShowRecognitionRequested(session.id); + session.recognizer->StartRecognition(); + return STATE_INTERACTIVE; +} + +SpeechRecognitionManagerImpl::FSMState +SpeechRecognitionManagerImpl::SessionAbort(Session& session, + const FSMEventArgs& event_args) { + if (interactive_session_id_ == session.id) { + interactive_session_id_ = kSessionIDInvalid; + if (delegate_) + delegate_->DoClose(session.id); + } + + // If abort was requested while the recognizer was inactive, delete directly. + if (session.recognizer == NULL || !session.recognizer->IsActive()) + return SessionDelete(session, event_args); + + // Otherwise issue an abort and delete gracefully, waiting for a + // RECOGNITION_ENDED event first. + session.recognizer->AbortRecognition(); + return STATE_WAITING_FOR_DELETION; +} + +SpeechRecognitionManagerImpl::FSMState +SpeechRecognitionManagerImpl::SessionStopAudioCapture( + Session& session, const FSMEventArgs& event_args) { + DCHECK(session.recognizer != NULL); + DCHECK(session.recognizer->IsActive()); + if (session.recognizer->IsCapturingAudio()) + session.recognizer->StopAudioCapture(); + return STATE_INTERACTIVE; +} + +SpeechRecognitionManagerImpl::FSMState +SpeechRecognitionManagerImpl::SessionAbortIfCapturingAudioOrBackground( + Session& session, const FSMEventArgs& event_args) { + DCHECK_EQ(interactive_session_id_, session.id); + + DCHECK(session.recognizer != NULL); + DCHECK(session.recognizer->IsActive()); + if (session.recognizer->IsCapturingAudio()) + return SessionAbort(session, event_args); + + interactive_session_id_ = kSessionIDInvalid; + if (delegate_) + delegate_->DoClose(session.id); + return STATE_BACKGROUND; +} + + +SpeechRecognitionManagerImpl::FSMState +SpeechRecognitionManagerImpl::SessionSetBackground( + Session& session, const FSMEventArgs& event_args) { + DCHECK_EQ(interactive_session_id_, session.id); + interactive_session_id_ = 
kSessionIDInvalid; + if (delegate_) + delegate_->DoClose(session.id); + return STATE_BACKGROUND; +} + +SpeechRecognitionManagerImpl::FSMState +SpeechRecognitionManagerImpl::SessionReportError( + Session& session, const FSMEventArgs& event_args) { + DCHECK_EQ(interactive_session_id_, session.id); + if (delegate_) + delegate_->ShowError(session.id, event_args.speech_error); + return STATE_IDLE; +} + +SpeechRecognitionManagerImpl::FSMState +SpeechRecognitionManagerImpl::SessionReportNoMatch( + Session& session, const FSMEventArgs& event_args) { + DCHECK_EQ(interactive_session_id_, session.id); + if (delegate_) { + delegate_->ShowError( + session.id, + SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_NO_MATCH)); + } + return STATE_IDLE; } -void SpeechRecognitionManagerImpl::OnRecognitionStart(int session_id) { +SpeechRecognitionManagerImpl::FSMState +SpeechRecognitionManagerImpl::SessionDelete(Session& session, + const FSMEventArgs& event_args) { + DCHECK(session.recognizer == NULL || !session.recognizer->IsActive()); + if (interactive_session_id_ == session.id) { + interactive_session_id_ = kSessionIDInvalid; + if (delegate_) + delegate_->DoClose(session.id); + } + sessions_.erase(session.id); + // Next state is irrelevant, the session will be deleted afterwards. 
+ return STATE_WAITING_FOR_DELETION; } -void SpeechRecognitionManagerImpl::OnEnvironmentEstimationComplete( - int session_id) { - DCHECK(HasPendingRequest(session_id)); - DCHECK_EQ(recording_session_id_, session_id); +SpeechRecognitionManagerImpl::FSMState +SpeechRecognitionManagerImpl::DoNothing(Session& session, + const FSMEventArgs& event_args) { + return session.state; } -void SpeechRecognitionManagerImpl::OnAudioLevelsChange( - int session_id, float volume, float noise_volume) { - DCHECK(HasPendingRequest(session_id)); - DCHECK_EQ(recording_session_id_, session_id); - if (delegate_.get()) - delegate_->ShowInputVolume(session_id, volume, noise_volume); +SpeechRecognitionManagerImpl::FSMState +SpeechRecognitionManagerImpl::NotFeasible(Session& session, + const FSMEventArgs& event_args) { + NOTREACHED() << "Unfeasible event " << event_args.event + << " in state " << session.state + << " for session " << session.id; + return session.state; } -void SpeechRecognitionManagerImpl::CancelRecognitionAndInformDelegate( - int session_id) { - InputTagSpeechDispatcherHost* cur_delegate = GetDelegate(session_id); - CancelRecognition(session_id); - cur_delegate->DidCompleteRecording(session_id); - cur_delegate->DidCompleteRecognition(session_id); +int SpeechRecognitionManagerImpl::GetNextSessionID() { + ++last_session_id_; + // Deal with wrapping of last_session_id_. (How civilized). 
+ if (last_session_id_ <= 0) + last_session_id_ = 1; + return last_session_id_; +} + +bool SpeechRecognitionManagerImpl::SessionExists(int session_id) const { + return sessions_.find(session_id) != sessions_.end(); +} + +SpeechRecognitionEventListener* SpeechRecognitionManagerImpl::GetListener( + int session_id) const { + SessionsTable::const_iterator iter = sessions_.find(session_id); + DCHECK(iter != sessions_.end()); + return iter->second.event_listener; +} + + +bool SpeechRecognitionManagerImpl::HasAudioInputDevices() { + return BrowserMainLoop::GetAudioManager()->HasAudioInputDevices(); +} + +bool SpeechRecognitionManagerImpl::IsCapturingAudio() { + return BrowserMainLoop::GetAudioManager()->IsRecordingInProcess(); +} + +string16 SpeechRecognitionManagerImpl::GetAudioInputDeviceModel() { + return BrowserMainLoop::GetAudioManager()->GetAudioInputDeviceModel(); +} + +void SpeechRecognitionManagerImpl::ShowAudioInputSettings() { + // Since AudioManager::ShowAudioInputSettings can potentially launch external + // processes, do that in the FILE thread to not block the calling threads. 
+ if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) { + BrowserThread::PostTask( + BrowserThread::FILE, FROM_HERE, + base::Bind(&SpeechRecognitionManagerImpl::ShowAudioInputSettings, + base::Unretained(this))); + return; + } + + media::AudioManager* audio_manager = BrowserMainLoop::GetAudioManager(); + DCHECK(audio_manager->CanShowAudioInputSettings()); + if (audio_manager->CanShowAudioInputSettings()) + audio_manager->ShowAudioInputSettings(); +} + +SpeechRecognitionManagerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) + : event(event_value), + speech_error(content::SPEECH_RECOGNITION_ERROR_NONE) { +} + +SpeechRecognitionManagerImpl::FSMEventArgs::~FSMEventArgs() { } -SpeechRecognitionManagerImpl::Request::Request() - : is_active(false) { +SpeechRecognitionManagerImpl::Session::Session() + : id(kSessionIDInvalid), + event_listener(NULL), + state(STATE_IDLE) { } -SpeechRecognitionManagerImpl::Request::~Request() { +SpeechRecognitionManagerImpl::Session::~Session() { } } // namespace speech diff --git a/content/browser/speech/speech_recognition_manager_impl.h b/content/browser/speech/speech_recognition_manager_impl.h index 9a0c967..650c4ad 100644 --- a/content/browser/speech/speech_recognition_manager_impl.h +++ b/content/browser/speech/speech_recognition_manager_impl.h @@ -9,69 +9,66 @@ #include <string> #include "base/basictypes.h" +#include "base/callback.h" #include "base/compiler_specific.h" -#include "base/memory/ref_counted.h" -#include "base/memory/scoped_ptr.h" #include "base/memory/singleton.h" #include "content/public/browser/speech_recognition_event_listener.h" #include "content/public/browser/speech_recognition_manager.h" -#include "ui/gfx/rect.h" +#include "content/public/browser/speech_recognition_session_context.h" +#include "content/public/common/speech_recognition_error.h" namespace content { -class ResourceContext; class SpeechRecognitionManagerDelegate; -class SpeechRecognitionPreferences; -struct SpeechRecognitionResult; -class 
SpeechRecognizer; -} - -namespace net { -class URLRequestContextGetter; } namespace speech { -class InputTagSpeechDispatcherHost; - -class CONTENT_EXPORT SpeechRecognitionManagerImpl - : NON_EXPORTED_BASE(public content::SpeechRecognitionManager), - NON_EXPORTED_BASE(public content::SpeechRecognitionEventListener) { +class SpeechRecognizerImpl; + +// This is the manager for speech recognition. It is a singleton instance in +// the browser process and can serve several requests. Each recognition request +// corresponds to a session, initiated via |CreateSession|. +// In every moment the manager has at most one "interactive" session (identified +// by |interactive_session_id_|), that is the session that is currently holding +// user attention. For privacy reasons, only the interactive session is allowed +// to capture audio from the microphone. However, after audio capture is +// completed, a session can be sent to background and can live in parallel with +// other sessions, while waiting for its results. +// +// More in details, SpeechRecognitionManager has the following responsibilities: +// - Handles requests received from various render views and makes sure only +// one of them accesses the audio device at any given time. +// - Relays recognition results/status/error events of each session to the +// corresponding listener (demuxing on the base of their session_id). +// - Handles the instantiation of SpeechRecognitionEngine objects when +// requested by SpeechRecognitionSessions. 
+class CONTENT_EXPORT SpeechRecognitionManagerImpl : + public NON_EXPORTED_BASE(content::SpeechRecognitionManager), + public NON_EXPORTED_BASE(content::SpeechRecognitionEventListener) { public: static SpeechRecognitionManagerImpl* GetInstance(); - // SpeechRecognitionManager implementation: - virtual void StartRecognitionForRequest(int session_id) OVERRIDE; - virtual void CancelRecognitionForRequest(int session_id) OVERRIDE; - virtual void FocusLostForRequest(int session_id) OVERRIDE; + // SpeechRecognitionManager implementation. + virtual int CreateSession( + const content::SpeechRecognitionSessionConfig& config, + SpeechRecognitionEventListener* event_listener) OVERRIDE; + virtual void StartSession(int session_id) OVERRIDE; + virtual void AbortSession(int session_id) OVERRIDE; + virtual void AbortAllSessionsForListener( + content::SpeechRecognitionEventListener* listener) OVERRIDE; + virtual void StopAudioCaptureForSession(int session_id) OVERRIDE; + virtual void SendSessionToBackground(int session_id) OVERRIDE; + virtual content::SpeechRecognitionSessionContext GetSessionContext( + int session_id) const OVERRIDE; + virtual int LookupSessionByContext( + base::Callback<bool( + const content::SpeechRecognitionSessionContext&)> matcher) + const OVERRIDE; virtual bool HasAudioInputDevices() OVERRIDE; virtual bool IsCapturingAudio() OVERRIDE; virtual string16 GetAudioInputDeviceModel() OVERRIDE; virtual void ShowAudioInputSettings() OVERRIDE; - // Handlers for requests from render views. - - // |delegate| is a weak pointer and should remain valid until - // its |DidCompleteRecognition| method is called or recognition is cancelled. - // |render_process_id| is the ID of the renderer process initiating the - // request. - // |element_rect| is the display bounds of the html element requesting speech - // input (in page coordinates). 
- virtual void StartRecognition( - InputTagSpeechDispatcherHost* delegate, - int session_id, - int render_process_id, - int render_view_id, - const gfx::Rect& element_rect, - const std::string& language, - const std::string& grammar, - const std::string& origin_url, - net::URLRequestContextGetter* context_getter, - content::SpeechRecognitionPreferences* speech_recognition_prefs); - virtual void CancelRecognition(int session_id); - virtual void CancelAllRequestsWithDelegate( - InputTagSpeechDispatcherHost* delegate); - virtual void StopRecording(int session_id); - // SpeechRecognitionEventListener methods. virtual void OnRecognitionStart(int session_id) OVERRIDE; virtual void OnAudioStart(int session_id) OVERRIDE; @@ -84,8 +81,8 @@ class CONTENT_EXPORT SpeechRecognitionManagerImpl int session_id, const content::SpeechRecognitionResult& result) OVERRIDE; virtual void OnRecognitionError( int session_id, const content::SpeechRecognitionError& error) OVERRIDE; - virtual void OnAudioLevelsChange( - int session_id, float volume, float noise_volume) OVERRIDE; + virtual void OnAudioLevelsChange(int session_id, float volume, + float noise_volume) OVERRIDE; protected: // Private constructor to enforce singleton. @@ -93,34 +90,85 @@ class CONTENT_EXPORT SpeechRecognitionManagerImpl SpeechRecognitionManagerImpl(); virtual ~SpeechRecognitionManagerImpl(); - bool HasPendingRequest(int session_id) const; - private: - struct Request { - Request(); - ~Request(); + // Data types for the internal Finite State Machine (FSM). + enum FSMState { + STATE_IDLE = 0, + STATE_INTERACTIVE, + STATE_BACKGROUND, + STATE_WAITING_FOR_DELETION, + STATE_MAX_VALUE = STATE_WAITING_FOR_DELETION + }; - InputTagSpeechDispatcherHost* delegate; - scoped_refptr<content::SpeechRecognizer> recognizer; - bool is_active; // Set to true when recording or recognition is going on. 
+ enum FSMEvent { + EVENT_ABORT = 0, + EVENT_START, + EVENT_STOP_CAPTURE, + EVENT_SET_BACKGROUND, + EVENT_RECOGNITION_ENDED, + EVENT_RECOGNITION_RESULT, + EVENT_RECOGNITION_ERROR, + EVENT_MAX_VALUE = EVENT_RECOGNITION_ERROR }; - struct SpeechRecognitionParams; + struct Session { + Session(); + ~Session(); - InputTagSpeechDispatcherHost* GetDelegate(int session_id) const; + int id; + content::SpeechRecognitionEventListener* event_listener; + content::SpeechRecognitionSessionContext context; + scoped_refptr<SpeechRecognizerImpl> recognizer; + FSMState state; + bool error_occurred; + }; - void CheckRenderViewTypeAndStartRecognition( - const SpeechRecognitionParams& params); - void ProceedStartingRecognition(const SpeechRecognitionParams& params); + struct FSMEventArgs { + explicit FSMEventArgs(FSMEvent event_value); + ~FSMEventArgs(); - void CancelRecognitionAndInformDelegate(int session_id); + FSMEvent event; + content::SpeechRecognitionError speech_error; + }; - typedef std::map<int, Request> SpeechRecognizerMap; - SpeechRecognizerMap requests_; - std::string request_info_; - bool can_report_metrics_; - int recording_session_id_; - scoped_ptr<content::SpeechRecognitionManagerDelegate> delegate_; + // Callback issued by the SpeechRecognitionManagerDelegate for reporting + // asynchronously the result of the CheckRecognitionIsAllowed call. + void RecognitionAllowedCallback(int session_id, bool is_allowed); + + // Entry point for pushing any external event into the session handling FSM. + void DispatchEvent(int session_id, FSMEventArgs args); + + // Defines the behavior of the session handling FSM, selecting the appropriate + // transition according to the session, its current state and the event. + FSMState ExecuteTransitionAndGetNextState(Session& session, + const FSMEventArgs& event_args); + + // The methods below handle transitions of the session handling FSM. 
+ FSMState SessionStart(Session& session, const FSMEventArgs& event_args); + FSMState SessionAbort(Session& session, const FSMEventArgs& event_args); + FSMState SessionStopAudioCapture(Session& session, + const FSMEventArgs& event_args); + FSMState SessionAbortIfCapturingAudioOrBackground( + Session& session, const FSMEventArgs& event_args); + FSMState SessionSetBackground(Session& session, + const FSMEventArgs& event_args); + FSMState SessionReportError(Session& session, const FSMEventArgs& event_args); + FSMState SessionReportNoMatch(Session& session, + const FSMEventArgs& event_args); + FSMState SessionDelete(Session& session, const FSMEventArgs& event_args); + FSMState DoNothing(Session& session, const FSMEventArgs& event_args); + FSMState NotFeasible(Session& session, const FSMEventArgs& event_args); + + bool SessionExists(int session_id) const; + content::SpeechRecognitionEventListener* GetListener(int session_id) const; + int GetNextSessionID(); + + typedef std::map<int, Session> SessionsTable; + SessionsTable sessions_; + int interactive_session_id_; + int last_session_id_; + bool is_dispatching_event_; + content::SpeechRecognitionManagerDelegate* delegate_; }; } // namespace speech |