summaryrefslogtreecommitdiffstats
path: root/content/browser/speech
diff options
context:
space:
mode:
authorprimiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2012-04-25 20:20:18 +0000
committerprimiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2012-04-25 20:20:18 +0000
commitb450e9092544c11b225690a1e459ffe7e955cec4 (patch)
tree7838b6a28c94cf2a6c0c3a051b20d3690cde0a85 /content/browser/speech
parent2e526f05f8190a04df5105985b935c34a2acf7cf (diff)
downloadchromium_src-b450e9092544c11b225690a1e459ffe7e955cec4.zip
chromium_src-b450e9092544c11b225690a1e459ffe7e955cec4.tar.gz
chromium_src-b450e9092544c11b225690a1e459ffe7e955cec4.tar.bz2
Speech refactoring: Reimplemented SpeechRecognitionManagerImpl as a FSM. (CL1.7)
BUG=116954 TEST=none. Review URL: http://codereview.chromium.org/9972011 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@133967 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'content/browser/speech')
-rw-r--r--content/browser/speech/input_tag_speech_dispatcher_host.cc247
-rw-r--r--content/browser/speech/input_tag_speech_dispatcher_host.h38
-rw-r--r--content/browser/speech/speech_recognition_browsertest.cc92
-rw-r--r--content/browser/speech/speech_recognition_manager_impl.cc781
-rw-r--r--content/browser/speech/speech_recognition_manager_impl.h180
5 files changed, 816 insertions, 522 deletions
diff --git a/content/browser/speech/input_tag_speech_dispatcher_host.cc b/content/browser/speech/input_tag_speech_dispatcher_host.cc
index 29eebb64..4831ecd 100644
--- a/content/browser/speech/input_tag_speech_dispatcher_host.cc
+++ b/content/browser/speech/input_tag_speech_dispatcher_host.cc
@@ -4,113 +4,31 @@
#include "content/browser/speech/input_tag_speech_dispatcher_host.h"
+#include "base/bind.h"
#include "base/lazy_instance.h"
#include "content/browser/speech/speech_recognition_manager_impl.h"
#include "content/browser/speech/speech_recognizer_impl.h"
#include "content/common/speech_recognition_messages.h"
#include "content/public/browser/speech_recognition_preferences.h"
+#include "content/public/browser/speech_recognition_session_config.h"
+#include "content/public/browser/speech_recognition_session_context.h"
using content::BrowserThread;
+using content::SpeechRecognitionSessionConfig;
+using content::SpeechRecognitionSessionContext;
-namespace speech {
-
-//----------------------------- Sessions -----------------------------
-
-// TODO(primiano) Remove session handling from here in the next CL. The manager
-// shall be the only one in charge of keeping all the context information for
-// all recognition sessions.
-
-// A singleton class to map the tuple
-// (render-process-id, render-view-id, requestid) to a single ID which is passed
-// through rest of the speech code.
-class InputTagSpeechDispatcherHost::Sessions {
- public:
- // Creates a new ID for a given tuple.
- int CreateId(int render_process_id, int render_view_id, int request_id);
-
- // Returns the ID for a tuple assuming the ID was created earlier.
- int GetId(int render_process_id, int render_view_id, int request_id);
-
- // Removes the ID and associated tuple from the map.
- void RemoveId(int id);
-
- // Getters for the various tuple elements for the given ID.
- int render_process_id(int id);
- int render_view_id(int id);
- int request_id(int id);
-
- private:
- struct SessionInfo {
- int render_process_id;
- int render_view_id;
- int request_id;
- };
- friend struct base::DefaultLazyInstanceTraits<Sessions>;
-
- Sessions();
-
- std::map<int, SessionInfo> sessions_;
- int next_id_;
-};
-
-static base::LazyInstance<InputTagSpeechDispatcherHost::Sessions>
- g_sessions = LAZY_INSTANCE_INITIALIZER;
-
-InputTagSpeechDispatcherHost::Sessions::Sessions()
- : next_id_(1) {
-}
-
-int InputTagSpeechDispatcherHost::Sessions::GetId(int render_process_id,
- int render_view_id,
- int request_id) {
- for (std::map<int, SessionInfo>::iterator it = sessions_.begin();
- it != sessions_.end(); it++) {
- const SessionInfo& item = it->second;
- if (item.render_process_id == render_process_id &&
- item.render_view_id == render_view_id &&
- item.request_id == request_id) {
- return it->first;
- }
- }
-
- // Not finding an entry here is valid since a cancel/stop may have been issued
- // by the renderer and before it received our response the user may have
- // clicked the button to stop again. The caller of this method should take
- // care of this case.
- return 0;
-}
-
-int InputTagSpeechDispatcherHost::Sessions::CreateId(int render_process_id,
- int render_view_id,
- int request_id) {
- SessionInfo info;
- info.render_process_id = render_process_id;
- info.render_view_id = render_view_id;
- info.request_id = request_id;
- sessions_[next_id_] = info;
- return next_id_++;
-}
-
-void InputTagSpeechDispatcherHost::Sessions::RemoveId(int id) {
- sessions_.erase(id);
-}
-
-int InputTagSpeechDispatcherHost::Sessions::render_process_id(
- int id) {
- return sessions_[id].render_process_id;
+namespace {
+bool IsSameContext(int render_process_id,
+ int render_view_id,
+ int render_request_id,
+ const SpeechRecognitionSessionContext& context) {
+ return context.render_process_id == render_process_id &&
+ context.render_view_id == render_view_id &&
+ context.render_request_id == render_request_id;
}
+} // namespace
-int InputTagSpeechDispatcherHost::Sessions::render_view_id(
- int id) {
- return sessions_[id].render_view_id;
-}
-
-int InputTagSpeechDispatcherHost::Sessions::request_id(int id) {
- return sessions_[id].request_id;
-}
-
-//----------------------- InputTagSpeechDispatcherHost ----------------------
-
+namespace speech {
SpeechRecognitionManagerImpl* InputTagSpeechDispatcherHost::manager_;
void InputTagSpeechDispatcherHost::set_manager(
@@ -120,11 +38,11 @@ void InputTagSpeechDispatcherHost::set_manager(
InputTagSpeechDispatcherHost::InputTagSpeechDispatcherHost(
int render_process_id,
- net::URLRequestContextGetter* context_getter,
+ net::URLRequestContextGetter* url_request_context_getter,
content::SpeechRecognitionPreferences* recognition_preferences)
: render_process_id_(render_process_id),
may_have_pending_requests_(false),
- context_getter_(context_getter),
+ url_request_context_getter_(url_request_context_getter),
recognition_preferences_(recognition_preferences) {
// This is initialized by Browser. Do not add any non-trivial
// initialization here, instead do it lazily when required (e.g. see the
@@ -138,7 +56,7 @@ InputTagSpeechDispatcherHost::~InputTagSpeechDispatcherHost() {
// we don't end up creating the speech input manager for web pages which don't
// use speech input.
if (may_have_pending_requests_)
- manager()->CancelAllRequestsWithDelegate(this);
+ manager()->AbortAllSessionsForListener(this);
}
SpeechRecognitionManagerImpl* InputTagSpeechDispatcherHost::manager() {
@@ -173,69 +91,100 @@ bool InputTagSpeechDispatcherHost::OnMessageReceived(
void InputTagSpeechDispatcherHost::OnStartRecognition(
const InputTagSpeechHostMsg_StartRecognition_Params &params) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
- int session_id = g_sessions.Get().CreateId(
- render_process_id_, params.render_view_id, params.request_id);
- manager()->StartRecognition(this, session_id,
- render_process_id_,
- params.render_view_id, params.element_rect,
- params.language, params.grammar,
- params.origin_url,
- context_getter_.get(),
- recognition_preferences_.get());
+
+ SpeechRecognitionSessionContext context;
+ context.render_process_id = render_process_id_;
+ context.render_view_id = params.render_view_id;
+ context.render_request_id = params.request_id;
+ context.element_rect = params.element_rect;
+
+ SpeechRecognitionSessionConfig config;
+ config.language = params.language;
+ config.grammar = params.grammar;
+ config.origin_url = params.origin_url;
+ config.initial_context = context;
+ config.url_request_context_getter = url_request_context_getter_.get();
+ config.filter_profanities = recognition_preferences_->FilterProfanities();
+
+ int session_id = manager()->CreateSession(config, this);
+ if (session_id == content::SpeechRecognitionManager::kSessionIDInvalid)
+ return;
+
+ manager()->StartSession(session_id);
}
void InputTagSpeechDispatcherHost::OnCancelRecognition(int render_view_id,
int request_id) {
- int session_id = g_sessions.Get().GetId(
- render_process_id_, render_view_id, request_id);
- if (session_id) {
- manager()->CancelRecognition(session_id);
- // Request sequence ended so remove mapping.
- g_sessions.Get().RemoveId(session_id);
- }
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+ int session_id = manager()->LookupSessionByContext(
+ base::Bind(&IsSameContext,
+ render_process_id_,
+ render_view_id,
+ request_id));
+ if (session_id != content::SpeechRecognitionManager::kSessionIDInvalid)
+ manager()->AbortSession(session_id);
}
void InputTagSpeechDispatcherHost::OnStopRecording(int render_view_id,
int request_id) {
- int session_id = g_sessions.Get().GetId(
- render_process_id_, render_view_id, request_id);
- if (session_id)
- manager()->StopRecording(session_id);
-}
-
-void InputTagSpeechDispatcherHost::SetRecognitionResult(
- int session_id, const content::SpeechRecognitionResult& result) {
- VLOG(1) << "InputTagSpeechDispatcherHost::SetRecognitionResult enter";
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
- int session_render_view_id = g_sessions.Get().render_view_id(session_id);
- int session_request_id = g_sessions.Get().request_id(session_id);
- Send(new InputTagSpeechMsg_SetRecognitionResult(session_render_view_id,
- session_request_id,
- result));
- VLOG(1) << "InputTagSpeechDispatcherHost::SetRecognitionResult exit";
+ int session_id = manager()->LookupSessionByContext(
+ base::Bind(&IsSameContext,
+ render_process_id_,
+ render_view_id,
+ request_id));
+ DCHECK_NE(session_id, content::SpeechRecognitionManager::kSessionIDInvalid);
+ manager()->StopAudioCaptureForSession(session_id);
+}
+
+// -------- SpeechRecognitionEventListener interface implementation -----------
+void InputTagSpeechDispatcherHost::OnRecognitionResult(
+ int session_id, const content::SpeechRecognitionResult& result) {
+ VLOG(1) << "InputTagSpeechDispatcherHost::OnRecognitionResult enter";
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+
+ const SpeechRecognitionSessionContext& context =
+ manager()->GetSessionContext(session_id);
+
+ Send(new InputTagSpeechMsg_SetRecognitionResult(
+ context.render_view_id,
+ context.render_request_id,
+ result));
+ VLOG(1) << "InputTagSpeechDispatcherHost::OnRecognitionResult exit";
}
-void InputTagSpeechDispatcherHost::DidCompleteRecording(int session_id) {
- VLOG(1) << "InputTagSpeechDispatcherHost::DidCompleteRecording enter";
+void InputTagSpeechDispatcherHost::OnAudioEnd(int session_id) {
+ VLOG(1) << "InputTagSpeechDispatcherHost::OnAudioEnd enter";
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
- int session_render_view_id = g_sessions.Get().render_view_id(session_id);
- int session_request_id = g_sessions.Get().request_id(session_id);
- Send(new InputTagSpeechMsg_RecordingComplete(session_render_view_id,
- session_request_id));
- VLOG(1) << "InputTagSpeechDispatcherHost::DidCompleteRecording exit";
+
+ const SpeechRecognitionSessionContext& context =
+ manager()->GetSessionContext(session_id);
+
+ Send(new InputTagSpeechMsg_RecordingComplete(context.render_view_id,
+ context.render_request_id));
+ VLOG(1) << "InputTagSpeechDispatcherHost::OnAudioEnd exit";
}
-void InputTagSpeechDispatcherHost::DidCompleteRecognition(int session_id) {
- VLOG(1) << "InputTagSpeechDispatcherHost::DidCompleteRecognition enter";
+void InputTagSpeechDispatcherHost::OnRecognitionEnd(int session_id) {
+ VLOG(1) << "InputTagSpeechDispatcherHost::OnRecognitionEnd enter";
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
- int session_render_view_id =
- g_sessions.Get().render_view_id(session_id);
- int session_request_id = g_sessions.Get().request_id(session_id);
- Send(new InputTagSpeechMsg_RecognitionComplete(session_render_view_id,
- session_request_id));
- // Request sequence ended, so remove mapping.
- g_sessions.Get().RemoveId(session_id);
- VLOG(1) << "InputTagSpeechDispatcherHost::DidCompleteRecognition exit";
-}
+ const SpeechRecognitionSessionContext& context =
+ manager()->GetSessionContext(session_id);
+ Send(new InputTagSpeechMsg_RecognitionComplete(context.render_view_id,
+ context.render_request_id));
+ VLOG(1) << "InputTagSpeechDispatcherHost::OnRecognitionEnd exit";
+}
+
+// The events below are currently not used by x-webkit-speech implementation.
+void InputTagSpeechDispatcherHost::OnRecognitionStart(int session_id) {}
+void InputTagSpeechDispatcherHost::OnAudioStart(int session_id) {}
+void InputTagSpeechDispatcherHost::OnSoundStart(int session_id) {}
+void InputTagSpeechDispatcherHost::OnSoundEnd(int session_id) {}
+void InputTagSpeechDispatcherHost::OnRecognitionError(
+ int session_id, const content::SpeechRecognitionError& error) {}
+void InputTagSpeechDispatcherHost::OnAudioLevelsChange(
+ int session_id, float volume, float noise_volume) {}
+void InputTagSpeechDispatcherHost::OnEnvironmentEstimationComplete(
+ int session_id) {}
} // namespace speech
diff --git a/content/browser/speech/input_tag_speech_dispatcher_host.h b/content/browser/speech/input_tag_speech_dispatcher_host.h
index 95bd252..6a7358b 100644
--- a/content/browser/speech/input_tag_speech_dispatcher_host.h
+++ b/content/browser/speech/input_tag_speech_dispatcher_host.h
@@ -8,6 +8,7 @@
#include "base/memory/scoped_ptr.h"
#include "content/common/content_export.h"
#include "content/public/browser/browser_message_filter.h"
+#include "content/public/browser/speech_recognition_event_listener.h"
#include "net/url_request/url_request_context_getter.h"
struct InputTagSpeechHostMsg_StartRecognition_Params;
@@ -17,32 +18,37 @@ class SpeechRecognitionPreferences;
struct SpeechRecognitionResult;
}
-namespace media {
-class AudioManager;
-}
-
namespace speech {
class SpeechRecognitionManagerImpl;
// InputTagSpeechDispatcherHost is a delegate for Speech API messages used by
-// RenderMessageFilter.
-// It's the complement of InputTagSpeechDispatcher (owned by RenderView).
+// RenderMessageFilter. Basically it acts as a proxy, relaying the events coming
+// from the SpeechRecognitionManager to IPC messages (and vice versa).
+// It's the complement of SpeechRecognitionDispatcher (owned by RenderView).
class CONTENT_EXPORT InputTagSpeechDispatcherHost
- : public content::BrowserMessageFilter {
+ : public content::BrowserMessageFilter,
+ public content::SpeechRecognitionEventListener {
public:
- class Sessions;
-
InputTagSpeechDispatcherHost(
int render_process_id,
- net::URLRequestContextGetter* context_getter,
+ net::URLRequestContextGetter* url_request_context_getter,
content::SpeechRecognitionPreferences* recognition_preferences);
- // Methods called by SpeechRecognitionManagerImpl.
- void SetRecognitionResult(int session_id,
- const content::SpeechRecognitionResult& result);
- void DidCompleteRecording(int session_id);
- void DidCompleteRecognition(int session_id);
+ // SpeechRecognitionEventListener methods.
+ virtual void OnRecognitionStart(int session_id) OVERRIDE;
+ virtual void OnAudioStart(int session_id) OVERRIDE;
+ virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE;
+ virtual void OnSoundStart(int session_id) OVERRIDE;
+ virtual void OnSoundEnd(int session_id) OVERRIDE;
+ virtual void OnAudioEnd(int session_id) OVERRIDE;
+ virtual void OnRecognitionEnd(int session_id) OVERRIDE;
+ virtual void OnRecognitionResult(
+ int session_id, const content::SpeechRecognitionResult& result) OVERRIDE;
+ virtual void OnRecognitionError(
+ int session_id, const content::SpeechRecognitionError& error) OVERRIDE;
+ virtual void OnAudioLevelsChange(
+ int session_id, float volume, float noise_volume) OVERRIDE;
// content::BrowserMessageFilter implementation.
virtual bool OnMessageReceived(const IPC::Message& message,
@@ -66,7 +72,7 @@ class CONTENT_EXPORT InputTagSpeechDispatcherHost
int render_process_id_;
bool may_have_pending_requests_; // Set if we received any speech IPC request
- scoped_refptr<net::URLRequestContextGetter> context_getter_;
+ scoped_refptr<net::URLRequestContextGetter> url_request_context_getter_;
scoped_refptr<content::SpeechRecognitionPreferences> recognition_preferences_;
static SpeechRecognitionManagerImpl* manager_;
diff --git a/content/browser/speech/speech_recognition_browsertest.cc b/content/browser/speech/speech_recognition_browsertest.cc
index ea8f95e..e1bd7b6 100644
--- a/content/browser/speech/speech_recognition_browsertest.cc
+++ b/content/browser/speech/speech_recognition_browsertest.cc
@@ -5,6 +5,7 @@
#include "base/bind.h"
#include "base/command_line.h"
#include "base/file_path.h"
+#include "base/memory/scoped_ptr.h"
#include "base/string_number_conversions.h"
#include "base/synchronization/waitable_event.h"
#include "base/utf_string_conversions.h"
@@ -16,11 +17,15 @@
#include "content/browser/speech/speech_recognition_manager_impl.h"
#include "content/browser/web_contents/web_contents_impl.h"
#include "content/public/browser/notification_types.h"
+#include "content/public/browser/speech_recognition_session_config.h"
+#include "content/public/browser/speech_recognition_session_context.h"
#include "content/public/common/content_switches.h"
#include "content/public/common/speech_recognition_error.h"
#include "content/public/common/speech_recognition_result.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebInputEvent.h"
+using content::SpeechRecognitionEventListener;
+using content::SpeechRecognitionSessionContext;
using content::NavigationController;
using content::WebContents;
@@ -36,7 +41,7 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManagerImpl {
public:
FakeSpeechRecognitionManager()
: session_id_(0),
- delegate_(NULL),
+ listener_(NULL),
did_cancel_all_(false),
should_send_fake_response_(true),
recognition_started_event_(false, false) {
@@ -63,23 +68,24 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManagerImpl {
}
// SpeechRecognitionManager methods.
- virtual void StartRecognition(
- InputTagSpeechDispatcherHost* delegate,
- int session_id,
- int render_process_id,
- int render_view_id,
- const gfx::Rect& element_rect,
- const std::string& language,
- const std::string& grammar,
- const std::string& origin_url,
- net::URLRequestContextGetter* context_getter,
- content::SpeechRecognitionPreferences* recognition_prefs) OVERRIDE {
- VLOG(1) << "StartRecognition invoked.";
+ virtual int CreateSession(
+ const content::SpeechRecognitionSessionConfig& config,
+ SpeechRecognitionEventListener* event_listener) OVERRIDE {
+ VLOG(1) << "FAKE CreateSession invoked.";
EXPECT_EQ(0, session_id_);
- EXPECT_EQ(NULL, delegate_);
- session_id_ = session_id;
- delegate_ = delegate;
- grammar_ = grammar;
+ EXPECT_EQ(NULL, listener_);
+ listener_ = event_listener;
+ grammar_ = config.grammar;
+ session_ctx_ = config.initial_context;
+ session_id_ = 1;
+ return session_id_;
+ }
+
+ virtual void StartSession(int session_id) OVERRIDE {
+ VLOG(1) << "FAKE StartSession invoked.";
+ EXPECT_EQ(session_id, session_id_);
+ EXPECT_TRUE(listener_ != NULL);
+
if (should_send_fake_response_) {
// Give the fake result in a short while.
MessageLoop::current()->PostTask(FROM_HERE, base::Bind(
@@ -93,45 +99,69 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManagerImpl {
}
recognition_started_event_.Signal();
}
- virtual void CancelRecognition(int session_id) OVERRIDE {
- VLOG(1) << "CancelRecognition invoked.";
+
+ virtual void AbortSession(int session_id) OVERRIDE {
+ VLOG(1) << "FAKE AbortSession invoked.";
EXPECT_EQ(session_id_, session_id);
session_id_ = 0;
- delegate_ = NULL;
+ listener_ = NULL;
}
- virtual void StopRecording(int session_id) OVERRIDE {
+
+ virtual void StopAudioCaptureForSession(int session_id) OVERRIDE {
VLOG(1) << "StopRecording invoked.";
EXPECT_EQ(session_id_, session_id);
// Nothing to do here since we aren't really recording.
}
- virtual void CancelAllRequestsWithDelegate(
- InputTagSpeechDispatcherHost* delegate) OVERRIDE {
+
+ virtual void AbortAllSessionsForListener(
+ content::SpeechRecognitionEventListener* listener) OVERRIDE {
VLOG(1) << "CancelAllRequestsWithDelegate invoked.";
- // delegate_ is set to NULL if a fake result was received (see below), so
- // check that delegate_ matches the incoming parameter only when there is
+ // listener_ is set to NULL if a fake result was received (see below), so
+ // check that listener_ matches the incoming parameter only when there is
// no fake result sent.
- EXPECT_TRUE(should_send_fake_response_ || delegate_ == delegate);
+ EXPECT_TRUE(should_send_fake_response_ || listener_ == listener);
did_cancel_all_ = true;
}
+ virtual void SendSessionToBackground(int session_id) OVERRIDE {}
+ virtual bool HasAudioInputDevices() OVERRIDE { return true; }
+ virtual bool IsCapturingAudio() OVERRIDE { return true; }
+ virtual string16 GetAudioInputDeviceModel() OVERRIDE { return string16(); }
+ virtual void ShowAudioInputSettings() OVERRIDE {}
+
+ virtual int LookupSessionByContext(
+ base::Callback<bool(
+ const content::SpeechRecognitionSessionContext&)> matcher)
+ const OVERRIDE {
+ bool matched = matcher.Run(session_ctx_);
+ return matched ? session_id_ : 0;
+ }
+
+ virtual content::SpeechRecognitionSessionContext GetSessionContext(
+ int session_id) const OVERRIDE {
+ EXPECT_EQ(session_id, session_id_);
+ return session_ctx_;
+ }
+
private:
void SetFakeRecognitionResult() {
if (session_id_) { // Do a check in case we were cancelled..
VLOG(1) << "Setting fake recognition result.";
- delegate_->DidCompleteRecording(session_id_);
+ listener_->OnAudioEnd(session_id_);
content::SpeechRecognitionResult results;
results.hypotheses.push_back(content::SpeechRecognitionHypothesis(
ASCIIToUTF16(kTestResult), 1.0));
- delegate_->SetRecognitionResult(session_id_, results);
- delegate_->DidCompleteRecognition(session_id_);
+ listener_->OnRecognitionResult(session_id_, results);
+ listener_->OnRecognitionEnd(session_id_);
session_id_ = 0;
- delegate_ = NULL;
+ listener_ = NULL;
VLOG(1) << "Finished setting fake recognition result.";
}
}
int session_id_;
- InputTagSpeechDispatcherHost* delegate_;
+ SpeechRecognitionEventListener* listener_;
+ SpeechRecognitionSessionContext session_ctx_;
std::string grammar_;
bool did_cancel_all_;
bool should_send_fake_response_;
diff --git a/content/browser/speech/speech_recognition_manager_impl.cc b/content/browser/speech/speech_recognition_manager_impl.cc
index ed567e1..da2737c 100644
--- a/content/browser/speech/speech_recognition_manager_impl.cc
+++ b/content/browser/speech/speech_recognition_manager_impl.cc
@@ -5,361 +5,622 @@
#include "content/browser/speech/speech_recognition_manager_impl.h"
#include "base/bind.h"
+#include "base/memory/singleton.h"
#include "content/browser/browser_main_loop.h"
-#include "content/browser/renderer_host/render_view_host_impl.h"
-#include "content/browser/speech/input_tag_speech_dispatcher_host.h"
+#include "content/browser/speech/google_one_shot_remote_engine.h"
+#include "content/browser/speech/speech_recognition_engine.h"
+#include "content/browser/speech/speech_recognizer_impl.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/content_browser_client.h"
-#include "content/public/browser/speech_recognizer.h"
-#include "content/public/browser/render_view_host_delegate.h"
#include "content/public/browser/resource_context.h"
+#include "content/public/browser/speech_recognition_event_listener.h"
#include "content/public/browser/speech_recognition_manager_delegate.h"
-#include "content/public/browser/speech_recognition_preferences.h"
-#include "content/public/common/view_type.h"
+#include "content/public/browser/speech_recognition_session_config.h"
+#include "content/public/browser/speech_recognition_session_context.h"
+#include "content/public/common/speech_recognition_result.h"
#include "media/audio/audio_manager.h"
+using base::Callback;
+using base::Unretained;
using content::BrowserMainLoop;
using content::BrowserThread;
-using content::RenderViewHostImpl;
+using content::SpeechRecognitionError;
+using content::SpeechRecognitionEventListener;
using content::SpeechRecognitionManager;
-using content::SpeechRecognitionManagerDelegate;
+using content::SpeechRecognitionResult;
+using content::SpeechRecognitionSessionContext;
+using content::SpeechRecognitionSessionConfig;
+
+namespace content {
+const int SpeechRecognitionManager::kSessionIDInvalid = 0;
SpeechRecognitionManager* SpeechRecognitionManager::GetInstance() {
return speech::SpeechRecognitionManagerImpl::GetInstance();
}
+} // namespace content
namespace speech {
-struct SpeechRecognitionManagerImpl::SpeechRecognitionParams {
- SpeechRecognitionParams(
- InputTagSpeechDispatcherHost* delegate,
- int session_id,
- int render_process_id,
- int render_view_id,
- const gfx::Rect& element_rect,
- const std::string& language,
- const std::string& grammar,
- const std::string& origin_url,
- net::URLRequestContextGetter* context_getter,
- content::SpeechRecognitionPreferences* recognition_prefs)
- : delegate(delegate),
- session_id(session_id),
- render_process_id(render_process_id),
- render_view_id(render_view_id),
- element_rect(element_rect),
- language(language),
- grammar(grammar),
- origin_url(origin_url),
- context_getter(context_getter),
- recognition_prefs(recognition_prefs) {
- }
-
- InputTagSpeechDispatcherHost* delegate;
- int session_id;
- int render_process_id;
- int render_view_id;
- gfx::Rect element_rect;
- std::string language;
- std::string grammar;
- std::string origin_url;
- net::URLRequestContextGetter* context_getter;
- content::SpeechRecognitionPreferences* recognition_prefs;
-};
-
SpeechRecognitionManagerImpl* SpeechRecognitionManagerImpl::GetInstance() {
return Singleton<SpeechRecognitionManagerImpl>::get();
}
SpeechRecognitionManagerImpl::SpeechRecognitionManagerImpl()
- : can_report_metrics_(false),
- recording_session_id_(0) {
- delegate_.reset(content::GetContentClient()->browser()->
- GetSpeechRecognitionManagerDelegate());
+ : interactive_session_id_(kSessionIDInvalid),
+ last_session_id_(kSessionIDInvalid),
+ is_dispatching_event_(false) {
+ delegate_ = content::GetContentClient()->browser()->
+ GetSpeechRecognitionManagerDelegate();
}
SpeechRecognitionManagerImpl::~SpeechRecognitionManagerImpl() {
- while (requests_.begin() != requests_.end())
- CancelRecognition(requests_.begin()->first);
+ // Recognition sessions will be aborted by the corresponding destructors.
+ sessions_.clear();
}
-bool SpeechRecognitionManagerImpl::HasAudioInputDevices() {
- return BrowserMainLoop::GetAudioManager()->HasAudioInputDevices();
-}
+int SpeechRecognitionManagerImpl::CreateSession(
+ const SpeechRecognitionSessionConfig& config,
+ SpeechRecognitionEventListener* event_listener) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
-bool SpeechRecognitionManagerImpl::IsCapturingAudio() {
- return BrowserMainLoop::GetAudioManager()->IsRecordingInProcess();
-}
+ const int session_id = GetNextSessionID();
+ DCHECK(!SessionExists(session_id));
+ // Set-up the new session.
+ Session& session = sessions_[session_id];
+ session.id = session_id;
+ session.event_listener = event_listener;
+ session.context = config.initial_context;
+
+ std::string hardware_info;
+ bool can_report_metrics = false;
+ if (delegate_)
+ delegate_->GetDiagnosticInformation(&can_report_metrics, &hardware_info);
+
+ GoogleOneShotRemoteEngineConfig remote_engine_config;
+ remote_engine_config.language = config.language;
+ remote_engine_config.grammar = config.grammar;
+ remote_engine_config.audio_sample_rate =
+ SpeechRecognizerImpl::kAudioSampleRate;
+ remote_engine_config.audio_num_bits_per_sample =
+ SpeechRecognizerImpl::kNumBitsPerAudioSample;
+ remote_engine_config.filter_profanities = config.filter_profanities;
+ remote_engine_config.hardware_info = hardware_info;
+ remote_engine_config.origin_url = can_report_metrics ? config.origin_url : "";
+
+ GoogleOneShotRemoteEngine* google_remote_engine =
+ new GoogleOneShotRemoteEngine(config.url_request_context_getter);
+ google_remote_engine->SetConfig(remote_engine_config);
+
+ session.recognizer = new SpeechRecognizerImpl(this,
+ session_id,
+ google_remote_engine);
+ return session_id;
+}
+
+void SpeechRecognitionManagerImpl::StartSession(int session_id) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+ DCHECK(SessionExists(session_id));
-string16 SpeechRecognitionManagerImpl::GetAudioInputDeviceModel() {
- return BrowserMainLoop::GetAudioManager()->GetAudioInputDeviceModel();
-}
+ // If there is another interactive session, send it to background.
+ if (interactive_session_id_ != kSessionIDInvalid &&
+ interactive_session_id_ != session_id) {
+ SendSessionToBackground(interactive_session_id_);
+ }
-bool SpeechRecognitionManagerImpl::HasPendingRequest(int session_id) const {
- return requests_.find(session_id) != requests_.end();
+ if (delegate_)
+ delegate_->CheckRecognitionIsAllowed(
+ session_id,
+ base::Bind(&SpeechRecognitionManagerImpl::RecognitionAllowedCallback,
+ base::Unretained(this)));
}
-InputTagSpeechDispatcherHost* SpeechRecognitionManagerImpl::GetDelegate(
- int session_id) const {
- return requests_.find(session_id)->second.delegate;
+void SpeechRecognitionManagerImpl::RecognitionAllowedCallback(int session_id,
+ bool is_allowed) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+ DCHECK(SessionExists(session_id));
+ if (is_allowed) {
+ BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
+ base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent,
+ Unretained(this), session_id, FSMEventArgs(EVENT_START)));
+ } else {
+ sessions_.erase(session_id);
+ }
}
-void SpeechRecognitionManagerImpl::ShowAudioInputSettings() {
- // Since AudioManager::ShowAudioInputSettings can potentially launch external
- // processes, do that in the FILE thread to not block the calling threads.
- if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) {
- BrowserThread::PostTask(
- BrowserThread::FILE, FROM_HERE,
- base::Bind(&SpeechRecognitionManagerImpl::ShowAudioInputSettings,
- base::Unretained(this)));
- return;
- }
+void SpeechRecognitionManagerImpl::AbortSession(int session_id) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+ DCHECK(SessionExists(session_id));
- media::AudioManager* audio_manager = BrowserMainLoop::GetAudioManager();
- DCHECK(audio_manager->CanShowAudioInputSettings());
- if (audio_manager->CanShowAudioInputSettings())
- audio_manager->ShowAudioInputSettings();
+ BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
+ base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this),
+ session_id, FSMEventArgs(EVENT_ABORT)));
}
-void SpeechRecognitionManagerImpl::StartRecognition(
- InputTagSpeechDispatcherHost* delegate,
- int session_id,
- int render_process_id,
- int render_view_id,
- const gfx::Rect& element_rect,
- const std::string& language,
- const std::string& grammar,
- const std::string& origin_url,
- net::URLRequestContextGetter* context_getter,
- content::SpeechRecognitionPreferences* recognition_prefs) {
+void SpeechRecognitionManagerImpl::StopAudioCaptureForSession(int session_id) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
- BrowserThread::PostTask(
- BrowserThread::UI, FROM_HERE,
- base::Bind(
- &SpeechRecognitionManagerImpl::CheckRenderViewTypeAndStartRecognition,
- base::Unretained(this),
- SpeechRecognitionParams(
- delegate, session_id, render_process_id, render_view_id,
- element_rect, language, grammar, origin_url, context_getter,
- recognition_prefs)));
-}
-
-void SpeechRecognitionManagerImpl::CheckRenderViewTypeAndStartRecognition(
- const SpeechRecognitionParams& params) {
- DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
-
- RenderViewHostImpl* render_view_host = RenderViewHostImpl::FromID(
- params.render_process_id, params.render_view_id);
- if (!render_view_host || !render_view_host->GetDelegate())
- return;
+ DCHECK(SessionExists(session_id));
- // For host delegates other than VIEW_TYPE_WEB_CONTENTS we can't reliably show
- // a popup, including the speech input bubble. In these cases for privacy
- // reasons we don't want to start recording if the user can't be properly
- // notified. An example of this is trying to show the speech input bubble
- // within an extension popup: http://crbug.com/92083. In these situations the
- // speech input extension API should be used instead.
- if (render_view_host->GetDelegate()->GetRenderViewType() ==
- content::VIEW_TYPE_WEB_CONTENTS) {
- BrowserThread::PostTask(
- BrowserThread::IO, FROM_HERE,
- base::Bind(&SpeechRecognitionManagerImpl::ProceedStartingRecognition,
- base::Unretained(this), params));
- }
+ BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
+ base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this),
+ session_id, FSMEventArgs(EVENT_STOP_CAPTURE)));
}
-void SpeechRecognitionManagerImpl::ProceedStartingRecognition(
- const SpeechRecognitionParams& params) {
+void SpeechRecognitionManagerImpl::SendSessionToBackground(int session_id) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
- DCHECK(!HasPendingRequest(params.session_id));
+ DCHECK(SessionExists(session_id));
- if (delegate_.get()) {
- delegate_->ShowRecognitionRequested(
- params.session_id, params.render_process_id, params.render_view_id,
- params.element_rect);
- delegate_->GetRequestInfo(&can_report_metrics_, &request_info_);
- }
+ BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
+ base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this),
+ session_id, FSMEventArgs(EVENT_SET_BACKGROUND)));
+}
+
+// Here begins the SpeechRecognitionEventListener interface implementation,
+// which will simply relay the events to the proper listener registered for the
+// particular session (most likely InputTagSpeechDispatcherHost) and intercept
+// some of them to provide UI notifications.
- Request* request = &requests_[params.session_id];
- request->delegate = params.delegate;
- request->recognizer = content::SpeechRecognizer::Create(
- this, params.session_id, params.language, params.grammar,
- params.context_getter, params.recognition_prefs->FilterProfanities(),
- request_info_, can_report_metrics_ ? params.origin_url : "");
- request->is_active = false;
+void SpeechRecognitionManagerImpl::OnRecognitionStart(int session_id) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+ if (!SessionExists(session_id))
+ return;
- StartRecognitionForRequest(params.session_id);
+ DCHECK_EQ(interactive_session_id_, session_id);
+ if (delegate_)
+ delegate_->ShowWarmUp(session_id);
+ GetListener(session_id)->OnRecognitionStart(session_id);
}
-void SpeechRecognitionManagerImpl::StartRecognitionForRequest(int session_id) {
- SpeechRecognizerMap::iterator request = requests_.find(session_id);
- if (request == requests_.end()) {
- NOTREACHED();
+void SpeechRecognitionManagerImpl::OnAudioStart(int session_id) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+ if (!SessionExists(session_id))
return;
- }
-
- // We should not currently be recording for the session.
- CHECK(recording_session_id_ != session_id);
- // If we are currently recording audio for another session, abort it cleanly.
- if (recording_session_id_)
- CancelRecognitionAndInformDelegate(recording_session_id_);
- recording_session_id_ = session_id;
- requests_[session_id].is_active = true;
- requests_[session_id].recognizer->StartRecognition();
- if (delegate_.get())
- delegate_->ShowWarmUp(session_id);
+ DCHECK_EQ(interactive_session_id_, session_id);
+ if (delegate_)
+ delegate_->ShowRecording(session_id);
+ GetListener(session_id)->OnAudioStart(session_id);
}
-void SpeechRecognitionManagerImpl::CancelRecognitionForRequest(int session_id) {
- // Ignore if the session id was not in our active recognizers list because the
- // user might have clicked more than once, or recognition could have been
- // ended due to other reasons before the user click was processed.
- if (!HasPendingRequest(session_id))
+void SpeechRecognitionManagerImpl::OnEnvironmentEstimationComplete(
+ int session_id) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+ if (!SessionExists(session_id))
return;
- CancelRecognitionAndInformDelegate(session_id);
+ DCHECK_EQ(interactive_session_id_, session_id);
+ GetListener(session_id)->OnEnvironmentEstimationComplete(session_id);
}
-void SpeechRecognitionManagerImpl::FocusLostForRequest(int session_id) {
- // See above comment.
- if (!HasPendingRequest(session_id))
+void SpeechRecognitionManagerImpl::OnSoundStart(int session_id) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+ if (!SessionExists(session_id))
return;
- // If this is an ongoing recording or if we were displaying an error message
- // to the user, abort it since user has switched focus. Otherwise
- // recognition has started and keep that going so user can start speaking to
- // another element while this gets the results in parallel.
- if (recording_session_id_ == session_id || !requests_[session_id].is_active)
- CancelRecognitionAndInformDelegate(session_id);
-}
-
-void SpeechRecognitionManagerImpl::CancelRecognition(int session_id) {
- DCHECK(HasPendingRequest(session_id));
- if (requests_[session_id].is_active)
- requests_[session_id].recognizer->AbortRecognition();
- requests_.erase(session_id);
- if (recording_session_id_ == session_id)
- recording_session_id_ = 0;
- if (delegate_.get())
- delegate_->DoClose(session_id);
-}
-
-void SpeechRecognitionManagerImpl::CancelAllRequestsWithDelegate(
- InputTagSpeechDispatcherHost* delegate) {
- SpeechRecognizerMap::iterator it = requests_.begin();
- while (it != requests_.end()) {
- if (it->second.delegate == delegate) {
- CancelRecognition(it->first);
- // This map will have very few elements so it is simpler to restart.
- it = requests_.begin();
- } else {
- ++it;
- }
- }
+ DCHECK_EQ(interactive_session_id_, session_id);
+ GetListener(session_id)->OnSoundStart(session_id);
}
-void SpeechRecognitionManagerImpl::StopRecording(int session_id) {
- // No pending requests on extension popups.
- if (!HasPendingRequest(session_id))
+void SpeechRecognitionManagerImpl::OnSoundEnd(int session_id) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+ if (!SessionExists(session_id))
return;
- requests_[session_id].recognizer->StopAudioCapture();
+ GetListener(session_id)->OnSoundEnd(session_id);
}
-// -------- SpeechRecognitionEventListener interface implementation. ---------
+void SpeechRecognitionManagerImpl::OnAudioEnd(int session_id) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+ if (!SessionExists(session_id))
+ return;
+
+ // OnAudioEnd can also be raised after an abort request, when the session is
+ // not interactive anymore.
+ if (interactive_session_id_ == session_id && delegate_)
+ delegate_->ShowRecognizing(session_id);
+
+ GetListener(session_id)->OnAudioEnd(session_id);
+}
void SpeechRecognitionManagerImpl::OnRecognitionResult(
int session_id, const content::SpeechRecognitionResult& result) {
- DCHECK(HasPendingRequest(session_id));
- GetDelegate(session_id)->SetRecognitionResult(session_id, result);
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+ if (!SessionExists(session_id))
+ return;
+
+ GetListener(session_id)->OnRecognitionResult(session_id, result);
+ FSMEventArgs event_args(EVENT_RECOGNITION_RESULT);
+ BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
+ base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this),
+ session_id, event_args));
}
-void SpeechRecognitionManagerImpl::OnAudioEnd(int session_id) {
- if (recording_session_id_ != session_id)
+void SpeechRecognitionManagerImpl::OnRecognitionError(
+ int session_id, const content::SpeechRecognitionError& error) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+ if (!SessionExists(session_id))
return;
- DCHECK_EQ(recording_session_id_, session_id);
- DCHECK(HasPendingRequest(session_id));
- if (!requests_[session_id].is_active)
+
+ GetListener(session_id)->OnRecognitionError(session_id, error);
+ FSMEventArgs event_args(EVENT_RECOGNITION_ERROR);
+ event_args.speech_error = error;
+ BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
+ base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this),
+ session_id, event_args));
+}
+
+void SpeechRecognitionManagerImpl::OnAudioLevelsChange(
+ int session_id, float volume, float noise_volume) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+ if (!SessionExists(session_id))
return;
- recording_session_id_ = 0;
- GetDelegate(session_id)->DidCompleteRecording(session_id);
- if (delegate_.get())
- delegate_->ShowRecognizing(session_id);
+
+ if (delegate_)
+ delegate_->ShowInputVolume(session_id, volume, noise_volume);
+
+ GetListener(session_id)->OnAudioLevelsChange(session_id, volume,
+ noise_volume);
}
void SpeechRecognitionManagerImpl::OnRecognitionEnd(int session_id) {
- if (!HasPendingRequest(session_id) || !requests_[session_id].is_active)
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+ if (!SessionExists(session_id))
return;
- GetDelegate(session_id)->DidCompleteRecognition(session_id);
- requests_.erase(session_id);
- if (delegate_.get())
- delegate_->DoClose(session_id);
+
+ GetListener(session_id)->OnRecognitionEnd(session_id);
+ BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
+ base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this),
+ session_id, FSMEventArgs(EVENT_RECOGNITION_ENDED)));
}
-void SpeechRecognitionManagerImpl::OnSoundStart(int session_id) {
+// TODO(primiano) After CL2: if we see that both InputTagDispatcherHost and
+// SpeechRecognitionDispatcherHost do the same lookup operations, implement the
+// lookup method directly here.
+int SpeechRecognitionManagerImpl::LookupSessionByContext(
+ Callback<bool(const SpeechRecognitionSessionContext&)> matcher) const {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+ SessionsTable::const_iterator iter;
+ // Note: the callback (matcher) must NEVER perform non-const calls on us.
+ for(iter = sessions_.begin(); iter != sessions_.end(); ++iter) {
+ const int session_id = iter->first;
+ const Session& session = iter->second;
+ bool matches = matcher.Run(session.context);
+ if (matches)
+ return session_id;
+ }
+ return kSessionIDInvalid;
}
-void SpeechRecognitionManagerImpl::OnSoundEnd(int session_id) {
+SpeechRecognitionSessionContext
+SpeechRecognitionManagerImpl::GetSessionContext(int session_id) const {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+ SessionsTable::const_iterator iter = sessions_.find(session_id);
+ DCHECK(iter != sessions_.end());
+ return iter->second.context;
}
-void SpeechRecognitionManagerImpl::OnRecognitionError(
- int session_id, const content::SpeechRecognitionError& error) {
- DCHECK(HasPendingRequest(session_id));
- if (session_id == recording_session_id_)
- recording_session_id_ = 0;
- requests_[session_id].is_active = false;
- if (delegate_.get()) {
- if (error.code == content::SPEECH_RECOGNITION_ERROR_AUDIO &&
- error.details == content::SPEECH_AUDIO_ERROR_DETAILS_NO_MIC) {
- delegate_->ShowMicError(session_id,
- SpeechRecognitionManagerDelegate::MIC_ERROR_NO_DEVICE_AVAILABLE);
- } else if (error.code == content::SPEECH_RECOGNITION_ERROR_AUDIO &&
- error.details == content::SPEECH_AUDIO_ERROR_DETAILS_IN_USE) {
- delegate_->ShowMicError(session_id,
- SpeechRecognitionManagerDelegate::MIC_ERROR_DEVICE_IN_USE);
- } else {
- delegate_->ShowRecognizerError(session_id, error.code);
- }
+void SpeechRecognitionManagerImpl::AbortAllSessionsForListener(
+ SpeechRecognitionEventListener* listener) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+
+ // AbortSession is asynchronous and the session will not be removed from the
+ // collection while we are iterating over it.
+ for (SessionsTable::iterator it = sessions_.begin(); it != sessions_.end();
+ ++it) {
+ if (it->second.event_listener == listener)
+ AbortSession(it->first);
}
}
-void SpeechRecognitionManagerImpl::OnAudioStart(int session_id) {
- DCHECK(HasPendingRequest(session_id));
- DCHECK_EQ(recording_session_id_, session_id);
- if (delegate_.get())
- delegate_->ShowRecording(session_id);
+// ----------------------- Core FSM implementation ---------------------------
+void SpeechRecognitionManagerImpl::DispatchEvent(int session_id,
+ FSMEventArgs event_args) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+ if (!SessionExists(session_id))
+ return;
+
+ Session& session = sessions_[session_id];
+ DCHECK_LE(session.state, STATE_MAX_VALUE);
+ DCHECK_LE(event_args.event, EVENT_MAX_VALUE);
+
+ // Event dispatching must be sequential, otherwise it will break all the rules
+ // and the assumptions of the finite state automata model.
+ DCHECK(!is_dispatching_event_);
+ is_dispatching_event_ = true;
+
+ // Pedantic preconditions consistency checks.
+ if (session.state == STATE_INTERACTIVE)
+ DCHECK_EQ(interactive_session_id_, session_id);
+
+ if (session.state == STATE_BACKGROUND ||
+ session.state == STATE_WAITING_FOR_DELETION) {
+ DCHECK_NE(interactive_session_id_, session_id);
+ }
+
+ session.state = ExecuteTransitionAndGetNextState(session, event_args);
+
+ is_dispatching_event_ = false;
+}
+
+// This FSM handles the evolution of each session, from the viewpoint of the
+// interaction with the user (that may be either the browser end-user which
+// interacts with UI bubbles, or JS developer interacting with JS methods).
+// All the events received by the SpeechRecognizerImpl instances (one for each
+// session) are always routed to the SpeechRecognitionEventListener(s)
+// regardless the choices taken in this FSM.
+SpeechRecognitionManagerImpl::FSMState
+SpeechRecognitionManagerImpl::ExecuteTransitionAndGetNextState(
+ Session& session, const FSMEventArgs& event_args) {
+ // Some notes for the code below:
+ // - A session can be deleted only if it is not active, thus only if it ended
+ // spontaneously or we issued a prior SessionAbort. In these cases, we must
+//   wait for a RECOGNITION_ENDED event (which is guaranteed to always come
+//   last from the SpeechRecognizer) in order to free resources gracefully.
+ // - Use SessionDelete only when absolutely sure that the recognizer is not
+ // active. Prefer SessionAbort, which will do it gracefully, otherwise.
+//   - Since this class's methods are publicly exported, START, ABORT,
+//     STOP_CAPTURE and SET_BACKGROUND events can arrive at any moment from
+//     the outside wild world, even if they make no sense.
+ const FSMEvent event = event_args.event;
+ switch (session.state) {
+ case STATE_IDLE:
+ // Session has just been created or had an error while interactive.
+ switch (event) {
+ case EVENT_START:
+ return SessionStart(session, event_args);
+ case EVENT_ABORT:
+ case EVENT_SET_BACKGROUND:
+ return SessionAbort(session, event_args);
+ case EVENT_STOP_CAPTURE:
+ case EVENT_RECOGNITION_ENDED:
+ // In case of error, we come back in this state before receiving the
+ // OnRecognitionEnd event, thus EVENT_RECOGNITION_ENDED is feasible.
+ return DoNothing(session, event_args);
+ case EVENT_RECOGNITION_RESULT:
+ case EVENT_RECOGNITION_ERROR:
+ return NotFeasible(session, event_args);
+ }
+ break;
+ case STATE_INTERACTIVE:
+ // The recognizer can be either capturing audio or waiting for a result.
+ switch (event) {
+ case EVENT_RECOGNITION_RESULT:
+ // TODO(primiano) Valid only in single shot mode. Review in next CLs.
+ return SessionSetBackground(session, event_args);
+ case EVENT_SET_BACKGROUND:
+ return SessionAbortIfCapturingAudioOrBackground(session, event_args);
+ case EVENT_STOP_CAPTURE:
+ return SessionStopAudioCapture(session, event_args);
+ case EVENT_ABORT:
+ return SessionAbort(session, event_args);
+ case EVENT_RECOGNITION_ERROR:
+ return SessionReportError(session, event_args);
+ case EVENT_RECOGNITION_ENDED:
+ // If we're still interactive it means that no result was received
+ // in the meanwhile (otherwise we'd have been sent to background).
+ return SessionReportNoMatch(session, event_args);
+ case EVENT_START:
+ return DoNothing(session, event_args);
+ }
+ break;
+ case STATE_BACKGROUND:
+ switch (event) {
+ case EVENT_ABORT:
+ return SessionAbort(session, event_args);
+ case EVENT_RECOGNITION_ENDED:
+ return SessionDelete(session, event_args);
+ case EVENT_START:
+ case EVENT_STOP_CAPTURE:
+ case EVENT_RECOGNITION_RESULT:
+ case EVENT_RECOGNITION_ERROR:
+ return DoNothing(session, event_args);
+ case EVENT_SET_BACKGROUND:
+ return NotFeasible(session, event_args);
+ }
+ break;
+ case STATE_WAITING_FOR_DELETION:
+ switch (event) {
+ case EVENT_RECOGNITION_ENDED:
+ return SessionDelete(session, event_args);
+ case EVENT_ABORT:
+ case EVENT_START:
+ case EVENT_STOP_CAPTURE:
+ case EVENT_SET_BACKGROUND:
+ case EVENT_RECOGNITION_RESULT:
+ case EVENT_RECOGNITION_ERROR:
+ return DoNothing(session, event_args);
+ }
+ break;
+ }
+ return NotFeasible(session, event_args);
+}
+
+// ----------- Contract for all the FSM evolution functions below -------------
+// - Are guaranteed to be executed in the IO thread;
+// - Are guaranteed to be not reentrant (themselves and each other);
+// - event_args members are guaranteed to be stable during the call;
+
+SpeechRecognitionManagerImpl::FSMState
+SpeechRecognitionManagerImpl::SessionStart(Session& session,
+ const FSMEventArgs& event_args) {
+ if (interactive_session_id_ != kSessionIDInvalid && delegate_)
+ delegate_->DoClose(interactive_session_id_);
+ interactive_session_id_ = session.id;
+ if (delegate_)
+ delegate_->ShowRecognitionRequested(session.id);
+ session.recognizer->StartRecognition();
+ return STATE_INTERACTIVE;
+}
+
+SpeechRecognitionManagerImpl::FSMState
+SpeechRecognitionManagerImpl::SessionAbort(Session& session,
+ const FSMEventArgs& event_args) {
+ if (interactive_session_id_ == session.id) {
+ interactive_session_id_ = kSessionIDInvalid;
+ if (delegate_)
+ delegate_->DoClose(session.id);
+ }
+
+ // If abort was requested while the recognizer was inactive, delete directly.
+ if (session.recognizer == NULL || !session.recognizer->IsActive())
+ return SessionDelete(session, event_args);
+
+ // Otherwise issue an abort and delete gracefully, waiting for a
+ // RECOGNITION_ENDED event first.
+ session.recognizer->AbortRecognition();
+ return STATE_WAITING_FOR_DELETION;
+}
+
+SpeechRecognitionManagerImpl::FSMState
+SpeechRecognitionManagerImpl::SessionStopAudioCapture(
+ Session& session, const FSMEventArgs& event_args) {
+ DCHECK(session.recognizer != NULL);
+ DCHECK(session.recognizer->IsActive());
+ if (session.recognizer->IsCapturingAudio())
+ session.recognizer->StopAudioCapture();
+ return STATE_INTERACTIVE;
+}
+
+SpeechRecognitionManagerImpl::FSMState
+SpeechRecognitionManagerImpl::SessionAbortIfCapturingAudioOrBackground(
+ Session& session, const FSMEventArgs& event_args) {
+ DCHECK_EQ(interactive_session_id_, session.id);
+
+ DCHECK(session.recognizer != NULL);
+ DCHECK(session.recognizer->IsActive());
+ if (session.recognizer->IsCapturingAudio())
+ return SessionAbort(session, event_args);
+
+ interactive_session_id_ = kSessionIDInvalid;
+ if (delegate_)
+ delegate_->DoClose(session.id);
+ return STATE_BACKGROUND;
+}
+
+
+SpeechRecognitionManagerImpl::FSMState
+SpeechRecognitionManagerImpl::SessionSetBackground(
+ Session& session, const FSMEventArgs& event_args) {
+ DCHECK_EQ(interactive_session_id_, session.id);
+ interactive_session_id_ = kSessionIDInvalid;
+ if (delegate_)
+ delegate_->DoClose(session.id);
+ return STATE_BACKGROUND;
+}
+
+SpeechRecognitionManagerImpl::FSMState
+SpeechRecognitionManagerImpl::SessionReportError(
+ Session& session, const FSMEventArgs& event_args) {
+ DCHECK_EQ(interactive_session_id_, session.id);
+ if (delegate_)
+ delegate_->ShowError(session.id, event_args.speech_error);
+ return STATE_IDLE;
+}
+
+SpeechRecognitionManagerImpl::FSMState
+SpeechRecognitionManagerImpl::SessionReportNoMatch(
+ Session& session, const FSMEventArgs& event_args) {
+ DCHECK_EQ(interactive_session_id_, session.id);
+ if (delegate_) {
+ delegate_->ShowError(
+ session.id,
+ SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_NO_MATCH));
+ }
+ return STATE_IDLE;
}
-void SpeechRecognitionManagerImpl::OnRecognitionStart(int session_id) {
+SpeechRecognitionManagerImpl::FSMState
+SpeechRecognitionManagerImpl::SessionDelete(Session& session,
+ const FSMEventArgs& event_args) {
+ DCHECK(session.recognizer == NULL || !session.recognizer->IsActive());
+ if (interactive_session_id_ == session.id) {
+ interactive_session_id_ = kSessionIDInvalid;
+ if (delegate_)
+ delegate_->DoClose(session.id);
+ }
+ sessions_.erase(session.id);
+ // Next state is irrelevant, the session will be deleted afterwards.
+ return STATE_WAITING_FOR_DELETION;
}
-void SpeechRecognitionManagerImpl::OnEnvironmentEstimationComplete(
- int session_id) {
- DCHECK(HasPendingRequest(session_id));
- DCHECK_EQ(recording_session_id_, session_id);
+SpeechRecognitionManagerImpl::FSMState
+SpeechRecognitionManagerImpl::DoNothing(Session& session,
+ const FSMEventArgs& event_args) {
+ return session.state;
}
-void SpeechRecognitionManagerImpl::OnAudioLevelsChange(
- int session_id, float volume, float noise_volume) {
- DCHECK(HasPendingRequest(session_id));
- DCHECK_EQ(recording_session_id_, session_id);
- if (delegate_.get())
- delegate_->ShowInputVolume(session_id, volume, noise_volume);
+SpeechRecognitionManagerImpl::FSMState
+SpeechRecognitionManagerImpl::NotFeasible(Session& session,
+ const FSMEventArgs& event_args) {
+ NOTREACHED() << "Unfeasible event " << event_args.event
+ << " in state " << session.state
+ << " for session " << session.id;
+ return session.state;
}
-void SpeechRecognitionManagerImpl::CancelRecognitionAndInformDelegate(
- int session_id) {
- InputTagSpeechDispatcherHost* cur_delegate = GetDelegate(session_id);
- CancelRecognition(session_id);
- cur_delegate->DidCompleteRecording(session_id);
- cur_delegate->DidCompleteRecognition(session_id);
+int SpeechRecognitionManagerImpl::GetNextSessionID() {
+ ++last_session_id_;
+ // Deal with wrapping of last_session_id_. (How civilized).
+ if (last_session_id_ <= 0)
+ last_session_id_ = 1;
+ return last_session_id_;
+}
+
+bool SpeechRecognitionManagerImpl::SessionExists(int session_id) const {
+ return sessions_.find(session_id) != sessions_.end();
+}
+
+SpeechRecognitionEventListener* SpeechRecognitionManagerImpl::GetListener(
+ int session_id) const {
+ SessionsTable::const_iterator iter = sessions_.find(session_id);
+ DCHECK(iter != sessions_.end());
+ return iter->second.event_listener;
+}
+
+
+bool SpeechRecognitionManagerImpl::HasAudioInputDevices() {
+ return BrowserMainLoop::GetAudioManager()->HasAudioInputDevices();
+}
+
+bool SpeechRecognitionManagerImpl::IsCapturingAudio() {
+ return BrowserMainLoop::GetAudioManager()->IsRecordingInProcess();
+}
+
+string16 SpeechRecognitionManagerImpl::GetAudioInputDeviceModel() {
+ return BrowserMainLoop::GetAudioManager()->GetAudioInputDeviceModel();
+}
+
+void SpeechRecognitionManagerImpl::ShowAudioInputSettings() {
+ // Since AudioManager::ShowAudioInputSettings can potentially launch external
+ // processes, do that in the FILE thread to not block the calling threads.
+ if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) {
+ BrowserThread::PostTask(
+ BrowserThread::FILE, FROM_HERE,
+ base::Bind(&SpeechRecognitionManagerImpl::ShowAudioInputSettings,
+ base::Unretained(this)));
+ return;
+ }
+
+ media::AudioManager* audio_manager = BrowserMainLoop::GetAudioManager();
+ DCHECK(audio_manager->CanShowAudioInputSettings());
+ if (audio_manager->CanShowAudioInputSettings())
+ audio_manager->ShowAudioInputSettings();
+}
+
+SpeechRecognitionManagerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
+ : event(event_value),
+ speech_error(content::SPEECH_RECOGNITION_ERROR_NONE) {
+}
+
+SpeechRecognitionManagerImpl::FSMEventArgs::~FSMEventArgs() {
}
-SpeechRecognitionManagerImpl::Request::Request()
- : is_active(false) {
+SpeechRecognitionManagerImpl::Session::Session()
+ : id(kSessionIDInvalid),
+ event_listener(NULL),
+ state(STATE_IDLE) {
}
-SpeechRecognitionManagerImpl::Request::~Request() {
+SpeechRecognitionManagerImpl::Session::~Session() {
}
} // namespace speech
diff --git a/content/browser/speech/speech_recognition_manager_impl.h b/content/browser/speech/speech_recognition_manager_impl.h
index 9a0c967..650c4ad 100644
--- a/content/browser/speech/speech_recognition_manager_impl.h
+++ b/content/browser/speech/speech_recognition_manager_impl.h
@@ -9,69 +9,66 @@
#include <string>
#include "base/basictypes.h"
+#include "base/callback.h"
#include "base/compiler_specific.h"
-#include "base/memory/ref_counted.h"
-#include "base/memory/scoped_ptr.h"
#include "base/memory/singleton.h"
#include "content/public/browser/speech_recognition_event_listener.h"
#include "content/public/browser/speech_recognition_manager.h"
-#include "ui/gfx/rect.h"
+#include "content/public/browser/speech_recognition_session_context.h"
+#include "content/public/common/speech_recognition_error.h"
namespace content {
-class ResourceContext;
class SpeechRecognitionManagerDelegate;
-class SpeechRecognitionPreferences;
-struct SpeechRecognitionResult;
-class SpeechRecognizer;
-}
-
-namespace net {
-class URLRequestContextGetter;
}
namespace speech {
-class InputTagSpeechDispatcherHost;
-
-class CONTENT_EXPORT SpeechRecognitionManagerImpl
- : NON_EXPORTED_BASE(public content::SpeechRecognitionManager),
- NON_EXPORTED_BASE(public content::SpeechRecognitionEventListener) {
+class SpeechRecognizerImpl;
+
+// This is the manager for speech recognition. It is a singleton instance in
+// the browser process and can serve several requests. Each recognition request
+// corresponds to a session, initiated via |CreateSession|.
+// In every moment the manager has at most one "interactive" session (identified
+// by |interactive_session_id_|), that is the session that is currently holding
+// user attention. For privacy reasons, only the interactive session is allowed
+// to capture audio from the microphone. However, after audio capture is
+// completed, a session can be sent to background and can live in parallel with
+// other sessions, while waiting for its results.
+//
+// More in details, SpeechRecognitionManager has the following responsibilities:
+// - Handles requests received from various render views and makes sure only
+// one of them accesses the audio device at any given time.
+// - Relays recognition results/status/error events of each session to the
+// corresponding listener (demuxing on the base of their session_id).
+// - Handles the instantiation of SpeechRecognitionEngine objects when
+// requested by SpeechRecognitionSessions.
+class CONTENT_EXPORT SpeechRecognitionManagerImpl :
+ public NON_EXPORTED_BASE(content::SpeechRecognitionManager),
+ public NON_EXPORTED_BASE(content::SpeechRecognitionEventListener) {
public:
static SpeechRecognitionManagerImpl* GetInstance();
- // SpeechRecognitionManager implementation:
- virtual void StartRecognitionForRequest(int session_id) OVERRIDE;
- virtual void CancelRecognitionForRequest(int session_id) OVERRIDE;
- virtual void FocusLostForRequest(int session_id) OVERRIDE;
+ // SpeechRecognitionManager implementation.
+ virtual int CreateSession(
+ const content::SpeechRecognitionSessionConfig& config,
+ SpeechRecognitionEventListener* event_listener) OVERRIDE;
+ virtual void StartSession(int session_id) OVERRIDE;
+ virtual void AbortSession(int session_id) OVERRIDE;
+ virtual void AbortAllSessionsForListener(
+ content::SpeechRecognitionEventListener* listener) OVERRIDE;
+ virtual void StopAudioCaptureForSession(int session_id) OVERRIDE;
+ virtual void SendSessionToBackground(int session_id) OVERRIDE;
+ virtual content::SpeechRecognitionSessionContext GetSessionContext(
+ int session_id) const OVERRIDE;
+ virtual int LookupSessionByContext(
+ base::Callback<bool(
+ const content::SpeechRecognitionSessionContext&)> matcher)
+ const OVERRIDE;
virtual bool HasAudioInputDevices() OVERRIDE;
virtual bool IsCapturingAudio() OVERRIDE;
virtual string16 GetAudioInputDeviceModel() OVERRIDE;
virtual void ShowAudioInputSettings() OVERRIDE;
- // Handlers for requests from render views.
-
- // |delegate| is a weak pointer and should remain valid until
- // its |DidCompleteRecognition| method is called or recognition is cancelled.
- // |render_process_id| is the ID of the renderer process initiating the
- // request.
- // |element_rect| is the display bounds of the html element requesting speech
- // input (in page coordinates).
- virtual void StartRecognition(
- InputTagSpeechDispatcherHost* delegate,
- int session_id,
- int render_process_id,
- int render_view_id,
- const gfx::Rect& element_rect,
- const std::string& language,
- const std::string& grammar,
- const std::string& origin_url,
- net::URLRequestContextGetter* context_getter,
- content::SpeechRecognitionPreferences* speech_recognition_prefs);
- virtual void CancelRecognition(int session_id);
- virtual void CancelAllRequestsWithDelegate(
- InputTagSpeechDispatcherHost* delegate);
- virtual void StopRecording(int session_id);
-
// SpeechRecognitionEventListener methods.
virtual void OnRecognitionStart(int session_id) OVERRIDE;
virtual void OnAudioStart(int session_id) OVERRIDE;
@@ -84,8 +81,8 @@ class CONTENT_EXPORT SpeechRecognitionManagerImpl
int session_id, const content::SpeechRecognitionResult& result) OVERRIDE;
virtual void OnRecognitionError(
int session_id, const content::SpeechRecognitionError& error) OVERRIDE;
- virtual void OnAudioLevelsChange(
- int session_id, float volume, float noise_volume) OVERRIDE;
+ virtual void OnAudioLevelsChange(int session_id, float volume,
+ float noise_volume) OVERRIDE;
protected:
// Private constructor to enforce singleton.
@@ -93,34 +90,85 @@ class CONTENT_EXPORT SpeechRecognitionManagerImpl
SpeechRecognitionManagerImpl();
virtual ~SpeechRecognitionManagerImpl();
- bool HasPendingRequest(int session_id) const;
-
private:
- struct Request {
- Request();
- ~Request();
+ // Data types for the internal Finite State Machine (FSM).
+ enum FSMState {
+ STATE_IDLE = 0,
+ STATE_INTERACTIVE,
+ STATE_BACKGROUND,
+ STATE_WAITING_FOR_DELETION,
+ STATE_MAX_VALUE = STATE_WAITING_FOR_DELETION
+ };
- InputTagSpeechDispatcherHost* delegate;
- scoped_refptr<content::SpeechRecognizer> recognizer;
- bool is_active; // Set to true when recording or recognition is going on.
+ enum FSMEvent {
+ EVENT_ABORT = 0,
+ EVENT_START,
+ EVENT_STOP_CAPTURE,
+ EVENT_SET_BACKGROUND,
+ EVENT_RECOGNITION_ENDED,
+ EVENT_RECOGNITION_RESULT,
+ EVENT_RECOGNITION_ERROR,
+ EVENT_MAX_VALUE = EVENT_RECOGNITION_ERROR
};
- struct SpeechRecognitionParams;
+ struct Session {
+ Session();
+ ~Session();
- InputTagSpeechDispatcherHost* GetDelegate(int session_id) const;
+ int id;
+ content::SpeechRecognitionEventListener* event_listener;
+ content::SpeechRecognitionSessionContext context;
+ scoped_refptr<SpeechRecognizerImpl> recognizer;
+ FSMState state;
+ bool error_occurred;
+ };
- void CheckRenderViewTypeAndStartRecognition(
- const SpeechRecognitionParams& params);
- void ProceedStartingRecognition(const SpeechRecognitionParams& params);
+ struct FSMEventArgs {
+ explicit FSMEventArgs(FSMEvent event_value);
+ ~FSMEventArgs();
- void CancelRecognitionAndInformDelegate(int session_id);
+ FSMEvent event;
+ content::SpeechRecognitionError speech_error;
+ };
- typedef std::map<int, Request> SpeechRecognizerMap;
- SpeechRecognizerMap requests_;
- std::string request_info_;
- bool can_report_metrics_;
- int recording_session_id_;
- scoped_ptr<content::SpeechRecognitionManagerDelegate> delegate_;
+ // Callback issued by the SpeechRecognitionManagerDelegate for reporting
+ // asynchronously the result of the CheckRecognitionIsAllowed call.
+ void RecognitionAllowedCallback(int session_id, bool is_allowed);
+
+ // Entry point for pushing any external event into the session handling FSM.
+ void DispatchEvent(int session_id, FSMEventArgs args);
+
+ // Defines the behavior of the session handling FSM, selecting the appropriate
+ // transition according to the session, its current state and the event.
+ FSMState ExecuteTransitionAndGetNextState(Session& session,
+ const FSMEventArgs& event_args);
+
+ // The methods below handle transitions of the session handling FSM.
+ FSMState SessionStart(Session& session, const FSMEventArgs& event_args);
+ FSMState SessionAbort(Session& session, const FSMEventArgs& event_args);
+ FSMState SessionStopAudioCapture(Session& session,
+ const FSMEventArgs& event_args);
+ FSMState SessionAbortIfCapturingAudioOrBackground(
+ Session& session, const FSMEventArgs& event_args);
+ FSMState SessionSetBackground(Session& session,
+ const FSMEventArgs& event_args);
+ FSMState SessionReportError(Session& session, const FSMEventArgs& event_args);
+ FSMState SessionReportNoMatch(Session& session,
+ const FSMEventArgs& event_args);
+ FSMState SessionDelete(Session& session, const FSMEventArgs& event_args);
+ FSMState DoNothing(Session& session, const FSMEventArgs& event_args);
+ FSMState NotFeasible(Session& session, const FSMEventArgs& event_args);
+
+ bool SessionExists(int session_id) const;
+ content::SpeechRecognitionEventListener* GetListener(int session_id) const;
+ int GetNextSessionID();
+
+ typedef std::map<int, Session> SessionsTable;
+ SessionsTable sessions_;
+ int interactive_session_id_;
+ int last_session_id_;
+ bool is_dispatching_event_;
+ content::SpeechRecognitionManagerDelegate* delegate_;
};
} // namespace speech