summaryrefslogtreecommitdiffstats
path: root/content/browser/speech/speech_recognition_manager_impl.h
diff options
context:
space:
mode:
Diffstat (limited to 'content/browser/speech/speech_recognition_manager_impl.h')
-rw-r--r--content/browser/speech/speech_recognition_manager_impl.h180
1 files changed, 114 insertions, 66 deletions
diff --git a/content/browser/speech/speech_recognition_manager_impl.h b/content/browser/speech/speech_recognition_manager_impl.h
index 9a0c967..650c4ad 100644
--- a/content/browser/speech/speech_recognition_manager_impl.h
+++ b/content/browser/speech/speech_recognition_manager_impl.h
@@ -9,69 +9,66 @@
#include <string>
#include "base/basictypes.h"
+#include "base/callback.h"
#include "base/compiler_specific.h"
-#include "base/memory/ref_counted.h"
-#include "base/memory/scoped_ptr.h"
#include "base/memory/singleton.h"
#include "content/public/browser/speech_recognition_event_listener.h"
#include "content/public/browser/speech_recognition_manager.h"
-#include "ui/gfx/rect.h"
+#include "content/public/browser/speech_recognition_session_context.h"
+#include "content/public/common/speech_recognition_error.h"
namespace content {
-class ResourceContext;
class SpeechRecognitionManagerDelegate;
-class SpeechRecognitionPreferences;
-struct SpeechRecognitionResult;
-class SpeechRecognizer;
-}
-
-namespace net {
-class URLRequestContextGetter;
}
namespace speech {
-class InputTagSpeechDispatcherHost;
-
-class CONTENT_EXPORT SpeechRecognitionManagerImpl
- : NON_EXPORTED_BASE(public content::SpeechRecognitionManager),
- NON_EXPORTED_BASE(public content::SpeechRecognitionEventListener) {
+class SpeechRecognizerImpl;
+
+// This is the manager for speech recognition. It is a singleton instance in
+// the browser process and can serve several requests. Each recognition request
+// corresponds to a session, initiated via |CreateSession|.
+// At any moment the manager has at most one "interactive" session (identified
+// by |interactive_session_id_|), that is the session that is currently holding
+// user attention. For privacy reasons, only the interactive session is allowed
+// to capture audio from the microphone. However, after audio capture is
+// completed, a session can be sent to background and can live in parallel with
+// other sessions, while waiting for its results.
+//
+// In more detail, SpeechRecognitionManager has the following responsibilities:
+// - Handles requests received from various render views and makes sure only
+// one of them accesses the audio device at any given time.
+// - Relays recognition results/status/error events of each session to the
+// corresponding listener (demuxing on the basis of their session_id).
+// - Handles the instantiation of SpeechRecognitionEngine objects when
+// requested by SpeechRecognitionSessions.
+class CONTENT_EXPORT SpeechRecognitionManagerImpl :
+ public NON_EXPORTED_BASE(content::SpeechRecognitionManager),
+ public NON_EXPORTED_BASE(content::SpeechRecognitionEventListener) {
public:
static SpeechRecognitionManagerImpl* GetInstance();
- // SpeechRecognitionManager implementation:
- virtual void StartRecognitionForRequest(int session_id) OVERRIDE;
- virtual void CancelRecognitionForRequest(int session_id) OVERRIDE;
- virtual void FocusLostForRequest(int session_id) OVERRIDE;
+ // SpeechRecognitionManager implementation.
+ virtual int CreateSession(
+ const content::SpeechRecognitionSessionConfig& config,
+ SpeechRecognitionEventListener* event_listener) OVERRIDE;
+ virtual void StartSession(int session_id) OVERRIDE;
+ virtual void AbortSession(int session_id) OVERRIDE;
+ virtual void AbortAllSessionsForListener(
+ content::SpeechRecognitionEventListener* listener) OVERRIDE;
+ virtual void StopAudioCaptureForSession(int session_id) OVERRIDE;
+ virtual void SendSessionToBackground(int session_id) OVERRIDE;
+ virtual content::SpeechRecognitionSessionContext GetSessionContext(
+ int session_id) const OVERRIDE;
+ virtual int LookupSessionByContext(
+ base::Callback<bool(
+ const content::SpeechRecognitionSessionContext&)> matcher)
+ const OVERRIDE;
virtual bool HasAudioInputDevices() OVERRIDE;
virtual bool IsCapturingAudio() OVERRIDE;
virtual string16 GetAudioInputDeviceModel() OVERRIDE;
virtual void ShowAudioInputSettings() OVERRIDE;
- // Handlers for requests from render views.
-
- // |delegate| is a weak pointer and should remain valid until
- // its |DidCompleteRecognition| method is called or recognition is cancelled.
- // |render_process_id| is the ID of the renderer process initiating the
- // request.
- // |element_rect| is the display bounds of the html element requesting speech
- // input (in page coordinates).
- virtual void StartRecognition(
- InputTagSpeechDispatcherHost* delegate,
- int session_id,
- int render_process_id,
- int render_view_id,
- const gfx::Rect& element_rect,
- const std::string& language,
- const std::string& grammar,
- const std::string& origin_url,
- net::URLRequestContextGetter* context_getter,
- content::SpeechRecognitionPreferences* speech_recognition_prefs);
- virtual void CancelRecognition(int session_id);
- virtual void CancelAllRequestsWithDelegate(
- InputTagSpeechDispatcherHost* delegate);
- virtual void StopRecording(int session_id);
-
// SpeechRecognitionEventListener methods.
virtual void OnRecognitionStart(int session_id) OVERRIDE;
virtual void OnAudioStart(int session_id) OVERRIDE;
@@ -84,8 +81,8 @@ class CONTENT_EXPORT SpeechRecognitionManagerImpl
int session_id, const content::SpeechRecognitionResult& result) OVERRIDE;
virtual void OnRecognitionError(
int session_id, const content::SpeechRecognitionError& error) OVERRIDE;
- virtual void OnAudioLevelsChange(
- int session_id, float volume, float noise_volume) OVERRIDE;
+ virtual void OnAudioLevelsChange(int session_id, float volume,
+ float noise_volume) OVERRIDE;
protected:
// Private constructor to enforce singleton.
@@ -93,34 +90,85 @@ class CONTENT_EXPORT SpeechRecognitionManagerImpl
SpeechRecognitionManagerImpl();
virtual ~SpeechRecognitionManagerImpl();
- bool HasPendingRequest(int session_id) const;
-
private:
- struct Request {
- Request();
- ~Request();
+ // Data types for the internal Finite State Machine (FSM).
+ enum FSMState {
+ STATE_IDLE = 0,
+ STATE_INTERACTIVE,
+ STATE_BACKGROUND,
+ STATE_WAITING_FOR_DELETION,
+ STATE_MAX_VALUE = STATE_WAITING_FOR_DELETION
+ };
- InputTagSpeechDispatcherHost* delegate;
- scoped_refptr<content::SpeechRecognizer> recognizer;
- bool is_active; // Set to true when recording or recognition is going on.
+ enum FSMEvent {
+ EVENT_ABORT = 0,
+ EVENT_START,
+ EVENT_STOP_CAPTURE,
+ EVENT_SET_BACKGROUND,
+ EVENT_RECOGNITION_ENDED,
+ EVENT_RECOGNITION_RESULT,
+ EVENT_RECOGNITION_ERROR,
+ EVENT_MAX_VALUE = EVENT_RECOGNITION_ERROR
};
- struct SpeechRecognitionParams;
+ struct Session {
+ Session();
+ ~Session();
- InputTagSpeechDispatcherHost* GetDelegate(int session_id) const;
+ int id;
+ content::SpeechRecognitionEventListener* event_listener;
+ content::SpeechRecognitionSessionContext context;
+ scoped_refptr<SpeechRecognizerImpl> recognizer;
+ FSMState state;
+ bool error_occurred;
+ };
- void CheckRenderViewTypeAndStartRecognition(
- const SpeechRecognitionParams& params);
- void ProceedStartingRecognition(const SpeechRecognitionParams& params);
+ struct FSMEventArgs {
+ explicit FSMEventArgs(FSMEvent event_value);
+ ~FSMEventArgs();
- void CancelRecognitionAndInformDelegate(int session_id);
+ FSMEvent event;
+ content::SpeechRecognitionError speech_error;
+ };
- typedef std::map<int, Request> SpeechRecognizerMap;
- SpeechRecognizerMap requests_;
- std::string request_info_;
- bool can_report_metrics_;
- int recording_session_id_;
- scoped_ptr<content::SpeechRecognitionManagerDelegate> delegate_;
+ // Callback issued by the SpeechRecognitionManagerDelegate to asynchronously
+ // report the result of the CheckRecognitionIsAllowed call.
+ void RecognitionAllowedCallback(int session_id, bool is_allowed);
+
+ // Entry point for pushing any external event into the session handling FSM.
+ void DispatchEvent(int session_id, FSMEventArgs args);
+
+ // Defines the behavior of the session handling FSM, selecting the appropriate
+ // transition according to the session, its current state and the event.
+ FSMState ExecuteTransitionAndGetNextState(Session& session,
+ const FSMEventArgs& event_args);
+
+ // The methods below handle transitions of the session handling FSM.
+ FSMState SessionStart(Session& session, const FSMEventArgs& event_args);
+ FSMState SessionAbort(Session& session, const FSMEventArgs& event_args);
+ FSMState SessionStopAudioCapture(Session& session,
+ const FSMEventArgs& event_args);
+ FSMState SessionAbortIfCapturingAudioOrBackground(
+ Session& session, const FSMEventArgs& event_args);
+ FSMState SessionSetBackground(Session& session,
+ const FSMEventArgs& event_args);
+ FSMState SessionReportError(Session& session, const FSMEventArgs& event_args);
+ FSMState SessionReportNoMatch(Session& session,
+ const FSMEventArgs& event_args);
+ FSMState SessionDelete(Session& session, const FSMEventArgs& event_args);
+ FSMState DoNothing(Session& session, const FSMEventArgs& event_args);
+ FSMState NotFeasible(Session& session, const FSMEventArgs& event_args);
+
+ bool SessionExists(int session_id) const;
+ content::SpeechRecognitionEventListener* GetListener(int session_id) const;
+ int GetNextSessionID();
+
+ typedef std::map<int, Session> SessionsTable;
+ SessionsTable sessions_;
+ int interactive_session_id_;
+ int last_session_id_;
+ bool is_dispatching_event_;
+ content::SpeechRecognitionManagerDelegate* delegate_;
};
} // namespace speech