1 files changed, 114 insertions, 66 deletions
diff --git a/content/browser/speech/speech_recognition_manager_impl.h b/content/browser/speech/speech_recognition_manager_impl.h
index 9a0c967..650c4ad 100644
--- a/content/browser/speech/speech_recognition_manager_impl.h
+++ b/content/browser/speech/speech_recognition_manager_impl.h
@@ -9,69 +9,66 @@
 #include <string>
 
 #include "base/basictypes.h"
+#include "base/callback.h"
 #include "base/compiler_specific.h"
-#include "base/memory/ref_counted.h"
-#include "base/memory/scoped_ptr.h"
 #include "base/memory/singleton.h"
 #include "content/public/browser/speech_recognition_event_listener.h"
 #include "content/public/browser/speech_recognition_manager.h"
-#include "ui/gfx/rect.h"
+#include "content/public/browser/speech_recognition_session_context.h"
+#include "content/public/common/speech_recognition_error.h"
 
 namespace content {
-class ResourceContext;
 class SpeechRecognitionManagerDelegate;
-class SpeechRecognitionPreferences;
-struct SpeechRecognitionResult;
-class SpeechRecognizer;
-}
-
-namespace net {
-class URLRequestContextGetter;
 }
 
 namespace speech {
 
-class InputTagSpeechDispatcherHost;
-
-class CONTENT_EXPORT SpeechRecognitionManagerImpl
-    : NON_EXPORTED_BASE(public content::SpeechRecognitionManager),
-      NON_EXPORTED_BASE(public content::SpeechRecognitionEventListener) {
+class SpeechRecognizerImpl;
+
+// This is the manager for speech recognition. It is a singleton instance in
+// the browser process and can serve several requests. Each recognition request
+// corresponds to a session, initiated via |CreateSession|.
+// At any moment the manager has at most one "interactive" session (identified
+// by |interactive_session_id_|), i.e. the session that is currently holding
+// user attention. For privacy reasons, only the interactive session is allowed
+// to capture audio from the microphone. However, after audio capture is
+// completed, a session can be sent to background and can live in parallel with
+// other sessions, while waiting for its results.
+//
+// In more detail, SpeechRecognitionManager has the following responsibilities:
+//  - Handles requests received from various render views and makes sure only
+//    one of them accesses the audio device at any given time.
+//  - Relays recognition results/status/error events of each session to the
+//    corresponding listener (demuxing on the basis of their session_id).
+//  - Handles the instantiation of SpeechRecognitionEngine objects when
+//    requested by SpeechRecognitionSessions.
+class CONTENT_EXPORT SpeechRecognitionManagerImpl :
+    public NON_EXPORTED_BASE(content::SpeechRecognitionManager),
+    public NON_EXPORTED_BASE(content::SpeechRecognitionEventListener) {
  public:
   static SpeechRecognitionManagerImpl* GetInstance();
 
-  // SpeechRecognitionManager implementation:
-  virtual void StartRecognitionForRequest(int session_id) OVERRIDE;
-  virtual void CancelRecognitionForRequest(int session_id) OVERRIDE;
-  virtual void FocusLostForRequest(int session_id) OVERRIDE;
+  // SpeechRecognitionManager implementation.
+  virtual int CreateSession(
+      const content::SpeechRecognitionSessionConfig& config,
+      SpeechRecognitionEventListener* event_listener) OVERRIDE;
+  virtual void StartSession(int session_id) OVERRIDE;
+  virtual void AbortSession(int session_id) OVERRIDE;
+  virtual void AbortAllSessionsForListener(
+        content::SpeechRecognitionEventListener* listener) OVERRIDE;
+  virtual void StopAudioCaptureForSession(int session_id) OVERRIDE;
+  virtual void SendSessionToBackground(int session_id) OVERRIDE;
+  virtual content::SpeechRecognitionSessionContext GetSessionContext(
+      int session_id) const OVERRIDE;
+  virtual int LookupSessionByContext(
+      base::Callback<bool(
+          const content::SpeechRecognitionSessionContext&)> matcher)
+            const OVERRIDE;
   virtual bool HasAudioInputDevices() OVERRIDE;
   virtual bool IsCapturingAudio() OVERRIDE;
   virtual string16 GetAudioInputDeviceModel() OVERRIDE;
   virtual void ShowAudioInputSettings() OVERRIDE;
 
-  // Handlers for requests from render views.
-
-  // |delegate| is a weak pointer and should remain valid until
-  // its |DidCompleteRecognition| method is called or recognition is cancelled.
-  // |render_process_id| is the ID of the renderer process initiating the
-  // request.
-  // |element_rect| is the display bounds of the html element requesting speech
-  // input (in page coordinates).
-  virtual void StartRecognition(
-      InputTagSpeechDispatcherHost* delegate,
-      int session_id,
-      int render_process_id,
-      int render_view_id,
-      const gfx::Rect& element_rect,
-      const std::string& language,
-      const std::string& grammar,
-      const std::string& origin_url,
-      net::URLRequestContextGetter* context_getter,
-      content::SpeechRecognitionPreferences* speech_recognition_prefs);
-  virtual void CancelRecognition(int session_id);
-  virtual void CancelAllRequestsWithDelegate(
-      InputTagSpeechDispatcherHost* delegate);
-  virtual void StopRecording(int session_id);
-
   // SpeechRecognitionEventListener methods.
   virtual void OnRecognitionStart(int session_id) OVERRIDE;
   virtual void OnAudioStart(int session_id) OVERRIDE;
@@ -84,8 +81,8 @@ class CONTENT_EXPORT SpeechRecognitionManagerImpl
       int session_id, const content::SpeechRecognitionResult& result) OVERRIDE;
   virtual void OnRecognitionError(
       int session_id, const content::SpeechRecognitionError& error) OVERRIDE;
-  virtual void OnAudioLevelsChange(
-      int session_id, float volume, float noise_volume) OVERRIDE;
+  virtual void OnAudioLevelsChange(int session_id, float volume,
+                                   float noise_volume) OVERRIDE;
 
  protected:
   // Private constructor to enforce singleton.
@@ -93,34 +90,85 @@ class CONTENT_EXPORT SpeechRecognitionManagerImpl
   SpeechRecognitionManagerImpl();
   virtual ~SpeechRecognitionManagerImpl();
 
-  bool HasPendingRequest(int session_id) const;
-
  private:
-  struct Request {
-    Request();
-    ~Request();
+  // Data types for the internal Finite State Machine (FSM).
+  enum FSMState {
+    STATE_IDLE = 0,
+    STATE_INTERACTIVE,
+    STATE_BACKGROUND,
+    STATE_WAITING_FOR_DELETION,
+    STATE_MAX_VALUE = STATE_WAITING_FOR_DELETION
+  };
 
-    InputTagSpeechDispatcherHost* delegate;
-    scoped_refptr<content::SpeechRecognizer> recognizer;
-    bool is_active;  // Set to true when recording or recognition is going on.
+  enum FSMEvent {
+    EVENT_ABORT = 0,
+    EVENT_START,
+    EVENT_STOP_CAPTURE,
+    EVENT_SET_BACKGROUND,
+    EVENT_RECOGNITION_ENDED,
+    EVENT_RECOGNITION_RESULT,
+    EVENT_RECOGNITION_ERROR,
+    EVENT_MAX_VALUE = EVENT_RECOGNITION_ERROR
   };
 
-  struct SpeechRecognitionParams;
+  struct Session {
+    Session();
+    ~Session();
 
-  InputTagSpeechDispatcherHost* GetDelegate(int session_id) const;
+    int id;
+    content::SpeechRecognitionEventListener* event_listener;
+    content::SpeechRecognitionSessionContext context;
+    scoped_refptr<SpeechRecognizerImpl> recognizer;
+    FSMState state;
+    bool error_occurred;
+  };
 
-  void CheckRenderViewTypeAndStartRecognition(
-      const SpeechRecognitionParams& params);
-  void ProceedStartingRecognition(const SpeechRecognitionParams& params);
+  struct FSMEventArgs {
+    explicit FSMEventArgs(FSMEvent event_value);
+    ~FSMEventArgs();
 
-  void CancelRecognitionAndInformDelegate(int session_id);
+    FSMEvent event;
+    content::SpeechRecognitionError speech_error;
+  };
 
-  typedef std::map<int, Request> SpeechRecognizerMap;
-  SpeechRecognizerMap requests_;
-  std::string request_info_;
-  bool can_report_metrics_;
-  int recording_session_id_;
-  scoped_ptr<content::SpeechRecognitionManagerDelegate> delegate_;
+  // Callback issued by the SpeechRecognitionManagerDelegate for asynchronously
+  // reporting the result of the CheckRecognitionIsAllowed call.
+  void RecognitionAllowedCallback(int session_id, bool is_allowed);
+
+  // Entry point for pushing any external event into the session handling FSM.
+  void DispatchEvent(int session_id, FSMEventArgs args);
+
+  // Defines the behavior of the session handling FSM, selecting the appropriate
+  // transition according to the session, its current state and the event.
+  FSMState ExecuteTransitionAndGetNextState(Session& session,
+                                            const FSMEventArgs& event_args);
+
+  // The methods below handle transitions of the session handling FSM.
+  FSMState SessionStart(Session& session, const FSMEventArgs& event_args);
+  FSMState SessionAbort(Session& session, const FSMEventArgs& event_args);
+  FSMState SessionStopAudioCapture(Session& session,
+                                   const FSMEventArgs& event_args);
+  FSMState SessionAbortIfCapturingAudioOrBackground(
+      Session& session, const FSMEventArgs& event_args);
+  FSMState SessionSetBackground(Session& session,
+                                const FSMEventArgs& event_args);
+  FSMState SessionReportError(Session& session, const FSMEventArgs& event_args);
+  FSMState SessionReportNoMatch(Session& session,
+                                const FSMEventArgs& event_args);
+  FSMState SessionDelete(Session& session, const FSMEventArgs& event_args);
+  FSMState DoNothing(Session& session, const FSMEventArgs& event_args);
+  FSMState NotFeasible(Session& session, const FSMEventArgs& event_args);
+
+  bool SessionExists(int session_id) const;
+  content::SpeechRecognitionEventListener* GetListener(int session_id) const;
+  int GetNextSessionID();
+
+  typedef std::map<int, Session> SessionsTable;
+  SessionsTable sessions_;
+  int interactive_session_id_;
+  int last_session_id_;
+  bool is_dispatching_event_;
+  content::SpeechRecognitionManagerDelegate* delegate_;
 };
 
 }  // namespace speech