content/browser/speech/speech_recognition_manager_impl.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170

// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_
#define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_
#pragma once

#include <map>
#include <string>

#include "base/basictypes.h"
#include "base/callback.h"
#include "base/compiler_specific.h"
#include "base/memory/weak_ptr.h"
#include "content/public/browser/speech_recognition_event_listener.h"
#include "content/public/browser/speech_recognition_manager.h"
#include "content/public/browser/speech_recognition_session_config.h"
#include "content/public/browser/speech_recognition_session_context.h"
#include "content/public/common/speech_recognition_error.h"

namespace content {
class BrowserMainLoop;
class SpeechRecognitionManagerDelegate;
}

namespace speech {

class SpeechRecognizerImpl;

// This is the manager for speech recognition. It is a single instance in
// the browser process and can serve several requests. Each recognition request
// corresponds to a session, initiated via |CreateSession|.
// In every moment the manager has at most one session capturing audio, which
// is identified by |session_id_capturing_audio_|. However, multiple sessions
// can live in parallel in respect of the aforementioned constraint, i.e. while
// waiting for results.
// This class does not handle user interface objects (bubbles, tray icon).
// Those are managed by the delegate, which receives a copy of all sessions
// events.
//
// The SpeechRecognitionManager has the following responsibilities:
//  - Handles requests received from various render views and makes sure only
//    one of them accesses the audio device at any given time.
//  - Handles the instantiation of SpeechRecognitionEngine objects when
//    requested by SpeechRecognitionSessions.
//  - Relays recognition results/status/error events of each session to the
//    corresponding listener (demuxing on the base of their session_id).
//  - Relays also recognition results/status/error events of every session to
//    the catch-all snoop listener (optionally) provided by the delegate.
class CONTENT_EXPORT SpeechRecognitionManagerImpl :
    public NON_EXPORTED_BASE(content::SpeechRecognitionManager),
    public base::SupportsWeakPtr<SpeechRecognitionManagerImpl>,
    public content::SpeechRecognitionEventListener {
 public:
  // Returns the current SpeechRecognitionManagerImpl or NULL if the call is
  // issued when it is not created yet or destroyed (by BrowserMainLoop).
  static SpeechRecognitionManagerImpl* GetInstance();

  // SpeechRecognitionManager implementation.
  virtual int CreateSession(
      const content::SpeechRecognitionSessionConfig& config) OVERRIDE;
  virtual void StartSession(int session_id) OVERRIDE;
  virtual void AbortSession(int session_id) OVERRIDE;
  virtual void AbortAllSessionsForListener(
        content::SpeechRecognitionEventListener* listener) OVERRIDE;
  virtual void StopAudioCaptureForSession(int session_id) OVERRIDE;
  virtual const content::SpeechRecognitionSessionConfig& GetSessionConfig(
      int session_id) const OVERRIDE;
  virtual content::SpeechRecognitionSessionContext GetSessionContext(
      int session_id) const OVERRIDE;
  virtual int GetSession(int render_process_id,
                         int render_view_id,
                         int request_id) const OVERRIDE;
  virtual bool HasAudioInputDevices() OVERRIDE;
  virtual bool IsCapturingAudio() OVERRIDE;
  virtual string16 GetAudioInputDeviceModel() OVERRIDE;
  virtual void ShowAudioInputSettings() OVERRIDE;

  // SpeechRecognitionEventListener methods.
  virtual void OnRecognitionStart(int session_id) OVERRIDE;
  virtual void OnAudioStart(int session_id) OVERRIDE;
  virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE;
  virtual void OnSoundStart(int session_id) OVERRIDE;
  virtual void OnSoundEnd(int session_id) OVERRIDE;
  virtual void OnAudioEnd(int session_id) OVERRIDE;
  virtual void OnRecognitionEnd(int session_id) OVERRIDE;
  virtual void OnRecognitionResult(
      int session_id, const content::SpeechRecognitionResult& result) OVERRIDE;
  virtual void OnRecognitionError(
      int session_id, const content::SpeechRecognitionError& error) OVERRIDE;
  virtual void OnAudioLevelsChange(int session_id, float volume,
                                   float noise_volume) OVERRIDE;

 protected:
  // BrowserMainLoop is the only one allowed to istantiate and free us.
  friend class content::BrowserMainLoop;
  friend class scoped_ptr<SpeechRecognitionManagerImpl>;  // Needed for dtor.
  SpeechRecognitionManagerImpl();
  virtual ~SpeechRecognitionManagerImpl();

 private:
  // Data types for the internal Finite State Machine (FSM).
  enum FSMState {
    SESSION_STATE_IDLE = 0,
    SESSION_STATE_CAPTURING_AUDIO,
    SESSION_STATE_WAITING_FOR_RESULT,
    SESSION_STATE_MAX_VALUE = SESSION_STATE_WAITING_FOR_RESULT
  };

  enum FSMEvent {
    EVENT_ABORT = 0,
    EVENT_START,
    EVENT_STOP_CAPTURE,
    EVENT_AUDIO_ENDED,
    EVENT_RECOGNITION_ENDED,
    EVENT_MAX_VALUE = EVENT_RECOGNITION_ENDED
  };

  struct Session {
    Session();
    ~Session();

    int id;
    bool listener_is_active;
    content::SpeechRecognitionSessionConfig config;
    content::SpeechRecognitionSessionContext context;
    scoped_refptr<SpeechRecognizerImpl> recognizer;
  };

  // Callback issued by the SpeechRecognitionManagerDelegate for reporting
  // asynchronously the result of the CheckRecognitionIsAllowed call.
  void RecognitionAllowedCallback(int session_id, bool is_allowed);

  // Entry point for pushing any external event into the session handling FSM.
  void DispatchEvent(int session_id, FSMEvent event);

  // Defines the behavior of the session handling FSM, selecting the appropriate
  // transition according to the session, its current state and the event.
  void ExecuteTransitionAndGetNextState(
      const Session& session, FSMState session_state, FSMEvent event);

  // Retrieves the state of the session, enquiring directly the recognizer.
  FSMState GetSessionState(int session_id) const;

  // The methods below handle transitions of the session handling FSM.
  void SessionStart(const Session& session);
  void SessionAbort(const Session& session);
  void SessionStopAudioCapture(const Session& session);
  void ResetCapturingSessionId(const Session& session);
  void SessionDelete(const Session& session);
  void NotFeasible(const Session& session, FSMEvent event);

  bool SessionExists(int session_id) const;
  const Session& GetSession(int session_id) const;
  content::SpeechRecognitionEventListener* GetListener(int session_id) const;
  content::SpeechRecognitionEventListener* GetDelegateListener() const;
  int GetNextSessionID();

  typedef std::map<int, Session> SessionsTable;
  SessionsTable sessions_;
  int session_id_capturing_audio_;
  int last_session_id_;
  bool is_dispatching_event_;
  scoped_ptr<content::SpeechRecognitionManagerDelegate> delegate_;
};

}  // namespace speech

#endif  // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_