content/browser/speech/speech_input_manager.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179

// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_BROWSER_SPEECH_SPEECH_INPUT_MANAGER_H_
#define CONTENT_BROWSER_SPEECH_SPEECH_INPUT_MANAGER_H_

#include <map>
#include <string>

#include "base/basictypes.h"
#include "base/compiler_specific.h"
#include "base/memory/ref_counted.h"
#include "content/common/content_export.h"
#include "content/public/browser/speech_recognizer_delegate.h"
#include "ui/gfx/rect.h"

class AudioManager;

namespace content {
class ResourceContext;
class SpeechInputPreferences;
class SpeechInputManagerDelegate;
struct SpeechInputResult;
}

namespace net {
class URLRequestContextGetter;
}

namespace speech_input {

class SpeechRecognizer;

// This is the gatekeeper for speech recognition in the browser process. It
// handles requests received from various render views and makes sure only one
// of them can use speech recognition at a time. It also sends recognition
// results and status events to the render views when required.
class CONTENT_EXPORT SpeechInputManager
    : public NON_EXPORTED_BASE(content::SpeechRecognizerDelegate) {
 public:
  // Describes the microphone errors that are reported via ShowMicError.
  enum MicError {
    kNoDeviceAvailable = 0,
    kDeviceInUse
  };

  SpeechInputManager();

  // Invokes the platform provided microphone settings UI in a non-blocking way,
  // via the BrowserThread::FILE thread.
  static void ShowAudioInputSettings(AudioManager* audio_manager);

  // Same as ShowAudioInputSettings above but can be called from the UI thread
  // where the caller has a pointer to a resource context, but due to not
  // running on the IO thread, cannot access its properties.
  static void ShowAudioInputSettingsFromUI(
      content::ResourceContext* resource_context);

  virtual ~SpeechInputManager();

  // Handlers for requests from render views.

  // |delegate| is a weak pointer and should remain valid until
  // its |DidCompleteRecognition| method is called or recognition is cancelled.
  // |render_process_id| is the ID of the renderer process initiating the
  // request.
  // |element_rect| is the display bounds of the html element requesting speech
  // input (in page coordinates).
  virtual void StartRecognition(
      content::SpeechInputManagerDelegate* delegate,
      int caller_id,
      int render_process_id,
      int render_view_id,
      const gfx::Rect& element_rect,
      const std::string& language,
      const std::string& grammar,
      const std::string& origin_url,
      net::URLRequestContextGetter* context_getter,
      content::SpeechInputPreferences* speech_input_prefs,
      AudioManager* audio_manager);
  virtual void CancelRecognition(int caller_id);
  virtual void CancelAllRequestsWithDelegate(
      content::SpeechInputManagerDelegate* delegate);
  virtual void StopRecording(int caller_id);

  // Overridden from content::SpeechRecognizerDelegate:
  virtual void DidStartReceivingAudio(int caller_id) OVERRIDE;
  virtual void SetRecognitionResult(
      int caller_id,
      const content::SpeechInputResult& result) OVERRIDE;
  virtual void DidCompleteRecording(int caller_id) OVERRIDE;
  virtual void DidCompleteRecognition(int caller_id) OVERRIDE;
  virtual void DidStartReceivingSpeech(int caller_id) OVERRIDE;
  virtual void DidStopReceivingSpeech(int caller_id) OVERRIDE;

  virtual void OnRecognizerError(int caller_id,
                                 content::SpeechInputError error) OVERRIDE;
  virtual void DidCompleteEnvironmentEstimation(int caller_id) OVERRIDE;
  virtual void SetInputVolume(int caller_id, float volume,
                              float noise_volume) OVERRIDE;

 protected:
  // The pure virtual methods are used for displaying the current state of
  // recognition and for fetching optional request information.

  // Get the optional request information if available.
  virtual void GetRequestInfo(AudioManager* audio_manager,
                              bool* can_report_metrics,
                              std::string* request_info) = 0;

  // Called when recognition has been requested from point |element_rect_| on
  // the view port for the given caller.
  virtual void ShowRecognitionRequested(int caller_id,
                                        int render_process_id,
                                        int render_view_id,
                                        const gfx::Rect& element_rect) = 0;

  // Called when recognition is starting up.
  virtual void ShowWarmUp(int caller_id) = 0;

  // Called when recognition has started.
  virtual void ShowRecognizing(int caller_id) = 0;

  // Called when recording has started.
  virtual void ShowRecording(int caller_id) = 0;

  // Continuously updated with the current input volume.
  virtual void ShowInputVolume(int caller_id,
                               float volume,
                               float noise_volume) = 0;

  // Called when no microphone has been found.
  virtual void ShowMicError(int caller_id, MicError error) = 0;

  // Called when there has been a error with the recognition.
  virtual void ShowRecognizerError(int caller_id,
                                   content::SpeechInputError error) = 0;

  // Called when recognition has ended or has been canceled.
  virtual void DoClose(int caller_id) = 0;

  // Cancels recognition for the specified caller if it is active.
  void OnFocusChanged(int caller_id);

  bool HasPendingRequest(int caller_id) const;

  // Starts/restarts recognition for an existing request.
  void StartRecognitionForRequest(int caller_id);

  void CancelRecognitionAndInformDelegate(int caller_id);

 private:
  struct SpeechInputRequest {
    SpeechInputRequest();
    ~SpeechInputRequest();

    content::SpeechInputManagerDelegate* delegate;
    scoped_refptr<SpeechRecognizer> recognizer;
    bool is_active;  // Set to true when recording or recognition is going on.
  };

  struct SpeechInputParams;

  content::SpeechInputManagerDelegate* GetDelegate(int caller_id) const;

  void CheckRenderViewTypeAndStartRecognition(const SpeechInputParams& params);
  void ProceedStartingRecognition(const SpeechInputParams& params);

  typedef std::map<int, SpeechInputRequest> SpeechRecognizerMap;
  SpeechRecognizerMap requests_;
  std::string request_info_;
  bool can_report_metrics_;
  int recording_caller_id_;
};

}  // namespace speech_input

#endif  // CONTENT_BROWSER_SPEECH_SPEECH_INPUT_MANAGER_H_