summaryrefslogtreecommitdiffstats
path: root/content/browser/speech/speech_input_manager.h
blob: afac4427d69189065779be67286041e918c337f6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_BROWSER_SPEECH_SPEECH_INPUT_MANAGER_H_
#define CONTENT_BROWSER_SPEECH_SPEECH_INPUT_MANAGER_H_

#include <map>
#include <string>

#include "base/basictypes.h"
#include "content/browser/speech/speech_recognizer.h"
#include "content/common/content_export.h"
#include "content/common/speech_input_result.h"
#include "ui/gfx/rect.h"

namespace speech_input {

// This is the gatekeeper for speech recognition in the browser process. It
// handles requests received from various render views and makes sure only one
// of them can use speech recognition at a time. It also sends recognition
// results and status events to the render views when required.
//
// The class is abstract: subclasses implement the protected pure virtual
// methods declared below, which display the current state of recognition and
// supply optional request information.
class SpeechInputManager : public SpeechRecognizerDelegate {
 public:
  // Implemented by the dispatcher host to relay events to the render views.
  // Each callback receives the |caller_id| the event is reported for.
  class Delegate {
   public:
    virtual void SetRecognitionResult(
        int caller_id,
        const SpeechInputResultArray& result) = 0;
    virtual void DidCompleteRecording(int caller_id) = 0;
    virtual void DidCompleteRecognition(int caller_id) = 0;

   protected:
    // Protected so that code outside the hierarchy cannot delete an object
    // through a Delegate pointer.
    virtual ~Delegate() {}
  };

  CONTENT_EXPORT SpeechInputManager();

  // Invokes the platform provided microphone settings UI in a non-blocking way,
  // via the BrowserThread::FILE thread.
  static void ShowAudioInputSettings();

  CONTENT_EXPORT virtual ~SpeechInputManager();

  // Handlers for requests from render views.

  // |delegate| is a weak pointer and should remain valid until
  // its |DidCompleteRecognition| method is called or recognition is cancelled.
  // |render_process_id| is the ID of the renderer process initiating the
  // request.
  // |element_rect| is the display bounds of the html element requesting speech
  // input (in page coordinates).
  virtual void StartRecognition(Delegate* delegate,
                                int caller_id,
                                int render_process_id,
                                int render_view_id,
                                const gfx::Rect& element_rect,
                                const std::string& language,
                                const std::string& grammar,
                                const std::string& origin_url);
  virtual void CancelRecognition(int caller_id);
  virtual void CancelAllRequestsWithDelegate(Delegate* delegate);
  virtual void StopRecording(int caller_id);

  // SpeechRecognizerDelegate (SpeechRecognizer::Delegate) methods.
  virtual void DidStartReceivingAudio(int caller_id);
  virtual void SetRecognitionResult(int caller_id,
                                    bool error,
                                    const SpeechInputResultArray& result);
  virtual void DidCompleteRecording(int caller_id);
  virtual void DidCompleteRecognition(int caller_id);
  virtual void OnRecognizerError(int caller_id,
                                 SpeechRecognizer::ErrorCode error);
  virtual void DidCompleteEnvironmentEstimation(int caller_id);
  virtual void SetInputVolume(int caller_id, float volume, float noise_volume);

  // Accessors for the |censor_results_| flag. The getter is const so that it
  // can be called through a const reference or pointer to the manager.
  void set_censor_results(bool censor) { censor_results_ = censor; }

  bool censor_results() const { return censor_results_; }

 protected:
  // The pure virtual methods are used for displaying the current state of
  // recognition and for fetching optional request information.

  // Get the optional request information if available. Results are written
  // through the two out-parameters.
  virtual void GetRequestInfo(bool* can_report_metrics,
                              std::string* request_info) = 0;

  // Called when recognition has been requested from point |element_rect| on
  // the view port for the given caller.
  virtual void ShowRecognitionRequested(int caller_id,
                                        int render_process_id,
                                        int render_view_id,
                                        const gfx::Rect& element_rect) = 0;

  // Called when recognition is starting up.
  virtual void ShowWarmUp(int caller_id) = 0;

  // Called when recognition has started.
  virtual void ShowRecognizing(int caller_id) = 0;

  // Called when recording has started.
  virtual void ShowRecording(int caller_id) = 0;

  // Continuously updated with the current input volume.
  virtual void ShowInputVolume(int caller_id,
                               float volume,
                               float noise_volume) = 0;

  // Called when no microphone has been found.
  virtual void ShowNoMicError(int caller_id) = 0;

  // Called when there has been an error with the recognition.
  virtual void ShowRecognizerError(int caller_id,
                                   SpeechRecognizer::ErrorCode error) = 0;

  // Called when recognition has ended or has been canceled.
  virtual void DoClose(int caller_id) = 0;

  // Cancels recognition for the specified caller if it is active.
  void OnFocusChanged(int caller_id);

  bool HasPendingRequest(int caller_id) const;

  // Starts/restarts recognition for an existing request.
  void StartRecognitionForRequest(int caller_id);

  void CancelRecognitionAndInformDelegate(int caller_id);

 private:
  // Bookkeeping for a single speech input request.
  struct SpeechInputRequest {
    SpeechInputRequest();
    ~SpeechInputRequest();

    // NOTE(review): presumably the weak |delegate| pointer handed to
    // StartRecognition() - confirm in the .cc file.
    Delegate* delegate;
    scoped_refptr<SpeechRecognizer> recognizer;
    bool is_active;  // Set to true when recording or recognition is going on.
  };

  Delegate* GetDelegate(int caller_id) const;

  // NOTE(review): the int key looks like the caller id used throughout this
  // interface - confirm in the .cc file.
  typedef std::map<int, SpeechInputRequest> SpeechRecognizerMap;
  SpeechRecognizerMap requests_;
  std::string request_info_;
  bool can_report_metrics_;
  bool censor_results_;
  int recording_caller_id_;
};

// Namespace-scope alias for the nested SpeechInputManager::Delegate class.
// This typedef works around an issue with certain versions of Visual Studio,
// which get confused between multiple Delegate classes and give a C2500
// error. (The error was seen on the try bots; the workaround was not needed
// on the original author's machine.)
typedef SpeechInputManager::Delegate SpeechInputManagerDelegate;

}  // namespace speech_input

#endif  // CONTENT_BROWSER_SPEECH_SPEECH_INPUT_MANAGER_H_