// Source path: content/browser/speech/speech_recognizer_impl.h
// (blob 25e0c0ca48bca16e9a6eebe76822d98daa3aa447)
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
#define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

#include <list>
#include <utility>

#include "base/compiler_specific.h"
#include "base/memory/scoped_ptr.h"
#include "content/browser/speech/audio_encoder.h"
#include "content/browser/speech/endpointer/endpointer.h"
#include "content/browser/speech/speech_recognition_request.h"
#include "content/public/browser/speech_recognizer.h"
#include "content/public/common/speech_recognition_result.h"
#include "media/audio/audio_input_controller.h"

// Forward declaration: this header only refers to AudioManager through
// pointers (see SetAudioManagerForTesting() and |audio_manager_| below), so
// the full definition is not needed here.
class AudioManager;

namespace speech {

// Records audio, sends the recorded audio to a server and translates the
// server response into a recognition result.
//
// The class plays three roles, one per base class:
//   * content::SpeechRecognizer - the start/cancel interface used by callers.
//   * media::AudioInputController::EventHandler - receives audio capture
//     events (OnCreated/OnRecording/OnError/OnData).
//   * SpeechRecognitionRequestDelegate - receives the recognition result via
//     SetRecognitionResult().
//
// Instances are neither copyable nor assignable (DISALLOW_COPY_AND_ASSIGN).
//
// NOTE(review): this header uses std::string and net::URLRequestContextGetter
// but neither includes <string> nor declares the net type itself; presumably
// both arrive transitively through the includes above - confirm.
class CONTENT_EXPORT SpeechRecognizerImpl
    : NON_EXPORTED_BASE(public content::SpeechRecognizer),
      public media::AudioInputController::EventHandler,
      public SpeechRecognitionRequestDelegate {
 public:
  // Creates a recognizer that reports to |delegate|.
  // NOTE(review): the constructor body is not visible from this header;
  // presumably each argument is stored in the like-named member declared in
  // the private section (delegate_, caller_id_, language_, ...) - confirm in
  // the .cc file. |delegate| and |context_getter| are taken as raw pointers;
  // |context_getter_| is held in a scoped_refptr, |delegate_| as a raw pointer.
  SpeechRecognizerImpl(content::SpeechRecognizerDelegate* delegate,
                       int caller_id,
                       const std::string& language,
                       const std::string& grammar,
                       net::URLRequestContextGetter* context_getter,
                       bool filter_profanities,
                       const std::string& hardware_info,
                       const std::string& origin_url);

  virtual ~SpeechRecognizerImpl();

  // SpeechRecognizer implementation:
  // NOTE(review): the meaning of StartRecording()'s bool result is defined by
  // content::SpeechRecognizer and is not visible here - presumably "recording
  // was started"; confirm against the interface.
  virtual bool StartRecording() OVERRIDE;
  virtual void CancelRecognition() OVERRIDE;

  // Stops recording audio and starts recognition.
  // Unlike the two methods above this is not declared as an override.
  void StopRecording();

  // AudioInputController::EventHandler methods.
  // OnCreated() and OnRecording() are deliberately empty: this class takes no
  // action on those two notifications.
  virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}
  virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}
  // NOTE(review): per the comments on HandleOnError()/HandleOnData() below,
  // the real work for these two events happens on the IO thread; presumably
  // these overrides just forward to those helpers - confirm in the .cc file.
  virtual void OnError(media::AudioInputController* controller,
                       int error_code) OVERRIDE;
  virtual void OnData(media::AudioInputController* controller,
                      const uint8* data,
                      uint32 size) OVERRIDE;

  // SpeechRecognitionRequestDelegate methods.
  virtual void SetRecognitionResult(
      const content::SpeechRecognitionResult& result) OVERRIDE;

  // Audio capture / endpointing parameters. Only declared here; the values
  // are defined out of line. Unit suffixes (Ms, Sec) are part of the names.
  static const int kAudioSampleRate;
  static const int kAudioPacketIntervalMs;  // Duration of each audio packet.
  static const ChannelLayout kChannelLayout;
  static const int kNumBitsPerAudioSample;
  static const int kNoSpeechTimeoutSec;
  static const int kEndpointerEstimationTimeMs;

 private:
  // Gives the test fixture access to private members such as
  // SetAudioManagerForTesting().
  friend class SpeechRecognizerTest;

  // NOTE(review): going by the name, reports |error| (presumably to
  // |delegate_|) and then cancels recognition - confirm in the .cc file.
  void InformErrorAndCancelRecognition(
      content::SpeechRecognitionErrorCode error);
  void SendRecordedAudioToServer();

  void HandleOnError(int error_code);  // Handles OnError in the IO thread.

  // Handles OnData in the IO thread. Takes ownership of |data|.
  void HandleOnData(std::string* data);

  // Helper method which closes the audio controller and blocks until done.
  void CloseAudioControllerSynchronously();

  // Test-only hook, reachable through the SpeechRecognizerTest friend
  // declaration. Presumably assigns |audio_manager_| - confirm in the .cc file.
  void SetAudioManagerForTesting(AudioManager* audio_manager);

  // Recognition parameters; names mirror the constructor arguments.
  // |delegate_| is a raw pointer, so its lifetime is not managed by this
  // class's member types.
  content::SpeechRecognizerDelegate* delegate_;
  int caller_id_;
  std::string language_;
  std::string grammar_;
  bool filter_profanities_;
  std::string hardware_info_;
  std::string origin_url_;

  // Working state. scoped_ptr members are uniquely held by this object;
  // scoped_refptr members are reference-counted.
  scoped_ptr<SpeechRecognitionRequest> request_;
  scoped_refptr<media::AudioInputController> audio_controller_;
  scoped_refptr<net::URLRequestContextGetter> context_getter_;
  AudioEncoder::Codec codec_;
  scoped_ptr<AudioEncoder> encoder_;
  Endpointer endpointer_;
  int num_samples_recorded_;
  float audio_level_;
  // Raw pointer; see SetAudioManagerForTesting().
  AudioManager* audio_manager_;

  DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);
};

}  // namespace speech

#endif  // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_