summaryrefslogtreecommitdiffstats
path: root/chrome/browser/speech/speech_recognizer.h
blob: 5d51b029a337576053fa22bb4154a1edbb64e674 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
#define CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_

#include "base/ref_counted.h"
#include "base/scoped_ptr.h"
#include "media/audio/audio_input_controller.h"
#include "chrome/browser/speech/speech_recognition_request.h"
#include <list>
#include <string>
#include <utility>

namespace speech_input {

// Identifies the particular webkit element making a speech request.
//   SpeechInputCallerId::first  - the ID of the render view.
//   SpeechInputCallerId::second - the request ID given by the element.
typedef std::pair<int, int> SpeechInputCallerId;

// Forward declaration. SpeechRecognizer only refers to the encoder through a
// scoped_ptr<SpeexEncoder> member, so the encoder's header is not included
// here.
class SpeexEncoder;

// Records audio, sends recorded audio to server and translates server response
// to recognition result.
//
// The class is reference counted (base::RefCountedThreadSafe). It receives
// audio events by implementing media::AudioInputController::EventHandler and
// receives the recognition response by implementing
// SpeechRecognitionRequestDelegate. Progress and results are reported to the
// caller through the nested Delegate interface.
class SpeechRecognizer
    : public base::RefCountedThreadSafe<SpeechRecognizer>,
      public media::AudioInputController::EventHandler,
      public SpeechRecognitionRequestDelegate {
 public:
  // Implemented by the caller to receive recognition events. Every callback
  // carries the SpeechInputCallerId of the request it refers to.
  class Delegate {
   public:
    // Reports the outcome of a recognition request.
    // NOTE(review): presumably |error| is true when recognition failed and
    // |value| holds the recognized text otherwise - confirm against the
    // implementation, this header does not spell it out.
    virtual void SetRecognitionResult(const SpeechInputCallerId& caller_id,
                                      bool error,
                                      const string16& value) = 0;

    // Invoked when audio recording stops, either due to the end pointer
    // detecting silence in user input or if |StopRecording| was called. The
    // delegate has to wait until |DidCompleteRecognition| is invoked before
    // destroying the |SpeechRecognizer| object.
    virtual void DidCompleteRecording(const SpeechInputCallerId& caller_id) = 0;

    // This is guaranteed to be the last method invoked in the recognition
    // sequence and the |SpeechRecognizer| object can be freed up if necessary.
    virtual void DidCompleteRecognition(
        const SpeechInputCallerId& caller_id) = 0;

   protected:
    // Protected so that a delegate cannot be deleted through a Delegate*.
    virtual ~Delegate() {}
  };

  // |delegate| receives the recognition events for this recognizer and
  // |caller_id| identifies the element that made the request.
  // NOTE(review): |delegate| is kept as a raw pointer (delegate_), so it is
  // presumably not owned and must outlive this object - confirm with callers.
  SpeechRecognizer(Delegate* delegate, const SpeechInputCallerId& caller_id);
  ~SpeechRecognizer();

  // Starts audio recording and does recognition after recording ends. The same
  // SpeechRecognizer instance can be used multiple times for speech recognition
  // though each recognition request can be made only after the previous one
  // completes (i.e. after receiving Delegate::DidCompleteRecognition).
  // NOTE(review): the meaning of the bool result is not documented here;
  // presumably it reports whether recording could be started - confirm in the
  // .cc file.
  bool StartRecording();

  // Stops recording audio and starts recognition.
  void StopRecording();

  // Stops recording audio and cancels recognition. Any audio recorded so far
  // gets discarded.
  void CancelRecognition();

  // AudioInputController::EventHandler methods.
  // OnCreated and OnRecording are deliberately empty: this class takes no
  // action on those notifications.
  void OnCreated(media::AudioInputController* controller) { }
  void OnRecording(media::AudioInputController* controller) { }
  // NOTE(review): judging by the HandleOnError/HandleOnData helpers below,
  // these two are presumably called off the IO thread and forward their work
  // to it - confirm in the .cc file.
  void OnError(media::AudioInputController* controller, int error_code);
  void OnData(media::AudioInputController* controller, const uint8* data,
              uint32 size);

  // SpeechRecognitionRequest::Delegate methods.
  void SetRecognitionResult(bool error, const string16& value);

 private:
  // Releases the buffers held in |audio_buffers_|.
  // NOTE(review): inferred from the name and the ownership note on
  // |audio_buffers_|; the definition is not visible in this header.
  void ReleaseAudioBuffers();

  void HandleOnError(int error_code);  // Handles OnError in the IO thread.

  // Handles OnData in the IO thread. Takes ownership of |data|.
  void HandleOnData(std::string* data);

  // Receiver of recognition events; set from the constructor argument.
  Delegate* delegate_;
  // Identifies the request; passed back to |delegate_| in every callback.
  SpeechInputCallerId caller_id_;

  // Buffer holding the recorded audio. Owns the strings inside the list.
  typedef std::list<std::string*> AudioBufferQueue;
  AudioBufferQueue audio_buffers_;

  // NOTE(review): presumably the in-flight server request, the audio source
  // currently recording, and the encoder applied to recorded audio,
  // respectively - confirm lifetimes in the .cc file.
  scoped_ptr<SpeechRecognitionRequest> request_;
  scoped_refptr<media::AudioInputController> audio_controller_;
  scoped_ptr<SpeexEncoder> encoder_;

  DISALLOW_COPY_AND_ASSIGN(SpeechRecognizer);
};

// This typedef works around an issue with certain versions of Visual Studio,
// which get confused between multiple Delegate classes and report a C2500
// error. (The error was seen on the try bots; the workaround was not needed
// on the original author's machine.)
typedef SpeechRecognizer::Delegate SpeechRecognizerDelegate;

}  // namespace speech_input

#endif  // CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_