summaryrefslogtreecommitdiffstats
path: root/content/browser/speech/speech_recognizer.h
diff options
context:
space:
mode:
Diffstat (limited to 'content/browser/speech/speech_recognizer.h')
-rw-r--r--content/browser/speech/speech_recognizer.h151
1 files changed, 151 insertions, 0 deletions
diff --git a/content/browser/speech/speech_recognizer.h b/content/browser/speech/speech_recognizer.h
new file mode 100644
index 0000000..a54a59d
--- /dev/null
+++ b/content/browser/speech/speech_recognizer.h
@@ -0,0 +1,151 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
+#define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
+
+#include <list>
+#include <string>
+#include <utility>
+
+#include "base/ref_counted.h"
+#include "base/scoped_ptr.h"
+#include "content/browser/speech/audio_encoder.h"
+#include "content/browser/speech/endpointer/endpointer.h"
+#include "content/browser/speech/speech_recognition_request.h"
+#include "media/audio/audio_input_controller.h"
+
+namespace speech_input {
+
+// Records audio, sends the recorded audio to a server and translates the
+// server response into a recognition result that is reported to |Delegate|.
+class SpeechRecognizer
+ : public base::RefCountedThreadSafe<SpeechRecognizer>,
+ public media::AudioInputController::EventHandler,
+ public SpeechRecognitionRequestDelegate {
+ public:
+ enum ErrorCode {  // Passed to Delegate::OnRecognizerError().
+ RECOGNIZER_NO_ERROR,
+ RECOGNIZER_ERROR_CAPTURE,
+ RECOGNIZER_ERROR_NO_SPEECH,
+ RECOGNIZER_ERROR_NO_RESULTS,
+ };
+
+ // Implemented by the caller to receive recognition events.
+ class Delegate {
+ public:
+ virtual void SetRecognitionResult(
+ int caller_id,
+ bool error,  // NOTE(review): presumably true on failure; confirm.
+ const SpeechInputResultArray& result) = 0;
+
+ // Invoked when audio recording stops, either because the endpointer
+ // detected silence in the user input or because |StopRecording| was called.
+ // The delegate has to wait until |DidCompleteRecognition| is invoked before
+ // destroying the |SpeechRecognizer| object.
+ virtual void DidCompleteRecording(int caller_id) = 0;
+
+ // This is guaranteed to be the last method invoked in the recognition
+ // sequence; the |SpeechRecognizer| object can then be freed if necessary.
+ virtual void DidCompleteRecognition(int caller_id) = 0;
+
+ // Invoked if there was an error while recording or recognizing audio. The
+ // session has already been cancelled when this call is made and none of the
+ // DidComplete* callbacks will be issued. It is safe to destroy/release the
+ // |SpeechRecognizer| object while processing this call.
+ virtual void OnRecognizerError(int caller_id,
+ SpeechRecognizer::ErrorCode error) = 0;
+
+ // At the start of recognition, a short amount of audio is recorded to
+ // estimate the environment/background noise and this callback is issued
+ // after that is complete. Typically the delegate brings up any speech
+ // recognition UI once this callback is received.
+ virtual void DidCompleteEnvironmentEstimation(int caller_id) = 0;
+
+ // Informs of a change in the captured audio level, useful if displaying
+ // a microphone volume indicator while recording.
+ // The value of |volume| is in the [0.0, 1.0] range.
+ virtual void SetInputVolume(int caller_id, float volume) = 0;
+
+ protected:
+ virtual ~Delegate() {}
+ };
+
+ SpeechRecognizer(Delegate* delegate,  // Receives the recognition events.
+ int caller_id,
+ const std::string& language,
+ const std::string& grammar,
+ const std::string& hardware_info,
+ const std::string& origin_url);
+ ~SpeechRecognizer();
+
+ // Starts audio recording and does recognition after recording ends. The same
+ // SpeechRecognizer instance can be used multiple times for speech recognition
+ // though each recognition request can be made only after the previous one
+ // completes (i.e. after receiving Delegate::DidCompleteRecognition).
+ bool StartRecording();  // NOTE(review): bool presumably = success; confirm.
+
+ // Stops recording audio and starts recognition.
+ void StopRecording();
+
+ // Stops recording audio and cancels recognition. Any audio recorded so far
+ // gets discarded.
+ void CancelRecognition();
+
+ // AudioInputController::EventHandler methods.
+ virtual void OnCreated(media::AudioInputController* controller) { }  // No-op.
+ virtual void OnRecording(media::AudioInputController* controller) { }  // No-op.
+ virtual void OnError(media::AudioInputController* controller, int error_code);
+ virtual void OnData(media::AudioInputController* controller,
+ const uint8* data,
+ uint32 size);
+
+ // SpeechRecognitionRequestDelegate methods.
+ virtual void SetRecognitionResult(bool error,
+ const SpeechInputResultArray& result);
+
+ static const int kAudioSampleRate;  // Constant values are defined out of line.
+ static const int kAudioPacketIntervalMs; // Duration of each audio packet.
+ static const int kNumAudioChannels;
+ static const int kNumBitsPerAudioSample;
+ static const int kNoSpeechTimeoutSec;
+ static const int kEndpointerEstimationTimeMs;
+
+ private:
+ void ReleaseAudioBuffers();
+ void InformErrorAndCancelRecognition(ErrorCode error);
+ void SendRecordedAudioToServer();
+
+ void HandleOnError(int error_code); // Handles OnError in the IO thread.
+
+ // Handles OnData in the IO thread. Takes ownership of |data|.
+ void HandleOnData(std::string* data);
+
+ Delegate* delegate_;  // Raw pointer. NOTE(review): presumably not owned.
+ int caller_id_;
+ std::string language_;
+ std::string grammar_;
+ std::string hardware_info_;
+ std::string origin_url_;
+
+ scoped_ptr<SpeechRecognitionRequest> request_;
+ scoped_refptr<media::AudioInputController> audio_controller_;
+ AudioEncoder::Codec codec_;
+ scoped_ptr<AudioEncoder> encoder_;
+ Endpointer endpointer_;
+ int num_samples_recorded_;
+ float audio_level_;
+
+ DISALLOW_COPY_AND_ASSIGN(SpeechRecognizer);
+};
+
+// This typedef works around an issue in certain versions of Visual Studio,
+// which get confused between multiple Delegate classes and report a C2500
+// error. (The error was seen on the try bots; the workaround was not needed
+// on the original author's machine.)
+typedef SpeechRecognizer::Delegate SpeechRecognizerDelegate;
+
+} // namespace speech_input
+
+#endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_