summary refs log tree commit diff stats
path: root/content/browser/speech/speech_recognizer.h
diff options
context:
space:
mode:
author primiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-06-27 10:56:45 +0000
committer primiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-06-27 10:56:45 +0000
commit c91bb26fa85807df212657eb025c2c69be100b9f (patch)
tree 8c9bcf7070b8d64a3c1b3118525f35dc296993fc /content/browser/speech/speech_recognizer.h
parent 62a16d8b66f5660a45320708aac90ea2c0b88584 (diff)
download chromium_src-c91bb26fa85807df212657eb025c2c69be100b9f.zip
chromium_src-c91bb26fa85807df212657eb025c2c69be100b9f.tar.gz
chromium_src-c91bb26fa85807df212657eb025c2c69be100b9f.tar.bz2
Renamed SpeechRecognizerImpl to SpeechRecognizer and fixed some comments in the speech code.
BUG=116954
TEST=none
Review URL: https://chromiumcodereview.appspot.com/10661053
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@144435 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'content/browser/speech/speech_recognizer.h')
-rw-r--r-- content/browser/speech/speech_recognizer.h 162
1 files changed, 162 insertions, 0 deletions
diff --git a/content/browser/speech/speech_recognizer.h b/content/browser/speech/speech_recognizer.h
new file mode 100644
index 0000000..c523f27
--- /dev/null
+++ b/content/browser/speech/speech_recognizer.h
@@ -0,0 +1,162 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
+#define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
+#pragma once
+
+#include "base/basictypes.h"
+#include "base/memory/ref_counted.h"
+#include "base/memory/scoped_ptr.h"
+#include "content/browser/speech/endpointer/endpointer.h"
+#include "content/browser/speech/speech_recognition_engine.h"
+#include "content/public/common/speech_recognition_error.h"
+#include "content/public/common/speech_recognition_result.h"
+#include "media/audio/audio_input_controller.h"
+#include "net/url_request/url_request_context_getter.h"
+
+namespace content {
+class SpeechRecognitionEventListener;  // Forward declaration; this header only uses it through a pointer.
+}
+
+namespace media {
+class AudioManager;  // Forward declaration; this header only uses it through a pointer.
+}
+
+namespace speech {
+// Drives speech recognition for one session (identified by |session_id|): it
+// handles audio capture, silence detection (endpointer) and the interaction
+// with the SpeechRecognitionEngine, and is organized as an FSM (see FSMState).
+class CONTENT_EXPORT SpeechRecognizer
+ : public base::RefCountedThreadSafe<SpeechRecognizer>,
+ public media::AudioInputController::EventHandler,  // Audio controller callbacks (OnCreated/OnRecording/OnError/OnData).
+ public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {  // Engine result/error callbacks.
+ public:
+ static const int kAudioSampleRate;  // These constants are only declared here; their values are defined out of line.
+ static const ChannelLayout kChannelLayout;
+ static const int kNumBitsPerAudioSample;
+ static const int kNoSpeechTimeoutMs;  // Presumably milliseconds (per the Ms suffix) -- confirm in the .cc.
+ static const int kEndpointerEstimationTimeMs;  // Presumably milliseconds (per the Ms suffix) -- confirm in the .cc.
+
+ SpeechRecognizer(
+ content::SpeechRecognitionEventListener* listener,  // Raw pointer; presumably kept in |listener_| -- confirm.
+ int session_id,
+ bool is_single_shot,
+ SpeechRecognitionEngine* engine);  // NOTE(review): |recognition_engine_| is a scoped_ptr, so this presumably takes ownership -- confirm.
+
+ void StartRecognition();  // Presumably feeds EVENT_START to the FSM -- confirm in the .cc.
+ void AbortRecognition();  // Presumably feeds EVENT_ABORT to the FSM -- confirm in the .cc.
+ void StopAudioCapture();  // Presumably feeds EVENT_STOP_CAPTURE to the FSM -- confirm in the .cc.
+ bool IsActive() const;  // Const query.
+ bool IsCapturingAudio() const;  // Const query.
+ const SpeechRecognitionEngine& recognition_engine() const;  // Read-only access to the engine.
+
+ private:
+ friend class base::RefCountedThreadSafe<SpeechRecognizer>;  // Needs access to the private destructor.
+ friend class SpeechRecognizerTest;  // Gives the test class access to private members.
+
+ enum FSMState {  // States of the recognizer FSM.
+ STATE_IDLE = 0,
+ STATE_STARTING,
+ STATE_ESTIMATING_ENVIRONMENT,
+ STATE_WAITING_FOR_SPEECH,
+ STATE_RECOGNIZING,
+ STATE_WAITING_FINAL_RESULT,
+ STATE_MAX_VALUE = STATE_WAITING_FINAL_RESULT  // Alias of the last state, not an additional state.
+ };
+
+ enum FSMEvent {  // Events that can be pushed into the FSM (see DispatchEvent).
+ EVENT_ABORT = 0,
+ EVENT_START,
+ EVENT_STOP_CAPTURE,
+ EVENT_AUDIO_DATA,
+ EVENT_ENGINE_RESULT,
+ EVENT_ENGINE_ERROR,
+ EVENT_AUDIO_ERROR,
+ EVENT_MAX_VALUE = EVENT_AUDIO_ERROR  // Alias of the last event, not an additional event.
+ };
+
+ struct FSMEventArgs {  // An FSM event bundled with the data that may accompany it.
+ explicit FSMEventArgs(FSMEvent event_value);  // Only the event type is supplied at construction.
+ ~FSMEventArgs();
+
+ FSMEvent event;
+ int audio_error_code;  // Presumably meaningful only for EVENT_AUDIO_ERROR -- confirm.
+ scoped_refptr<AudioChunk> audio_data;  // Presumably meaningful only for EVENT_AUDIO_DATA -- confirm.
+ content::SpeechRecognitionResult engine_result;  // Presumably meaningful only for EVENT_ENGINE_RESULT -- confirm.
+ content::SpeechRecognitionError engine_error;  // Presumably meaningful only for EVENT_ENGINE_ERROR -- confirm.
+ };
+
+ virtual ~SpeechRecognizer();  // Private: outside code cannot delete a recognizer directly.
+
+ // Entry point for pushing any new external event into the recognizer FSM.
+ void DispatchEvent(const FSMEventArgs& event_args);
+
+ // Defines the behavior of the recognizer FSM, selecting the appropriate
+ // transition according to the current state and event.
+ FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);
+
+ // Processes a new audio chunk in the audio pipeline (endpointer, vumeter, etc).
+ void ProcessAudioPipeline(const AudioChunk& raw_audio);
+
+ // The methods below handle transitions of the recognizer FSM; each returns an FSMState.
+ FSMState StartRecording(const FSMEventArgs& event_args);
+ FSMState StartRecognitionEngine(const FSMEventArgs& event_args);
+ FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);
+ FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);
+ FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);
+ FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);
+ FSMState ProcessFinalResult(const FSMEventArgs& event_args);
+ FSMState AbortSilently(const FSMEventArgs& event_args);
+ FSMState AbortWithError(const FSMEventArgs& event_args);
+ FSMState Abort(const content::SpeechRecognitionError& error);  // Unlike its siblings, takes an error rather than event args.
+ FSMState DetectEndOfSpeech(const FSMEventArgs& event_args);
+ FSMState DoNothing(const FSMEventArgs& event_args) const;  // The only const handler in this group.
+ FSMState NotFeasible(const FSMEventArgs& event_args);
+
+ // Returns the time span of captured audio samples since the start of capture.
+ int GetElapsedTimeMs() const;
+
+ // Calculates the input volume to be displayed in the UI, triggering the
+ // OnAudioLevelsChange event accordingly.
+ void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected);  // NOTE(review): float by const ref; by value is more usual, but must match the definition.
+
+ void CloseAudioControllerAsynchronously();
+ void SetAudioManagerForTesting(media::AudioManager* audio_manager);  // Presumably sets |testing_audio_manager_| -- confirm.
+
+ // Callback called on IO thread by audio_controller->Close().
+ void OnAudioClosed(media::AudioInputController*);
+
+ // AudioInputController::EventHandler methods.
+ virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}  // No-op.
+ virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}  // No-op.
+ virtual void OnError(media::AudioInputController* controller,
+ int error_code) OVERRIDE;
+ virtual void OnData(media::AudioInputController* controller,
+ const uint8* data, uint32 size) OVERRIDE;
+
+ // SpeechRecognitionEngineDelegate methods.
+ virtual void OnSpeechRecognitionEngineResult(
+ const content::SpeechRecognitionResult& result) OVERRIDE;
+ virtual void OnSpeechRecognitionEngineError(
+ const content::SpeechRecognitionError& error) OVERRIDE;
+
+ content::SpeechRecognitionEventListener* listener_;  // Raw pointer; ownership is not expressed in this header.
+ media::AudioManager* testing_audio_manager_;  // Raw pointer; see SetAudioManagerForTesting.
+ scoped_ptr<SpeechRecognitionEngine> recognition_engine_;  // Owned by this object (scoped_ptr).
+ Endpointer endpointer_;  // Held by value.
+ scoped_refptr<media::AudioInputController> audio_controller_;  // Ref-counted handle to the audio controller.
+ int session_id_;
+ int num_samples_recorded_;
+ float audio_level_;
+ bool is_dispatching_event_;  // Presumably set while DispatchEvent is running -- confirm in the .cc.
+ bool is_single_shot_;
+ FSMState state_;  // FSM state (see ExecuteTransitionAndGetNextState).
+
+ DISALLOW_COPY_AND_ASSIGN(SpeechRecognizer);  // Non-copyable.
+};
+
+} // namespace speech
+
+#endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_