Renamed SpeechRecognizerImpl to SpeechRecognizer and fixed some comments in the speech code.

BUG=116954 TEST=none Review URL: https://chromiumcodereview.appspot.com/10661053 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@144435 0039d316-1c4b-4281-b951-d872f2087c98
author: primiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-06-27 10:56:45 +0000
committer: primiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-06-27 10:56:45 +0000
commit: c91bb26fa85807df212657eb025c2c69be100b9f (patch)
tree: 8c9bcf7070b8d64a3c1b3118525f35dc296993fc /content/browser/speech/speech_recognizer.h
parent: 62a16d8b66f5660a45320708aac90ea2c0b88584 (diff)
download: chromium_src-c91bb26fa85807df212657eb025c2c69be100b9f.zip
chromium_src-c91bb26fa85807df212657eb025c2c69be100b9f.tar.gz
chromium_src-c91bb26fa85807df212657eb025c2c69be100b9f.tar.bz2
1 files changed, 162 insertions, 0 deletions
diff --git a/content/browser/speech/speech_recognizer.h b/content/browser/speech/speech_recognizer.h
new file mode 100644
index 0000000..c523f27
--- /dev/null
+++ b/content/browser/speech/speech_recognizer.h
@@ -0,0 +1,162 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
+#define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
+#pragma once
+
+#include "base/basictypes.h"
+#include "base/memory/ref_counted.h"
+#include "base/memory/scoped_ptr.h"
+#include "content/browser/speech/endpointer/endpointer.h"
+#include "content/browser/speech/speech_recognition_engine.h"
+#include "content/public/common/speech_recognition_error.h"
+#include "content/public/common/speech_recognition_result.h"
+#include "media/audio/audio_input_controller.h"
+#include "net/url_request/url_request_context_getter.h"
+
+namespace content {
+class SpeechRecognitionEventListener;  // Only used through pointers below.
+}
+
+namespace media {
+class AudioManager;  // Only used through pointers below.
+}
+
+namespace speech {
+// Handles speech recognition for one session (identified by |session_id|): it
+// drives audio capture, silence detection (endpointer) and the interaction
+// with the SpeechRecognitionEngine, all modelled as a finite state machine.
+class CONTENT_EXPORT SpeechRecognizer
+    : public base::RefCountedThreadSafe<SpeechRecognizer>,
+      public media::AudioInputController::EventHandler,
+      public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {
+ public:
+  static const int kAudioSampleRate;  // Values for these are not set here.
+  static const ChannelLayout kChannelLayout;
+  static const int kNumBitsPerAudioSample;
+  static const int kNoSpeechTimeoutMs;  // Milliseconds, per the name.
+  static const int kEndpointerEstimationTimeMs;  // Milliseconds, per the name.
+
+  SpeechRecognizer(
+      content::SpeechRecognitionEventListener* listener,
+      int session_id,  // Identifies the session this recognizer serves.
+      bool is_single_shot,
+      SpeechRecognitionEngine* engine);  // Presumably owned after; confirm.
+
+  void StartRecognition();
+  void AbortRecognition();
+  void StopAudioCapture();
+  bool IsActive() const;
+  bool IsCapturingAudio() const;
+  const SpeechRecognitionEngine& recognition_engine() const;  // Read-only.
+
+ private:
+  friend class base::RefCountedThreadSafe<SpeechRecognizer>;  // Needs dtor.
+  friend class SpeechRecognizerTest;
+
+  enum FSMState {  // States of the recognizer FSM.
+    STATE_IDLE = 0,
+    STATE_STARTING,
+    STATE_ESTIMATING_ENVIRONMENT,
+    STATE_WAITING_FOR_SPEECH,
+    STATE_RECOGNIZING,
+    STATE_WAITING_FINAL_RESULT,
+    STATE_MAX_VALUE = STATE_WAITING_FINAL_RESULT  // Alias of the last state.
+  };
+
+  enum FSMEvent {  // Events that can be pushed into the recognizer FSM.
+    EVENT_ABORT = 0,
+    EVENT_START,
+    EVENT_STOP_CAPTURE,
+    EVENT_AUDIO_DATA,
+    EVENT_ENGINE_RESULT,
+    EVENT_ENGINE_ERROR,
+    EVENT_AUDIO_ERROR,
+    EVENT_MAX_VALUE = EVENT_AUDIO_ERROR  // Alias of the last event.
+  };
+
+  struct FSMEventArgs {  // An event plus the data that may accompany it.
+    explicit FSMEventArgs(FSMEvent event_value);
+    ~FSMEventArgs();
+
+    FSMEvent event;
+    int audio_error_code;  // Presumably only for EVENT_AUDIO_ERROR; confirm.
+    scoped_refptr<AudioChunk> audio_data;  // Presumably for EVENT_AUDIO_DATA.
+    content::SpeechRecognitionResult engine_result;
+    content::SpeechRecognitionError engine_error;
+  };
+
+  virtual ~SpeechRecognizer();  // Private: reached via the friend above.
+
+  // Entry point for pushing any new external event into the recognizer FSM.
+  void DispatchEvent(const FSMEventArgs& event_args);
+
+  // Defines the behavior of the recognizer FSM, selecting the appropriate
+  // transition according to the current state and event.
+  FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);
+
+  // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc).
+  void ProcessAudioPipeline(const AudioChunk& raw_audio);
+
+  // FSM transition handlers; each one returns the state the FSM moves to.
+  FSMState StartRecording(const FSMEventArgs& event_args);
+  FSMState StartRecognitionEngine(const FSMEventArgs& event_args);
+  FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);
+  FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);
+  FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);
+  FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);
+  FSMState ProcessFinalResult(const FSMEventArgs& event_args);
+  FSMState AbortSilently(const FSMEventArgs& event_args);
+  FSMState AbortWithError(const FSMEventArgs& event_args);
+  FSMState Abort(const content::SpeechRecognitionError& error);
+  FSMState DetectEndOfSpeech(const FSMEventArgs& event_args);
+  FSMState DoNothing(const FSMEventArgs& event_args) const;
+  FSMState NotFeasible(const FSMEventArgs& event_args);
+
+  // Returns the time span of captured audio samples since the start of capture.
+  int GetElapsedTimeMs() const;
+
+  // Calculates the input volume to be displayed in the UI, triggering the
+  // OnAudioLevelsChange event accordingly.
+  void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected);
+
+  void CloseAudioControllerAsynchronously();
+  void SetAudioManagerForTesting(media::AudioManager* audio_manager);
+
+  // Callback called on IO thread by audio_controller->Close().
+  void OnAudioClosed(media::AudioInputController*);
+
+  // AudioInputController::EventHandler methods; the first two are no-ops.
+  virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}
+  virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}
+  virtual void OnError(media::AudioInputController* controller,
+                       int error_code) OVERRIDE;
+  virtual void OnData(media::AudioInputController* controller,
+                      const uint8* data, uint32 size) OVERRIDE;
+
+  // SpeechRecognitionEngineDelegate methods.
+  virtual void OnSpeechRecognitionEngineResult(
+      const content::SpeechRecognitionResult& result) OVERRIDE;
+  virtual void OnSpeechRecognitionEngineError(
+      const content::SpeechRecognitionError& error) OVERRIDE;
+
+  content::SpeechRecognitionEventListener* listener_;  // Raw pointer.
+  media::AudioManager* testing_audio_manager_;  // Presumably set by tests.
+  scoped_ptr<SpeechRecognitionEngine> recognition_engine_;  // Owned.
+  Endpointer endpointer_;
+  scoped_refptr<media::AudioInputController> audio_controller_;
+  int session_id_;
+  int num_samples_recorded_;
+  float audio_level_;
+  bool is_dispatching_event_;  // NOTE(review): likely a re-entrancy guard.
+  bool is_single_shot_;
+  FSMState state_;  // Current state of the FSM.
+
+  DISALLOW_COPY_AND_ASSIGN(SpeechRecognizer);
+};
+
+}  // namespace speech
+
+#endif  // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
author	primiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-06-27 10:56:45 +0000
committer	primiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-06-27 10:56:45 +0000
commit	c91bb26fa85807df212657eb025c2c69be100b9f (patch)
tree	8c9bcf7070b8d64a3c1b3118525f35dc296993fc /content/browser/speech/speech_recognizer.h
parent	62a16d8b66f5660a45320708aac90ea2c0b88584 (diff)
download	chromium_src-c91bb26fa85807df212657eb025c2c69be100b9f.zip chromium_src-c91bb26fa85807df212657eb025c2c69be100b9f.tar.gz chromium_src-c91bb26fa85807df212657eb025c2c69be100b9f.tar.bz2