summaryrefslogtreecommitdiffstats
path: root/content/browser/speech/speech_recognizer_impl.h
diff options
context:
space:
mode:
Diffstat (limited to 'content/browser/speech/speech_recognizer_impl.h')
-rw-r--r--content/browser/speech/speech_recognizer_impl.h125
1 files changed, 87 insertions, 38 deletions
diff --git a/content/browser/speech/speech_recognizer_impl.h b/content/browser/speech/speech_recognizer_impl.h
index 516dfea..a2cce74 100644
--- a/content/browser/speech/speech_recognizer_impl.h
+++ b/content/browser/speech/speech_recognizer_impl.h
@@ -12,6 +12,7 @@
#include "content/browser/speech/speech_recognition_engine.h"
#include "content/public/browser/speech_recognizer.h"
#include "content/public/common/speech_recognition_error.h"
+#include "content/public/common/speech_recognition_result.h"
#include "media/audio/audio_input_controller.h"
#include "net/url_request/url_request_context_getter.h"
@@ -27,8 +28,13 @@ class AudioManager;
namespace speech {
-// Records audio, sends recorded audio to server and translates server response
-// to recognition result.
+// TODO(primiano) Next CL: Remove the Impl suffix and the exported
+// /content/public/browser/speech_recognizer.h interface since this class should
+// not be visible outside (currently we need it for speech input extension API).
+
+// Handles speech recognition for a session (identified by |caller_id|), taking
+// care of audio capture, silence detection/endpointer and interaction with the
+// SpeechRecognitionEngine.
class CONTENT_EXPORT SpeechRecognizerImpl
: public NON_EXPORTED_BASE(content::SpeechRecognizer),
public media::AudioInputController::EventHandler,
@@ -41,14 +47,9 @@ class CONTENT_EXPORT SpeechRecognizerImpl
static const int kEndpointerEstimationTimeMs;
SpeechRecognizerImpl(
- content::SpeechRecognitionEventListener* listener,
- int caller_id,
- const std::string& language,
- const std::string& grammar,
- net::URLRequestContextGetter* context_getter,
- bool filter_profanities,
- const std::string& hardware_info,
- const std::string& origin_url);
+ content::SpeechRecognitionEventListener* listener,
+ int caller_id,
+ SpeechRecognitionEngine* engine);
virtual ~SpeechRecognizerImpl();
// content::SpeechRecognizer methods.
@@ -59,14 +60,86 @@ class CONTENT_EXPORT SpeechRecognizerImpl
virtual bool IsCapturingAudio() const OVERRIDE;
const SpeechRecognitionEngine& recognition_engine() const;
+ private:
+ friend class SpeechRecognizerImplTest;
+
+ enum FSMState {  // States of the recognizer FSM; |state_| is of this type.
+ STATE_IDLE = 0,  // NOTE(review): presumably the initial state - confirm in the constructor.
+ STATE_STARTING,
+ STATE_ESTIMATING_ENVIRONMENT,
+ STATE_WAITING_FOR_SPEECH,
+ STATE_RECOGNIZING,
+ STATE_WAITING_FINAL_RESULT,
+ STATE_MAX_VALUE = STATE_WAITING_FINAL_RESULT  // Alias of the last state; keep in sync when adding states.
+ };
+
+ enum FSMEvent {  // Events pushed into the recognizer FSM via DispatchEvent().
+ EVENT_ABORT = 0,
+ EVENT_START,
+ EVENT_STOP_CAPTURE,
+ EVENT_AUDIO_DATA,  // NOTE(review): presumably raised from OnData() - confirm in the .cc file.
+ EVENT_ENGINE_RESULT,  // NOTE(review): presumably raised from OnSpeechRecognitionEngineResult() - confirm.
+ EVENT_ENGINE_ERROR,  // NOTE(review): presumably raised from OnSpeechRecognitionEngineError() - confirm.
+ EVENT_AUDIO_ERROR,  // NOTE(review): presumably raised from OnError() - confirm.
+ EVENT_MAX_VALUE = EVENT_AUDIO_ERROR  // Alias of the last event; keep in sync when adding events.
+ };
+
+ struct FSMEventArgs {  // An FSM event plus its payload, as passed to DispatchEvent().
+ explicit FSMEventArgs(FSMEvent event_value);  // Defined out of line; not visible in this header.
+ ~FSMEventArgs();
+
+ FSMEvent event;  // The event this object describes.
+ int audio_error_code;  // NOTE(review): presumably only meaningful for EVENT_AUDIO_ERROR - confirm.
+ scoped_refptr<AudioChunk> audio_data;  // NOTE(review): presumably only set for EVENT_AUDIO_DATA - confirm.
+ content::SpeechRecognitionResult engine_result;  // NOTE(review): presumably only set for EVENT_ENGINE_RESULT - confirm.
+ content::SpeechRecognitionError engine_error;  // NOTE(review): presumably only set for EVENT_ENGINE_ERROR - confirm.
+ };
+
+ // Entry point for pushing any new external event into the recognizer FSM.
+ void DispatchEvent(const FSMEventArgs& event_args);
+
+ // Defines the behavior of the recognizer FSM, selecting the appropriate
+ // transition according to the current state and event.
+ FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);
+
+ // Processes a new audio chunk in the audio pipeline (endpointer, vumeter, etc.).
+ void ProcessAudioPipeline(const AudioChunk& raw_audio);
+
+ // The methods below handle transitions of the recognizer FSM.
+ FSMState StartRecording(const FSMEventArgs& event_args);
+ FSMState StartRecognitionEngine(const FSMEventArgs& event_args);
+ FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);
+ FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);
+ FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);
+ FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);
+ FSMState ProcessFinalResult(const FSMEventArgs& event_args);
+ FSMState Abort(const FSMEventArgs& event_args);
+ FSMState AbortWithError(const content::SpeechRecognitionError* error);
+ FSMState AbortWithError(const content::SpeechRecognitionError& error);
+ FSMState DetectEndOfSpeech(const FSMEventArgs& event_args);
+ FSMState DoNothing(const FSMEventArgs& event_args) const;
+ FSMState NotFeasible(const FSMEventArgs& event_args);
+
+ // Returns the time span of captured audio samples since the start of capture.
+ int GetElapsedTimeMs() const;
+
+ // Calculates the input volume to be displayed in the UI, triggering the
+ // OnAudioLevelsChange event accordingly.
+ void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected);
+
+ void CloseAudioControllerAsynchronously();
+ void SetAudioManagerForTesting(media::AudioManager* audio_manager);
+
+ // Callback called on IO thread by audio_controller->Close().
+ void OnAudioClosed(media::AudioInputController*);
+
// AudioInputController::EventHandler methods.
virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}
virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}
virtual void OnError(media::AudioInputController* controller,
int error_code) OVERRIDE;
virtual void OnData(media::AudioInputController* controller,
- const uint8* data,
- uint32 size) OVERRIDE;
+ const uint8* data, uint32 size) OVERRIDE;
// SpeechRecognitionEngineDelegate methods.
virtual void OnSpeechRecognitionEngineResult(
@@ -74,40 +147,16 @@ class CONTENT_EXPORT SpeechRecognizerImpl
virtual void OnSpeechRecognitionEngineError(
const content::SpeechRecognitionError& error) OVERRIDE;
- private:
- friend class SpeechRecognizerImplTest;
-
- void InformErrorAndAbortRecognition(
- content::SpeechRecognitionErrorCode error);
- void SendRecordedAudioToServer();
-
- void HandleOnError(int error_code); // Handles OnError in the IO thread.
-
- // Handles OnData in the IO thread.
- void HandleOnData(scoped_refptr<AudioChunk> raw_audio);
-
- void OnAudioClosed(media::AudioInputController*);
-
- // Helper method which closes the audio controller and frees it asynchronously
- // without blocking the IO thread.
- void CloseAudioControllerAsynchronously();
-
- void SetAudioManagerForTesting(media::AudioManager* audio_manager);
-
content::SpeechRecognitionEventListener* listener_;
media::AudioManager* testing_audio_manager_;
scoped_ptr<SpeechRecognitionEngine> recognition_engine_;
Endpointer endpointer_;
scoped_refptr<media::AudioInputController> audio_controller_;
- scoped_refptr<net::URLRequestContextGetter> context_getter_;
int caller_id_;
- std::string language_;
- std::string grammar_;
- bool filter_profanities_;
- std::string hardware_info_;
- std::string origin_url_;
int num_samples_recorded_;
float audio_level_;
+ bool is_dispatching_event_;
+ FSMState state_;
DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);
};