author    primiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2012-04-13 13:06:39 +0000
committer primiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2012-04-13 13:06:39 +0000
commit    2ba0644d32705803938d2022562d2e42e5ac7615 (patch)
tree      ab1a3973ce11d8fcb5855b89c6287dfd9eb66230 /content
parent    0d2dafb39d52717a30631e7104a9c60fa6b0e57b (diff)
Speech refactoring: Reimplemented speech_recognizer as a FSM (CL1.5)
BUG=116954
TEST=none
Review URL: http://codereview.chromium.org/9835049

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@132179 0039d316-1c4b-4281-b951-d872f2087c98
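
For readers skimming the diff below: the CL replaces ad-hoc callback handling in SpeechRecognizerImpl with a finite state machine whose events are always enqueued on the IO thread and dispatched sequentially. A self-contained sketch of that dispatch pattern, using standard C++ stand-ins for Chromium's PostTask machinery (all names here are illustrative, not the real API):

#include <cassert>
#include <functional>
#include <queue>

enum FSMState { STATE_IDLE, STATE_RUNNING };
enum FSMEvent { EVENT_START, EVENT_STOP };

class RecognizerSketch {
 public:
  // External entry point: events are enqueued, never handled inline, so the
  // causal order between events is preserved and dispatch cannot reenter.
  void PostEvent(FSMEvent event) {
    task_queue_.push([this, event] { DispatchEvent(event); });
  }

  // Stand-in for the IO-thread message loop draining its task queue.
  void RunPendingTasks() {
    while (!task_queue_.empty()) {
      std::function<void()> task = task_queue_.front();
      task_queue_.pop();
      task();
    }
  }

 private:
  void DispatchEvent(FSMEvent event) {
    assert(!is_dispatching_);  // Dispatch must be strictly sequential.
    is_dispatching_ = true;
    state_ = ExecuteTransitionAndGetNextState(event);
    is_dispatching_ = false;
  }

  FSMState ExecuteTransitionAndGetNextState(FSMEvent event) {
    switch (state_) {
      case STATE_IDLE:
        return (event == EVENT_START) ? STATE_RUNNING : STATE_IDLE;
      case STATE_RUNNING:
        return (event == EVENT_STOP) ? STATE_IDLE : STATE_RUNNING;
    }
    return state_;
  }

  std::queue<std::function<void()>> task_queue_;
  FSMState state_ = STATE_IDLE;
  bool is_dispatching_ = false;
};

Enqueuing every event, even those raised on the IO thread itself, is what the DCHECK(!is_dispatching_event_) in the real DispatchEvent() relies on.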
Diffstat (limited to 'content')
-rw-r--r--  content/browser/speech/google_one_shot_remote_engine.h    |   2
-rw-r--r--  content/browser/speech/speech_recognizer_impl.cc           | 642
-rw-r--r--  content/browser/speech/speech_recognizer_impl.h            | 125
-rw-r--r--  content/browser/speech/speech_recognizer_impl_unittest.cc  | 137
4 files changed, 653 insertions, 253 deletions
diff --git a/content/browser/speech/google_one_shot_remote_engine.h b/content/browser/speech/google_one_shot_remote_engine.h
index 236ac94..7e47c67 100644
--- a/content/browser/speech/google_one_shot_remote_engine.h
+++ b/content/browser/speech/google_one_shot_remote_engine.h
@@ -31,7 +31,7 @@ namespace speech {
class AudioChunk;
-struct GoogleOneShotRemoteEngineConfig {
+struct CONTENT_EXPORT GoogleOneShotRemoteEngineConfig {
std::string language;
std::string grammar;
bool filter_profanities;
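
The only change to this header is the CONTENT_EXPORT annotation, needed now that the struct is built and configured by code linking against the content component (SpeechRecognizer::Create() below and the unit test). Roughly, the macro expands to a symbol-visibility attribute in component builds; a simplified sketch (the real definition lives in content/common/content_export.h and also handles the import side):

#if defined(WIN32)
#define CONTENT_EXPORT __declspec(dllexport)
#else
#define CONTENT_EXPORT __attribute__((visibility("default")))
#endif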
diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc
index 07bd75e..d9d9bab 100644
--- a/content/browser/speech/speech_recognizer_impl.cc
+++ b/content/browser/speech/speech_recognizer_impl.cc
@@ -4,6 +4,7 @@
#include "content/browser/speech/speech_recognizer_impl.h"
+#include "base/basictypes.h"
#include "base/bind.h"
#include "base/time.h"
#include "content/browser/browser_main_loop.h"
@@ -24,6 +25,7 @@ using content::SpeechRecognitionResult;
using content::SpeechRecognizer;
using media::AudioInputController;
using media::AudioManager;
+using media::AudioParameters;
namespace {
@@ -49,6 +51,7 @@ bool DetectClipping(const speech::AudioChunk& chunk) {
const int16* samples = chunk.SamplesData16();
const int kThreshold = num_samples / 20;
int clipping_samples = 0;
+
for (int i = 0; i < num_samples; ++i) {
if (samples[i] <= -32767 || samples[i] >= 32767) {
if (++clipping_samples > kThreshold)
@@ -69,14 +72,25 @@ SpeechRecognizer* SpeechRecognizer::Create(
bool filter_profanities,
const std::string& hardware_info,
const std::string& origin_url) {
+ speech::GoogleOneShotRemoteEngineConfig remote_engine_config;
+ remote_engine_config.language = language;
+ remote_engine_config.grammar = grammar;
+ remote_engine_config.audio_sample_rate =
+ speech::SpeechRecognizerImpl::kAudioSampleRate;
+ remote_engine_config.audio_num_bits_per_sample =
+ speech::SpeechRecognizerImpl::kNumBitsPerAudioSample;
+ remote_engine_config.filter_profanities = filter_profanities;
+ remote_engine_config.hardware_info = hardware_info;
+ remote_engine_config.origin_url = origin_url;
+
+ // SpeechRecognizerImpl takes ownership of google_remote_engine.
+ speech::GoogleOneShotRemoteEngine* google_remote_engine =
+ new speech::GoogleOneShotRemoteEngine(context_getter);
+ google_remote_engine->SetConfig(remote_engine_config);
+
return new speech::SpeechRecognizerImpl(listener,
caller_id,
- language,
- grammar,
- context_getter,
- filter_profanities,
- hardware_info,
- origin_url);
+ google_remote_engine);
}
namespace speech {
@@ -87,247 +101,488 @@ const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16;
const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000;
const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300;
+COMPILE_ASSERT(SpeechRecognizerImpl::kNumBitsPerAudioSample % 8 == 0,
+ kNumBitsPerAudioSample_must_be_a_multiple_of_8);
+
SpeechRecognizerImpl::SpeechRecognizerImpl(
SpeechRecognitionEventListener* listener,
int caller_id,
- const std::string& language,
- const std::string& grammar,
- net::URLRequestContextGetter* context_getter,
- bool filter_profanities,
- const std::string& hardware_info,
- const std::string& origin_url)
+ SpeechRecognitionEngine* engine)
: listener_(listener),
testing_audio_manager_(NULL),
+ recognition_engine_(engine),
endpointer_(kAudioSampleRate),
- context_getter_(context_getter),
caller_id_(caller_id),
- language_(language),
- grammar_(grammar),
- filter_profanities_(filter_profanities),
- hardware_info_(hardware_info),
- origin_url_(origin_url),
- num_samples_recorded_(0),
- audio_level_(0.0f) {
+ is_dispatching_event_(false),
+ state_(STATE_IDLE) {
DCHECK(listener_ != NULL);
+ DCHECK(recognition_engine_ != NULL);
endpointer_.set_speech_input_complete_silence_length(
base::Time::kMicrosecondsPerSecond / 2);
endpointer_.set_long_speech_input_complete_silence_length(
base::Time::kMicrosecondsPerSecond);
endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond);
endpointer_.StartSession();
+ recognition_engine_->set_delegate(this);
}
SpeechRecognizerImpl::~SpeechRecognizerImpl() {
- // Recording should have stopped earlier due to the endpointer or
- // |StopRecording| being called.
- DCHECK(!audio_controller_.get());
- DCHECK(!recognition_engine_.get() ||
- !recognition_engine_->IsRecognitionPending());
endpointer_.EndSession();
}
-void SpeechRecognizerImpl::StartRecognition() {
- DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
- DCHECK(!audio_controller_.get());
- DCHECK(!recognition_engine_.get() ||
- !recognition_engine_->IsRecognitionPending());
+// ------- Methods that trigger Finite State Machine (FSM) events ------------
- // The endpointer needs to estimate the environment/background noise before
- // starting to treat the audio as user input. In |HandleOnData| we wait until
- // such time has passed before switching to user input mode.
- endpointer_.SetEnvironmentEstimationMode();
+// NOTE: all the external events and requests should be enqueued (PostTask), even
+// if they come from the same (IO) thread, in order to preserve the relationship
+// of causality between events and avoid interleaved event processing due to
+// synchronous callbacks.
- AudioManager* audio_manager = (testing_audio_manager_ != NULL) ?
- testing_audio_manager_ : BrowserMainLoop::GetAudioManager();
- const int samples_per_packet = kAudioSampleRate *
- GoogleOneShotRemoteEngine::kAudioPacketIntervalMs / 1000;
- media::AudioParameters params(
- media::AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout,
- kAudioSampleRate, kNumBitsPerAudioSample, samples_per_packet);
- audio_controller_ = AudioInputController::Create(audio_manager, this, params);
- DCHECK(audio_controller_.get());
- VLOG(1) << "SpeechRecognizer starting record.";
- num_samples_recorded_ = 0;
- audio_controller_->Record();
+void SpeechRecognizerImpl::StartRecognition() {
+ BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
+ base::Bind(&SpeechRecognizerImpl::DispatchEvent,
+ this, FSMEventArgs(EVENT_START)));
}
void SpeechRecognizerImpl::AbortRecognition() {
- DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
- DCHECK(audio_controller_.get() || recognition_engine_.get());
-
- // Stop recording if required.
- if (audio_controller_.get()) {
- CloseAudioControllerAsynchronously();
- }
-
- VLOG(1) << "SpeechRecognizer canceling recognition.";
- recognition_engine_.reset();
+ BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
+ base::Bind(&SpeechRecognizerImpl::DispatchEvent,
+ this, FSMEventArgs(EVENT_ABORT)));
}
void SpeechRecognizerImpl::StopAudioCapture() {
- DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
-
- // If audio recording has already stopped and we are in recognition phase,
- // silently ignore any more calls to stop recording.
- if (!audio_controller_.get())
- return;
+ BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
+ base::Bind(&SpeechRecognizerImpl::DispatchEvent,
+ this, FSMEventArgs(EVENT_STOP_CAPTURE)));
+}
- CloseAudioControllerAsynchronously();
- listener_->OnSoundEnd(caller_id_);
- listener_->OnAudioEnd(caller_id_);
+bool SpeechRecognizerImpl::IsActive() const {
+ // Checking the FSM state from another thread (thus, while the FSM is
+ // potentially concurrently evolving) is meaningless.
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+ return state_ != STATE_IDLE;
+}
- // If we haven't got any audio yet end the recognition sequence here.
- if (recognition_engine_ == NULL) {
- // Guard against the listener freeing us until we finish our job.
- scoped_refptr<SpeechRecognizerImpl> me(this);
- listener_->OnRecognitionEnd(caller_id_);
- } else {
- recognition_engine_->AudioChunksEnded();
- }
+bool SpeechRecognizerImpl::IsCapturingAudio() const {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); // See IsActive().
+ const bool is_capturing_audio = state_ >= STATE_STARTING &&
+ state_ <= STATE_RECOGNIZING;
+ DCHECK((is_capturing_audio && (audio_controller_.get() != NULL)) ||
+ (!is_capturing_audio && audio_controller_.get() == NULL));
+ return is_capturing_audio;
}
// Invoked in the audio thread.
void SpeechRecognizerImpl::OnError(AudioInputController* controller,
int error_code) {
+ FSMEventArgs event_args(EVENT_AUDIO_ERROR);
+ event_args.audio_error_code = error_code;
BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
- base::Bind(&SpeechRecognizerImpl::HandleOnError,
- this, error_code));
+ base::Bind(&SpeechRecognizerImpl::DispatchEvent,
+ this, event_args));
}
-void SpeechRecognizerImpl::HandleOnError(int error_code) {
- LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code;
-
- // Check if we are still recording before canceling recognition, as
- // recording might have been stopped after this error was posted to the queue
- // by |OnError|.
- if (!audio_controller_.get())
+void SpeechRecognizerImpl::OnData(AudioInputController* controller,
+ const uint8* data, uint32 size) {
+ if (size == 0) // This could happen when audio capture stops and is normal.
return;
- InformErrorAndAbortRecognition(content::SPEECH_RECOGNITION_ERROR_AUDIO);
+ FSMEventArgs event_args(EVENT_AUDIO_DATA);
+ event_args.audio_data = new AudioChunk(data, static_cast<size_t>(size),
+ kNumBitsPerAudioSample / 8);
+ BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
+ base::Bind(&SpeechRecognizerImpl::DispatchEvent,
+ this, event_args));
}
-void SpeechRecognizerImpl::OnData(AudioInputController* controller,
- const uint8* data, uint32 size) {
- if (size == 0) // This could happen when recording stops and is normal.
- return;
- scoped_refptr<AudioChunk> raw_audio(
- new AudioChunk(data,
- static_cast<size_t>(size),
- kNumBitsPerAudioSample / 8));
+void SpeechRecognizerImpl::OnAudioClosed(AudioInputController*) {}
+
+void SpeechRecognizerImpl::OnSpeechRecognitionEngineResult(
+ const content::SpeechRecognitionResult& result) {
+ FSMEventArgs event_args(EVENT_ENGINE_RESULT);
+ event_args.engine_result = result;
BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
- base::Bind(&SpeechRecognizerImpl::HandleOnData,
- this, raw_audio));
+ base::Bind(&SpeechRecognizerImpl::DispatchEvent,
+ this, event_args));
}
-void SpeechRecognizerImpl::HandleOnData(scoped_refptr<AudioChunk> raw_audio) {
- // Check if we are still recording and if not discard this buffer, as
- // recording might have been stopped after this buffer was posted to the queue
- // by |OnData|.
- if (!audio_controller_.get())
- return;
+void SpeechRecognizerImpl::OnSpeechRecognitionEngineError(
+ const content::SpeechRecognitionError& error) {
+ FSMEventArgs event_args(EVENT_ENGINE_ERROR);
+ event_args.engine_error = error;
+ BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
+ base::Bind(&SpeechRecognizerImpl::DispatchEvent,
+ this, event_args));
+}
+
+// ----------------------- Core FSM implementation ---------------------------
+// TODO(primiano) After the changes in the media package (r129173), this class
+// slightly violates the SpeechRecognitionEventListener interface contract. In
+// particular, it is not true anymore that this class can be freed after the
+// OnRecognitionEnd event, since the audio_controller_.Close() asynchronous
+// call can be still in progress after the end event. Currently, it does not
+// represent a problem for the browser itself, since refcounting protects us
+// against such race conditions. However, we should fix this in the next CLs.
+// For instance, the tests currently work only because TestAudioInputController
+// does not close asynchronously the way the real controller does; they will
+// become flaky once TestAudioInputController is fixed.
+
+void SpeechRecognizerImpl::DispatchEvent(const FSMEventArgs& event_args) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+ DCHECK_LE(event_args.event, EVENT_MAX_VALUE);
+ DCHECK_LE(state_, STATE_MAX_VALUE);
+
+ // Event dispatching must be sequential, otherwise it will break all the rules
+ // and the assumptions of the finite state automata model.
+ DCHECK(!is_dispatching_event_);
+ is_dispatching_event_ = true;
- bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech();
-
- float rms;
- endpointer_.ProcessAudio(*raw_audio, &rms);
- bool did_clip = DetectClipping(*raw_audio);
- num_samples_recorded_ += raw_audio->NumSamples();
-
- if (recognition_engine_ == NULL) {
- // This was the first audio packet recorded, so start a request to the
- // server to send the data and inform the listener.
- listener_->OnAudioStart(caller_id_);
- GoogleOneShotRemoteEngineConfig google_sr_config;
- google_sr_config.language = language_;
- google_sr_config.grammar = grammar_;
- google_sr_config.audio_sample_rate = kAudioSampleRate;
- google_sr_config.audio_num_bits_per_sample = kNumBitsPerAudioSample;
- google_sr_config.filter_profanities = filter_profanities_;
- google_sr_config.hardware_info = hardware_info_;
- google_sr_config.origin_url = origin_url_;
- GoogleOneShotRemoteEngine* google_sr_engine =
- new GoogleOneShotRemoteEngine(context_getter_.get());
- google_sr_engine->SetConfig(google_sr_config);
- recognition_engine_.reset(google_sr_engine);
- recognition_engine_->set_delegate(this);
- recognition_engine_->StartRecognition();
+ // Guard against the delegate freeing us until we finish processing the event.
+ scoped_refptr<SpeechRecognizerImpl> me(this);
+
+ if (event_args.event == EVENT_AUDIO_DATA) {
+ DCHECK(event_args.audio_data.get() != NULL);
+ ProcessAudioPipeline(*event_args.audio_data);
}
- recognition_engine_->TakeAudioChunk(*raw_audio);
+ // The audio pipeline must be processed before the event dispatch, otherwise
+ // it would take actions according to the future state instead of the current.
+ state_ = ExecuteTransitionAndGetNextState(event_args);
- if (endpointer_.IsEstimatingEnvironment()) {
- // Check if we have gathered enough audio for the endpointer to do
- // environment estimation and should move on to detect speech/end of speech.
- if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs *
- kAudioSampleRate) / 1000) {
- endpointer_.SetUserInputMode();
- listener_->OnEnvironmentEstimationComplete(caller_id_);
- }
- return; // No more processing since we are still estimating environment.
+ is_dispatching_event_ = false;
+}
+
+SpeechRecognizerImpl::FSMState
+SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
+ const FSMEventArgs& event_args) {
+ const FSMEvent event = event_args.event;
+ switch (state_) {
+ case STATE_IDLE:
+ switch (event) {
+ // TODO(primiano) restore UNREACHABLE_CONDITION on EVENT_ABORT and
+ // EVENT_STOP_CAPTURE below once speech input extensions are fixed.
+ case EVENT_ABORT:
+ return DoNothing(event_args);
+ case EVENT_START:
+ return StartRecording(event_args);
+ case EVENT_STOP_CAPTURE: // Corner cases related to queued messages
+ case EVENT_AUDIO_DATA: // being lately dispatched.
+ case EVENT_ENGINE_RESULT:
+ case EVENT_ENGINE_ERROR:
+ case EVENT_AUDIO_ERROR:
+ return DoNothing(event_args);
+ }
+ break;
+ case STATE_STARTING:
+ switch (event) {
+ case EVENT_ABORT:
+ return Abort(event_args);
+ case EVENT_START:
+ return NotFeasible(event_args);
+ case EVENT_STOP_CAPTURE:
+ return Abort(event_args);
+ case EVENT_AUDIO_DATA:
+ return StartRecognitionEngine(event_args);
+ case EVENT_ENGINE_RESULT:
+ return NotFeasible(event_args);
+ case EVENT_ENGINE_ERROR:
+ case EVENT_AUDIO_ERROR:
+ return Abort(event_args);
+ }
+ break;
+ case STATE_ESTIMATING_ENVIRONMENT:
+ switch (event) {
+ case EVENT_ABORT:
+ return Abort(event_args);
+ case EVENT_START:
+ return NotFeasible(event_args);
+ case EVENT_STOP_CAPTURE:
+ return StopCaptureAndWaitForResult(event_args);
+ case EVENT_AUDIO_DATA:
+ return WaitEnvironmentEstimationCompletion(event_args);
+ case EVENT_ENGINE_RESULT:
+ return ProcessIntermediateResult(event_args);
+ case EVENT_ENGINE_ERROR:
+ case EVENT_AUDIO_ERROR:
+ return Abort(event_args);
+ }
+ break;
+ case STATE_WAITING_FOR_SPEECH:
+ switch (event) {
+ case EVENT_ABORT:
+ return Abort(event_args);
+ case EVENT_START:
+ return NotFeasible(event_args);
+ case EVENT_STOP_CAPTURE:
+ return StopCaptureAndWaitForResult(event_args);
+ case EVENT_AUDIO_DATA:
+ return DetectUserSpeechOrTimeout(event_args);
+ case EVENT_ENGINE_RESULT:
+ return ProcessIntermediateResult(event_args);
+ case EVENT_ENGINE_ERROR:
+ case EVENT_AUDIO_ERROR:
+ return Abort(event_args);
+ }
+ break;
+ case STATE_RECOGNIZING:
+ switch (event) {
+ case EVENT_ABORT:
+ return Abort(event_args);
+ case EVENT_START:
+ return NotFeasible(event_args);
+ case EVENT_STOP_CAPTURE:
+ return StopCaptureAndWaitForResult(event_args);
+ case EVENT_AUDIO_DATA:
+ return DetectEndOfSpeech(event_args);
+ case EVENT_ENGINE_RESULT:
+ return ProcessIntermediateResult(event_args);
+ case EVENT_ENGINE_ERROR:
+ case EVENT_AUDIO_ERROR:
+ return Abort(event_args);
+ }
+ break;
+ case STATE_WAITING_FINAL_RESULT:
+ switch (event) {
+ case EVENT_ABORT:
+ return Abort(event_args);
+ case EVENT_START:
+ return NotFeasible(event_args);
+ case EVENT_STOP_CAPTURE:
+ case EVENT_AUDIO_DATA:
+ return DoNothing(event_args);
+ case EVENT_ENGINE_RESULT:
+ return ProcessFinalResult(event_args);
+ case EVENT_ENGINE_ERROR:
+ case EVENT_AUDIO_ERROR:
+ return Abort(event_args);
+ }
+ break;
}
+ return NotFeasible(event_args);
+}
- // Check if we have waited too long without hearing any speech.
- bool speech_was_heard_after_packet = endpointer_.DidStartReceivingSpeech();
- if (!speech_was_heard_after_packet &&
- num_samples_recorded_ >= (kNoSpeechTimeoutMs / 1000) * kAudioSampleRate) {
- InformErrorAndAbortRecognition(
- content::SPEECH_RECOGNITION_ERROR_NO_SPEECH);
- return;
+// ----------- Contract for all the FSM evolution functions below -------------
+// - Are guaranteed to be executed in the IO thread;
+// - Are guaranteed to be not reentrant (themselves and each other);
+// - event_args members are guaranteed to be stable during the call;
+// - The class won't be freed in the meanwhile due to callbacks;
+// - IsCapturingAudio() returns true if and only if audio_controller_ != NULL.
+
+// TODO(primiano) the audio pipeline is currently serial. However, the
+// clipper->endpointer->vumeter chain and the sr_engine could be parallelized.
+// We should profile the execution to see whether it would be worthwhile.
+void SpeechRecognizerImpl::ProcessAudioPipeline(const AudioChunk& raw_audio) {
+ const bool route_to_endpointer = state_ >= STATE_ESTIMATING_ENVIRONMENT &&
+ state_ <= STATE_RECOGNIZING;
+ const bool route_to_sr_engine = route_to_endpointer;
+ const bool route_to_vumeter = state_ >= STATE_WAITING_FOR_SPEECH &&
+ state_ <= STATE_RECOGNIZING;
+ const bool clip_detected = DetectClipping(raw_audio);
+ float rms = 0.0f;
+
+ num_samples_recorded_ += raw_audio.NumSamples();
+
+ if (route_to_endpointer)
+ endpointer_.ProcessAudio(raw_audio, &rms);
+
+ if (route_to_vumeter) {
+ DCHECK(route_to_endpointer); // Depends on endpointer due to |rms|.
+ UpdateSignalAndNoiseLevels(rms, clip_detected);
}
+ if (route_to_sr_engine) {
+ DCHECK(recognition_engine_.get() != NULL);
+ recognition_engine_->TakeAudioChunk(raw_audio);
+ }
+}
- if (!speech_was_heard_before_packet && speech_was_heard_after_packet)
- listener_->OnSoundStart(caller_id_);
+SpeechRecognizerImpl::FSMState
+SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {
+ DCHECK(recognition_engine_.get() != NULL);
+ DCHECK(!IsCapturingAudio());
+ AudioManager* audio_manager = (testing_audio_manager_ != NULL) ?
+ testing_audio_manager_ :
+ BrowserMainLoop::GetAudioManager();
+ DCHECK(audio_manager != NULL);
- // Calculate the input volume to display in the UI, smoothing towards the
- // new level.
- float level = (rms - kAudioMeterMinDb) /
- (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);
- level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped);
- if (level > audio_level_) {
- audio_level_ += (level - audio_level_) * kUpSmoothingFactor;
+ DVLOG(1) << "SpeechRecognizerImpl starting audio capture.";
+ num_samples_recorded_ = 0;
+ audio_level_ = 0;
+ listener_->OnRecognitionStart(caller_id_);
+
+ if (!audio_manager->HasAudioInputDevices()) {
+ return AbortWithError(SpeechRecognitionError(
+ content::SPEECH_RECOGNITION_ERROR_AUDIO,
+ content::SPEECH_AUDIO_ERROR_DETAILS_NO_MIC));
+ }
+
+ if (audio_manager->IsRecordingInProcess()) {
+ return AbortWithError(SpeechRecognitionError(
+ content::SPEECH_RECOGNITION_ERROR_AUDIO,
+ content::SPEECH_AUDIO_ERROR_DETAILS_IN_USE));
+ }
+
+ const int samples_per_packet = (kAudioSampleRate *
+ recognition_engine_->GetDesiredAudioChunkDurationMs()) / 1000;
+ AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout,
+ kAudioSampleRate, kNumBitsPerAudioSample,
+ samples_per_packet);
+ audio_controller_ = AudioInputController::Create(audio_manager, this, params);
+
+ if (audio_controller_.get() == NULL) {
+ return AbortWithError(
+ SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_AUDIO));
+ }
+
+ // The endpointer needs to estimate the environment/background noise before
+ // starting to treat the audio as user input. We wait in the
+ // ESTIMATING_ENVIRONMENT state until the estimation interval has elapsed
+ // before switching to user input mode.
+ endpointer_.SetEnvironmentEstimationMode();
+ audio_controller_->Record();
+ return STATE_STARTING;
+}
+
+SpeechRecognizerImpl::FSMState
+SpeechRecognizerImpl::StartRecognitionEngine(const FSMEventArgs& event_args) {
+ // This is the first audio packet captured, so the recognition engine is
+ // started and the delegate notified about the event.
+ DCHECK(recognition_engine_.get() != NULL);
+ recognition_engine_->StartRecognition();
+ listener_->OnAudioStart(caller_id_);
+
+ // This is a little hack, since TakeAudioChunk() is already called by
+ // ProcessAudioPipeline(). It is the best tradeoff, unless we allow dropping
+ // the first audio chunk captured after opening the audio device.
+ recognition_engine_->TakeAudioChunk(*(event_args.audio_data));
+ return STATE_ESTIMATING_ENVIRONMENT;
+}
+
+SpeechRecognizerImpl::FSMState
+SpeechRecognizerImpl::WaitEnvironmentEstimationCompletion(const FSMEventArgs&) {
+ DCHECK(endpointer_.IsEstimatingEnvironment());
+ if (GetElapsedTimeMs() >= kEndpointerEstimationTimeMs) {
+ endpointer_.SetUserInputMode();
+ listener_->OnEnvironmentEstimationComplete(caller_id_);
+ return STATE_WAITING_FOR_SPEECH;
} else {
- audio_level_ += (level - audio_level_) * kDownSmoothingFactor;
+ return STATE_ESTIMATING_ENVIRONMENT;
+ }
+}
+
+SpeechRecognizerImpl::FSMState
+SpeechRecognizerImpl::DetectUserSpeechOrTimeout(const FSMEventArgs&) {
+ if (endpointer_.DidStartReceivingSpeech()) {
+ listener_->OnSoundStart(caller_id_);
+ return STATE_RECOGNIZING;
+ } else if (GetElapsedTimeMs() >= kNoSpeechTimeoutMs) {
+ return AbortWithError(
+ SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_NO_SPEECH));
}
+ return STATE_WAITING_FOR_SPEECH;
+}
- float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) /
- (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);
- noise_level = std::min(std::max(0.0f, noise_level),
- kAudioMeterRangeMaxUnclipped);
+SpeechRecognizerImpl::FSMState
+SpeechRecognizerImpl::DetectEndOfSpeech(const FSMEventArgs& event_args) {
+ if (endpointer_.speech_input_complete()) {
+ return StopCaptureAndWaitForResult(event_args);
+ }
+ return STATE_RECOGNIZING;
+}
- listener_->OnAudioLevelsChange(caller_id_, did_clip ? 1.0f : audio_level_,
- noise_level);
+SpeechRecognizerImpl::FSMState
+SpeechRecognizerImpl::StopCaptureAndWaitForResult(const FSMEventArgs&) {
+ DCHECK(state_ >= STATE_ESTIMATING_ENVIRONMENT && state_ <= STATE_RECOGNIZING);
- if (endpointer_.speech_input_complete())
- StopAudioCapture();
+ DVLOG(1) << "Concluding recognition";
+ CloseAudioControllerAsynchronously();
+ recognition_engine_->AudioChunksEnded();
+
+ if (state_ > STATE_WAITING_FOR_SPEECH)
+ listener_->OnSoundEnd(caller_id_);
+
+ listener_->OnAudioEnd(caller_id_);
+ return STATE_WAITING_FINAL_RESULT;
}
-void SpeechRecognizerImpl::OnAudioClosed(AudioInputController*) {}
+SpeechRecognizerImpl::FSMState
+SpeechRecognizerImpl::Abort(const FSMEventArgs& event_args) {
+ // TODO(primiano) Should raise SPEECH_RECOGNITION_ERROR_ABORTED in the
+ // absence of other specific error sources (i.e., when this was an explicit
+ // abort request). However, SPEECH_RECOGNITION_ERROR_ABORTED is not currently
+ // caught by ChromeSpeechRecognitionManagerDelegate and would cause an
+ // exception. JS support will probably need it in the future.
+ if (event_args.event == EVENT_AUDIO_ERROR) {
+ return AbortWithError(
+ SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_AUDIO));
+ } else if (event_args.event == EVENT_ENGINE_ERROR) {
+ return AbortWithError(event_args.engine_error);
+ }
+ return AbortWithError(NULL);
+}
+
+SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::AbortWithError(
+ const SpeechRecognitionError& error) {
+ return AbortWithError(&error);
+}
+
+SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::AbortWithError(
+ const SpeechRecognitionError* error) {
+ if (IsCapturingAudio())
+ CloseAudioControllerAsynchronously();
+
+ DVLOG(1) << "SpeechRecognizerImpl canceling recognition. ";
+
+ // The recognition engine is initialized only after STATE_STARTING.
+ if (state_ > STATE_STARTING) {
+ DCHECK(recognition_engine_.get() != NULL);
+ recognition_engine_->EndRecognition();
+ }
+
+ if (state_ > STATE_WAITING_FOR_SPEECH && state_ < STATE_WAITING_FINAL_RESULT)
+ listener_->OnSoundEnd(caller_id_);
+
+ if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT)
+ listener_->OnAudioEnd(caller_id_);
+
+ if (error != NULL)
+ listener_->OnRecognitionError(caller_id_, *error);
-void SpeechRecognizerImpl::OnSpeechRecognitionEngineResult(
- const content::SpeechRecognitionResult& result) {
- // Guard against the listener freeing us until we finish our job.
- scoped_refptr<SpeechRecognizerImpl> me(this);
- listener_->OnRecognitionResult(caller_id_, result);
listener_->OnRecognitionEnd(caller_id_);
+
+ return STATE_IDLE;
}
-void SpeechRecognizerImpl::OnSpeechRecognitionEngineError(
- const content::SpeechRecognitionError& error) {
- InformErrorAndAbortRecognition(error.code);
+SpeechRecognizerImpl::FSMState
+SpeechRecognizerImpl::ProcessIntermediateResult(const FSMEventArgs&) {
+ // This is in preparation for future speech recognition functions.
+ NOTREACHED();
+ return state_;
+}
+
+SpeechRecognizerImpl::FSMState
+SpeechRecognizerImpl::ProcessFinalResult(const FSMEventArgs& event_args) {
+ const SpeechRecognitionResult& result = event_args.engine_result;
+ DVLOG(1) << "Got valid result";
+ recognition_engine_->EndRecognition();
+ listener_->OnRecognitionResult(caller_id_, result);
+ listener_->OnRecognitionEnd(caller_id_);
+ return STATE_IDLE;
}
-void SpeechRecognizerImpl::InformErrorAndAbortRecognition(
- content::SpeechRecognitionErrorCode error) {
- DCHECK_NE(error, content::SPEECH_RECOGNITION_ERROR_NONE);
- AbortRecognition();
+SpeechRecognizerImpl::FSMState
+SpeechRecognizerImpl::DoNothing(const FSMEventArgs&) const {
+ return state_; // Just keep the current state.
+}
- // Guard against the listener freeing us until we finish our job.
- scoped_refptr<SpeechRecognizerImpl> me(this);
- listener_->OnRecognitionError(caller_id_, error);
+SpeechRecognizerImpl::FSMState
+SpeechRecognizerImpl::NotFeasible(const FSMEventArgs& event_args) {
+ NOTREACHED() << "Unfeasible event " << event_args.event
+ << " in state " << state_;
+ return state_;
}
void SpeechRecognizerImpl::CloseAudioControllerAsynchronously() {
- VLOG(1) << "SpeechRecognizer stopping record.";
+ DCHECK(IsCapturingAudio());
+ DVLOG(1) << "SpeechRecognizerImpl stopping audio capture.";
// Issues a Close on the audio controller, passing an empty callback. The only
// purpose of such callback is to keep the audio controller refcounted until
// Close has completed (in the audio thread) and automatically destroy it
@@ -337,12 +592,30 @@ void SpeechRecognizerImpl::CloseAudioControllerAsynchronously() {
audio_controller_ = NULL; // The controller is still refcounted by Bind.
}
-bool SpeechRecognizerImpl::IsActive() const {
- return (recognition_engine_.get() != NULL);
+int SpeechRecognizerImpl::GetElapsedTimeMs() const {
+ return (num_samples_recorded_ * 1000) / kAudioSampleRate;
}
-bool SpeechRecognizerImpl::IsCapturingAudio() const {
- return (audio_controller_.get() != NULL);
+void SpeechRecognizerImpl::UpdateSignalAndNoiseLevels(const float& rms,
+ bool clip_detected) {
+ // Calculate the input volume to display in the UI, smoothing towards the
+ // new level.
+ // TODO(primiano) Do we really need all this floating point arith here?
+ // Perhaps it might be quite expensive on mobile.
+ float level = (rms - kAudioMeterMinDb) /
+ (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);
+ level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped);
+ const float smoothing_factor = (level > audio_level_) ? kUpSmoothingFactor :
+ kDownSmoothingFactor;
+ audio_level_ += (level - audio_level_) * smoothing_factor;
+
+ float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) /
+ (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);
+ noise_level = std::min(std::max(0.0f, noise_level),
+ kAudioMeterRangeMaxUnclipped);
+
+ listener_->OnAudioLevelsChange(
+ caller_id_, clip_detected ? 1.0f : audio_level_, noise_level);
}
const SpeechRecognitionEngine&
@@ -355,5 +628,14 @@ void SpeechRecognizerImpl::SetAudioManagerForTesting(
testing_audio_manager_ = audio_manager;
}
+SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
+ : event(event_value),
+ audio_error_code(0),
+ audio_data(NULL),
+ engine_error(content::SPEECH_RECOGNITION_ERROR_NONE) {
+}
+
+SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() {
+}
} // namespace speech
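
A note on the timing logic above: GetElapsedTimeMs() derives elapsed capture time purely from the sample counter, so no wall-clock reads are needed on the IO thread and the unit tests can drive timeouts deterministically by feeding packets. A standalone sketch of the arithmetic (assuming a 16 kHz kAudioSampleRate; the constant's actual value is defined elsewhere in this file):

#include <cstdio>

// Elapsed capture time in milliseconds, derived from the sample count alone.
int GetElapsedTimeMs(int num_samples_recorded, int sample_rate_hz) {
  return (num_samples_recorded * 1000) / sample_rate_hz;
}

int main() {
  // Under the assumed 16 kHz rate, the kNoSpeechTimeoutMs = 8000 check in
  // DetectUserSpeechOrTimeout() fires once 128000 samples have been recorded.
  printf("%d ms\n", GetElapsedTimeMs(128000, 16000));  // prints "8000 ms"
  return 0;
}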
diff --git a/content/browser/speech/speech_recognizer_impl.h b/content/browser/speech/speech_recognizer_impl.h
index 516dfea..a2cce74 100644
--- a/content/browser/speech/speech_recognizer_impl.h
+++ b/content/browser/speech/speech_recognizer_impl.h
@@ -12,6 +12,7 @@
#include "content/browser/speech/speech_recognition_engine.h"
#include "content/public/browser/speech_recognizer.h"
#include "content/public/common/speech_recognition_error.h"
+#include "content/public/common/speech_recognition_result.h"
#include "media/audio/audio_input_controller.h"
#include "net/url_request/url_request_context_getter.h"
@@ -27,8 +28,13 @@ class AudioManager;
namespace speech {
-// Records audio, sends recorded audio to server and translates server response
-// to recognition result.
+// TODO(primiano) Next CL: Remove the Impl suffix and the exported
+// /content/public/browser/speech_recognizer.h interface since this class should
+// not be visible outside content (currently we need it for the speech input
+// extension API).
+
+// Handles speech recognition for a session (identified by |caller_id|), taking
+// care of audio capture, silence detection/endpointer and interaction with the
+// SpeechRecognitionEngine.
class CONTENT_EXPORT SpeechRecognizerImpl
: public NON_EXPORTED_BASE(content::SpeechRecognizer),
public media::AudioInputController::EventHandler,
@@ -41,14 +47,9 @@ class CONTENT_EXPORT SpeechRecognizerImpl
static const int kEndpointerEstimationTimeMs;
SpeechRecognizerImpl(
- content::SpeechRecognitionEventListener* listener,
- int caller_id,
- const std::string& language,
- const std::string& grammar,
- net::URLRequestContextGetter* context_getter,
- bool filter_profanities,
- const std::string& hardware_info,
- const std::string& origin_url);
+ content::SpeechRecognitionEventListener* listener,
+ int caller_id,
+ SpeechRecognitionEngine* engine);
virtual ~SpeechRecognizerImpl();
// content::SpeechRecognizer methods.
@@ -59,14 +60,86 @@ class CONTENT_EXPORT SpeechRecognizerImpl
virtual bool IsCapturingAudio() const OVERRIDE;
const SpeechRecognitionEngine& recognition_engine() const;
+ private:
+ friend class SpeechRecognizerImplTest;
+
+ enum FSMState {
+ STATE_IDLE = 0,
+ STATE_STARTING,
+ STATE_ESTIMATING_ENVIRONMENT,
+ STATE_WAITING_FOR_SPEECH,
+ STATE_RECOGNIZING,
+ STATE_WAITING_FINAL_RESULT,
+ STATE_MAX_VALUE = STATE_WAITING_FINAL_RESULT
+ };
+
+ enum FSMEvent {
+ EVENT_ABORT = 0,
+ EVENT_START,
+ EVENT_STOP_CAPTURE,
+ EVENT_AUDIO_DATA,
+ EVENT_ENGINE_RESULT,
+ EVENT_ENGINE_ERROR,
+ EVENT_AUDIO_ERROR,
+ EVENT_MAX_VALUE = EVENT_AUDIO_ERROR
+ };
+
+ struct FSMEventArgs {
+ explicit FSMEventArgs(FSMEvent event_value);
+ ~FSMEventArgs();
+
+ FSMEvent event;
+ int audio_error_code;
+ scoped_refptr<AudioChunk> audio_data;
+ content::SpeechRecognitionResult engine_result;
+ content::SpeechRecognitionError engine_error;
+ };
+
+ // Entry point for pushing any new external event into the recognizer FSM.
+ void DispatchEvent(const FSMEventArgs& event_args);
+
+ // Defines the behavior of the recognizer FSM, selecting the appropriate
+ // transition according to the current state and event.
+ FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);
+
+ // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc).
+ void ProcessAudioPipeline(const AudioChunk& raw_audio);
+
+ // The methods below handle transitions of the recognizer FSM.
+ FSMState StartRecording(const FSMEventArgs& event_args);
+ FSMState StartRecognitionEngine(const FSMEventArgs& event_args);
+ FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);
+ FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);
+ FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);
+ FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);
+ FSMState ProcessFinalResult(const FSMEventArgs& event_args);
+ FSMState Abort(const FSMEventArgs& event_args);
+ FSMState AbortWithError(const content::SpeechRecognitionError* error);
+ FSMState AbortWithError(const content::SpeechRecognitionError& error);
+ FSMState DetectEndOfSpeech(const FSMEventArgs& event_args);
+ FSMState DoNothing(const FSMEventArgs& event_args) const;
+ FSMState NotFeasible(const FSMEventArgs& event_args);
+
+ // Returns the time span of captured audio samples since the start of capture.
+ int GetElapsedTimeMs() const;
+
+ // Calculates the input volume to be displayed in the UI, triggering the
+ // OnAudioLevelsChange event accordingly.
+ void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected);
+
+ void CloseAudioControllerAsynchronously();
+ void SetAudioManagerForTesting(media::AudioManager* audio_manager);
+
+ // Callback called on IO thread by audio_controller->Close().
+ void OnAudioClosed(media::AudioInputController*);
+
// AudioInputController::EventHandler methods.
virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}
virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}
virtual void OnError(media::AudioInputController* controller,
int error_code) OVERRIDE;
virtual void OnData(media::AudioInputController* controller,
- const uint8* data,
- uint32 size) OVERRIDE;
+ const uint8* data, uint32 size) OVERRIDE;
// SpeechRecognitionEngineDelegate methods.
virtual void OnSpeechRecognitionEngineResult(
@@ -74,40 +147,16 @@ class CONTENT_EXPORT SpeechRecognizerImpl
virtual void OnSpeechRecognitionEngineError(
const content::SpeechRecognitionError& error) OVERRIDE;
- private:
- friend class SpeechRecognizerImplTest;
-
- void InformErrorAndAbortRecognition(
- content::SpeechRecognitionErrorCode error);
- void SendRecordedAudioToServer();
-
- void HandleOnError(int error_code); // Handles OnError in the IO thread.
-
- // Handles OnData in the IO thread.
- void HandleOnData(scoped_refptr<AudioChunk> raw_audio);
-
- void OnAudioClosed(media::AudioInputController*);
-
- // Helper method which closes the audio controller and frees it asynchronously
- // without blocking the IO thread.
- void CloseAudioControllerAsynchronously();
-
- void SetAudioManagerForTesting(media::AudioManager* audio_manager);
-
content::SpeechRecognitionEventListener* listener_;
media::AudioManager* testing_audio_manager_;
scoped_ptr<SpeechRecognitionEngine> recognition_engine_;
Endpointer endpointer_;
scoped_refptr<media::AudioInputController> audio_controller_;
- scoped_refptr<net::URLRequestContextGetter> context_getter_;
int caller_id_;
- std::string language_;
- std::string grammar_;
- bool filter_profanities_;
- std::string hardware_info_;
- std::string origin_url_;
int num_samples_recorded_;
float audio_level_;
+ bool is_dispatching_event_;
+ FSMState state_;
DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);
};
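
Among the declarations above, UpdateSignalAndNoiseLevels() is the piece with non-obvious arithmetic: it maps an RMS dB reading onto the UI meter range and smooths asymmetrically, rising quickly towards louder input and decaying slowly towards silence. A self-contained sketch with illustrative constants (the real kAudioMeter* values and smoothing factors are defined in the anonymous namespace of speech_recognizer_impl.cc and are not shown in this diff):

#include <algorithm>

// Illustrative constants; the real values live in speech_recognizer_impl.cc.
const float kAudioMeterMinDb = 10.0f;
const float kAudioMeterDbRange = 25.0f;
const float kAudioMeterRangeMaxUnclipped = 47.0f / 48.0f;
const float kUpSmoothingFactor = 1.0f;    // rise quickly
const float kDownSmoothingFactor = 0.7f;  // decay slowly

float SmoothedAudioLevel(float audio_level, float rms) {
  // Normalize the dB reading into [0, kAudioMeterRangeMaxUnclipped].
  float level = (rms - kAudioMeterMinDb) /
                (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);
  level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped);
  // Smooth towards the new level with an attack/decay-dependent factor.
  const float factor =
      (level > audio_level) ? kUpSmoothingFactor : kDownSmoothingFactor;
  return audio_level + (level - audio_level) * factor;
}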
diff --git a/content/browser/speech/speech_recognizer_impl_unittest.cc b/content/browser/speech/speech_recognizer_impl_unittest.cc
index 01b7e4c..5dbe6cc 100644
--- a/content/browser/speech/speech_recognizer_impl_unittest.cc
+++ b/content/browser/speech/speech_recognizer_impl_unittest.cc
@@ -17,6 +17,7 @@
#include "net/url_request/url_request_status.h"
#include "testing/gtest/include/gtest/gtest.h"
+using base::MessageLoopProxy;
using content::BrowserThread;
using content::BrowserThreadImpl;
using media::AudioInputController;
@@ -97,16 +98,28 @@ class SpeechRecognizerImplTest : public content::SpeechRecognitionEventListener,
SpeechRecognizerImplTest()
: io_thread_(BrowserThread::IO, &message_loop_),
audio_manager_(new MockAudioManager()),
- audio_ended_(false),
+ recognition_started_(false),
recognition_ended_(false),
result_received_(false),
audio_started_(false),
+ audio_ended_(false),
+ sound_started_(false),
+ sound_ended_(false),
error_(content::SPEECH_RECOGNITION_ERROR_NONE),
volume_(-1.0f) {
- recognizer_ = new SpeechRecognizerImpl(
- this, 1, std::string(), std::string(), NULL, false, std::string(),
- std::string());
+ // SpeechRecognizerImpl takes ownership of sr_engine.
+ GoogleOneShotRemoteEngine* sr_engine =
+ new GoogleOneShotRemoteEngine(NULL /* URLRequestContextGetter */);
+ GoogleOneShotRemoteEngineConfig config;
+ config.audio_num_bits_per_sample =
+ SpeechRecognizerImpl::kNumBitsPerAudioSample;
+ config.audio_sample_rate = SpeechRecognizerImpl::kAudioSampleRate;
+ config.filter_profanities = false;
+ sr_engine->SetConfig(config);
+
+ recognizer_ = new SpeechRecognizerImpl(this, 1, sr_engine);
recognizer_->SetAudioManagerForTesting(audio_manager_.get());
+
int audio_packet_length_bytes =
(SpeechRecognizerImpl::kAudioSampleRate *
GoogleOneShotRemoteEngine::kAudioPacketIntervalMs *
@@ -115,13 +128,33 @@ class SpeechRecognizerImplTest : public content::SpeechRecognitionEventListener,
audio_packet_.resize(audio_packet_length_bytes);
}
+ void CheckEventsConsistency() {
+ // Note: "!x || y" == "x implies y".
+ EXPECT_TRUE(!recognition_ended_ || recognition_started_);
+ EXPECT_TRUE(!audio_ended_ || audio_started_);
+ EXPECT_TRUE(!sound_ended_ || sound_started_);
+ EXPECT_TRUE(!audio_started_ || recognition_started_);
+ EXPECT_TRUE(!sound_started_ || audio_started_);
+ EXPECT_TRUE(!audio_ended_ || (sound_ended_ || !sound_started_));
+ EXPECT_TRUE(!recognition_ended_ || (audio_ended_ || !audio_started_));
+ }
+
+ void CheckFinalEventsConsistency() {
+ // Note: "!(x ^ y)" == "(x && y) || (!x && !y)".
+ EXPECT_FALSE(recognition_started_ ^ recognition_ended_);
+ EXPECT_FALSE(audio_started_ ^ audio_ended_);
+ EXPECT_FALSE(sound_started_ ^ sound_ended_);
+ }
+
// Overridden from content::SpeechRecognitionEventListener:
virtual void OnAudioStart(int caller_id) OVERRIDE {
audio_started_ = true;
+ CheckEventsConsistency();
}
virtual void OnAudioEnd(int caller_id) OVERRIDE {
audio_ended_ = true;
+ CheckEventsConsistency();
}
virtual void OnRecognitionResult(
@@ -130,8 +163,9 @@ class SpeechRecognizerImplTest : public content::SpeechRecognitionEventListener,
}
virtual void OnRecognitionError(
- int caller_id,
- const content::SpeechRecognitionError& error) OVERRIDE {
+ int caller_id, const content::SpeechRecognitionError& error) OVERRIDE {
+ EXPECT_TRUE(recognition_started_);
+ EXPECT_FALSE(recognition_ended_);
error_ = error.code;
}
@@ -143,12 +177,25 @@ class SpeechRecognizerImplTest : public content::SpeechRecognitionEventListener,
virtual void OnRecognitionEnd(int caller_id) OVERRIDE {
recognition_ended_ = true;
+ CheckEventsConsistency();
+ }
+
+ virtual void OnRecognitionStart(int caller_id) OVERRIDE {
+ recognition_started_ = true;
+ CheckEventsConsistency();
}
- virtual void OnRecognitionStart(int caller_id) OVERRIDE {}
virtual void OnEnvironmentEstimationComplete(int caller_id) OVERRIDE {}
- virtual void OnSoundStart(int caller_id) OVERRIDE {}
- virtual void OnSoundEnd(int caller_id) OVERRIDE {}
+
+ virtual void OnSoundStart(int caller_id) OVERRIDE {
+ sound_started_ = true;
+ CheckEventsConsistency();
+ }
+
+ virtual void OnSoundEnd(int caller_id) OVERRIDE {
+ sound_ended_ = true;
+ CheckEventsConsistency();
+ }
// testing::Test methods.
virtual void SetUp() OVERRIDE {
@@ -180,10 +227,13 @@ class SpeechRecognizerImplTest : public content::SpeechRecognitionEventListener,
BrowserThreadImpl io_thread_;
scoped_refptr<SpeechRecognizerImpl> recognizer_;
scoped_ptr<AudioManager> audio_manager_;
- bool audio_ended_;
+ bool recognition_started_;
bool recognition_ended_;
bool result_received_;
bool audio_started_;
+ bool audio_ended_;
+ bool sound_started_;
+ bool sound_ended_;
content::SpeechRecognitionErrorCode error_;
TestURLFetcherFactory url_fetcher_factory_;
TestAudioInputControllerFactory audio_input_controller_factory_;
@@ -196,11 +246,12 @@ TEST_F(SpeechRecognizerImplTest, StopNoData) {
// Check for callbacks when stopping record before any audio gets recorded.
recognizer_->StartRecognition();
recognizer_->AbortRecognition();
- EXPECT_FALSE(audio_ended_);
- EXPECT_FALSE(recognition_ended_);
- EXPECT_FALSE(result_received_);
+ MessageLoop::current()->RunAllPending();
+ EXPECT_TRUE(recognition_started_);
EXPECT_FALSE(audio_started_);
+ EXPECT_FALSE(result_received_);
EXPECT_EQ(content::SPEECH_RECOGNITION_ERROR_NONE, error_);
+ CheckFinalEventsConsistency();
}
TEST_F(SpeechRecognizerImplTest, CancelNoData) {
@@ -208,17 +259,19 @@ TEST_F(SpeechRecognizerImplTest, CancelNoData) {
// recorded.
recognizer_->StartRecognition();
recognizer_->StopAudioCapture();
- EXPECT_TRUE(audio_ended_);
- EXPECT_TRUE(recognition_ended_);
- EXPECT_FALSE(result_received_);
+ MessageLoop::current()->RunAllPending();
+ EXPECT_TRUE(recognition_started_);
EXPECT_FALSE(audio_started_);
+ EXPECT_FALSE(result_received_);
EXPECT_EQ(content::SPEECH_RECOGNITION_ERROR_NONE, error_);
+ CheckFinalEventsConsistency();
}
TEST_F(SpeechRecognizerImplTest, StopWithData) {
// Start recording, give some data and then stop. This should wait for the
// network callback to arrive before completion.
recognizer_->StartRecognition();
+ MessageLoop::current()->RunAllPending();
TestAudioInputController* controller =
audio_input_controller_factory_.controller();
ASSERT_TRUE(controller);
@@ -238,6 +291,7 @@ TEST_F(SpeechRecognizerImplTest, StopWithData) {
}
recognizer_->StopAudioCapture();
+ MessageLoop::current()->RunAllPending();
EXPECT_TRUE(audio_started_);
EXPECT_TRUE(audio_ended_);
EXPECT_FALSE(recognition_ended_);
@@ -256,16 +310,17 @@ TEST_F(SpeechRecognizerImplTest, StopWithData) {
fetcher->SetResponseString(
"{\"status\":0,\"hypotheses\":[{\"utterance\":\"123\"}]}");
fetcher->delegate()->OnURLFetchComplete(fetcher);
-
+ MessageLoop::current()->RunAllPending();
EXPECT_TRUE(recognition_ended_);
EXPECT_TRUE(result_received_);
EXPECT_EQ(content::SPEECH_RECOGNITION_ERROR_NONE, error_);
+ CheckFinalEventsConsistency();
}
TEST_F(SpeechRecognizerImplTest, CancelWithData) {
- // Start recording, give some data and then cancel. This should create
- // a network request but give no callbacks.
+ // Start recording, give some data and then cancel.
recognizer_->StartRecognition();
+ MessageLoop::current()->RunAllPending();
TestAudioInputController* controller =
audio_input_controller_factory_.controller();
ASSERT_TRUE(controller);
@@ -273,18 +328,20 @@ TEST_F(SpeechRecognizerImplTest, CancelWithData) {
audio_packet_.size());
MessageLoop::current()->RunAllPending();
recognizer_->AbortRecognition();
+ MessageLoop::current()->RunAllPending();
ASSERT_TRUE(url_fetcher_factory_.GetFetcherByID(0));
+ EXPECT_TRUE(recognition_started_);
EXPECT_TRUE(audio_started_);
- EXPECT_FALSE(audio_ended_);
- EXPECT_FALSE(recognition_ended_);
EXPECT_FALSE(result_received_);
EXPECT_EQ(content::SPEECH_RECOGNITION_ERROR_NONE, error_);
+ CheckFinalEventsConsistency();
}
TEST_F(SpeechRecognizerImplTest, ConnectionError) {
// Start recording, give some data and then stop. Issue the network callback
// with a connection error and verify that the recognizer bubbles the error up
recognizer_->StartRecognition();
+ MessageLoop::current()->RunAllPending();
TestAudioInputController* controller =
audio_input_controller_factory_.controller();
ASSERT_TRUE(controller);
@@ -295,6 +352,7 @@ TEST_F(SpeechRecognizerImplTest, ConnectionError) {
ASSERT_TRUE(fetcher);
recognizer_->StopAudioCapture();
+ MessageLoop::current()->RunAllPending();
EXPECT_TRUE(audio_started_);
EXPECT_TRUE(audio_ended_);
EXPECT_FALSE(recognition_ended_);
@@ -310,16 +368,18 @@ TEST_F(SpeechRecognizerImplTest, ConnectionError) {
fetcher->set_response_code(0);
fetcher->SetResponseString("");
fetcher->delegate()->OnURLFetchComplete(fetcher);
-
- EXPECT_FALSE(recognition_ended_);
+ MessageLoop::current()->RunAllPending();
+ EXPECT_TRUE(recognition_ended_);
EXPECT_FALSE(result_received_);
EXPECT_EQ(content::SPEECH_RECOGNITION_ERROR_NETWORK, error_);
+ CheckFinalEventsConsistency();
}
TEST_F(SpeechRecognizerImplTest, ServerError) {
// Start recording, give some data and then stop. Issue the network callback
// with a 500 error and verify that the recognizer bubbles the error up
recognizer_->StartRecognition();
+ MessageLoop::current()->RunAllPending();
TestAudioInputController* controller =
audio_input_controller_factory_.controller();
ASSERT_TRUE(controller);
@@ -330,6 +390,7 @@ TEST_F(SpeechRecognizerImplTest, ServerError) {
ASSERT_TRUE(fetcher);
recognizer_->StopAudioCapture();
+ MessageLoop::current()->RunAllPending();
EXPECT_TRUE(audio_started_);
EXPECT_TRUE(audio_ended_);
EXPECT_FALSE(recognition_ended_);
@@ -344,31 +405,34 @@ TEST_F(SpeechRecognizerImplTest, ServerError) {
fetcher->set_response_code(500);
fetcher->SetResponseString("Internal Server Error");
fetcher->delegate()->OnURLFetchComplete(fetcher);
-
- EXPECT_FALSE(recognition_ended_);
+ MessageLoop::current()->RunAllPending();
+ EXPECT_TRUE(recognition_ended_);
EXPECT_FALSE(result_received_);
EXPECT_EQ(content::SPEECH_RECOGNITION_ERROR_NETWORK, error_);
+ CheckFinalEventsConsistency();
}
TEST_F(SpeechRecognizerImplTest, AudioControllerErrorNoData) {
// Check if things tear down properly if AudioInputController threw an error.
recognizer_->StartRecognition();
+ MessageLoop::current()->RunAllPending();
TestAudioInputController* controller =
audio_input_controller_factory_.controller();
ASSERT_TRUE(controller);
controller->event_handler()->OnError(controller, 0);
MessageLoop::current()->RunAllPending();
+ EXPECT_TRUE(recognition_started_);
EXPECT_FALSE(audio_started_);
- EXPECT_FALSE(audio_ended_);
- EXPECT_FALSE(recognition_ended_);
EXPECT_FALSE(result_received_);
EXPECT_EQ(content::SPEECH_RECOGNITION_ERROR_AUDIO, error_);
+ CheckFinalEventsConsistency();
}
TEST_F(SpeechRecognizerImplTest, AudioControllerErrorWithData) {
// Check if things tear down properly if AudioInputController threw an error
// after giving some audio data.
recognizer_->StartRecognition();
+ MessageLoop::current()->RunAllPending();
TestAudioInputController* controller =
audio_input_controller_factory_.controller();
ASSERT_TRUE(controller);
@@ -377,36 +441,35 @@ TEST_F(SpeechRecognizerImplTest, AudioControllerErrorWithData) {
controller->event_handler()->OnError(controller, 0);
MessageLoop::current()->RunAllPending();
ASSERT_TRUE(url_fetcher_factory_.GetFetcherByID(0));
+ EXPECT_TRUE(recognition_started_);
EXPECT_TRUE(audio_started_);
- EXPECT_FALSE(audio_ended_);
- EXPECT_FALSE(recognition_ended_);
EXPECT_FALSE(result_received_);
EXPECT_EQ(content::SPEECH_RECOGNITION_ERROR_AUDIO, error_);
+ CheckFinalEventsConsistency();
}
TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackIssued) {
// Start recording and give a lot of packets with audio samples set to zero.
// This should trigger the no-speech detector and issue a callback.
recognizer_->StartRecognition();
+ MessageLoop::current()->RunAllPending();
TestAudioInputController* controller =
audio_input_controller_factory_.controller();
ASSERT_TRUE(controller);
- controller = audio_input_controller_factory_.controller();
- ASSERT_TRUE(controller);
int num_packets = (SpeechRecognizerImpl::kNoSpeechTimeoutMs) /
- GoogleOneShotRemoteEngine::kAudioPacketIntervalMs;
+ GoogleOneShotRemoteEngine::kAudioPacketIntervalMs + 1;
// The vector is already filled with zero-value samples on creation.
for (int i = 0; i < num_packets; ++i) {
controller->event_handler()->OnData(controller, &audio_packet_[0],
audio_packet_.size());
}
MessageLoop::current()->RunAllPending();
+ EXPECT_TRUE(recognition_started_);
EXPECT_TRUE(audio_started_);
- EXPECT_FALSE(audio_ended_);
- EXPECT_FALSE(recognition_ended_);
EXPECT_FALSE(result_received_);
EXPECT_EQ(content::SPEECH_RECOGNITION_ERROR_NO_SPEECH, error_);
+ CheckFinalEventsConsistency();
}
TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackNotIssued) {
@@ -415,6 +478,7 @@ TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackNotIssued) {
// treated as normal speech input and the no-speech detector should not get
// triggered.
recognizer_->StartRecognition();
+ MessageLoop::current()->RunAllPending();
TestAudioInputController* controller =
audio_input_controller_factory_.controller();
ASSERT_TRUE(controller);
@@ -442,6 +506,8 @@ TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackNotIssued) {
EXPECT_FALSE(audio_ended_);
EXPECT_FALSE(recognition_ended_);
recognizer_->AbortRecognition();
+ MessageLoop::current()->RunAllPending();
+ CheckFinalEventsConsistency();
}
TEST_F(SpeechRecognizerImplTest, SetInputVolumeCallback) {
@@ -450,6 +516,7 @@ TEST_F(SpeechRecognizerImplTest, SetInputVolumeCallback) {
// get the callback during estimation phase, then get zero for the silence
// samples and proper volume for the loud audio.
recognizer_->StartRecognition();
+ MessageLoop::current()->RunAllPending();
TestAudioInputController* controller =
audio_input_controller_factory_.controller();
ASSERT_TRUE(controller);
@@ -484,6 +551,8 @@ TEST_F(SpeechRecognizerImplTest, SetInputVolumeCallback) {
EXPECT_FALSE(audio_ended_);
EXPECT_FALSE(recognition_ended_);
recognizer_->AbortRecognition();
+ MessageLoop::current()->RunAllPending();
+ CheckFinalEventsConsistency();
}
} // namespace speech
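
The consistency helpers used throughout these tests encode the listener-event ordering contract as boolean implications, which keeps each check a single expression. A compact standalone illustration of the two encodings (hypothetical flags; the real helpers check every start/end event pair):

#include <cassert>

// "x implies y" is encoded as "!x || y".
void CheckOrdering(bool audio_started, bool audio_ended,
                   bool sound_started, bool sound_ended) {
  assert(!audio_ended || audio_started);    // ended implies started
  assert(!sound_ended || sound_started);
  assert(!sound_started || audio_started);  // sound happens within capture
}

// "x iff y" is encoded as "!(x ^ y)": both events fired, or neither did.
void CheckFinal(bool audio_started, bool audio_ended) {
  assert(!(audio_started ^ audio_ended));
}

int main() {
  CheckOrdering(true, true, true, true);
  CheckFinal(true, true);
  return 0;
}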