summaryrefslogtreecommitdiffstats
path: root/content/browser/speech/speech_recognizer_impl_unittest.cc
diff options
context:
space:
mode:
authorjanx@chromium.org <janx@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-05-20 13:35:43 +0000
committerjanx@chromium.org <janx@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-05-20 13:35:43 +0000
commitce1adc3482a91262df06fbe9a824d29817a8771d (patch)
tree28dcc4dc66366821d7597fcbe6490d091f5ddfc9 /content/browser/speech/speech_recognizer_impl_unittest.cc
parenta3f9516f938d3aa22914cdddfb7fe71f097981a4 (diff)
downloadchromium_src-ce1adc3482a91262df06fbe9a824d29817a8771d.zip
chromium_src-ce1adc3482a91262df06fbe9a824d29817a8771d.tar.gz
chromium_src-ce1adc3482a91262df06fbe9a824d29817a8771d.tar.bz2
Extract interface from content::SpeechRecognizer
SpeechRecognizer's current design assumes that the audio capture and the endpointer are always performed inside the browser. This is not going to be true for some platforms, for instance Android, where we plan to delegate not only the recognition activity, but also the audio capture and the endpointer, to the OS. TBR=avi@chromium.org (gypi) BUG=222352 Review URL: https://chromiumcodereview.appspot.com/15230003 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@201082 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'content/browser/speech/speech_recognizer_impl_unittest.cc')
-rw-r--r--content/browser/speech/speech_recognizer_impl_unittest.cc498
1 files changed, 498 insertions, 0 deletions
diff --git a/content/browser/speech/speech_recognizer_impl_unittest.cc b/content/browser/speech/speech_recognizer_impl_unittest.cc
new file mode 100644
index 0000000..8c7c2d7
--- /dev/null
+++ b/content/browser/speech/speech_recognizer_impl_unittest.cc
@@ -0,0 +1,498 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <vector>
+
+#include "content/browser/browser_thread_impl.h"
+#include "content/browser/speech/google_one_shot_remote_engine.h"
+#include "content/browser/speech/speech_recognizer_impl.h"
+#include "content/public/browser/speech_recognition_event_listener.h"
+#include "media/audio/fake_audio_input_stream.h"
+#include "media/audio/fake_audio_output_stream.h"
+#include "media/audio/mock_audio_manager.h"
+#include "media/audio/test_audio_input_controller_factory.h"
+#include "net/base/net_errors.h"
+#include "net/url_request/test_url_fetcher_factory.h"
+#include "net/url_request/url_request_status.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+using base::MessageLoopProxy;
+using media::AudioInputController;
+using media::AudioInputStream;
+using media::AudioManager;
+using media::AudioOutputStream;
+using media::AudioParameters;
+using media::TestAudioInputController;
+using media::TestAudioInputControllerFactory;
+
+namespace content {
+
+// Test fixture for SpeechRecognizerImpl. The fixture registers itself as the
+// recognizer's SpeechRecognitionEventListener and records every callback it
+// receives in the member flags below, so that tests can assert on which
+// events were delivered.
+class SpeechRecognizerImplTest : public SpeechRecognitionEventListener,
+                                 public testing::Test {
+ public:
+  // Creates a one-shot recognizer backed by a GoogleOneShotRemoteEngine and a
+  // MockAudioManager, and sizes |audio_packet_| to hold exactly one packet.
+  SpeechRecognizerImplTest()
+      : io_thread_(BrowserThread::IO, &message_loop_),
+        recognition_started_(false),
+        recognition_ended_(false),
+        result_received_(false),
+        audio_started_(false),
+        audio_ended_(false),
+        sound_started_(false),
+        sound_ended_(false),
+        error_(SPEECH_RECOGNITION_ERROR_NONE),
+        volume_(-1.0f),
+        // Initialized like |volume_| so it never holds an indeterminate value
+        // before the first OnAudioLevelsChange() callback.
+        noise_volume_(-1.0f) {
+    // SpeechRecognizer takes ownership of sr_engine.
+    SpeechRecognitionEngine* sr_engine =
+        new GoogleOneShotRemoteEngine(NULL /* URLRequestContextGetter */);
+    SpeechRecognitionEngineConfig config;
+    config.audio_num_bits_per_sample =
+        SpeechRecognizerImpl::kNumBitsPerAudioSample;
+    config.audio_sample_rate = SpeechRecognizerImpl::kAudioSampleRate;
+    config.filter_profanities = false;
+    sr_engine->SetConfig(config);
+
+    const int kTestingSessionId = 1;
+    const bool kOneShotMode = true;
+    recognizer_ = new SpeechRecognizerImpl(
+        this, kTestingSessionId, kOneShotMode, sr_engine);
+    audio_manager_.reset(new media::MockAudioManager(
+        base::MessageLoop::current()->message_loop_proxy()));
+    recognizer_->SetAudioManagerForTests(audio_manager_.get());
+
+    // Size of one packet in bytes: sample rate * packet interval (ms) *
+    // channel count * bits per sample, divided by 8 (bits -> bytes) and by
+    // 1000 (ms -> s).
+    int audio_packet_length_bytes =
+        (SpeechRecognizerImpl::kAudioSampleRate *
+         GoogleOneShotRemoteEngine::kAudioPacketIntervalMs *
+         ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout) *
+         SpeechRecognizerImpl::kNumBitsPerAudioSample) / (8 * 1000);
+    audio_packet_.resize(audio_packet_length_bytes);
+  }
+
+  // Verifies that the events recorded so far respect their expected ordering.
+  // Called from the event callbacks, so it must hold at any point in time.
+  void CheckEventsConsistency() {
+    // Note: "!x || y" == "x implies y".
+    EXPECT_TRUE(!recognition_ended_ || recognition_started_);
+    EXPECT_TRUE(!audio_ended_ || audio_started_);
+    EXPECT_TRUE(!sound_ended_ || sound_started_);
+    EXPECT_TRUE(!audio_started_ || recognition_started_);
+    EXPECT_TRUE(!sound_started_ || audio_started_);
+    EXPECT_TRUE(!audio_ended_ || (sound_ended_ || !sound_started_));
+    EXPECT_TRUE(!recognition_ended_ || (audio_ended_ || !audio_started_));
+  }
+
+  // Verifies that, once a test is over, every "started" event is paired with
+  // its "ended" event: either both were delivered or neither was.
+  void CheckFinalEventsConsistency() {
+    EXPECT_EQ(recognition_started_, recognition_ended_);
+    EXPECT_EQ(audio_started_, audio_ended_);
+    EXPECT_EQ(sound_started_, sound_ended_);
+  }
+
+  // Overridden from SpeechRecognitionEventListener:
+  virtual void OnAudioStart(int session_id) OVERRIDE {
+    audio_started_ = true;
+    CheckEventsConsistency();
+  }
+
+  virtual void OnAudioEnd(int session_id) OVERRIDE {
+    audio_ended_ = true;
+    CheckEventsConsistency();
+  }
+
+  virtual void OnRecognitionResults(
+      int session_id, const SpeechRecognitionResults& results) OVERRIDE {
+    result_received_ = true;
+  }
+
+  // Records the error code. An error is only expected while a recognition is
+  // in progress, i.e. after its start and before its end.
+  virtual void OnRecognitionError(
+      int session_id, const SpeechRecognitionError& error) OVERRIDE {
+    EXPECT_TRUE(recognition_started_);
+    EXPECT_FALSE(recognition_ended_);
+    error_ = error.code;
+  }
+
+  virtual void OnAudioLevelsChange(int session_id, float volume,
+                                   float noise_volume) OVERRIDE {
+    volume_ = volume;
+    noise_volume_ = noise_volume;
+  }
+
+  virtual void OnRecognitionEnd(int session_id) OVERRIDE {
+    recognition_ended_ = true;
+    CheckEventsConsistency();
+  }
+
+  virtual void OnRecognitionStart(int session_id) OVERRIDE {
+    recognition_started_ = true;
+    CheckEventsConsistency();
+  }
+
+  virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE {}
+
+  virtual void OnSoundStart(int session_id) OVERRIDE {
+    sound_started_ = true;
+    CheckEventsConsistency();
+  }
+
+  virtual void OnSoundEnd(int session_id) OVERRIDE {
+    sound_ended_ = true;
+    CheckEventsConsistency();
+  }
+
+  // testing::Test methods.
+  // Installs the test audio input controller factory for the duration of the
+  // test and removes it again afterwards.
+  virtual void SetUp() OVERRIDE {
+    AudioInputController::set_factory_for_testing(
+        &audio_input_controller_factory_);
+  }
+
+  virtual void TearDown() OVERRIDE {
+    AudioInputController::set_factory_for_testing(NULL);
+  }
+
+  void FillPacketWithTestWaveform() {
+    // Fill the input with a simple pattern, a 125Hz sawtooth waveform.
+    for (size_t i = 0; i < audio_packet_.size(); ++i)
+      audio_packet_[i] = static_cast<uint8>(i);
+  }
+
+  // Fills the packet with a deterministic pseudo-noise pattern: a running sum
+  // in steps of 175, reduced modulo 100.
+  void FillPacketWithNoise() {
+    int value = 0;
+    int factor = 175;
+    for (size_t i = 0; i < audio_packet_.size(); ++i) {
+      value += factor;
+      // Explicit cast, consistent with FillPacketWithTestWaveform(); the
+      // value is always in [0, 99] so nothing is truncated.
+      audio_packet_[i] = static_cast<uint8>(value % 100);
+    }
+  }
+
+ protected:
+  base::MessageLoopForIO message_loop_;
+  BrowserThreadImpl io_thread_;
+  scoped_refptr<SpeechRecognizerImpl> recognizer_;
+  scoped_ptr<AudioManager> audio_manager_;
+  // Flags set by the listener callbacks above.
+  bool recognition_started_;
+  bool recognition_ended_;
+  bool result_received_;
+  bool audio_started_;
+  bool audio_ended_;
+  bool sound_started_;
+  bool sound_ended_;
+  // Last error code reported through OnRecognitionError().
+  SpeechRecognitionErrorCode error_;
+  net::TestURLFetcherFactory url_fetcher_factory_;
+  TestAudioInputControllerFactory audio_input_controller_factory_;
+  // Buffer holding one packet of audio data fed to the recognizer.
+  std::vector<uint8> audio_packet_;
+  // Last values reported through OnAudioLevelsChange(); -1.0f until then.
+  float volume_;
+  float noise_volume_;
+};
+
+TEST_F(SpeechRecognizerImplTest, StopNoData) {
+  // Stop capturing right after starting, before any audio was recorded, and
+  // verify which callbacks were delivered.
+  base::MessageLoop* const loop = base::MessageLoop::current();
+  recognizer_->StartRecognition();
+  recognizer_->StopAudioCapture();
+  loop->RunUntilIdle();
+
+  EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
+  EXPECT_TRUE(recognition_started_);
+  EXPECT_FALSE(audio_started_);
+  EXPECT_FALSE(result_received_);
+  CheckFinalEventsConsistency();
+}
+
+TEST_F(SpeechRecognizerImplTest, CancelNoData) {
+  // Abort the recognition right after starting it, before any audio was
+  // recorded, and verify which callbacks were delivered.
+  base::MessageLoop* const loop = base::MessageLoop::current();
+  recognizer_->StartRecognition();
+  recognizer_->AbortRecognition();
+  loop->RunUntilIdle();
+
+  EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED, error_);
+  EXPECT_TRUE(recognition_started_);
+  EXPECT_FALSE(audio_started_);
+  EXPECT_FALSE(result_received_);
+  CheckFinalEventsConsistency();
+}
+
+TEST_F(SpeechRecognizerImplTest, StopWithData) {
+  // Feed some audio and then stop capturing. The recognition must remain
+  // pending until the network callback arrives.
+  base::MessageLoop* const loop = base::MessageLoop::current();
+  recognizer_->StartRecognition();
+  loop->RunUntilIdle();
+  TestAudioInputController* audio_controller =
+      audio_input_controller_factory_.controller();
+  ASSERT_TRUE(audio_controller);
+
+  // Push 5 chunks of mock audio and check, after each one, that the number of
+  // uploaded chunks grew by one. This verifies that encoded data is streamed
+  // out chunk by chunk instead of waiting for the whole recording.
+  const size_t kNumChunks = 5;
+  for (size_t sent = 1; sent <= kNumChunks; ++sent) {
+    audio_controller->event_handler()->OnData(
+        audio_controller, &audio_packet_[0], audio_packet_.size());
+    loop->RunUntilIdle();
+    net::TestURLFetcher* chunk_fetcher =
+        url_fetcher_factory_.GetFetcherByID(0);
+    ASSERT_TRUE(chunk_fetcher);
+    EXPECT_EQ(sent, chunk_fetcher->upload_chunks().size());
+  }
+
+  // Audio capture is over, but no result can be available yet.
+  recognizer_->StopAudioCapture();
+  loop->RunUntilIdle();
+  EXPECT_TRUE(audio_started_);
+  EXPECT_TRUE(audio_ended_);
+  EXPECT_FALSE(recognition_ended_);
+  EXPECT_FALSE(result_received_);
+  EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
+
+  // Deliver a successful network response to finish the recognition.
+  net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
+  ASSERT_TRUE(fetcher);
+
+  net::URLRequestStatus ok_status;
+  ok_status.set_status(net::URLRequestStatus::SUCCESS);
+  fetcher->set_url(fetcher->GetOriginalURL());
+  fetcher->set_status(ok_status);
+  fetcher->set_response_code(200);
+  fetcher->SetResponseString(
+      "{\"status\":0,\"hypotheses\":[{\"utterance\":\"123\"}]}");
+  fetcher->delegate()->OnURLFetchComplete(fetcher);
+  loop->RunUntilIdle();
+
+  EXPECT_TRUE(recognition_ended_);
+  EXPECT_TRUE(result_received_);
+  EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
+  CheckFinalEventsConsistency();
+}
+
+TEST_F(SpeechRecognizerImplTest, CancelWithData) {
+  // Feed one packet of audio and then abort the recognition.
+  base::MessageLoop* const loop = base::MessageLoop::current();
+  recognizer_->StartRecognition();
+  loop->RunUntilIdle();
+  TestAudioInputController* audio_controller =
+      audio_input_controller_factory_.controller();
+  ASSERT_TRUE(audio_controller);
+
+  audio_controller->event_handler()->OnData(
+      audio_controller, &audio_packet_[0], audio_packet_.size());
+  loop->RunUntilIdle();
+  recognizer_->AbortRecognition();
+  loop->RunUntilIdle();
+
+  // A fetcher must have been created for the audio that was fed.
+  ASSERT_TRUE(url_fetcher_factory_.GetFetcherByID(0));
+  EXPECT_TRUE(recognition_started_);
+  EXPECT_TRUE(audio_started_);
+  EXPECT_FALSE(result_received_);
+  EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED, error_);
+  CheckFinalEventsConsistency();
+}
+
+TEST_F(SpeechRecognizerImplTest, ConnectionError) {
+  // Feed some audio, stop capturing, then complete the network request with a
+  // connection failure. The recognizer must bubble the error up.
+  base::MessageLoop* const loop = base::MessageLoop::current();
+  recognizer_->StartRecognition();
+  loop->RunUntilIdle();
+  TestAudioInputController* audio_controller =
+      audio_input_controller_factory_.controller();
+  ASSERT_TRUE(audio_controller);
+  audio_controller->event_handler()->OnData(
+      audio_controller, &audio_packet_[0], audio_packet_.size());
+  loop->RunUntilIdle();
+  net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
+  ASSERT_TRUE(fetcher);
+
+  // Audio capture is over, but the recognition is still waiting on the
+  // network.
+  recognizer_->StopAudioCapture();
+  loop->RunUntilIdle();
+  EXPECT_TRUE(audio_started_);
+  EXPECT_TRUE(audio_ended_);
+  EXPECT_FALSE(recognition_ended_);
+  EXPECT_FALSE(result_received_);
+  EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
+
+  // Deliver the network callback with a "connection refused" failure.
+  net::URLRequestStatus failed_status;
+  failed_status.set_status(net::URLRequestStatus::FAILED);
+  failed_status.set_error(net::ERR_CONNECTION_REFUSED);
+  fetcher->set_url(fetcher->GetOriginalURL());
+  fetcher->set_status(failed_status);
+  fetcher->set_response_code(0);
+  fetcher->SetResponseString(std::string());
+  fetcher->delegate()->OnURLFetchComplete(fetcher);
+  loop->RunUntilIdle();
+
+  EXPECT_TRUE(recognition_ended_);
+  EXPECT_FALSE(result_received_);
+  EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK, error_);
+  CheckFinalEventsConsistency();
+}
+
+TEST_F(SpeechRecognizerImplTest, ServerError) {
+  // Feed some audio, stop capturing, then complete the network request with
+  // an HTTP 500 response. The recognizer must bubble the error up.
+  base::MessageLoop* const loop = base::MessageLoop::current();
+  recognizer_->StartRecognition();
+  loop->RunUntilIdle();
+  TestAudioInputController* audio_controller =
+      audio_input_controller_factory_.controller();
+  ASSERT_TRUE(audio_controller);
+  audio_controller->event_handler()->OnData(
+      audio_controller, &audio_packet_[0], audio_packet_.size());
+  loop->RunUntilIdle();
+  net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
+  ASSERT_TRUE(fetcher);
+
+  // Audio capture is over, but the recognition is still waiting on the
+  // network.
+  recognizer_->StopAudioCapture();
+  loop->RunUntilIdle();
+  EXPECT_TRUE(audio_started_);
+  EXPECT_TRUE(audio_ended_);
+  EXPECT_FALSE(recognition_ended_);
+  EXPECT_FALSE(result_received_);
+  EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
+
+  // Deliver the network callback: the request itself succeeded, but the
+  // server answered with a 500 status code.
+  net::URLRequestStatus request_status;
+  request_status.set_status(net::URLRequestStatus::SUCCESS);
+  fetcher->set_url(fetcher->GetOriginalURL());
+  fetcher->set_status(request_status);
+  fetcher->set_response_code(500);
+  fetcher->SetResponseString("Internal Server Error");
+  fetcher->delegate()->OnURLFetchComplete(fetcher);
+  loop->RunUntilIdle();
+
+  EXPECT_TRUE(recognition_ended_);
+  EXPECT_FALSE(result_received_);
+  EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK, error_);
+  CheckFinalEventsConsistency();
+}
+
+TEST_F(SpeechRecognizerImplTest, AudioControllerErrorNoData) {
+  // The AudioInputController reports an error before delivering any audio;
+  // verify that everything is torn down properly.
+  base::MessageLoop* const loop = base::MessageLoop::current();
+  recognizer_->StartRecognition();
+  loop->RunUntilIdle();
+  TestAudioInputController* audio_controller =
+      audio_input_controller_factory_.controller();
+  ASSERT_TRUE(audio_controller);
+
+  audio_controller->event_handler()->OnError(audio_controller);
+  loop->RunUntilIdle();
+
+  EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO, error_);
+  EXPECT_TRUE(recognition_started_);
+  EXPECT_FALSE(audio_started_);
+  EXPECT_FALSE(result_received_);
+  CheckFinalEventsConsistency();
+}
+
+TEST_F(SpeechRecognizerImplTest, AudioControllerErrorWithData) {
+  // The AudioInputController reports an error after having delivered some
+  // audio data; verify that everything is torn down properly.
+  base::MessageLoop* const loop = base::MessageLoop::current();
+  recognizer_->StartRecognition();
+  loop->RunUntilIdle();
+  TestAudioInputController* audio_controller =
+      audio_input_controller_factory_.controller();
+  ASSERT_TRUE(audio_controller);
+
+  // The data and the error are delivered back to back, without pumping the
+  // message loop in between.
+  audio_controller->event_handler()->OnData(
+      audio_controller, &audio_packet_[0], audio_packet_.size());
+  audio_controller->event_handler()->OnError(audio_controller);
+  loop->RunUntilIdle();
+
+  ASSERT_TRUE(url_fetcher_factory_.GetFetcherByID(0));
+  EXPECT_TRUE(recognition_started_);
+  EXPECT_TRUE(audio_started_);
+  EXPECT_FALSE(result_received_);
+  EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO, error_);
+  CheckFinalEventsConsistency();
+}
+
+TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackIssued) {
+  // Feed enough all-zero packets to exceed the no-speech timeout. This should
+  // trigger the no-speech detector and produce an error callback.
+  base::MessageLoop* const loop = base::MessageLoop::current();
+  recognizer_->StartRecognition();
+  loop->RunUntilIdle();
+  TestAudioInputController* audio_controller =
+      audio_input_controller_factory_.controller();
+  ASSERT_TRUE(audio_controller);
+
+  // One packet more than what fits into the no-speech timeout.
+  const int packets_to_send =
+      (SpeechRecognizerImpl::kNoSpeechTimeoutMs) /
+      GoogleOneShotRemoteEngine::kAudioPacketIntervalMs + 1;
+  // The packet still holds the zero-valued samples it was created with.
+  for (int sent = 0; sent < packets_to_send; ++sent) {
+    audio_controller->event_handler()->OnData(
+        audio_controller, &audio_packet_[0], audio_packet_.size());
+  }
+  loop->RunUntilIdle();
+
+  EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NO_SPEECH, error_);
+  EXPECT_TRUE(recognition_started_);
+  EXPECT_TRUE(audio_started_);
+  EXPECT_FALSE(result_received_);
+  CheckFinalEventsConsistency();
+}
+
+TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackNotIssued) {
+  // Start recording and give a lot of packets with audio samples set to zero
+  // and then some more with reasonably loud audio samples. This should be
+  // treated as normal speech input and the no-speech detector should not get
+  // triggered.
+  recognizer_->StartRecognition();
+  base::MessageLoop::current()->RunUntilIdle();
+  // The controller is looked up (and checked) once; nothing between here and
+  // its uses below replaces it.
+  TestAudioInputController* controller =
+      audio_input_controller_factory_.controller();
+  ASSERT_TRUE(controller);
+
+  int num_packets = (SpeechRecognizerImpl::kNoSpeechTimeoutMs) /
+                    GoogleOneShotRemoteEngine::kAudioPacketIntervalMs;
+
+  // First half: the vector is already filled with zero value samples on
+  // create.
+  for (int i = 0; i < num_packets / 2; ++i) {
+    controller->event_handler()->OnData(controller, &audio_packet_[0],
+                                        audio_packet_.size());
+  }
+
+  // Second half: the test waveform.
+  FillPacketWithTestWaveform();
+  for (int i = 0; i < num_packets / 2; ++i) {
+    controller->event_handler()->OnData(controller, &audio_packet_[0],
+                                        audio_packet_.size());
+  }
+
+  // No error was reported and the recognition is still capturing audio.
+  base::MessageLoop::current()->RunUntilIdle();
+  EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
+  EXPECT_TRUE(audio_started_);
+  EXPECT_FALSE(audio_ended_);
+  EXPECT_FALSE(recognition_ended_);
+
+  // Abort so that every started event gets its matching end event.
+  recognizer_->AbortRecognition();
+  base::MessageLoop::current()->RunUntilIdle();
+  CheckFinalEventsConsistency();
+}
+
+TEST_F(SpeechRecognizerImplTest, SetInputVolumeCallback) {
+  // Start recording and feed noise packets for the duration of the
+  // endpointer's estimation phase, checking that no volume callback arrives
+  // during that phase. Then feed one more noise packet and one packet with
+  // the test waveform, checking the volume reported after each of them.
+  recognizer_->StartRecognition();
+  base::MessageLoop::current()->RunUntilIdle();
+  // The controller is looked up (and checked) once; nothing between here and
+  // its uses below replaces it.
+  TestAudioInputController* controller =
+      audio_input_controller_factory_.controller();
+  ASSERT_TRUE(controller);
+
+  // Feed some samples to begin with for the endpointer to do noise estimation.
+  int num_packets = SpeechRecognizerImpl::kEndpointerEstimationTimeMs /
+                    GoogleOneShotRemoteEngine::kAudioPacketIntervalMs;
+  FillPacketWithNoise();
+  for (int i = 0; i < num_packets; ++i) {
+    controller->event_handler()->OnData(controller, &audio_packet_[0],
+                                        audio_packet_.size());
+  }
+  base::MessageLoop::current()->RunUntilIdle();
+  EXPECT_EQ(-1.0f, volume_);  // No audio volume set yet.
+
+  // Feed one more packet. The buffer has not been refilled since
+  // FillPacketWithNoise(), so it still carries the noise pattern (not zero
+  // value samples); the expected volume below corresponds to that pattern.
+  controller->event_handler()->OnData(controller, &audio_packet_[0],
+                                      audio_packet_.size());
+  base::MessageLoop::current()->RunUntilIdle();
+  EXPECT_FLOAT_EQ(0.74939233f, volume_);
+
+  // Now feed the test waveform and check both reported levels.
+  FillPacketWithTestWaveform();
+  controller->event_handler()->OnData(controller, &audio_packet_[0],
+                                      audio_packet_.size());
+  base::MessageLoop::current()->RunUntilIdle();
+  EXPECT_FLOAT_EQ(0.89926866f, volume_);
+  EXPECT_FLOAT_EQ(0.75071919f, noise_volume_);
+
+  // No error was reported and the recognition is still capturing audio.
+  EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
+  EXPECT_FALSE(audio_ended_);
+  EXPECT_FALSE(recognition_ended_);
+
+  // Abort so that every started event gets its matching end event.
+  recognizer_->AbortRecognition();
+  base::MessageLoop::current()->RunUntilIdle();
+  CheckFinalEventsConsistency();
+}
+
+} // namespace content