diff options
author | satish@chromium.org <satish@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-08-12 19:57:31 +0000 |
---|---|---|
committer | satish@chromium.org <satish@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-08-12 19:57:31 +0000 |
commit | 4b9a8c385aaa926cedf5ba7c213c0fc247ef2cb4 (patch) | |
tree | 50d0565e66f421d6ce0b734e1c18eb614b7f5a06 /chrome/browser/speech | |
parent | df9a4de661636de81fccb6cfa552de94e84efa50 (diff) | |
download | chromium_src-4b9a8c385aaa926cedf5ba7c213c0fc247ef2cb4.zip chromium_src-4b9a8c385aaa926cedf5ba7c213c0fc247ef2cb4.tar.gz chromium_src-4b9a8c385aaa926cedf5ba7c213c0fc247ef2cb4.tar.bz2 |
Adds SpeechRecognizer, which provides a simple interface to record and recognize speech.
Also adds a unit test that checks the callbacks fire as expected.
TEST=unit_tests --gtest_filter=SpeechRecognizerTest.*
BUG=none
Review URL: http://codereview.chromium.org/3124009
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@55918 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/speech')
-rw-r--r-- | chrome/browser/speech/speech_recognizer.cc | 180 | ||||
-rw-r--r-- | chrome/browser/speech/speech_recognizer.h | 99 | ||||
-rw-r--r-- | chrome/browser/speech/speech_recognizer_unittest.cc | 167 |
3 files changed, 446 insertions, 0 deletions
diff --git a/chrome/browser/speech/speech_recognizer.cc b/chrome/browser/speech/speech_recognizer.cc
new file mode 100644
index 0000000..0f8b116
--- /dev/null
+++ b/chrome/browser/speech/speech_recognizer.cc
@@ -0,0 +1,180 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/speech/speech_recognizer.h"
+
+#include "base/ref_counted.h"
+#include "base/scoped_ptr.h"
+#include "chrome/browser/chrome_thread.h"
+#include "chrome/browser/profile.h"
+#include "chrome/common/net/url_request_context_getter.h"
+
+using media::AudioInputController;
+using std::list;
+using std::string;
+
+namespace {
+const char* const kDefaultSpeechRecognitionUrl =
+    "http://www.google.com/speech-api/v1/recognize?lang=en-us&client=chromium";
+const int kAudioPacketIntervalMs = 100;  // Record 100ms long audio packets.
+const int kNumAudioChannels = 1;  // Speech is recorded as mono.
+const int kNumBitsPerAudioSample = 16;
+}  // namespace
+
+namespace speech_input {
+
+SpeechRecognizer::SpeechRecognizer(Delegate* delegate, int render_view_id)
+    : delegate_(delegate),
+      render_view_id_(render_view_id) {
+}
+
+SpeechRecognizer::~SpeechRecognizer() {
+  // Recording should have stopped earlier due to the endpointer or
+  // |StopRecording| being called.
+  DCHECK(!audio_controller_.get());
+  DCHECK(!request_.get() || !request_->HasPendingRequest());
+  DCHECK(audio_buffers_.empty());
+}
+
+bool SpeechRecognizer::StartRecording() {
+  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
+  DCHECK(!audio_controller_.get());
+  DCHECK(!request_.get() || !request_->HasPendingRequest());
+
+  audio_controller_ = AudioInputController::Create(this,
+      AudioManager::AUDIO_PCM_LINEAR, kNumAudioChannels,
+      AudioManager::kTelephoneSampleRate, kNumBitsPerAudioSample,
+      (AudioManager::kTelephoneSampleRate * kAudioPacketIntervalMs) / 1000);
+  DCHECK(audio_controller_.get());
+  LOG(INFO) << "SpeechRecognizer starting record.";
+  audio_controller_->Record();
+
+  return true;
+}
+
+void SpeechRecognizer::CancelRecognition() {
+  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
+  DCHECK(audio_controller_.get() || request_.get());
+
+  // Stop recording if it is still in progress.
+  if (audio_controller_.get()) {
+    LOG(INFO) << "SpeechRecognizer stopping record.";
+    audio_controller_->Close();
+    audio_controller_ = NULL;  // Releases the ref ptr.
+  }
+
+  LOG(INFO) << "SpeechRecognizer canceling recognition.";
+  ReleaseAudioBuffers();
+  request_.reset();
+}
+
+void SpeechRecognizer::StopRecording() {
+  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
+
+  // If audio recording has already stopped and we are in recognition phase,
+  // silently ignore any more calls to stop recording.
+  if (!audio_controller_.get())
+    return;
+
+  LOG(INFO) << "SpeechRecognizer stopping record.";
+  audio_controller_->Close();
+  audio_controller_ = NULL;  // Releases the ref ptr.
+  delegate_->DidCompleteRecording(render_view_id_);
+
+  // If no audio has been captured yet, end the recognition sequence here.
+  if (audio_buffers_.empty()) {
+    // Guard against the delegate freeing us until we finish our job.
+    scoped_refptr<SpeechRecognizer> me(this);
+    delegate_->DidCompleteRecognition(render_view_id_);
+    return;
+  }
+
+  // We now have recorded audio in our buffers, so start a recognition request.
+  // Since the http request takes a single string as POST data, allocate
+  // one and copy over bytes from the audio buffers to the string.
+  size_t audio_buffer_length = 0;  // Matches the type of string::length().
+  for (AudioBufferQueue::iterator it = audio_buffers_.begin();
+       it != audio_buffers_.end(); ++it) {
+    audio_buffer_length += (*it)->length();
+  }
+  string data;
+  data.reserve(audio_buffer_length);
+  for (AudioBufferQueue::iterator it = audio_buffers_.begin();
+       it != audio_buffers_.end(); ++it) {
+    data.append(*(*it));
+  }
+  DCHECK(!request_.get());
+  request_.reset(new SpeechRecognitionRequest(
+      Profile::GetDefaultRequestContext(),
+      GURL(kDefaultSpeechRecognitionUrl),
+      this));
+  request_->Send(data);
+  ReleaseAudioBuffers();  // No need to keep the audio anymore.
+}
+
+void SpeechRecognizer::ReleaseAudioBuffers() {
+  for (AudioBufferQueue::iterator it = audio_buffers_.begin();
+       it != audio_buffers_.end(); ++it)
+    delete *it;
+  audio_buffers_.clear();
+}
+
+// Invoked in the audio thread.
+void SpeechRecognizer::OnError(AudioInputController* controller,
+                               int error_code) {
+  ChromeThread::PostTask(ChromeThread::IO, FROM_HERE,
+                         NewRunnableMethod(this,
+                                           &SpeechRecognizer::HandleOnError,
+                                           error_code));
+}
+
+void SpeechRecognizer::HandleOnError(int error_code) {
+  LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code;
+
+  // Check if we are still recording before canceling recognition, as
+  // recording might have been stopped after this error was posted to the queue
+  // by |OnError|.
+  if (!audio_controller_.get())
+    return;
+
+  CancelRecognition();
+  delegate_->DidCompleteRecording(render_view_id_);
+  delegate_->DidCompleteRecognition(render_view_id_);
+}
+
+void SpeechRecognizer::OnData(AudioInputController* controller,
+                              const uint8* data, uint32 size) {
+  if (size == 0)  // This could happen when recording stops and is normal.
+    return;
+
+  string* str_data = new string(reinterpret_cast<const char*>(data), size);
+  ChromeThread::PostTask(ChromeThread::IO, FROM_HERE,
+                         NewRunnableMethod(this,
+                                           &SpeechRecognizer::HandleOnData,
+                                           str_data));
+}
+
+void SpeechRecognizer::HandleOnData(string* data) {
+  // Check if we are still recording and if not discard this buffer, as
+  // recording might have been stopped after this buffer was posted to the queue
+  // by |OnData|.
+  if (!audio_controller_.get()) {
+    delete data;
+    return;
+  }
+
+  // TODO(satish): Once we have streaming POST, start sending the data received
+  // here as POST chunks.
+  audio_buffers_.push_back(data);
+}
+
+void SpeechRecognizer::SetRecognitionResult(bool error, const string16& value) {
+  delegate_->SetRecognitionResult(render_view_id_, error, value);
+
+  // Guard against the delegate freeing us until we finish our job.
+  scoped_refptr<SpeechRecognizer> me(this);
+  delegate_->DidCompleteRecognition(render_view_id_);
+}
+
+}  // namespace speech_input
diff --git a/chrome/browser/speech/speech_recognizer.h b/chrome/browser/speech/speech_recognizer.h
new file mode 100644
index 0000000..fd8c7c4
--- /dev/null
+++ b/chrome/browser/speech/speech_recognizer.h
@@ -0,0 +1,99 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
+#define CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
+
+#include <list>
+#include <string>
+
+#include "base/ref_counted.h"
+#include "base/scoped_ptr.h"
+#include "chrome/browser/speech/speech_recognition_request.h"
+#include "media/audio/audio_input_controller.h"
+
+namespace speech_input {
+
+// Records audio, sends recorded audio to server and translates server response
+// to recognition result.
+class SpeechRecognizer
+    : public base::RefCountedThreadSafe<SpeechRecognizer>,
+      public media::AudioInputController::EventHandler,
+      public SpeechRecognitionRequestDelegate {
+ public:
+  // Implemented by the caller to receive recognition events.
+  class Delegate {
+   public:
+    // Forwards the outcome reported by the recognition request.
+    virtual void SetRecognitionResult(int render_view_id, bool error,
+                                      const string16& value) = 0;
+
+    // Invoked when audio recording stops, either due to the end pointer
+    // detecting silence in user input or if |StopRecording| was called. The
+    // delegate has to wait until |DidCompleteRecognition| is invoked before
+    // destroying the |SpeechRecognizer| object.
+    virtual void DidCompleteRecording(int render_view_id) = 0;
+
+    // This is guaranteed to be the last method invoked in the recognition
+    // sequence and the |SpeechRecognizer| object can be freed up if necessary.
+    virtual void DidCompleteRecognition(int render_view_id) = 0;
+
+   protected:
+    virtual ~Delegate() {}
+  };
+
+  SpeechRecognizer(Delegate* delegate, int render_view_id);
+  virtual ~SpeechRecognizer();
+
+  // Starts audio recording; recognition runs after recording ends. An instance
+  // can be reused, but only once the previous request has completed (i.e.
+  // after receiving Delegate::DidCompleteRecognition).
+  bool StartRecording();
+
+  // Stops recording audio and starts recognition.
+  void StopRecording();
+
+  // Stops recording audio and cancels recognition. Any audio recorded so far
+  // gets discarded.
+  void CancelRecognition();
+
+  // AudioInputController::EventHandler methods.
+  virtual void OnCreated(media::AudioInputController* controller) { }
+  virtual void OnRecording(media::AudioInputController* controller) { }
+  virtual void OnError(media::AudioInputController* controller, int error_code);
+  virtual void OnData(media::AudioInputController* controller,
+                      const uint8* data, uint32 size);
+
+  // SpeechRecognitionRequestDelegate methods.
+  virtual void SetRecognitionResult(bool error, const string16& value);
+
+ private:
+  void ReleaseAudioBuffers();
+
+  void HandleOnError(int error_code);  // Handles OnError in the IO thread.
+
+  // Handles OnData in the IO thread. Takes ownership of |data|.
+  void HandleOnData(std::string* data);
+
+  Delegate* delegate_;
+  int render_view_id_;
+
+  // Buffer holding the recorded audio. Owns the strings inside the list.
+  typedef std::list<std::string*> AudioBufferQueue;
+  AudioBufferQueue audio_buffers_;
+
+  scoped_ptr<SpeechRecognitionRequest> request_;
+  scoped_refptr<media::AudioInputController> audio_controller_;
+
+  DISALLOW_COPY_AND_ASSIGN(SpeechRecognizer);
+};
+
+// Works around certain Visual Studio versions that get confused between
+// multiple Delegate classes and report a C2500 error (seen on the try bots;
+// the workaround was not needed on the author's machine).
+typedef SpeechRecognizer::Delegate SpeechRecognizerDelegate;
+
+}  // namespace speech_input
+
+#endif  // CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
diff --git a/chrome/browser/speech/speech_recognizer_unittest.cc b/chrome/browser/speech/speech_recognizer_unittest.cc
new file mode 100644
index 0000000..1068364
--- /dev/null
+++ b/chrome/browser/speech/speech_recognizer_unittest.cc
@@ -0,0 +1,167 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/chrome_thread.h"
+#include "chrome/browser/speech/speech_recognizer.h"
+#include "chrome/common/net/test_url_fetcher_factory.h"
+#include "media/audio/test_audio_input_controller_factory.h"
+#include "net/url_request/url_request_status.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+using media::AudioInputController;
+using media::TestAudioInputController;
+using media::TestAudioInputControllerFactory;
+
+namespace speech_input {
+
+class SpeechRecognizerTest : public SpeechRecognizerDelegate,
+                             public testing::Test {
+ public:
+  SpeechRecognizerTest()
+      : io_thread_(ChromeThread::IO, &message_loop_),
+        ALLOW_THIS_IN_INITIALIZER_LIST(
+            recognizer_(new SpeechRecognizer(this, 1))) {
+  }
+
+  void StartTest() {
+    EXPECT_TRUE(recognizer_->StartRecording());
+  }
+
+  // SpeechRecognizer::Delegate methods. Each one only records that it fired.
+  virtual void SetRecognitionResult(int render_view_id, bool error,
+                                    const string16& result) {
+    result_received_ = true;
+  }
+
+  virtual void DidCompleteRecording(int render_view_id) {
+    recording_complete_ = true;
+  }
+
+  virtual void DidCompleteRecognition(int render_view_id) {
+    recognition_complete_ = true;
+  }
+
+  // testing::Test methods.
+  virtual void SetUp() {
+    result_received_ = false;
+    recording_complete_ = false;
+    recognition_complete_ = false;
+    URLFetcher::set_factory(&url_fetcher_factory_);
+    AudioInputController::set_factory(&audio_input_controller_factory_);
+  }
+
+  virtual void TearDown() {
+    URLFetcher::set_factory(NULL);
+    AudioInputController::set_factory(NULL);
+  }
+
+ protected:
+  MessageLoopForIO message_loop_;
+  ChromeThread io_thread_;
+  scoped_refptr<SpeechRecognizer> recognizer_;
+  bool recording_complete_;
+  bool recognition_complete_;
+  bool result_received_;
+  TestURLFetcherFactory url_fetcher_factory_;
+  TestAudioInputControllerFactory audio_input_controller_factory_;
+};
+
+TEST_F(SpeechRecognizerTest, StopNoData) {
+  // Check for callbacks when stopping record before any audio gets recorded.
+  EXPECT_TRUE(recognizer_->StartRecording());
+  recognizer_->StopRecording();
+  EXPECT_TRUE(recording_complete_);
+  EXPECT_TRUE(recognition_complete_);
+  EXPECT_FALSE(result_received_);
+}
+
+TEST_F(SpeechRecognizerTest, CancelNoData) {
+  // Check for callbacks when canceling recognition before any audio gets
+  // recorded.
+  EXPECT_TRUE(recognizer_->StartRecording());
+  recognizer_->CancelRecognition();
+  EXPECT_FALSE(recording_complete_);
+  EXPECT_FALSE(recognition_complete_);
+  EXPECT_FALSE(result_received_);
+}
+
+TEST_F(SpeechRecognizerTest, StopWithData) {
+  uint8 data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
+
+  // Start recording, give some data and then stop. This should wait for the
+  // network callback to arrive before completion.
+  EXPECT_TRUE(recognizer_->StartRecording());
+  TestAudioInputController* controller =
+      audio_input_controller_factory_.controller();
+  ASSERT_TRUE(controller);
+  // Guard the event-handler dereference on the next line.
+  ASSERT_TRUE(controller->event_handler());
+  controller->event_handler()->OnData(controller, data, sizeof(data));
+  MessageLoop::current()->RunAllPending();
+  recognizer_->StopRecording();
+  EXPECT_TRUE(recording_complete_);
+  EXPECT_FALSE(recognition_complete_);
+  EXPECT_FALSE(result_received_);
+
+  // Issue the network callback to complete the process.
+  TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
+  ASSERT_TRUE(fetcher);
+  URLRequestStatus status;
+  status.set_status(URLRequestStatus::SUCCESS);
+  fetcher->delegate()->OnURLFetchComplete(fetcher, fetcher->original_url(),
+                                          status, 200, ResponseCookies(), "");
+  EXPECT_TRUE(recognition_complete_);
+  EXPECT_TRUE(result_received_);
+}
+
+TEST_F(SpeechRecognizerTest, CancelWithData) {
+  uint8 data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
+
+  // Start recording, give some data and then cancel. This should not create
+  // a network request and finish immediately.
+  EXPECT_TRUE(recognizer_->StartRecording());
+  TestAudioInputController* controller =
+      audio_input_controller_factory_.controller();
+  ASSERT_TRUE(controller);
+  controller->event_handler()->OnData(controller, data, sizeof(data));
+  MessageLoop::current()->RunAllPending();
+  recognizer_->CancelRecognition();
+  EXPECT_EQ(NULL, url_fetcher_factory_.GetFetcherByID(0));
+  EXPECT_FALSE(recording_complete_);
+  EXPECT_FALSE(recognition_complete_);
+  EXPECT_FALSE(result_received_);
+}
+
+TEST_F(SpeechRecognizerTest, AudioControllerErrorNoData) {
+  // Check if things tear down properly if AudioInputController threw an error.
+  EXPECT_TRUE(recognizer_->StartRecording());
+  TestAudioInputController* controller =
+      audio_input_controller_factory_.controller();
+  ASSERT_TRUE(controller);
+  controller->event_handler()->OnError(controller, 0);
+  MessageLoop::current()->RunAllPending();
+  EXPECT_TRUE(recording_complete_);
+  EXPECT_TRUE(recognition_complete_);
+  EXPECT_FALSE(result_received_);
+}
+
+TEST_F(SpeechRecognizerTest, AudioControllerErrorWithData) {
+  uint8 data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
+
+  // Check if things tear down properly if AudioInputController threw an error
+  // after giving some audio data.
+  EXPECT_TRUE(recognizer_->StartRecording());
+  TestAudioInputController* controller =
+      audio_input_controller_factory_.controller();
+  ASSERT_TRUE(controller);
+  controller->event_handler()->OnData(controller, data, sizeof(data));
+  controller->event_handler()->OnError(controller, 0);
+  MessageLoop::current()->RunAllPending();
+  EXPECT_EQ(NULL, url_fetcher_factory_.GetFetcherByID(0));
+  EXPECT_TRUE(recording_complete_);
+  EXPECT_TRUE(recognition_complete_);
+  EXPECT_FALSE(result_received_);
+}
+
+}  // namespace speech_input
|