summaryrefslogtreecommitdiffstats
path: root/chrome/browser/speech
diff options
context:
space:
mode:
authorsatish@chromium.org <satish@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-08-12 19:57:31 +0000
committersatish@chromium.org <satish@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-08-12 19:57:31 +0000
commit4b9a8c385aaa926cedf5ba7c213c0fc247ef2cb4 (patch)
tree50d0565e66f421d6ce0b734e1c18eb614b7f5a06 /chrome/browser/speech
parentdf9a4de661636de81fccb6cfa552de94e84efa50 (diff)
downloadchromium_src-4b9a8c385aaa926cedf5ba7c213c0fc247ef2cb4.zip
chromium_src-4b9a8c385aaa926cedf5ba7c213c0fc247ef2cb4.tar.gz
chromium_src-4b9a8c385aaa926cedf5ba7c213c0fc247ef2cb4.tar.bz2
Adds SpeechRecognizer which provides a simple interface to record and recognize speech.
Also added a unit test for checking the callbacks fire as expected. TEST=unit_tests --gtest_filter=SpeechRecognizerTest.* BUG=none Review URL: http://codereview.chromium.org/3124009 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@55918 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/speech')
-rw-r--r--chrome/browser/speech/speech_recognizer.cc180
-rw-r--r--chrome/browser/speech/speech_recognizer.h99
-rw-r--r--chrome/browser/speech/speech_recognizer_unittest.cc167
3 files changed, 446 insertions, 0 deletions
diff --git a/chrome/browser/speech/speech_recognizer.cc b/chrome/browser/speech/speech_recognizer.cc
new file mode 100644
index 0000000..0f8b116
--- /dev/null
+++ b/chrome/browser/speech/speech_recognizer.cc
@@ -0,0 +1,180 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/speech/speech_recognizer.h"
+
+#include "base/ref_counted.h"
+#include "base/scoped_ptr.h"
+#include "chrome/browser/chrome_thread.h"
+#include "chrome/browser/profile.h"
+#include "chrome/common/net/url_request_context_getter.h"
+
+using media::AudioInputController;
+using std::list;
+using std::string;
+
+namespace {
+// Server endpoint that recorded audio is posted to for recognition. Declared
+// as a const array rather than a pointer so that the symbol itself, and not
+// only the characters it refers to, is immutable.
+const char kDefaultSpeechRecognitionUrl[] =
+    "http://www.google.com/speech-api/v1/recognize?lang=en-us&client=chromium";
+const int kAudioPacketIntervalMs = 100;  // Record 100ms long audio packets.
+const int kNumAudioChannels = 1;  // Speech is recorded as mono.
+const int kNumBitsPerAudioSample = 16;
+}  // namespace
+
+namespace speech_input {
+
+// Remembers |delegate| (held as a raw pointer; this file never deletes it)
+// and the |render_view_id| that is echoed back in every delegate callback.
+// Construction does not start recording; see |StartRecording|.
+SpeechRecognizer::SpeechRecognizer(Delegate* delegate, int render_view_id)
+    : delegate_(delegate), render_view_id_(render_view_id) {}
+
+// Performs no cleanup of its own; it only checks that the recognizer was shut
+// down before being destroyed. Recording should have stopped earlier due to
+// the endpointer or |StopRecording| being called, leaving no controller, no
+// request in flight and no buffered audio.
+SpeechRecognizer::~SpeechRecognizer() {
+  DCHECK(audio_controller_.get() == NULL);
+  DCHECK(!(request_.get() && request_->HasPendingRequest()));
+  DCHECK(audio_buffers_.empty());
+}
+
+// Creates the audio input controller and starts recording. Expected to run on
+// the IO thread while no recording is active and no recognition request is
+// pending. Always returns true.
+bool SpeechRecognizer::StartRecording() {
+  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
+  DCHECK(audio_controller_.get() == NULL);
+  DCHECK(!(request_.get() && request_->HasPendingRequest()));
+
+  // Size handed to the controller for one |kAudioPacketIntervalMs| long
+  // packet at the telephone sample rate (presumably a sample count -- confirm
+  // against AudioInputController::Create).
+  const int packet_size =
+      (AudioManager::kTelephoneSampleRate * kAudioPacketIntervalMs) / 1000;
+  audio_controller_ = AudioInputController::Create(
+      this, AudioManager::AUDIO_PCM_LINEAR, kNumAudioChannels,
+      AudioManager::kTelephoneSampleRate, kNumBitsPerAudioSample,
+      packet_size);
+  DCHECK(audio_controller_.get());
+  LOG(INFO) << "SpeechRecognizer starting record.";
+  audio_controller_->Record();
+
+  return true;
+}
+
+// Aborts the current recognition sequence: closes the audio controller if it
+// is still recording, frees any buffered audio and drops the request object.
+// This method itself makes no delegate callbacks. Expected to be called on
+// the IO thread while either a recording or a request object exists.
+void SpeechRecognizer::CancelRecognition() {
+ DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
+ DCHECK(audio_controller_.get() || request_.get());
+
+ // Stop recording if required.
+ if (audio_controller_.get()) {
+ LOG(INFO) << "SpeechRecognizer stopping record.";
+ audio_controller_->Close();
+ audio_controller_ = NULL; // Releases the ref ptr.
+ }
+
+ // Discard whatever audio was captured so far and the request, if any.
+ LOG(INFO) << "SpeechRecognizer canceling recognition.";
+ ReleaseAudioBuffers();
+ request_.reset();
+}
+
+// Stops audio capture and, if any audio was captured, posts it to the speech
+// recognition server. Expected to be called on the IO thread.
+//
+// Delegate callbacks made from here:
+//  - |DidCompleteRecording| always, once the audio controller is closed.
+//  - |DidCompleteRecognition| immediately when no audio was buffered;
+//    otherwise it is left to |SetRecognitionResult|.
+// Calling this after recording has already stopped does nothing.
+void SpeechRecognizer::StopRecording() {
+  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
+
+  // If audio recording has already stopped and we are in recognition phase,
+  // silently ignore any more calls to stop recording.
+  if (!audio_controller_.get())
+    return;
+
+  LOG(INFO) << "SpeechRecognizer stopping record.";
+  audio_controller_->Close();
+  audio_controller_ = NULL;  // Releases the ref ptr.
+  delegate_->DidCompleteRecording(render_view_id_);
+
+  // If we haven't got any audio yet end the recognition sequence here.
+  if (audio_buffers_.empty()) {
+    // Guard against the delegate freeing us until we finish our job.
+    scoped_refptr<SpeechRecognizer> me(this);
+    delegate_->DidCompleteRecognition(render_view_id_);
+    return;
+  }
+
+  // We now have recorded audio in our buffers, so start a recognition request.
+  // Since the http request takes a single string as POST data, allocate
+  // one and copy over bytes from the audio buffers to the string.
+  // The total is accumulated in the string's own size type so that summing
+  // the unsigned buffer lengths never narrows into a signed int.
+  string::size_type audio_buffer_length = 0;
+  for (AudioBufferQueue::iterator it = audio_buffers_.begin();
+       it != audio_buffers_.end(); ++it) {
+    audio_buffer_length += (*it)->length();
+  }
+  string data;
+  data.reserve(audio_buffer_length);
+  for (AudioBufferQueue::iterator it = audio_buffers_.begin();
+       it != audio_buffers_.end(); ++it) {
+    data.append(*(*it));
+  }
+
+  // A request object left over from an earlier, finished recognition is
+  // acceptable: the recognizer may be reused and |request_| is only cleared
+  // by |CancelRecognition|. Only a request that is still pending is an error,
+  // which is the same precondition |StartRecording| checks.
+  DCHECK(!request_.get() || !request_->HasPendingRequest());
+  request_.reset(new SpeechRecognitionRequest(
+      Profile::GetDefaultRequestContext(),
+      GURL(kDefaultSpeechRecognitionUrl),
+      this));
+  request_->Send(data);
+  ReleaseAudioBuffers();  // No need to keep the audio anymore.
+}
+
+// Deletes every buffered audio packet and leaves |audio_buffers_| empty. The
+// list holds raw pointers to heap-allocated strings, so each one is freed as
+// it is removed.
+void SpeechRecognizer::ReleaseAudioBuffers() {
+  while (!audio_buffers_.empty()) {
+    delete audio_buffers_.front();
+    audio_buffers_.pop_front();
+  }
+}
+
+// Invoked in the audio thread.
+// Does no handling here: |error_code| is forwarded to |HandleOnError| through
+// a task posted to the IO thread. |controller| is not used.
+void SpeechRecognizer::OnError(AudioInputController* controller,
+ int error_code) {
+ ChromeThread::PostTask(ChromeThread::IO, FROM_HERE,
+ NewRunnableMethod(this,
+ &SpeechRecognizer::HandleOnError,
+ error_code));
+}
+
+void SpeechRecognizer::HandleOnError(int error_code) {
+ LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code;
+
+ // Check if we are still recording before canceling recognition, as
+ // recording might have been stopped after this error was posted to the queue
+ // by |OnError|.
+ if (!audio_controller_.get())
+ return;
+
+ CancelRecognition();
+ delegate_->DidCompleteRecording(render_view_id_);
+ delegate_->DidCompleteRecognition(render_view_id_);
+}
+
+// Receives one packet of recorded audio. The bytes are copied into a
+// heap-allocated string that is handed to |HandleOnData| through a task
+// posted to the IO thread. |controller| is not used.
+void SpeechRecognizer::OnData(AudioInputController* controller,
+                              const uint8* data, uint32 size) {
+  // An empty packet could happen when recording stops and is normal, so only
+  // packets that carry data are forwarded.
+  if (size != 0) {
+    string* packet = new string(reinterpret_cast<const char*>(data), size);
+    ChromeThread::PostTask(
+        ChromeThread::IO, FROM_HERE,
+        NewRunnableMethod(this, &SpeechRecognizer::HandleOnData, packet));
+  }
+}
+
+// IO-thread half of |OnData|. |data| is either appended to |audio_buffers_|
+// or deleted here, so the caller must not touch it afterwards.
+void SpeechRecognizer::HandleOnData(string* data) {
+  if (audio_controller_.get()) {
+    // Still recording, so keep the packet.
+    // TODO(satish): Once we have streaming POST, start sending the data
+    // received here as POST chunks.
+    audio_buffers_.push_back(data);
+    return;
+  }
+
+  // Recording might have been stopped after |OnData| posted this buffer to the
+  // queue, in which case the buffer is no longer wanted.
+  delete data;
+}
+
+// Result callback for the recognition request (this object is passed to the
+// request as its delegate in |StopRecording|). Forwards |error| and |value|
+// to our own delegate along with the render view id, then signals the end of
+// the recognition sequence.
+void SpeechRecognizer::SetRecognitionResult(bool error, const string16& value) {
+ delegate_->SetRecognitionResult(render_view_id_, error, value);
+
+ // Guard against the delegate freeing us until we finish our job.
+ // The extra reference is held only for the final callback below.
+ scoped_refptr<SpeechRecognizer> me(this);
+ delegate_->DidCompleteRecognition(render_view_id_);
+}
+
+} // namespace speech_input
diff --git a/chrome/browser/speech/speech_recognizer.h b/chrome/browser/speech/speech_recognizer.h
new file mode 100644
index 0000000..fd8c7c4
--- /dev/null
+++ b/chrome/browser/speech/speech_recognizer.h
@@ -0,0 +1,99 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
+#define CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
+
+#include "base/ref_counted.h"
+#include "base/scoped_ptr.h"
+#include "media/audio/audio_input_controller.h"
+#include "chrome/browser/speech/speech_recognition_request.h"
+#include <list>
+#include <string>
+
+namespace speech_input {
+
+// Records audio, sends recorded audio to server and translates server response
+// to recognition result.
+// The object is reference counted. It receives audio controller events as an
+// AudioInputController::EventHandler and the recognition result as a
+// SpeechRecognitionRequestDelegate, and reports progress to its own Delegate.
+class SpeechRecognizer
+ : public base::RefCountedThreadSafe<SpeechRecognizer>,
+ public media::AudioInputController::EventHandler,
+ public SpeechRecognitionRequestDelegate {
+ public:
+ // Implemented by the caller to receive recognition events.
+ class Delegate {
+ public:
+ // Delivers the outcome of a recognition request. |error| and |value| are
+ // passed through unchanged from the recognition request's result callback.
+ virtual void SetRecognitionResult(int render_view_id, bool error,
+ const string16& value) = 0;
+
+ // Invoked when audio recording stops, either due to the end pointer
+ // detecting silence in user input or if |StopRecording| was called. The
+ // delegate has to wait until |DidCompleteRecognition| is invoked before
+ // destroying the |SpeechRecognizer| object.
+ virtual void DidCompleteRecording(int render_view_id) = 0;
+
+ // This is guaranteed to be the last method invoked in the recognition
+ // sequence and the |SpeechRecognizer| object can be freed up if necessary.
+ virtual void DidCompleteRecognition(int render_view_id) = 0;
+
+ protected:
+ // Protected so a delegate cannot be deleted through this interface.
+ virtual ~Delegate() {}
+ };
+
+ // |delegate| receives all recognition events; |render_view_id| is passed
+ // back as the first argument of every delegate call.
+ SpeechRecognizer(Delegate* delegate, int render_view_id);
+ // NOTE(review): public although the class is reference counted -- confirm
+ // that nothing deletes or stack-allocates instances directly.
+ ~SpeechRecognizer();
+
+ // Starts audio recording and does recognition after recording ends. The same
+ // SpeechRecognizer instance can be used multiple times for speech recognition
+ // though each recognition request can be made only after the previous one
+ // completes (i.e. after receiving Delegate::DidCompleteRecognition).
+ bool StartRecording();
+
+ // Stops recording audio and starts recognition.
+ void StopRecording();
+
+ // Stops recording audio and cancels recognition. Any audio recorded so far
+ // gets discarded.
+ void CancelRecognition();
+
+ // AudioInputController::EventHandler methods.
+ // |OnCreated| and |OnRecording| are intentionally empty.
+ void OnCreated(media::AudioInputController* controller) { }
+ void OnRecording(media::AudioInputController* controller) { }
+ void OnError(media::AudioInputController* controller, int error_code);
+ void OnData(media::AudioInputController* controller, const uint8* data,
+ uint32 size);
+
+ // SpeechRecognitionRequest::Delegate methods.
+ void SetRecognitionResult(bool error, const string16& value);
+
+ private:
+ // Deletes all strings held in |audio_buffers_| and empties the list.
+ void ReleaseAudioBuffers();
+
+ void HandleOnError(int error_code); // Handles OnError in the IO thread.
+
+ // Handles OnData in the IO thread. Takes ownership of |data|.
+ void HandleOnData(std::string* data);
+
+ // Receiver of recognition events; stored as a raw pointer.
+ Delegate* delegate_;
+ // Identifier handed back to the delegate with every callback.
+ int render_view_id_;
+
+ // Buffer holding the recorded audio. Owns the strings inside the list.
+ typedef std::list<std::string*> AudioBufferQueue;
+ AudioBufferQueue audio_buffers_;
+
+ // The recognition request created when recording stops with audio buffered.
+ scoped_ptr<SpeechRecognitionRequest> request_;
+ // Non-NULL only while recording is in progress.
+ scoped_refptr<media::AudioInputController> audio_controller_;
+
+ DISALLOW_COPY_AND_ASSIGN(SpeechRecognizer);
+};
+
+// This typedef works around an issue with certain versions of Visual Studio,
+// which get confused between multiple Delegate classes and report a C2500
+// error. (The error was seen on the try bots; the workaround was not needed
+// on the original author's machine.)
+typedef SpeechRecognizer::Delegate SpeechRecognizerDelegate;
+
+} // namespace speech_input
+
+#endif // CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
diff --git a/chrome/browser/speech/speech_recognizer_unittest.cc b/chrome/browser/speech/speech_recognizer_unittest.cc
new file mode 100644
index 0000000..1068364
--- /dev/null
+++ b/chrome/browser/speech/speech_recognizer_unittest.cc
@@ -0,0 +1,167 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/chrome_thread.h"
+#include "chrome/browser/speech/speech_recognizer.h"
+#include "chrome/common/net/test_url_fetcher_factory.h"
+#include "media/audio/test_audio_input_controller_factory.h"
+#include "net/url_request/url_request_status.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+using media::AudioInputController;
+using media::TestAudioInputController;
+using media::TestAudioInputControllerFactory;
+
+namespace speech_input {
+
+// Test fixture that doubles as the recognizer's delegate and records which of
+// the delegate callbacks have fired. URL fetchers and audio input controllers
+// are supplied by test factories that are installed for the duration of each
+// test.
+class SpeechRecognizerTest : public SpeechRecognizerDelegate,
+                             public testing::Test {
+ public:
+  SpeechRecognizerTest()
+      : io_thread_(ChromeThread::IO, &message_loop_),
+        ALLOW_THIS_IN_INITIALIZER_LIST(
+            recognizer_(new SpeechRecognizer(this, 1))) {
+  }
+
+  // testing::Test methods.
+  // Clears the callback flags and installs the test factories.
+  virtual void SetUp() {
+    recognition_complete_ = false;
+    recording_complete_ = false;
+    result_received_ = false;
+    URLFetcher::set_factory(&url_fetcher_factory_);
+    AudioInputController::set_factory(&audio_input_controller_factory_);
+  }
+
+  // Uninstalls the test factories again.
+  virtual void TearDown() {
+    URLFetcher::set_factory(NULL);
+    AudioInputController::set_factory(NULL);
+  }
+
+  // SpeechRecognizer::Delegate methods. Each one only raises its flag; the
+  // arguments are ignored.
+  virtual void SetRecognitionResult(int view_id, bool had_error,
+                                    const string16& text) {
+    result_received_ = true;
+  }
+
+  virtual void DidCompleteRecording(int view_id) {
+    recording_complete_ = true;
+  }
+
+  virtual void DidCompleteRecognition(int view_id) {
+    recognition_complete_ = true;
+  }
+
+  // Starts recording and expects the recognizer to report success.
+  void StartTest() {
+    EXPECT_TRUE(recognizer_->StartRecording());
+  }
+
+ protected:
+  MessageLoopForIO message_loop_;
+  ChromeThread io_thread_;
+  scoped_refptr<SpeechRecognizer> recognizer_;
+  bool recording_complete_;
+  bool recognition_complete_;
+  bool result_received_;
+  TestURLFetcherFactory url_fetcher_factory_;
+  TestAudioInputControllerFactory audio_input_controller_factory_;
+};
+
+TEST_F(SpeechRecognizerTest, StopNoData) {
+  // Check for callbacks when stopping record before any audio gets recorded.
+  // With nothing buffered, stopping ends the whole sequence right away: both
+  // completion callbacks fire but no result is delivered.
+  EXPECT_TRUE(recognizer_->StartRecording());
+  recognizer_->StopRecording();
+  EXPECT_TRUE(recording_complete_);
+  EXPECT_TRUE(recognition_complete_);
+  EXPECT_FALSE(result_received_);
+}
+
+TEST_F(SpeechRecognizerTest, CancelNoData) {
+  // Check for callbacks when canceling recognition before any audio gets
+  // recorded. Canceling is silent: no delegate callback is expected.
+  EXPECT_TRUE(recognizer_->StartRecording());
+  recognizer_->CancelRecognition();
+  EXPECT_FALSE(recording_complete_);
+  EXPECT_FALSE(recognition_complete_);
+  EXPECT_FALSE(result_received_);
+}
+
+TEST_F(SpeechRecognizerTest, StopWithData) {
+  uint8 data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
+
+  // Start recording, give some data and then stop. This should wait for the
+  // network callback to arrive before completion.
+  EXPECT_TRUE(recognizer_->StartRecording());
+  TestAudioInputController* controller =
+      audio_input_controller_factory_.controller();
+  ASSERT_TRUE(controller);
+  controller->event_handler()->OnData(controller, data, sizeof(data));
+  // OnData only posts a task; run it so the packet reaches the recognizer's
+  // buffer before recording is stopped.
+  MessageLoop::current()->RunAllPending();
+  recognizer_->StopRecording();
+  EXPECT_TRUE(recording_complete_);
+  EXPECT_FALSE(recognition_complete_);
+  EXPECT_FALSE(result_received_);
+
+  // Issue the network callback to complete the process.
+  TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
+  ASSERT_TRUE(fetcher);
+  URLRequestStatus status;
+  status.set_status(URLRequestStatus::SUCCESS);
+  fetcher->delegate()->OnURLFetchComplete(fetcher, fetcher->original_url(),
+                                          status, 200, ResponseCookies(), "");
+  EXPECT_TRUE(recognition_complete_);
+  EXPECT_TRUE(result_received_);
+}
+
+TEST_F(SpeechRecognizerTest, CancelWithData) {
+  // Canceling after some audio has been delivered must finish immediately:
+  // no network request is created and no delegate callback fires.
+  uint8 audio_packet[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
+
+  EXPECT_TRUE(recognizer_->StartRecording());
+  TestAudioInputController* audio_controller =
+      audio_input_controller_factory_.controller();
+  ASSERT_TRUE(audio_controller);
+
+  // Feed one packet and let the posted task deliver it to the recognizer.
+  audio_controller->event_handler()->OnData(audio_controller, audio_packet,
+                                            sizeof(audio_packet));
+  MessageLoop::current()->RunAllPending();
+
+  recognizer_->CancelRecognition();
+  EXPECT_EQ(NULL, url_fetcher_factory_.GetFetcherByID(0));
+  EXPECT_FALSE(recording_complete_);
+  EXPECT_FALSE(recognition_complete_);
+  EXPECT_FALSE(result_received_);
+}
+
+TEST_F(SpeechRecognizerTest, AudioControllerErrorNoData) {
+  // An error raised by the AudioInputController before any audio arrives
+  // should tear things down properly: both completion callbacks fire and no
+  // result is delivered.
+  EXPECT_TRUE(recognizer_->StartRecording());
+  TestAudioInputController* audio_controller =
+      audio_input_controller_factory_.controller();
+  ASSERT_TRUE(audio_controller);
+
+  // The error is handled by a posted task, so pump the message loop.
+  audio_controller->event_handler()->OnError(audio_controller, 0);
+  MessageLoop::current()->RunAllPending();
+
+  EXPECT_TRUE(recording_complete_);
+  EXPECT_TRUE(recognition_complete_);
+  EXPECT_FALSE(result_received_);
+}
+
+TEST_F(SpeechRecognizerTest, AudioControllerErrorWithData) {
+  // An error raised by the AudioInputController after it has delivered some
+  // audio should tear things down properly: no network request is created,
+  // both completion callbacks fire and no result is delivered.
+  uint8 audio_packet[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
+
+  EXPECT_TRUE(recognizer_->StartRecording());
+  TestAudioInputController* audio_controller =
+      audio_input_controller_factory_.controller();
+  ASSERT_TRUE(audio_controller);
+
+  // Queue the data first and the error second, then run both posted tasks.
+  audio_controller->event_handler()->OnData(audio_controller, audio_packet,
+                                            sizeof(audio_packet));
+  audio_controller->event_handler()->OnError(audio_controller, 0);
+  MessageLoop::current()->RunAllPending();
+
+  EXPECT_EQ(NULL, url_fetcher_factory_.GetFetcherByID(0));
+  EXPECT_TRUE(recording_complete_);
+  EXPECT_TRUE(recognition_complete_);
+  EXPECT_FALSE(result_received_);
+}
+
+} // namespace speech_input