summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorsatish@chromium.org <satish@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-08-10 20:02:48 +0000
committersatish@chromium.org <satish@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-08-10 20:02:48 +0000
commitc8463a431cf61e71db88419f89e44cdae57f8293 (patch)
tree50d9631026418ac79158faa8db89fb56b227f59c
parent9ba20809ed26f2dc47444c44e1f34b810b2760fc (diff)
downloadchromium_src-c8463a431cf61e71db88419f89e44cdae57f8293.zip
chromium_src-c8463a431cf61e71db88419f89e44cdae57f8293.tar.gz
chromium_src-c8463a431cf61e71db88419f89e44cdae57f8293.tar.bz2
Adds SpeechRecognitionRequest class for sending recorded audio data to the server and parsing the response.
Also added a unit test. This code will be used in upcoming CLs for speech input. TEST=unit_tests --gtest_filter=SpeechRecognitionRequestTest.* BUG=none Review URL: http://codereview.chromium.org/3164002 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@55611 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r--chrome/browser/speech/speech_recognition_request.cc139
-rw-r--r--chrome/browser/speech/speech_recognition_request.h73
-rw-r--r--chrome/browser/speech/speech_recognition_request_unittest.cc85
-rw-r--r--chrome/chrome_browser.gypi2
-rw-r--r--chrome/chrome_tests.gypi1
5 files changed, 300 insertions, 0 deletions
diff --git a/chrome/browser/speech/speech_recognition_request.cc b/chrome/browser/speech/speech_recognition_request.cc
new file mode 100644
index 0000000..7a26d22
--- /dev/null
+++ b/chrome/browser/speech/speech_recognition_request.cc
@@ -0,0 +1,139 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/speech/speech_recognition_request.h"
+
+#include "base/json/json_reader.h"
+#include "base/string_util.h"
+#include "base/utf_string_conversions.h"
+#include "base/values.h"
+#include "chrome/common/net/url_request_context_getter.h"
+#include "net/base/load_flags.h"
+#include "net/url_request/url_request_status.h"
+
+namespace {
+
+// TODO(satish): Change this once speex compression is enabled for audio.
+const char kMimeRawAudio[] = "audio/l16; rate=8000";
+
+const char* const kHypothesesString = "hypotheses";
+const char* const kUtteranceString = "utterance";
+
+bool ParseServerResponse(const std::string& response_body, string16* value) {
+ DCHECK(value);
+
+ if (response_body.empty()) {
+ LOG(WARNING) << "ParseServerResponse: Response was empty.\n";
+ return false;
+ }
+ DLOG(INFO) << "ParseServerResponse: Parsing response "
+ << response_body << ".\n";
+
+ // Parse the response, ignoring comments.
+ std::string error_msg;
+ scoped_ptr<Value> response_value(base::JSONReader::ReadAndReturnError(
+ response_body, false, NULL, &error_msg));
+ if (response_value == NULL) {
+ LOG(WARNING) << "ParseServerResponse: JSONReader failed : "
+ << error_msg << ".\n";
+ return false;
+ }
+
+ if (!response_value->IsType(Value::TYPE_DICTIONARY)) {
+ LOG(INFO) << "ParseServerResponse: Unexpected response type "
+ << response_value->GetType() << ".\n";
+ return false;
+ }
+ const DictionaryValue* response_object =
+ static_cast<DictionaryValue*>(response_value.get());
+
+ // Get the hypotheses
+ Value* hypotheses_value = NULL;
+ if (!response_object->Get(kHypothesesString, &hypotheses_value)) {
+ LOG(INFO) << "ParseServerResponse: Missing hypotheses attribute.\n";
+ return false;
+ }
+ DCHECK(hypotheses_value);
+ if (!hypotheses_value->IsType(Value::TYPE_LIST)) {
+ LOG(INFO) << "ParseServerResponse: Unexpected hypotheses type "
+ << hypotheses_value->GetType() << ".\n";
+ return false;
+ }
+ const ListValue* hypotheses_list = static_cast<ListValue*>(hypotheses_value);
+ if (hypotheses_list->GetSize() == 0) {
+ LOG(INFO) << "ParseServerResponse: hypotheses list is empty.\n";
+ return false;
+ }
+
+ Value* first_hypotheses = NULL;
+ if (!hypotheses_list->Get(0, &first_hypotheses)) {
+ LOG(INFO) << "ParseServerResponse: Unable to read hypotheses value.\n";
+ return false;
+ }
+ DCHECK(first_hypotheses);
+ if (!first_hypotheses->IsType(Value::TYPE_DICTIONARY)) {
+ LOG(INFO) << "ParseServerResponse: Unexpected value type "
+ << first_hypotheses->GetType() << ".\n";
+ return false;
+ }
+ const DictionaryValue* first_hypotheses_value =
+ static_cast<DictionaryValue*>(first_hypotheses);
+ if (!first_hypotheses_value->GetString(kUtteranceString, value)) {
+ LOG(INFO) << "ParseServerResponse: Missing utterance value.\n";
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace speech_input {
+
+int SpeechRecognitionRequest::url_fetcher_id_for_tests = 0;
+
+SpeechRecognitionRequest::SpeechRecognitionRequest(
+ URLRequestContextGetter* context, const GURL& url, Delegate* delegate)
+ : url_context_(context),
+ url_(url),
+ delegate_(delegate) {
+ DCHECK(delegate);
+}
+
+bool SpeechRecognitionRequest::Send(const std::string& audio_data) {
+ DCHECK(!url_fetcher_.get());
+
+ url_fetcher_.reset(URLFetcher::Create(
+ url_fetcher_id_for_tests, url_, URLFetcher::POST, this));
+ url_fetcher_->set_upload_data(kMimeRawAudio, audio_data);
+ url_fetcher_->set_request_context(url_context_);
+ url_fetcher_->set_load_flags(
+ net::LOAD_BYPASS_CACHE | net::LOAD_DISABLE_CACHE |
+ net::LOAD_DO_NOT_SAVE_COOKIES | net::LOAD_DO_NOT_SEND_COOKIES |
+ net::LOAD_DO_NOT_SEND_AUTH_DATA);
+ url_fetcher_->Start();
+ return true;
+}
+
+void SpeechRecognitionRequest::OnURLFetchComplete(
+ const URLFetcher* source,
+ const GURL& url,
+ const URLRequestStatus& status,
+ int response_code,
+ const ResponseCookies& cookies,
+ const std::string& data) {
+ DCHECK_EQ(url_fetcher_.get(), source);
+ DCHECK(url_.possibly_invalid_spec() == url.possibly_invalid_spec());
+
+ bool error = !status.is_success() || response_code != 200;
+ string16 value;
+ if (!error)
+ error = !ParseServerResponse(data, &value);
+ url_fetcher_.reset();
+
+ DLOG(INFO) << "SpeechRecognitionRequest: Invoking delegate with result.";
+ delegate_->SetRecognitionResult(error, value);
+}
+
+} // namespace speech_input
diff --git a/chrome/browser/speech/speech_recognition_request.h b/chrome/browser/speech/speech_recognition_request.h
new file mode 100644
index 0000000..db12d34
--- /dev/null
+++ b/chrome/browser/speech/speech_recognition_request.h
@@ -0,0 +1,73 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_SPEECH_SPEECH_RECOGNITION_REQUEST_H_
+#define CHROME_BROWSER_SPEECH_SPEECH_RECOGNITION_REQUEST_H_
+
+#include <string>
+#include "base/basictypes.h"
+#include "base/ref_counted.h"
+#include "base/scoped_ptr.h"
+#include "chrome/common/net/url_fetcher.h"
+#include "googleurl/src/gurl.h"
+
+class URLFetcher;
+class URLRequestContextGetter;
+
+namespace speech_input {
+
+// Provides a simple interface for sending recorded speech data to the server
+// and getting back recognition results.
+class SpeechRecognitionRequest : public URLFetcher::Delegate {
+ public:
+ // ID passed to URLFetcher::Create(). Used for testing.
+ static int url_fetcher_id_for_tests;
+
+ // Interface for receiving callbacks from this object.
+ class Delegate {
+ public:
+ virtual void SetRecognitionResult(bool error, const string16& value) = 0;
+
+ protected:
+ virtual ~Delegate() {}
+ };
+
+ // |url| is the server address to which the request will be sent.
+ SpeechRecognitionRequest(URLRequestContextGetter* context,
+ const GURL& url,
+ Delegate* delegate);
+
+ // Sends a new request with the given audio data, returns true if successful.
+ // The same object can be used to send multiple requests but only after the
+ // previous request has completed.
+ bool Send(const std::string& audio_data);
+
+ bool HasPendingRequest() { return url_fetcher_ != NULL; }
+
+ // URLFetcher::Delegate methods.
+ void OnURLFetchComplete(const URLFetcher* source,
+ const GURL& url,
+ const URLRequestStatus& status,
+ int response_code,
+ const ResponseCookies& cookies,
+ const std::string& data);
+
+ private:
+ scoped_refptr<URLRequestContextGetter> url_context_;
+ const GURL url_;
+ Delegate* delegate_;
+ scoped_ptr<URLFetcher> url_fetcher_;
+
+ DISALLOW_COPY_AND_ASSIGN(SpeechRecognitionRequest);
+};
+
+// This typedef is to work around the issue with certain versions of
+// Visual Studio where it gets confused between multiple Delegate
+// classes and gives a C2500 error. (I saw this error on the try bots -
+// the workaround was not needed for my machine).
+typedef SpeechRecognitionRequest::Delegate SpeechRecognitionRequestDelegate;
+
+} // namespace speech_input
+
+#endif // CHROME_BROWSER_SPEECH_SPEECH_RECOGNITION_REQUEST_H_
diff --git a/chrome/browser/speech/speech_recognition_request_unittest.cc b/chrome/browser/speech/speech_recognition_request_unittest.cc
new file mode 100644
index 0000000..0e3559e
--- /dev/null
+++ b/chrome/browser/speech/speech_recognition_request_unittest.cc
@@ -0,0 +1,85 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/utf_string_conversions.h"
+#include "chrome/browser/speech/speech_recognition_request.h"
+#include "chrome/common/net/url_request_context_getter.h"
+#include "chrome/common/net/test_url_fetcher_factory.h"
+#include "net/url_request/url_request_status.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace speech_input {
+
+class SpeechRecognitionRequestTest : public SpeechRecognitionRequestDelegate,
+ public testing::Test {
+ public:
+ SpeechRecognitionRequestTest() : error_(false) { }
+
+ // Creates a speech recognition request and invokes its URL fetcher delegate
+ // with the given test data.
+ void CreateAndTestRequest(bool success, const std::string& http_response);
+
+ // SpeechRecognitionRequestDelegate methods.
+ virtual void SetRecognitionResult(bool error, const string16& result) {
+ error_ = error;
+ result_ = result;
+ }
+
+ // testing::Test methods.
+ virtual void SetUp() {
+ URLFetcher::set_factory(&url_fetcher_factory_);
+ }
+
+ virtual void TearDown() {
+ URLFetcher::set_factory(NULL);
+ }
+
+ protected:
+ TestURLFetcherFactory url_fetcher_factory_;
+ bool error_;
+ string16 result_;
+};
+
+void SpeechRecognitionRequestTest::CreateAndTestRequest(
+ bool success, const std::string& http_response) {
+ SpeechRecognitionRequest request(NULL, GURL(""), this);
+ request.Send(std::string());
+ TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
+ ASSERT_TRUE(fetcher);
+ URLRequestStatus status;
+ status.set_status(success ? URLRequestStatus::SUCCESS :
+ URLRequestStatus::FAILED);
+ fetcher->delegate()->OnURLFetchComplete(fetcher, fetcher->original_url(),
+ status, success ? 200 : 500,
+ ResponseCookies(),
+ http_response);
+ // Parsed response will be available in result_.
+}
+
+TEST_F(SpeechRecognitionRequestTest, BasicTest) {
+ // Normal success case with one result.
+ CreateAndTestRequest(true,
+ "{\"hypotheses\":[{\"utterance\":\"123456\",\"confidence\":0.9}]}");
+ EXPECT_EQ(false, error_);
+ EXPECT_EQ(ASCIIToUTF16("123456"), result_);
+
+ // Normal success case with multiple results.
+ CreateAndTestRequest(true,
+ "{\"hypotheses\":[{\"utterance\":\"hello\",\"confidence\":0.9},"
+ "{\"utterance\":\"123456\",\"confidence\":0.5}]}");
+ EXPECT_EQ(false, error_);
+ EXPECT_EQ(ASCIIToUTF16("hello"), result_);
+
+ // Http failure case.
+ CreateAndTestRequest(false, "");
+ EXPECT_EQ(true, error_);
+ EXPECT_EQ(ASCIIToUTF16(""), result_);
+
+ // Well-formed JSON that lacks the expected utterance key.
+ CreateAndTestRequest(true, "{\"hypotheses\":[{\"unknownkey\":\"hello\"}]}");
+ EXPECT_EQ(true, error_);
+ EXPECT_EQ(ASCIIToUTF16(""), result_);
+}
+
+} // namespace speech_input
diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi
index 56b4c62..375120e 100644
--- a/chrome/chrome_browser.gypi
+++ b/chrome/chrome_browser.gypi
@@ -2347,6 +2347,8 @@
'browser/speech/speech_input_dispatcher_host.h',
'browser/speech/speech_input_manager.cc',
'browser/speech/speech_input_manager.h',
+ 'browser/speech/speech_recognition_request.cc',
+ 'browser/speech/speech_recognition_request.h',
'browser/spellcheck_host.cc',
'browser/spellcheck_host.h',
'browser/spellchecker_linux.cc',
diff --git a/chrome/chrome_tests.gypi b/chrome/chrome_tests.gypi
index 67bcfc3e..efe1045 100644
--- a/chrome/chrome_tests.gypi
+++ b/chrome/chrome_tests.gypi
@@ -1088,6 +1088,7 @@
'browser/sessions/session_service_unittest.cc',
'browser/sessions/tab_restore_service_unittest.cc',
'browser/shell_integration_unittest.cc',
+ 'browser/speech/speech_recognition_request_unittest.cc',
'browser/spellchecker_platform_engine_unittest.cc',
'browser/ssl/ssl_host_state_unittest.cc',
'browser/status_icons/status_icon_unittest.cc',