Adds SpeechRecognitionRequest class for sending recorded audio data to the server and parsing the response.

Also added a unit test. This code will be used in upcoming CLs for speech input. TEST=unit_tests --gtest_filter=SpeechRecognitionRequestTest.* BUG=none Review URL: http://codereview.chromium.org/3164002 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@55611 0039d316-1c4b-4281-b951-d872f2087c98
author: satish@chromium.org <satish@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-08-10 20:02:48 +0000
committer: satish@chromium.org <satish@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-08-10 20:02:48 +0000
commit: c8463a431cf61e71db88419f89e44cdae57f8293 (patch)
tree: 50d9631026418ac79158faa8db89fb56b227f59c
parent: 9ba20809ed26f2dc47444c44e1f34b810b2760fc (diff)
download: chromium_src-c8463a431cf61e71db88419f89e44cdae57f8293.zip
chromium_src-c8463a431cf61e71db88419f89e44cdae57f8293.tar.gz
chromium_src-c8463a431cf61e71db88419f89e44cdae57f8293.tar.bz2
5 files changed, 300 insertions, 0 deletions
diff --git a/chrome/browser/speech/speech_recognition_request.cc b/chrome/browser/speech/speech_recognition_request.cc
new file mode 100644
index 0000000..7a26d22
--- /dev/null
+++ b/chrome/browser/speech/speech_recognition_request.cc
@@ -0,0 +1,139 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/speech/speech_recognition_request.h"
+
+#include "base/json/json_reader.h"
+#include "base/string_util.h"
+#include "base/utf_string_conversions.h"
+#include "base/values.h"
+#include "chrome/common/net/url_request_context_getter.h"
+#include "net/base/load_flags.h"
+#include "net/url_request/url_request_status.h"
+
+namespace {
+
+// TODO(satish): Change this once speex compression is enabled for audio.
+const char kMimeRawAudio[] = "audio/l16; rate=8000";  // Upload MIME type.
+
+const char* const kHypothesesString = "hypotheses";  // Top-level list key.
+const char* const kUtteranceString = "utterance";  // Key read from an entry.
+
+// Extracts the recognized text from the server's JSON |response_body|. The
+// body must be a dictionary whose "hypotheses" entry is a non-empty list; the
+// "utterance" string of the first list element is written to |value|.
+// Returns false, after logging the reason, if any of that is missing.
+bool ParseServerResponse(const std::string& response_body, string16* value) {
+  DCHECK(value);
+  if (response_body.empty()) {
+    LOG(WARNING) << "ParseServerResponse: Response was empty.\n";
+    return false;
+  }
+  DLOG(INFO) << "ParseServerResponse: Parsing response "
+             << response_body << ".\n";
+
+  // Parse the response, ignoring comments.
+  std::string error_msg;
+  scoped_ptr<Value> root(base::JSONReader::ReadAndReturnError(
+      response_body, false, NULL, &error_msg));
+  if (!root.get()) {
+    LOG(WARNING) << "ParseServerResponse: JSONReader failed : "
+                 << error_msg << ".\n";
+    return false;
+  }
+  if (!root->IsType(Value::TYPE_DICTIONARY)) {
+    LOG(WARNING) << "ParseServerResponse: Unexpected response type "
+                 << root->GetType() << ".\n";
+    return false;
+  }
+  const DictionaryValue* response = static_cast<DictionaryValue*>(root.get());
+
+  // The "hypotheses" entry must exist and be a non-empty list.
+  Value* hypotheses_value = NULL;
+  if (!response->Get(kHypothesesString, &hypotheses_value)) {
+    LOG(WARNING) << "ParseServerResponse: Missing hypotheses attribute.\n";
+    return false;
+  }
+  DCHECK(hypotheses_value);
+  if (!hypotheses_value->IsType(Value::TYPE_LIST)) {
+    LOG(WARNING) << "ParseServerResponse: Unexpected hypotheses type "
+                 << hypotheses_value->GetType() << ".\n";
+    return false;
+  }
+  const ListValue* hypotheses_list = static_cast<ListValue*>(hypotheses_value);
+  if (hypotheses_list->GetSize() == 0) {
+    LOG(WARNING) << "ParseServerResponse: hypotheses list is empty.\n";
+    return false;
+  }
+
+  // Only the first hypothesis is read; later list entries are ignored.
+  Value* entry = NULL;
+  if (!hypotheses_list->Get(0, &entry)) {
+    LOG(WARNING) << "ParseServerResponse: Unable to read hypotheses value.\n";
+    return false;
+  }
+  DCHECK(entry);
+  if (!entry->IsType(Value::TYPE_DICTIONARY)) {
+    LOG(WARNING) << "ParseServerResponse: Unexpected value type "
+                 << entry->GetType() << ".\n";
+    return false;
+  }
+  const DictionaryValue* hypothesis = static_cast<DictionaryValue*>(entry);
+  if (!hypothesis->GetString(kUtteranceString, value)) {
+    LOG(WARNING) << "ParseServerResponse: Missing utterance value.\n";
+    return false;
+  }
+  return true;
+}
+
+}  // namespace
+
+namespace speech_input {
+
+// Default fetcher ID; the unit test looks the created fetcher up under it.
+int SpeechRecognitionRequest::url_fetcher_id_for_tests = 0;
+
+// Only stores its arguments; nothing is sent until Send() is called.
+SpeechRecognitionRequest::SpeechRecognitionRequest(
+    URLRequestContextGetter* context, const GURL& url, Delegate* delegate)
+    : url_context_(context), url_(url), delegate_(delegate) {
+  DCHECK(delegate);  // Dereferenced unconditionally in OnURLFetchComplete().
+}
+
+// POSTs |audio_data| to |url_|; the result arrives in OnURLFetchComplete().
+bool SpeechRecognitionRequest::Send(const std::string& audio_data) {
+  DCHECK(!url_fetcher_.get());  // The previous request must have completed.
+  const int load_flags = net::LOAD_BYPASS_CACHE | net::LOAD_DISABLE_CACHE |
+      net::LOAD_DO_NOT_SAVE_COOKIES | net::LOAD_DO_NOT_SEND_COOKIES |
+      net::LOAD_DO_NOT_SEND_AUTH_DATA;
+  url_fetcher_.reset(URLFetcher::Create(
+      url_fetcher_id_for_tests, url_, URLFetcher::POST, this));
+  url_fetcher_->set_upload_data(kMimeRawAudio, audio_data);
+  url_fetcher_->set_request_context(url_context_);
+  url_fetcher_->set_load_flags(load_flags);
+  url_fetcher_->Start();
+  return true;
+}
+
+// URLFetcher::Delegate callback. A failed status, a response code other than
+// 200 or an unparsable body is reported to the delegate as an error.
+void SpeechRecognitionRequest::OnURLFetchComplete(
+    const URLFetcher* source,
+    const GURL& url,
+    const URLRequestStatus& status,
+    int response_code,
+    const ResponseCookies& cookies,
+    const std::string& data) {
+  DCHECK_EQ(url_fetcher_.get(), source);
+  DCHECK(url_.possibly_invalid_spec() == url.possibly_invalid_spec());
+  string16 value;
+  const bool ok = status.is_success() && response_code == 200 &&
+                  ParseServerResponse(data, &value);
+  // Drop the fetcher before notifying; Send() DCHECKs that none is pending.
+  url_fetcher_.reset();
+  DLOG(INFO) << "SpeechRecognitionRequest: Invoking delegate with result.";
+  delegate_->SetRecognitionResult(!ok, value);
+}
+
+}  // namespace speech_input
diff --git a/chrome/browser/speech/speech_recognition_request.h b/chrome/browser/speech/speech_recognition_request.h
new file mode 100644
index 0000000..db12d34
--- /dev/null
+++ b/chrome/browser/speech/speech_recognition_request.h
@@ -0,0 +1,73 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_SPEECH_SPEECH_RECOGNITION_REQUEST_H_
+#define CHROME_BROWSER_SPEECH_SPEECH_RECOGNITION_REQUEST_H_
+
+#include <string>
+#include "base/basictypes.h"
+#include "base/ref_counted.h"
+#include "base/scoped_ptr.h"
+#include "chrome/common/net/url_fetcher.h"
+#include "googleurl/src/gurl.h"
+
+class URLFetcher;
+class URLRequestContextGetter;
+
+namespace speech_input {
+
+// Provides a simple interface for sending recorded speech data to the server
+// and getting back recognition results.
+class SpeechRecognitionRequest : public URLFetcher::Delegate {
+ public:
+  // ID passed to URLFetcher::Create(). Used for testing.
+  static int url_fetcher_id_for_tests;
+
+  // Interface for receiving callbacks from this object.
+  class Delegate {
+   public:
+    // Called when a request completes. |error| is true if the fetch failed or
+    // the response could not be parsed; otherwise |value| holds the text.
+    virtual void SetRecognitionResult(bool error, const string16& value) = 0;
+
+   protected:
+    virtual ~Delegate() {}
+  };
+
+  // |url| is the server address to which the request will be sent.
+  SpeechRecognitionRequest(URLRequestContextGetter* context,
+                           const GURL& url, Delegate* delegate);
+
+  // Sends a new request with the given audio data, returns true if successful.
+  // The same object can be used to send multiple requests but only after the
+  // previous request has completed.
+  bool Send(const std::string& audio_data);
+
+  // True from a call to Send() until that fetch has completed.
+  bool HasPendingRequest() const { return url_fetcher_.get() != NULL; }
+
+  // URLFetcher::Delegate methods.
+  virtual void OnURLFetchComplete(
+      const URLFetcher* source, const GURL& url,
+      const URLRequestStatus& status, int response_code,
+      const ResponseCookies& cookies, const std::string& data);
+
+ private:
+  scoped_refptr<URLRequestContextGetter> url_context_;
+  const GURL url_;
+  Delegate* delegate_;  // Not owned.
+  scoped_ptr<URLFetcher> url_fetcher_;  // Set only while a fetch is pending.
+
+  DISALLOW_COPY_AND_ASSIGN(SpeechRecognitionRequest);
+};
+
+// This typedef works around an issue with certain versions of Visual Studio,
+// which get confused between multiple Delegate classes and report a C2500
+// error. (The error showed up on the try bots; the workaround was not needed
+// on the author's machine.)
+typedef SpeechRecognitionRequest::Delegate SpeechRecognitionRequestDelegate;
+
+}  // namespace speech_input
+
+#endif  // CHROME_BROWSER_SPEECH_SPEECH_RECOGNITION_REQUEST_H_
diff --git a/chrome/browser/speech/speech_recognition_request_unittest.cc b/chrome/browser/speech/speech_recognition_request_unittest.cc
new file mode 100644
index 0000000..0e3559e
--- /dev/null
+++ b/chrome/browser/speech/speech_recognition_request_unittest.cc
@@ -0,0 +1,85 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/utf_string_conversions.h"
+#include "chrome/browser/speech/speech_recognition_request.h"
+#include "chrome/common/net/url_request_context_getter.h"
+#include "chrome/common/net/test_url_fetcher_factory.h"
+#include "net/url_request/url_request_status.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace speech_input {
+
+// Fixture that acts as the request's delegate and records the most recent
+// result passed to SetRecognitionResult().
+class SpeechRecognitionRequestTest : public SpeechRecognitionRequestDelegate,
+                                     public testing::Test {
+ public:
+  SpeechRecognitionRequestTest() : error_(false) { }
+
+  // Creates a speech recognition request and invokes its URL fetcher delegate
+  // with the given test data. |success| selects a successful status with
+  // response code 200, otherwise a failed status with response code 500.
+  void CreateAndTestRequest(bool success, const std::string& http_response);
+
+  // SpeechRecognitionRequestDelegate methods.
+  virtual void SetRecognitionResult(bool error, const string16& result) {
+    error_ = error;
+    result_ = result;
+  }
+
+  // testing::Test methods. SetUp() installs |url_fetcher_factory_| as the
+  // URLFetcher factory and TearDown() clears it again.
+  virtual void SetUp() { URLFetcher::set_factory(&url_fetcher_factory_); }
+  virtual void TearDown() { URLFetcher::set_factory(NULL); }
+
+ protected:
+  // Hands out the TestURLFetcher that CreateAndTestRequest() drives.
+  TestURLFetcherFactory url_fetcher_factory_;
+  bool error_;       // |error| from the most recent SetRecognitionResult().
+  string16 result_;  // |result| from the most recent SetRecognitionResult().
+};
+
+// Completes a request by hand; the outcome lands in error_ and result_.
+void SpeechRecognitionRequestTest::CreateAndTestRequest(
+    bool success, const std::string& http_response) {
+  SpeechRecognitionRequest request(NULL, GURL(""), this);
+  request.Send(std::string());
+  TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(
+      SpeechRecognitionRequest::url_fetcher_id_for_tests);
+  ASSERT_TRUE(fetcher);
+  URLRequestStatus status;
+  status.set_status(success ? URLRequestStatus::SUCCESS :
+                              URLRequestStatus::FAILED);
+  fetcher->delegate()->OnURLFetchComplete(
+      fetcher, fetcher->original_url(), status, success ? 200 : 500,
+      ResponseCookies(), http_response);
+}
+
+TEST_F(SpeechRecognitionRequestTest, BasicTest) {
+  // A single hypothesis: its utterance is the result.
+  CreateAndTestRequest(true,
+      "{\"hypotheses\":[{\"utterance\":\"123456\",\"confidence\":0.9}]}");
+  EXPECT_FALSE(error_);
+  EXPECT_EQ(ASCIIToUTF16("123456"), result_);
+
+  // Several hypotheses: only the first utterance is reported.
+  CreateAndTestRequest(true,
+      "{\"hypotheses\":[{\"utterance\":\"hello\",\"confidence\":0.9},"
+      "{\"utterance\":\"123456\",\"confidence\":0.5}]}");
+  EXPECT_FALSE(error_);
+  EXPECT_EQ(ASCIIToUTF16("hello"), result_);
+
+  // HTTP failure: an error with an empty result.
+  CreateAndTestRequest(false, "");
+  EXPECT_TRUE(error_);
+  EXPECT_TRUE(result_.empty());
+
+  // Well-formed JSON whose hypothesis lacks the "utterance" key.
+  CreateAndTestRequest(true, "{\"hypotheses\":[{\"unknownkey\":\"hello\"}]}");
+  EXPECT_TRUE(error_);
+  EXPECT_TRUE(result_.empty());
+}
+
+}  // namespace speech_input
diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi
index 56b4c62..375120e 100644
--- a/chrome/chrome_browser.gypi
+++ b/chrome/chrome_browser.gypi
@@ -2347,6 +2347,8 @@
         'browser/speech/speech_input_dispatcher_host.h',
         'browser/speech/speech_input_manager.cc',
         'browser/speech/speech_input_manager.h',
+        'browser/speech/speech_recognition_request.cc',
+        'browser/speech/speech_recognition_request.h',
         'browser/spellcheck_host.cc',
         'browser/spellcheck_host.h',
         'browser/spellchecker_linux.cc',
diff --git a/chrome/chrome_tests.gypi b/chrome/chrome_tests.gypi
index 67bcfc3e..efe1045 100644
--- a/chrome/chrome_tests.gypi
+++ b/chrome/chrome_tests.gypi
@@ -1088,6 +1088,7 @@
         'browser/sessions/session_service_unittest.cc',
         'browser/sessions/tab_restore_service_unittest.cc',
         'browser/shell_integration_unittest.cc',
+        'browser/speech/speech_recognition_request_unittest.cc',
         'browser/spellchecker_platform_engine_unittest.cc',
         'browser/ssl/ssl_host_state_unittest.cc',
         'browser/status_icons/status_icon_unittest.cc',
author	satish@chromium.org <satish@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2010-08-10 20:02:48 +0000
committer	satish@chromium.org <satish@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2010-08-10 20:02:48 +0000
commit	c8463a431cf61e71db88419f89e44cdae57f8293 (patch)
tree	50d9631026418ac79158faa8db89fb56b227f59c
parent	9ba20809ed26f2dc47444c44e1f34b810b2760fc (diff)
download	chromium_src-c8463a431cf61e71db88419f89e44cdae57f8293.zip chromium_src-c8463a431cf61e71db88419f89e44cdae57f8293.tar.gz chromium_src-c8463a431cf61e71db88419f89e44cdae57f8293.tar.bz2