diff options
author | satish@chromium.org <satish@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-08-10 20:02:48 +0000 |
---|---|---|
committer | satish@chromium.org <satish@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-08-10 20:02:48 +0000 |
commit | c8463a431cf61e71db88419f89e44cdae57f8293 (patch) | |
tree | 50d9631026418ac79158faa8db89fb56b227f59c | |
parent | 9ba20809ed26f2dc47444c44e1f34b810b2760fc (diff) | |
download | chromium_src-c8463a431cf61e71db88419f89e44cdae57f8293.zip chromium_src-c8463a431cf61e71db88419f89e44cdae57f8293.tar.gz chromium_src-c8463a431cf61e71db88419f89e44cdae57f8293.tar.bz2 |
Adds SpeechRecognitionRequest class for sending recorded audio data to the server and parsing the response.
Also adds a unit test.
This code will be used in upcoming CLs for speech input.
TEST=unit_tests --gtest_filter=SpeechRecognitionRequestTest.*
BUG=none
Review URL: http://codereview.chromium.org/3164002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@55611 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | chrome/browser/speech/speech_recognition_request.cc | 139 | ||||
-rw-r--r-- | chrome/browser/speech/speech_recognition_request.h | 73 | ||||
-rw-r--r-- | chrome/browser/speech/speech_recognition_request_unittest.cc | 85 | ||||
-rw-r--r-- | chrome/chrome_browser.gypi | 2 | ||||
-rw-r--r-- | chrome/chrome_tests.gypi | 1 |
5 files changed, 300 insertions, 0 deletions
diff --git a/chrome/browser/speech/speech_recognition_request.cc b/chrome/browser/speech/speech_recognition_request.cc new file mode 100644 index 0000000..7a26d22 --- /dev/null +++ b/chrome/browser/speech/speech_recognition_request.cc @@ -0,0 +1,139 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/speech/speech_recognition_request.h" + +#include "base/json/json_reader.h" +#include "base/string_util.h" +#include "base/utf_string_conversions.h" +#include "base/values.h" +#include "chrome/common/net/url_request_context_getter.h" +#include "net/base/load_flags.h" +#include "net/url_request/url_request_status.h" + +namespace { + +// TODO(satish): Change this once speex compression is enabled for audio. +const char kMimeRawAudio[] = "audio/l16; rate=8000"; + +const char* const kHypothesesString = "hypotheses"; +const char* const kUtteranceString = "utterance"; + +bool ParseServerResponse(const std::string& response_body, string16* value) { + DCHECK(value); + + if (response_body.empty()) { + LOG(WARNING) << "ParseServerResponse: Response was empty.\n"; + return false; + } + DLOG(INFO) << "ParseServerResponse: Parsing response " + << response_body << ".\n"; + + // Parse the response, ignoring comments. 
+ std::string error_msg; + scoped_ptr<Value> response_value(base::JSONReader::ReadAndReturnError( + response_body, false, NULL, &error_msg)); + if (response_value == NULL) { + LOG(WARNING) << "ParseServerResponse: JSONReader failed : " + << error_msg << ".\n"; + return false; + } + + if (!response_value->IsType(Value::TYPE_DICTIONARY)) { + LOG(INFO) << "ParseServerResponse: Unexpected response type " + << response_value->GetType() << ".\n"; + return false; + } + const DictionaryValue* response_object = + static_cast<DictionaryValue*>(response_value.get()); + + // Get the hypotheses + Value* hypotheses_value = NULL; + if (!response_object->Get(kHypothesesString, &hypotheses_value)) { + LOG(INFO) << "ParseServerResponse: Missing hypotheses attribute.\n"; + return false; + } + DCHECK(hypotheses_value); + if (!hypotheses_value->IsType(Value::TYPE_LIST)) { + LOG(INFO) << "ParseServerResponse: Unexpected hypotheses type " + << hypotheses_value->GetType() << ".\n"; + return false; + } + const ListValue* hypotheses_list = static_cast<ListValue*>(hypotheses_value); + if (hypotheses_list->GetSize() == 0) { + LOG(INFO) << "ParseServerResponse: hypotheses list is empty.\n"; + return false; + } + + Value* first_hypotheses = NULL; + if (!hypotheses_list->Get(0, &first_hypotheses)) { + LOG(INFO) << "ParseServerResponse: Unable to read hypotheses value.\n"; + return false; + } + DCHECK(first_hypotheses); + if (!first_hypotheses->IsType(Value::TYPE_DICTIONARY)) { + LOG(INFO) << "ParseServerResponse: Unexpected value type " + << first_hypotheses->GetType() << ".\n"; + return false; + } + const DictionaryValue* first_hypotheses_value = + static_cast<DictionaryValue*>(first_hypotheses); + if (!first_hypotheses_value->GetString(kUtteranceString, value)) { + LOG(INFO) << "ParseServerResponse: Missing utterance value.\n"; + return false; + } + + return true; +} + +} // namespace + +namespace speech_input { + +int SpeechRecognitionRequest::url_fetcher_id_for_tests = 0; + 
+SpeechRecognitionRequest::SpeechRecognitionRequest( + URLRequestContextGetter* context, const GURL& url, Delegate* delegate) + : url_context_(context), + url_(url), + delegate_(delegate) { + DCHECK(delegate); +} + +bool SpeechRecognitionRequest::Send(const std::string& audio_data) { + DCHECK(!url_fetcher_.get()); + + url_fetcher_.reset(URLFetcher::Create( + url_fetcher_id_for_tests, url_, URLFetcher::POST, this)); + url_fetcher_->set_upload_data(kMimeRawAudio, audio_data); + url_fetcher_->set_request_context(url_context_); + url_fetcher_->set_load_flags( + net::LOAD_BYPASS_CACHE | net::LOAD_DISABLE_CACHE | + net::LOAD_DO_NOT_SAVE_COOKIES | net::LOAD_DO_NOT_SEND_COOKIES | + net::LOAD_DO_NOT_SEND_AUTH_DATA); + url_fetcher_->Start(); + return true; +} + +void SpeechRecognitionRequest::OnURLFetchComplete( + const URLFetcher* source, + const GURL& url, + const URLRequestStatus& status, + int response_code, + const ResponseCookies& cookies, + const std::string& data) { + DCHECK_EQ(url_fetcher_.get(), source); + DCHECK(url_.possibly_invalid_spec() == url.possibly_invalid_spec()); + + bool error = !status.is_success() || response_code != 200; + string16 value; + if (!error) + error = !ParseServerResponse(data, &value); + url_fetcher_.reset(); + + DLOG(INFO) << "SpeechRecognitionRequest: Invoking delegate with result."; + delegate_->SetRecognitionResult(error, value); +} + +} // namespace speech_input diff --git a/chrome/browser/speech/speech_recognition_request.h b/chrome/browser/speech/speech_recognition_request.h new file mode 100644 index 0000000..db12d34 --- /dev/null +++ b/chrome/browser/speech/speech_recognition_request.h @@ -0,0 +1,73 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef CHROME_BROWSER_SPEECH_SPEECH_RECOGNITION_REQUEST_H_ +#define CHROME_BROWSER_SPEECH_SPEECH_RECOGNITION_REQUEST_H_ + +#include <string> +#include "base/basictypes.h" +#include "base/ref_counted.h" +#include "base/scoped_ptr.h" +#include "chrome/common/net/url_fetcher.h" +#include "googleurl/src/gurl.h" + +class URLFetcher; +class URLRequestContextGetter; + +namespace speech_input { + +// Provides a simple interface for sending recorded speech data to the server +// and getting back recognition results. +class SpeechRecognitionRequest : public URLFetcher::Delegate { + public: + // ID passed to URLFetcher::Create(). Used for testing. + static int url_fetcher_id_for_tests; + + // Interface for receiving callbacks from this object. + class Delegate { + public: + virtual void SetRecognitionResult(bool error, const string16& value) = 0; + + protected: + virtual ~Delegate() {} + }; + + // |url| is the server address to which the request will be sent. + SpeechRecognitionRequest(URLRequestContextGetter* context, + const GURL& url, + Delegate* delegate); + + // Sends a new request with the given audio data, returns true if successful. + // The same object can be used to send multiple requests but only after the + // previous request has completed. + bool Send(const std::string& audio_data); + + bool HasPendingRequest() { return url_fetcher_ != NULL; } + + // URLFetcher::Delegate methods. + void OnURLFetchComplete(const URLFetcher* source, + const GURL& url, + const URLRequestStatus& status, + int response_code, + const ResponseCookies& cookies, + const std::string& data); + + private: + scoped_refptr<URLRequestContextGetter> url_context_; + const GURL url_; + Delegate* delegate_; + scoped_ptr<URLFetcher> url_fetcher_; + + DISALLOW_COPY_AND_ASSIGN(SpeechRecognitionRequest); +}; + +// This typedef is to work around the issue with certain versions of +// Visual Studio where it gets confused between multiple Delegate +// classes and gives a C2500 error. 
(I saw this error on the try bots - +// the workaround was not needed for my machine). +typedef SpeechRecognitionRequest::Delegate SpeechRecognitionRequestDelegate; + +} // namespace speech_input + +#endif // CHROME_BROWSER_SPEECH_SPEECH_RECOGNITION_REQUEST_H_ diff --git a/chrome/browser/speech/speech_recognition_request_unittest.cc b/chrome/browser/speech/speech_recognition_request_unittest.cc new file mode 100644 index 0000000..0e3559e --- /dev/null +++ b/chrome/browser/speech/speech_recognition_request_unittest.cc @@ -0,0 +1,85 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/utf_string_conversions.h" +#include "chrome/browser/speech/speech_recognition_request.h" +#include "chrome/common/net/url_request_context_getter.h" +#include "chrome/common/net/test_url_fetcher_factory.h" +#include "net/url_request/url_request_status.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace speech_input { + +class SpeechRecognitionRequestTest : public SpeechRecognitionRequestDelegate, + public testing::Test { + public: + SpeechRecognitionRequestTest() : error_(false) { } + + // Creates a speech recognition request and invokes its URL fetcher delegate + // with the given test data. + void CreateAndTestRequest(bool success, const std::string& http_response); + + // SpeechRecognitionRequestDelegate methods. + virtual void SetRecognitionResult(bool error, const string16& result) { + error_ = error; + result_ = result; + } + + // testing::Test methods. 
+ virtual void SetUp() { + URLFetcher::set_factory(&url_fetcher_factory_); + } + + virtual void TearDown() { + URLFetcher::set_factory(NULL); + } + + protected: + TestURLFetcherFactory url_fetcher_factory_; + bool error_; + string16 result_; +}; + +void SpeechRecognitionRequestTest::CreateAndTestRequest( + bool success, const std::string& http_response) { + SpeechRecognitionRequest request(NULL, GURL(""), this); + request.Send(std::string()); + TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0); + ASSERT_TRUE(fetcher); + URLRequestStatus status; + status.set_status(success ? URLRequestStatus::SUCCESS : + URLRequestStatus::FAILED); + fetcher->delegate()->OnURLFetchComplete(fetcher, fetcher->original_url(), + status, success ? 200 : 500, + ResponseCookies(), + http_response); + // Parsed response will be available in result_. +} + +TEST_F(SpeechRecognitionRequestTest, BasicTest) { + // Normal success case with one result. + CreateAndTestRequest(true, + "{\"hypotheses\":[{\"utterance\":\"123456\",\"confidence\":0.9}]}"); + EXPECT_EQ(false, error_); + EXPECT_EQ(ASCIIToUTF16("123456"), result_); + + // Normal success case with multiple results. + CreateAndTestRequest(true, + "{\"hypotheses\":[{\"utterance\":\"hello\",\"confidence\":0.9}," + "{\"utterance\":\"123456\",\"confidence\":0.5}]}"); + EXPECT_EQ(false, error_); + EXPECT_EQ(ASCIIToUTF16("hello"), result_); + + // HTTP failure case. + CreateAndTestRequest(false, ""); + EXPECT_EQ(true, error_); + EXPECT_EQ(ASCIIToUTF16(""), result_); + + // Well-formed JSON that lacks the expected utterance key. 
+ CreateAndTestRequest(true, "{\"hypotheses\":[{\"unknownkey\":\"hello\"}]}"); + EXPECT_EQ(true, error_); + EXPECT_EQ(ASCIIToUTF16(""), result_); +} + +} // namespace speech_input diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi index 56b4c62..375120e 100644 --- a/chrome/chrome_browser.gypi +++ b/chrome/chrome_browser.gypi @@ -2347,6 +2347,8 @@ 'browser/speech/speech_input_dispatcher_host.h', 'browser/speech/speech_input_manager.cc', 'browser/speech/speech_input_manager.h', + 'browser/speech/speech_recognition_request.cc', + 'browser/speech/speech_recognition_request.h', 'browser/spellcheck_host.cc', 'browser/spellcheck_host.h', 'browser/spellchecker_linux.cc', diff --git a/chrome/chrome_tests.gypi b/chrome/chrome_tests.gypi index 67bcfc3e..efe1045 100644 --- a/chrome/chrome_tests.gypi +++ b/chrome/chrome_tests.gypi @@ -1088,6 +1088,7 @@ 'browser/sessions/session_service_unittest.cc', 'browser/sessions/tab_restore_service_unittest.cc', 'browser/shell_integration_unittest.cc', + 'browser/speech/speech_recognition_request_unittest.cc', 'browser/spellchecker_platform_engine_unittest.cc', 'browser/ssl/ssl_host_state_unittest.cc', 'browser/status_icons/status_icon_unittest.cc', |