diff options
author | satish@chromium.org <satish@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-10-28 08:47:09 +0000 |
---|---|---|
committer | satish@chromium.org <satish@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-10-28 08:47:09 +0000 |
commit | befab4b7c401a2368fe313db1be59c83626c6b36 (patch) | |
tree | 07abbabbe42a391265c21d0f843c86d122abec96 /chrome/browser/speech | |
parent | 2ae27702b4bd9bf31a19024131ba701c9253297c (diff) | |
download | chromium_src-befab4b7c401a2368fe313db1be59c83626c6b36.zip chromium_src-befab4b7c401a2368fe313db1be59c83626c6b36.tar.gz chromium_src-befab4b7c401a2368fe313db1be59c83626c6b36.tar.bz2 |
Added grammar attribute to speech input.
In https://bugs.webkit.org/show_bug.cgi?id=48339 we added support for an 'x-webkit-grammar' attribute for speech input.
In this CL we receive that as a parameter to SpeechInputDispatcher::startRecognition() and pass it up to the
SpeechRecognitionRequest object, which encodes and sends it as a URL parameter to the server.
To aid in this I also moved the speech server URL from SpeechRecognizer.cc to SpeechRecognitionRequest.cc.
Also added a browser test for this attribute. It is marked as FLAKY for now because its code is exactly the same as that of
the TestBasicRecognition test in that file, and that test is currently FLAKY. Both are marked with the same bug and I will
remove the FLAKY_ prefix from both when the bug is closed.
BUG=none
TEST=browser_tests --gtest_filter=SpeechInputBrowserTest.GrammarAttribute
Review URL: http://codereview.chromium.org/4183002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@64213 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/speech')
-rw-r--r-- | chrome/browser/speech/speech_input_browsertest.cc | 132 | ||||
-rw-r--r-- | chrome/browser/speech/speech_input_dispatcher_host.cc | 5 | ||||
-rw-r--r-- | chrome/browser/speech/speech_input_dispatcher_host.h | 3 | ||||
-rw-r--r-- | chrome/browser/speech/speech_input_manager.cc | 8 | ||||
-rw-r--r-- | chrome/browser/speech/speech_input_manager.h | 3 | ||||
-rw-r--r-- | chrome/browser/speech/speech_recognition_request.cc | 23 | ||||
-rw-r--r-- | chrome/browser/speech/speech_recognition_request.h | 6 | ||||
-rw-r--r-- | chrome/browser/speech/speech_recognition_request_unittest.cc | 4 | ||||
-rw-r--r-- | chrome/browser/speech/speech_recognizer.cc | 13 | ||||
-rw-r--r-- | chrome/browser/speech/speech_recognizer.h | 4 | ||||
-rw-r--r-- | chrome/browser/speech/speech_recognizer_unittest.cc | 2 |
11 files changed, 133 insertions, 70 deletions
diff --git a/chrome/browser/speech/speech_input_browsertest.cc b/chrome/browser/speech/speech_input_browsertest.cc index 8c329925..700773b1 100644 --- a/chrome/browser/speech/speech_input_browsertest.cc +++ b/chrome/browser/speech/speech_input_browsertest.cc @@ -32,22 +32,28 @@ const char* kTestResult = "Pictures of the moon"; class FakeSpeechInputManager : public SpeechInputManager { public: - explicit FakeSpeechInputManager() + FakeSpeechInputManager() : caller_id_(0), delegate_(NULL) { } + std::string grammar() { + return grammar_; + } + // SpeechInputManager methods. void StartRecognition(Delegate* delegate, int caller_id, int render_process_id, int render_view_id, - const gfx::Rect& element_rect) { + const gfx::Rect& element_rect, + const std::string& grammar) { VLOG(1) << "StartRecognition invoked."; EXPECT_EQ(0, caller_id_); EXPECT_EQ(NULL, delegate_); caller_id_ = caller_id; delegate_ = delegate; + grammar_ = grammar; // Give the fake result in a short while. MessageLoop::current()->PostTask(FROM_HERE, NewRunnableMethod(this, &FakeSpeechInputManager::SetFakeRecognitionResult)); @@ -81,14 +87,9 @@ class FakeSpeechInputManager : public SpeechInputManager { int caller_id_; Delegate* delegate_; + std::string grammar_; }; -// Factory method. -SpeechInputManager* fakeManagerAccessor() { - static FakeSpeechInputManager fake_speech_input_manager; - return &fake_speech_input_manager; -} - class SpeechInputBrowserTest : public InProcessBrowserTest { public: // InProcessBrowserTest methods @@ -100,49 +101,94 @@ class SpeechInputBrowserTest : public InProcessBrowserTest { const FilePath kTestDir(FILE_PATH_LITERAL("speech")); return ui_test_utils::GetTestUrl(kTestDir, FilePath(filename)); } + + protected: + void LoadAndRunSpeechInputTest(const FilePath::CharType* filename) { + // The test page calculates the speech button's coordinate in the page on + // load & sets that coordinate in the URL fragment. 
We send mouse down & up + // events at that coordinate to trigger speech recognition. + GURL test_url = testUrl(filename); + ui_test_utils::NavigateToURL(browser(), test_url); + std::string coords = browser()->GetSelectedTabContents()->GetURL().ref(); + VLOG(1) << "Coordinates given by script: " << coords; + int comma_pos = coords.find(','); + ASSERT_NE(-1, comma_pos); + int x = 0; + ASSERT_TRUE(base::StringToInt(coords.substr(0, comma_pos).c_str(), &x)); + int y = 0; + ASSERT_TRUE(base::StringToInt(coords.substr(comma_pos + 1).c_str(), &y)); + + WebKit::WebMouseEvent mouse_event; + mouse_event.type = WebKit::WebInputEvent::MouseDown; + mouse_event.button = WebKit::WebMouseEvent::ButtonLeft; + mouse_event.x = x; + mouse_event.y = y; + mouse_event.clickCount = 1; + TabContents* tab_contents = browser()->GetSelectedTabContents(); + tab_contents->render_view_host()->ForwardMouseEvent(mouse_event); + mouse_event.type = WebKit::WebInputEvent::MouseUp; + tab_contents->render_view_host()->ForwardMouseEvent(mouse_event); + + // The fake speech input manager would receive the speech input + // request and return the test string as recognition result. The test page + // then sets the URL fragment as 'pass' if it received the expected string. + ui_test_utils::WaitForNavigations(&tab_contents->controller(), 1); + EXPECT_EQ("pass", browser()->GetSelectedTabContents()->GetURL().ref()); + } + + // InProcessBrowserTest methods. + virtual void SetUpInProcessBrowserTestFixture() { + speech_input_manager_ = &fake_speech_input_manager_; + + // Inject the fake manager factory so that the test result is returned to + // the web page. + SpeechInputDispatcherHost::set_manager_accessor(&fakeManagerAccessor); + } + + virtual void TearDownInProcessBrowserTestFixture() { + speech_input_manager_ = NULL; + } + + // Factory method. 
+ static SpeechInputManager* fakeManagerAccessor() { + return speech_input_manager_; + } + + FakeSpeechInputManager fake_speech_input_manager_; + + // This is used by the static |fakeManagerAccessor|, and it is a pointer + // rather than a direct instance per the style guide. + static SpeechInputManager* speech_input_manager_; }; +SpeechInputManager* SpeechInputBrowserTest::speech_input_manager_ = NULL; + // Marked as FLAKY due to http://crbug.com/51337 // // TODO(satish): Once this flakiness has been fixed, add a second test here to // check for sending many clicks in succession to the speech button and verify // that it doesn't cause any crash but works as expected. This should act as the // test for http://crbug.com/59173 -IN_PROC_BROWSER_TEST_F(SpeechInputBrowserTest, FLAKY_TestBasicRecognition) { - // Inject the fake manager factory so that the test result is returned to the - // web page. - SpeechInputDispatcherHost::set_manager_accessor(&fakeManagerAccessor); - - // The test page calculates the speech button's coordinate in the page on load - // and sets that coordinate in the URL fragment. We send mouse down & up - // events at that coordinate to trigger speech recognition. 
- GURL test_url = testUrl(FILE_PATH_LITERAL("basic_recognition.html")); - ui_test_utils::NavigateToURL(browser(), test_url); - std::string coords = browser()->GetSelectedTabContents()->GetURL().ref(); - VLOG(1) << "Coordinates given by script: " << coords; - int comma_pos = coords.find(','); - ASSERT_NE(-1, comma_pos); - int x = 0; - ASSERT_TRUE(base::StringToInt(coords.substr(0, comma_pos).c_str(), &x)); - int y = 0; - ASSERT_TRUE(base::StringToInt(coords.substr(comma_pos + 1).c_str(), &y)); - - WebKit::WebMouseEvent mouse_event; - mouse_event.type = WebKit::WebInputEvent::MouseDown; - mouse_event.button = WebKit::WebMouseEvent::ButtonLeft; - mouse_event.x = x; - mouse_event.y = y; - mouse_event.clickCount = 1; - TabContents* tab_contents = browser()->GetSelectedTabContents(); - tab_contents->render_view_host()->ForwardMouseEvent(mouse_event); - mouse_event.type = WebKit::WebInputEvent::MouseUp; - tab_contents->render_view_host()->ForwardMouseEvent(mouse_event); - - // The above defined fake speech input manager would receive the speech input - // request and return the test string as recognition result. The test page - // then sets the URL fragment as 'pass' if it received the expected string. 
- ui_test_utils::WaitForNavigations(&tab_contents->controller(), 1); - EXPECT_EQ("pass", browser()->GetSelectedTabContents()->GetURL().ref()); +#if defined(OS_WIN) +#define MAYBE_TestBasicRecognition FLAKY_TestBasicRecognition +#else +#define MAYBE_TestBasicRecognition TestBasicRecognition +#endif +IN_PROC_BROWSER_TEST_F(SpeechInputBrowserTest, MAYBE_TestBasicRecognition) { + LoadAndRunSpeechInputTest(FILE_PATH_LITERAL("basic_recognition.html")); + EXPECT_TRUE(fake_speech_input_manager_.grammar().empty()); +} + +// Marked as FLAKY due to http://crbug.com/51337 +#if defined(OS_WIN) +#define MAYBE_GrammarAttribute FLAKY_GrammarAttribute +#else +#define MAYBE_GrammarAttribute GrammarAttribute +#endif +IN_PROC_BROWSER_TEST_F(SpeechInputBrowserTest, MAYBE_GrammarAttribute) { + LoadAndRunSpeechInputTest(FILE_PATH_LITERAL("grammar_attribute.html")); + EXPECT_EQ("http://example.com/grammar.xml", + fake_speech_input_manager_.grammar()); } } // namespace speech_input diff --git a/chrome/browser/speech/speech_input_dispatcher_host.cc b/chrome/browser/speech/speech_input_dispatcher_host.cc index 9fc6bb2..0b2ca1d 100644 --- a/chrome/browser/speech/speech_input_dispatcher_host.cc +++ b/chrome/browser/speech/speech_input_dispatcher_host.cc @@ -140,12 +140,13 @@ bool SpeechInputDispatcherHost::OnMessageReceived( void SpeechInputDispatcherHost::OnStartRecognition( int render_view_id, int request_id, - const gfx::Rect& element_rect) { + const gfx::Rect& element_rect, + const std::string& grammar) { int caller_id = callers_->CreateId(resource_message_filter_process_id_, render_view_id, request_id); manager()->StartRecognition(this, caller_id, resource_message_filter_process_id_, - render_view_id, element_rect); + render_view_id, element_rect, grammar); } void SpeechInputDispatcherHost::OnCancelRecognition(int render_view_id, diff --git a/chrome/browser/speech/speech_input_dispatcher_host.h b/chrome/browser/speech/speech_input_dispatcher_host.h index a23cc28..db42f1e 100644 --- 
a/chrome/browser/speech/speech_input_dispatcher_host.h +++ b/chrome/browser/speech/speech_input_dispatcher_host.h @@ -45,7 +45,8 @@ class SpeechInputDispatcherHost void SendMessageToRenderView(IPC::Message* message, int render_view_id); void OnStartRecognition(int render_view_id, int request_id, - const gfx::Rect& element_rect); + const gfx::Rect& element_rect, + const std::string& grammar); void OnCancelRecognition(int render_view_id, int request_id); void OnStopRecording(int render_view_id, int request_id); diff --git a/chrome/browser/speech/speech_input_manager.cc b/chrome/browser/speech/speech_input_manager.cc index 5447087..24f0f5d7 100644 --- a/chrome/browser/speech/speech_input_manager.cc +++ b/chrome/browser/speech/speech_input_manager.cc @@ -28,7 +28,8 @@ class SpeechInputManagerImpl : public SpeechInputManager, int caller_id, int render_process_id, int render_view_id, - const gfx::Rect& element_rect); + const gfx::Rect& element_rect, + const std::string& grammar); virtual void CancelRecognition(int caller_id); virtual void StopRecording(int caller_id); @@ -104,7 +105,8 @@ void SpeechInputManagerImpl::StartRecognition( int caller_id, int render_process_id, int render_view_id, - const gfx::Rect& element_rect) { + const gfx::Rect& element_rect, + const std::string& grammar) { DCHECK(!HasPendingRequest(caller_id)); bubble_controller_->CreateBubble(caller_id, render_process_id, render_view_id, @@ -112,7 +114,7 @@ void SpeechInputManagerImpl::StartRecognition( SpeechInputRequest* request = &requests_[caller_id]; request->delegate = delegate; - request->recognizer = new SpeechRecognizer(this, caller_id); + request->recognizer = new SpeechRecognizer(this, caller_id, grammar); request->is_active = false; StartRecognitionForRequest(caller_id); diff --git a/chrome/browser/speech/speech_input_manager.h b/chrome/browser/speech/speech_input_manager.h index bb95030..ba938b9 100644 --- a/chrome/browser/speech/speech_input_manager.h +++ 
b/chrome/browser/speech/speech_input_manager.h @@ -53,7 +53,8 @@ class SpeechInputManager { int caller_id, int render_process_id, int render_view_id, - const gfx::Rect& element_rect) = 0; + const gfx::Rect& element_rect, + const std::string& grammar) = 0; virtual void CancelRecognition(int caller_id) = 0; virtual void StopRecording(int caller_id) = 0; }; diff --git a/chrome/browser/speech/speech_recognition_request.cc b/chrome/browser/speech/speech_recognition_request.cc index 60ace07..d3a9837 100644 --- a/chrome/browser/speech/speech_recognition_request.cc +++ b/chrome/browser/speech/speech_recognition_request.cc @@ -9,11 +9,14 @@ #include "base/utf_string_conversions.h" #include "base/values.h" #include "chrome/common/net/url_request_context_getter.h" +#include "net/base/escape.h" #include "net/base/load_flags.h" #include "net/url_request/url_request_status.h" namespace { +const char* const kDefaultSpeechRecognitionUrl = + "http://www.google.com/speech-api/v1/recognize?client=chromium&"; const char* const kHypothesesString = "hypotheses"; const char* const kUtteranceString = "utterance"; const char* const kConfidenceString = "confidence"; @@ -106,21 +109,30 @@ namespace speech_input { int SpeechRecognitionRequest::url_fetcher_id_for_tests = 0; SpeechRecognitionRequest::SpeechRecognitionRequest( - URLRequestContextGetter* context, const GURL& url, Delegate* delegate) + URLRequestContextGetter* context, Delegate* delegate) : url_context_(context), - url_(url), delegate_(delegate) { DCHECK(delegate); } SpeechRecognitionRequest::~SpeechRecognitionRequest() {} -bool SpeechRecognitionRequest::Send(const std::string& content_type, +bool SpeechRecognitionRequest::Send(const std::string& grammar, + const std::string& content_type, const std::string& audio_data) { DCHECK(!url_fetcher_.get()); - url_fetcher_.reset(URLFetcher::Create( - url_fetcher_id_for_tests, url_, URLFetcher::POST, this)); + std::vector<std::string> parts; + // TODO(leandro): Replace with the language 
tag given by WebKit.
+ parts.push_back("lang=en-us"); + if (!grammar.empty()) + parts.push_back("grammar=" + EscapeQueryParamValue(grammar, true)); + GURL url(std::string(kDefaultSpeechRecognitionUrl) + JoinString(parts, '&')); + + url_fetcher_.reset(URLFetcher::Create(url_fetcher_id_for_tests, + url, + URLFetcher::POST, + this)); url_fetcher_->set_upload_data(content_type, audio_data); url_fetcher_->set_request_context(url_context_); @@ -143,7 +155,6 @@ void SpeechRecognitionRequest::OnURLFetchComplete( const ResponseCookies& cookies, const std::string& data) { DCHECK_EQ(url_fetcher_.get(), source); - DCHECK(url_.possibly_invalid_spec() == url.possibly_invalid_spec()); bool error = !status.is_success() || response_code != 200; SpeechInputResultArray result; diff --git a/chrome/browser/speech/speech_recognition_request.h b/chrome/browser/speech/speech_recognition_request.h index 1b77c39..d567541 100644 --- a/chrome/browser/speech/speech_recognition_request.h +++ b/chrome/browser/speech/speech_recognition_request.h @@ -37,7 +37,6 @@ class SpeechRecognitionRequest : public URLFetcher::Delegate { // |url| is the server address to which the request wil be sent. SpeechRecognitionRequest(URLRequestContextGetter* context, - const GURL& url, Delegate* delegate); virtual ~SpeechRecognitionRequest(); @@ -45,7 +44,9 @@ class SpeechRecognitionRequest : public URLFetcher::Delegate { // Sends a new request with the given audio data, returns true if successful. // The same object can be used to send multiple requests but only after the // previous request has completed. 
- bool Send(const std::string& content_type, const std::string& audio_data); + bool Send(const std::string& grammar, + const std::string& content_type, + const std::string& audio_data); bool HasPendingRequest() { return url_fetcher_ != NULL; } @@ -59,7 +60,6 @@ class SpeechRecognitionRequest : public URLFetcher::Delegate { private: scoped_refptr<URLRequestContextGetter> url_context_; - const GURL url_; Delegate* delegate_; scoped_ptr<URLFetcher> url_fetcher_; diff --git a/chrome/browser/speech/speech_recognition_request_unittest.cc b/chrome/browser/speech/speech_recognition_request_unittest.cc index b9f1668..833f5c2 100644 --- a/chrome/browser/speech/speech_recognition_request_unittest.cc +++ b/chrome/browser/speech/speech_recognition_request_unittest.cc @@ -44,8 +44,8 @@ class SpeechRecognitionRequestTest : public SpeechRecognitionRequestDelegate, void SpeechRecognitionRequestTest::CreateAndTestRequest( bool success, const std::string& http_response) { - SpeechRecognitionRequest request(NULL, GURL(""), this); - request.Send(std::string(), std::string()); + SpeechRecognitionRequest request(NULL, this); + request.Send(std::string(), std::string(), std::string()); TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0); ASSERT_TRUE(fetcher); URLRequestStatus status; diff --git a/chrome/browser/speech/speech_recognizer.cc b/chrome/browser/speech/speech_recognizer.cc index 580339a..2852c5e 100644 --- a/chrome/browser/speech/speech_recognizer.cc +++ b/chrome/browser/speech/speech_recognizer.cc @@ -17,8 +17,6 @@ using std::list; using std::string; namespace { -const char* const kDefaultSpeechRecognitionUrl = - "http://www.google.com/speech-api/v1/recognize?lang=en-us&client=chromium"; const char* const kContentTypeSpeex = "audio/x-speex-with-header-byte; rate=16000"; const int kSpeexEncodingQuality = 8; @@ -109,9 +107,12 @@ void SpeexEncoder::Encode(const short* samples, } } -SpeechRecognizer::SpeechRecognizer(Delegate* delegate, int caller_id) 
+SpeechRecognizer::SpeechRecognizer(Delegate* delegate, + int caller_id, + const std::string& grammar) : delegate_(delegate), caller_id_(caller_id), + grammar_(grammar), encoder_(new SpeexEncoder()), endpointer_(kAudioSampleRate), num_samples_recorded_(0), @@ -211,10 +212,8 @@ void SpeechRecognizer::StopRecording() { } DCHECK(!request_.get()); request_.reset(new SpeechRecognitionRequest( - Profile::GetDefaultRequestContext(), - GURL(kDefaultSpeechRecognitionUrl), - this)); - request_->Send(kContentTypeSpeex, data); + Profile::GetDefaultRequestContext(), this)); + request_->Send(grammar_, kContentTypeSpeex, data); ReleaseAudioBuffers(); // No need to keep the audio anymore. } diff --git a/chrome/browser/speech/speech_recognizer.h b/chrome/browser/speech/speech_recognizer.h index 48a73c8..0473dde 100644 --- a/chrome/browser/speech/speech_recognizer.h +++ b/chrome/browser/speech/speech_recognizer.h @@ -73,7 +73,8 @@ class SpeechRecognizer virtual ~Delegate() {} }; - SpeechRecognizer(Delegate* delegate, int caller_id); + SpeechRecognizer(Delegate* delegate, int caller_id, + const std::string& grammar); ~SpeechRecognizer(); // Starts audio recording and does recognition after recording ends. The same @@ -117,6 +118,7 @@ class SpeechRecognizer Delegate* delegate_; int caller_id_; + std::string grammar_; // Buffer holding the recorded audio. Owns the strings inside the list. 
typedef std::list<std::string*> AudioBufferQueue; diff --git a/chrome/browser/speech/speech_recognizer_unittest.cc b/chrome/browser/speech/speech_recognizer_unittest.cc index f8d905f..65315949 100644 --- a/chrome/browser/speech/speech_recognizer_unittest.cc +++ b/chrome/browser/speech/speech_recognizer_unittest.cc @@ -23,7 +23,7 @@ class SpeechRecognizerTest : public SpeechRecognizerDelegate, SpeechRecognizerTest() : io_thread_(BrowserThread::IO, &message_loop_), ALLOW_THIS_IN_INITIALIZER_LIST( - recognizer_(new SpeechRecognizer(this, 1))), + recognizer_(new SpeechRecognizer(this, 1, std::string()))), recording_complete_(false), recognition_complete_(false), result_received_(false), |