author     tommi@chromium.org <tommi@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2012-12-01 18:10:13 +0000
committer  tommi@chromium.org <tommi@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2012-12-01 18:10:13 +0000
commit     c0cd97078bcc108053e1a5906e72478511ded3ec (patch)
tree       d470d69eb2e05ce6ae80532046d8b6107ce0e232 /content/browser/speech
parent     5b819ee7638bfc85f6b7700dcb6977b6fcf4bee7 (diff)
Update the Speech API to support arrays of result items
instead of a single item at a time.
BUG=143124
TEST=Covered by content_unittests
Review URL: https://chromiumcodereview.appspot.com/11421103
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@170668 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'content/browser/speech')
15 files changed, 159 insertions, 113 deletions
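The observable API change, in short: every result callback on the engine-delegate and event-listener interfaces goes from taking a single SpeechRecognitionResult to taking a batch. A minimal before/after sketch (illustration only, not part of the patch; SpeechRecognitionResults is assumed to be a std::vector typedef in content/public/common/speech_recognition_result.h, judging from the push_back()/back()/const_iterator usage in the diff below):

    // Assumed typedef, inferred from usage in this commit:
    typedef std::vector<SpeechRecognitionResult> SpeechRecognitionResults;

    // Before: one result per callback.
    virtual void OnSpeechRecognitionEngineResult(
        const SpeechRecognitionResult& result) = 0;

    // After: a batch per callback, so a continuous engine can deliver
    // several provisional and definitive results in a single event.
    virtual void OnSpeechRecognitionEngineResults(
        const SpeechRecognitionResults& results) = 0;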
diff --git a/content/browser/speech/google_one_shot_remote_engine.cc b/content/browser/speech/google_one_shot_remote_engine.cc
index 73e621e..113a939 100644
--- a/content/browser/speech/google_one_shot_remote_engine.cc
+++ b/content/browser/speech/google_one_shot_remote_engine.cc
@@ -262,7 +262,9 @@ void GoogleOneShotRemoteEngine::AudioChunksEnded() {
 void GoogleOneShotRemoteEngine::OnURLFetchComplete(
     const net::URLFetcher* source) {
   DCHECK_EQ(url_fetcher_.get(), source);
-  SpeechRecognitionResult result;
+  SpeechRecognitionResults results;
+  results.push_back(SpeechRecognitionResult());
+  SpeechRecognitionResult& result = results.back();
   SpeechRecognitionError error(SPEECH_RECOGNITION_ERROR_NETWORK);
   std::string data;
@@ -278,7 +280,7 @@ void GoogleOneShotRemoteEngine::OnURLFetchComplete(
     delegate()->OnSpeechRecognitionEngineError(error);
   } else {
     DVLOG(1) << "GoogleOneShotRemoteEngine: Invoking delegate with result.";
-    delegate()->OnSpeechRecognitionEngineResult(result);
+    delegate()->OnSpeechRecognitionEngineResults(results);
   }
 }
diff --git a/content/browser/speech/google_one_shot_remote_engine_unittest.cc b/content/browser/speech/google_one_shot_remote_engine_unittest.cc
index 7142eeb..efcf84f 100644
--- a/content/browser/speech/google_one_shot_remote_engine_unittest.cc
+++ b/content/browser/speech/google_one_shot_remote_engine_unittest.cc
@@ -26,9 +26,9 @@ class GoogleOneShotRemoteEngineTest : public SpeechRecognitionEngineDelegate,
   void CreateAndTestRequest(bool success, const std::string& http_response);
 
   // SpeechRecognitionRequestDelegate methods.
-  virtual void OnSpeechRecognitionEngineResult(
-      const SpeechRecognitionResult& result) OVERRIDE {
-    result_ = result;
+  virtual void OnSpeechRecognitionEngineResults(
+      const SpeechRecognitionResults& results) OVERRIDE {
+    results_ = results;
   }
 
   virtual void OnSpeechRecognitionEngineError(
@@ -36,11 +36,17 @@ class GoogleOneShotRemoteEngineTest : public SpeechRecognitionEngineDelegate,
     error_ = error.code;
   }
 
+  // Accessor for the only result item.
+  const SpeechRecognitionResult& result() const {
+    DCHECK_EQ(results_.size(), 1U);
+    return results_[0];
+  }
+
  protected:
   MessageLoop message_loop_;
   net::TestURLFetcherFactory url_fetcher_factory_;
   SpeechRecognitionErrorCode error_;
-  SpeechRecognitionResult result_;
+  SpeechRecognitionResults results_;
 };
 
 void GoogleOneShotRemoteEngineTest::CreateAndTestRequest(
@@ -67,7 +73,7 @@ void GoogleOneShotRemoteEngineTest::CreateAndTestRequest(
   fetcher->SetResponseString(http_response);
 
   fetcher->delegate()->OnURLFetchComplete(fetcher);
-  // Parsed response will be available in result_.
+  // Parsed response will be available in result().
 }
 
 TEST_F(GoogleOneShotRemoteEngineTest, BasicTest) {
@@ -76,9 +82,9 @@ TEST_F(GoogleOneShotRemoteEngineTest, BasicTest) {
   CreateAndTestRequest(true,
       "{\"status\":0,\"hypotheses\":"
      "[{\"utterance\":\"123456\",\"confidence\":0.9}]}");
   EXPECT_EQ(error_, SPEECH_RECOGNITION_ERROR_NONE);
-  EXPECT_EQ(1U, result_.hypotheses.size());
-  EXPECT_EQ(ASCIIToUTF16("123456"), result_.hypotheses[0].utterance);
-  EXPECT_EQ(0.9, result_.hypotheses[0].confidence);
+  EXPECT_EQ(1U, result().hypotheses.size());
+  EXPECT_EQ(ASCIIToUTF16("123456"), result().hypotheses[0].utterance);
+  EXPECT_EQ(0.9, result().hypotheses[0].confidence);
 
   // Normal success case with multiple results.
   CreateAndTestRequest(true,
@@ -86,37 +92,37 @@ TEST_F(GoogleOneShotRemoteEngineTest, BasicTest) {
       "{\"utterance\":\"hello\",\"confidence\":0.9},"
      "{\"utterance\":\"123456\",\"confidence\":0.5}]}");
   EXPECT_EQ(error_, SPEECH_RECOGNITION_ERROR_NONE);
-  EXPECT_EQ(2u, result_.hypotheses.size());
-  EXPECT_EQ(ASCIIToUTF16("hello"), result_.hypotheses[0].utterance);
-  EXPECT_EQ(0.9, result_.hypotheses[0].confidence);
-  EXPECT_EQ(ASCIIToUTF16("123456"), result_.hypotheses[1].utterance);
-  EXPECT_EQ(0.5, result_.hypotheses[1].confidence);
+  EXPECT_EQ(2u, result().hypotheses.size());
+  EXPECT_EQ(ASCIIToUTF16("hello"), result().hypotheses[0].utterance);
+  EXPECT_EQ(0.9, result().hypotheses[0].confidence);
+  EXPECT_EQ(ASCIIToUTF16("123456"), result().hypotheses[1].utterance);
+  EXPECT_EQ(0.5, result().hypotheses[1].confidence);
 
   // Zero results.
   CreateAndTestRequest(true, "{\"status\":0,\"hypotheses\":[]}");
   EXPECT_EQ(error_, SPEECH_RECOGNITION_ERROR_NONE);
-  EXPECT_EQ(0U, result_.hypotheses.size());
+  EXPECT_EQ(0U, result().hypotheses.size());
 
   // Http failure case.
   CreateAndTestRequest(false, "");
   EXPECT_EQ(error_, SPEECH_RECOGNITION_ERROR_NETWORK);
-  EXPECT_EQ(0U, result_.hypotheses.size());
+  EXPECT_EQ(0U, result().hypotheses.size());
 
   // Invalid status case.
   CreateAndTestRequest(true, "{\"status\":\"invalid\",\"hypotheses\":[]}");
   EXPECT_EQ(error_, SPEECH_RECOGNITION_ERROR_NETWORK);
-  EXPECT_EQ(0U, result_.hypotheses.size());
+  EXPECT_EQ(0U, result().hypotheses.size());
 
   // Server-side error case.
   CreateAndTestRequest(true, "{\"status\":1,\"hypotheses\":[]}");
   EXPECT_EQ(error_, SPEECH_RECOGNITION_ERROR_NETWORK);
-  EXPECT_EQ(0U, result_.hypotheses.size());
+  EXPECT_EQ(0U, result().hypotheses.size());
 
   // Malformed JSON case.
   CreateAndTestRequest(true, "{\"status\":0,\"hypotheses\":"
                              "[{\"unknownkey\":\"hello\"}]}");
   EXPECT_EQ(error_, SPEECH_RECOGNITION_ERROR_NETWORK);
-  EXPECT_EQ(0U, result_.hypotheses.size());
+  EXPECT_EQ(0U, result().hypotheses.size());
 }
 
 }  // namespace content
diff --git a/content/browser/speech/google_streaming_remote_engine.cc b/content/browser/speech/google_streaming_remote_engine.cc
index 974118f..2ababe5 100644
--- a/content/browser/speech/google_streaming_remote_engine.cc
+++ b/content/browser/speech/google_streaming_remote_engine.cc
@@ -436,9 +436,11 @@ GoogleStreamingRemoteEngine::ProcessDownstreamResponse(
     }
   }
 
+  SpeechRecognitionResults results;
   for (int i = 0; i < ws_event.result_size(); ++i) {
     const proto::SpeechRecognitionResult& ws_result = ws_event.result(i);
-    SpeechRecognitionResult result;
+    results.push_back(SpeechRecognitionResult());
+    SpeechRecognitionResult& result = results.back();
     result.is_provisional = !(ws_result.has_final() && ws_result.final());
 
     if (!result.is_provisional)
@@ -459,10 +461,10 @@ GoogleStreamingRemoteEngine::ProcessDownstreamResponse(
       result.hypotheses.push_back(hypothesis);
     }
-
-    delegate()->OnSpeechRecognitionEngineResult(result);
   }
 
+  delegate()->OnSpeechRecognitionEngineResults(results);
+
   return state_;
 }
 
@@ -472,7 +474,7 @@ GoogleStreamingRemoteEngine::RaiseNoMatchErrorIfGotNoResults(
   if (!got_last_definitive_result_) {
     // Provide an empty result to notify that recognition is ended with no
     // errors, yet neither any further results.
-    delegate()->OnSpeechRecognitionEngineResult(SpeechRecognitionResult());
+    delegate()->OnSpeechRecognitionEngineResults(SpeechRecognitionResults());
   }
   return AbortSilently(event_args);
 }
diff --git a/content/browser/speech/google_streaming_remote_engine_unittest.cc b/content/browser/speech/google_streaming_remote_engine_unittest.cc
index 3aa91c8..8223fec 100644
--- a/content/browser/speech/google_streaming_remote_engine_unittest.cc
+++ b/content/browser/speech/google_streaming_remote_engine_unittest.cc
@@ -38,9 +38,9 @@ class GoogleStreamingRemoteEngineTest : public SpeechRecognitionEngineDelegate,
   void CreateAndTestRequest(bool success, const std::string& http_response);
 
   // SpeechRecognitionRequestDelegate methods.
-  virtual void OnSpeechRecognitionEngineResult(
-      const SpeechRecognitionResult& result) OVERRIDE {
-    results_.push(result);
+  virtual void OnSpeechRecognitionEngineResults(
+      const SpeechRecognitionResults& results) OVERRIDE {
+    results_.push(results);
   }
 
   virtual void OnSpeechRecognitionEngineError(
       const SpeechRecognitionError& error) OVERRIDE {
@@ -58,8 +58,8 @@ class GoogleStreamingRemoteEngineTest : public SpeechRecognitionEngineDelegate,
     DOWNSTREAM_ERROR_NETWORK,
     DOWNSTREAM_ERROR_WEBSERVICE_NO_MATCH
   };
-  static bool ResultsAreEqual(const SpeechRecognitionResult& a,
-                              const SpeechRecognitionResult& b);
+  static bool ResultsAreEqual(const SpeechRecognitionResults& a,
+                              const SpeechRecognitionResults& b);
   static std::string SerializeProtobufResponse(
       const proto::SpeechRecognitionEvent& msg);
   static std::string ToBigEndian32(uint32 value);
@@ -73,7 +73,7 @@ class GoogleStreamingRemoteEngineTest : public SpeechRecognitionEngineDelegate,
   void ProvideMockProtoResultDownstream(
       const proto::SpeechRecognitionEvent& result);
   void ProvideMockResultDownstream(const SpeechRecognitionResult& result);
-  void ExpectResultReceived(const SpeechRecognitionResult& result);
+  void ExpectResultsReceived(const SpeechRecognitionResults& result);
   void CloseMockDownstream(DownstreamError error);
 
   scoped_ptr<GoogleStreamingRemoteEngine> engine_under_test_;
@@ -82,7 +82,7 @@ class GoogleStreamingRemoteEngineTest : public SpeechRecognitionEngineDelegate,
   MessageLoop message_loop_;
   std::string response_buffer_;
   SpeechRecognitionErrorCode error_;
-  std::queue<SpeechRecognitionResult> results_;
+  std::queue<SpeechRecognitionResults> results_;
 };
 
 TEST_F(GoogleStreamingRemoteEngineTest, SingleDefinitiveResult) {
@@ -104,7 +104,9 @@ TEST_F(GoogleStreamingRemoteEngineTest, SingleDefinitiveResult) {
 
   // Simulate a protobuf message streamed from the server containing a single
   // result with two hypotheses.
-  SpeechRecognitionResult result;
+  SpeechRecognitionResults results;
+  results.push_back(SpeechRecognitionResult());
+  SpeechRecognitionResult& result = results.back();
   result.is_provisional = false;
   result.hypotheses.push_back(
       SpeechRecognitionHypothesis(UTF8ToUTF16("hypothesis 1"), 0.1F));
@@ -112,7 +114,7 @@ TEST_F(GoogleStreamingRemoteEngineTest, SingleDefinitiveResult) {
       SpeechRecognitionHypothesis(UTF8ToUTF16("hypothesis 2"), 0.2F));
 
   ProvideMockResultDownstream(result);
-  ExpectResultReceived(result);
+  ExpectResultsReceived(results);
   ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
 
   // Ensure everything is closed cleanly after the downstream is closed.
@@ -132,14 +134,16 @@ TEST_F(GoogleStreamingRemoteEngineTest, SeveralStreamingResults) {
     InjectDummyAudioChunk();
     ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
 
-    SpeechRecognitionResult result;
+    SpeechRecognitionResults results;
+    results.push_back(SpeechRecognitionResult());
+    SpeechRecognitionResult& result = results.back();
     result.is_provisional = (i % 2 == 0);  // Alternate result types.
     float confidence = result.is_provisional ? 0.0F : (i * 0.1F);
     result.hypotheses.push_back(
         SpeechRecognitionHypothesis(UTF8ToUTF16("hypothesis"), confidence));
 
     ProvideMockResultDownstream(result);
-    ExpectResultReceived(result);
+    ExpectResultsReceived(results);
     ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
   }
 
@@ -149,12 +153,14 @@ TEST_F(GoogleStreamingRemoteEngineTest, SeveralStreamingResults) {
   ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
 
   // Simulate a final definitive result.
-  SpeechRecognitionResult result;
+  SpeechRecognitionResults results;
+  results.push_back(SpeechRecognitionResult());
+  SpeechRecognitionResult& result = results.back();
   result.is_provisional = false;
   result.hypotheses.push_back(
       SpeechRecognitionHypothesis(UTF8ToUTF16("The final result"), 1.0F));
   ProvideMockResultDownstream(result);
-  ExpectResultReceived(result);
+  ExpectResultsReceived(results);
   ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
 
   // Ensure everything is closed cleanly after the downstream is closed.
@@ -175,11 +181,13 @@ TEST_F(GoogleStreamingRemoteEngineTest, NoFinalResultAfterAudioChunksEnded) {
   ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
 
   // Simulate the corresponding definitive result.
-  SpeechRecognitionResult result;
+  SpeechRecognitionResults results;
+  results.push_back(SpeechRecognitionResult());
+  SpeechRecognitionResult& result = results.back();
   result.hypotheses.push_back(
       SpeechRecognitionHypothesis(UTF8ToUTF16("hypothesis"), 1.0F));
   ProvideMockResultDownstream(result);
-  ExpectResultReceived(result);
+  ExpectResultsReceived(results);
   ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
 
   // Simulate a silent downstream closure after |AudioChunksEnded|.
@@ -190,8 +198,8 @@ TEST_F(GoogleStreamingRemoteEngineTest, NoFinalResultAfterAudioChunksEnded) {
 
   // Expect an empty result, aimed at notifying recognition ended with no
   // actual results nor errors.
-  SpeechRecognitionResult empty_result;
-  ExpectResultReceived(empty_result);
+  SpeechRecognitionResults empty_results;
+  ExpectResultsReceived(empty_results);
 
   // Ensure everything is closed cleanly after the downstream is closed.
   ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
@@ -212,12 +220,14 @@ TEST_F(GoogleStreamingRemoteEngineTest, NoMatchError) {
   ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
 
   // Simulate only a provisional result.
-  SpeechRecognitionResult result;
+  SpeechRecognitionResults results;
+  results.push_back(SpeechRecognitionResult());
+  SpeechRecognitionResult& result = results.back();
   result.is_provisional = true;
   result.hypotheses.push_back(
       SpeechRecognitionHypothesis(UTF8ToUTF16("The final result"), 0.0F));
   ProvideMockResultDownstream(result);
-  ExpectResultReceived(result);
+  ExpectResultsReceived(results);
   ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
 
   CloseMockDownstream(DOWNSTREAM_ERROR_WEBSERVICE_NO_MATCH);
@@ -225,8 +235,8 @@ TEST_F(GoogleStreamingRemoteEngineTest, NoMatchError) {
 
   // Expect an empty result.
   ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
   EndMockRecognition();
-  SpeechRecognitionResult empty_result;
-  ExpectResultReceived(empty_result);
+  SpeechRecognitionResults empty_result;
+  ExpectResultsReceived(empty_result);
 }
 
 TEST_F(GoogleStreamingRemoteEngineTest, HTTPError) {
@@ -287,13 +297,15 @@ TEST_F(GoogleStreamingRemoteEngineTest, Stability) {
   ProvideMockProtoResultDownstream(proto_event);
 
   // Set up expectations.
-  SpeechRecognitionResult expected;
-  expected.is_provisional = true;
-  expected.hypotheses.push_back(
+  SpeechRecognitionResults results;
+  results.push_back(SpeechRecognitionResult());
+  SpeechRecognitionResult& result = results.back();
+  result.is_provisional = true;
+  result.hypotheses.push_back(
       SpeechRecognitionHypothesis(UTF8ToUTF16("foo"), 0.5));
 
   // Check that the protobuf generated the expected result.
-  ExpectResultReceived(expected);
+  ExpectResultsReceived(results);
 
   // Since it was a provisional result, recognition is still pending.
   ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
@@ -304,8 +316,8 @@ TEST_F(GoogleStreamingRemoteEngineTest, Stability) {
   EndMockRecognition();
 
   // Since there was no final result, we get an empty "no match" result.
-  SpeechRecognitionResult empty_result;
-  ExpectResultReceived(empty_result);
+  SpeechRecognitionResults empty_result;
+  ExpectResultsReceived(empty_result);
   ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
   ASSERT_EQ(0U, results_.size());
 }
@@ -436,27 +448,35 @@ void GoogleStreamingRemoteEngineTest::CloseMockDownstream(
   downstream_fetcher->delegate()->OnURLFetchComplete(downstream_fetcher);
 }
 
-void GoogleStreamingRemoteEngineTest::ExpectResultReceived(
-    const SpeechRecognitionResult& result) {
+void GoogleStreamingRemoteEngineTest::ExpectResultsReceived(
+    const SpeechRecognitionResults& results) {
   ASSERT_GE(1U, results_.size());
-  ASSERT_TRUE(ResultsAreEqual(result, results_.front()));
+  ASSERT_TRUE(ResultsAreEqual(results, results_.front()));
   results_.pop();
 }
 
 bool GoogleStreamingRemoteEngineTest::ResultsAreEqual(
-    const SpeechRecognitionResult& a, const SpeechRecognitionResult& b) {
-  if (a.is_provisional != b.is_provisional ||
-      a.hypotheses.size() != b.hypotheses.size()) {
+    const SpeechRecognitionResults& a, const SpeechRecognitionResults& b) {
+  if (a.size() != b.size())
     return false;
-  }
-  for (size_t i = 0; i < a.hypotheses.size(); ++i) {
-    const SpeechRecognitionHypothesis& hyp_a = a.hypotheses[i];
-    const SpeechRecognitionHypothesis& hyp_b = b.hypotheses[i];
-    if (hyp_a.utterance != hyp_b.utterance ||
-        hyp_a.confidence != hyp_b.confidence) {
+
+  SpeechRecognitionResults::const_iterator it_a = a.begin();
+  SpeechRecognitionResults::const_iterator it_b = b.begin();
+  for (; it_a != a.end() && it_b != b.end(); ++it_a, ++it_b) {
+    if (it_a->is_provisional != it_b->is_provisional ||
+        it_a->hypotheses.size() != it_b->hypotheses.size()) {
       return false;
     }
+    for (size_t i = 0; i < it_a->hypotheses.size(); ++i) {
+      const SpeechRecognitionHypothesis& hyp_a = it_a->hypotheses[i];
+      const SpeechRecognitionHypothesis& hyp_b = it_b->hypotheses[i];
+      if (hyp_a.utterance != hyp_b.utterance ||
+          hyp_a.confidence != hyp_b.confidence) {
+        return false;
+      }
+    }
   }
+
   return true;
 }
diff --git a/content/browser/speech/input_tag_speech_dispatcher_host.cc b/content/browser/speech/input_tag_speech_dispatcher_host.cc
index 3594f60..7e9d663 100644
--- a/content/browser/speech/input_tag_speech_dispatcher_host.cc
+++ b/content/browser/speech/input_tag_speech_dispatcher_host.cc
@@ -168,39 +168,39 @@ void InputTagSpeechDispatcherHost::OnStopRecording(int render_view_id,
 }
 
 // -------- SpeechRecognitionEventListener interface implementation -----------
-void InputTagSpeechDispatcherHost::OnRecognitionResult(
+void InputTagSpeechDispatcherHost::OnRecognitionResults(
     int session_id,
-    const SpeechRecognitionResult& result) {
-  VLOG(1) << "InputTagSpeechDispatcherHost::OnRecognitionResult enter";
+    const SpeechRecognitionResults& results) {
+  DVLOG(1) << "InputTagSpeechDispatcherHost::OnRecognitionResults enter";
 
   const SpeechRecognitionSessionContext& context =
       manager()->GetSessionContext(session_id);
-  Send(new InputTagSpeechMsg_SetRecognitionResult(
+  Send(new InputTagSpeechMsg_SetRecognitionResults(
       context.render_view_id,
       context.request_id,
-      result));
-  VLOG(1) << "InputTagSpeechDispatcherHost::OnRecognitionResult exit";
+      results));
+  DVLOG(1) << "InputTagSpeechDispatcherHost::OnRecognitionResults exit";
 }
 
 void InputTagSpeechDispatcherHost::OnAudioEnd(int session_id) {
-  VLOG(1) << "InputTagSpeechDispatcherHost::OnAudioEnd enter";
+  DVLOG(1) << "InputTagSpeechDispatcherHost::OnAudioEnd enter";
 
   const SpeechRecognitionSessionContext& context =
       manager()->GetSessionContext(session_id);
   Send(new InputTagSpeechMsg_RecordingComplete(context.render_view_id,
                                                context.request_id));
-  VLOG(1) << "InputTagSpeechDispatcherHost::OnAudioEnd exit";
+  DVLOG(1) << "InputTagSpeechDispatcherHost::OnAudioEnd exit";
 }
 
 void InputTagSpeechDispatcherHost::OnRecognitionEnd(int session_id) {
-  VLOG(1) << "InputTagSpeechDispatcherHost::OnRecognitionEnd enter";
+  DVLOG(1) << "InputTagSpeechDispatcherHost::OnRecognitionEnd enter";
   const SpeechRecognitionSessionContext& context =
       manager()->GetSessionContext(session_id);
   Send(new InputTagSpeechMsg_RecognitionComplete(context.render_view_id,
                                                  context.request_id));
-  VLOG(1) << "InputTagSpeechDispatcherHost::OnRecognitionEnd exit";
+  DVLOG(1) << "InputTagSpeechDispatcherHost::OnRecognitionEnd exit";
 }
 
 // The events below are currently not used by x-webkit-speech implementation.
diff --git a/content/browser/speech/input_tag_speech_dispatcher_host.h b/content/browser/speech/input_tag_speech_dispatcher_host.h
index 48b4624..7bd3a01 100644
--- a/content/browser/speech/input_tag_speech_dispatcher_host.h
+++ b/content/browser/speech/input_tag_speech_dispatcher_host.h
@@ -10,6 +10,7 @@
 #include "content/common/content_export.h"
 #include "content/public/browser/browser_message_filter.h"
 #include "content/public/browser/speech_recognition_event_listener.h"
+#include "content/public/common/speech_recognition_result.h"
 #include "net/url_request/url_request_context_getter.h"
 
 struct InputTagSpeechHostMsg_StartRecognition_Params;
@@ -18,7 +19,6 @@ namespace content {
 
 class SpeechRecognitionManager;
 class SpeechRecognitionPreferences;
-struct SpeechRecognitionResult;
 
 // InputTagSpeechDispatcherHost is a delegate for Speech API messages used by
 // RenderMessageFilter. Basically it acts as a proxy, relaying the events coming
@@ -42,9 +42,9 @@ class CONTENT_EXPORT InputTagSpeechDispatcherHost
   virtual void OnSoundEnd(int session_id) OVERRIDE;
   virtual void OnAudioEnd(int session_id) OVERRIDE;
   virtual void OnRecognitionEnd(int session_id) OVERRIDE;
-  virtual void OnRecognitionResult(
+  virtual void OnRecognitionResults(
       int session_id,
-      const SpeechRecognitionResult& result) OVERRIDE;
+      const SpeechRecognitionResults& results) OVERRIDE;
   virtual void OnRecognitionError(
       int session_id,
       const SpeechRecognitionError& error) OVERRIDE;
diff --git a/content/browser/speech/speech_recognition_browsertest.cc b/content/browser/speech/speech_recognition_browsertest.cc
index 9986459..f833e9c 100644
--- a/content/browser/speech/speech_recognition_browsertest.cc
+++ b/content/browser/speech/speech_recognition_browsertest.cc
@@ -153,10 +153,12 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManager {
     if (session_id_) {  // Do a check in case we were cancelled..
       VLOG(1) << "Setting fake recognition result.";
       listener_->OnAudioEnd(session_id_);
-      SpeechRecognitionResult results;
-      results.hypotheses.push_back(SpeechRecognitionHypothesis(
+      SpeechRecognitionResult result;
+      result.hypotheses.push_back(SpeechRecognitionHypothesis(
           ASCIIToUTF16(kTestResult), 1.0));
-      listener_->OnRecognitionResult(session_id_, results);
+      SpeechRecognitionResults results;
+      results.push_back(result);
+      listener_->OnRecognitionResults(session_id_, results);
       listener_->OnRecognitionEnd(session_id_);
       session_id_ = 0;
       listener_ = NULL;
diff --git a/content/browser/speech/speech_recognition_dispatcher_host.cc b/content/browser/speech/speech_recognition_dispatcher_host.cc
index 6a398f9..01a6a56 100644
--- a/content/browser/speech/speech_recognition_dispatcher_host.cc
+++ b/content/browser/speech/speech_recognition_dispatcher_host.cc
@@ -165,14 +165,14 @@ void SpeechRecognitionDispatcherHost::OnRecognitionEnd(int session_id) {
                                                    context.request_id));
 }
 
-void SpeechRecognitionDispatcherHost::OnRecognitionResult(
+void SpeechRecognitionDispatcherHost::OnRecognitionResults(
     int session_id,
-    const SpeechRecognitionResult& result) {
+    const SpeechRecognitionResults& results) {
   const SpeechRecognitionSessionContext& context =
       manager()->GetSessionContext(session_id);
   Send(new SpeechRecognitionMsg_ResultRetrieved(context.render_view_id,
                                                 context.request_id,
-                                                result));
+                                                results));
 }
 
 void SpeechRecognitionDispatcherHost::OnRecognitionError(
diff --git a/content/browser/speech/speech_recognition_dispatcher_host.h b/content/browser/speech/speech_recognition_dispatcher_host.h
index df14953..dea822d 100644
--- a/content/browser/speech/speech_recognition_dispatcher_host.h
+++ b/content/browser/speech/speech_recognition_dispatcher_host.h
@@ -40,9 +40,9 @@ class CONTENT_EXPORT SpeechRecognitionDispatcherHost
   virtual void OnSoundEnd(int session_id) OVERRIDE;
   virtual void OnAudioEnd(int session_id) OVERRIDE;
   virtual void OnRecognitionEnd(int session_id) OVERRIDE;
-  virtual void OnRecognitionResult(
+  virtual void OnRecognitionResults(
      int session_id,
-      const SpeechRecognitionResult& result) OVERRIDE;
+      const SpeechRecognitionResults& results) OVERRIDE;
   virtual void OnRecognitionError(
       int session_id,
       const SpeechRecognitionError& error) OVERRIDE;
diff --git a/content/browser/speech/speech_recognition_engine.h b/content/browser/speech/speech_recognition_engine.h
index abd94e9..73ba26e 100644
--- a/content/browser/speech/speech_recognition_engine.h
+++ b/content/browser/speech/speech_recognition_engine.h
@@ -10,11 +10,11 @@
 #include "base/basictypes.h"
 #include "content/common/content_export.h"
 #include "content/public/common/speech_recognition_grammar.h"
+#include "content/public/common/speech_recognition_result.h"
 
 namespace content {
 
 class AudioChunk;
-struct SpeechRecognitionResult;
 struct SpeechRecognitionError;
 
 // This interface models the basic contract that a speech recognition engine,
@@ -35,8 +35,8 @@ class SpeechRecognitionEngine {
     // Called whenever a result is retrieved. It might be issued several times,
     // (e.g., in the case of continuous speech recognition engine
    // implementations).
-    virtual void OnSpeechRecognitionEngineResult(
-        const SpeechRecognitionResult& result) = 0;
+    virtual void OnSpeechRecognitionEngineResults(
+        const SpeechRecognitionResults& results) = 0;
     virtual void OnSpeechRecognitionEngineError(
         const SpeechRecognitionError& error) = 0;
 
diff --git a/content/browser/speech/speech_recognition_manager_impl.cc b/content/browser/speech/speech_recognition_manager_impl.cc
index 0cabc4d..f854901 100644
--- a/content/browser/speech/speech_recognition_manager_impl.cc
+++ b/content/browser/speech/speech_recognition_manager_impl.cc
@@ -329,16 +329,16 @@ void SpeechRecognitionManagerImpl::OnAudioEnd(int session_id) {
                  weak_factory_.GetWeakPtr(), session_id, EVENT_AUDIO_ENDED));
 }
 
-void SpeechRecognitionManagerImpl::OnRecognitionResult(
-    int session_id, const SpeechRecognitionResult& result) {
+void SpeechRecognitionManagerImpl::OnRecognitionResults(
+    int session_id, const SpeechRecognitionResults& results) {
   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
   if (!SessionExists(session_id))
     return;
 
   if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
-    delegate_listener->OnRecognitionResult(session_id, result);
+    delegate_listener->OnRecognitionResults(session_id, results);
   if (SpeechRecognitionEventListener* listener = GetListener(session_id))
-    listener->OnRecognitionResult(session_id, result);
+    listener->OnRecognitionResults(session_id, results);
 }
 
 void SpeechRecognitionManagerImpl::OnRecognitionError(
diff --git a/content/browser/speech/speech_recognition_manager_impl.h b/content/browser/speech/speech_recognition_manager_impl.h
index 732426c..39d49cc 100644
--- a/content/browser/speech/speech_recognition_manager_impl.h
+++ b/content/browser/speech/speech_recognition_manager_impl.h
@@ -81,8 +81,8 @@ class CONTENT_EXPORT SpeechRecognitionManagerImpl :
   virtual void OnSoundEnd(int session_id) OVERRIDE;
   virtual void OnAudioEnd(int session_id) OVERRIDE;
   virtual void OnRecognitionEnd(int session_id) OVERRIDE;
-  virtual void OnRecognitionResult(
-      int session_id, const SpeechRecognitionResult& result) OVERRIDE;
+  virtual void OnRecognitionResults(
+      int session_id, const SpeechRecognitionResults& result) OVERRIDE;
   virtual void OnRecognitionError(
       int session_id, const SpeechRecognitionError& error) OVERRIDE;
   virtual void OnAudioLevelsChange(int session_id, float volume,
diff --git a/content/browser/speech/speech_recognizer.cc b/content/browser/speech/speech_recognizer.cc
index 481306b..7c3dff2 100644
--- a/content/browser/speech/speech_recognizer.cc
+++ b/content/browser/speech/speech_recognizer.cc
@@ -186,10 +186,10 @@ void SpeechRecognizer::OnData(AudioInputController* controller,
 
 void SpeechRecognizer::OnAudioClosed(AudioInputController*) {}
 
-void SpeechRecognizer::OnSpeechRecognitionEngineResult(
-    const SpeechRecognitionResult& result) {
+void SpeechRecognizer::OnSpeechRecognitionEngineResults(
+    const SpeechRecognitionResults& results) {
   FSMEventArgs event_args(EVENT_ENGINE_RESULT);
-  event_args.engine_result = result;
+  event_args.engine_results = results;
   BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
                           base::Bind(&SpeechRecognizer::DispatchEvent,
                                      this, event_args));
@@ -554,23 +554,37 @@ SpeechRecognizer::FSMState SpeechRecognizer::ProcessIntermediateResult(
     DCHECK_EQ(STATE_RECOGNIZING, state_);
   }
 
-  const SpeechRecognitionResult& result = event_args.engine_result;
-  listener_->OnRecognitionResult(session_id_, result);
+  listener_->OnRecognitionResults(session_id_, event_args.engine_results);
   return STATE_RECOGNIZING;
 }
 
 SpeechRecognizer::FSMState
 SpeechRecognizer::ProcessFinalResult(const FSMEventArgs& event_args) {
-  const SpeechRecognitionResult& result = event_args.engine_result;
-  if (result.is_provisional) {
-    DCHECK(!is_single_shot_);
-    listener_->OnRecognitionResult(session_id_, result);
+  const SpeechRecognitionResults& results = event_args.engine_results;
+  SpeechRecognitionResults::const_iterator i = results.begin();
+  bool provisional_results_pending = false;
+  bool results_are_empty = true;
+  for (; i != results.end(); ++i) {
+    const SpeechRecognitionResult& result = *i;
+    if (result.is_provisional) {
+      provisional_results_pending = true;
+      DCHECK(!is_single_shot_);
+    } else if (results_are_empty) {
+      results_are_empty = result.hypotheses.empty();
+    }
+  }
+
+  if (provisional_results_pending) {
+    listener_->OnRecognitionResults(session_id_, results);
     // We don't end the recognition if a provisional result is received in
     // STATE_WAITING_FINAL_RESULT. A definitive result will come next and will
     // end the recognition.
     return state_;
-  } else {
-    recognition_engine_->EndRecognition();
+  }
+
+  recognition_engine_->EndRecognition();
+
+  if (!results_are_empty) {
    // We could receive an empty result (which we won't propagate further)
    // in the following (continuous) scenario:
    //  1. The caller start pushing audio and receives some results;
@@ -580,11 +594,11 @@ SpeechRecognizer::ProcessFinalResult(const FSMEventArgs& event_args) {
    //  4. The speech recognition engine, therefore, emits an empty result to
    //     notify that the recognition is ended with no error, yet neither any
    //     further result.
-    if (result.hypotheses.size() > 0)
-      listener_->OnRecognitionResult(session_id_, result);
-    listener_->OnRecognitionEnd(session_id_);
-    return STATE_IDLE;
+    listener_->OnRecognitionResults(session_id_, results);
   }
+
+  listener_->OnRecognitionEnd(session_id_);
+  return STATE_IDLE;
 }
 
 SpeechRecognizer::FSMState
diff --git a/content/browser/speech/speech_recognizer.h b/content/browser/speech/speech_recognizer.h
index 7df44fe..3d4e85c 100644
--- a/content/browser/speech/speech_recognizer.h
+++ b/content/browser/speech/speech_recognizer.h
@@ -83,7 +83,7 @@ class CONTENT_EXPORT SpeechRecognizer
     FSMEvent event;
     int audio_error_code;
     scoped_refptr<AudioChunk> audio_data;
-    SpeechRecognitionResult engine_result;
+    SpeechRecognitionResults engine_results;
     SpeechRecognitionError engine_error;
   };
 
@@ -135,8 +135,8 @@ class CONTENT_EXPORT SpeechRecognizer
                       const uint8* data, uint32 size) OVERRIDE;
 
   // SpeechRecognitionEngineDelegate methods.
-  virtual void OnSpeechRecognitionEngineResult(
-      const SpeechRecognitionResult& result) OVERRIDE;
+  virtual void OnSpeechRecognitionEngineResults(
+      const SpeechRecognitionResults& results) OVERRIDE;
   virtual void OnSpeechRecognitionEngineError(
       const SpeechRecognitionError& error) OVERRIDE;
 
diff --git a/content/browser/speech/speech_recognizer_unittest.cc b/content/browser/speech/speech_recognizer_unittest.cc
index a697ab4..ff5bb48 100644
--- a/content/browser/speech/speech_recognizer_unittest.cc
+++ b/content/browser/speech/speech_recognizer_unittest.cc
@@ -96,8 +96,8 @@ class SpeechRecognizerTest : public SpeechRecognitionEventListener,
     CheckEventsConsistency();
   }
 
-  virtual void OnRecognitionResult(
-      int session_id, const SpeechRecognitionResult& result) OVERRIDE {
+  virtual void OnRecognitionResults(
+      int session_id, const SpeechRecognitionResults& results) OVERRIDE {
     result_received_ = true;
   }
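A consumer of the new batched callback iterates the vector rather than handling one item; a hypothetical listener-side sketch (not from this commit), mirroring the iteration pattern ProcessFinalResult() uses above and the same assumed std::vector typedef:

    // Hypothetical consumer, C++03 style to match the code base.
    void HandleResults(const content::SpeechRecognitionResults& results) {
      // Walk every result in the batch; in continuous mode, provisional
      // items may precede the definitive one.
      content::SpeechRecognitionResults::const_iterator it = results.begin();
      for (; it != results.end(); ++it) {
        if (it->is_provisional)
          continue;  // Skip provisional items; a definitive result follows.
        for (size_t i = 0; i < it->hypotheses.size(); ++i) {
          // it->hypotheses[i].utterance and .confidence carry the payload.
        }
      }
    }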