diff options
20 files changed, 149 insertions, 62 deletions
diff --git a/chrome/browser/speech/speech_input_extension_apitest.cc b/chrome/browser/speech/speech_input_extension_apitest.cc index 7dab696..cf5e7a9 100644 --- a/chrome/browser/speech/speech_input_extension_apitest.cc +++ b/chrome/browser/speech/speech_input_extension_apitest.cc @@ -174,7 +174,8 @@ void SpeechInputExtensionApiTest::ProvideResults(int session_id) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); if (next_error_ != content::SPEECH_RECOGNITION_ERROR_NONE) { - GetManager()->OnRecognitionError(session_id, next_error_); + GetManager()->OnRecognitionError( + session_id, content::SpeechRecognitionError(next_error_)); return; } diff --git a/content/browser/speech/google_one_shot_remote_engine.cc b/content/browser/speech/google_one_shot_remote_engine.cc index 2007d4b..7d7b237 100644 --- a/content/browser/speech/google_one_shot_remote_engine.cc +++ b/content/browser/speech/google_one_shot_remote_engine.cc @@ -35,8 +35,6 @@ const char* const kConfidenceString = "confidence"; const int kWebServiceStatusNoError = 0; const int kWebServiceStatusNoSpeech = 4; const int kWebServiceStatusNoMatch = 5; -const int kDefaultConfigSampleRate = 8000; -const int kDefaultConfigBitsPerSample = 16; const speech::AudioEncoder::Codec kDefaultAudioCodec = speech::AudioEncoder::CODEC_FLAC; // TODO(satish): Remove this hardcoded value once the page is allowed to @@ -156,14 +154,6 @@ namespace speech { const int GoogleOneShotRemoteEngine::kAudioPacketIntervalMs = 100; int GoogleOneShotRemoteEngine::url_fetcher_id_for_tests = 0; -GoogleOneShotRemoteEngineConfig::GoogleOneShotRemoteEngineConfig() - : filter_profanities(false), - audio_sample_rate(kDefaultConfigSampleRate), - audio_num_bits_per_sample(kDefaultConfigBitsPerSample) { -} - -GoogleOneShotRemoteEngineConfig::~GoogleOneShotRemoteEngineConfig() {} - GoogleOneShotRemoteEngine::GoogleOneShotRemoteEngine( net::URLRequestContextGetter* context) : url_context_(context) { @@ -172,7 +162,7 @@ GoogleOneShotRemoteEngine::GoogleOneShotRemoteEngine( GoogleOneShotRemoteEngine::~GoogleOneShotRemoteEngine() {} void GoogleOneShotRemoteEngine::SetConfig( - const GoogleOneShotRemoteEngineConfig& config) { + const SpeechRecognitionEngineConfig& config) { config_ = config; } @@ -199,8 +189,11 @@ void GoogleOneShotRemoteEngine::StartRecognition() { std::vector<std::string> parts; parts.push_back("lang=" + net::EscapeQueryParamValue(lang_param, true)); - if (!config_.grammar.empty()) - parts.push_back("lm=" + net::EscapeQueryParamValue(config_.grammar, true)); + if (!config_.grammars.empty()) { + DCHECK_EQ(config_.grammars.size(), 1U); + parts.push_back("lm=" + net::EscapeQueryParamValue(config_.grammars[0].url, + true)); + } if (!config_.hardware_info.empty()) parts.push_back("xhw=" + net::EscapeQueryParamValue(config_.hardware_info, diff --git a/content/browser/speech/google_one_shot_remote_engine.h b/content/browser/speech/google_one_shot_remote_engine.h index 734089b..ecd9270 100644 --- a/content/browser/speech/google_one_shot_remote_engine.h +++ b/content/browser/speech/google_one_shot_remote_engine.h @@ -31,19 +31,6 @@ namespace speech { class AudioChunk; -struct CONTENT_EXPORT GoogleOneShotRemoteEngineConfig { - std::string language; - std::string grammar; - bool filter_profanities; - std::string hardware_info; - std::string origin_url; - int audio_sample_rate; - int audio_num_bits_per_sample; - - GoogleOneShotRemoteEngineConfig(); - ~GoogleOneShotRemoteEngineConfig(); -}; - // Implements a SpeechRecognitionEngine by means of remote interaction with // Google speech recognition webservice. class CONTENT_EXPORT GoogleOneShotRemoteEngine @@ -57,9 +44,9 @@ class CONTENT_EXPORT GoogleOneShotRemoteEngine explicit GoogleOneShotRemoteEngine(net::URLRequestContextGetter* context); virtual ~GoogleOneShotRemoteEngine(); - void SetConfig(const GoogleOneShotRemoteEngineConfig& config); // SpeechRecognitionEngine methods. + virtual void SetConfig(const SpeechRecognitionEngineConfig& config) OVERRIDE; virtual void StartRecognition() OVERRIDE; virtual void EndRecognition() OVERRIDE; virtual void TakeAudioChunk(const AudioChunk& data) OVERRIDE; @@ -71,7 +58,7 @@ class CONTENT_EXPORT GoogleOneShotRemoteEngine virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE; private: - GoogleOneShotRemoteEngineConfig config_; + SpeechRecognitionEngineConfig config_; scoped_ptr<content::URLFetcher> url_fetcher_; scoped_refptr<net::URLRequestContextGetter> url_context_; scoped_ptr<AudioEncoder> encoder_; diff --git a/content/browser/speech/input_tag_speech_dispatcher_host.cc b/content/browser/speech/input_tag_speech_dispatcher_host.cc index 4831ecd..cf64597 100644 --- a/content/browser/speech/input_tag_speech_dispatcher_host.cc +++ b/content/browser/speech/input_tag_speech_dispatcher_host.cc @@ -100,7 +100,10 @@ void InputTagSpeechDispatcherHost::OnStartRecognition( SpeechRecognitionSessionConfig config; config.language = params.language; - config.grammar = params.grammar; + if (!params.grammar.empty()) { + config.grammars.push_back( + content::SpeechRecognitionGrammar(params.grammar)); + } config.origin_url = params.origin_url; config.initial_context = context; config.url_request_context_getter = url_request_context_getter_.get(); diff --git a/content/browser/speech/speech_recognition_browsertest.cc b/content/browser/speech/speech_recognition_browsertest.cc index e1bd7b6..ce9c4a63 100644 --- a/content/browser/speech/speech_recognition_browsertest.cc +++ b/content/browser/speech/speech_recognition_browsertest.cc @@ -75,7 +75,8 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManagerImpl { EXPECT_EQ(0, session_id_); EXPECT_EQ(NULL, listener_); listener_ = event_listener; - grammar_ = config.grammar; + if (config.grammars.size() > 0) + grammar_ = config.grammars[0].url; session_ctx_ = config.initial_context; session_id_ = 1; return session_id_; diff --git a/content/browser/speech/speech_recognition_engine.cc b/content/browser/speech/speech_recognition_engine.cc new file mode 100644 index 0000000..22b8c11 --- /dev/null +++ b/content/browser/speech/speech_recognition_engine.cc @@ -0,0 +1,23 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "content/browser/speech/speech_recognition_engine.h" + +namespace { +const int kDefaultConfigSampleRate = 8000; +const int kDefaultConfigBitsPerSample = 16; +} // namespace + +namespace speech { + +SpeechRecognitionEngine::Config::Config() + : filter_profanities(false), + audio_sample_rate(kDefaultConfigSampleRate), + audio_num_bits_per_sample(kDefaultConfigBitsPerSample) { +} + +SpeechRecognitionEngine::Config::~Config() { +} + +} // namespace speech diff --git a/content/browser/speech/speech_recognition_engine.h b/content/browser/speech/speech_recognition_engine.h index 5b64d67..2203c5ef 100644 --- a/content/browser/speech/speech_recognition_engine.h +++ b/content/browser/speech/speech_recognition_engine.h @@ -7,9 +7,10 @@ #pragma once #include <string> -#include <vector> #include "base/basictypes.h" +#include "content/common/content_export.h" +#include "content/public/common/speech_recognition_grammar.h" namespace content { struct SpeechRecognitionResult; @@ -27,7 +28,9 @@ class AudioChunk; // TakeAudioChunk For every audio chunk pushed. // AudioChunksEnded Finalize the audio stream (omitted in case of errors). // EndRecognition Mandatory at end of SR (even on errors). -// No delegate callback is allowed before Initialize() or after Cleanup(). +// No delegate callbacks are allowed before StartRecognition or after +// EndRecognition. If a recognition was started, the caller can free the +// SpeechRecognitionEngine only after calling EndRecognition. class SpeechRecognitionEngine { public: // Interface for receiving callbacks from this object. @@ -45,8 +48,26 @@ class SpeechRecognitionEngine { virtual ~Delegate() {} }; + // Remote engine configuration. + struct CONTENT_EXPORT Config { + Config(); + ~Config(); + + std::string language; + content::SpeechRecognitionGrammarArray grammars; + bool filter_profanities; + std::string hardware_info; + std::string origin_url; + int audio_sample_rate; + int audio_num_bits_per_sample; + }; + virtual ~SpeechRecognitionEngine() {} + // Set/change the recognition engine configuration. It is not allowed to call + // this function while a recognition is ongoing. + virtual void SetConfig(const Config& config) = 0; + // Called when the speech recognition begins, before any TakeAudioChunk call. virtual void StartRecognition() = 0; @@ -81,11 +102,11 @@ class SpeechRecognitionEngine { Delegate* delegate_; }; -// This typedef is to workaround the issue with certain versions of +// These typedefs are to workaround the issue with certain versions of // Visual Studio where it gets confused between multiple Delegate -// classes and gives a C2500 error. (I saw this error on the try bots - -// the workaround was not needed for my machine). +// classes and gives a C2500 error. typedef SpeechRecognitionEngine::Delegate SpeechRecognitionEngineDelegate; +typedef SpeechRecognitionEngine::Config SpeechRecognitionEngineConfig; } // namespace speech diff --git a/content/browser/speech/speech_recognition_manager_impl.cc b/content/browser/speech/speech_recognition_manager_impl.cc index 34a962a..982cb38 100644 --- a/content/browser/speech/speech_recognition_manager_impl.cc +++ b/content/browser/speech/speech_recognition_manager_impl.cc @@ -77,9 +77,9 @@ int SpeechRecognitionManagerImpl::CreateSession( if (delegate_) delegate_->GetDiagnosticInformation(&can_report_metrics, &hardware_info); - GoogleOneShotRemoteEngineConfig remote_engine_config; + SpeechRecognitionEngineConfig remote_engine_config; remote_engine_config.language = config.language; - remote_engine_config.grammar = config.grammar; + remote_engine_config.grammars = config.grammars; remote_engine_config.audio_sample_rate = SpeechRecognizerImpl::kAudioSampleRate; remote_engine_config.audio_num_bits_per_sample = @@ -88,8 +88,8 @@ int SpeechRecognitionManagerImpl::CreateSession( remote_engine_config.hardware_info = hardware_info; remote_engine_config.origin_url = can_report_metrics ? config.origin_url : ""; - GoogleOneShotRemoteEngine* google_remote_engine = - new GoogleOneShotRemoteEngine(config.url_request_context_getter); + SpeechRecognitionEngine* google_remote_engine = + new GoogleOneShotRemoteEngine(config.url_request_context_getter); google_remote_engine->SetConfig(remote_engine_config); session.recognizer = new SpeechRecognizerImpl(this, diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc index 9f5f7e5..241055a 100644 --- a/content/browser/speech/speech_recognizer_impl.cc +++ b/content/browser/speech/speech_recognizer_impl.cc @@ -14,6 +14,7 @@ #include "content/public/browser/speech_recognition_event_listener.h" #include "content/public/browser/speech_recognizer.h" #include "content/public/common/speech_recognition_error.h" +#include "content/public/common/speech_recognition_grammar.h" #include "content/public/common/speech_recognition_result.h" #include "net/url_request/url_request_context_getter.h" @@ -21,6 +22,7 @@ using content::BrowserMainLoop; using content::BrowserThread; using content::SpeechRecognitionError; using content::SpeechRecognitionEventListener; +using content::SpeechRecognitionGrammar; using content::SpeechRecognitionResult; using content::SpeechRecognizer; using media::AudioInputController; @@ -79,9 +81,10 @@ SpeechRecognizer* SpeechRecognizer::Create( bool filter_profanities, const std::string& hardware_info, const std::string& origin_url) { - speech::GoogleOneShotRemoteEngineConfig remote_engine_config; + speech::SpeechRecognitionEngineConfig remote_engine_config; remote_engine_config.language = language; - remote_engine_config.grammar = grammar; + if (!grammar.empty()) + remote_engine_config.grammars.push_back(SpeechRecognitionGrammar(grammar)); remote_engine_config.audio_sample_rate = speech::SpeechRecognizerImpl::kAudioSampleRate; remote_engine_config.audio_num_bits_per_sample = diff --git a/content/browser/speech/speech_recognizer_impl_unittest.cc b/content/browser/speech/speech_recognizer_impl_unittest.cc index d9658f9..efd2f2f 100644 --- a/content/browser/speech/speech_recognizer_impl_unittest.cc +++ b/content/browser/speech/speech_recognizer_impl_unittest.cc @@ -108,9 +108,9 @@ class SpeechRecognizerImplTest : public content::SpeechRecognitionEventListener, error_(content::SPEECH_RECOGNITION_ERROR_NONE), volume_(-1.0f) { // SpeechRecognizerImpl takes ownership of sr_engine. - GoogleOneShotRemoteEngine* sr_engine = + SpeechRecognitionEngine* sr_engine = new GoogleOneShotRemoteEngine(NULL /* URLRequestContextGetter */); - GoogleOneShotRemoteEngineConfig config; + SpeechRecognitionEngineConfig config; config.audio_num_bits_per_sample = SpeechRecognizerImpl::kNumBitsPerAudioSample; config.audio_sample_rate = SpeechRecognizerImpl::kAudioSampleRate; diff --git a/content/content_browser.gypi b/content/content_browser.gypi index 0d94c19..1d0a45d 100644 --- a/content/content_browser.gypi +++ b/content/content_browser.gypi @@ -640,6 +640,7 @@ 'browser/speech/google_one_shot_remote_engine.h', 'browser/speech/input_tag_speech_dispatcher_host.cc', 'browser/speech/input_tag_speech_dispatcher_host.h', + 'browser/speech/speech_recognition_engine.cc', 'browser/speech/speech_recognition_engine.h', 'browser/speech/speech_recognition_manager_impl.cc', 'browser/speech/speech_recognition_manager_impl.h', diff --git a/content/content_common.gypi b/content/content_common.gypi index 8d1a59f..e7114b6 100644 --- a/content/content_common.gypi +++ b/content/content_common.gypi @@ -82,6 +82,7 @@ 'public/common/show_desktop_notification_params.cc', 'public/common/show_desktop_notification_params.h', 'public/common/speech_recognition_error.h', + 'public/common/speech_recognition_grammar.h', 'public/common/speech_recognition_result.h', 'public/common/speech_recognition_result.cc', 'public/common/ssl_status.cc', diff --git a/content/public/browser/speech_recognition_event_listener.h b/content/public/browser/speech_recognition_event_listener.h index f847a1d..436f9cf 100644 --- a/content/public/browser/speech_recognition_event_listener.h +++ b/content/public/browser/speech_recognition_event_listener.h @@ -31,13 +31,13 @@ class CONTENT_EXPORT SpeechRecognitionEventListener { // recognition UI once this callback is received. virtual void OnEnvironmentEstimationComplete(int session_id) = 0; - // Informs that the end pointer has started detecting sound (possibly speech). + // Informs that the endpointer has started detecting sound (possibly speech). virtual void OnSoundStart(int session_id) = 0; - // Informs that the end pointer has stopped detecting sound (a long silence). + // Informs that the endpointer has stopped detecting sound (a long silence). virtual void OnSoundEnd(int session_id) = 0; - // Invoked when audio capture stops, either due to the end pointer detecting + // Invoked when audio capture stops, either due to the endpointer detecting // silence, an internal error, or an explicit stop was issued. virtual void OnAudioEnd(int session_id) = 0; diff --git a/content/public/browser/speech_recognition_session_config.cc b/content/public/browser/speech_recognition_session_config.cc index 7b84862..8cb7749 100644 --- a/content/public/browser/speech_recognition_session_config.cc +++ b/content/public/browser/speech_recognition_session_config.cc @@ -8,7 +8,8 @@ namespace content { SpeechRecognitionSessionConfig::SpeechRecognitionSessionConfig() - : filter_profanities(false) { + : is_one_shot(true), + filter_profanities(false) { } SpeechRecognitionSessionConfig::~SpeechRecognitionSessionConfig() { diff --git a/content/public/browser/speech_recognition_session_config.h b/content/public/browser/speech_recognition_session_config.h index b5167c3..ec8a18d 100644 --- a/content/public/browser/speech_recognition_session_config.h +++ b/content/public/browser/speech_recognition_session_config.h @@ -10,6 +10,7 @@ #include "base/memory/ref_counted.h" #include "content/common/content_export.h" #include "content/public/browser/speech_recognition_session_context.h" +#include "content/public/common/speech_recognition_grammar.h" namespace net { class URLRequestContextGetter; @@ -22,8 +23,12 @@ struct CONTENT_EXPORT SpeechRecognitionSessionConfig { SpeechRecognitionSessionConfig(); ~SpeechRecognitionSessionConfig(); + // Enables one shot mode, which delivers a single result at the end of the + // speech, ending automatically recognition. When deasserted, continuous mode + // is used instead, carrying out recognition until an explicit stop request. + bool is_one_shot; std::string language; - std::string grammar; + SpeechRecognitionGrammarArray grammars; std::string origin_url; bool filter_profanities; SpeechRecognitionSessionContext initial_context; diff --git a/content/public/browser/speech_recognition_session_context.h b/content/public/browser/speech_recognition_session_context.h index 70a533a..4bfcc29 100644 --- a/content/public/browser/speech_recognition_session_context.h +++ b/content/public/browser/speech_recognition_session_context.h @@ -12,24 +12,26 @@ namespace content { // The context information required by clients of the SpeechRecognitionManager -// (InputTagSpeechDispatcherHost) and its delegates for mapping the recognition -// session to other browser elements involved with the it (e.g., the page -// element that requested the recognition). The SpeechRecognitionManager is -// not aware of the content of this struct and does NOT use it for its purposes. -// However the manager keeps this struct "attached" to the recognition session -// during all the session lifetime, making its contents available to clients -// (In this regard, see SpeechRecognitionManager::GetSessionContext and +// and its delegates for mapping the recognition session to other browser +// elements involved with it (e.g., the page element that requested the +// recognition). The SpeechRecognitionManager is not aware of the content of +// this struct and does NOT use it for its purposes. However the manager keeps +// this struct "attached" to the recognition session during all the session +// lifetime, making its contents available to clients (In this regard, see +// SpeechRecognitionManager::GetSessionContext and // SpeechRecognitionManager::LookupSessionByContext methods). struct CONTENT_EXPORT SpeechRecognitionSessionContext { SpeechRecognitionSessionContext() : render_process_id(0), render_view_id(0), - render_request_id(0) {} + render_request_id(0), + js_handle_id(0) {} ~SpeechRecognitionSessionContext() {} int render_process_id; int render_view_id; int render_request_id; + int js_handle_id; gfx::Rect element_rect; }; diff --git a/content/public/common/speech_recognition_error.h b/content/public/common/speech_recognition_error.h index d882330..e00a84e 100644 --- a/content/public/common/speech_recognition_error.h +++ b/content/public/common/speech_recognition_error.h @@ -38,13 +38,19 @@ struct CONTENT_EXPORT SpeechRecognitionError { SpeechRecognitionErrorCode code; SpeechAudioErrorDetails details; - SpeechRecognitionError(SpeechRecognitionErrorCode code_value) + SpeechRecognitionError() + : code(SPEECH_RECOGNITION_ERROR_NONE), + details(SPEECH_AUDIO_ERROR_DETAILS_NONE) { + } + explicit SpeechRecognitionError(SpeechRecognitionErrorCode code_value) : code(code_value), - details(SPEECH_AUDIO_ERROR_DETAILS_NONE) {} + details(SPEECH_AUDIO_ERROR_DETAILS_NONE) { + } SpeechRecognitionError(SpeechRecognitionErrorCode code_value, SpeechAudioErrorDetails details_value) : code(code_value), - details(details_value) {} + details(details_value) { + } }; } // namespace content diff --git a/content/public/common/speech_recognition_grammar.h b/content/public/common/speech_recognition_grammar.h new file mode 100644 index 0000000..c0e7842 --- /dev/null +++ b/content/public/common/speech_recognition_grammar.h @@ -0,0 +1,37 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CONTENT_PUBLIC_COMMON_SPEECH_RECOGNITION_GRAMMAR_H_ +#define CONTENT_PUBLIC_COMMON_SPEECH_RECOGNITION_GRAMMAR_H_ +#pragma once + +#include <vector> + +#include "base/basictypes.h" +#include "content/common/content_export.h" + +namespace content { + +struct CONTENT_EXPORT SpeechRecognitionGrammar { + SpeechRecognitionGrammar() + : weight(0.0f) { + } + explicit SpeechRecognitionGrammar(std::string url_value) + : url(url_value), + weight(0.0f) { + } + SpeechRecognitionGrammar(std::string url_value, double weight_value) + : url(url_value), + weight(weight_value) { + } + + std::string url; + double weight; +}; + +typedef std::vector<SpeechRecognitionGrammar> SpeechRecognitionGrammarArray; + +} // namespace content + +#endif // CONTENT_PUBLIC_COMMON_SPEECH_RECOGNITION_GRAMMAR_H_ diff --git a/content/public/common/speech_recognition_result.cc b/content/public/common/speech_recognition_result.cc index 3280932c..9a89bab 100644 --- a/content/public/common/speech_recognition_result.cc +++ b/content/public/common/speech_recognition_result.cc @@ -6,10 +6,11 @@ namespace content { -SpeechRecognitionResult::SpeechRecognitionResult() { +SpeechRecognitionResult::SpeechRecognitionResult() + : provisional(false) { } SpeechRecognitionResult::~SpeechRecognitionResult() { } -} +} // namespace content diff --git a/content/public/common/speech_recognition_result.h b/content/public/common/speech_recognition_result.h index 57477bf..d66e2db 100644 --- a/content/public/common/speech_recognition_result.h +++ b/content/public/common/speech_recognition_result.h @@ -32,6 +32,7 @@ typedef std::vector<SpeechRecognitionHypothesis> struct CONTENT_EXPORT SpeechRecognitionResult { SpeechRecognitionHypothesisArray hypotheses; + bool provisional; SpeechRecognitionResult(); ~SpeechRecognitionResult(); |