summaryrefslogtreecommitdiffstats
path: root/chrome/browser/speech
diff options
context:
space:
mode:
authorjam@chromium.org <jam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-02-26 18:46:15 +0000
committerjam@chromium.org <jam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-02-26 18:46:15 +0000
commit50fab53bddb2c3cb24d5682c913a03226ccf49ef (patch)
treebb04af83ca5f2be010e32c2e10cfd245117a4847 /chrome/browser/speech
parent5c557f37629dc12dfd99e8fb55c235c8c46a8098 (diff)
downloadchromium_src-50fab53bddb2c3cb24d5682c913a03226ccf49ef.zip
chromium_src-50fab53bddb2c3cb24d5682c913a03226ccf49ef.tar.gz
chromium_src-50fab53bddb2c3cb24d5682c913a03226ccf49ef.tar.bz2
Move core pieces of speech from chrome to content.
TBR=satish Review URL: http://codereview.chromium.org/6591024 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@76165 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/speech')
-rw-r--r--chrome/browser/speech/audio_encoder.cc206
-rw-r--r--chrome/browser/speech/audio_encoder.h59
-rw-r--r--chrome/browser/speech/endpointer/endpointer.cc166
-rw-r--r--chrome/browser/speech/endpointer/endpointer.h148
-rw-r--r--chrome/browser/speech/endpointer/endpointer_unittest.cc146
-rw-r--r--chrome/browser/speech/endpointer/energy_endpointer.cc369
-rw-r--r--chrome/browser/speech/endpointer/energy_endpointer.h150
-rw-r--r--chrome/browser/speech/endpointer/energy_endpointer_params.cc53
-rw-r--r--chrome/browser/speech/endpointer/energy_endpointer_params.h137
-rw-r--r--chrome/browser/speech/speech_input_browsertest.cc207
-rw-r--r--chrome/browser/speech/speech_input_dispatcher_host.cc225
-rw-r--r--chrome/browser/speech/speech_input_dispatcher_host.h63
-rw-r--r--chrome/browser/speech/speech_input_manager.cc6
-rw-r--r--chrome/browser/speech/speech_input_manager.h78
-rw-r--r--chrome/browser/speech/speech_recognition_request.cc197
-rw-r--r--chrome/browser/speech/speech_recognition_request.h81
-rw-r--r--chrome/browser/speech/speech_recognition_request_unittest.cc94
-rw-r--r--chrome/browser/speech/speech_recognizer.cc264
-rw-r--r--chrome/browser/speech/speech_recognizer.h151
-rw-r--r--chrome/browser/speech/speech_recognizer_unittest.cc300
20 files changed, 3 insertions, 3097 deletions
diff --git a/chrome/browser/speech/audio_encoder.cc b/chrome/browser/speech/audio_encoder.cc
deleted file mode 100644
index fe48639..0000000
--- a/chrome/browser/speech/audio_encoder.cc
+++ /dev/null
@@ -1,206 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "chrome/browser/speech/audio_encoder.h"
-
-#include "base/basictypes.h"
-#include "base/logging.h"
-#include "base/scoped_ptr.h"
-#include "base/stl_util-inl.h"
-#include "base/string_number_conversions.h"
-#include "third_party/flac/flac.h"
-#include "third_party/speex/speex.h"
-
-using std::string;
-
-namespace {
-
-//-------------------------------- FLACEncoder ---------------------------------
-
-const char* const kContentTypeFLAC = "audio/x-flac; rate=";
-const int kFLACCompressionLevel = 0; // 0 for speed
-
-class FLACEncoder : public speech_input::AudioEncoder {
- public:
- FLACEncoder(int sampling_rate, int bits_per_sample);
- virtual ~FLACEncoder();
- virtual void Encode(const short* samples, int num_samples);
- virtual void Flush();
-
- private:
- static FLAC__StreamEncoderWriteStatus WriteCallback(
- const FLAC__StreamEncoder* encoder,
- const FLAC__byte buffer[],
- size_t bytes,
- unsigned samples,
- unsigned current_frame,
- void* client_data);
-
- FLAC__StreamEncoder* encoder_;
- bool is_encoder_initialized_;
-
- DISALLOW_COPY_AND_ASSIGN(FLACEncoder);
-};
-
-FLAC__StreamEncoderWriteStatus FLACEncoder::WriteCallback(
- const FLAC__StreamEncoder* encoder,
- const FLAC__byte buffer[],
- size_t bytes,
- unsigned samples,
- unsigned current_frame,
- void* client_data) {
- FLACEncoder* me = static_cast<FLACEncoder*>(client_data);
- DCHECK(me->encoder_ == encoder);
- me->AppendToBuffer(new string(reinterpret_cast<const char*>(buffer), bytes));
- return FLAC__STREAM_ENCODER_WRITE_STATUS_OK;
-}
-
-FLACEncoder::FLACEncoder(int sampling_rate, int bits_per_sample)
- : AudioEncoder(std::string(kContentTypeFLAC) +
- base::IntToString(sampling_rate)),
- encoder_(FLAC__stream_encoder_new()),
- is_encoder_initialized_(false) {
- FLAC__stream_encoder_set_channels(encoder_, 1);
- FLAC__stream_encoder_set_bits_per_sample(encoder_, bits_per_sample);
- FLAC__stream_encoder_set_sample_rate(encoder_, sampling_rate);
- FLAC__stream_encoder_set_compression_level(encoder_, kFLACCompressionLevel);
-
- // Initializing the encoder will cause sync bytes to be written to
- // its output stream, so we wait until the first call to Encode()
- // before doing so.
-}
-
-FLACEncoder::~FLACEncoder() {
- FLAC__stream_encoder_delete(encoder_);
-}
-
-// Encodes |num_samples| mono samples. The FLAC stream is initialized lazily
-// on the first call, because initialization already writes bytes to the
-// output. Encoded bytes are delivered through WriteCallback.
-void FLACEncoder::Encode(const short* samples, int num_samples) {
-  if (!is_encoder_initialized_) {
-    const FLAC__StreamEncoderInitStatus encoder_status =
-        FLAC__stream_encoder_init_stream(encoder_, WriteCallback, NULL, NULL,
-                                         NULL, this);
-    DCHECK(encoder_status == FLAC__STREAM_ENCODER_INIT_STATUS_OK);
-    is_encoder_initialized_ = true;
-  }
-
-  // FLAC encoder wants samples as int32s. The widened copy is allocated with
-  // new[], so it has to be owned by scoped_array (which uses delete[]) rather
-  // than scoped_ptr (which uses delete).
-  scoped_array<FLAC__int32> flac_samples(new FLAC__int32[num_samples]);
-  FLAC__int32* flac_samples_ptr = flac_samples.get();
-  for (int i = 0; i < num_samples; ++i)
-    flac_samples_ptr[i] = samples[i];
-
-  // The encoder takes one pointer per channel; there is a single channel.
-  FLAC__stream_encoder_process(encoder_, &flac_samples_ptr, num_samples);
-}
-
-void FLACEncoder::Flush() {
- FLAC__stream_encoder_finish(encoder_);
-}
-
-//-------------------------------- SpeexEncoder --------------------------------
-
-const char* const kContentTypeSpeex = "audio/x-speex-with-header-byte; rate=";
-const int kSpeexEncodingQuality = 8;
-const int kMaxSpeexFrameLength = 110; // (44kbps rate sampled at 32kHz).
-
-// Since the frame length gets written out as a byte in the encoded packet,
-// make sure it is within the byte range.
-COMPILE_ASSERT(kMaxSpeexFrameLength <= 0xFF, invalidLength);
-
-class SpeexEncoder : public speech_input::AudioEncoder {
- public:
- explicit SpeexEncoder(int sampling_rate);
- virtual ~SpeexEncoder();
- virtual void Encode(const short* samples, int num_samples);
- virtual void Flush() {}
-
- private:
- void* encoder_state_;
- SpeexBits bits_;
- int samples_per_frame_;
- char encoded_frame_data_[kMaxSpeexFrameLength + 1]; // +1 for the frame size.
- DISALLOW_COPY_AND_ASSIGN(SpeexEncoder);
-};
-
-SpeexEncoder::SpeexEncoder(int sampling_rate)
- : AudioEncoder(std::string(kContentTypeSpeex) +
- base::IntToString(sampling_rate)) {
- // speex_bits_init() does not initialize all of the |bits_| struct.
- memset(&bits_, 0, sizeof(bits_));
- speex_bits_init(&bits_);
- encoder_state_ = speex_encoder_init(&speex_wb_mode);
- DCHECK(encoder_state_);
- speex_encoder_ctl(encoder_state_, SPEEX_GET_FRAME_SIZE, &samples_per_frame_);
- DCHECK(samples_per_frame_ > 0);
- int quality = kSpeexEncodingQuality;
- speex_encoder_ctl(encoder_state_, SPEEX_SET_QUALITY, &quality);
- int vbr = 1;
- speex_encoder_ctl(encoder_state_, SPEEX_SET_VBR, &vbr);
- memset(encoded_frame_data_, 0, sizeof(encoded_frame_data_));
-}
-
-SpeexEncoder::~SpeexEncoder() {
- speex_bits_destroy(&bits_);
- speex_encoder_destroy(encoder_state_);
-}
-
-void SpeexEncoder::Encode(const short* samples, int num_samples) {
- // Drop incomplete frames, typically those which come in when recording stops.
- num_samples -= (num_samples % samples_per_frame_);
- for (int i = 0; i < num_samples; i += samples_per_frame_) {
- speex_bits_reset(&bits_);
- speex_encode_int(encoder_state_, const_cast<spx_int16_t*>(samples + i),
- &bits_);
-
- // Encode the frame and place the size of the frame as the first byte. This
- // is the packet format for MIME type x-speex-with-header-byte.
- int frame_length = speex_bits_write(&bits_, encoded_frame_data_ + 1,
- kMaxSpeexFrameLength);
- encoded_frame_data_[0] = static_cast<char>(frame_length);
- AppendToBuffer(new string(encoded_frame_data_, frame_length + 1));
- }
-}
-
-} // namespace
-
-namespace speech_input {
-
-AudioEncoder* AudioEncoder::Create(Codec codec,
- int sampling_rate,
- int bits_per_sample) {
- if (codec == CODEC_FLAC)
- return new FLACEncoder(sampling_rate, bits_per_sample);
- return new SpeexEncoder(sampling_rate);
-}
-
-AudioEncoder::AudioEncoder(const std::string& mime_type)
- : mime_type_(mime_type) {
-}
-
-AudioEncoder::~AudioEncoder() {
- STLDeleteElements(&audio_buffers_);
-}
-
-// Appends every buffered chunk of encoded audio to |encoded_data|, in the
-// order the chunks were added. Returns false and leaves |encoded_data|
-// untouched when nothing has been buffered. The internal buffers are kept.
-bool AudioEncoder::GetEncodedData(std::string* encoded_data) {
-  // empty() is constant time, whereas std::list::size() may walk the list.
-  if (audio_buffers_.empty())
-    return false;
-
-  // std::string lengths are size_t; summing them in an int would narrow.
-  size_t audio_buffer_length = 0;
-  for (AudioBufferQueue::iterator it = audio_buffers_.begin();
-       it != audio_buffers_.end(); ++it) {
-    audio_buffer_length += (*it)->length();
-  }
-  // The chunks are appended, so reserve on top of what the caller's string
-  // already holds.
-  encoded_data->reserve(encoded_data->size() + audio_buffer_length);
-  for (AudioBufferQueue::iterator it = audio_buffers_.begin();
-       it != audio_buffers_.end(); ++it) {
-    encoded_data->append(*(*it));
-  }
-
-  return true;
-}
-
-void AudioEncoder::AppendToBuffer(std::string* item) {
- audio_buffers_.push_back(item);
-}
-
-} // namespace speech_input
diff --git a/chrome/browser/speech/audio_encoder.h b/chrome/browser/speech/audio_encoder.h
deleted file mode 100644
index e17a413..0000000
--- a/chrome/browser/speech/audio_encoder.h
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef CHROME_BROWSER_SPEECH_AUDIO_ENCODER_H_
-#define CHROME_BROWSER_SPEECH_AUDIO_ENCODER_H_
-
-#include <list>
-#include <string>
-
-#include "base/basictypes.h"
-
-namespace speech_input {
-
-// Provides a simple interface to encode raw audio using the various speech
-// codecs.
-class AudioEncoder {
- public:
- enum Codec {
- CODEC_FLAC,
- CODEC_SPEEX,
- };
-
- static AudioEncoder* Create(Codec codec,
- int sampling_rate,
- int bits_per_sample);
-
- virtual ~AudioEncoder();
-
- // Encodes each frame of raw audio in |samples| to the internal buffer. Use
- // |GetEncodedData| to read the result after this call or when recording
- // completes.
- virtual void Encode(const short* samples, int num_samples) = 0;
-
- // Finish encoding and flush any pending encoded bits out.
- virtual void Flush() = 0;
-
- // Copies the encoded audio to the given string. Returns true if the output
- // is not empty.
- bool GetEncodedData(std::string* encoded_data);
-
- const std::string& mime_type() { return mime_type_; }
-
- protected:
- AudioEncoder(const std::string& mime_type);
-
- void AppendToBuffer(std::string* item);
-
- private:
- // Buffer holding the recorded audio. Owns the strings inside the list.
- typedef std::list<std::string*> AudioBufferQueue;
- AudioBufferQueue audio_buffers_;
- std::string mime_type_;
- DISALLOW_COPY_AND_ASSIGN(AudioEncoder);
-};
-
-} // namespace speech_input
-
-#endif // CHROME_BROWSER_SPEECH_AUDIO_ENCODER_H_
diff --git a/chrome/browser/speech/endpointer/endpointer.cc b/chrome/browser/speech/endpointer/endpointer.cc
deleted file mode 100644
index c30e1f2..0000000
--- a/chrome/browser/speech/endpointer/endpointer.cc
+++ /dev/null
@@ -1,166 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "chrome/browser/speech/endpointer/endpointer.h"
-#include "base/time.h"
-
-using base::Time;
-
-namespace {
-static const int kFrameRate = 50; // 1 frame = 20ms of audio.
-}
-
-namespace speech_input {
-
-Endpointer::Endpointer(int sample_rate)
- : speech_input_possibly_complete_silence_length_us_(-1),
- speech_input_complete_silence_length_us_(-1),
- audio_frame_time_us_(0),
- sample_rate_(sample_rate),
- frame_size_(0) {
- Reset();
-
- frame_size_ = static_cast<int>(sample_rate / static_cast<float>(kFrameRate));
-
- speech_input_minimum_length_us_ =
- static_cast<int64>(1.7 * Time::kMicrosecondsPerSecond);
- speech_input_complete_silence_length_us_ =
- static_cast<int64>(0.5 * Time::kMicrosecondsPerSecond);
- long_speech_input_complete_silence_length_us_ = -1;
- long_speech_length_us_ = -1;
- speech_input_possibly_complete_silence_length_us_ =
- 1 * Time::kMicrosecondsPerSecond;
-
- // Set the default configuration for Push To Talk mode.
- EnergyEndpointerParams ep_config;
- ep_config.set_frame_period(1.0f / static_cast<float>(kFrameRate));
- ep_config.set_frame_duration(1.0f / static_cast<float>(kFrameRate));
- ep_config.set_endpoint_margin(0.2f);
- ep_config.set_onset_window(0.15f);
- ep_config.set_speech_on_window(0.4f);
- ep_config.set_offset_window(0.15f);
- ep_config.set_onset_detect_dur(0.09f);
- ep_config.set_onset_confirm_dur(0.075f);
- ep_config.set_on_maintain_dur(0.10f);
- ep_config.set_offset_confirm_dur(0.12f);
- ep_config.set_decision_threshold(1000.0f);
- ep_config.set_min_decision_threshold(50.0f);
- ep_config.set_fast_update_dur(0.2f);
- ep_config.set_sample_rate(static_cast<float>(sample_rate));
- ep_config.set_min_fundamental_frequency(57.143f);
- ep_config.set_max_fundamental_frequency(400.0f);
- ep_config.set_contamination_rejection_period(0.25f);
- energy_endpointer_.Init(ep_config);
-}
-
-void Endpointer::Reset() {
- old_ep_status_ = EP_PRE_SPEECH;
- waiting_for_speech_possibly_complete_timeout_ = false;
- waiting_for_speech_complete_timeout_ = false;
- speech_previously_detected_ = false;
- speech_input_complete_ = false;
- audio_frame_time_us_ = 0; // Reset time for packets sent to endpointer.
- speech_end_time_us_ = -1;
- speech_start_time_us_ = -1;
-}
-
-void Endpointer::StartSession() {
- Reset();
- energy_endpointer_.StartSession();
-}
-
-void Endpointer::EndSession() {
- energy_endpointer_.EndSession();
-}
-
-void Endpointer::SetEnvironmentEstimationMode() {
- Reset();
- energy_endpointer_.SetEnvironmentEstimationMode();
-}
-
-void Endpointer::SetUserInputMode() {
- energy_endpointer_.SetUserInputMode();
-}
-
-EpStatus Endpointer::Status(int64 *time) {
- return energy_endpointer_.Status(time);
-}
-
-EpStatus Endpointer::ProcessAudio(const int16* audio_data, int num_samples,
- float* rms_out) {
- EpStatus ep_status = EP_PRE_SPEECH;
-
- // Process the input data in blocks of frame_size_, dropping any incomplete
- // frames at the end (which is ok since typically the caller will be recording
- // audio in multiples of our frame size).
- int sample_index = 0;
- while (sample_index + frame_size_ <= num_samples) {
- // Have the endpointer process the frame.
- energy_endpointer_.ProcessAudioFrame(audio_frame_time_us_,
- audio_data + sample_index,
- frame_size_,
- rms_out);
- sample_index += frame_size_;
- audio_frame_time_us_ += (frame_size_ * Time::kMicrosecondsPerSecond) /
- sample_rate_;
-
- // Get the status of the endpointer.
- int64 ep_time;
- ep_status = energy_endpointer_.Status(&ep_time);
-
- // Handle state changes.
- if ((EP_SPEECH_PRESENT == ep_status) &&
- (EP_POSSIBLE_ONSET == old_ep_status_)) {
- speech_end_time_us_ = -1;
- waiting_for_speech_possibly_complete_timeout_ = false;
- waiting_for_speech_complete_timeout_ = false;
- // Trigger SpeechInputDidStart event on first detection.
- if (false == speech_previously_detected_) {
- speech_previously_detected_ = true;
- speech_start_time_us_ = ep_time;
- }
- }
- if ((EP_PRE_SPEECH == ep_status) &&
- (EP_POSSIBLE_OFFSET == old_ep_status_)) {
- speech_end_time_us_ = ep_time;
- waiting_for_speech_possibly_complete_timeout_ = true;
- waiting_for_speech_complete_timeout_ = true;
- }
- if (ep_time > speech_input_minimum_length_us_) {
- // Speech possibly complete timeout.
- if ((waiting_for_speech_possibly_complete_timeout_) &&
- (ep_time - speech_end_time_us_ >
- speech_input_possibly_complete_silence_length_us_)) {
- waiting_for_speech_possibly_complete_timeout_ = false;
- }
- if (waiting_for_speech_complete_timeout_) {
- // The length of the silence timeout period can be held constant, or it
- // can be changed after a fixed amount of time from the beginning of
- // speech.
- bool has_stepped_silence =
- (long_speech_length_us_ > 0) &&
- (long_speech_input_complete_silence_length_us_ > 0);
- int64 requested_silence_length;
- if (has_stepped_silence &&
- (ep_time - speech_start_time_us_) > long_speech_length_us_) {
- requested_silence_length =
- long_speech_input_complete_silence_length_us_;
- } else {
- requested_silence_length =
- speech_input_complete_silence_length_us_;
- }
-
- // Speech complete timeout.
- if ((ep_time - speech_end_time_us_) > requested_silence_length) {
- waiting_for_speech_complete_timeout_ = false;
- speech_input_complete_ = true;
- }
- }
- }
- old_ep_status_ = ep_status;
- }
- return ep_status;
-}
-
-} // namespace speech_input
diff --git a/chrome/browser/speech/endpointer/endpointer.h b/chrome/browser/speech/endpointer/endpointer.h
deleted file mode 100644
index 8af6016..0000000
--- a/chrome/browser/speech/endpointer/endpointer.h
+++ /dev/null
@@ -1,148 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef CHROME_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
-#define CHROME_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
-
-#include "base/basictypes.h"
-#include "chrome/browser/speech/endpointer/energy_endpointer.h"
-
-class EpStatus;
-
-namespace speech_input {
-
-// A simple interface to the underlying energy-endpointer implementation, this
-// class lets callers provide audio as being recorded and let them poll to find
-// when the user has stopped speaking.
-//
-// There are two events that may trigger the end of speech:
-//
-// speechInputPossiblyComplete event:
-//
-// Signals that silence/noise has been detected for a *short* amount of
-// time after some speech has been detected. It can be used for low latency
-// UI feedback. To disable it, set it to a large amount.
-//
-// speechInputComplete event:
-//
-// This event is intended to signal end of input and to stop recording.
-// The amount of time to wait after speech is set by
-// speech_input_complete_silence_length_ and optionally two other
-// parameters (see below).
-// This time can be held constant, or can change as more speech is detected.
-// In the latter case, the time changes after a set amount of time from the
-// *beginning* of speech. This is motivated by the expectation that there
-// will be two distinct types of inputs: short search queries and longer
-// dictation style input.
-//
-// Three parameters are used to define the piecewise constant timeout function.
-// The timeout length is speech_input_complete_silence_length until
-// long_speech_length, when it changes to
-// long_speech_input_complete_silence_length.
-class Endpointer {
- public:
- explicit Endpointer(int sample_rate);
-
- // Start the endpointer. This should be called at the beginning of a session.
- void StartSession();
-
- // Stop the endpointer.
- void EndSession();
-
- // Start environment estimation. Audio will be used for environment estimation
- // i.e. noise level estimation.
- void SetEnvironmentEstimationMode();
-
- // Start user input. This should be called when the user indicates start of
- // input, e.g. by pressing a button.
- void SetUserInputMode();
-
- // Process a segment of audio, which may be more than one frame.
- // The status of the last frame will be returned.
- EpStatus ProcessAudio(const int16* audio_data, int num_samples,
- float* rms_out);
-
- // Get the status of the endpointer.
- EpStatus Status(int64 *time_us);
-
- // Returns true if the endpointer detected reasonable audio levels above
- // background noise which could be user speech, false if not.
- bool DidStartReceivingSpeech() const {
- return speech_previously_detected_;
- }
-
- bool IsEstimatingEnvironment() const {
- return energy_endpointer_.estimating_environment();
- }
-
- void set_speech_input_complete_silence_length(int64 time_us) {
- speech_input_complete_silence_length_us_ = time_us;
- }
-
- void set_long_speech_input_complete_silence_length(int64 time_us) {
- long_speech_input_complete_silence_length_us_ = time_us;
- }
-
- void set_speech_input_possibly_complete_silence_length(int64 time_us) {
- speech_input_possibly_complete_silence_length_us_ = time_us;
- }
-
- void set_long_speech_length(int64 time_us) {
- long_speech_length_us_ = time_us;
- }
-
- bool speech_input_complete() const {
- return speech_input_complete_;
- }
-
- private:
- // Reset internal states. Helper method common to initial input utterance
- // and following input utterances.
- void Reset();
-
- // Minimum allowable length of speech input.
- int64 speech_input_minimum_length_us_;
-
- // The speechInputPossiblyComplete event signals that silence/noise has been
- // detected for a *short* amount of time after some speech has been detected.
- // This property specifies the time period.
- int64 speech_input_possibly_complete_silence_length_us_;
-
- // The speechInputComplete event signals that silence/noise has been
- // detected for a *long* amount of time after some speech has been detected.
- // This property specifies the time period.
- int64 speech_input_complete_silence_length_us_;
-
- // Same as above, this specifies the required silence period after speech
- // detection. This period is used instead of
- // speech_input_complete_silence_length_ when the utterance is longer than
- // long_speech_length_. This parameter is optional.
- int64 long_speech_input_complete_silence_length_us_;
-
- // The period of time after which the endpointer should consider
- // long_speech_input_complete_silence_length_ as a valid silence period
- // instead of speech_input_complete_silence_length_. This parameter is
- // optional.
- int64 long_speech_length_us_;
-
- // First speech onset time, used in determination of speech complete timeout.
- int64 speech_start_time_us_;
-
- // Most recent end time, used in determination of speech complete timeout.
- int64 speech_end_time_us_;
-
- int64 audio_frame_time_us_;
- EpStatus old_ep_status_;
- bool waiting_for_speech_possibly_complete_timeout_;
- bool waiting_for_speech_complete_timeout_;
- bool speech_previously_detected_;
- bool speech_input_complete_;
- EnergyEndpointer energy_endpointer_;
- int sample_rate_;
- int32 frame_size_;
-};
-
-} // namespace speech_input
-
-#endif // CHROME_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
diff --git a/chrome/browser/speech/endpointer/endpointer_unittest.cc b/chrome/browser/speech/endpointer/endpointer_unittest.cc
deleted file mode 100644
index bbdc572..0000000
--- a/chrome/browser/speech/endpointer/endpointer_unittest.cc
+++ /dev/null
@@ -1,146 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "base/task.h"
-#include "chrome/browser/speech/endpointer/endpointer.h"
-#include "testing/gtest/include/gtest/gtest.h"
-
-namespace {
-const int kFrameRate = 50; // 20 ms long frames for AMR encoding.
-const int kSampleRate = 8000; // 8 k samples per second for AMR encoding.
-
-// At 8000 samples per second a 20 ms frame is 160 samples, which corresponds
-// to the AMR codec.
-const int kFrameSize = kSampleRate / kFrameRate; // 160 samples.
-COMPILE_ASSERT(kFrameSize == 160, invalid_frame_size);
-}
-
-namespace speech_input {
-
-// Abstracts the endpointer under test so that RunEndpointerEventsTest can
-// drive different implementations with the same synthetic audio.
-class FrameProcessor {
- public:
-  // Polymorphic base class, so give it a virtual destructor.
-  virtual ~FrameProcessor() {}
-
-  // Process a single frame of test audio samples.
-  virtual EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) = 0;
-};
-
-void RunEndpointerEventsTest(FrameProcessor* processor) {
- int16 samples[kFrameSize];
-
- // We will create a white noise signal of 150 frames. The frames from 50 to
- // 100 will have more power, and the endpointer should fire on those frames.
- const int kNumFrames = 150;
-
- // Create a random sequence of samples.
- srand(1);
- float gain = 0.0;
- int64 time = 0;
- for (int frame_count = 0; frame_count < kNumFrames; ++frame_count) {
- // The frames from 50 to 100 will have more power, and the endpointer
- // should detect those frames as speech.
- if ((frame_count >= 50) && (frame_count < 100)) {
- gain = 2000.0;
- } else {
- gain = 1.0;
- }
- // Create random samples.
- for (int i = 0; i < kFrameSize; ++i) {
- float randNum = static_cast<float>(rand() - (RAND_MAX / 2)) /
- static_cast<float>(RAND_MAX);
- samples[i] = static_cast<int16>(gain * randNum);
- }
-
- EpStatus ep_status = processor->ProcessFrame(time, samples, kFrameSize);
- time += static_cast<int64>(kFrameSize * (1e6 / kSampleRate));
-
- // Log the status.
- if (20 == frame_count)
- EXPECT_EQ(EP_PRE_SPEECH, ep_status);
- if (70 == frame_count)
- EXPECT_EQ(EP_SPEECH_PRESENT, ep_status);
- if (120 == frame_count)
- EXPECT_EQ(EP_PRE_SPEECH, ep_status);
- }
-}
-
-// This test instantiates and initializes a stand-alone endpointer module.
-// The test creates frames of random noise and sends them to the endpointer
-// module. The energy of the first 50 frames is low, followed by 50 high
-// energy frames, and another 50 low energy frames.
-// We test that the reported status is correct before, during and after the
-// high energy frames.
-class EnergyEndpointerFrameProcessor : public FrameProcessor {
- public:
-  explicit EnergyEndpointerFrameProcessor(EnergyEndpointer* endpointer)
-      : endpointer_(endpointer) {}
-
-  // Feeds one frame of |frame_size| samples to the endpointer and returns the
-  // status it reports afterwards.
-  EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) {
-    // Honor the caller's frame size instead of assuming kFrameSize.
-    endpointer_->ProcessAudioFrame(time, samples, frame_size, NULL);
-    int64 ep_time;
-    return endpointer_->Status(&ep_time);
-  }
-
- private:
-  EnergyEndpointer* endpointer_;  // Not owned.
-};
-
-// Runs the shared event test directly against EnergyEndpointer.
-TEST(EndpointerTest, TestEnergyEndpointerEvents) {
-  // Initialize endpointer and configure it. We specify the parameters
-  // here for a 20ms window, and a 20ms step size, which corresponds to
-  // the narrow band AMR codec.
-  EnergyEndpointerParams ep_config;
-  ep_config.set_frame_period(1.0f / static_cast<float>(kFrameRate));
-  ep_config.set_frame_duration(1.0f / static_cast<float>(kFrameRate));
-  ep_config.set_endpoint_margin(0.2f);
-  ep_config.set_onset_window(0.15f);
-  ep_config.set_speech_on_window(0.4f);
-  ep_config.set_offset_window(0.15f);
-  ep_config.set_onset_detect_dur(0.09f);
-  ep_config.set_onset_confirm_dur(0.075f);
-  ep_config.set_on_maintain_dur(0.10f);
-  ep_config.set_offset_confirm_dur(0.12f);
-  ep_config.set_decision_threshold(100.0f);
-  EnergyEndpointer endpointer;
-  endpointer.Init(ep_config);
-
-  endpointer.StartSession();
-
-  EnergyEndpointerFrameProcessor frame_processor(&endpointer);
-  RunEndpointerEventsTest(&frame_processor);
-
-  endpointer.EndSession();
-}
-
-// Test endpointer wrapper class.
-class EndpointerFrameProcessor : public FrameProcessor {
- public:
-  explicit EndpointerFrameProcessor(Endpointer* endpointer)
-      : endpointer_(endpointer) {}
-
-  // Feeds one frame of |frame_size| samples to the wrapper and returns the
-  // status it reports afterwards. |time| is not forwarded; Endpointer tracks
-  // its own audio frame time.
-  EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) {
-    // Honor the caller's frame size instead of assuming kFrameSize.
-    endpointer_->ProcessAudio(samples, frame_size, NULL);
-    int64 ep_time;
-    return endpointer_->Status(&ep_time);
-  }
-
- private:
-  Endpointer* endpointer_;  // Not owned.
-};
-
-// Runs the shared event test against the Endpointer wrapper class.
-TEST(EndpointerTest, TestEmbeddedEndpointerEvents) {
-  const int kSampleRate = 8000;  // 8 k samples per second for AMR encoding.
-
-  Endpointer endpointer(kSampleRate);
-  // The silence-length setters take microseconds, so scale the millisecond
-  // values below by the number of microseconds in a millisecond.
-  const int64 kMicrosecondsPerMillisecond = 1000;
-  const int64 short_timeout = 300 * kMicrosecondsPerMillisecond;
-  endpointer.set_speech_input_possibly_complete_silence_length(short_timeout);
-  const int64 long_timeout = 500 * kMicrosecondsPerMillisecond;
-  endpointer.set_speech_input_complete_silence_length(long_timeout);
-  endpointer.StartSession();
-
-  EndpointerFrameProcessor frame_processor(&endpointer);
-  RunEndpointerEventsTest(&frame_processor);
-
-  endpointer.EndSession();
-}
-
-} // namespace speech_input
diff --git a/chrome/browser/speech/endpointer/energy_endpointer.cc b/chrome/browser/speech/endpointer/energy_endpointer.cc
deleted file mode 100644
index 85d4a29..0000000
--- a/chrome/browser/speech/endpointer/energy_endpointer.cc
+++ /dev/null
@@ -1,369 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-//
-// To know more about the algorithm used and the original code which this is
-// based on, see
-// https://wiki.corp.google.com/twiki/bin/view/Main/ChromeGoogleCodeXRef
-
-#include "chrome/browser/speech/endpointer/energy_endpointer.h"
-
-#include "base/logging.h"
-#include <math.h>
-#include <vector>
-
-namespace {
-
-// Returns the RMS (quadratic mean) of the input signal after removing its
-// mean (DC offset), i.e. sqrt(mean(x^2) - mean(x)^2).
-// Returns 0 when |num_samples| is not positive.
-float RMS(const int16* samples, int num_samples) {
-  if (num_samples <= 0)
-    return 0.0f;  // Avoids 0/0, which would otherwise produce NaN.
-
-  int64 ssq_int64 = 0;
-  int64 sum_int64 = 0;
-  for (int i = 0; i < num_samples; ++i) {
-    const int64 sample = samples[i];
-    sum_int64 += sample;
-    ssq_int64 += sample * sample;
-  }
-  // Now convert to floating point.
-  const double mean = static_cast<double>(sum_int64) / num_samples;
-  const double mean_square = static_cast<double>(ssq_int64) / num_samples;
-  // Rounding may push the difference marginally below zero; clamp it so that
-  // sqrt() is never given a negative argument.
-  const double variance = mean_square - (mean * mean);
-  return (variance > 0.0) ? static_cast<float>(sqrt(variance)) : 0.0f;
-}
-
-// Converts |seconds| to microseconds: scales by 1e6, adds 0.5 and truncates
-// to an integer.
-int64 Secs2Usecs(float seconds) {
-  const double usecs = 1.0e6 * seconds;
-  return static_cast<int64>(usecs + 0.5);
-}
-
-} // namespace
-
-namespace speech_input {
-
-// Stores threshold-crossing histories for making decisions about the speech
-// state. Entries live in a fixed-size vector that is overwritten circularly.
-class EnergyEndpointer::HistoryRing {
- public:
- HistoryRing() : insertion_index_(0) {}
-
- // Resets the ring to |size| elements each with state |initial_state|
- void SetRing(int size, bool initial_state);
-
- // Inserts a new entry into the ring and drops the oldest entry.
- void Insert(int64 time_us, bool decision);
-
- // Returns the time in microseconds of the most recently added entry.
- int64 EndTime() const;
-
- // Returns the sum of all intervals during which 'decision' is true within
- // the time in seconds specified by 'duration'. The returned interval is
- // in seconds.
- float RingSum(float duration_sec);
-
- private:
- // One history sample: the frame time and whether it was above threshold.
- struct DecisionPoint {
- int64 time_us;
- bool decision;
- };
-
- std::vector<DecisionPoint> decision_points_;
- int insertion_index_; // Index at which the next item gets added/inserted.
-
- DISALLOW_COPY_AND_ASSIGN(HistoryRing);
-};
-
-// Replaces the ring contents with |size| entries, each stamped with time -1
-// and decision |initial_state|, and rewinds the insertion point.
-void EnergyEndpointer::HistoryRing::SetRing(int size, bool initial_state) {
-  const DecisionPoint filler = { -1, initial_state };
-  decision_points_.assign(size, filler);
-  insertion_index_ = 0;
-}
-
-// Overwrites the slot at the insertion point with (|time_us|, |decision|) and
-// advances the insertion point, wrapping at the end of the ring.
-void EnergyEndpointer::HistoryRing::Insert(int64 time_us, bool decision) {
-  // With no slots allocated there is nowhere to record the entry; returning
-  // early also avoids the modulo-by-zero below.
-  if (decision_points_.empty())
-    return;
-
-  DecisionPoint& slot = decision_points_[insertion_index_];
-  slot.time_us = time_us;
-  slot.decision = decision;
-  insertion_index_ = (insertion_index_ + 1) % decision_points_.size();
-}
-
-// Returns the timestamp (microseconds) stored in the slot just before the
-// insertion point, i.e. the most recently written entry. Returns -1 when the
-// ring has no slots, matching the timestamp SetRing() gives unwritten slots.
-int64 EnergyEndpointer::HistoryRing::EndTime() const {
-  if (decision_points_.empty())
-    return -1;
-
-  const int last = static_cast<int>(decision_points_.size()) - 1;
-  const int ind = (insertion_index_ > 0) ? insertion_index_ - 1 : last;
-  return decision_points_[ind].time_us;
-}
-
-// Walks backwards from the newest entry and accumulates the length of every
-// interval whose newer end point had decision == true, until an entry at or
-// before (newest time - |duration_sec|) is reached or the whole ring has been
-// visited. Returns the accumulated time in seconds.
-float EnergyEndpointer::HistoryRing::RingSum(float duration_sec) {
-  if (decision_points_.empty())
-    return 0.0f;
-
-  const int ring_size = static_cast<int>(decision_points_.size());
-  // Index of the most recently written entry (one before the insertion point).
-  int ind = (insertion_index_ + ring_size - 1) % ring_size;
-  int64 end_us = decision_points_[ind].time_us;
-  bool is_on = decision_points_[ind].decision;
-  int64 start_us = end_us - Secs2Usecs(duration_sec);
-  if (start_us < 0)
-    start_us = 0;
-
-  int64 sum_us = 0;
-  int n_summed = 1;  // n points ==> (n-1) intervals
-  while ((decision_points_[ind].time_us > start_us) &&
-         (n_summed < ring_size)) {
-    ind = (ind + ring_size - 1) % ring_size;  // Step to the next older entry.
-    if (is_on)
-      sum_us += end_us - decision_points_[ind].time_us;
-    is_on = decision_points_[ind].decision;
-    end_us = decision_points_[ind].time_us;
-    ++n_summed;
-  }
-
-  return 1.0e-6f * sum_us;  // Returns total time that was super threshold.
-}
-
-// Puts every member into a known initial state and allocates an empty
-// HistoryRing. Parameter-dependent values are filled in later by Init().
-EnergyEndpointer::EnergyEndpointer()
- : status_(EP_PRE_SPEECH),
- offset_confirm_dur_sec_(0),
- endpointer_time_us_(0),
- fast_update_frames_(0),
- frame_counter_(0),
- max_window_dur_(4.0),
- sample_rate_(0),
- history_(new HistoryRing()),
- decision_threshold_(0),
- estimating_environment_(false),
- noise_level_(0),
- rms_adapt_(0),
- start_lag_(0),
- end_lag_(0),
- user_input_start_time_us_(0) {
-}
-
-// Defined here, where HistoryRing is a complete type, because the header only
-// forward-declares it for the scoped_ptr<HistoryRing> member.
-EnergyEndpointer::~EnergyEndpointer() {
-}
-
-// Converts |time| (seconds) to a frame count by dividing by the configured
-// frame period, adding 0.5 and truncating.
-int EnergyEndpointer::TimeToFrame(float time) const {
-  const double frames = time / params_.frame_period();
-  return static_cast<int32>(frames + 0.5);
-}
-
-// Returns the endpointer to the pre-speech state with a cleared history ring.
-// When |reset_threshold| is true the adaptive levels are also re-seeded from
-// the configured decision threshold.
-void EnergyEndpointer::Restart(bool reset_threshold) {
-  status_ = EP_PRE_SPEECH;
-  user_input_start_time_us_ = 0;
-
-  // Incoming audio is treated as user input rather than as material for
-  // estimating the environment; environment estimation has to be requested
-  // explicitly. False by default for backward compatibility.
-  estimating_environment_ = false;
-
-  if (reset_threshold) {
-    const float initial_threshold = params_.decision_threshold();
-    decision_threshold_ = initial_threshold;
-    rms_adapt_ = initial_threshold;
-    noise_level_ = initial_threshold / 2.0f;
-    frame_counter_ = 0;  // Used for rapid initial update of levels.
-  }
-
-  // Size the history to the longest window and mark every entry as "off".
-  history_->SetRing(TimeToFrame(max_window_dur_), false);
-}
-
-// Copies |params| and derives everything that depends on them: the history
-// ring length, the offset confirmation time, the number of fast-update frames
-// and the pitch lag bounds.
-void EnergyEndpointer::Init(const EnergyEndpointerParams& params) {
- params_ = params;
-
- // Find the longest history interval to be used, and make the ring
- // large enough to accommodate that number of frames. NOTE: This
- // depends upon ep_frame_period being set correctly in the factory
- // that did this instantiation.
- max_window_dur_ = params_.onset_window();
- if (params_.speech_on_window() > max_window_dur_)
- max_window_dur_ = params_.speech_on_window();
- if (params_.offset_window() > max_window_dur_)
- max_window_dur_ = params_.offset_window();
- // Restart() sizes the ring from max_window_dur_, so it must run after the
- // maximum has been computed.
- Restart(true);
-
- // Largest "on" time within the offset window that still confirms an offset;
- // clamped so it never goes negative.
- offset_confirm_dur_sec_ = params_.offset_window() -
- params_.offset_confirm_dur();
- if (offset_confirm_dur_sec_ < 0.0)
- offset_confirm_dur_sec_ = 0.0;
-
- user_input_start_time_us_ = 0;
-
- // Flag that indicates that current input should be used for
- // estimating the environment. The user has not yet started input
- // by e.g. pressed the push-to-talk button. By default, this is
- // false for backward compatibility.
- estimating_environment_ = false;
- // The initial value of the noise and speech levels is inconsequential.
- // The level of the first frame will overwrite these values.
- noise_level_ = params_.decision_threshold() / 2.0f;
- fast_update_frames_ =
- static_cast<int64>(params_.fast_update_dur() / params_.frame_period());
-
- frame_counter_ = 0; // Used for rapid initial update of levels.
-
- // Lags are expressed in samples: sample rate divided by frequency.
- sample_rate_ = params_.sample_rate();
- start_lag_ = static_cast<int>(sample_rate_ /
- params_.max_fundamental_frequency());
- end_lag_ = static_cast<int>(sample_rate_ /
- params_.min_fundamental_frequency());
-}
-
-// Begins a session by fully resetting state, including adaptive thresholds.
-void EnergyEndpointer::StartSession() {
- Restart(true);
-}
-
-// Ends the session by forcing the status to EP_POST_SPEECH.
-void EnergyEndpointer::EndSession() {
- status_ = EP_POST_SPEECH;
-}
-
-// Fully resets state and then switches to environment estimation. The flag is
-// set after Restart() because Restart() clears it.
-void EnergyEndpointer::SetEnvironmentEstimationMode() {
- Restart(true);
- estimating_environment_ = true;
-}
-
-// Leaves environment estimation and records the time of the most recent
-// frame as the start of user input; ProcessAudioFrame() measures the
-// contamination rejection period from that time.
-void EnergyEndpointer::SetUserInputMode() {
- estimating_environment_ = false;
- user_input_start_time_us_ = endpointer_time_us_;
-}
-
-// Processes one frame of audio stamped |time_us|: computes its RMS, and when
-// not estimating the environment, records a speech/non-speech decision,
-// advances the state machine and adapts the decision threshold. The noise
-// level is updated for every frame. If |rms_out| is non-NULL it receives the
-// frame level in dB relative to the noise level, or -120 when that ratio
-// cannot be computed.
-void EnergyEndpointer::ProcessAudioFrame(int64 time_us,
- const int16* samples,
- int num_samples,
- float* rms_out) {
- endpointer_time_us_ = time_us;
- float rms = RMS(samples, num_samples);
-
- // Check that this is user input audio vs. pre-input adaptation audio.
- // Input audio starts when the user indicates start of input, by e.g.
- // pressing push-to-talk. Audio received prior to that is used to update
- // noise and speech level estimates.
- if (!estimating_environment_) {
- bool decision = false;
- // Frames inside the contamination rejection period are always treated as
- // non-speech, whatever their level.
- if ((endpointer_time_us_ - user_input_start_time_us_) <
- Secs2Usecs(params_.contamination_rejection_period())) {
- decision = false;
- DVLOG(1) << "decision: forced to false, time: " << endpointer_time_us_;
- } else {
- decision = (rms > decision_threshold_);
- }
-
- history_->Insert(endpointer_time_us_, decision);
-
- switch (status_) {
- case EP_PRE_SPEECH:
- if (history_->RingSum(params_.onset_window()) >
- params_.onset_detect_dur()) {
- status_ = EP_POSSIBLE_ONSET;
- }
- break;
-
- case EP_POSSIBLE_ONSET: {
- float tsum = history_->RingSum(params_.onset_window());
- if (tsum > params_.onset_confirm_dur()) {
- status_ = EP_SPEECH_PRESENT;
- } else { // If signal is not maintained, drop back to pre-speech.
- if (tsum <= params_.onset_detect_dur())
- status_ = EP_PRE_SPEECH;
- }
- break;
- }
-
- case EP_SPEECH_PRESENT: {
- // To induce hysteresis in the state residency, we allow a
- // smaller residency time in the on_ring, than was required to
- // enter the SPEECH_PRESENT state.
- float on_time = history_->RingSum(params_.speech_on_window());
- if (on_time < params_.on_maintain_dur())
- status_ = EP_POSSIBLE_OFFSET;
- break;
- }
-
- case EP_POSSIBLE_OFFSET:
- if (history_->RingSum(params_.offset_window()) <=
- offset_confirm_dur_sec_) {
- // Note that this offset time may be beyond the end
- // of the input buffer in a real-time system. It will be up
- // to the RecognizerSession to decide what to do.
- status_ = EP_PRE_SPEECH; // Automatically reset for next utterance.
- } else { // If speech picks up again we allow return to SPEECH_PRESENT.
- if (history_->RingSum(params_.speech_on_window()) >=
- params_.on_maintain_dur())
- status_ = EP_SPEECH_PRESENT;
- }
- break;
-
- default:
- LOG(WARNING) << "Invalid case in switch: " << status_;
- break;
- }
-
- // If this is a quiet, non-speech region, slowly adapt the detection
- // threshold to be about 6dB above the average RMS.
- if ((!decision) && (status_ == EP_PRE_SPEECH)) {
- decision_threshold_ = (0.98f * decision_threshold_) + (0.02f * 2 * rms);
- rms_adapt_ = decision_threshold_;
- } else {
- // If this is in a speech region, adapt the decision threshold to
- // be about 10dB below the average RMS. If the noise level is high,
- // the threshold is pushed up.
- // Adaptation up to a higher level is 5 times faster than decay to
- // a lower level.
- if ((status_ == EP_SPEECH_PRESENT) && decision) {
- if (rms_adapt_ > rms) {
- rms_adapt_ = (0.99f * rms_adapt_) + (0.01f * rms);
- } else {
- rms_adapt_ = (0.95f * rms_adapt_) + (0.05f * rms);
- }
- float target_threshold = 0.3f * rms_adapt_ + noise_level_;
- decision_threshold_ = (.90f * decision_threshold_) +
- (0.10f * target_threshold);
- }
- }
-
- // Set a floor
- if (decision_threshold_ < params_.min_decision_threshold())
- decision_threshold_ = params_.min_decision_threshold();
- }
-
- // Update speech and noise levels.
- UpdateLevels(rms);
- ++frame_counter_;
-
- if (rms_out) {
- // Default to -120 dB unless the level relative to noise can be computed.
- *rms_out = -120.0;
- if ((noise_level_ > 0.0) && ((rms / noise_level_ ) > 0.000001))
- *rms_out = static_cast<float>(20.0 * log10(rms / noise_level_));
- }
-}
-
-// Updates noise_level_ from the frame level |rms|. During the initial fast
-// update period, or while estimating the environment, decision_threshold_ is
-// also reset to twice the noise level, subject to the configured floor.
-void EnergyEndpointer::UpdateLevels(float rms) {
- // Update quickly initially. We assume this is noise and that
- // speech is 6dB above the noise.
- if (frame_counter_ < fast_update_frames_) {
- // Alpha increases from 0 to (k-1)/k where k is the number of time
- // steps in the initial adaptation period.
- float alpha = static_cast<float>(frame_counter_) /
- static_cast<float>(fast_update_frames_);
- noise_level_ = (alpha * noise_level_) + ((1 - alpha) * rms);
- DVLOG(1) << "FAST UPDATE, frame_counter_ " << frame_counter_
- << ", fast_update_frames_ " << fast_update_frames_;
- } else {
- // Update Noise level. The noise level adapts quickly downward, but
- // slowly upward. The noise_level_ parameter is not currently used
- // for threshold adaptation. It is used for UI feedback.
- // NOTE(review): noise_level_ does feed decision_threshold_ below and in
- // ProcessAudioFrame(), so the statement above looks out of date.
- if (noise_level_ < rms)
- noise_level_ = (0.999f * noise_level_) + (0.001f * rms);
- else
- noise_level_ = (0.95f * noise_level_) + (0.05f * rms);
- }
- if (estimating_environment_ || (frame_counter_ < fast_update_frames_)) {
- decision_threshold_ = noise_level_ * 2; // 6dB above noise level.
- // Set a floor
- if (decision_threshold_ < params_.min_decision_threshold())
- decision_threshold_ = params_.min_decision_threshold();
- }
-}
-
-// Returns the current status and writes the time of the newest history entry
-// to |status_time|, which is dereferenced unconditionally and so must not be
-// NULL.
-EpStatus EnergyEndpointer::Status(int64* status_time) const {
- *status_time = history_->EndTime();
- return status_;
-}
-
-} // namespace speech_input
diff --git a/chrome/browser/speech/endpointer/energy_endpointer.h b/chrome/browser/speech/endpointer/energy_endpointer.h
deleted file mode 100644
index 20476e7..0000000
--- a/chrome/browser/speech/endpointer/energy_endpointer.h
+++ /dev/null
@@ -1,150 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// The EnergyEndpointer class finds likely speech onset and offset points.
-//
-// The implementation described here is about the simplest possible.
-// It is based on timings of threshold crossings for overall signal
-// RMS. It is suitable for light weight applications.
-//
-// As written, the basic idea is that one specifies intervals that
-// must be occupied by super- and sub-threshold energy levels, and
-// defers decisions re onset and offset times until these
-// specifications have been met. Three basic intervals are tested: an
-// onset window, a speech-on window, and an offset window. We require
-// super-threshold to exceed some minimum total durations in the onset
-// and speech-on windows before declaring the speech onset time, and
-// we specify a required sub-threshold residency in the offset window
-// before declaring speech offset. As the various residency requirements are
-// met, the EnergyEndpointer instance assumes various states, and can return the
-// ID of these states to the client (see EpStatus below).
-//
-// The levels of the speech and background noise are continuously updated. It is
-// important that the background noise level be estimated initially for
-// robustness in noisy conditions. The first frames are assumed to be background
-// noise and a fast update rate is used for the noise level. The duration for
-// fast update is controlled by the fast_update_dur_ parameter.
-//
-// If used in noisy conditions, the endpointer should be started and run in the
-// EnvironmentEstimation mode, for at least 200ms, before switching to
-// UserInputMode.
-// Audio feedback contamination can appear in the input audio, if not cut
-// out or handled by echo cancellation. Audio feedback can trigger a false
-// accept. The false accepts can be ignored by setting
-// ep_contamination_rejection_period.
-
-#ifndef CHROME_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_
-#define CHROME_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_
-
-#include "base/basictypes.h"
-#include "base/scoped_ptr.h"
-#include "chrome/browser/speech/endpointer/energy_endpointer_params.h"
-#include <vector>
-
-namespace speech_input {
-
-// Endpointer status codes. Values start at 10 and increase by one.
-enum EpStatus {
- EP_PRE_SPEECH = 10, // Initial state; no speech onset detected yet.
- EP_POSSIBLE_ONSET, // Onset detected but not yet confirmed.
- EP_SPEECH_PRESENT, // Onset confirmed; speech is ongoing.
- EP_POSSIBLE_OFFSET, // Speech no longer maintained; offset not confirmed.
- EP_POST_SPEECH, // Set by EnergyEndpointer::EndSession().
-};
-
-// Energy-based speech endpointer. Feed it audio frames through
-// ProcessAudioFrame() and poll Status() for the current EpStatus.
-class EnergyEndpointer {
- public:
- // The default construction MUST be followed by Init(), before any
- // other use can be made of the instance.
- EnergyEndpointer();
- virtual ~EnergyEndpointer();
-
- // Stores |params| and derives the internal settings that depend on them.
- void Init(const EnergyEndpointerParams& params);
-
- // Start the endpointer. This should be called at the beginning of a session.
- void StartSession();
-
- // Stop the endpointer.
- void EndSession();
-
- // Start environment estimation. Audio will be used for environment estimation
- // i.e. noise level estimation.
- void SetEnvironmentEstimationMode();
-
- // Start user input. This should be called when the user indicates start of
- // input, e.g. by pressing a button.
- void SetUserInputMode();
-
- // Processes the next input frame and modifies EnergyEndpointer status as
- // appropriate based on the computation. |rms_out| may be NULL; otherwise it
- // receives the frame level in dB relative to the estimated noise level.
- void ProcessAudioFrame(int64 time_us,
- const int16* samples, int num_samples,
- float* rms_out);
-
- // Returns the current state of the EnergyEndpointer and the time
- // corresponding to the most recently computed frame.
- EpStatus Status(int64* status_time_us) const;
-
- bool estimating_environment() const {
- return estimating_environment_;
- }
-
- private:
- // Defined in the .cc file; held through the scoped_ptr below.
- class HistoryRing;
-
- // Resets the endpointer internal state. If reset_threshold is true, the
- // state will be reset completely, including adaptive thresholds and the
- // removal of all history information.
- void Restart(bool reset_threshold);
-
- // Update internal speech and noise levels.
- void UpdateLevels(float rms);
-
- // Returns the number of frames (or frame number) corresponding to
- // the 'time' (in seconds).
- int TimeToFrame(float time) const;
-
- EpStatus status_; // The current state of this instance.
- float offset_confirm_dur_sec_; // max on time allowed to confirm POST_SPEECH
- int64 endpointer_time_us_; // Time of the most recently received audio frame.
- int64 fast_update_frames_; // Number of frames for initial level adaptation.
- int64 frame_counter_; // Number of frames seen. Used for initial adaptation.
- float max_window_dur_; // Largest search window size (seconds)
- float sample_rate_; // Sampling rate.
-
- // Ring buffers to hold the speech activity history.
- scoped_ptr<HistoryRing> history_;
-
- // Configuration parameters.
- EnergyEndpointerParams params_;
-
- // RMS which must be exceeded to conclude frame is speech.
- float decision_threshold_;
-
- // Flag to indicate that audio should be used to estimate environment, prior
- // to receiving user input.
- bool estimating_environment_;
-
- // Estimate of the background noise level. Used externally for UI feedback.
- float noise_level_;
-
- // An adaptive threshold used to update decision_threshold_ when appropriate.
- float rms_adapt_;
-
- // Start lag corresponds to the highest fundamental frequency.
- int start_lag_;
-
- // End lag corresponds to the lowest fundamental frequency.
- int end_lag_;
-
- // Time when mode switched from environment estimation to user input. This
- // is used to time forced rejection of audio feedback contamination.
- int64 user_input_start_time_us_;
-
- DISALLOW_COPY_AND_ASSIGN(EnergyEndpointer);
-};
-
-} // namespace speech_input
-
-#endif // CHROME_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_
diff --git a/chrome/browser/speech/endpointer/energy_endpointer_params.cc b/chrome/browser/speech/endpointer/energy_endpointer_params.cc
deleted file mode 100644
index 1ab044a..0000000
--- a/chrome/browser/speech/endpointer/energy_endpointer_params.cc
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "chrome/browser/speech/endpointer/energy_endpointer_params.h"
-
-namespace speech_input {
-
-// Constructs a parameter set holding the values assigned by SetDefaults().
-EnergyEndpointerParams::EnergyEndpointerParams() {
- SetDefaults();
-}
-
-// Assigns the built-in default to every parameter.
-// NOTE(review): windows and durations appear to be in seconds and frequencies
-// in Hz - confirm against the callers that override them.
-void EnergyEndpointerParams::SetDefaults() {
- frame_period_ = 0.01f;
- frame_duration_ = 0.01f;
- endpoint_margin_ = 0.2f;
- onset_window_ = 0.15f;
- speech_on_window_ = 0.4f;
- offset_window_ = 0.15f;
- onset_detect_dur_ = 0.09f;
- onset_confirm_dur_ = 0.075f;
- on_maintain_dur_ = 0.10f;
- offset_confirm_dur_ = 0.12f;
- decision_threshold_ = 150.0f;
- min_decision_threshold_ = 50.0f;
- fast_update_dur_ = 0.2f;
- sample_rate_ = 8000.0f;
- min_fundamental_frequency_ = 57.143f;
- max_fundamental_frequency_ = 400.0f;
- contamination_rejection_period_ = 0.25f;
-}
-
-// Copies every parameter from |source|. Returns void, so assignments cannot
-// be chained.
-void EnergyEndpointerParams::operator=(const EnergyEndpointerParams& source) {
- frame_period_ = source.frame_period();
- frame_duration_ = source.frame_duration();
- endpoint_margin_ = source.endpoint_margin();
- onset_window_ = source.onset_window();
- speech_on_window_ = source.speech_on_window();
- offset_window_ = source.offset_window();
- onset_detect_dur_ = source.onset_detect_dur();
- onset_confirm_dur_ = source.onset_confirm_dur();
- on_maintain_dur_ = source.on_maintain_dur();
- offset_confirm_dur_ = source.offset_confirm_dur();
- decision_threshold_ = source.decision_threshold();
- min_decision_threshold_ = source.min_decision_threshold();
- fast_update_dur_ = source.fast_update_dur();
- sample_rate_ = source.sample_rate();
- min_fundamental_frequency_ = source.min_fundamental_frequency();
- max_fundamental_frequency_ = source.max_fundamental_frequency();
- contamination_rejection_period_ = source.contamination_rejection_period();
-}
-
-} // namespace speech_input
diff --git a/chrome/browser/speech/endpointer/energy_endpointer_params.h b/chrome/browser/speech/endpointer/energy_endpointer_params.h
deleted file mode 100644
index 86e44c9..0000000
--- a/chrome/browser/speech/endpointer/energy_endpointer_params.h
+++ /dev/null
@@ -1,137 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef CHROME_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_PARAMS_H_
-#define CHROME_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_PARAMS_H_
-
-#include "base/basictypes.h"
-
-namespace speech_input {
-
-// Input parameters for the EnergyEndpointer class. A plain bag of float
-// settings with one getter/setter pair per value; a new instance starts out
-// with the values from SetDefaults().
-class EnergyEndpointerParams {
- public:
- EnergyEndpointerParams();
-
- // Resets every parameter to its built-in default.
- void SetDefaults();
-
- // Copies all parameters from |source|.
- void operator=(const EnergyEndpointerParams& source);
-
- // Accessors and mutators
- float frame_period() const { return frame_period_; }
- void set_frame_period(float frame_period) {
- frame_period_ = frame_period;
- }
-
- float frame_duration() const { return frame_duration_; }
- void set_frame_duration(float frame_duration) {
- frame_duration_ = frame_duration;
- }
-
- float endpoint_margin() const { return endpoint_margin_; }
- void set_endpoint_margin(float endpoint_margin) {
- endpoint_margin_ = endpoint_margin;
- }
-
- float onset_window() const { return onset_window_; }
- void set_onset_window(float onset_window) { onset_window_ = onset_window; }
-
- float speech_on_window() const { return speech_on_window_; }
- void set_speech_on_window(float speech_on_window) {
- speech_on_window_ = speech_on_window;
- }
-
- float offset_window() const { return offset_window_; }
- void set_offset_window(float offset_window) {
- offset_window_ = offset_window;
- }
-
- float onset_detect_dur() const { return onset_detect_dur_; }
- void set_onset_detect_dur(float onset_detect_dur) {
- onset_detect_dur_ = onset_detect_dur;
- }
-
- float onset_confirm_dur() const { return onset_confirm_dur_; }
- void set_onset_confirm_dur(float onset_confirm_dur) {
- onset_confirm_dur_ = onset_confirm_dur;
- }
-
- float on_maintain_dur() const { return on_maintain_dur_; }
- void set_on_maintain_dur(float on_maintain_dur) {
- on_maintain_dur_ = on_maintain_dur;
- }
-
- float offset_confirm_dur() const { return offset_confirm_dur_; }
- void set_offset_confirm_dur(float offset_confirm_dur) {
- offset_confirm_dur_ = offset_confirm_dur;
- }
-
- float decision_threshold() const { return decision_threshold_; }
- void set_decision_threshold(float decision_threshold) {
- decision_threshold_ = decision_threshold;
- }
-
- float min_decision_threshold() const { return min_decision_threshold_; }
- void set_min_decision_threshold(float min_decision_threshold) {
- min_decision_threshold_ = min_decision_threshold;
- }
-
- float fast_update_dur() const { return fast_update_dur_; }
- void set_fast_update_dur(float fast_update_dur) {
- fast_update_dur_ = fast_update_dur;
- }
-
- float sample_rate() const { return sample_rate_; }
- void set_sample_rate(float sample_rate) { sample_rate_ = sample_rate; }
-
- float min_fundamental_frequency() const { return min_fundamental_frequency_; }
- void set_min_fundamental_frequency(float min_fundamental_frequency) {
- min_fundamental_frequency_ = min_fundamental_frequency;
- }
-
- float max_fundamental_frequency() const { return max_fundamental_frequency_; }
- void set_max_fundamental_frequency(float max_fundamental_frequency) {
- max_fundamental_frequency_ = max_fundamental_frequency;
- }
-
- float contamination_rejection_period() const {
- return contamination_rejection_period_;
- }
- void set_contamination_rejection_period(
- float contamination_rejection_period) {
- contamination_rejection_period_ = contamination_rejection_period;
- }
-
- private:
- float frame_period_; // Frame period
- float frame_duration_; // Window size
- float onset_window_; // Interval scanned for onset activity
- float speech_on_window_; // Interval scanned for ongoing speech
- float offset_window_; // Interval scanned for offset evidence
- float offset_confirm_dur_; // Silence duration required to confirm offset
- float decision_threshold_; // Initial rms detection threshold
- float min_decision_threshold_; // Minimum rms detection threshold
- float fast_update_dur_; // Period for initial estimation of levels.
- float sample_rate_; // Expected sample rate.
-
- // Time to add on either side of endpoint threshold crossings
- float endpoint_margin_;
- // Total dur within onset_window required to enter ONSET state
- float onset_detect_dur_;
- // Total on time within onset_window required to enter SPEECH_ON state
- float onset_confirm_dur_;
- // Minimum dur in SPEECH_ON state required to maintain ON state
- float on_maintain_dur_;
- // Minimum fundamental frequency for autocorrelation.
- float min_fundamental_frequency_;
- // Maximum fundamental frequency for autocorrelation.
- float max_fundamental_frequency_;
- // Period after start of user input that above threshold values are ignored.
- // This is to reject audio feedback contamination.
- float contamination_rejection_period_;
-};
-
-} // namespace speech_input
-
-#endif // CHROME_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_PARAMS_H_
diff --git a/chrome/browser/speech/speech_input_browsertest.cc b/chrome/browser/speech/speech_input_browsertest.cc
deleted file mode 100644
index 0b8c904..0000000
--- a/chrome/browser/speech/speech_input_browsertest.cc
+++ /dev/null
@@ -1,207 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "base/command_line.h"
-#include "base/file_path.h"
-#include "base/string_number_conversions.h"
-#include "base/utf_string_conversions.h"
-#include "chrome/browser/renderer_host/render_view_host.h"
-#include "chrome/browser/speech/speech_input_dispatcher_host.h"
-#include "chrome/browser/speech/speech_input_manager.h"
-#include "chrome/browser/tab_contents/tab_contents.h"
-#include "chrome/browser/ui/browser.h"
-#include "chrome/common/chrome_switches.h"
-#include "chrome/test/in_process_browser_test.h"
-#include "chrome/test/ui_test_utils.h"
-#include "third_party/WebKit/Source/WebKit/chromium/public/WebInputEvent.h"
-
-namespace speech_input {
-class FakeSpeechInputManager;
-}
-
-// This class does not need to be refcounted (typically done by PostTask) since
-// it will outlive the test and gets released only when the test shuts down.
-// Disabling refcounting here saves a bit of unnecessary code and the factory
-// method can return a plain pointer below as required by the real code.
-DISABLE_RUNNABLE_METHOD_REFCOUNT(speech_input::FakeSpeechInputManager);
-
-namespace speech_input {
-
-// Recognition result text that the fake manager hands back to the page.
-const char kTestResult[] = "Pictures of the moon";
-
-// SpeechInputManager stand-in that records nothing and instead posts a task
-// which reports kTestResult to the delegate. Tracks at most one request at a
-// time; a caller id of 0 means no request is in flight.
-class FakeSpeechInputManager : public SpeechInputManager {
- public:
- FakeSpeechInputManager()
- : caller_id_(0),
- delegate_(NULL) {
- }
-
- // Returns the grammar passed to the most recent StartRecognition() call.
- std::string grammar() {
- return grammar_;
- }
-
- // SpeechInputManager methods.
- virtual void StartRecognition(Delegate* delegate,
- int caller_id,
- int render_process_id,
- int render_view_id,
- const gfx::Rect& element_rect,
- const std::string& language,
- const std::string& grammar,
- const std::string& origin_url) {
- VLOG(1) << "StartRecognition invoked.";
- // Only one recognition request is expected to be active at a time.
- EXPECT_EQ(0, caller_id_);
- EXPECT_EQ(NULL, delegate_);
- caller_id_ = caller_id;
- delegate_ = delegate;
- grammar_ = grammar;
- // Give the fake result in a short while.
- MessageLoop::current()->PostTask(FROM_HERE, NewRunnableMethod(this,
- &FakeSpeechInputManager::SetFakeRecognitionResult));
- }
- virtual void CancelRecognition(int caller_id) {
- VLOG(1) << "CancelRecognition invoked.";
- EXPECT_EQ(caller_id_, caller_id);
- // Clearing these makes a pending SetFakeRecognitionResult() a no-op.
- caller_id_ = 0;
- delegate_ = NULL;
- }
- virtual void StopRecording(int caller_id) {
- VLOG(1) << "StopRecording invoked.";
- EXPECT_EQ(caller_id_, caller_id);
- // Nothing to do here since we aren't really recording.
- }
- virtual void CancelAllRequestsWithDelegate(Delegate* delegate) {
- VLOG(1) << "CancelAllRequestsWithDelegate invoked.";
- }
-
- private:
- // Reports the end of recording, the fake result and the end of recognition
- // to the delegate, then forgets the request.
- void SetFakeRecognitionResult() {
- if (caller_id_) { // Do a check in case we were cancelled..
- VLOG(1) << "Setting fake recognition result.";
- delegate_->DidCompleteRecording(caller_id_);
- SpeechInputResultArray results;
- results.push_back(SpeechInputResultItem(ASCIIToUTF16(kTestResult), 1.0));
- delegate_->SetRecognitionResult(caller_id_, results);
- delegate_->DidCompleteRecognition(caller_id_);
- caller_id_ = 0;
- delegate_ = NULL;
- VLOG(1) << "Finished setting fake recognition result.";
- }
- }
-
- int caller_id_;
- Delegate* delegate_;
- std::string grammar_;
-};
-
-// Browser test fixture that routes speech input requests from the page to a
-// FakeSpeechInputManager.
-class SpeechInputBrowserTest : public InProcessBrowserTest {
- public:
-  // Returns the URL of |filename| inside the "speech" test data directory.
-  GURL testUrl(const FilePath::CharType* filename) {
-    const FilePath kTestDir(FILE_PATH_LITERAL("speech"));
-    return ui_test_utils::GetTestUrl(kTestDir, FilePath(filename));
-  }
-
- protected:
-  // Loads |filename|, clicks the speech button at the coordinates the page
-  // publishes in its URL fragment, and expects the page to report "pass".
-  void LoadAndRunSpeechInputTest(const FilePath::CharType* filename) {
-    // The test page calculates the speech button's coordinate in the page on
-    // load & sets that coordinate in the URL fragment. We send mouse down & up
-    // events at that coordinate to trigger speech recognition.
-    GURL test_url = testUrl(filename);
-    ui_test_utils::NavigateToURL(browser(), test_url);
-    std::string coords = browser()->GetSelectedTabContents()->GetURL().ref();
-    VLOG(1) << "Coordinates given by script: " << coords;
-    // find() returns an unsigned position; compare it with npos rather than
-    // narrowing it to int and testing for -1.
-    const std::string::size_type comma_pos = coords.find(',');
-    ASSERT_NE(std::string::npos, comma_pos);
-    int x = 0;
-    ASSERT_TRUE(base::StringToInt(coords.substr(0, comma_pos).c_str(), &x));
-    int y = 0;
-    ASSERT_TRUE(base::StringToInt(coords.substr(comma_pos + 1).c_str(), &y));
-
-    WebKit::WebMouseEvent mouse_event;
-    mouse_event.type = WebKit::WebInputEvent::MouseDown;
-    mouse_event.button = WebKit::WebMouseEvent::ButtonLeft;
-    mouse_event.x = x;
-    mouse_event.y = y;
-    mouse_event.clickCount = 1;
-    TabContents* tab_contents = browser()->GetSelectedTabContents();
-    tab_contents->render_view_host()->ForwardMouseEvent(mouse_event);
-    mouse_event.type = WebKit::WebInputEvent::MouseUp;
-    tab_contents->render_view_host()->ForwardMouseEvent(mouse_event);
-
-    // The fake speech input manager would receive the speech input
-    // request and return the test string as recognition result. The test page
-    // then sets the URL fragment as 'pass' if it received the expected string.
-    ui_test_utils::WaitForNavigations(&tab_contents->controller(), 1);
-    EXPECT_EQ("pass", browser()->GetSelectedTabContents()->GetURL().ref());
-  }
-
-  // InProcessBrowserTest methods.
-  virtual void SetUpInProcessBrowserTestFixture() {
-    speech_input_manager_ = &fake_speech_input_manager_;
-
-    // Inject the fake manager factory so that the test result is returned to
-    // the web page.
-    SpeechInputDispatcherHost::set_manager_accessor(&fakeManagerAccessor);
-  }
-
-  virtual void TearDownInProcessBrowserTestFixture() {
-    speech_input_manager_ = NULL;
-  }
-
-  // Factory method.
-  static SpeechInputManager* fakeManagerAccessor() {
-    return speech_input_manager_;
-  }
-
-  FakeSpeechInputManager fake_speech_input_manager_;
-
-  // This is used by the static |fakeManagerAccessor|, and it is a pointer
-  // rather than a direct instance per the style guide.
-  static SpeechInputManager* speech_input_manager_;
-};
-
-// NULL except between fixture set-up and tear-down, when it points at the
-// fixture's fake manager.
-SpeechInputManager* SpeechInputBrowserTest::speech_input_manager_ = NULL;
-
-// Marked as FLAKY due to http://crbug.com/51337
-//
-// TODO(satish): Once this flakiness has been fixed, add a second test here to
-// check for sending many clicks in succession to the speech button and verify
-// that it doesn't cause any crash but works as expected. This should act as the
-// test for http://crbug.com/59173
-//
-// TODO(satish): Similar to above, once this flakiness has been fixed add
-// another test here to check that when speech recognition is in progress and
-// a renderer crashes, we get a call to
-// SpeechInputManager::CancelAllRequestsWithDelegate.
-//
-// Marked as DISABLED due to http://crbug.com/71227
-#if defined(GOOGLE_CHROME_BUILD)
-#define MAYBE_TestBasicRecognition DISABLED_TestBasicRecognition
-#elif defined(OS_WIN)
-#define MAYBE_TestBasicRecognition FLAKY_TestBasicRecognition
-#else
-#define MAYBE_TestBasicRecognition TestBasicRecognition
-#endif
-// Runs basic_recognition.html and checks that no grammar reached the manager.
-IN_PROC_BROWSER_TEST_F(SpeechInputBrowserTest, MAYBE_TestBasicRecognition) {
- LoadAndRunSpeechInputTest(FILE_PATH_LITERAL("basic_recognition.html"));
- EXPECT_TRUE(fake_speech_input_manager_.grammar().empty());
-}
-
-// Marked as FLAKY due to http://crbug.com/51337
-// Marked as DISABLED due to http://crbug.com/71227
-#if defined(GOOGLE_CHROME_BUILD)
-#define MAYBE_GrammarAttribute DISABLED_GrammarAttribute
-#elif defined(OS_WIN)
-#define MAYBE_GrammarAttribute FLAKY_GrammarAttribute
-#else
-#define MAYBE_GrammarAttribute GrammarAttribute
-#endif
-// Runs grammar_attribute.html and checks the grammar URL seen by the manager.
-IN_PROC_BROWSER_TEST_F(SpeechInputBrowserTest, MAYBE_GrammarAttribute) {
- LoadAndRunSpeechInputTest(FILE_PATH_LITERAL("grammar_attribute.html"));
- EXPECT_EQ("http://example.com/grammar.xml",
- fake_speech_input_manager_.grammar());
-}
-
-} // namespace speech_input
diff --git a/chrome/browser/speech/speech_input_dispatcher_host.cc b/chrome/browser/speech/speech_input_dispatcher_host.cc
deleted file mode 100644
index dc993d5..0000000
--- a/chrome/browser/speech/speech_input_dispatcher_host.cc
+++ /dev/null
@@ -1,225 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "chrome/browser/speech/speech_input_dispatcher_host.h"
-
-#include "base/lazy_instance.h"
-#include "chrome/common/speech_input_messages.h"
-
-namespace speech_input {
-
-//----------------------------- SpeechInputCallers -----------------------------
-
-// A singleton class to map the tuple
-// (render-process-id, render-view-id, request-id) to a single ID which is passed
-// through the rest of the speech code.
-class SpeechInputDispatcherHost::SpeechInputCallers {
- public:
- // Creates a new ID for a given tuple.
- int CreateId(int render_process_id, int render_view_id, int request_id);
-
- // Returns the ID for a tuple assuming the ID was created earlier.
- int GetId(int render_process_id, int render_view_id, int request_id);
-
- // Removes the ID and associated tuple from the map.
- void RemoveId(int id);
-
- // Getters for the various tuple elements for the given ID.
- int render_process_id(int id);
- int render_view_id(int id);
- int request_id(int id);
-
- private:
- struct CallerInfo {
- int render_process_id;
- int render_view_id;
- int request_id;
- };
- friend struct base::DefaultLazyInstanceTraits<SpeechInputCallers>;
-
- SpeechInputCallers();
-
- std::map<int, CallerInfo> callers_;
- int next_id_;
-};
-
-static base::LazyInstance<SpeechInputDispatcherHost::SpeechInputCallers>
- g_speech_input_callers(base::LINKER_INITIALIZED);
-
-SpeechInputDispatcherHost::SpeechInputCallers::SpeechInputCallers()
- : next_id_(1) {
-}
-
-int SpeechInputDispatcherHost::SpeechInputCallers::GetId(int render_process_id,
- int render_view_id,
- int request_id) {
- for (std::map<int, CallerInfo>::iterator it = callers_.begin();
- it != callers_.end(); it++) {
- const CallerInfo& item = it->second;
- if (item.render_process_id == render_process_id &&
- item.render_view_id == render_view_id &&
- item.request_id == request_id) {
- return it->first;
- }
- }
-
- // Not finding an entry here is valid since a cancel/stop may have been issued
- // by the renderer and before it received our response the user may have
- // clicked the button to stop again. The caller of this method should take
- // care of this case.
- return 0;
-}
-
-int SpeechInputDispatcherHost::SpeechInputCallers::CreateId(
- int render_process_id,
- int render_view_id,
- int request_id) {
- CallerInfo info;
- info.render_process_id = render_process_id;
- info.render_view_id = render_view_id;
- info.request_id = request_id;
- callers_[next_id_] = info;
- return next_id_++;
-}
-
-void SpeechInputDispatcherHost::SpeechInputCallers::RemoveId(int id) {
- callers_.erase(id);
-}
-
-int SpeechInputDispatcherHost::SpeechInputCallers::render_process_id(int id) {
- return callers_[id].render_process_id;
-}
-
-int SpeechInputDispatcherHost::SpeechInputCallers::render_view_id(int id) {
- return callers_[id].render_view_id;
-}
-
-int SpeechInputDispatcherHost::SpeechInputCallers::request_id(int id) {
- return callers_[id].request_id;
-}
-
-//-------------------------- SpeechInputDispatcherHost -------------------------
-
-SpeechInputManager::AccessorMethod*
- SpeechInputDispatcherHost::manager_accessor_ = &SpeechInputManager::Get;
-
-SpeechInputDispatcherHost::SpeechInputDispatcherHost(int render_process_id)
- : render_process_id_(render_process_id),
- may_have_pending_requests_(false) {
- // This is initialized by Browser. Do not add any non-trivial
- // initialization here, instead do it lazily when required (e.g. see the
- // method |manager()|) or add an Init() method.
-}
-
-SpeechInputDispatcherHost::~SpeechInputDispatcherHost() {
- // If the renderer crashed for some reason or if we didn't receive a proper
- // Cancel/Stop call for an existing session, cancel such active sessions now.
- // We first check if this dispatcher received any speech IPC request so that
- // we don't end up creating the speech input manager for web pages which don't
- // use speech input.
- if (may_have_pending_requests_)
- manager()->CancelAllRequestsWithDelegate(this);
-}
-
-SpeechInputManager* SpeechInputDispatcherHost::manager() {
- return (*manager_accessor_)();
-}
-
-bool SpeechInputDispatcherHost::OnMessageReceived(
- const IPC::Message& message, bool* message_was_ok) {
- DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
-
- uint32 message_type = message.type();
- if (message_type == SpeechInputHostMsg_StartRecognition::ID ||
- message_type == SpeechInputHostMsg_CancelRecognition::ID ||
- message_type == SpeechInputHostMsg_StopRecording::ID) {
- if (!SpeechInputManager::IsFeatureEnabled()) {
- *message_was_ok = false;
- return true;
- }
-
- may_have_pending_requests_ = true;
- IPC_BEGIN_MESSAGE_MAP_EX(SpeechInputDispatcherHost, message,
- *message_was_ok)
- IPC_MESSAGE_HANDLER(SpeechInputHostMsg_StartRecognition,
- OnStartRecognition)
- IPC_MESSAGE_HANDLER(SpeechInputHostMsg_CancelRecognition,
- OnCancelRecognition)
- IPC_MESSAGE_HANDLER(SpeechInputHostMsg_StopRecording,
- OnStopRecording)
- IPC_END_MESSAGE_MAP()
- return true;
- }
-
- return false;
-}
-
-void SpeechInputDispatcherHost::OnStartRecognition(
- const SpeechInputHostMsg_StartRecognition_Params &params) {
- int caller_id = g_speech_input_callers.Get().CreateId(
- render_process_id_, params.render_view_id, params.request_id);
- manager()->StartRecognition(this, caller_id,
- render_process_id_,
- params.render_view_id, params.element_rect,
- params.language, params.grammar,
- params.origin_url);
-}
-
-void SpeechInputDispatcherHost::OnCancelRecognition(int render_view_id,
- int request_id) {
- int caller_id = g_speech_input_callers.Get().GetId(
- render_process_id_, render_view_id, request_id);
- if (caller_id) {
- manager()->CancelRecognition(caller_id);
- // Request sequence ended so remove mapping.
- g_speech_input_callers.Get().RemoveId(caller_id);
- }
-}
-
-void SpeechInputDispatcherHost::OnStopRecording(int render_view_id,
- int request_id) {
- int caller_id = g_speech_input_callers.Get().GetId(
- render_process_id_, render_view_id, request_id);
- if (caller_id)
- manager()->StopRecording(caller_id);
-}
-
-void SpeechInputDispatcherHost::SetRecognitionResult(
- int caller_id, const SpeechInputResultArray& result) {
- VLOG(1) << "SpeechInputDispatcherHost::SetRecognitionResult enter";
- DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
- int caller_render_view_id =
- g_speech_input_callers.Get().render_view_id(caller_id);
- int caller_request_id = g_speech_input_callers.Get().request_id(caller_id);
- Send(new SpeechInputMsg_SetRecognitionResult(caller_render_view_id,
- caller_request_id,
- result));
- VLOG(1) << "SpeechInputDispatcherHost::SetRecognitionResult exit";
-}
-
-void SpeechInputDispatcherHost::DidCompleteRecording(int caller_id) {
- VLOG(1) << "SpeechInputDispatcherHost::DidCompleteRecording enter";
- DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
- int caller_render_view_id =
- g_speech_input_callers.Get().render_view_id(caller_id);
- int caller_request_id = g_speech_input_callers.Get().request_id(caller_id);
- Send(new SpeechInputMsg_RecordingComplete(caller_render_view_id,
- caller_request_id));
- VLOG(1) << "SpeechInputDispatcherHost::DidCompleteRecording exit";
-}
-
-void SpeechInputDispatcherHost::DidCompleteRecognition(int caller_id) {
- VLOG(1) << "SpeechInputDispatcherHost::DidCompleteRecognition enter";
- DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
- int caller_render_view_id =
- g_speech_input_callers.Get().render_view_id(caller_id);
- int caller_request_id = g_speech_input_callers.Get().request_id(caller_id);
- Send(new SpeechInputMsg_RecognitionComplete(caller_render_view_id,
- caller_request_id));
- // Request sequence ended, so remove mapping.
- g_speech_input_callers.Get().RemoveId(caller_id);
- VLOG(1) << "SpeechInputDispatcherHost::DidCompleteRecognition exit";
-}
-
-} // namespace speech_input
diff --git a/chrome/browser/speech/speech_input_dispatcher_host.h b/chrome/browser/speech/speech_input_dispatcher_host.h
deleted file mode 100644
index 23a1f23..0000000
--- a/chrome/browser/speech/speech_input_dispatcher_host.h
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef CHROME_BROWSER_SPEECH_SPEECH_INPUT_DISPATCHER_HOST_H_
-#define CHROME_BROWSER_SPEECH_SPEECH_INPUT_DISPATCHER_HOST_H_
-
-#include "base/scoped_ptr.h"
-#include "chrome/browser/browser_message_filter.h"
-#include "chrome/browser/speech/speech_input_manager.h"
-
-struct SpeechInputHostMsg_StartRecognition_Params;
-
-namespace speech_input {
-
-// SpeechInputDispatcherHost is a delegate for Speech API messages used by
-// RenderMessageFilter.
-// It's the complement of SpeechInputDispatcher (owned by RenderView).
-class SpeechInputDispatcherHost : public BrowserMessageFilter,
- public SpeechInputManager::Delegate {
- public:
- class SpeechInputCallers;
-
- explicit SpeechInputDispatcherHost(int render_process_id);
-
- // SpeechInputManager::Delegate methods.
- virtual void SetRecognitionResult(int caller_id,
- const SpeechInputResultArray& result);
- virtual void DidCompleteRecording(int caller_id);
- virtual void DidCompleteRecognition(int caller_id);
-
- // BrowserMessageFilter implementation.
- virtual bool OnMessageReceived(const IPC::Message& message,
- bool* message_was_ok);
-
- // Singleton accessor setter useful for tests.
- static void set_manager_accessor(SpeechInputManager::AccessorMethod* method) {
- manager_accessor_ = method;
- }
-
- private:
- virtual ~SpeechInputDispatcherHost();
-
- void OnStartRecognition(
- const SpeechInputHostMsg_StartRecognition_Params &params);
- void OnCancelRecognition(int render_view_id, int request_id);
- void OnStopRecording(int render_view_id, int request_id);
-
- // Returns the speech input manager to forward events to, creating one if
- // needed.
- SpeechInputManager* manager();
-
- int render_process_id_;
- bool may_have_pending_requests_; // Set if we received any speech IPC request
-
- static SpeechInputManager::AccessorMethod* manager_accessor_;
-
- DISALLOW_COPY_AND_ASSIGN(SpeechInputDispatcherHost);
-};
-
-} // namespace speech_input
-
-#endif // CHROME_BROWSER_SPEECH_SPEECH_INPUT_DISPATCHER_HOST_H_
diff --git a/chrome/browser/speech/speech_input_manager.cc b/chrome/browser/speech/speech_input_manager.cc
index 7a07543..626bf4f 100644
--- a/chrome/browser/speech/speech_input_manager.cc
+++ b/chrome/browser/speech/speech_input_manager.cc
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "chrome/browser/speech/speech_input_manager.h"
+#include "content/browser/speech/speech_input_manager.h"
#include <map>
#include <string>
@@ -14,14 +14,14 @@
#include "base/threading/thread_restrictions.h"
#include "base/utf_string_conversions.h"
#include "chrome/browser/browser_process.h"
-#include "chrome/browser/browser_thread.h"
#include "chrome/browser/platform_util.h"
#include "chrome/browser/prefs/pref_service.h"
#include "chrome/browser/speech/speech_input_bubble_controller.h"
-#include "chrome/browser/speech/speech_recognizer.h"
#include "chrome/browser/tab_contents/tab_util.h"
#include "chrome/common/chrome_switches.h"
#include "chrome/common/pref_names.h"
+#include "content/browser/browser_thread.h"
+#include "content/browser/speech/speech_recognizer.h"
#include "grit/generated_resources.h"
#include "media/audio/audio_manager.h"
#include "ui/base/l10n/l10n_util.h"
diff --git a/chrome/browser/speech/speech_input_manager.h b/chrome/browser/speech/speech_input_manager.h
deleted file mode 100644
index 3646f4f..0000000
--- a/chrome/browser/speech/speech_input_manager.h
+++ /dev/null
@@ -1,78 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef CHROME_BROWSER_SPEECH_SPEECH_INPUT_MANAGER_H_
-#define CHROME_BROWSER_SPEECH_SPEECH_INPUT_MANAGER_H_
-
-#include "base/basictypes.h"
-#include "chrome/common/speech_input_result.h"
-#include "ipc/ipc_message.h"
-#include "ui/gfx/rect.h"
-
-namespace speech_input {
-
-// This is the gatekeeper for speech recognition in the browser process. It
-// handles requests received from various render views and makes sure only one
-// of them can use speech recognition at a time. It also sends recognition
-// results and status events to the render views when required.
-// This class is a singleton and accessed via the Get method.
-class SpeechInputManager {
- public:
- // Implemented by the dispatcher host to relay events to the render views.
- class Delegate {
- public:
- virtual void SetRecognitionResult(
- int caller_id,
- const SpeechInputResultArray& result) = 0;
- virtual void DidCompleteRecording(int caller_id) = 0;
- virtual void DidCompleteRecognition(int caller_id) = 0;
-
- protected:
- virtual ~Delegate() {}
- };
-
- // Whether the speech input feature is enabled, based on the browser channel
- // information and command line flags.
- static bool IsFeatureEnabled();
-
- // Factory method to access the singleton. We have this method here instead of
- // using Singleton directly in the calling code to aid tests in injecting
- // mocks.
- static SpeechInputManager* Get();
- // Factory method definition useful for tests.
- typedef SpeechInputManager* (AccessorMethod)();
-
- virtual ~SpeechInputManager() {}
-
- // Handlers for requests from render views.
-
- // |delegate| is a weak pointer and should remain valid until
- // its |DidCompleteRecognition| method is called or recognition is cancelled.
- // |render_process_id| is the ID of the renderer process initiating the
- // request.
- // |element_rect| is the display bounds of the html element requesting speech
- // input (in page coordinates).
- virtual void StartRecognition(Delegate* delegate,
- int caller_id,
- int render_process_id,
- int render_view_id,
- const gfx::Rect& element_rect,
- const std::string& language,
- const std::string& grammar,
- const std::string& origin_url) = 0;
- virtual void CancelRecognition(int caller_id) = 0;
- virtual void StopRecording(int caller_id) = 0;
-
- virtual void CancelAllRequestsWithDelegate(Delegate* delegate) = 0;
-};
-
-// This typedef is to work around the issue with certain versions of
-// Visual Studio where it gets confused between multiple Delegate
-// classes and gives a C2500 error. (I saw this error on the try bots -
-// the workaround was not needed for my machine).
-typedef SpeechInputManager::Delegate SpeechInputManagerDelegate;
-
-} // namespace speech_input
-
-#endif // CHROME_BROWSER_SPEECH_SPEECH_INPUT_MANAGER_H_
diff --git a/chrome/browser/speech/speech_recognition_request.cc b/chrome/browser/speech/speech_recognition_request.cc
deleted file mode 100644
index dc8dc27..0000000
--- a/chrome/browser/speech/speech_recognition_request.cc
+++ /dev/null
@@ -1,197 +0,0 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "chrome/browser/speech/speech_recognition_request.h"
-
-#include <vector>
-
-#include "base/json/json_reader.h"
-#include "base/string_util.h"
-#include "base/values.h"
-#include "chrome/common/net/url_request_context_getter.h"
-#include "net/base/escape.h"
-#include "net/base/load_flags.h"
-#include "net/url_request/url_request_context.h"
-#include "net/url_request/url_request_status.h"
-#include "ui/base/l10n/l10n_util.h"
-
-namespace {
-
-const char* const kDefaultSpeechRecognitionUrl =
- "https://www.google.com/speech-api/v1/recognize?client=chromium&";
-const char* const kHypothesesString = "hypotheses";
-const char* const kUtteranceString = "utterance";
-const char* const kConfidenceString = "confidence";
-
-bool ParseServerResponse(const std::string& response_body,
- speech_input::SpeechInputResultArray* result) {
- if (response_body.empty()) {
- LOG(WARNING) << "ParseServerResponse: Response was empty.";
- return false;
- }
- DVLOG(1) << "ParseServerResponse: Parsing response " << response_body;
-
- // Parse the response, ignoring comments.
- std::string error_msg;
- scoped_ptr<Value> response_value(base::JSONReader::ReadAndReturnError(
- response_body, false, NULL, &error_msg));
- if (response_value == NULL) {
- LOG(WARNING) << "ParseServerResponse: JSONReader failed : " << error_msg;
- return false;
- }
-
- if (!response_value->IsType(Value::TYPE_DICTIONARY)) {
- VLOG(1) << "ParseServerResponse: Unexpected response type "
- << response_value->GetType();
- return false;
- }
- const DictionaryValue* response_object =
- static_cast<DictionaryValue*>(response_value.get());
-
- // Get the hypotheses
- Value* hypotheses_value = NULL;
- if (!response_object->Get(kHypothesesString, &hypotheses_value)) {
- VLOG(1) << "ParseServerResponse: Missing hypotheses attribute.";
- return false;
- }
- DCHECK(hypotheses_value);
- if (!hypotheses_value->IsType(Value::TYPE_LIST)) {
- VLOG(1) << "ParseServerResponse: Unexpected hypotheses type "
- << hypotheses_value->GetType();
- return false;
- }
- const ListValue* hypotheses_list = static_cast<ListValue*>(hypotheses_value);
- if (hypotheses_list->GetSize() == 0) {
- VLOG(1) << "ParseServerResponse: hypotheses list is empty.";
- return false;
- }
-
- size_t index = 0;
- for (; index < hypotheses_list->GetSize(); ++index) {
- Value* hypothesis = NULL;
- if (!hypotheses_list->Get(index, &hypothesis)) {
- LOG(WARNING) << "ParseServerResponse: Unable to read hypothesis value.";
- break;
- }
- DCHECK(hypothesis);
- if (!hypothesis->IsType(Value::TYPE_DICTIONARY)) {
- LOG(WARNING) << "ParseServerResponse: Unexpected value type "
- << hypothesis->GetType();
- break;
- }
-
- const DictionaryValue* hypothesis_value =
- static_cast<DictionaryValue*>(hypothesis);
- string16 utterance;
- if (!hypothesis_value->GetString(kUtteranceString, &utterance)) {
- LOG(WARNING) << "ParseServerResponse: Missing utterance value.";
- break;
- }
-
- // It is not an error if the 'confidence' field is missing.
- double confidence = 0.0;
- hypothesis_value->GetDouble(kConfidenceString, &confidence);
-
- result->push_back(speech_input::SpeechInputResultItem(utterance,
- confidence));
- }
-
- if (index < hypotheses_list->GetSize()) {
- result->clear();
- return false;
- }
-
- return true;
-}
-
-} // namespace
-
-namespace speech_input {
-
-int SpeechRecognitionRequest::url_fetcher_id_for_tests = 0;
-
-SpeechRecognitionRequest::SpeechRecognitionRequest(
- URLRequestContextGetter* context, Delegate* delegate)
- : url_context_(context),
- delegate_(delegate) {
- DCHECK(delegate);
-}
-
-SpeechRecognitionRequest::~SpeechRecognitionRequest() {}
-
-bool SpeechRecognitionRequest::Send(const std::string& language,
- const std::string& grammar,
- const std::string& hardware_info,
- const std::string& origin_url,
- const std::string& content_type,
- const std::string& audio_data) {
- DCHECK(!url_fetcher_.get());
-
- std::vector<std::string> parts;
-
- std::string lang_param = language;
- if (lang_param.empty() && url_context_) {
- // If no language is provided then we use the first from the accepted
- // language list. If this list is empty then it defaults to "en-US".
- // Example of the contents of this list: "es,en-GB;q=0.8", ""
- net::URLRequestContext* request_context =
- url_context_->GetURLRequestContext();
- DCHECK(request_context);
- std::string accepted_language_list = request_context->accept_language();
- size_t separator = accepted_language_list.find_first_of(",;");
- lang_param = accepted_language_list.substr(0, separator);
- }
- if (lang_param.empty())
- lang_param = "en-US";
- parts.push_back("lang=" + EscapeQueryParamValue(lang_param, true));
-
- if (!grammar.empty())
- parts.push_back("lm=" + EscapeQueryParamValue(grammar, true));
- if (!hardware_info.empty())
- parts.push_back("xhw=" + EscapeQueryParamValue(hardware_info, true));
- // TODO(satish): Remove this hardcoded value once the page is allowed to
- // set this via an attribute.
- parts.push_back("maxresults=3");
-
- GURL url(std::string(kDefaultSpeechRecognitionUrl) + JoinString(parts, '&'));
-
- url_fetcher_.reset(URLFetcher::Create(url_fetcher_id_for_tests,
- url,
- URLFetcher::POST,
- this));
- url_fetcher_->set_upload_data(content_type, audio_data);
- url_fetcher_->set_request_context(url_context_);
- url_fetcher_->set_referrer(origin_url);
-
- // The speech recognition API does not require user identification as part
- // of requests, so we don't send cookies or auth data for these requests to
- // prevent any accidental connection between users who are logged into the
- // domain for other services (e.g. bookmark sync) with the speech requests.
- url_fetcher_->set_load_flags(
- net::LOAD_DO_NOT_SAVE_COOKIES | net::LOAD_DO_NOT_SEND_COOKIES |
- net::LOAD_DO_NOT_SEND_AUTH_DATA);
- url_fetcher_->Start();
- return true;
-}
-
-void SpeechRecognitionRequest::OnURLFetchComplete(
- const URLFetcher* source,
- const GURL& url,
- const net::URLRequestStatus& status,
- int response_code,
- const ResponseCookies& cookies,
- const std::string& data) {
- DCHECK_EQ(url_fetcher_.get(), source);
-
- bool error = !status.is_success() || response_code != 200;
- SpeechInputResultArray result;
- if (!error)
- error = !ParseServerResponse(data, &result);
- url_fetcher_.reset();
-
- DVLOG(1) << "SpeechRecognitionRequest: Invoking delegate with result.";
- delegate_->SetRecognitionResult(error, result);
-}
-
-} // namespace speech_input
diff --git a/chrome/browser/speech/speech_recognition_request.h b/chrome/browser/speech/speech_recognition_request.h
deleted file mode 100644
index 9b022cf..0000000
--- a/chrome/browser/speech/speech_recognition_request.h
+++ /dev/null
@@ -1,81 +0,0 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef CHROME_BROWSER_SPEECH_SPEECH_RECOGNITION_REQUEST_H_
-#define CHROME_BROWSER_SPEECH_SPEECH_RECOGNITION_REQUEST_H_
-#pragma once
-
-#include <string>
-#include "base/basictypes.h"
-#include "base/ref_counted.h"
-#include "base/scoped_ptr.h"
-#include "chrome/common/net/url_fetcher.h"
-#include "chrome/common/speech_input_result.h"
-#include "googleurl/src/gurl.h"
-
-class URLFetcher;
-class URLRequestContextGetter;
-
-namespace speech_input {
-
-// Provides a simple interface for sending recorded speech data to the server
-// and getting back recognition results.
-class SpeechRecognitionRequest : public URLFetcher::Delegate {
- public:
- // ID passed to URLFetcher::Create(). Used for testing.
- static int url_fetcher_id_for_tests;
-
- // Interface for receiving callbacks from this object.
- class Delegate {
- public:
- virtual void SetRecognitionResult(
- bool error, const SpeechInputResultArray& result) = 0;
-
- protected:
- virtual ~Delegate() {}
- };
-
- // |url| is the server address to which the request will be sent.
- SpeechRecognitionRequest(URLRequestContextGetter* context,
- Delegate* delegate);
-
- virtual ~SpeechRecognitionRequest();
-
- // Sends a new request with the given audio data, returns true if successful.
- // The same object can be used to send multiple requests but only after the
- // previous request has completed.
- bool Send(const std::string& language,
- const std::string& grammar,
- const std::string& hardware_info,
- const std::string& origin_url,
- const std::string& content_type,
- const std::string& audio_data);
-
- bool HasPendingRequest() { return url_fetcher_ != NULL; }
-
- // URLFetcher::Delegate methods.
- virtual void OnURLFetchComplete(const URLFetcher* source,
- const GURL& url,
- const net::URLRequestStatus& status,
- int response_code,
- const ResponseCookies& cookies,
- const std::string& data);
-
- private:
- scoped_refptr<URLRequestContextGetter> url_context_;
- Delegate* delegate_;
- scoped_ptr<URLFetcher> url_fetcher_;
-
- DISALLOW_COPY_AND_ASSIGN(SpeechRecognitionRequest);
-};
-
-// This typedef is to work around the issue with certain versions of
-// Visual Studio where it gets confused between multiple Delegate
-// classes and gives a C2500 error. (I saw this error on the try bots -
-// the workaround was not needed for my machine).
-typedef SpeechRecognitionRequest::Delegate SpeechRecognitionRequestDelegate;
-
-} // namespace speech_input
-
-#endif // CHROME_BROWSER_SPEECH_SPEECH_RECOGNITION_REQUEST_H_
diff --git a/chrome/browser/speech/speech_recognition_request_unittest.cc b/chrome/browser/speech/speech_recognition_request_unittest.cc
deleted file mode 100644
index bd2a26e..0000000
--- a/chrome/browser/speech/speech_recognition_request_unittest.cc
+++ /dev/null
@@ -1,94 +0,0 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "base/utf_string_conversions.h"
-#include "chrome/browser/speech/speech_recognition_request.h"
-#include "chrome/common/net/url_request_context_getter.h"
-#include "chrome/common/net/test_url_fetcher_factory.h"
-#include "net/url_request/url_request_status.h"
-#include "testing/gtest/include/gtest/gtest.h"
-
-namespace speech_input {
-
-class SpeechRecognitionRequestTest : public SpeechRecognitionRequestDelegate,
- public testing::Test {
- public:
- SpeechRecognitionRequestTest() : error_(false) { }
-
- // Creates a speech recognition request and invokes its URL fetcher delegate
- // with the given test data.
- void CreateAndTestRequest(bool success, const std::string& http_response);
-
- // SpeechRecognitionRequestDelegate methods.
- virtual void SetRecognitionResult(bool error,
- const SpeechInputResultArray& result) {
- error_ = error;
- result_ = result;
- }
-
- // testing::Test methods.
- virtual void SetUp() {
- URLFetcher::set_factory(&url_fetcher_factory_);
- }
-
- virtual void TearDown() {
- URLFetcher::set_factory(NULL);
- }
-
- protected:
- MessageLoop message_loop_;
- TestURLFetcherFactory url_fetcher_factory_;
- bool error_;
- SpeechInputResultArray result_;
-};
-
-void SpeechRecognitionRequestTest::CreateAndTestRequest(
- bool success, const std::string& http_response) {
- SpeechRecognitionRequest request(NULL, this);
- request.Send(std::string(), std::string(), std::string(), std::string(),
- std::string(), std::string());
- TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
- ASSERT_TRUE(fetcher);
- net::URLRequestStatus status;
- status.set_status(success ? net::URLRequestStatus::SUCCESS :
- net::URLRequestStatus::FAILED);
- fetcher->delegate()->OnURLFetchComplete(fetcher, fetcher->original_url(),
- status, success ? 200 : 500,
- ResponseCookies(),
- http_response);
- // Parsed response will be available in result_.
-}
-
-TEST_F(SpeechRecognitionRequestTest, BasicTest) {
- // Normal success case with one result.
- CreateAndTestRequest(true,
- "{\"hypotheses\":[{\"utterance\":\"123456\",\"confidence\":0.9}]}");
- EXPECT_FALSE(error_);
- EXPECT_EQ(1U, result_.size());
- EXPECT_EQ(ASCIIToUTF16("123456"), result_[0].utterance);
- EXPECT_EQ(0.9, result_[0].confidence);
-
- // Normal success case with multiple results.
- CreateAndTestRequest(true,
- "{\"hypotheses\":[{\"utterance\":\"hello\",\"confidence\":0.9},"
- "{\"utterance\":\"123456\",\"confidence\":0.5}]}");
- EXPECT_FALSE(error_);
- EXPECT_EQ(2u, result_.size());
- EXPECT_EQ(ASCIIToUTF16("hello"), result_[0].utterance);
- EXPECT_EQ(0.9, result_[0].confidence);
- EXPECT_EQ(ASCIIToUTF16("123456"), result_[1].utterance);
- EXPECT_EQ(0.5, result_[1].confidence);
-
- // Http failure case.
- CreateAndTestRequest(false, "");
- EXPECT_TRUE(error_);
- EXPECT_EQ(0U, result_.size());
-
- // Malformed JSON case.
- CreateAndTestRequest(true, "{\"hypotheses\":[{\"unknownkey\":\"hello\"}]}");
- EXPECT_TRUE(error_);
- EXPECT_EQ(0U, result_.size());
-}
-
-} // namespace speech_input
diff --git a/chrome/browser/speech/speech_recognizer.cc b/chrome/browser/speech/speech_recognizer.cc
deleted file mode 100644
index 113600b..0000000
--- a/chrome/browser/speech/speech_recognizer.cc
+++ /dev/null
@@ -1,264 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "chrome/browser/speech/speech_recognizer.h"
-
-#include "base/ref_counted.h"
-#include "base/scoped_ptr.h"
-#include "base/time.h"
-#include "chrome/browser/browser_thread.h"
-#include "chrome/browser/profiles/profile.h"
-#include "chrome/common/net/url_request_context_getter.h"
-
-using media::AudioInputController;
-using std::string;
-
-namespace {
-
-// The following constants are related to the volume level indicator shown in
-// the UI for recorded audio.
-// Multiplier used when new volume is greater than previous level.
-const float kUpSmoothingFactor = 0.9f;
-// Multiplier used when new volume is lesser than previous level.
-const float kDownSmoothingFactor = 0.4f;
-const float kAudioMeterMinDb = 10.0f; // Lower bar for volume meter.
-const float kAudioMeterDbRange = 25.0f;
-} // namespace
-
-namespace speech_input {
-
-const int SpeechRecognizer::kAudioSampleRate = 16000;
-const int SpeechRecognizer::kAudioPacketIntervalMs = 100;
-const int SpeechRecognizer::kNumAudioChannels = 1;
-const int SpeechRecognizer::kNumBitsPerAudioSample = 16;
-const int SpeechRecognizer::kNoSpeechTimeoutSec = 8;
-const int SpeechRecognizer::kEndpointerEstimationTimeMs = 300;
-
-SpeechRecognizer::SpeechRecognizer(Delegate* delegate,
- int caller_id,
- const std::string& language,
- const std::string& grammar,
- const std::string& hardware_info,
- const std::string& origin_url)
- : delegate_(delegate),
- caller_id_(caller_id),
- language_(language),
- grammar_(grammar),
- hardware_info_(hardware_info),
- origin_url_(origin_url),
- codec_(AudioEncoder::CODEC_SPEEX),
- encoder_(NULL),
- endpointer_(kAudioSampleRate),
- num_samples_recorded_(0),
- audio_level_(0.0f) {
- endpointer_.set_speech_input_complete_silence_length(
- base::Time::kMicrosecondsPerSecond / 2);
- endpointer_.set_long_speech_input_complete_silence_length(
- base::Time::kMicrosecondsPerSecond);
- endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond);
- endpointer_.StartSession();
-}
-
-SpeechRecognizer::~SpeechRecognizer() {
- // Recording should have stopped earlier due to the endpointer or
- // |StopRecording| being called.
- DCHECK(!audio_controller_.get());
- DCHECK(!request_.get() || !request_->HasPendingRequest());
- DCHECK(!encoder_.get());
- endpointer_.EndSession();
-}
-
-bool SpeechRecognizer::StartRecording() {
- DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
- DCHECK(!audio_controller_.get());
- DCHECK(!request_.get() || !request_->HasPendingRequest());
- DCHECK(!encoder_.get());
-
- // The endpointer needs to estimate the environment/background noise before
- // starting to treat the audio as user input. In |HandleOnData| we wait until
- // such time has passed before switching to user input mode.
- endpointer_.SetEnvironmentEstimationMode();
-
- encoder_.reset(AudioEncoder::Create(codec_, kAudioSampleRate,
- kNumBitsPerAudioSample));
- int samples_per_packet = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000;
- AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kNumAudioChannels,
- kAudioSampleRate, kNumBitsPerAudioSample,
- samples_per_packet);
- audio_controller_ = AudioInputController::Create(this, params);
- DCHECK(audio_controller_.get());
- VLOG(1) << "SpeechRecognizer starting record.";
- num_samples_recorded_ = 0;
- audio_controller_->Record();
-
- return true;
-}
-
-void SpeechRecognizer::CancelRecognition() {
- DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
- DCHECK(audio_controller_.get() || request_.get());
-
- // Stop recording if required.
- if (audio_controller_.get()) {
- VLOG(1) << "SpeechRecognizer stopping record.";
- audio_controller_->Close();
- audio_controller_ = NULL; // Releases the ref ptr.
- }
-
- VLOG(1) << "SpeechRecognizer canceling recognition.";
- encoder_.reset();
- request_.reset();
-}
-
-void SpeechRecognizer::StopRecording() {
- DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
-
- // If audio recording has already stopped and we are in recognition phase,
- // silently ignore any more calls to stop recording.
- if (!audio_controller_.get())
- return;
-
- VLOG(1) << "SpeechRecognizer stopping record.";
- audio_controller_->Close();
- audio_controller_ = NULL; // Releases the ref ptr.
- encoder_->Flush();
-
- delegate_->DidCompleteRecording(caller_id_);
-
- // Since the http request takes a single string as POST data, allocate
- // one and copy over bytes from the audio buffers to the string.
- // And If we haven't got any audio yet end the recognition sequence here.
- string mime_type = encoder_->mime_type();
- string data;
- encoder_->GetEncodedData(&data);
- encoder_.reset();
-
- if (data.empty()) {
- // Guard against the delegate freeing us until we finish our job.
- scoped_refptr<SpeechRecognizer> me(this);
- delegate_->DidCompleteRecognition(caller_id_);
- } else {
- DCHECK(!request_.get());
- request_.reset(new SpeechRecognitionRequest(
- Profile::GetDefaultRequestContext(), this));
- request_->Send(language_, grammar_, hardware_info_, origin_url_,
- mime_type, data);
- }
-}
-
-void SpeechRecognizer::ReleaseAudioBuffers() {
-}
-
-// Invoked in the audio thread.
-void SpeechRecognizer::OnError(AudioInputController* controller,
- int error_code) {
- BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
- NewRunnableMethod(this,
- &SpeechRecognizer::HandleOnError,
- error_code));
-}
-
-void SpeechRecognizer::HandleOnError(int error_code) {
- LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code;
-
- // Check if we are still recording before canceling recognition, as
- // recording might have been stopped after this error was posted to the queue
- // by |OnError|.
- if (!audio_controller_.get())
- return;
-
- InformErrorAndCancelRecognition(RECOGNIZER_ERROR_CAPTURE);
-}
-
-void SpeechRecognizer::OnData(AudioInputController* controller,
- const uint8* data, uint32 size) {
- if (size == 0) // This could happen when recording stops and is normal.
- return;
-
- string* str_data = new string(reinterpret_cast<const char*>(data), size);
- BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
- NewRunnableMethod(this,
- &SpeechRecognizer::HandleOnData,
- str_data));
-}
-
-void SpeechRecognizer::HandleOnData(string* data) {
- // Check if we are still recording and if not discard this buffer, as
- // recording might have been stopped after this buffer was posted to the queue
- // by |OnData|.
- if (!audio_controller_.get()) {
- delete data;
- return;
- }
-
- const short* samples = reinterpret_cast<const short*>(data->data());
- DCHECK((data->length() % sizeof(short)) == 0);
- int num_samples = data->length() / sizeof(short);
-
- encoder_->Encode(samples, num_samples);
- float rms;
- endpointer_.ProcessAudio(samples, num_samples, &rms);
- delete data;
- num_samples_recorded_ += num_samples;
-
- if (endpointer_.IsEstimatingEnvironment()) {
- // Check if we have gathered enough audio for the endpointer to do
- // environment estimation and should move on to detect speech/end of speech.
- if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs *
- kAudioSampleRate) / 1000) {
- endpointer_.SetUserInputMode();
- delegate_->DidCompleteEnvironmentEstimation(caller_id_);
- }
- return; // No more processing since we are still estimating environment.
- }
-
- // Check if we have waited too long without hearing any speech.
- if (!endpointer_.DidStartReceivingSpeech() &&
- num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) {
- InformErrorAndCancelRecognition(RECOGNIZER_ERROR_NO_SPEECH);
- return;
- }
-
- // Calculate the input volume to display in the UI, smoothing towards the
- // new level.
- float level = (rms - kAudioMeterMinDb) / kAudioMeterDbRange;
- level = std::min(std::max(0.0f, level), 1.0f);
- if (level > audio_level_) {
- audio_level_ += (level - audio_level_) * kUpSmoothingFactor;
- } else {
- audio_level_ += (level - audio_level_) * kDownSmoothingFactor;
- }
- delegate_->SetInputVolume(caller_id_, audio_level_);
-
- if (endpointer_.speech_input_complete()) {
- StopRecording();
- }
-
- // TODO(satish): Once we have streaming POST, start sending the data received
- // here as POST chunks.
-}
-
-void SpeechRecognizer::SetRecognitionResult(
- bool error, const SpeechInputResultArray& result) {
- if (result.empty()) {
- InformErrorAndCancelRecognition(RECOGNIZER_ERROR_NO_RESULTS);
- return;
- }
-
- delegate_->SetRecognitionResult(caller_id_, error, result);
-
- // Guard against the delegate freeing us until we finish our job.
- scoped_refptr<SpeechRecognizer> me(this);
- delegate_->DidCompleteRecognition(caller_id_);
-}
-
-void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) {
- CancelRecognition();
-
- // Guard against the delegate freeing us until we finish our job.
- scoped_refptr<SpeechRecognizer> me(this);
- delegate_->OnRecognizerError(caller_id_, error);
-}
-
-} // namespace speech_input
diff --git a/chrome/browser/speech/speech_recognizer.h b/chrome/browser/speech/speech_recognizer.h
deleted file mode 100644
index 2570fba..0000000
--- a/chrome/browser/speech/speech_recognizer.h
+++ /dev/null
@@ -1,151 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
-#define CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
-
-#include <list>
-#include <string>
-#include <utility>
-
-#include "base/ref_counted.h"
-#include "base/scoped_ptr.h"
-#include "chrome/browser/speech/audio_encoder.h"
-#include "chrome/browser/speech/endpointer/endpointer.h"
-#include "chrome/browser/speech/speech_recognition_request.h"
-#include "media/audio/audio_input_controller.h"
-
-namespace speech_input {
-
-// Records audio, sends recorded audio to server and translates server response
-// to recognition result.
-class SpeechRecognizer
- : public base::RefCountedThreadSafe<SpeechRecognizer>,
- public media::AudioInputController::EventHandler,
- public SpeechRecognitionRequestDelegate {
- public:
- enum ErrorCode {
- RECOGNIZER_NO_ERROR,
- RECOGNIZER_ERROR_CAPTURE,
- RECOGNIZER_ERROR_NO_SPEECH,
- RECOGNIZER_ERROR_NO_RESULTS,
- };
-
- // Implemented by the caller to receive recognition events.
- class Delegate {
- public:
- virtual void SetRecognitionResult(
- int caller_id,
- bool error,
- const SpeechInputResultArray& result) = 0;
-
- // Invoked when audio recording stops, either due to the end pointer
- // detecting silence in user input or if |StopRecording| was called. The
- // delegate has to wait until |DidCompleteRecognition| is invoked before
- // destroying the |SpeechRecognizer| object.
- virtual void DidCompleteRecording(int caller_id) = 0;
-
- // This is guaranteed to be the last method invoked in the recognition
- // sequence and the |SpeechRecognizer| object can be freed up if necessary.
- virtual void DidCompleteRecognition(int caller_id) = 0;
-
- // Invoked if there was an error while recording or recognizing audio. The
- // session has already been cancelled when this call is made and the DidXxxx
- // callbacks will not be issued. It is safe to destroy/release the
- // |SpeechRecognizer| object while processing this call.
- virtual void OnRecognizerError(int caller_id,
- SpeechRecognizer::ErrorCode error) = 0;
-
- // At the start of recognition, a short amount of audio is recorded to
- // estimate the environment/background noise and this callback is issued
- // after that is complete. Typically the delegate brings up any speech
- // recognition UI once this callback is received.
- virtual void DidCompleteEnvironmentEstimation(int caller_id) = 0;
-
- // Informs of a change in the captured audio level, useful if displaying
- // a microphone volume indicator while recording.
- // The value of |volume| is in the [0.0, 1.0] range.
- virtual void SetInputVolume(int caller_id, float volume) = 0;
-
- protected:
- virtual ~Delegate() {}
- };
-
- SpeechRecognizer(Delegate* delegate,
- int caller_id,
- const std::string& language,
- const std::string& grammar,
- const std::string& hardware_info,
- const std::string& origin_url);
- ~SpeechRecognizer();
-
- // Starts audio recording and does recognition after recording ends. The same
- // SpeechRecognizer instance can be used multiple times for speech recognition
- // though each recognition request can be made only after the previous one
- // completes (i.e. after receiving Delegate::DidCompleteRecognition).
- bool StartRecording();
-
- // Stops recording audio and starts recognition.
- void StopRecording();
-
- // Stops recording audio and cancels recognition. Any audio recorded so far
- // gets discarded.
- void CancelRecognition();
-
- // AudioInputController::EventHandler methods.
- virtual void OnCreated(media::AudioInputController* controller) { }
- virtual void OnRecording(media::AudioInputController* controller) { }
- virtual void OnError(media::AudioInputController* controller, int error_code);
- virtual void OnData(media::AudioInputController* controller,
- const uint8* data,
- uint32 size);
-
- // SpeechRecognitionRequest::Delegate methods.
- virtual void SetRecognitionResult(bool error,
- const SpeechInputResultArray& result);
-
- static const int kAudioSampleRate;
- static const int kAudioPacketIntervalMs; // Duration of each audio packet.
- static const int kNumAudioChannels;
- static const int kNumBitsPerAudioSample;
- static const int kNoSpeechTimeoutSec;
- static const int kEndpointerEstimationTimeMs;
-
- private:
- void ReleaseAudioBuffers();
- void InformErrorAndCancelRecognition(ErrorCode error);
- void SendRecordedAudioToServer();
-
- void HandleOnError(int error_code); // Handles OnError in the IO thread.
-
- // Handles OnData in the IO thread. Takes ownership of |data|.
- void HandleOnData(std::string* data);
-
- Delegate* delegate_;
- int caller_id_;
- std::string language_;
- std::string grammar_;
- std::string hardware_info_;
- std::string origin_url_;
-
- scoped_ptr<SpeechRecognitionRequest> request_;
- scoped_refptr<media::AudioInputController> audio_controller_;
- AudioEncoder::Codec codec_;
- scoped_ptr<AudioEncoder> encoder_;
- Endpointer endpointer_;
- int num_samples_recorded_;
- float audio_level_;
-
- DISALLOW_COPY_AND_ASSIGN(SpeechRecognizer);
-};
-
-// This typedef is to workaround the issue with certain versions of
-// Visual Studio where it gets confused between multiple Delegate
-// classes and gives a C2500 error. (I saw this error on the try bots -
-// the workaround was not needed for my machine).
-typedef SpeechRecognizer::Delegate SpeechRecognizerDelegate;
-
-} // namespace speech_input
-
-#endif // CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
diff --git a/chrome/browser/speech/speech_recognizer_unittest.cc b/chrome/browser/speech/speech_recognizer_unittest.cc
deleted file mode 100644
index 855f35a..0000000
--- a/chrome/browser/speech/speech_recognizer_unittest.cc
+++ /dev/null
@@ -1,300 +0,0 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include <vector>
-
-#include "chrome/browser/browser_thread.h"
-#include "chrome/browser/speech/speech_recognizer.h"
-#include "chrome/common/net/test_url_fetcher_factory.h"
-#include "media/audio/test_audio_input_controller_factory.h"
-#include "net/url_request/url_request_status.h"
-#include "testing/gtest/include/gtest/gtest.h"
-
-using media::AudioInputController;
-using media::TestAudioInputController;
-using media::TestAudioInputControllerFactory;
-
-namespace speech_input {
-
-class SpeechRecognizerTest : public SpeechRecognizerDelegate,
- public testing::Test {
- public:
- SpeechRecognizerTest()
- : io_thread_(BrowserThread::IO, &message_loop_),
- ALLOW_THIS_IN_INITIALIZER_LIST(
- recognizer_(new SpeechRecognizer(this, 1, std::string(),
- std::string(), std::string(),
- std::string()))),
- recording_complete_(false),
- recognition_complete_(false),
- result_received_(false),
- error_(SpeechRecognizer::RECOGNIZER_NO_ERROR),
- volume_(-1.0f) {
- int audio_packet_length_bytes =
- (SpeechRecognizer::kAudioSampleRate *
- SpeechRecognizer::kAudioPacketIntervalMs *
- SpeechRecognizer::kNumAudioChannels *
- SpeechRecognizer::kNumBitsPerAudioSample) / (8 * 1000);
- audio_packet_.resize(audio_packet_length_bytes);
- }
-
- // SpeechRecognizer::Delegate methods.
- virtual void SetRecognitionResult(int caller_id,
- bool error,
- const SpeechInputResultArray& result) {
- result_received_ = true;
- }
-
- virtual void DidCompleteRecording(int caller_id) {
- recording_complete_ = true;
- }
-
- virtual void DidCompleteRecognition(int caller_id) {
- recognition_complete_ = true;
- }
-
- virtual void DidCompleteEnvironmentEstimation(int caller_id) {
- }
-
- virtual void OnRecognizerError(int caller_id,
- SpeechRecognizer::ErrorCode error) {
- error_ = error;
- }
-
- virtual void SetInputVolume(int caller_id, float volume) {
- volume_ = volume;
- }
-
- // testing::Test methods.
- virtual void SetUp() {
- URLFetcher::set_factory(&url_fetcher_factory_);
- AudioInputController::set_factory(&audio_input_controller_factory_);
- }
-
- virtual void TearDown() {
- URLFetcher::set_factory(NULL);
- AudioInputController::set_factory(NULL);
- }
-
- void FillPacketWithTestWaveform() {
- // Fill the input with a simple pattern, a 125Hz sawtooth waveform.
- for (size_t i = 0; i < audio_packet_.size(); ++i)
- audio_packet_[i] = static_cast<uint8>(i);
- }
-
- protected:
- MessageLoopForIO message_loop_;
- BrowserThread io_thread_;
- scoped_refptr<SpeechRecognizer> recognizer_;
- bool recording_complete_;
- bool recognition_complete_;
- bool result_received_;
- SpeechRecognizer::ErrorCode error_;
- TestURLFetcherFactory url_fetcher_factory_;
- TestAudioInputControllerFactory audio_input_controller_factory_;
- std::vector<uint8> audio_packet_;
- float volume_;
-};
-
-TEST_F(SpeechRecognizerTest, StopNoData) {
- // Check for callbacks when stopping record before any audio gets recorded.
- EXPECT_TRUE(recognizer_->StartRecording());
- recognizer_->CancelRecognition();
- EXPECT_FALSE(recording_complete_);
- EXPECT_FALSE(recognition_complete_);
- EXPECT_FALSE(result_received_);
- EXPECT_EQ(SpeechRecognizer::RECOGNIZER_NO_ERROR, error_);
-}
-
-TEST_F(SpeechRecognizerTest, CancelNoData) {
- // Check for callbacks when canceling recognition before any audio gets
- // recorded.
- EXPECT_TRUE(recognizer_->StartRecording());
- recognizer_->StopRecording();
- EXPECT_TRUE(recording_complete_);
- EXPECT_TRUE(recognition_complete_);
- EXPECT_FALSE(result_received_);
- EXPECT_EQ(SpeechRecognizer::RECOGNIZER_NO_ERROR, error_);
-}
-
-TEST_F(SpeechRecognizerTest, StopWithData) {
- // Start recording, give some data and then stop. This should wait for the
- // network callback to arrive before completion.
- EXPECT_TRUE(recognizer_->StartRecording());
- TestAudioInputController* controller =
- audio_input_controller_factory_.controller();
- ASSERT_TRUE(controller);
- controller = audio_input_controller_factory_.controller();
- ASSERT_TRUE(controller);
- controller->event_handler()->OnData(controller, &audio_packet_[0],
- audio_packet_.size());
- MessageLoop::current()->RunAllPending();
- recognizer_->StopRecording();
- EXPECT_TRUE(recording_complete_);
- EXPECT_FALSE(recognition_complete_);
- EXPECT_FALSE(result_received_);
- EXPECT_EQ(SpeechRecognizer::RECOGNIZER_NO_ERROR, error_);
-
- // Issue the network callback to complete the process.
- TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
- ASSERT_TRUE(fetcher);
- net::URLRequestStatus status;
- status.set_status(net::URLRequestStatus::SUCCESS);
- fetcher->delegate()->OnURLFetchComplete(
- fetcher, fetcher->original_url(), status, 200, ResponseCookies(),
- "{\"hypotheses\":[{\"utterance\":\"123\"}]}");
- EXPECT_TRUE(recognition_complete_);
- EXPECT_TRUE(result_received_);
- EXPECT_EQ(SpeechRecognizer::RECOGNIZER_NO_ERROR, error_);
-}
-
-TEST_F(SpeechRecognizerTest, CancelWithData) {
- // Start recording, give some data and then cancel. This should not create
- // a network request and finish immediately.
- EXPECT_TRUE(recognizer_->StartRecording());
- TestAudioInputController* controller =
- audio_input_controller_factory_.controller();
- ASSERT_TRUE(controller);
- controller->event_handler()->OnData(controller, &audio_packet_[0],
- audio_packet_.size());
- MessageLoop::current()->RunAllPending();
- recognizer_->CancelRecognition();
- EXPECT_EQ(NULL, url_fetcher_factory_.GetFetcherByID(0));
- EXPECT_FALSE(recording_complete_);
- EXPECT_FALSE(recognition_complete_);
- EXPECT_FALSE(result_received_);
- EXPECT_EQ(SpeechRecognizer::RECOGNIZER_NO_ERROR, error_);
-}
-
-TEST_F(SpeechRecognizerTest, AudioControllerErrorNoData) {
- // Check if things tear down properly if AudioInputController threw an error.
- EXPECT_TRUE(recognizer_->StartRecording());
- TestAudioInputController* controller =
- audio_input_controller_factory_.controller();
- ASSERT_TRUE(controller);
- controller->event_handler()->OnError(controller, 0);
- MessageLoop::current()->RunAllPending();
- EXPECT_FALSE(recording_complete_);
- EXPECT_FALSE(recognition_complete_);
- EXPECT_FALSE(result_received_);
- EXPECT_EQ(SpeechRecognizer::RECOGNIZER_ERROR_CAPTURE, error_);
-}
-
-TEST_F(SpeechRecognizerTest, AudioControllerErrorWithData) {
- // Check if things tear down properly if AudioInputController threw an error
- // after giving some audio data.
- EXPECT_TRUE(recognizer_->StartRecording());
- TestAudioInputController* controller =
- audio_input_controller_factory_.controller();
- ASSERT_TRUE(controller);
- controller->event_handler()->OnData(controller, &audio_packet_[0],
- audio_packet_.size());
- controller->event_handler()->OnError(controller, 0);
- MessageLoop::current()->RunAllPending();
- EXPECT_EQ(NULL, url_fetcher_factory_.GetFetcherByID(0));
- EXPECT_FALSE(recording_complete_);
- EXPECT_FALSE(recognition_complete_);
- EXPECT_FALSE(result_received_);
- EXPECT_EQ(SpeechRecognizer::RECOGNIZER_ERROR_CAPTURE, error_);
-}
-
-TEST_F(SpeechRecognizerTest, NoSpeechCallbackIssued) {
- // Start recording and give a lot of packets with audio samples set to zero.
- // This should trigger the no-speech detector and issue a callback.
- EXPECT_TRUE(recognizer_->StartRecording());
- TestAudioInputController* controller =
- audio_input_controller_factory_.controller();
- ASSERT_TRUE(controller);
- controller = audio_input_controller_factory_.controller();
- ASSERT_TRUE(controller);
-
- int num_packets = (SpeechRecognizer::kNoSpeechTimeoutSec * 1000) /
- SpeechRecognizer::kAudioPacketIntervalMs;
- // The vector is already filled with zero value samples on create.
- for (int i = 0; i < num_packets; ++i) {
- controller->event_handler()->OnData(controller, &audio_packet_[0],
- audio_packet_.size());
- }
- MessageLoop::current()->RunAllPending();
- EXPECT_FALSE(recording_complete_);
- EXPECT_FALSE(recognition_complete_);
- EXPECT_FALSE(result_received_);
- EXPECT_EQ(SpeechRecognizer::RECOGNIZER_ERROR_NO_SPEECH, error_);
-}
-
-TEST_F(SpeechRecognizerTest, NoSpeechCallbackNotIssued) {
- // Start recording and give a lot of packets with audio samples set to zero
- // and then some more with reasonably loud audio samples. This should be
- // treated as normal speech input and the no-speech detector should not get
- // triggered.
- EXPECT_TRUE(recognizer_->StartRecording());
- TestAudioInputController* controller =
- audio_input_controller_factory_.controller();
- ASSERT_TRUE(controller);
- controller = audio_input_controller_factory_.controller();
- ASSERT_TRUE(controller);
-
- int num_packets = (SpeechRecognizer::kNoSpeechTimeoutSec * 1000) /
- SpeechRecognizer::kAudioPacketIntervalMs;
-
- // The vector is already filled with zero value samples on create.
- for (int i = 0; i < num_packets / 2; ++i) {
- controller->event_handler()->OnData(controller, &audio_packet_[0],
- audio_packet_.size());
- }
-
- FillPacketWithTestWaveform();
- for (int i = 0; i < num_packets / 2; ++i) {
- controller->event_handler()->OnData(controller, &audio_packet_[0],
- audio_packet_.size());
- }
-
- MessageLoop::current()->RunAllPending();
- EXPECT_EQ(SpeechRecognizer::RECOGNIZER_NO_ERROR, error_);
- EXPECT_FALSE(recording_complete_);
- EXPECT_FALSE(recognition_complete_);
- recognizer_->CancelRecognition();
-}
-
-TEST_F(SpeechRecognizerTest, SetInputVolumeCallback) {
- // Start recording and give a lot of packets with audio samples set to zero
- // and then some more with reasonably loud audio samples. Check that we don't
- // get the callback during estimation phase, then get zero for the silence
- // samples and proper volume for the loud audio.
- EXPECT_TRUE(recognizer_->StartRecording());
- TestAudioInputController* controller =
- audio_input_controller_factory_.controller();
- ASSERT_TRUE(controller);
- controller = audio_input_controller_factory_.controller();
- ASSERT_TRUE(controller);
-
- // Feed some samples to begin with for the endpointer to do noise estimation.
- int num_packets = SpeechRecognizer::kEndpointerEstimationTimeMs /
- SpeechRecognizer::kAudioPacketIntervalMs;
- for (int i = 0; i < num_packets; ++i) {
- controller->event_handler()->OnData(controller, &audio_packet_[0],
- audio_packet_.size());
- }
- MessageLoop::current()->RunAllPending();
- EXPECT_EQ(-1.0f, volume_); // No audio volume set yet.
-
- // The vector is already filled with zero value samples on create.
- controller->event_handler()->OnData(controller, &audio_packet_[0],
- audio_packet_.size());
- MessageLoop::current()->RunAllPending();
- EXPECT_EQ(0, volume_);
-
- FillPacketWithTestWaveform();
- controller->event_handler()->OnData(controller, &audio_packet_[0],
- audio_packet_.size());
- MessageLoop::current()->RunAllPending();
- EXPECT_FLOAT_EQ(0.9f, volume_);
-
- EXPECT_EQ(SpeechRecognizer::RECOGNIZER_NO_ERROR, error_);
- EXPECT_FALSE(recording_complete_);
- EXPECT_FALSE(recognition_complete_);
- recognizer_->CancelRecognition();
-}
-
-} // namespace speech_input