Diffstat (limited to 'content/browser/speech/speech_recognizer_impl.cc')
-rw-r--r-- | content/browser/speech/speech_recognizer_impl.cc | 51
1 file changed, 25 insertions, 26 deletions
diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc
index bbcfbd2..c5342d8 100644
--- a/content/browser/speech/speech_recognizer_impl.cc
+++ b/content/browser/speech/speech_recognizer_impl.cc
@@ -7,6 +7,7 @@
 #include "base/bind.h"
 #include "base/time.h"
 #include "content/browser/browser_main_loop.h"
+#include "content/browser/speech/audio_buffer.h"
 #include "content/public/browser/speech_recognizer_delegate.h"
 #include "content/public/browser/browser_thread.h"
 #include "content/public/common/speech_recognition_result.h"
@@ -38,9 +39,11 @@ const float kAudioMeterDbRange = kAudioMeterMaxDb - kAudioMeterMinDb;
 const float kAudioMeterRangeMaxUnclipped = 47.0f / 48.0f;
 
 // Returns true if more than 5% of the samples are at min or max value.
-bool Clipping(const int16* samples, int num_samples) {
-  int clipping_samples = 0;
+bool DetectClipping(const speech::AudioChunk& chunk) {
+  const int num_samples = chunk.NumSamples();
+  const int16* samples = chunk.SamplesData16();
   const int kThreshold = num_samples / 20;
+  int clipping_samples = 0;
   for (int i = 0; i < num_samples; ++i) {
     if (samples[i] <= -32767 || samples[i] >= 32767) {
       if (++clipping_samples > kThreshold)
@@ -174,11 +177,13 @@ void SpeechRecognizerImpl::StopRecording() {
   // of silence in case encoder had no data already.
   std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) /
                              1000);
-  encoder_->Encode(&samples[0], samples.size());
+  AudioChunk dummy_chunk(reinterpret_cast<uint8*>(&samples[0]),
+                         samples.size() * sizeof(short),
+                         encoder_->bits_per_sample() / 8);
+  encoder_->Encode(dummy_chunk);
   encoder_->Flush();
-  string encoded_data;
-  encoder_->GetEncodedDataAndClear(&encoded_data);
-  DCHECK(!encoded_data.empty());
+  scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
+  DCHECK(!encoded_data->IsEmpty());
   encoder_.reset();
 
   // If we haven't got any audio yet end the recognition sequence here.
@@ -187,7 +192,7 @@ void SpeechRecognizerImpl::StopRecording() {
     scoped_refptr<SpeechRecognizerImpl> me(this);
     delegate_->DidCompleteRecognition(caller_id_);
   } else {
-    request_->UploadAudioChunk(encoded_data, true /* is_last_chunk */);
+    request_->UploadAudioChunk(*encoded_data, true /* is_last_chunk */);
   }
 }
 
@@ -215,33 +220,28 @@ void SpeechRecognizerImpl::OnData(AudioInputController* controller,
                                   const uint8* data, uint32 size) {
   if (size == 0)  // This could happen when recording stops and is normal.
     return;
-
-  string* str_data = new string(reinterpret_cast<const char*>(data), size);
+  AudioChunk* raw_audio = new AudioChunk(data, static_cast<size_t>(size),
+                                         kNumBitsPerAudioSample / 8);
   BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
                           base::Bind(&SpeechRecognizerImpl::HandleOnData,
-                                     this, str_data));
+                                     this, raw_audio));
 }
 
-void SpeechRecognizerImpl::HandleOnData(string* data) {
+void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) {
+  scoped_ptr<AudioChunk> free_raw_audio_on_return(raw_audio);
   // Check if we are still recording and if not discard this buffer, as
   // recording might have been stopped after this buffer was posted to the queue
   // by |OnData|.
-  if (!audio_controller_.get()) {
-    delete data;
+  if (!audio_controller_.get())
     return;
-  }
 
   bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech();
 
-  const short* samples = reinterpret_cast<const short*>(data->data());
-  DCHECK_EQ((data->length() % sizeof(short)), 0U);
-  int num_samples = data->length() / sizeof(short);
-  encoder_->Encode(samples, num_samples);
+  encoder_->Encode(*raw_audio);
   float rms;
-  endpointer_.ProcessAudio(samples, num_samples, &rms);
-  bool did_clip = Clipping(samples, num_samples);
-  delete data;
-  num_samples_recorded_ += num_samples;
+  endpointer_.ProcessAudio(*raw_audio, &rms);
+  bool did_clip = DetectClipping(*raw_audio);
+  num_samples_recorded_ += raw_audio->NumSamples();
 
   if (request_ == NULL) {
     // This was the first audio packet recorded, so start a request to the
@@ -252,10 +252,9 @@ void SpeechRecognizerImpl::HandleOnData(string* data) {
                     hardware_info_, origin_url_, encoder_->mime_type());
   }
 
-  string encoded_data;
-  encoder_->GetEncodedDataAndClear(&encoded_data);
-  DCHECK(!encoded_data.empty());
-  request_->UploadAudioChunk(encoded_data, false /* is_last_chunk */);
+  scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
+  DCHECK(!encoded_data->IsEmpty());
+  request_->UploadAudioChunk(*encoded_data, false /* is_last_chunk */);
 
   if (endpointer_.IsEstimatingEnvironment()) {
     // Check if we have gathered enough audio for the endpointer to do
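For context, the new code depends on an AudioChunk type declared in content/browser/speech/audio_buffer.h, which this diff does not show. Below is a minimal sketch of the interface implied by the call sites above (a constructor taking a byte pointer, a length, and a bytes-per-sample value, plus IsEmpty(), NumSamples(), and SamplesData16()). It is an illustration only, using standard <cstdint> types in place of Chromium's int16/uint8 typedefs; the real class may differ in detail.

// Sketch of an AudioChunk-like wrapper, inferred from how the diff uses it.
// Illustration only; the actual class is in content/browser/speech/audio_buffer.h.
#include <cstddef>
#include <cstdint>
#include <string>

class AudioChunk {
 public:
  // Copies |length| bytes of raw audio; |bytes_per_sample| is 2 for the
  // 16-bit PCM samples the recognizer records (kNumBitsPerAudioSample / 8).
  AudioChunk(const uint8_t* data, size_t length, int bytes_per_sample)
      : data_(reinterpret_cast<const char*>(data), length),
        bytes_per_sample_(bytes_per_sample) {}

  bool IsEmpty() const { return data_.empty(); }
  size_t NumSamples() const { return data_.size() / bytes_per_sample_; }
  const int16_t* SamplesData16() const {
    return reinterpret_cast<const int16_t*>(data_.data());
  }

 private:
  std::string data_;      // Raw audio bytes as delivered by the recorder.
  int bytes_per_sample_;  // Sample width in bytes (16-bit PCM -> 2).
};

The removed lines show the main motivation of the refactor: ownership. The manual delete data; calls in HandleOnData are replaced by a scoped_ptr that frees the chunk on every return path, and the encoder now returns a heap-allocated chunk instead of filling a caller-supplied string, so the raw sample pointer, sample count, and byte size travel together in one object.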