summary refs log tree commit diff stats
path: root/content/browser/speech/speech_recognizer_impl.cc
diff options
context:
space:
mode:
Diffstat (limited to 'content/browser/speech/speech_recognizer_impl.cc')
-rw-r--r-- content/browser/speech/speech_recognizer_impl.cc | 51
1 file changed, 25 insertions(+), 26 deletions(-)
diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc
index bbcfbd2..c5342d8 100644
--- a/content/browser/speech/speech_recognizer_impl.cc
+++ b/content/browser/speech/speech_recognizer_impl.cc
@@ -7,6 +7,7 @@
#include "base/bind.h"
#include "base/time.h"
#include "content/browser/browser_main_loop.h"
+#include "content/browser/speech/audio_buffer.h"
#include "content/public/browser/speech_recognizer_delegate.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/common/speech_recognition_result.h"
@@ -38,9 +39,11 @@ const float kAudioMeterDbRange = kAudioMeterMaxDb - kAudioMeterMinDb;
const float kAudioMeterRangeMaxUnclipped = 47.0f / 48.0f;
// Returns true if more than 5% of the samples are at min or max value.
-bool Clipping(const int16* samples, int num_samples) {
- int clipping_samples = 0;
+bool DetectClipping(const speech::AudioChunk& chunk) {
+ const int num_samples = chunk.NumSamples();
+ const int16* samples = chunk.SamplesData16();
const int kThreshold = num_samples / 20;
+ int clipping_samples = 0;
for (int i = 0; i < num_samples; ++i) {
if (samples[i] <= -32767 || samples[i] >= 32767) {
if (++clipping_samples > kThreshold)
@@ -174,11 +177,13 @@ void SpeechRecognizerImpl::StopRecording() {
// of silence in case encoder had no data already.
std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) /
1000);
- encoder_->Encode(&samples[0], samples.size());
+ AudioChunk dummy_chunk(reinterpret_cast<uint8*>(&samples[0]),
+ samples.size() * sizeof(short),
+ encoder_->bits_per_sample() / 8);
+ encoder_->Encode(dummy_chunk);
encoder_->Flush();
- string encoded_data;
- encoder_->GetEncodedDataAndClear(&encoded_data);
- DCHECK(!encoded_data.empty());
+ scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
+ DCHECK(!encoded_data->IsEmpty());
encoder_.reset();
// If we haven't got any audio yet end the recognition sequence here.
@@ -187,7 +192,7 @@ void SpeechRecognizerImpl::StopRecording() {
scoped_refptr<SpeechRecognizerImpl> me(this);
delegate_->DidCompleteRecognition(caller_id_);
} else {
- request_->UploadAudioChunk(encoded_data, true /* is_last_chunk */);
+ request_->UploadAudioChunk(*encoded_data, true /* is_last_chunk */);
}
}
@@ -215,33 +220,28 @@ void SpeechRecognizerImpl::OnData(AudioInputController* controller,
const uint8* data, uint32 size) {
if (size == 0) // This could happen when recording stops and is normal.
return;
-
- string* str_data = new string(reinterpret_cast<const char*>(data), size);
+ AudioChunk* raw_audio = new AudioChunk(data, static_cast<size_t>(size),
+ kNumBitsPerAudioSample / 8);
BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
base::Bind(&SpeechRecognizerImpl::HandleOnData,
- this, str_data));
+ this, raw_audio));
}
-void SpeechRecognizerImpl::HandleOnData(string* data) {
+void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) {
+ scoped_ptr<AudioChunk> free_raw_audio_on_return(raw_audio);
// Check if we are still recording and if not discard this buffer, as
// recording might have been stopped after this buffer was posted to the queue
// by |OnData|.
- if (!audio_controller_.get()) {
- delete data;
+ if (!audio_controller_.get())
return;
- }
bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech();
- const short* samples = reinterpret_cast<const short*>(data->data());
- DCHECK_EQ((data->length() % sizeof(short)), 0U);
- int num_samples = data->length() / sizeof(short);
- encoder_->Encode(samples, num_samples);
+ encoder_->Encode(*raw_audio);
float rms;
- endpointer_.ProcessAudio(samples, num_samples, &rms);
- bool did_clip = Clipping(samples, num_samples);
- delete data;
- num_samples_recorded_ += num_samples;
+ endpointer_.ProcessAudio(*raw_audio, &rms);
+ bool did_clip = DetectClipping(*raw_audio);
+ num_samples_recorded_ += raw_audio->NumSamples();
if (request_ == NULL) {
// This was the first audio packet recorded, so start a request to the
@@ -252,10 +252,9 @@ void SpeechRecognizerImpl::HandleOnData(string* data) {
hardware_info_, origin_url_, encoder_->mime_type());
}
- string encoded_data;
- encoder_->GetEncodedDataAndClear(&encoded_data);
- DCHECK(!encoded_data.empty());
- request_->UploadAudioChunk(encoded_data, false /* is_last_chunk */);
+ scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
+ DCHECK(!encoded_data->IsEmpty());
+ request_->UploadAudioChunk(*encoded_data, false /* is_last_chunk */);
if (endpointer_.IsEstimatingEnvironment()) {
// Check if we have gathered enough audio for the endpointer to do