author    primiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2012-03-13 23:57:51 +0000
committer primiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2012-03-13 23:57:51 +0000
commit    fad64e7a123b6ddd2ba8af13441c74f8f37966ee (patch)
tree      28f09e77787e4a77ed30d45743086f12b62f58ee /content/browser/speech
parent    4a5aebb91b0784ef133a926773b0b9e517f288d9 (diff)
Added AudioBuffer/AudioChunk abstractions for speech recognition and improved speech_recognizer_impl_unittest.
audio_encoder - Introduced the AudioBuffer class in order to hide the current string-based implementation (which involved a lot of dirty, scattered casts) and make room for future implementations based on a circular buffer.

speech_recognizer_impl_unittest - Created a MockAudioManager class, in order to avoid using the real audio manager on trybots, which could lead to errors when accessing the audio device.

BUG=116954
TEST=speech_recognizer_impl_unittest should never raise errors related to the audio driver (e.g., device in use, no microphone attached, etc.).

Review URL: http://codereview.chromium.org/9646031

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@126512 0039d316-1c4b-4281-b951-d872f2087c98
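For reference, a minimal usage sketch of the AudioBuffer/AudioChunk API added by this change, based on the interface in audio_buffer.h below (the helper function name, packet sizes, and sample data are illustrative only, not part of the patch):

// Sketch only: feeding raw 16-bit capture data into the new AudioBuffer and
// draining it, per the interface declared in audio_buffer.h.
#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
#include "content/browser/speech/audio_buffer.h"

void AudioBufferUsageSketch(const uint8* capture_data, size_t capture_size) {
  speech::AudioBuffer buffer(2);               // 2 bytes per sample (16-bit PCM).
  buffer.Enqueue(capture_data, capture_size);  // Copies the first packet.
  buffer.Enqueue(capture_data, capture_size);  // Copies a second packet.
  // FIFO dequeue: returns the first packet with its original length intact.
  scoped_ptr<speech::AudioChunk> first(buffer.DequeueSingleChunk());
  // Merge everything still enqueued into one chunk and empty the buffer.
  scoped_ptr<speech::AudioChunk> rest(buffer.DequeueAll());
  DCHECK(buffer.IsEmpty());
  // first->SamplesData16() / rest->AsString() expose the underlying samples.
}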
Diffstat (limited to 'content/browser/speech')
-rw-r--r--  content/browser/speech/audio_buffer.cc | 91
-rw-r--r--  content/browser/speech/audio_buffer.h | 73
-rw-r--r--  content/browser/speech/audio_encoder.cc | 74
-rw-r--r--  content/browser/speech/audio_encoder.h | 28
-rw-r--r--  content/browser/speech/endpointer/endpointer.cc | 6
-rw-r--r--  content/browser/speech/endpointer/endpointer.h | 5
-rw-r--r--  content/browser/speech/endpointer/endpointer_unittest.cc | 4
-rw-r--r--  content/browser/speech/speech_recognition_request.cc | 5
-rw-r--r--  content/browser/speech/speech_recognition_request.h | 4
-rw-r--r--  content/browser/speech/speech_recognition_request_unittest.cc | 7
-rw-r--r--  content/browser/speech/speech_recognizer_impl.cc | 51
-rw-r--r--  content/browser/speech/speech_recognizer_impl.h | 4
-rw-r--r--  content/browser/speech/speech_recognizer_impl_unittest.cc | 46
13 files changed, 304 insertions, 94 deletions
diff --git a/content/browser/speech/audio_buffer.cc b/content/browser/speech/audio_buffer.cc
new file mode 100644
index 0000000..5b887d7
--- /dev/null
+++ b/content/browser/speech/audio_buffer.cc
@@ -0,0 +1,91 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/logging.h"
+#include "base/stl_util.h"
+#include "content/browser/speech/audio_buffer.h"
+
+namespace speech {
+
+AudioChunk::AudioChunk(int bytes_per_sample)
+ : bytes_per_sample_(bytes_per_sample) {
+}
+
+AudioChunk::AudioChunk(const uint8* data, size_t length, int bytes_per_sample)
+ : data_string_(reinterpret_cast<const char*>(data), length),
+ bytes_per_sample_(bytes_per_sample) {
+ DCHECK_EQ(length % bytes_per_sample, 0U);
+}
+
+bool AudioChunk::IsEmpty() const {
+ return data_string_.empty();
+}
+
+size_t AudioChunk::NumSamples() const {
+ return data_string_.size() / bytes_per_sample_;
+}
+
+const std::string& AudioChunk::AsString() const {
+ return data_string_;
+}
+
+int16 AudioChunk::GetSample16(size_t index) const {
+ DCHECK(index < (data_string_.size() / sizeof(int16)));
+ return SamplesData16()[index];
+}
+
+const int16* AudioChunk::SamplesData16() const {
+ return reinterpret_cast<const int16*>(data_string_.data());
+}
+
+
+AudioBuffer::AudioBuffer(int bytes_per_sample)
+ : bytes_per_sample_(bytes_per_sample) {
+ DCHECK(bytes_per_sample == 1 ||
+ bytes_per_sample == 2 ||
+ bytes_per_sample == 4);
+}
+
+AudioBuffer::~AudioBuffer() {
+ Clear();
+}
+
+void AudioBuffer::Enqueue(const uint8* data, size_t length) {
+ AudioChunk* chunk = new AudioChunk(data, length, bytes_per_sample_);
+ chunks_.push_back(chunk);
+}
+
+scoped_ptr<AudioChunk> AudioBuffer::DequeueSingleChunk() {
+ DCHECK(!chunks_.empty());
+ AudioChunk* chunk = *chunks_.begin();
+ chunks_.weak_erase(chunks_.begin());
+ return scoped_ptr<AudioChunk>(chunk);
+}
+
+scoped_ptr<AudioChunk> AudioBuffer::DequeueAll() {
+ AudioChunk* chunk = new AudioChunk(bytes_per_sample_);
+ size_t resulting_length = 0;
+ ChunksContainer::const_iterator it;
+ // In order to improve performance, calculate in advance the total length
+ // and then copy the chunks.
+ for (it = chunks_.begin(); it != chunks_.end(); ++it) {
+ resulting_length += (*it)->data_string_.length();
+ }
+ chunk->data_string_.reserve(resulting_length);
+ for (it = chunks_.begin(); it != chunks_.end(); ++it) {
+ chunk->data_string_.append((*it)->data_string_);
+ }
+ Clear();
+ return scoped_ptr<AudioChunk>(chunk);
+}
+
+void AudioBuffer::Clear() {
+ chunks_.erase(chunks_.begin(), chunks_.end());
+}
+
+bool AudioBuffer::IsEmpty() const {
+ return chunks_.empty();
+}
+
+} // namespace speech
diff --git a/content/browser/speech/audio_buffer.h b/content/browser/speech/audio_buffer.h
new file mode 100644
index 0000000..c1d5103
--- /dev/null
+++ b/content/browser/speech/audio_buffer.h
@@ -0,0 +1,73 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CONTENT_BROWSER_SPEECH_AUDIO_BUFFER_H_
+#define CONTENT_BROWSER_SPEECH_AUDIO_BUFFER_H_
+#pragma once
+
+#include <string>
+
+#include "base/basictypes.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/memory/scoped_vector.h"
+#include "content/common/content_export.h"
+
+namespace speech {
+
+// Models a chunk derived from an AudioBuffer.
+class CONTENT_EXPORT AudioChunk {
+ public:
+ explicit AudioChunk(int bytes_per_sample);
+ AudioChunk(const uint8* data, size_t length, int bytes_per_sample);
+
+ bool IsEmpty() const;
+ int bytes_per_sample() const { return bytes_per_sample_; }
+ size_t NumSamples() const;
+ const std::string& AsString() const;
+ int16 GetSample16(size_t index) const;
+ const int16* SamplesData16() const;
+ friend class AudioBuffer;
+
+ private:
+ std::string data_string_;
+ int bytes_per_sample_;
+
+ DISALLOW_COPY_AND_ASSIGN(AudioChunk);
+};
+
+// Models an audio buffer. The current implementation relies on on-demand
+// allocations of AudioChunk(s) (which uses a string as storage).
+class AudioBuffer {
+ public:
+ explicit AudioBuffer(int bytes_per_sample);
+ ~AudioBuffer();
+
+ // Enqueues a copy of |length| bytes of |data| buffer.
+ void Enqueue(const uint8* data, size_t length);
+
+ // Dequeues, in FIFO order, a single chunk respecting the length of the
+ // corresponding Enqueue call (in a nutshell: multiple Enqueue calls followed
+ // by Dequeue calls will return the individual chunks without merging them).
+ scoped_ptr<AudioChunk> DequeueSingleChunk();
+
+ // Dequeues all previously enqueued chunks, merging them in a single chunk.
+ scoped_ptr<AudioChunk> DequeueAll();
+
+ // Removes and frees all the enqueued chunks.
+ void Clear();
+
+ // Checks whether the buffer is empty.
+ bool IsEmpty() const;
+
+ private:
+ typedef ScopedVector<AudioChunk> ChunksContainer;
+ ChunksContainer chunks_;
+ int bytes_per_sample_;
+
+ DISALLOW_COPY_AND_ASSIGN(AudioBuffer);
+};
+
+} // namespace speech
+
+#endif // CONTENT_BROWSER_SPEECH_AUDIO_BUFFER_H_
diff --git a/content/browser/speech/audio_encoder.cc b/content/browser/speech/audio_encoder.cc
index 83e0475..92ccdce 100644
--- a/content/browser/speech/audio_encoder.cc
+++ b/content/browser/speech/audio_encoder.cc
@@ -9,10 +9,12 @@
#include "base/memory/scoped_ptr.h"
#include "base/stl_util.h"
#include "base/string_number_conversions.h"
+#include "content/browser/speech/audio_buffer.h"
#include "third_party/flac/flac.h"
#include "third_party/speex/speex.h"
using std::string;
+using speech::AudioChunk;
namespace {
@@ -25,8 +27,8 @@ class FLACEncoder : public speech::AudioEncoder {
public:
FLACEncoder(int sampling_rate, int bits_per_sample);
virtual ~FLACEncoder();
- virtual void Encode(const short* samples, int num_samples);
- virtual void Flush();
+ virtual void Encode(const AudioChunk& raw_audio) OVERRIDE;
+ virtual void Flush() OVERRIDE;
private:
static FLAC__StreamEncoderWriteStatus WriteCallback(
@@ -52,13 +54,14 @@ FLAC__StreamEncoderWriteStatus FLACEncoder::WriteCallback(
void* client_data) {
FLACEncoder* me = static_cast<FLACEncoder*>(client_data);
DCHECK(me->encoder_ == encoder);
- me->AppendToBuffer(new string(reinterpret_cast<const char*>(buffer), bytes));
+ me->encoded_audio_buffer_.Enqueue(buffer, bytes);
return FLAC__STREAM_ENCODER_WRITE_STATUS_OK;
}
FLACEncoder::FLACEncoder(int sampling_rate, int bits_per_sample)
: AudioEncoder(std::string(kContentTypeFLAC) +
- base::IntToString(sampling_rate)),
+ base::IntToString(sampling_rate),
+ bits_per_sample),
encoder_(FLAC__stream_encoder_new()),
is_encoder_initialized_(false) {
FLAC__stream_encoder_set_channels(encoder_, 1);
@@ -75,20 +78,22 @@ FLACEncoder::~FLACEncoder() {
FLAC__stream_encoder_delete(encoder_);
}
-void FLACEncoder::Encode(const short* samples, int num_samples) {
+void FLACEncoder::Encode(const AudioChunk& raw_audio) {
+ DCHECK_EQ(raw_audio.bytes_per_sample(), 2);
if (!is_encoder_initialized_) {
const FLAC__StreamEncoderInitStatus encoder_status =
FLAC__stream_encoder_init_stream(encoder_, WriteCallback, NULL, NULL,
NULL, this);
- DCHECK(encoder_status == FLAC__STREAM_ENCODER_INIT_STATUS_OK);
+ DCHECK_EQ(encoder_status, FLAC__STREAM_ENCODER_INIT_STATUS_OK);
is_encoder_initialized_ = true;
}
// FLAC encoder wants samples as int32s.
+ const int num_samples = raw_audio.NumSamples();
scoped_array<FLAC__int32> flac_samples(new FLAC__int32[num_samples]);
FLAC__int32* flac_samples_ptr = flac_samples.get();
for (int i = 0; i < num_samples; ++i)
- flac_samples_ptr[i] = samples[i];
+ flac_samples_ptr[i] = static_cast<FLAC__int32>(raw_audio.GetSample16(i));
FLAC__stream_encoder_process(encoder_, &flac_samples_ptr, num_samples);
}
@@ -109,10 +114,10 @@ COMPILE_ASSERT(kMaxSpeexFrameLength <= 0xFF, invalidLength);
class SpeexEncoder : public speech::AudioEncoder {
public:
- explicit SpeexEncoder(int sampling_rate);
+ explicit SpeexEncoder(int sampling_rate, int bits_per_sample);
virtual ~SpeexEncoder();
- virtual void Encode(const short* samples, int num_samples);
- virtual void Flush() {}
+ virtual void Encode(const AudioChunk& raw_audio) OVERRIDE;
+ virtual void Flush() OVERRIDE {}
private:
void* encoder_state_;
@@ -122,9 +127,10 @@ class SpeexEncoder : public speech::AudioEncoder {
DISALLOW_COPY_AND_ASSIGN(SpeexEncoder);
};
-SpeexEncoder::SpeexEncoder(int sampling_rate)
+SpeexEncoder::SpeexEncoder(int sampling_rate, int bits_per_sample)
: AudioEncoder(std::string(kContentTypeSpeex) +
- base::IntToString(sampling_rate)) {
+ base::IntToString(sampling_rate),
+ bits_per_sample) {
// speex_bits_init() does not initialize all of the |bits_| struct.
memset(&bits_, 0, sizeof(bits_));
speex_bits_init(&bits_);
@@ -144,20 +150,23 @@ SpeexEncoder::~SpeexEncoder() {
speex_encoder_destroy(encoder_state_);
}
-void SpeexEncoder::Encode(const short* samples, int num_samples) {
+void SpeexEncoder::Encode(const AudioChunk& raw_audio) {
+ spx_int16_t* src_buffer =
+ const_cast<spx_int16_t*>(raw_audio.SamplesData16());
+ int num_samples = raw_audio.NumSamples();
// Drop incomplete frames, typically those which come in when recording stops.
num_samples -= (num_samples % samples_per_frame_);
for (int i = 0; i < num_samples; i += samples_per_frame_) {
speex_bits_reset(&bits_);
- speex_encode_int(encoder_state_, const_cast<spx_int16_t*>(samples + i),
- &bits_);
+ speex_encode_int(encoder_state_, src_buffer + i, &bits_);
// Encode the frame and place the size of the frame as the first byte. This
// is the packet format for MIME type x-speex-with-header-byte.
int frame_length = speex_bits_write(&bits_, encoded_frame_data_ + 1,
kMaxSpeexFrameLength);
encoded_frame_data_[0] = static_cast<char>(frame_length);
- AppendToBuffer(new string(encoded_frame_data_, frame_length + 1));
+ encoded_audio_buffer_.Enqueue(
+ reinterpret_cast<uint8*>(&encoded_frame_data_[0]), frame_length + 1);
}
}
@@ -170,39 +179,20 @@ AudioEncoder* AudioEncoder::Create(Codec codec,
int bits_per_sample) {
if (codec == CODEC_FLAC)
return new FLACEncoder(sampling_rate, bits_per_sample);
- return new SpeexEncoder(sampling_rate);
+ return new SpeexEncoder(sampling_rate, bits_per_sample);
}
-AudioEncoder::AudioEncoder(const std::string& mime_type)
- : mime_type_(mime_type) {
+AudioEncoder::AudioEncoder(const std::string& mime_type, int bits_per_sample)
+ : encoded_audio_buffer_(1), /* Byte granularity of encoded samples. */
+ mime_type_(mime_type),
+ bits_per_sample_(bits_per_sample) {
}
AudioEncoder::~AudioEncoder() {
- STLDeleteElements(&audio_buffers_);
}
-bool AudioEncoder::GetEncodedDataAndClear(std::string* encoded_data) {
- if (!audio_buffers_.size())
- return false;
-
- int audio_buffer_length = 0;
- for (AudioBufferQueue::iterator it = audio_buffers_.begin();
- it != audio_buffers_.end(); ++it) {
- audio_buffer_length += (*it)->length();
- }
- encoded_data->reserve(audio_buffer_length);
- for (AudioBufferQueue::iterator it = audio_buffers_.begin();
- it != audio_buffers_.end(); ++it) {
- encoded_data->append(*(*it));
- }
-
- STLDeleteElements(&audio_buffers_);
-
- return true;
-}
-
-void AudioEncoder::AppendToBuffer(std::string* item) {
- audio_buffers_.push_back(item);
+scoped_ptr<AudioChunk> AudioEncoder::GetEncodedDataAndClear() {
+ return encoded_audio_buffer_.DequeueAll();
}
} // namespace speech
diff --git a/content/browser/speech/audio_encoder.h b/content/browser/speech/audio_encoder.h
index 92bc645..65ceb97 100644
--- a/content/browser/speech/audio_encoder.h
+++ b/content/browser/speech/audio_encoder.h
@@ -9,9 +9,11 @@
#include <string>
#include "base/basictypes.h"
+#include "base/memory/scoped_ptr.h"
+#include "content/browser/speech/audio_buffer.h"
namespace speech {
-
+class AudioChunk;
// Provides a simple interface to encode raw audio using the various speech
// codecs.
class AudioEncoder {
@@ -27,30 +29,28 @@ class AudioEncoder {
virtual ~AudioEncoder();
- // Encodes each frame of raw audio in |samples| to the internal buffer. Use
- // |GetEncodedData| to read the result after this call or when recording
- // completes.
- virtual void Encode(const short* samples, int num_samples) = 0;
+ // Encodes |raw_audio| to the internal buffer. Use
+ // |GetEncodedDataAndClear| to read the result after this call or when
+ // audio capture completes.
+ virtual void Encode(const AudioChunk& raw_audio) = 0;
// Finish encoding and flush any pending encoded bits out.
virtual void Flush() = 0;
- // Copies the encoded audio to the given string. Returns true if the output
- // is not empty.
- bool GetEncodedDataAndClear(std::string* encoded_data);
+ // Merges, retrieves and clears all the accumulated encoded audio chunks.
+ scoped_ptr<AudioChunk> GetEncodedDataAndClear();
const std::string& mime_type() { return mime_type_; }
+ int bits_per_sample() { return bits_per_sample_; }
protected:
- AudioEncoder(const std::string& mime_type);
-
- void AppendToBuffer(std::string* item);
+ AudioEncoder(const std::string& mime_type, int bits_per_sample);
+ AudioBuffer encoded_audio_buffer_;
private:
- // Buffer holding the recorded audio. Owns the strings inside the list.
- typedef std::list<std::string*> AudioBufferQueue;
- AudioBufferQueue audio_buffers_;
std::string mime_type_;
+ int bits_per_sample_;
+
DISALLOW_COPY_AND_ASSIGN(AudioEncoder);
};
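For reference, a minimal sketch of the reworked encoder contract (the helper function name, codec, sample rate, and bit depth are illustrative; this assumes the Codec enum is nested in AudioEncoder as referenced by Create() in the audio_encoder.cc hunk above):

#include "base/memory/scoped_ptr.h"
#include "content/browser/speech/audio_buffer.h"
#include "content/browser/speech/audio_encoder.h"

void EncoderUsageSketch(const speech::AudioChunk& raw_audio) {
  // Encoders now consume AudioChunk objects and return the encoded bytes as a
  // single merged AudioChunk instead of filling a std::string out-parameter.
  scoped_ptr<speech::AudioEncoder> encoder(speech::AudioEncoder::Create(
      speech::AudioEncoder::CODEC_FLAC, 16000 /* Hz */, 16 /* bits/sample */));
  encoder->Encode(raw_audio);
  encoder->Flush();
  scoped_ptr<speech::AudioChunk> encoded(encoder->GetEncodedDataAndClear());
  // encoded->AsString() is what SpeechRecognitionRequest uploads to the server.
}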
diff --git a/content/browser/speech/endpointer/endpointer.cc b/content/browser/speech/endpointer/endpointer.cc
index fe3e0bf..b4a54c1 100644
--- a/content/browser/speech/endpointer/endpointer.cc
+++ b/content/browser/speech/endpointer/endpointer.cc
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+#include "content/browser/speech/audio_buffer.h"
#include "content/browser/speech/endpointer/endpointer.h"
#include "base/time.h"
@@ -88,8 +89,9 @@ EpStatus Endpointer::Status(int64 *time) {
return energy_endpointer_.Status(time);
}
-EpStatus Endpointer::ProcessAudio(const int16* audio_data, int num_samples,
- float* rms_out) {
+EpStatus Endpointer::ProcessAudio(const AudioChunk& raw_audio, float* rms_out) {
+ const int16* audio_data = raw_audio.SamplesData16();
+ const int num_samples = raw_audio.NumSamples();
EpStatus ep_status = EP_PRE_SPEECH;
// Process the input data in blocks of frame_size_, dropping any incomplete
diff --git a/content/browser/speech/endpointer/endpointer.h b/content/browser/speech/endpointer/endpointer.h
index 9ba2018..89ec3a9 100644
--- a/content/browser/speech/endpointer/endpointer.h
+++ b/content/browser/speech/endpointer/endpointer.h
@@ -13,6 +13,8 @@ class EpStatus;
namespace speech {
+class AudioChunk;
+
// A simple interface to the underlying energy-endpointer implementation, this
// class lets callers provide audio as being recorded and let them poll to find
// when the user has stopped speaking.
@@ -61,8 +63,7 @@ class CONTENT_EXPORT Endpointer {
// Process a segment of audio, which may be more than one frame.
// The status of the last frame will be returned.
- EpStatus ProcessAudio(const int16* audio_data, int num_samples,
- float* rms_out);
+ EpStatus ProcessAudio(const AudioChunk& raw_audio, float* rms_out);
// Get the status of the endpointer.
EpStatus Status(int64 *time_us);
diff --git a/content/browser/speech/endpointer/endpointer_unittest.cc b/content/browser/speech/endpointer/endpointer_unittest.cc
index 240e5dc..37f2339 100644
--- a/content/browser/speech/endpointer/endpointer_unittest.cc
+++ b/content/browser/speech/endpointer/endpointer_unittest.cc
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+#include "content/browser/speech/audio_buffer.h"
#include "content/browser/speech/endpointer/endpointer.h"
#include "testing/gtest/include/gtest/gtest.h"
@@ -116,7 +117,8 @@ class EndpointerFrameProcessor : public FrameProcessor {
: endpointer_(endpointer) {}
EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) {
- endpointer_->ProcessAudio(samples, kFrameSize, NULL);
+ AudioChunk frame(reinterpret_cast<uint8*>(samples), kFrameSize * 2, 2);
+ endpointer_->ProcessAudio(frame, NULL);
int64 ep_time;
return endpointer_->Status(&ep_time);
}
diff --git a/content/browser/speech/speech_recognition_request.cc b/content/browser/speech/speech_recognition_request.cc
index 89dde84..a143699 100644
--- a/content/browser/speech/speech_recognition_request.cc
+++ b/content/browser/speech/speech_recognition_request.cc
@@ -10,6 +10,7 @@
#include "base/string_number_conversions.h"
#include "base/string_util.h"
#include "base/values.h"
+#include "content/browser/speech/audio_buffer.h"
#include "content/common/net/url_fetcher_impl.h"
#include "content/public/common/speech_recognition_result.h"
#include "net/base/escape.h"
@@ -201,10 +202,10 @@ void SpeechRecognitionRequest::Start(const std::string& language,
url_fetcher_->Start();
}
-void SpeechRecognitionRequest::UploadAudioChunk(const std::string& audio_data,
+void SpeechRecognitionRequest::UploadAudioChunk(const AudioChunk& audio_chunk,
bool is_last_chunk) {
DCHECK(url_fetcher_.get());
- url_fetcher_->AppendChunkToUpload(audio_data, is_last_chunk);
+ url_fetcher_->AppendChunkToUpload(audio_chunk.AsString(), is_last_chunk);
}
void SpeechRecognitionRequest::OnURLFetchComplete(
diff --git a/content/browser/speech/speech_recognition_request.h b/content/browser/speech/speech_recognition_request.h
index b6ce077..2f29e0a 100644
--- a/content/browser/speech/speech_recognition_request.h
+++ b/content/browser/speech/speech_recognition_request.h
@@ -27,6 +27,8 @@ class URLRequestContextGetter;
namespace speech {
+class AudioChunk;
+
// Provides a simple interface for sending recorded speech data to the server
// and get back recognition results.
class SpeechRecognitionRequest : public content::URLFetcherDelegate {
@@ -61,7 +63,7 @@ class SpeechRecognitionRequest : public content::URLFetcherDelegate {
const std::string& content_type);
// Send a single chunk of audio immediately to the server.
- CONTENT_EXPORT void UploadAudioChunk(const std::string& audio_data,
+ CONTENT_EXPORT void UploadAudioChunk(const AudioChunk& audio_chunk,
bool is_last_chunk);
CONTENT_EXPORT bool HasPendingRequest() { return url_fetcher_ != NULL; }
diff --git a/content/browser/speech/speech_recognition_request_unittest.cc b/content/browser/speech/speech_recognition_request_unittest.cc
index 37b82f8..822e254 100644
--- a/content/browser/speech/speech_recognition_request_unittest.cc
+++ b/content/browser/speech/speech_recognition_request_unittest.cc
@@ -4,6 +4,7 @@
#include "base/message_loop.h"
#include "base/utf_string_conversions.h"
+#include "content/browser/speech/audio_buffer.h"
#include "content/browser/speech/speech_recognition_request.h"
#include "content/public/common/speech_recognition_result.h"
#include "content/test/test_url_fetcher_factory.h"
@@ -39,7 +40,11 @@ void SpeechRecognitionRequestTest::CreateAndTestRequest(
SpeechRecognitionRequest request(NULL, this);
request.Start(std::string(), std::string(), false, std::string(),
std::string(), std::string());
- request.UploadAudioChunk(std::string(" "), true);
+ unsigned char dummy_audio_buffer_data[2] = {'\0', '\0'};
+ AudioChunk dummy_audio_chunk(&dummy_audio_buffer_data[0],
+ sizeof(dummy_audio_buffer_data),
+ 2 /* bytes per sample */);
+ request.UploadAudioChunk(dummy_audio_chunk, true);
TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
ASSERT_TRUE(fetcher);
diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc
index bbcfbd2..c5342d8 100644
--- a/content/browser/speech/speech_recognizer_impl.cc
+++ b/content/browser/speech/speech_recognizer_impl.cc
@@ -7,6 +7,7 @@
#include "base/bind.h"
#include "base/time.h"
#include "content/browser/browser_main_loop.h"
+#include "content/browser/speech/audio_buffer.h"
#include "content/public/browser/speech_recognizer_delegate.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/common/speech_recognition_result.h"
@@ -38,9 +39,11 @@ const float kAudioMeterDbRange = kAudioMeterMaxDb - kAudioMeterMinDb;
const float kAudioMeterRangeMaxUnclipped = 47.0f / 48.0f;
// Returns true if more than 5% of the samples are at min or max value.
-bool Clipping(const int16* samples, int num_samples) {
- int clipping_samples = 0;
+bool DetectClipping(const speech::AudioChunk& chunk) {
+ const int num_samples = chunk.NumSamples();
+ const int16* samples = chunk.SamplesData16();
const int kThreshold = num_samples / 20;
+ int clipping_samples = 0;
for (int i = 0; i < num_samples; ++i) {
if (samples[i] <= -32767 || samples[i] >= 32767) {
if (++clipping_samples > kThreshold)
@@ -174,11 +177,13 @@ void SpeechRecognizerImpl::StopRecording() {
// of silence in case encoder had no data already.
std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) /
1000);
- encoder_->Encode(&samples[0], samples.size());
+ AudioChunk dummy_chunk(reinterpret_cast<uint8*>(&samples[0]),
+ samples.size() * sizeof(short),
+ encoder_->bits_per_sample() / 8);
+ encoder_->Encode(dummy_chunk);
encoder_->Flush();
- string encoded_data;
- encoder_->GetEncodedDataAndClear(&encoded_data);
- DCHECK(!encoded_data.empty());
+ scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
+ DCHECK(!encoded_data->IsEmpty());
encoder_.reset();
// If we haven't got any audio yet end the recognition sequence here.
@@ -187,7 +192,7 @@ void SpeechRecognizerImpl::StopRecording() {
scoped_refptr<SpeechRecognizerImpl> me(this);
delegate_->DidCompleteRecognition(caller_id_);
} else {
- request_->UploadAudioChunk(encoded_data, true /* is_last_chunk */);
+ request_->UploadAudioChunk(*encoded_data, true /* is_last_chunk */);
}
}
@@ -215,33 +220,28 @@ void SpeechRecognizerImpl::OnData(AudioInputController* controller,
const uint8* data, uint32 size) {
if (size == 0) // This could happen when recording stops and is normal.
return;
-
- string* str_data = new string(reinterpret_cast<const char*>(data), size);
+ AudioChunk* raw_audio = new AudioChunk(data, static_cast<size_t>(size),
+ kNumBitsPerAudioSample / 8);
BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
base::Bind(&SpeechRecognizerImpl::HandleOnData,
- this, str_data));
+ this, raw_audio));
}
-void SpeechRecognizerImpl::HandleOnData(string* data) {
+void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) {
+ scoped_ptr<AudioChunk> free_raw_audio_on_return(raw_audio);
// Check if we are still recording and if not discard this buffer, as
// recording might have been stopped after this buffer was posted to the queue
// by |OnData|.
- if (!audio_controller_.get()) {
- delete data;
+ if (!audio_controller_.get())
return;
- }
bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech();
- const short* samples = reinterpret_cast<const short*>(data->data());
- DCHECK_EQ((data->length() % sizeof(short)), 0U);
- int num_samples = data->length() / sizeof(short);
- encoder_->Encode(samples, num_samples);
+ encoder_->Encode(*raw_audio);
float rms;
- endpointer_.ProcessAudio(samples, num_samples, &rms);
- bool did_clip = Clipping(samples, num_samples);
- delete data;
- num_samples_recorded_ += num_samples;
+ endpointer_.ProcessAudio(*raw_audio, &rms);
+ bool did_clip = DetectClipping(*raw_audio);
+ num_samples_recorded_ += raw_audio->NumSamples();
if (request_ == NULL) {
// This was the first audio packet recorded, so start a request to the
@@ -252,10 +252,9 @@ void SpeechRecognizerImpl::HandleOnData(string* data) {
hardware_info_, origin_url_, encoder_->mime_type());
}
- string encoded_data;
- encoder_->GetEncodedDataAndClear(&encoded_data);
- DCHECK(!encoded_data.empty());
- request_->UploadAudioChunk(encoded_data, false /* is_last_chunk */);
+ scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
+ DCHECK(!encoded_data->IsEmpty());
+ request_->UploadAudioChunk(*encoded_data, false /* is_last_chunk */);
if (endpointer_.IsEstimatingEnvironment()) {
// Check if we have gathered enough audio for the endpointer to do
diff --git a/content/browser/speech/speech_recognizer_impl.h b/content/browser/speech/speech_recognizer_impl.h
index 25e0c0c..eaec3e3 100644
--- a/content/browser/speech/speech_recognizer_impl.h
+++ b/content/browser/speech/speech_recognizer_impl.h
@@ -75,8 +75,8 @@ class CONTENT_EXPORT SpeechRecognizerImpl
void HandleOnError(int error_code); // Handles OnError in the IO thread.
- // Handles OnData in the IO thread. Takes ownership of |data|.
- void HandleOnData(std::string* data);
+ // Handles OnData in the IO thread. Takes ownership of |raw_audio|.
+ void HandleOnData(AudioChunk* raw_audio);
// Helper method which closes the audio controller and blocks until done.
void CloseAudioControllerSynchronously();
diff --git a/content/browser/speech/speech_recognizer_impl_unittest.cc b/content/browser/speech/speech_recognizer_impl_unittest.cc
index cfd2c33..baf3a3e 100644
--- a/content/browser/speech/speech_recognizer_impl_unittest.cc
+++ b/content/browser/speech/speech_recognizer_impl_unittest.cc
@@ -9,6 +9,8 @@
#include "content/public/browser/speech_recognizer_delegate.h"
#include "content/test/test_url_fetcher_factory.h"
#include "media/audio/audio_manager.h"
+#include "media/audio/fake_audio_input_stream.h"
+#include "media/audio/fake_audio_output_stream.h"
#include "media/audio/test_audio_input_controller_factory.h"
#include "net/base/net_errors.h"
#include "net/url_request/url_request_status.h"
@@ -20,6 +22,48 @@ using media::AudioInputController;
using media::TestAudioInputController;
using media::TestAudioInputControllerFactory;
+namespace {
+
+class MockAudioManager : public AudioManager {
+ public:
+ MockAudioManager() {
+ audio_thread_.reset(new base::Thread("MockAudioThread"));
+ CHECK(audio_thread_->Start());
+ }
+ virtual bool HasAudioOutputDevices() OVERRIDE { return true; }
+ virtual bool HasAudioInputDevices() OVERRIDE { return true; }
+ virtual string16 GetAudioInputDeviceModel() OVERRIDE { return string16(); }
+ virtual bool CanShowAudioInputSettings() OVERRIDE { return false; }
+ virtual void ShowAudioInputSettings() OVERRIDE {}
+ virtual void GetAudioInputDeviceNames(
+ media::AudioDeviceNames* device_names) OVERRIDE {}
+ virtual AudioOutputStream* MakeAudioOutputStream(
+ const AudioParameters& params) OVERRIDE {
+ return FakeAudioOutputStream::MakeFakeStream(params);
+ }
+ virtual AudioOutputStream* MakeAudioOutputStreamProxy(
+ const AudioParameters& params) OVERRIDE {
+ NOTREACHED();
+ return NULL;
+ }
+ virtual AudioInputStream* MakeAudioInputStream(
+ const AudioParameters& params, const std::string& device_id) OVERRIDE {
+ return FakeAudioInputStream::MakeFakeStream(params);
+ }
+ virtual void MuteAll() OVERRIDE {}
+ virtual void UnMuteAll() OVERRIDE {}
+ virtual bool IsRecordingInProcess() OVERRIDE { return false; }
+ virtual scoped_refptr<base::MessageLoopProxy> GetMessageLoop() OVERRIDE {
+ return audio_thread_->message_loop_proxy();
+ }
+ virtual void Init() OVERRIDE {};
+ private:
+ scoped_ptr<base::Thread> audio_thread_;
+ DISALLOW_COPY_AND_ASSIGN(MockAudioManager);
+};
+} // namespace
+
+
namespace speech {
class SpeechRecognizerTest : public content::SpeechRecognizerDelegate,
@@ -27,7 +71,7 @@ class SpeechRecognizerTest : public content::SpeechRecognizerDelegate,
public:
SpeechRecognizerTest()
: io_thread_(BrowserThread::IO, &message_loop_),
- audio_manager_(AudioManager::Create()),
+ audio_manager_(new MockAudioManager()),
recording_complete_(false),
recognition_complete_(false),
result_received_(false),