author    primiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2012-03-13 23:57:51 +0000
committer primiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2012-03-13 23:57:51 +0000
commit    fad64e7a123b6ddd2ba8af13441c74f8f37966ee (patch)
tree      28f09e77787e4a77ed30d45743086f12b62f58ee /content/browser/speech
parent    4a5aebb91b0784ef133a926773b0b9e517f288d9 (diff)
Added AudioBuffer/AudioChunk abstractions for speech recognition and improved speech_recognizer_impl_unittest.
audio_encoder - Introduced the AudioBuffer class in order to hide the current string-based implementation (which involved a lot of dirty, scattered casts) and make room for future implementations based on a circular buffer.

speech_recognizer_impl_unittest - Created a MockAudioManager class, in order to avoid using the real audio manager on trybots, which could lead to errors when accessing the audio device.

BUG=116954
TEST=speech_recognizer_impl_unittest should never raise errors related to the audio driver (e.g., device in use, no microphone attached, etc.).

Review URL: http://codereview.chromium.org/9646031

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@126512 0039d316-1c4b-4281-b951-d872f2087c98
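For reference, a minimal usage sketch of the AudioBuffer/AudioChunk API added by this change, based on the interface in audio_buffer.h below (the helper function name, packet sizes, and sample data are illustrative only, not part of the patch):

// Sketch only: feeding raw 16-bit capture data into the new AudioBuffer and
// draining it, per the interface declared in audio_buffer.h.
#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
#include "content/browser/speech/audio_buffer.h"

void AudioBufferUsageSketch(const uint8* capture_data, size_t capture_size) {
  speech::AudioBuffer buffer(2);               // 2 bytes per sample (16-bit PCM).
  buffer.Enqueue(capture_data, capture_size);  // Copies the first packet.
  buffer.Enqueue(capture_data, capture_size);  // Copies a second packet.
  // FIFO dequeue: returns the first packet with its original length intact.
  scoped_ptr<speech::AudioChunk> first(buffer.DequeueSingleChunk());
  // Merge everything still enqueued into one chunk and empty the buffer.
  scoped_ptr<speech::AudioChunk> rest(buffer.DequeueAll());
  DCHECK(buffer.IsEmpty());
  // first->SamplesData16() / rest->AsString() expose the underlying samples.
}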
Diffstat (limited to 'content/browser/speech')
-rw-r--r--  content/browser/speech/audio_buffer.cc | 91
-rw-r--r--  content/browser/speech/audio_buffer.h | 73
-rw-r--r--  content/browser/speech/audio_encoder.cc | 74
-rw-r--r--  content/browser/speech/audio_encoder.h | 28
-rw-r--r--  content/browser/speech/endpointer/endpointer.cc | 6
-rw-r--r--  content/browser/speech/endpointer/endpointer.h | 5
-rw-r--r--  content/browser/speech/endpointer/endpointer_unittest.cc | 4
-rw-r--r--  content/browser/speech/speech_recognition_request.cc | 5
-rw-r--r--  content/browser/speech/speech_recognition_request.h | 4
-rw-r--r--  content/browser/speech/speech_recognition_request_unittest.cc | 7
-rw-r--r--  content/browser/speech/speech_recognizer_impl.cc | 51
-rw-r--r--  content/browser/speech/speech_recognizer_impl.h | 4
-rw-r--r--  content/browser/speech/speech_recognizer_impl_unittest.cc | 46
13 files changed, 304 insertions, 94 deletions
diff --git a/content/browser/speech/audio_buffer.cc b/content/browser/speech/audio_buffer.cc
new file mode 100644
index 0000000..5b887d7
--- /dev/null
+++ b/content/browser/speech/audio_buffer.cc
@@ -0,0 +1,91 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/logging.h"
+#include "base/stl_util.h"
+#include "content/browser/speech/audio_buffer.h"
+
+namespace speech {
+
+AudioChunk::AudioChunk(int bytes_per_sample)
+ : bytes_per_sample_(bytes_per_sample) {
+}
+
+AudioChunk::AudioChunk(const uint8* data, size_t length, int bytes_per_sample)
+ : data_string_(reinterpret_cast<const char*>(data), length),
+ bytes_per_sample_(bytes_per_sample) {
+ DCHECK_EQ(length % bytes_per_sample, 0U);
+}
+
+bool AudioChunk::IsEmpty() const {
+ return data_string_.empty();
+}
+
+size_t AudioChunk::NumSamples() const {
+ return data_string_.size() / bytes_per_sample_;
+}
+
+const std::string& AudioChunk::AsString() const {
+ return data_string_;
+}
+
+int16 AudioChunk::GetSample16(size_t index) const {
+ DCHECK(index < (data_string_.size() / sizeof(int16)));
+ return SamplesData16()[index];
+}
+
+const int16* AudioChunk::SamplesData16() const {
+ return reinterpret_cast<const int16*>(data_string_.data());
+}
+
+
+AudioBuffer::AudioBuffer(int bytes_per_sample)
+ : bytes_per_sample_(bytes_per_sample) {
+ DCHECK(bytes_per_sample == 1 ||
+ bytes_per_sample == 2 ||
+ bytes_per_sample == 4);
+}
+
+AudioBuffer::~AudioBuffer() {
+ Clear();
+}
+
+void AudioBuffer::Enqueue(const uint8* data, size_t length) {
+ AudioChunk* chunk = new AudioChunk(data, length, bytes_per_sample_);
+ chunks_.push_back(chunk);
+}
+
+scoped_ptr<AudioChunk> AudioBuffer::DequeueSingleChunk() {
+ DCHECK(!chunks_.empty());
+ AudioChunk* chunk = *chunks_.begin();
+ chunks_.weak_erase(chunks_.begin());
+ return scoped_ptr<AudioChunk>(chunk);
+}
+
+scoped_ptr<AudioChunk> AudioBuffer::DequeueAll() {
+ AudioChunk* chunk = new AudioChunk(bytes_per_sample_);
+ size_t resulting_length = 0;
+ ChunksContainer::const_iterator it;
+ // In order to improve performance, calculate in advance the total length
+ // and then copy the chunks.
+ for (it = chunks_.begin(); it != chunks_.end(); ++it) {
+ resulting_length += (*it)->data_string_.length();
+ }
+ chunk->data_string_.reserve(resulting_length);
+ for (it = chunks_.begin(); it != chunks_.end(); ++it) {
+ chunk->data_string_.append((*it)->data_string_);
+ }
+ Clear();
+ return scoped_ptr<AudioChunk>(chunk);
+}
+
+void AudioBuffer::Clear() {
+ chunks_.erase(chunks_.begin(), chunks_.end());
+}
+
+bool AudioBuffer::IsEmpty() const {
+ return chunks_.empty();
+}
+
+} // namespace speech
diff --git a/content/browser/speech/audio_buffer.h b/content/browser/speech/audio_buffer.h
new file mode 100644
index 0000000..c1d5103
--- /dev/null
+++ b/content/browser/speech/audio_buffer.h
@@ -0,0 +1,73 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CONTENT_BROWSER_SPEECH_AUDIO_BUFFER_H_
+#define CONTENT_BROWSER_SPEECH_AUDIO_BUFFER_H_
+#pragma once
+
+#include <string>
+
+#include "base/basictypes.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/memory/scoped_vector.h"
+#include "content/common/content_export.h"
+
+namespace speech {
+
+// Models a chunk derived from an AudioBuffer.
+class CONTENT_EXPORT AudioChunk {
+ public:
+ explicit AudioChunk(int bytes_per_sample);
+ AudioChunk(const uint8* data, size_t length, int bytes_per_sample);
+
+ bool IsEmpty() const;
+ int bytes_per_sample() const { return bytes_per_sample_; }
+ size_t NumSamples() const;
+ const std::string& AsString() const;
+ int16 GetSample16(size_t index) const;
+ const int16* SamplesData16() const;
+ friend class AudioBuffer;
+
+ private:
+ std::string data_string_;
+ int bytes_per_sample_;
+
+ DISALLOW_COPY_AND_ASSIGN(AudioChunk);
+};
+
+// Models an audio buffer. The current implementation relies on on-demand
+// allocations of AudioChunk(s) (which uses a string as storage).
+class AudioBuffer {
+ public:
+ explicit AudioBuffer(int bytes_per_sample);
+ ~AudioBuffer();
+
+ // Enqueues a copy of |length| bytes of |data| buffer.
+ void Enqueue(const uint8* data, size_t length);
+
+ // Dequeues, in FIFO order, a single chunk respecting the length of the
+ // corresponding Enqueue call (in a nutshell: multiple Enqueue calls followed
+ // by Dequeue calls will return the individual chunks without merging them).
+ scoped_ptr<AudioChunk> DequeueSingleChunk();
+
+ // Dequeues all previously enqueued chunks, merging them in a single chunk.
+ scoped_ptr<AudioChunk> DequeueAll();
+
+ // Removes and frees all the enqueued chunks.
+ void Clear();
+
+ // Checks whether the buffer is empty.
+ bool IsEmpty() const;
+
+ private:
+ typedef ScopedVector<AudioChunk> ChunksContainer;
+ ChunksContainer chunks_;
+ int bytes_per_sample_;
+
+ DISALLOW_COPY_AND_ASSIGN(AudioBuffer);
+};
+
+} // namespace speech
+
+#endif // CONTENT_BROWSER_SPEECH_AUDIO_BUFFER_H_
diff --git a/content/browser/speech/audio_encoder.cc b/content/browser/speech/audio_encoder.cc
index 83e0475..92ccdce 100644
--- a/content/browser/speech/audio_encoder.cc
+++ b/content/browser/speech/audio_encoder.cc
@@ -9,10 +9,12 @@
#include "base/memory/scoped_ptr.h"
#include "base/stl_util.h"
#include "base/string_number_conversions.h"
+#include "content/browser/speech/audio_buffer.h"
#include "third_party/flac/flac.h"
#include "third_party/speex/speex.h"
using std::string;
+using speech::AudioChunk;
namespace {
@@ -25,8 +27,8 @@ class FLACEncoder : public speech::AudioEncoder {
public:
FLACEncoder(int sampling_rate, int bits_per_sample);
virtual ~FLACEncoder();
- virtual void Encode(const short* samples, int num_samples);
- virtual void Flush();
+ virtual void Encode(const AudioChunk& raw_audio) OVERRIDE;
+ virtual void Flush() OVERRIDE;
private:
static FLAC__StreamEncoderWriteStatus WriteCallback(
@@ -52,13 +54,14 @@ FLAC__StreamEncoderWriteStatus FLACEncoder::WriteCallback(
void* client_data) {
FLACEncoder* me = static_cast<FLACEncoder*>(client_data);
DCHECK(me->encoder_ == encoder);
- me->AppendToBuffer(new string(reinterpret_cast<const char*>(buffer), bytes));
+ me->encoded_audio_buffer_.Enqueue(buffer, bytes);
return FLAC__STREAM_ENCODER_WRITE_STATUS_OK;
}
FLACEncoder::FLACEncoder(int sampling_rate, int bits_per_sample)
: AudioEncoder(std::string(kContentTypeFLAC) +
- base::IntToString(sampling_rate)),
+ base::IntToString(sampling_rate),
+ bits_per_sample),
encoder_(FLAC__stream_encoder_new()),
is_encoder_initialized_(false) {
FLAC__stream_encoder_set_channels(encoder_, 1);
@@ -75,20 +78,22 @@ FLACEncoder::~FLACEncoder() {
FLAC__stream_encoder_delete(encoder_);
}
-void FLACEncoder::Encode(const short* samples, int num_samples) {
+void FLACEncoder::Encode(const AudioChunk& raw_audio) {
+ DCHECK_EQ(raw_audio.bytes_per_sample(), 2);
if (!is_encoder_initialized_) {
const FLAC__StreamEncoderInitStatus encoder_status =
FLAC__stream_encoder_init_stream(encoder_, WriteCallback, NULL, NULL,
NULL, this);
- DCHECK(encoder_status == FLAC__STREAM_ENCODER_INIT_STATUS_OK);
+ DCHECK_EQ(encoder_status, FLAC__STREAM_ENCODER_INIT_STATUS_OK);
is_encoder_initialized_ = true;
}
// FLAC encoder wants samples as int32s.
+ const int num_samples = raw_audio.NumSamples();
scoped_array<FLAC__int32> flac_samples(new FLAC__int32[num_samples]);
FLAC__int32* flac_samples_ptr = flac_samples.get();
for (int i = 0; i < num_samples; ++i)
- flac_samples_ptr[i] = samples[i];
+ flac_samples_ptr[i] = static_cast<FLAC__int32>(raw_audio.GetSample16(i));
FLAC__stream_encoder_process(encoder_, &flac_samples_ptr, num_samples);
}
@@ -109,10 +114,10 @@ COMPILE_ASSERT(kMaxSpeexFrameLength <= 0xFF, invalidLength);
class SpeexEncoder : public speech::AudioEncoder {
public:
- explicit SpeexEncoder(int sampling_rate);
+ explicit SpeexEncoder(int sampling_rate, int bits_per_sample);
virtual ~SpeexEncoder();
- virtual void Encode(const short* samples, int num_samples);
- virtual void Flush() {}
+ virtual void Encode(const AudioChunk& raw_audio) OVERRIDE;
+ virtual void Flush() OVERRIDE {}
private:
void* encoder_state_;
@@ -122,9 +127,10 @@ class SpeexEncoder : public speech::AudioEncoder {
DISALLOW_COPY_AND_ASSIGN(SpeexEncoder);
};
-SpeexEncoder::SpeexEncoder(int sampling_rate)
+SpeexEncoder::SpeexEncoder(int sampling_rate, int bits_per_sample)
: AudioEncoder(std::string(kContentTypeSpeex) +
- base::IntToString(sampling_rate)) {
+ base::IntToString(sampling_rate),
+ bits_per_sample) {
// speex_bits_init() does not initialize all of the |bits_| struct.
memset(&bits_, 0, sizeof(bits_));
speex_bits_init(&bits_);
@@ -144,20 +150,23 @@ SpeexEncoder::~SpeexEncoder() {
speex_encoder_destroy(encoder_state_);
}
-void SpeexEncoder::Encode(const short* samples, int num_samples) {
+void SpeexEncoder::Encode(const AudioChunk& raw_audio) {
+ spx_int16_t* src_buffer =
+ const_cast<spx_int16_t*>(raw_audio.SamplesData16());
+ int num_samples = raw_audio.NumSamples();
// Drop incomplete frames, typically those which come in when recording stops.
num_samples -= (num_samples % samples_per_frame_);
for (int i = 0; i < num_samples; i += samples_per_frame_) {
speex_bits_reset(&bits_);
- speex_encode_int(encoder_state_, const_cast<spx_int16_t*>(samples + i),
- &bits_);
+ speex_encode_int(encoder_state_, src_buffer + i, &bits_);
// Encode the frame and place the size of the frame as the first byte. This
// is the packet format for MIME type x-speex-with-header-byte.
int frame_length = speex_bits_write(&bits_, encoded_frame_data_ + 1,
kMaxSpeexFrameLength);
encoded_frame_data_[0] = static_cast<char>(frame_length);
- AppendToBuffer(new string(encoded_frame_data_, frame_length + 1));
+ encoded_audio_buffer_.Enqueue(
+ reinterpret_cast<uint8*>(&encoded_frame_data_[0]), frame_length + 1);
}
}
@@ -170,39 +179,20 @@ AudioEncoder* AudioEncoder::Create(Codec codec,
int bits_per_sample) {
if (codec == CODEC_FLAC)
return new FLACEncoder(sampling_rate, bits_per_sample);
- return new SpeexEncoder(sampling_rate);
+ return new SpeexEncoder(sampling_rate, bits_per_sample);
}
-AudioEncoder::AudioEncoder(const std::string& mime_type)
- : mime_type_(mime_type) {
+AudioEncoder::AudioEncoder(const std::string& mime_type, int bits_per_sample)
+ : encoded_audio_buffer_(1), /* Byte granularity of encoded samples. */
+ mime_type_(mime_type),
+ bits_per_sample_(bits_per_sample) {
}
AudioEncoder::~AudioEncoder() {
- STLDeleteElements(&audio_buffers_);
}
-bool AudioEncoder::GetEncodedDataAndClear(std::string* encoded_data) {
- if (!audio_buffers_.size())
- return false;
-
- int audio_buffer_length = 0;
- for (AudioBufferQueue::iterator it = audio_buffers_.begin();
- it != audio_buffers_.end(); ++it) {
- audio_buffer_length += (*it)->length();
- }
- encoded_data->reserve(audio_buffer_length);
- for (AudioBufferQueue::iterator it = audio_buffers_.begin();
- it != audio_buffers_.end(); ++it) {
- encoded_data->append(*(*it));
- }
-
- STLDeleteElements(&audio_buffers_);
-
- return true;
-}
-
-void AudioEncoder::AppendToBuffer(std::string* item) {
- audio_buffers_.push_back(item);
+scoped_ptr<AudioChunk> AudioEncoder::GetEncodedDataAndClear() {
+ return encoded_audio_buffer_.DequeueAll();
}
} // namespace speech
diff --git a/content/browser/speech/audio_encoder.h b/content/browser/speech/audio_encoder.h
index 92bc645..65ceb97 100644
--- a/content/browser/speech/audio_encoder.h
+++ b/content/browser/speech/audio_encoder.h
@@ -9,9 +9,11 @@
#include <string>
#include "base/basictypes.h"
+#include "base/memory/scoped_ptr.h"
+#include "content/browser/speech/audio_buffer.h"
namespace speech {
-
+class AudioChunk;
// Provides a simple interface to encode raw audio using the various speech
// codecs.
class AudioEncoder {
@@ -27,30 +29,28 @@ class AudioEncoder {
virtual ~AudioEncoder();
- // Encodes each frame of raw audio in |samples| to the internal buffer. Use
- // |GetEncodedData| to read the result after this call or when recording
- // completes.
- virtual void Encode(const short* samples, int num_samples) = 0;
+ // Encodes |raw_audio| to the internal buffer. Use
+ // |GetEncodedDataAndClear| to read the result after this call or when
+ // audio capture completes.
+ virtual void Encode(const AudioChunk& raw_audio) = 0;
// Finish encoding and flush any pending encoded bits out.
virtual void Flush() = 0;
- // Copies the encoded audio to the given string. Returns true if the output
- // is not empty.
- bool GetEncodedDataAndClear(std::string* encoded_data);
+ // Merges, retrieves and clears all the accumulated encoded audio chunks.
+ scoped_ptr<AudioChunk> GetEncodedDataAndClear();
const std::string& mime_type() { return mime_type_; }
+ int bits_per_sample() { return bits_per_sample_; }
protected:
- AudioEncoder(const std::string& mime_type);
-
- void AppendToBuffer(std::string* item);
+ AudioEncoder(const std::string& mime_type, int bits_per_sample);
+ AudioBuffer encoded_audio_buffer_;
private:
- // Buffer holding the recorded audio. Owns the strings inside the list.
- typedef std::list<std::string*> AudioBufferQueue;
- AudioBufferQueue audio_buffers_;
std::string mime_type_;
+ int bits_per_sample_;
+
DISALLOW_COPY_AND_ASSIGN(AudioEncoder);
};
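For reference, a minimal sketch of the reworked encoder contract (the helper function name, codec, sample rate, and bit depth are illustrative; this assumes the Codec enum is nested in AudioEncoder as referenced by Create() in the audio_encoder.cc hunk above):

#include "base/memory/scoped_ptr.h"
#include "content/browser/speech/audio_buffer.h"
#include "content/browser/speech/audio_encoder.h"

void EncoderUsageSketch(const speech::AudioChunk& raw_audio) {
  // Encoders now consume AudioChunk objects and return the encoded bytes as a
  // single merged AudioChunk instead of filling a std::string out-parameter.
  scoped_ptr<speech::AudioEncoder> encoder(speech::AudioEncoder::Create(
      speech::AudioEncoder::CODEC_FLAC, 16000 /* Hz */, 16 /* bits/sample */));
  encoder->Encode(raw_audio);
  encoder->Flush();
  scoped_ptr<speech::AudioChunk> encoded(encoder->GetEncodedDataAndClear());
  // encoded->AsString() is what SpeechRecognitionRequest uploads to the server.
}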
diff --git a/content/browser/speech/endpointer/endpointer.cc b/content/browser/speech/endpointer/endpointer.cc
index fe3e0bf..b4a54c1 100644
--- a/content/browser/speech/endpointer/endpointer.cc
+++ b/content/browser/speech/endpointer/endpointer.cc
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+#include "content/browser/speech/audio_buffer.h"
#include "content/browser/speech/endpointer/endpointer.h"
#include "base/time.h"
@@ -88,8 +89,9 @@ EpStatus Endpointer::Status(int64 *time) {
return energy_endpointer_.Status(time);
}
-EpStatus Endpointer::ProcessAudio(const int16* audio_data, int num_samples,
- float* rms_out) {
+EpStatus Endpointer::ProcessAudio(const AudioChunk& raw_audio, float* rms_out) {
+ const int16* audio_data = raw_audio.SamplesData16();
+ const int num_samples = raw_audio.NumSamples();
EpStatus ep_status = EP_PRE_SPEECH;
// Process the input data in blocks of frame_size_, dropping any incomplete
diff --git a/content/browser/speech/endpointer/endpointer.h b/content/browser/speech/endpointer/endpointer.h
index 9ba2018..89ec3a9 100644
--- a/content/browser/speech/endpointer/endpointer.h
+++ b/content/browser/speech/endpointer/endpointer.h
@@ -13,6 +13,8 @@ class EpStatus;
namespace speech {
+class AudioChunk;
+
// A simple interface to the underlying energy-endpointer implementation, this
// class lets callers provide audio as being recorded and let them poll to find
// when the user has stopped speaking.
@@ -61,8 +63,7 @@ class CONTENT_EXPORT Endpointer {
// Process a segment of audio, which may be more than one frame.
// The status of the last frame will be returned.
- EpStatus ProcessAudio(const int16* audio_data, int num_samples,
- float* rms_out);
+ EpStatus ProcessAudio(const AudioChunk& raw_audio, float* rms_out);
// Get the status of the endpointer.
EpStatus Status(int64 *time_us);
diff --git a/content/browser/speech/endpointer/endpointer_unittest.cc b/content/browser/speech/endpointer/endpointer_unittest.cc
index 240e5dc..37f2339 100644
--- a/content/browser/speech/endpointer/endpointer_unittest.cc
+++ b/content/browser/speech/endpointer/endpointer_unittest.cc
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+#include "content/browser/speech/audio_buffer.h"
#include "content/browser/speech/endpointer/endpointer.h"
#include "testing/gtest/include/gtest/gtest.h"
@@ -116,7 +117,8 @@ class EndpointerFrameProcessor : public FrameProcessor {
: endpointer_(endpointer) {}
EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) {
- endpointer_->ProcessAudio(samples, kFrameSize, NULL);
+ AudioChunk frame(reinterpret_cast<uint8*>(samples), kFrameSize * 2, 2);
+ endpointer_->ProcessAudio(frame, NULL);
int64 ep_time;
return endpointer_->Status(&ep_time);
}
diff --git a/content/browser/speech/speech_recognition_request.cc b/content/browser/speech/speech_recognition_request.cc
index 89dde84..a143699 100644
--- a/content/browser/speech/speech_recognition_request.cc
+++ b/content/browser/speech/speech_recognition_request.cc
@@ -10,6 +10,7 @@
#include "base/string_number_conversions.h"
#include "base/string_util.h"
#include "base/values.h"
+#include "content/browser/speech/audio_buffer.h"
#include "content/common/net/url_fetcher_impl.h"
#include "content/public/common/speech_recognition_result.h"
#include "net/base/escape.h"
@@ -201,10 +202,10 @@ void SpeechRecognitionRequest::Start(const std::string& language,
url_fetcher_->Start();
}
-void SpeechRecognitionRequest::UploadAudioChunk(const std::string& audio_data,
+void SpeechRecognitionRequest::UploadAudioChunk(const AudioChunk& audio_chunk,
bool is_last_chunk) {
DCHECK(url_fetcher_.get());
- url_fetcher_->AppendChunkToUpload(audio_data, is_last_chunk);
+ url_fetcher_->AppendChunkToUpload(audio_chunk.AsString(), is_last_chunk);
}
void SpeechRecognitionRequest::OnURLFetchComplete(
diff --git a/content/browser/speech/speech_recognition_request.h b/content/browser/speech/speech_recognition_request.h
index b6ce077..2f29e0a 100644
--- a/content/browser/speech/speech_recognition_request.h
+++ b/content/browser/speech/speech_recognition_request.h
@@ -27,6 +27,8 @@ class URLRequestContextGetter;
namespace speech {
+class AudioChunk;
+
// Provides a simple interface for sending recorded speech data to the server
// and get back recognition results.
class SpeechRecognitionRequest : public content::URLFetcherDelegate {
@@ -61,7 +63,7 @@ class SpeechRecognitionRequest : public content::URLFetcherDelegate {
const std::string& content_type);
// Send a single chunk of audio immediately to the server.
- CONTENT_EXPORT void UploadAudioChunk(const std::string& audio_data,
+ CONTENT_EXPORT void UploadAudioChunk(const AudioChunk& audio_chunk,
bool is_last_chunk);
CONTENT_EXPORT bool HasPendingRequest() { return url_fetcher_ != NULL; }
diff --git a/content/browser/speech/speech_recognition_request_unittest.cc b/content/browser/speech/speech_recognition_request_unittest.cc
index 37b82f8..822e254 100644
--- a/content/browser/speech/speech_recognition_request_unittest.cc
+++ b/content/browser/speech/speech_recognition_request_unittest.cc
@@ -4,6 +4,7 @@
#include "base/message_loop.h"
#include "base/utf_string_conversions.h"
+#include "content/browser/speech/audio_buffer.h"
#include "content/browser/speech/speech_recognition_request.h"
#include "content/public/common/speech_recognition_result.h"
#include "content/test/test_url_fetcher_factory.h"
@@ -39,7 +40,11 @@ void SpeechRecognitionRequestTest::CreateAndTestRequest(
SpeechRecognitionRequest request(NULL, this);
request.Start(std::string(), std::string(), false, std::string(),
std::string(), std::string());
- request.UploadAudioChunk(std::string(" "), true);
+ unsigned char dummy_audio_buffer_data[2] = {'\0', '\0'};
+ AudioChunk dummy_audio_chunk(&dummy_audio_buffer_data[0],
+ sizeof(dummy_audio_buffer_data),
+ 2 /* bytes per sample */);
+ request.UploadAudioChunk(dummy_audio_chunk, true);
TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
ASSERT_TRUE(fetcher);
diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc
index bbcfbd2..c5342d8 100644
--- a/content/browser/speech/speech_recognizer_impl.cc
+++ b/content/browser/speech/speech_recognizer_impl.cc
@@ -7,6 +7,7 @@
#include "base/bind.h"
#include "base/time.h"
#include "content/browser/browser_main_loop.h"
+#include "content/browser/speech/audio_buffer.h"
#include "content/public/browser/speech_recognizer_delegate.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/common/speech_recognition_result.h"
@@ -38,9 +39,11 @@ const float kAudioMeterDbRange = kAudioMeterMaxDb - kAudioMeterMinDb;
const float kAudioMeterRangeMaxUnclipped = 47.0f / 48.0f;
// Returns true if more than 5% of the samples are at min or max value.
-bool Clipping(const int16* samples, int num_samples) {
- int clipping_samples = 0;
+bool DetectClipping(const speech::AudioChunk& chunk) {
+ const int num_samples = chunk.NumSamples();
+ const int16* samples = chunk.SamplesData16();
const int kThreshold = num_samples / 20;
+ int clipping_samples = 0;
for (int i = 0; i < num_samples; ++i) {
if (samples[i] <= -32767 || samples[i] >= 32767) {
if (++clipping_samples > kThreshold)
@@ -174,11 +177,13 @@ void SpeechRecognizerImpl::StopRecording() {
// of silence in case encoder had no data already.
std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) /
1000);
- encoder_->Encode(&samples[0], samples.size());
+ AudioChunk dummy_chunk(reinterpret_cast<uint8*>(&samples[0]),
+ samples.size() * sizeof(short),
+ encoder_->bits_per_sample() / 8);
+ encoder_->Encode(dummy_chunk);
encoder_->Flush();
- string encoded_data;
- encoder_->GetEncodedDataAndClear(&encoded_data);
- DCHECK(!encoded_data.empty());
+ scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
+ DCHECK(!encoded_data->IsEmpty());
encoder_.reset();
// If we haven't got any audio yet end the recognition sequence here.
@@ -187,7 +192,7 @@ void SpeechRecognizerImpl::StopRecording() {
scoped_refptr<SpeechRecognizerImpl> me(this);
delegate_->DidCompleteRecognition(caller_id_);
} else {
- request_->UploadAudioChunk(encoded_data, true /* is_last_chunk */);
+ request_->UploadAudioChunk(*encoded_data, true /* is_last_chunk */);
}
}
@@ -215,33 +220,28 @@ void SpeechRecognizerImpl::OnData(AudioInputController* controller,
const uint8* data, uint32 size) {
if (size == 0) // This could happen when recording stops and is normal.
return;
-
- string* str_data = new string(reinterpret_cast<const char*>(data), size);
+ AudioChunk* raw_audio = new AudioChunk(data, static_cast<size_t>(size),
+ kNumBitsPerAudioSample / 8);
BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
base::Bind(&SpeechRecognizerImpl::HandleOnData,
- this, str_data));
+ this, raw_audio));
}
-void SpeechRecognizerImpl::HandleOnData(string* data) {
+void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) {
+ scoped_ptr<AudioChunk> free_raw_audio_on_return(raw_audio);
// Check if we are still recording and if not discard this buffer, as
// recording might have been stopped after this buffer was posted to the queue
// by |OnData|.
- if (!audio_controller_.get()) {
- delete data;
+ if (!audio_controller_.get())
return;
- }
bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech();
- const short* samples = reinterpret_cast<const short*>(data->data());
- DCHECK_EQ((data->length() % sizeof(short)), 0U);
- int num_samples = data->length() / sizeof(short);
- encoder_->Encode(samples, num_samples);
+ encoder_->Encode(*raw_audio);
float rms;
- endpointer_.ProcessAudio(samples, num_samples, &rms);
- bool did_clip = Clipping(samples, num_samples);
- delete data;
- num_samples_recorded_ += num_samples;
+ endpointer_.ProcessAudio(*raw_audio, &rms);
+ bool did_clip = DetectClipping(*raw_audio);
+ num_samples_recorded_ += raw_audio->NumSamples();
if (request_ == NULL) {
// This was the first audio packet recorded, so start a request to the
@@ -252,10 +252,9 @@ void SpeechRecognizerImpl::HandleOnData(string* data) {
hardware_info_, origin_url_, encoder_->mime_type());
}
- string encoded_data;
- encoder_->GetEncodedDataAndClear(&encoded_data);
- DCHECK(!encoded_data.empty());
- request_->UploadAudioChunk(encoded_data, false /* is_last_chunk */);
+ scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
+ DCHECK(!encoded_data->IsEmpty());
+ request_->UploadAudioChunk(*encoded_data, false /* is_last_chunk */);
if (endpointer_.IsEstimatingEnvironment()) {
// Check if we have gathered enough audio for the endpointer to do
diff --git a/content/browser/speech/speech_recognizer_impl.h b/content/browser/speech/speech_recognizer_impl.h
index 25e0c0c..eaec3e3 100644
--- a/content/browser/speech/speech_recognizer_impl.h
+++ b/content/browser/speech/speech_recognizer_impl.h
@@ -75,8 +75,8 @@ class CONTENT_EXPORT SpeechRecognizerImpl
void HandleOnError(int error_code); // Handles OnError in the IO thread.
- // Handles OnData in the IO thread. Takes ownership of |data|.
- void HandleOnData(std::string* data);
+ // Handles OnData in the IO thread. Takes ownership of |raw_audio|.
+ void HandleOnData(AudioChunk* raw_audio);
// Helper method which closes the audio controller and blocks until done.
void CloseAudioControllerSynchronously();
diff --git a/content/browser/speech/speech_recognizer_impl_unittest.cc b/content/browser/speech/speech_recognizer_impl_unittest.cc
index cfd2c33..baf3a3e 100644
--- a/content/browser/speech/speech_recognizer_impl_unittest.cc
+++ b/content/browser/speech/speech_recognizer_impl_unittest.cc
@@ -9,6 +9,8 @@
#include "content/public/browser/speech_recognizer_delegate.h"
#include "content/test/test_url_fetcher_factory.h"
#include "media/audio/audio_manager.h"
+#include "media/audio/fake_audio_input_stream.h"
+#include "media/audio/fake_audio_output_stream.h"
#include "media/audio/test_audio_input_controller_factory.h"
#include "net/base/net_errors.h"
#include "net/url_request/url_request_status.h"
@@ -20,6 +22,48 @@ using media::AudioInputController;
using media::TestAudioInputController;
using media::TestAudioInputControllerFactory;
+namespace {
+
+class MockAudioManager : public AudioManager {
+ public:
+ MockAudioManager() {
+ audio_thread_.reset(new base::Thread("MockAudioThread"));
+ CHECK(audio_thread_->Start());
+ }
+ virtual bool HasAudioOutputDevices() OVERRIDE { return true; }
+ virtual bool HasAudioInputDevices() OVERRIDE { return true; }
+ virtual string16 GetAudioInputDeviceModel() OVERRIDE { return string16(); }
+ virtual bool CanShowAudioInputSettings() OVERRIDE { return false; }
+ virtual void ShowAudioInputSettings() OVERRIDE {}
+ virtual void GetAudioInputDeviceNames(
+ media::AudioDeviceNames* device_names) OVERRIDE {}
+ virtual AudioOutputStream* MakeAudioOutputStream(
+ const AudioParameters& params) OVERRIDE {
+ return FakeAudioOutputStream::MakeFakeStream(params);
+ }
+ virtual AudioOutputStream* MakeAudioOutputStreamProxy(
+ const AudioParameters& params) OVERRIDE {
+ NOTREACHED();
+ return NULL;
+ }
+ virtual AudioInputStream* MakeAudioInputStream(
+ const AudioParameters& params, const std::string& device_id) OVERRIDE {
+ return FakeAudioInputStream::MakeFakeStream(params);
+ }
+ virtual void MuteAll() OVERRIDE {}
+ virtual void UnMuteAll() OVERRIDE {}
+ virtual bool IsRecordingInProcess() OVERRIDE { return false; }
+ virtual scoped_refptr<base::MessageLoopProxy> GetMessageLoop() OVERRIDE {
+ return audio_thread_->message_loop_proxy();
+ }
+ virtual void Init() OVERRIDE {};
+ private:
+ scoped_ptr<base::Thread> audio_thread_;
+ DISALLOW_COPY_AND_ASSIGN(MockAudioManager);
+};
+} // namespace
+
+
namespace speech {
class SpeechRecognizerTest : public content::SpeechRecognizerDelegate,
@@ -27,7 +71,7 @@ class SpeechRecognizerTest : public content::SpeechRecognizerDelegate,
public:
SpeechRecognizerTest()
: io_thread_(BrowserThread::IO, &message_loop_),
- audio_manager_(AudioManager::Create()),
+ audio_manager_(new MockAudioManager()),
recording_complete_(false),
recognition_complete_(false),
result_received_(false),