diff options
author | satish@chromium.org <satish@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-09-16 12:07:57 +0000 |
---|---|---|
committer | satish@chromium.org <satish@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-09-16 12:07:57 +0000 |
commit | fc89d8ae5916652cce7a00f5adac4d0e812b5c16 (patch) | |
tree | d728195ba6ca3244adb47ff9e21dc971abb5f625 /chrome | |
parent | fda165787a252f0fd424f7137619f0bf0c1482dd (diff) | |
download | chromium_src-fc89d8ae5916652cce7a00f5adac4d0e812b5c16.zip chromium_src-fc89d8ae5916652cce7a00f5adac4d0e812b5c16.tar.gz chromium_src-fc89d8ae5916652cce7a00f5adac4d0e812b5c16.tar.bz2 |
Show a volume indicator as audio is being recorded.
Per UX input from Cole, this matches the implementation in the Android Voice Actions app.
Changes in this CL:
- Instead of the old mic icon, use the recently added mic-volume-empty, mic-volume-full and mask images for the volume indicator.
- Extended the endpointer code to return the audio RMS level (copied from the original source).
- SpeechRecognizer receives the above calculated RMS level and computes a volume level in the [0.0-1.0] range.
- SpeechInputManager receives the above computed volume level and passes it to SpeechInputBubbleController for display, which passes it to SpeechInputBubble.
- SpeechInputBubbleBase creates the appropriate skia bitmap for the volume indicator and passes to the platform specific code for display.
- As part of the above SpeechInputBubbleController addition for volume level, I wrote a single function to process all calls received by the controller, which are handled in the UI thread for simplicity.
BUG=53598
TEST=updated existing tests. Also tested manually: use speech input and verify that the audio level shown in the UI changes appropriately as the mic is moved nearer and farther.
Review URL: http://codereview.chromium.org/3384005
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@59638 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome')
20 files changed, 301 insertions, 81 deletions
diff --git a/chrome/app/theme/speech_input_recording.png b/chrome/app/theme/speech_input_recording.png Binary files differdeleted file mode 100644 index 3644c2e..0000000 --- a/chrome/app/theme/speech_input_recording.png +++ /dev/null diff --git a/chrome/app/theme/theme_resources.grd b/chrome/app/theme/theme_resources.grd index 4f627ea..1a20a67 100644 --- a/chrome/app/theme/theme_resources.grd +++ b/chrome/app/theme/theme_resources.grd @@ -423,7 +423,6 @@ <include name="IDR_SPEECH_INPUT_MIC_EMPTY" file="speech_input_mic_empty.png" type="BINDATA" /> <include name="IDR_SPEECH_INPUT_MIC_FULL" file="speech_input_mic_full.png" type="BINDATA" /> <include name="IDR_SPEECH_INPUT_MIC_MASK" file="speech_input_mic_mask.png" type="BINDATA" /> - <include name="IDR_SPEECH_INPUT_RECORDING" file="speech_input_recording.png" type="BINDATA" /> <include name="IDR_SPEECH_INPUT_PROCESSING" file="speech_input_processing.png" type="BINDATA" /> <if expr="pp_ifdef('_google_chrome')"> <include name="IDR_WIZARD_ICON" file="google_chrome/wizard_icon.png" type="BINDATA" /> diff --git a/chrome/browser/cocoa/speech_input_window_controller.mm b/chrome/browser/cocoa/speech_input_window_controller.mm index 24d7cfe..c3dae85 100644 --- a/chrome/browser/cocoa/speech_input_window_controller.mm +++ b/chrome/browser/cocoa/speech_input_window_controller.mm @@ -44,7 +44,7 @@ const int kBubbleHorizontalMargin = 15; // Space on either sides of controls. NSWindow* window = [self window]; [[self bubble] setArrowLocation:info_bubble::kTopLeft]; NSImage* icon = ResourceBundle::GetSharedInstance().GetNSImageNamed( - IDR_SPEECH_INPUT_RECORDING); + IDR_SPEECH_INPUT_MIC_EMPTY); [iconImage_ setImage:icon]; [iconImage_ setNeedsDisplay:YES]; @@ -76,7 +76,7 @@ const int kBubbleHorizontalMargin = 15; // Space on either sides of controls. 
int newWidth = size.width; NSImage* icon = ResourceBundle::GetSharedInstance().GetNSImageNamed( - IDR_SPEECH_INPUT_RECORDING); + IDR_SPEECH_INPUT_MIC_EMPTY); size = [icon size]; newHeight += size.height + kBubbleControlVerticalSpacing; if (newWidth < size.width) diff --git a/chrome/browser/speech/endpointer/endpointer.cc b/chrome/browser/speech/endpointer/endpointer.cc index 57a4f65..c30e1f2 100644 --- a/chrome/browser/speech/endpointer/endpointer.cc +++ b/chrome/browser/speech/endpointer/endpointer.cc @@ -87,7 +87,8 @@ EpStatus Endpointer::Status(int64 *time) { return energy_endpointer_.Status(time); } -EpStatus Endpointer::ProcessAudio(const int16* audio_data, int num_samples) { +EpStatus Endpointer::ProcessAudio(const int16* audio_data, int num_samples, + float* rms_out) { EpStatus ep_status = EP_PRE_SPEECH; // Process the input data in blocks of frame_size_, dropping any incomplete @@ -98,7 +99,8 @@ EpStatus Endpointer::ProcessAudio(const int16* audio_data, int num_samples) { // Have the endpointer process the frame. energy_endpointer_.ProcessAudioFrame(audio_frame_time_us_, audio_data + sample_index, - frame_size_); + frame_size_, + rms_out); sample_index += frame_size_; audio_frame_time_us_ += (frame_size_ * Time::kMicrosecondsPerSecond) / sample_rate_; diff --git a/chrome/browser/speech/endpointer/endpointer.h b/chrome/browser/speech/endpointer/endpointer.h index e83aed5..8af6016 100644 --- a/chrome/browser/speech/endpointer/endpointer.h +++ b/chrome/browser/speech/endpointer/endpointer.h @@ -60,7 +60,8 @@ class Endpointer { // Process a segment of audio, which may be more than one frame. // The status of the last frame will be returned. - EpStatus ProcessAudio(const int16* audio_data, int num_samples); + EpStatus ProcessAudio(const int16* audio_data, int num_samples, + float* rms_out); // Get the status of the endpointer. 
EpStatus Status(int64 *time_us); diff --git a/chrome/browser/speech/endpointer/endpointer_unittest.cc b/chrome/browser/speech/endpointer/endpointer_unittest.cc index b49a6a6..bbdc572 100644 --- a/chrome/browser/speech/endpointer/endpointer_unittest.cc +++ b/chrome/browser/speech/endpointer/endpointer_unittest.cc @@ -74,7 +74,7 @@ class EnergyEndpointerFrameProcessor : public FrameProcessor { : endpointer_(endpointer) {} EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) { - endpointer_->ProcessAudioFrame(time, samples, kFrameSize); + endpointer_->ProcessAudioFrame(time, samples, kFrameSize, NULL); int64 ep_time; return endpointer_->Status(&ep_time); } @@ -117,7 +117,7 @@ class EndpointerFrameProcessor : public FrameProcessor { : endpointer_(endpointer) {} EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) { - endpointer_->ProcessAudio(samples, kFrameSize); + endpointer_->ProcessAudio(samples, kFrameSize, NULL); int64 ep_time; return endpointer_->Status(&ep_time); } diff --git a/chrome/browser/speech/endpointer/energy_endpointer.cc b/chrome/browser/speech/endpointer/energy_endpointer.cc index 44ca4dd..f6ce46f 100644 --- a/chrome/browser/speech/endpointer/energy_endpointer.cc +++ b/chrome/browser/speech/endpointer/energy_endpointer.cc @@ -213,7 +213,8 @@ void EnergyEndpointer::SetUserInputMode() { void EnergyEndpointer::ProcessAudioFrame(int64 time_us, const int16* samples, - int num_samples) { + int num_samples, + float* rms_out) { endpointer_time_us_ = time_us; float rms = RMS(samples, num_samples); @@ -310,13 +311,19 @@ void EnergyEndpointer::ProcessAudioFrame(int64 time_us, } // Set a floor - if (decision_threshold_ <params_.min_decision_threshold()) + if (decision_threshold_ < params_.min_decision_threshold()) decision_threshold_ = params_.min_decision_threshold(); } // Update speech and noise levels. UpdateLevels(rms); ++frame_counter_; + + if (rms_out) { + *rms_out = -120.0;
+ if ((noise_level_ > 0.0) && ((rms / noise_level_ ) > 0.000001))
+ *rms_out = static_cast<float>(20.0 * log10(rms / noise_level_));
+ } } void EnergyEndpointer::UpdateLevels(float rms) { diff --git a/chrome/browser/speech/endpointer/energy_endpointer.h b/chrome/browser/speech/endpointer/energy_endpointer.h index cd461be..de79e76 100644 --- a/chrome/browser/speech/endpointer/energy_endpointer.h +++ b/chrome/browser/speech/endpointer/energy_endpointer.h @@ -78,7 +78,9 @@ class EnergyEndpointer { // Computes the next input frame and modifies EnergyEndpointer status as // appropriate based on the computation. - void ProcessAudioFrame(int64 time_us, const int16* samples, int num_samples); + void ProcessAudioFrame(int64 time_us, + const int16* samples, int num_samples, + float* rms_out); // Returns the current state of the EnergyEndpointer and the time // corresponding to the most recently computed frame. diff --git a/chrome/browser/speech/speech_input_bubble.cc b/chrome/browser/speech/speech_input_bubble.cc index 1c4b85f..be5d7c9 100644 --- a/chrome/browser/speech/speech_input_bubble.cc +++ b/chrome/browser/speech/speech_input_bubble.cc @@ -2,13 +2,21 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
+#include "app/resource_bundle.h" #include "chrome/browser/tab_contents/tab_contents.h" #include "chrome/browser/speech/speech_input_bubble.h" +#include "gfx/canvas_skia.h" #include "gfx/rect.h" +#include "grit/generated_resources.h" +#include "grit/theme_resources.h" SpeechInputBubble::FactoryMethod SpeechInputBubble::factory_ = NULL; const int SpeechInputBubble::kBubbleTargetOffsetX = 5; +SkBitmap* SpeechInputBubbleBase::mic_empty_ = NULL; +SkBitmap* SpeechInputBubbleBase::mic_full_ = NULL; +SkBitmap* SpeechInputBubbleBase::mic_mask_ = NULL; + SpeechInputBubble* SpeechInputBubble::Create(TabContents* tab_contents, Delegate* delegate, const gfx::Rect& element_rect) { @@ -24,6 +32,31 @@ SpeechInputBubble* SpeechInputBubble::Create(TabContents* tab_contents, SpeechInputBubbleBase::SpeechInputBubbleBase() : display_mode_(DISPLAY_MODE_RECORDING) { + if (!mic_empty_) { // Static variables. + mic_empty_ = ResourceBundle::GetSharedInstance().GetBitmapNamed( + IDR_SPEECH_INPUT_MIC_EMPTY); + mic_full_ = ResourceBundle::GetSharedInstance().GetBitmapNamed( + IDR_SPEECH_INPUT_MIC_FULL); + mic_mask_ = ResourceBundle::GetSharedInstance().GetBitmapNamed( + IDR_SPEECH_INPUT_MIC_MASK); + } + + // Instance variables. + mic_image_.reset(new SkBitmap()); + mic_image_->setConfig(SkBitmap::kARGB_8888_Config, mic_empty_->width(), + mic_empty_->height()); + mic_image_->allocPixels(); + + buffer_image_.reset(new SkBitmap()); + buffer_image_->setConfig(SkBitmap::kARGB_8888_Config, mic_empty_->width(), + mic_empty_->height()); + buffer_image_->allocPixels(); +} + +SpeechInputBubbleBase::~SpeechInputBubbleBase() { + // This destructor is added to make sure members such as the scoped_ptr + // get destroyed here and the derived classes don't have to care about such + // member variables which they don't use. 
} void SpeechInputBubbleBase::SetRecordingMode() { @@ -41,3 +74,32 @@ void SpeechInputBubbleBase::SetMessage(const string16& text) { display_mode_ = DISPLAY_MODE_MESSAGE; UpdateLayout(); } + +void SpeechInputBubbleBase::SetInputVolume(float volume) { + mic_image_->eraseARGB(0, 0, 0, 0); + buffer_image_->eraseARGB(0, 0, 0, 0); + + int width = mic_image_->width(); + int height = mic_image_->height(); + SkCanvas canvas(*mic_image_); + SkCanvas buffer_canvas(*buffer_image_); + + // The 'full volume' mic image is drawn clipped to the current volume level, + // and a gradient mask is applied over it with the 'multiply' compositing + // operator to show soft edges at the top. + buffer_canvas.save(); + SkScalar clip_top = ((1.0f - volume) * height * 3) / 2.0f - height / 2.0f; + buffer_canvas.clipRect(SkRect::MakeLTRB(0, clip_top, + SkIntToScalar(width), SkIntToScalar(height))); + buffer_canvas.drawBitmap(*mic_full_, 0, 0); + buffer_canvas.restore(); + SkPaint multiply_paint; + multiply_paint.setXfermode(SkXfermode::Create(SkXfermode::kMultiply_Mode)); + buffer_canvas.drawBitmap(*mic_mask_, 0, clip_top, &multiply_paint); + + // Draw the empty volume image first and the current volume image on top. + canvas.drawBitmap(*mic_empty_, 0, 0); + canvas.drawBitmap(*buffer_image_.get(), 0, 0); + + SetImage(*mic_image_.get()); +} diff --git a/chrome/browser/speech/speech_input_bubble.h b/chrome/browser/speech/speech_input_bubble.h index bfb88a5..73032b5 100644 --- a/chrome/browser/speech/speech_input_bubble.h +++ b/chrome/browser/speech/speech_input_bubble.h @@ -6,11 +6,13 @@ #define CHROME_BROWSER_SPEECH_SPEECH_INPUT_BUBBLE_H_ #pragma once +#include "base/scoped_ptr.h" #include "base/string16.h" namespace gfx { class Rect; } +class SkBitmap; class TabContents; // SpeechInputBubble displays a popup info bubble during speech recognition, @@ -91,6 +93,9 @@ class SpeechInputBubble { // |Delegate::InfoBubbleFocusChanged| as well. 
virtual void Hide() = 0; + // Updates the current captured audio volume displayed on screen. + virtual void SetInputVolume(float volume) = 0; + // The horizontal distance between the start of the html widget and the speech // bubble's arrow. static const int kBubbleTargetOffsetX; @@ -112,16 +117,23 @@ class SpeechInputBubbleBase : public SpeechInputBubble { }; SpeechInputBubbleBase(); + virtual ~SpeechInputBubbleBase(); // SpeechInputBubble methods virtual void SetRecordingMode(); virtual void SetRecognizingMode(); virtual void SetMessage(const string16& text); + virtual void SetInputVolume(float volume); protected: // Updates the platform specific UI layout for the current display mode. virtual void UpdateLayout() = 0; + // Sets the given image as the image to display in the speech bubble. + // TODO(satish): Make the SetRecognizingMode call use this to show an + // animation while waiting for results. + virtual void SetImage(const SkBitmap& image) = 0; + DisplayMode display_mode() { return display_mode_; } @@ -133,6 +145,14 @@ class SpeechInputBubbleBase : public SpeechInputBubble { private: DisplayMode display_mode_; string16 message_text_; // Text displayed in DISPLAY_MODE_MESSAGE + // The current microphone image with volume level indication. + scoped_ptr<SkBitmap> mic_image_; + // A temporary buffer image used in creating the above mic image. + scoped_ptr<SkBitmap> buffer_image_; + + static SkBitmap* mic_full_; // Mic image with full volume. + static SkBitmap* mic_empty_; // Mic image with zero volume. + static SkBitmap* mic_mask_; // Gradient mask used by the volume indicator. 
}; // This typedef is to workaround the issue with certain versions of diff --git a/chrome/browser/speech/speech_input_bubble_controller.cc b/chrome/browser/speech/speech_input_bubble_controller.cc index 08647d4..bf1f923 100644 --- a/chrome/browser/speech/speech_input_bubble_controller.cc +++ b/chrome/browser/speech/speech_input_bubble_controller.cc @@ -46,77 +46,79 @@ void SpeechInputBubbleController::CreateBubble(int caller_id, } void SpeechInputBubbleController::CloseBubble(int caller_id) { - if (!ChromeThread::CurrentlyOn(ChromeThread::UI)) { - ChromeThread::PostTask( - ChromeThread::UI, FROM_HERE, - NewRunnableMethod(this, &SpeechInputBubbleController::CloseBubble, - caller_id)); - return; - } - DCHECK(ChromeThread::CurrentlyOn(ChromeThread::UI)); - - if (current_bubble_caller_id_ == caller_id) - current_bubble_caller_id_ = 0; - delete bubbles_[caller_id]; - bubbles_.erase(caller_id); + ProcessRequestInUiThread(caller_id, REQUEST_CLOSE, string16(), 0); } void SpeechInputBubbleController::SetBubbleRecordingMode(int caller_id) { - if (!ChromeThread::CurrentlyOn(ChromeThread::UI)) { - ChromeThread::PostTask(ChromeThread::UI, FROM_HERE, NewRunnableMethod( - this, &SpeechInputBubbleController::SetBubbleRecordingMode, - caller_id)); - return; - } - SetBubbleRecordingModeOrMessage(caller_id, string16()); + ProcessRequestInUiThread(caller_id, REQUEST_SET_RECORDING_MODE, + string16(), 0); } void SpeechInputBubbleController::SetBubbleRecognizingMode(int caller_id) { - if (!ChromeThread::CurrentlyOn(ChromeThread::UI)) { - ChromeThread::PostTask(ChromeThread::UI, FROM_HERE, NewRunnableMethod( - this, &SpeechInputBubbleController::SetBubbleRecognizingMode, - caller_id)); - return; - } - DCHECK(ChromeThread::CurrentlyOn(ChromeThread::UI)); - // The bubble may have been closed before we got a chance to process this - // request. So check before proceeding. 
- if (!bubbles_.count(caller_id)) - return; + ProcessRequestInUiThread(caller_id, REQUEST_SET_RECOGNIZING_MODE, + string16(), 0); +} - bubbles_[caller_id]->SetRecognizingMode(); +void SpeechInputBubbleController::SetBubbleInputVolume(int caller_id, + float volume) { + ProcessRequestInUiThread(caller_id, REQUEST_SET_INPUT_VOLUME, string16(), + volume); } void SpeechInputBubbleController::SetBubbleMessage(int caller_id, const string16& text) { + ProcessRequestInUiThread(caller_id, REQUEST_SET_MESSAGE, text, 0); +} + +void SpeechInputBubbleController::ProcessRequestInUiThread( + int caller_id, RequestType type, const string16& text, float volume) { if (!ChromeThread::CurrentlyOn(ChromeThread::UI)) { ChromeThread::PostTask(ChromeThread::UI, FROM_HERE, NewRunnableMethod( - this, &SpeechInputBubbleController::SetBubbleMessage, - caller_id, text)); + this, &SpeechInputBubbleController::ProcessRequestInUiThread, + caller_id, type, text, volume)); return; } - SetBubbleRecordingModeOrMessage(caller_id, text); -} - -void SpeechInputBubbleController::SetBubbleRecordingModeOrMessage( - int caller_id, const string16& text) { DCHECK(ChromeThread::CurrentlyOn(ChromeThread::UI)); // The bubble may have been closed before we got a chance to process this // request. So check before proceeding. 
if (!bubbles_.count(caller_id)) return; - if (current_bubble_caller_id_ && current_bubble_caller_id_ != caller_id) - bubbles_[current_bubble_caller_id_]->Hide(); + bool change_active_bubble = (type == REQUEST_SET_RECORDING_MODE || + type == REQUEST_SET_MESSAGE); + if (change_active_bubble) { + if (current_bubble_caller_id_ && current_bubble_caller_id_ != caller_id) + bubbles_[current_bubble_caller_id_]->Hide(); + current_bubble_caller_id_ = caller_id; + } - current_bubble_caller_id_ = caller_id; SpeechInputBubble* bubble = bubbles_[caller_id]; - if (text.empty()) { - bubble->SetRecordingMode(); - } else { - bubble->SetMessage(text); + switch (type) { + case REQUEST_SET_RECORDING_MODE: + bubble->SetRecordingMode(); + break; + case REQUEST_SET_RECOGNIZING_MODE: + bubble->SetRecognizingMode(); + break; + case REQUEST_SET_MESSAGE: + bubble->SetMessage(text); + break; + case REQUEST_SET_INPUT_VOLUME: + bubble->SetInputVolume(volume); + break; + case REQUEST_CLOSE: + if (current_bubble_caller_id_ == caller_id) + current_bubble_caller_id_ = 0; + delete bubble; + bubbles_.erase(caller_id); + break; + default: + NOTREACHED(); + break; } - bubble->Show(); + + if (change_active_bubble) + bubble->Show(); } void SpeechInputBubbleController::InfoBubbleButtonClicked( diff --git a/chrome/browser/speech/speech_input_bubble_controller.h b/chrome/browser/speech/speech_input_bubble_controller.h index 2117b24..0a20333 100644 --- a/chrome/browser/speech/speech_input_bubble_controller.h +++ b/chrome/browser/speech/speech_input_bubble_controller.h @@ -64,6 +64,9 @@ class SpeechInputBubbleController // bubble is hidden, |Show| must be called to make it appear on screen. void SetBubbleMessage(int caller_id, const string16& text); + // Updates the current captured audio volume displayed on screen. + void SetBubbleInputVolume(int caller_id, float volume); + void CloseBubble(int caller_id); // SpeechInputBubble::Delegate methods. 
@@ -71,10 +74,22 @@ class SpeechInputBubbleController virtual void InfoBubbleFocusChanged(); private: + // The various calls received by this object and handled in the UI thread. + enum RequestType { + REQUEST_SET_RECORDING_MODE, + REQUEST_SET_RECOGNIZING_MODE, + REQUEST_SET_MESSAGE, + REQUEST_SET_INPUT_VOLUME, + REQUEST_CLOSE, + }; + void InvokeDelegateButtonClicked(int caller_id, SpeechInputBubble::Button button); void InvokeDelegateFocusChanged(int caller_id); - void SetBubbleRecordingModeOrMessage(int caller_id, const string16& text); + void ProcessRequestInUiThread(int caller_id, + RequestType type, + const string16& text, + float volume); // Only accessed in the IO thread. Delegate* delegate_; diff --git a/chrome/browser/speech/speech_input_bubble_controller_unittest.cc b/chrome/browser/speech/speech_input_bubble_controller_unittest.cc index 59eac15..bc479c8 100644 --- a/chrome/browser/speech/speech_input_bubble_controller_unittest.cc +++ b/chrome/browser/speech/speech_input_bubble_controller_unittest.cc @@ -8,6 +8,8 @@ #include "gfx/rect.h" #include "testing/gtest/include/gtest/gtest.h" +class SkBitmap; + namespace speech_input { // A mock bubble class which fakes a focus change or recognition cancel by the @@ -49,6 +51,7 @@ class MockSpeechInputBubble : public SpeechInputBubbleBase { virtual void Show() {} virtual void Hide() {} virtual void UpdateLayout() {} + virtual void SetImage(const SkBitmap&) {} private: static BubbleType type_; diff --git a/chrome/browser/speech/speech_input_bubble_gtk.cc b/chrome/browser/speech/speech_input_bubble_gtk.cc index db61256..7564ac4 100644 --- a/chrome/browser/speech/speech_input_bubble_gtk.cc +++ b/chrome/browser/speech/speech_input_bubble_gtk.cc @@ -47,6 +47,7 @@ class SpeechInputBubbleGtk virtual void Show(); virtual void Hide(); virtual void UpdateLayout(); + virtual void SetImage(const SkBitmap& image); CHROMEGTK_CALLBACK_0(SpeechInputBubbleGtk, void, OnCancelClicked); CHROMEGTK_CALLBACK_0(SpeechInputBubbleGtk, 
void, OnTryAgainClicked); @@ -181,7 +182,7 @@ void SpeechInputBubbleGtk::UpdateLayout() { gtk_label_set_text(GTK_LABEL(label_), l10n_util::GetStringUTF8(IDS_SPEECH_INPUT_BUBBLE_HEADING).c_str()); SkBitmap* image = ResourceBundle::GetSharedInstance().GetBitmapNamed( - display_mode() == DISPLAY_MODE_RECORDING ? IDR_SPEECH_INPUT_RECORDING : + display_mode() == DISPLAY_MODE_RECORDING ? IDR_SPEECH_INPUT_MIC_EMPTY : IDR_SPEECH_INPUT_PROCESSING); GdkPixbuf* pixbuf = gfx::GdkPixbufFromSkBitmap(image); gtk_image_set_from_pixbuf(GTK_IMAGE(icon_), pixbuf); @@ -191,6 +192,11 @@ void SpeechInputBubbleGtk::UpdateLayout() { } } +void SpeechInputBubbleGtk::SetImage(const SkBitmap& image) { + // TODO(satish): Implement. + NOTREACHED(); +} + } // namespace SpeechInputBubble* SpeechInputBubble::CreateNativeBubble( diff --git a/chrome/browser/speech/speech_input_bubble_mac.mm b/chrome/browser/speech/speech_input_bubble_mac.mm index fd01ca2..f22db5f 100644 --- a/chrome/browser/speech/speech_input_bubble_mac.mm +++ b/chrome/browser/speech/speech_input_bubble_mac.mm @@ -25,6 +25,7 @@ class SpeechInputBubbleImpl : public SpeechInputBubbleBase { virtual void Show(); virtual void Hide(); virtual void UpdateLayout(); + virtual void SetImage(const SkBitmap& image); private: scoped_nsobject<SpeechInputWindowController> window_; @@ -55,6 +56,11 @@ SpeechInputBubbleImpl::~SpeechInputBubbleImpl() { [window_.get() close]; } +void SpeechInputBubbleImpl::SetImage(const SkBitmap& image) { + // TODO(satish): Implement. + NOTREACHED(); +} + void SpeechInputBubbleImpl::Show() { // TODO(satish): Implement. 
NOTREACHED(); diff --git a/chrome/browser/speech/speech_input_bubble_views.cc b/chrome/browser/speech/speech_input_bubble_views.cc index a3dd616..d41c79b 100644 --- a/chrome/browser/speech/speech_input_bubble_views.cc +++ b/chrome/browser/speech/speech_input_bubble_views.cc @@ -40,6 +40,7 @@ class ContentView void UpdateLayout(SpeechInputBubbleBase::DisplayMode mode, const string16& message_text); + void SetImage(const SkBitmap& image); // views::ButtonListener methods. virtual void ButtonPressed(views::Button* source, const views::Event& event); @@ -78,7 +79,7 @@ ContentView::ContentView(SpeechInputBubbleDelegate* delegate) icon_ = new views::ImageView(); icon_->SetImage(*ResourceBundle::GetSharedInstance().GetBitmapNamed( - IDR_SPEECH_INPUT_RECORDING)); + IDR_SPEECH_INPUT_MIC_EMPTY)); icon_->SetHorizontalAlignment(views::ImageView::CENTER); AddChildView(icon_); @@ -104,10 +105,14 @@ void ContentView::UpdateLayout(SpeechInputBubbleBase::DisplayMode mode, } else { icon_->SetImage(*ResourceBundle::GetSharedInstance().GetBitmapNamed( (mode == SpeechInputBubbleBase::DISPLAY_MODE_RECORDING) ? - IDR_SPEECH_INPUT_RECORDING : IDR_SPEECH_INPUT_PROCESSING)); + IDR_SPEECH_INPUT_MIC_EMPTY : IDR_SPEECH_INPUT_PROCESSING)); } } +void ContentView::SetImage(const SkBitmap& image) { + icon_->SetImage(image); +} + void ContentView::ButtonPressed(views::Button* source, const views::Event& event) { if (source == cancel_) { @@ -203,6 +208,7 @@ class SpeechInputBubbleImpl // SpeechInputBubbleBase methods. virtual void UpdateLayout(); + virtual void SetImage(const SkBitmap& image); // Returns the screen rectangle to use as the info bubble's target. // |element_rect| is the html element's bounds in page coordinates. 
@@ -324,6 +330,11 @@ void SpeechInputBubbleImpl::UpdateLayout() { info_bubble_->SizeToContents(); } +void SpeechInputBubbleImpl::SetImage(const SkBitmap& image) { + if (bubble_content_) + bubble_content_->SetImage(image); +} + } // namespace SpeechInputBubble* SpeechInputBubble::CreateNativeBubble( diff --git a/chrome/browser/speech/speech_input_manager.cc b/chrome/browser/speech/speech_input_manager.cc index 4c2bdca..a029bb4 100644 --- a/chrome/browser/speech/speech_input_manager.cc +++ b/chrome/browser/speech/speech_input_manager.cc @@ -40,6 +40,7 @@ class SpeechInputManagerImpl : public SpeechInputManager, virtual void OnRecognizerError(int caller_id, SpeechRecognizer::ErrorCode error); virtual void DidCompleteEnvironmentEstimation(int caller_id); + virtual void SetInputVolume(int caller_id, float volume); // SpeechInputBubbleController::Delegate methods. virtual void InfoBubbleButtonClicked(int caller_id, @@ -205,6 +206,13 @@ void SpeechInputManagerImpl::DidCompleteEnvironmentEstimation(int caller_id) { bubble_controller_->SetBubbleRecordingMode(caller_id); } +void SpeechInputManagerImpl::SetInputVolume(int caller_id, float volume) { + DCHECK(HasPendingRequest(caller_id)); + DCHECK_EQ(recording_caller_id_, caller_id); + + bubble_controller_->SetBubbleInputVolume(caller_id, volume); +} + void SpeechInputManagerImpl::CancelRecognitionAndInformDelegate(int caller_id) { SpeechInputManagerDelegate* cur_delegate = GetDelegate(caller_id); CancelRecognition(caller_id); diff --git a/chrome/browser/speech/speech_recognizer.cc b/chrome/browser/speech/speech_recognizer.cc index 800b044..6eed2f5 100644 --- a/chrome/browser/speech/speech_recognizer.cc +++ b/chrome/browser/speech/speech_recognizer.cc @@ -28,7 +28,14 @@ const int kMaxSpeexFrameLength = 110; // (44kbps rate sampled at 32kHz). // make sure it is within the byte range. 
COMPILE_ASSERT(kMaxSpeexFrameLength <= 0xFF, invalidLength); -const int kEndpointerEstimationTimeMs = 300; +// The following constants are related to the volume level indicator shown in +// the UI for recorded audio. +// Multiplier used when new volume is greater than previous level. +const float kUpSmoothingFactor = 0.9f;
+// Multiplier used when new volume is lesser than previous level.
+const float kDownSmoothingFactor = 0.4f;
+const float kAudioMeterMinDb = 10.0f; // Lower bar for volume meter.
+const float kAudioMeterDbRange = 25.0f;
} // namespace namespace speech_input { @@ -38,6 +45,7 @@ const int SpeechRecognizer::kAudioPacketIntervalMs = 100; const int SpeechRecognizer::kNumAudioChannels = 1; const int SpeechRecognizer::kNumBitsPerAudioSample = 16; const int SpeechRecognizer::kNoSpeechTimeoutSec = 8; +const int SpeechRecognizer::kEndpointerEstimationTimeMs = 300; // Provides a simple interface to encode raw audio using the Speex codec. class SpeexEncoder { @@ -102,7 +110,8 @@ SpeechRecognizer::SpeechRecognizer(Delegate* delegate, int caller_id) : delegate_(delegate), caller_id_(caller_id), encoder_(new SpeexEncoder()), - endpointer_(kAudioSampleRate) { + endpointer_(kAudioSampleRate), + audio_level_(0.0f) { endpointer_.set_speech_input_complete_silence_length( base::Time::kMicrosecondsPerSecond / 2); endpointer_.set_long_speech_input_complete_silence_length( @@ -259,18 +268,20 @@ void SpeechRecognizer::HandleOnData(string* data) { int num_samples = data->length() / sizeof(short); encoder_->Encode(samples, num_samples, &audio_buffers_); - endpointer_.ProcessAudio(samples, num_samples); + float rms; + endpointer_.ProcessAudio(samples, num_samples, &rms); delete data; num_samples_recorded_ += num_samples; - // Check if we have gathered enough audio for the endpointer to do environment - // estimation and should move on to detect speech/end of speech. - if (endpointer_.IsEstimatingEnvironment() && - num_samples_recorded_ >= (kEndpointerEstimationTimeMs * - kAudioSampleRate) / 1000) { - endpointer_.SetUserInputMode(); - delegate_->DidCompleteEnvironmentEstimation(caller_id_); - return; + if (endpointer_.IsEstimatingEnvironment()) { + // Check if we have gathered enough audio for the endpointer to do + // environment estimation and should move on to detect speech/end of speech. 
+ if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * + kAudioSampleRate) / 1000) { + endpointer_.SetUserInputMode(); + delegate_->DidCompleteEnvironmentEstimation(caller_id_); + } + return; // No more processing since we are still estimating environment. } // Check if we have waited too long without hearing any speech. @@ -280,6 +291,17 @@ void SpeechRecognizer::HandleOnData(string* data) { return; } + // Calculate the input volume to display in the UI, smoothing towards the + // new level. + float level = (rms - kAudioMeterMinDb) / kAudioMeterDbRange;
+ level = std::min(std::max(0.0f, level), 1.0f);
+ if (level > audio_level_) {
+ audio_level_ += (level - audio_level_) * kUpSmoothingFactor;
+ } else {
+ audio_level_ += (level - audio_level_) * kDownSmoothingFactor;
+ }
+ delegate_->SetInputVolume(caller_id_, audio_level_); + if (endpointer_.speech_input_complete()) { StopRecording(); } diff --git a/chrome/browser/speech/speech_recognizer.h b/chrome/browser/speech/speech_recognizer.h index 4a18cb1..7e154ac 100644 --- a/chrome/browser/speech/speech_recognizer.h +++ b/chrome/browser/speech/speech_recognizer.h @@ -63,6 +63,11 @@ class SpeechRecognizer // recognition UI once this callback is received. virtual void DidCompleteEnvironmentEstimation(int caller_id) = 0; + // Informs of a change in the captured audio level, useful if displaying + // a microphone volume indicator while recording. + // The value of |volume| is in the [0.0, 1.0] range. + virtual void SetInputVolume(int caller_id, float volume) = 0; + protected: virtual ~Delegate() {} }; @@ -98,6 +103,7 @@ class SpeechRecognizer static const int kNumAudioChannels; static const int kNumBitsPerAudioSample; static const int kNoSpeechTimeoutSec; + static const int kEndpointerEstimationTimeMs; private: void ReleaseAudioBuffers(); @@ -120,6 +126,7 @@ class SpeechRecognizer scoped_ptr<SpeexEncoder> encoder_; Endpointer endpointer_; int num_samples_recorded_; + float audio_level_; DISALLOW_COPY_AND_ASSIGN(SpeechRecognizer); }; diff --git a/chrome/browser/speech/speech_recognizer_unittest.cc b/chrome/browser/speech/speech_recognizer_unittest.cc index 9e1cf07..e049558 100644 --- a/chrome/browser/speech/speech_recognizer_unittest.cc +++ b/chrome/browser/speech/speech_recognizer_unittest.cc @@ -15,10 +15,6 @@ using media::AudioInputController; using media::TestAudioInputController; using media::TestAudioInputControllerFactory; -namespace { -const int kAudioPacketLengthBytes = 1000; -} - namespace speech_input { class SpeechRecognizerTest : public SpeechRecognizerDelegate, @@ -31,7 +27,8 @@ class SpeechRecognizerTest : public SpeechRecognizerDelegate, recording_complete_(false), recognition_complete_(false), result_received_(false), - error_(SpeechRecognizer::RECOGNIZER_NO_ERROR) { + 
error_(SpeechRecognizer::RECOGNIZER_NO_ERROR), + volume_(-1.0f) { int audio_packet_length_bytes = (SpeechRecognizer::kAudioSampleRate * SpeechRecognizer::kAudioPacketIntervalMs * @@ -67,6 +64,10 @@ class SpeechRecognizerTest : public SpeechRecognizerDelegate, error_ = error; } + virtual void SetInputVolume(int caller_id, float volume) { + volume_ = volume; + } + // testing::Test methods. virtual void SetUp() { URLFetcher::set_factory(&url_fetcher_factory_); @@ -78,6 +79,12 @@ class SpeechRecognizerTest : public SpeechRecognizerDelegate, AudioInputController::set_factory(NULL); } + void FillPacketWithTestWaveform() { + // Fill the input with a simple pattern, a 125Hz sawtooth waveform. + for (size_t i = 0; i < audio_packet_.size(); ++i) + audio_packet_[i] = static_cast<uint8>(i); + } + protected: MessageLoopForIO message_loop_; ChromeThread io_thread_; @@ -89,6 +96,7 @@ class SpeechRecognizerTest : public SpeechRecognizerDelegate, TestURLFetcherFactory url_fetcher_factory_; TestAudioInputControllerFactory audio_input_controller_factory_; std::vector<uint8> audio_packet_; + float volume_; }; TEST_F(SpeechRecognizerTest, StopNoData) { @@ -237,9 +245,8 @@ TEST_F(SpeechRecognizerTest, NoSpeechCallbackNotIssued) { controller->event_handler()->OnData(controller, &audio_packet_[0], audio_packet_.size()); } - // Fill the rest of input with a simple pattern, a 125Hz sawtooth waveform. - for (size_t i = 0; i < audio_packet_.size(); ++i) - audio_packet_[i] = static_cast<uint8>(i); + + FillPacketWithTestWaveform(); for (int i = 0; i < num_packets / 2; ++i) { controller->event_handler()->OnData(controller, &audio_packet_[0], audio_packet_.size()); @@ -252,4 +259,44 @@ TEST_F(SpeechRecognizerTest, NoSpeechCallbackNotIssued) { recognizer_->CancelRecognition(); } +TEST_F(SpeechRecognizerTest, SetInputVolumeCallback) { + // Start recording and give a lot of packets with audio samples set to zero + // and then some more with reasonably loud audio samples. 
Check that we don't + // get the callback during estimation phase, then get zero for the silence + // samples and proper volume for the loud audio. + EXPECT_TRUE(recognizer_->StartRecording()); + TestAudioInputController* controller = + audio_input_controller_factory_.controller(); + ASSERT_TRUE(controller); + controller = audio_input_controller_factory_.controller(); + ASSERT_TRUE(controller); + + // Feed some samples to begin with for the endpointer to do noise estimation. + int num_packets = SpeechRecognizer::kEndpointerEstimationTimeMs / + SpeechRecognizer::kAudioPacketIntervalMs; + for (int i = 0; i < num_packets; ++i) { + controller->event_handler()->OnData(controller, &audio_packet_[0], + audio_packet_.size()); + } + MessageLoop::current()->RunAllPending(); + EXPECT_EQ(-1.0f, volume_); // No audio volume set yet. + + // The vector is already filled with zero value samples on create. + controller->event_handler()->OnData(controller, &audio_packet_[0], + audio_packet_.size()); + MessageLoop::current()->RunAllPending(); + EXPECT_EQ(0, volume_); + + FillPacketWithTestWaveform(); + controller->event_handler()->OnData(controller, &audio_packet_[0], + audio_packet_.size()); + MessageLoop::current()->RunAllPending(); + EXPECT_FLOAT_EQ(0.9f, volume_); + + EXPECT_EQ(SpeechRecognizer::RECOGNIZER_NO_ERROR, error_); + EXPECT_FALSE(recording_complete_); + EXPECT_FALSE(recognition_complete_); + recognizer_->CancelRecognition(); +} + } // namespace speech_input |