diff options
author | satish@chromium.org <satish@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-09-16 12:07:57 +0000 |
---|---|---|
committer | satish@chromium.org <satish@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-09-16 12:07:57 +0000 |
commit | fc89d8ae5916652cce7a00f5adac4d0e812b5c16 (patch) | |
tree | d728195ba6ca3244adb47ff9e21dc971abb5f625 /chrome | |
parent | fda165787a252f0fd424f7137619f0bf0c1482dd (diff) | |
download | chromium_src-fc89d8ae5916652cce7a00f5adac4d0e812b5c16.zip chromium_src-fc89d8ae5916652cce7a00f5adac4d0e812b5c16.tar.gz chromium_src-fc89d8ae5916652cce7a00f5adac4d0e812b5c16.tar.bz2 |
Show a volume indicator as audio is being recorded.
Per UX input from Cole, this matches the implementation in the Android Voice Actions app.
Changes in this CL:
- Instead of the old mic icon, use the recently added mic-volume-empty, mic-volume-full and mask images for the volume indicator.
- Extended the endpointer code to return the audio RMS level (copied from the original source).
- SpeechRecognizer receives the above calculated RMS level and computes a volume level in the [0.0-1.0] range.
- SpeechInputManager receives the above computed volume level and passes it to SpeechInputBubbleController for display, which passes it to SpeechInputBubble.
- SpeechInputBubbleBase creates the appropriate skia bitmap for the volume indicator and passes to the platform specific code for display.
- As part of the above SpeechInputBubbleController addition for volume level, I wrote a single function to process all calls received by the controller, which are handled in the UI thread for simplicity.
BUG=53598
TEST=updated existing tests. Also tested manually: use speech input and verify that the audio level shown in the UI changes appropriately as the mic is moved nearer and farther.
Review URL: http://codereview.chromium.org/3384005
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@59638 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome')
20 files changed, 301 insertions, 81 deletions
diff --git a/chrome/app/theme/speech_input_recording.png b/chrome/app/theme/speech_input_recording.png Binary files differdeleted file mode 100644 index 3644c2e..0000000 --- a/chrome/app/theme/speech_input_recording.png +++ /dev/null diff --git a/chrome/app/theme/theme_resources.grd b/chrome/app/theme/theme_resources.grd index 4f627ea..1a20a67 100644 --- a/chrome/app/theme/theme_resources.grd +++ b/chrome/app/theme/theme_resources.grd @@ -423,7 +423,6 @@ <include name="IDR_SPEECH_INPUT_MIC_EMPTY" file="speech_input_mic_empty.png" type="BINDATA" /> <include name="IDR_SPEECH_INPUT_MIC_FULL" file="speech_input_mic_full.png" type="BINDATA" /> <include name="IDR_SPEECH_INPUT_MIC_MASK" file="speech_input_mic_mask.png" type="BINDATA" /> - <include name="IDR_SPEECH_INPUT_RECORDING" file="speech_input_recording.png" type="BINDATA" /> <include name="IDR_SPEECH_INPUT_PROCESSING" file="speech_input_processing.png" type="BINDATA" /> <if expr="pp_ifdef('_google_chrome')"> <include name="IDR_WIZARD_ICON" file="google_chrome/wizard_icon.png" type="BINDATA" /> diff --git a/chrome/browser/cocoa/speech_input_window_controller.mm b/chrome/browser/cocoa/speech_input_window_controller.mm index 24d7cfe..c3dae85 100644 --- a/chrome/browser/cocoa/speech_input_window_controller.mm +++ b/chrome/browser/cocoa/speech_input_window_controller.mm @@ -44,7 +44,7 @@ const int kBubbleHorizontalMargin = 15; // Space on either sides of controls. NSWindow* window = [self window]; [[self bubble] setArrowLocation:info_bubble::kTopLeft]; NSImage* icon = ResourceBundle::GetSharedInstance().GetNSImageNamed( - IDR_SPEECH_INPUT_RECORDING); + IDR_SPEECH_INPUT_MIC_EMPTY); [iconImage_ setImage:icon]; [iconImage_ setNeedsDisplay:YES]; @@ -76,7 +76,7 @@ const int kBubbleHorizontalMargin = 15; // Space on either sides of controls. 
int newWidth = size.width; NSImage* icon = ResourceBundle::GetSharedInstance().GetNSImageNamed( - IDR_SPEECH_INPUT_RECORDING); + IDR_SPEECH_INPUT_MIC_EMPTY); size = [icon size]; newHeight += size.height + kBubbleControlVerticalSpacing; if (newWidth < size.width) diff --git a/chrome/browser/speech/endpointer/endpointer.cc b/chrome/browser/speech/endpointer/endpointer.cc index 57a4f65..c30e1f2 100644 --- a/chrome/browser/speech/endpointer/endpointer.cc +++ b/chrome/browser/speech/endpointer/endpointer.cc @@ -87,7 +87,8 @@ EpStatus Endpointer::Status(int64 *time) { return energy_endpointer_.Status(time); } -EpStatus Endpointer::ProcessAudio(const int16* audio_data, int num_samples) { +EpStatus Endpointer::ProcessAudio(const int16* audio_data, int num_samples, + float* rms_out) { EpStatus ep_status = EP_PRE_SPEECH; // Process the input data in blocks of frame_size_, dropping any incomplete @@ -98,7 +99,8 @@ EpStatus Endpointer::ProcessAudio(const int16* audio_data, int num_samples) { // Have the endpointer process the frame. energy_endpointer_.ProcessAudioFrame(audio_frame_time_us_, audio_data + sample_index, - frame_size_); + frame_size_, + rms_out); sample_index += frame_size_; audio_frame_time_us_ += (frame_size_ * Time::kMicrosecondsPerSecond) / sample_rate_; diff --git a/chrome/browser/speech/endpointer/endpointer.h b/chrome/browser/speech/endpointer/endpointer.h index e83aed5..8af6016 100644 --- a/chrome/browser/speech/endpointer/endpointer.h +++ b/chrome/browser/speech/endpointer/endpointer.h @@ -60,7 +60,8 @@ class Endpointer { // Process a segment of audio, which may be more than one frame. // The status of the last frame will be returned. - EpStatus ProcessAudio(const int16* audio_data, int num_samples); + EpStatus ProcessAudio(const int16* audio_data, int num_samples, + float* rms_out); // Get the status of the endpointer. 
EpStatus Status(int64 *time_us); diff --git a/chrome/browser/speech/endpointer/endpointer_unittest.cc b/chrome/browser/speech/endpointer/endpointer_unittest.cc index b49a6a6..bbdc572 100644 --- a/chrome/browser/speech/endpointer/endpointer_unittest.cc +++ b/chrome/browser/speech/endpointer/endpointer_unittest.cc @@ -74,7 +74,7 @@ class EnergyEndpointerFrameProcessor : public FrameProcessor { : endpointer_(endpointer) {} EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) { - endpointer_->ProcessAudioFrame(time, samples, kFrameSize); + endpointer_->ProcessAudioFrame(time, samples, kFrameSize, NULL); int64 ep_time; return endpointer_->Status(&ep_time); } @@ -117,7 +117,7 @@ class EndpointerFrameProcessor : public FrameProcessor { : endpointer_(endpointer) {} EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) { - endpointer_->ProcessAudio(samples, kFrameSize); + endpointer_->ProcessAudio(samples, kFrameSize, NULL); int64 ep_time; return endpointer_->Status(&ep_time); } diff --git a/chrome/browser/speech/endpointer/energy_endpointer.cc b/chrome/browser/speech/endpointer/energy_endpointer.cc index 44ca4dd..f6ce46f 100644 --- a/chrome/browser/speech/endpointer/energy_endpointer.cc +++ b/chrome/browser/speech/endpointer/energy_endpointer.cc @@ -213,7 +213,8 @@ void EnergyEndpointer::SetUserInputMode() { void EnergyEndpointer::ProcessAudioFrame(int64 time_us, const int16* samples, - int num_samples) { + int num_samples, + float* rms_out) { endpointer_time_us_ = time_us; float rms = RMS(samples, num_samples); @@ -310,13 +311,19 @@ void EnergyEndpointer::ProcessAudioFrame(int64 time_us, } // Set a floor - if (decision_threshold_ <params_.min_decision_threshold()) + if (decision_threshold_ < params_.min_decision_threshold()) decision_threshold_ = params_.min_decision_threshold(); } // Update speech and noise levels. UpdateLevels(rms); ++frame_counter_; + + if (rms_out) { + *rms_out = -120.0;
+ if ((noise_level_ > 0.0) && ((rms / noise_level_ ) > 0.000001))
+ *rms_out = static_cast<float>(20.0 * log10(rms / noise_level_));
+ } } void EnergyEndpointer::UpdateLevels(float rms) { diff --git a/chrome/browser/speech/endpointer/energy_endpointer.h b/chrome/browser/speech/endpointer/energy_endpointer.h index cd461be..de79e76 100644 --- a/chrome/browser/speech/endpointer/energy_endpointer.h +++ b/chrome/browser/speech/endpointer/energy_endpointer.h @@ -78,7 +78,9 @@ class EnergyEndpointer { // Computes the next input frame and modifies EnergyEndpointer status as // appropriate based on the computation. - void ProcessAudioFrame(int64 time_us, const int16* samples, int num_samples); + void ProcessAudioFrame(int64 time_us, + const int16* samples, int num_samples, + float* rms_out); // Returns the current state of the EnergyEndpointer and the time // corresponding to the most recently computed frame. diff --git a/chrome/browser/speech/speech_input_bubble.cc b/chrome/browser/speech/speech_input_bubble.cc index 1c4b85f..be5d7c9 100644 --- a/chrome/browser/speech/speech_input_bubble.cc +++ b/chrome/browser/speech/speech_input_bubble.cc @@ -2,13 +2,21 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
+#include "app/resource_bundle.h" #include "chrome/browser/tab_contents/tab_contents.h" #include "chrome/browser/speech/speech_input_bubble.h" +#include "gfx/canvas_skia.h" #include "gfx/rect.h" +#include "grit/generated_resources.h" +#include "grit/theme_resources.h" SpeechInputBubble::FactoryMethod SpeechInputBubble::factory_ = NULL; const int SpeechInputBubble::kBubbleTargetOffsetX = 5; +SkBitmap* SpeechInputBubbleBase::mic_empty_ = NULL; +SkBitmap* SpeechInputBubbleBase::mic_full_ = NULL; +SkBitmap* SpeechInputBubbleBase::mic_mask_ = NULL; + SpeechInputBubble* SpeechInputBubble::Create(TabContents* tab_contents, Delegate* delegate, const gfx::Rect& element_rect) { @@ -24,6 +32,31 @@ SpeechInputBubble* SpeechInputBubble::Create(TabContents* tab_contents, SpeechInputBubbleBase::SpeechInputBubbleBase() : display_mode_(DISPLAY_MODE_RECORDING) { + if (!mic_empty_) { // Static variables. + mic_empty_ = ResourceBundle::GetSharedInstance().GetBitmapNamed( + IDR_SPEECH_INPUT_MIC_EMPTY); + mic_full_ = ResourceBundle::GetSharedInstance().GetBitmapNamed( + IDR_SPEECH_INPUT_MIC_FULL); + mic_mask_ = ResourceBundle::GetSharedInstance().GetBitmapNamed( + IDR_SPEECH_INPUT_MIC_MASK); + } + + // Instance variables. + mic_image_.reset(new SkBitmap()); + mic_image_->setConfig(SkBitmap::kARGB_8888_Config, mic_empty_->width(), + mic_empty_->height()); + mic_image_->allocPixels(); + + buffer_image_.reset(new SkBitmap()); + buffer_image_->setConfig(SkBitmap::kARGB_8888_Config, mic_empty_->width(), + mic_empty_->height()); + buffer_image_->allocPixels(); +} + +SpeechInputBubbleBase::~SpeechInputBubbleBase() { + // This destructor is added to make sure members such as the scoped_ptr + // get destroyed here and the derived classes don't have to care about such + // member variables which they don't use. 
} void SpeechInputBubbleBase::SetRecordingMode() { @@ -41,3 +74,32 @@ void SpeechInputBubbleBase::SetMessage(const string16& text) { display_mode_ = DISPLAY_MODE_MESSAGE; UpdateLayout(); } + +void SpeechInputBubbleBase::SetInputVolume(float volume) { + mic_image_->eraseARGB(0, 0, 0, 0); + buffer_image_->eraseARGB(0, 0, 0, 0); + + int width = mic_image_->width(); + int height = mic_image_->height(); + SkCanvas canvas(*mic_image_); + SkCanvas buffer_canvas(*buffer_image_); + + // The 'full volume' mic image is drawn clipped to the current volume level, + // and a gradient mask is applied over it with the 'multiply' compositing + // operator to show soft edges at the top. + buffer_canvas.save(); + SkScalar clip_top = ((1.0f - volume) * height * 3) / 2.0f - height / 2.0f; + buffer_canvas.clipRect(SkRect::MakeLTRB(0, clip_top, + SkIntToScalar(width), SkIntToScalar(height))); + buffer_canvas.drawBitmap(*mic_full_, 0, 0); + buffer_canvas.restore(); + SkPaint multiply_paint; + multiply_paint.setXfermode(SkXfermode::Create(SkXfermode::kMultiply_Mode)); + buffer_canvas.drawBitmap(*mic_mask_, 0, clip_top, &multiply_paint); + + // Draw the empty volume image first and the current volume image on top. + canvas.drawBitmap(*mic_empty_, 0, 0); + canvas.drawBitmap(*buffer_image_.get(), 0, 0); + + SetImage(*mic_image_.get()); +} diff --git a/chrome/browser/speech/speech_input_bubble.h b/chrome/browser/speech/speech_input_bubble.h index bfb88a5..73032b5 100644 --- a/chrome/browser/speech/speech_input_bubble.h +++ b/chrome/browser/speech/speech_input_bubble.h @@ -6,11 +6,13 @@ #define CHROME_BROWSER_SPEECH_SPEECH_INPUT_BUBBLE_H_ #pragma once +#include "base/scoped_ptr.h" #include "base/string16.h" namespace gfx { class Rect; } +class SkBitmap; class TabContents; // SpeechInputBubble displays a popup info bubble during speech recognition, @@ -91,6 +93,9 @@ class SpeechInputBubble { // |Delegate::InfoBubbleFocusChanged| as well. 
virtual void Hide() = 0; + // Updates the current captured audio volume displayed on screen. + virtual void SetInputVolume(float volume) = 0; + // The horizontal distance between the start of the html widget and the speech // bubble's arrow. static const int kBubbleTargetOffsetX; @@ -112,16 +117,23 @@ class SpeechInputBubbleBase : public SpeechInputBubble { }; SpeechInputBubbleBase(); + virtual ~SpeechInputBubbleBase(); // SpeechInputBubble methods virtual void SetRecordingMode(); virtual void SetRecognizingMode(); virtual void SetMessage(const string16& text); + virtual void SetInputVolume(float volume); protected: // Updates the platform specific UI layout for the current display mode. virtual void UpdateLayout() = 0; + // Sets the given image as the image to display in the speech bubble. + // TODO(satish): Make the SetRecognizingMode call use this to show an + // animation while waiting for results. + virtual void SetImage(const SkBitmap& image) = 0; + DisplayMode display_mode() { return display_mode_; } @@ -133,6 +145,14 @@ class SpeechInputBubbleBase : public SpeechInputBubble { private: DisplayMode display_mode_; string16 message_text_; // Text displayed in DISPLAY_MODE_MESSAGE + // The current microphone image with volume level indication. + scoped_ptr<SkBitmap> mic_image_; + // A temporary buffer image used in creating the above mic image. + scoped_ptr<SkBitmap> buffer_image_; + + static SkBitmap* mic_full_; // Mic image with full volume. + static SkBitmap* mic_empty_; // Mic image with zero volume. + static SkBitmap* mic_mask_; // Gradient mask used by the volume indicator. 
}; // This typedef is to workaround the issue with certain versions of diff --git a/chrome/browser/speech/speech_input_bubble_controller.cc b/chrome/browser/speech/speech_input_bubble_controller.cc index 08647d4..bf1f923 100644 --- a/chrome/browser/speech/speech_input_bubble_controller.cc +++ b/chrome/browser/speech/speech_input_bubble_controller.cc @@ -46,77 +46,79 @@ void SpeechInputBubbleController::CreateBubble(int caller_id, } void SpeechInputBubbleController::CloseBubble(int caller_id) { - if (!ChromeThread::CurrentlyOn(ChromeThread::UI)) { - ChromeThread::PostTask( - ChromeThread::UI, FROM_HERE, - NewRunnableMethod(this, &SpeechInputBubbleController::CloseBubble, - caller_id)); - return; - } - DCHECK(ChromeThread::CurrentlyOn(ChromeThread::UI)); - - if (current_bubble_caller_id_ == caller_id) - current_bubble_caller_id_ = 0; - delete bubbles_[caller_id]; - bubbles_.erase(caller_id); + ProcessRequestInUiThread(caller_id, REQUEST_CLOSE, string16(), 0); } void SpeechInputBubbleController::SetBubbleRecordingMode(int caller_id) { - if (!ChromeThread::CurrentlyOn(ChromeThread::UI)) { - ChromeThread::PostTask(ChromeThread::UI, FROM_HERE, NewRunnableMethod( - this, &SpeechInputBubbleController::SetBubbleRecordingMode, - caller_id)); - return; - } - SetBubbleRecordingModeOrMessage(caller_id, string16()); + ProcessRequestInUiThread(caller_id, REQUEST_SET_RECORDING_MODE, + string16(), 0); } void SpeechInputBubbleController::SetBubbleRecognizingMode(int caller_id) { - if (!ChromeThread::CurrentlyOn(ChromeThread::UI)) { - ChromeThread::PostTask(ChromeThread::UI, FROM_HERE, NewRunnableMethod( - this, &SpeechInputBubbleController::SetBubbleRecognizingMode, - caller_id)); - return; - } - DCHECK(ChromeThread::CurrentlyOn(ChromeThread::UI)); - // The bubble may have been closed before we got a chance to process this - // request. So check before proceeding. 
- if (!bubbles_.count(caller_id)) - return; + ProcessRequestInUiThread(caller_id, REQUEST_SET_RECOGNIZING_MODE, + string16(), 0); +} - bubbles_[caller_id]->SetRecognizingMode(); +void SpeechInputBubbleController::SetBubbleInputVolume(int caller_id, + float volume) { + ProcessRequestInUiThread(caller_id, REQUEST_SET_INPUT_VOLUME, string16(), + volume); } void SpeechInputBubbleController::SetBubbleMessage(int caller_id, const string16& text) { + ProcessRequestInUiThread(caller_id, REQUEST_SET_MESSAGE, text, 0); +} + +void SpeechInputBubbleController::ProcessRequestInUiThread( + int caller_id, RequestType type, const string16& text, float volume) { if (!ChromeThread::CurrentlyOn(ChromeThread::UI)) { ChromeThread::PostTask(ChromeThread::UI, FROM_HERE, NewRunnableMethod( - this, &SpeechInputBubbleController::SetBubbleMessage, - caller_id, text)); + this, &SpeechInputBubbleController::ProcessRequestInUiThread, + caller_id, type, text, volume)); return; } - SetBubbleRecordingModeOrMessage(caller_id, text); -} - -void SpeechInputBubbleController::SetBubbleRecordingModeOrMessage( - int caller_id, const string16& text) { DCHECK(ChromeThread::CurrentlyOn(ChromeThread::UI)); // The bubble may have been closed before we got a chance to process this // request. So check before proceeding. 
if (!bubbles_.count(caller_id)) return; - if (current_bubble_caller_id_ && current_bubble_caller_id_ != caller_id) - bubbles_[current_bubble_caller_id_]->Hide(); + bool change_active_bubble = (type == REQUEST_SET_RECORDING_MODE || + type == REQUEST_SET_MESSAGE); + if (change_active_bubble) { + if (current_bubble_caller_id_ && current_bubble_caller_id_ != caller_id) + bubbles_[current_bubble_caller_id_]->Hide(); + current_bubble_caller_id_ = caller_id; + } - current_bubble_caller_id_ = caller_id; SpeechInputBubble* bubble = bubbles_[caller_id]; - if (text.empty()) { - bubble->SetRecordingMode(); - } else { - bubble->SetMessage(text); + switch (type) { + case REQUEST_SET_RECORDING_MODE: + bubble->SetRecordingMode(); + break; + case REQUEST_SET_RECOGNIZING_MODE: + bubble->SetRecognizingMode(); + break; + case REQUEST_SET_MESSAGE: + bubble->SetMessage(text); + break; + case REQUEST_SET_INPUT_VOLUME: + bubble->SetInputVolume(volume); + break; + case REQUEST_CLOSE: + if (current_bubble_caller_id_ == caller_id) + current_bubble_caller_id_ = 0; + delete bubble; + bubbles_.erase(caller_id); + break; + default: + NOTREACHED(); + break; } - bubble->Show(); + + if (change_active_bubble) + bubble->Show(); } void SpeechInputBubbleController::InfoBubbleButtonClicked( diff --git a/chrome/browser/speech/speech_input_bubble_controller.h b/chrome/browser/speech/speech_input_bubble_controller.h index 2117b24..0a20333 100644 --- a/chrome/browser/speech/speech_input_bubble_controller.h +++ b/chrome/browser/speech/speech_input_bubble_controller.h @@ -64,6 +64,9 @@ class SpeechInputBubbleController // bubble is hidden, |Show| must be called to make it appear on screen. void SetBubbleMessage(int caller_id, const string16& text); + // Updates the current captured audio volume displayed on screen. + void SetBubbleInputVolume(int caller_id, float volume); + void CloseBubble(int caller_id); // SpeechInputBubble::Delegate methods. 
@@ -71,10 +74,22 @@ class SpeechInputBubbleController virtual void InfoBubbleFocusChanged(); private: + // The various calls received by this object and handled in the UI thread. + enum RequestType { + REQUEST_SET_RECORDING_MODE, + REQUEST_SET_RECOGNIZING_MODE, + REQUEST_SET_MESSAGE, + REQUEST_SET_INPUT_VOLUME, + REQUEST_CLOSE, + }; + void InvokeDelegateButtonClicked(int caller_id, SpeechInputBubble::Button button); void InvokeDelegateFocusChanged(int caller_id); - void SetBubbleRecordingModeOrMessage(int caller_id, const string16& text); + void ProcessRequestInUiThread(int caller_id, + RequestType type, + const string16& text, + float volume); // Only accessed in the IO thread. Delegate* delegate_; diff --git a/chrome/browser/speech/speech_input_bubble_controller_unittest.cc b/chrome/browser/speech/speech_input_bubble_controller_unittest.cc index 59eac15..bc479c8 100644 --- a/chrome/browser/speech/speech_input_bubble_controller_unittest.cc +++ b/chrome/browser/speech/speech_input_bubble_controller_unittest.cc @@ -8,6 +8,8 @@ #include "gfx/rect.h" #include "testing/gtest/include/gtest/gtest.h" +class SkBitmap; + namespace speech_input { // A mock bubble class which fakes a focus change or recognition cancel by the @@ -49,6 +51,7 @@ class MockSpeechInputBubble : public SpeechInputBubbleBase { virtual void Show() {} virtual void Hide() {} virtual void UpdateLayout() {} + virtual void SetImage(const SkBitmap&) {} private: static BubbleType type_; diff --git a/chrome/browser/speech/speech_input_bubble_gtk.cc b/chrome/browser/speech/speech_input_bubble_gtk.cc index db61256..7564ac4 100644 --- a/chrome/browser/speech/speech_input_bubble_gtk.cc +++ b/chrome/browser/speech/speech_input_bubble_gtk.cc @@ -47,6 +47,7 @@ class SpeechInputBubbleGtk virtual void Show(); virtual void Hide(); virtual void UpdateLayout(); + virtual void SetImage(const SkBitmap& image); CHROMEGTK_CALLBACK_0(SpeechInputBubbleGtk, void, OnCancelClicked); CHROMEGTK_CALLBACK_0(SpeechInputBubbleGtk, 
void, OnTryAgainClicked); @@ -181,7 +182,7 @@ void SpeechInputBubbleGtk::UpdateLayout() { gtk_label_set_text(GTK_LABEL(label_), l10n_util::GetStringUTF8(IDS_SPEECH_INPUT_BUBBLE_HEADING).c_str()); SkBitmap* image = ResourceBundle::GetSharedInstance().GetBitmapNamed( - display_mode() == DISPLAY_MODE_RECORDING ? IDR_SPEECH_INPUT_RECORDING : + display_mode() == DISPLAY_MODE_RECORDING ? IDR_SPEECH_INPUT_MIC_EMPTY : IDR_SPEECH_INPUT_PROCESSING); GdkPixbuf* pixbuf = gfx::GdkPixbufFromSkBitmap(image); gtk_image_set_from_pixbuf(GTK_IMAGE(icon_), pixbuf); @@ -191,6 +192,11 @@ void SpeechInputBubbleGtk::UpdateLayout() { } } +void SpeechInputBubbleGtk::SetImage(const SkBitmap& image) { + // TODO(satish): Implement. + NOTREACHED(); +} + } // namespace SpeechInputBubble* SpeechInputBubble::CreateNativeBubble( diff --git a/chrome/browser/speech/speech_input_bubble_mac.mm b/chrome/browser/speech/speech_input_bubble_mac.mm index fd01ca2..f22db5f 100644 --- a/chrome/browser/speech/speech_input_bubble_mac.mm +++ b/chrome/browser/speech/speech_input_bubble_mac.mm @@ -25,6 +25,7 @@ class SpeechInputBubbleImpl : public SpeechInputBubbleBase { virtual void Show(); virtual void Hide(); virtual void UpdateLayout(); + virtual void SetImage(const SkBitmap& image); private: scoped_nsobject<SpeechInputWindowController> window_; @@ -55,6 +56,11 @@ SpeechInputBubbleImpl::~SpeechInputBubbleImpl() { [window_.get() close]; } +void SpeechInputBubbleImpl::SetImage(const SkBitmap& image) { + // TODO(satish): Implement. + NOTREACHED(); +} + void SpeechInputBubbleImpl::Show() { // TODO(satish): Implement. 
NOTREACHED(); diff --git a/chrome/browser/speech/speech_input_bubble_views.cc b/chrome/browser/speech/speech_input_bubble_views.cc index a3dd616..d41c79b 100644 --- a/chrome/browser/speech/speech_input_bubble_views.cc +++ b/chrome/browser/speech/speech_input_bubble_views.cc @@ -40,6 +40,7 @@ class ContentView void UpdateLayout(SpeechInputBubbleBase::DisplayMode mode, const string16& message_text); + void SetImage(const SkBitmap& image); // views::ButtonListener methods. virtual void ButtonPressed(views::Button* source, const views::Event& event); @@ -78,7 +79,7 @@ ContentView::ContentView(SpeechInputBubbleDelegate* delegate) icon_ = new views::ImageView(); icon_->SetImage(*ResourceBundle::GetSharedInstance().GetBitmapNamed( - IDR_SPEECH_INPUT_RECORDING)); + IDR_SPEECH_INPUT_MIC_EMPTY)); icon_->SetHorizontalAlignment(views::ImageView::CENTER); AddChildView(icon_); @@ -104,10 +105,14 @@ void ContentView::UpdateLayout(SpeechInputBubbleBase::DisplayMode mode, } else { icon_->SetImage(*ResourceBundle::GetSharedInstance().GetBitmapNamed( (mode == SpeechInputBubbleBase::DISPLAY_MODE_RECORDING) ? - IDR_SPEECH_INPUT_RECORDING : IDR_SPEECH_INPUT_PROCESSING)); + IDR_SPEECH_INPUT_MIC_EMPTY : IDR_SPEECH_INPUT_PROCESSING)); } } +void ContentView::SetImage(const SkBitmap& image) { + icon_->SetImage(image); +} + void ContentView::ButtonPressed(views::Button* source, const views::Event& event) { if (source == cancel_) { @@ -203,6 +208,7 @@ class SpeechInputBubbleImpl // SpeechInputBubbleBase methods. virtual void UpdateLayout(); + virtual void SetImage(const SkBitmap& image); // Returns the screen rectangle to use as the info bubble's target. // |element_rect| is the html element's bounds in page coordinates. 
@@ -324,6 +330,11 @@ void SpeechInputBubbleImpl::UpdateLayout() { info_bubble_->SizeToContents(); } +void SpeechInputBubbleImpl::SetImage(const SkBitmap& image) { + if (bubble_content_) + bubble_content_->SetImage(image); +} + } // namespace SpeechInputBubble* SpeechInputBubble::CreateNativeBubble( diff --git a/chrome/browser/speech/speech_input_manager.cc b/chrome/browser/speech/speech_input_manager.cc index 4c2bdca..a029bb4 100644 --- a/chrome/browser/speech/speech_input_manager.cc +++ b/chrome/browser/speech/speech_input_manager.cc @@ -40,6 +40,7 @@ class SpeechInputManagerImpl : public SpeechInputManager, virtual void OnRecognizerError(int caller_id, SpeechRecognizer::ErrorCode error); virtual void DidCompleteEnvironmentEstimation(int caller_id); + virtual void SetInputVolume(int caller_id, float volume); // SpeechInputBubbleController::Delegate methods. virtual void InfoBubbleButtonClicked(int caller_id, @@ -205,6 +206,13 @@ void SpeechInputManagerImpl::DidCompleteEnvironmentEstimation(int caller_id) { bubble_controller_->SetBubbleRecordingMode(caller_id); } +void SpeechInputManagerImpl::SetInputVolume(int caller_id, float volume) { + DCHECK(HasPendingRequest(caller_id)); + DCHECK_EQ(recording_caller_id_, caller_id); + + bubble_controller_->SetBubbleInputVolume(caller_id, volume); +} + void SpeechInputManagerImpl::CancelRecognitionAndInformDelegate(int caller_id) { SpeechInputManagerDelegate* cur_delegate = GetDelegate(caller_id); CancelRecognition(caller_id); diff --git a/chrome/browser/speech/speech_recognizer.cc b/chrome/browser/speech/speech_recognizer.cc index 800b044..6eed2f5 100644 --- a/chrome/browser/speech/speech_recognizer.cc +++ b/chrome/browser/speech/speech_recognizer.cc @@ -28,7 +28,14 @@ const int kMaxSpeexFrameLength = 110; // (44kbps rate sampled at 32kHz). // make sure it is within the byte range. 
COMPILE_ASSERT(kMaxSpeexFrameLength <= 0xFF, invalidLength); -const int kEndpointerEstimationTimeMs = 300; +// The following constants are related to the volume level indicator shown in +// the UI for recorded audio. +// Multiplier used when new volume is greater than previous level. +const float kUpSmoothingFactor = 0.9f;
+// Multiplier used when new volume is lesser than previous level.
+const float kDownSmoothingFactor = 0.4f;
+const float kAudioMeterMinDb = 10.0f; // Lower bar for volume meter.
+const float kAudioMeterDbRange = 25.0f;
} // namespace namespace speech_input { @@ -38,6 +45,7 @@ const int SpeechRecognizer::kAudioPacketIntervalMs = 100; const int SpeechRecognizer::kNumAudioChannels = 1; const int SpeechRecognizer::kNumBitsPerAudioSample = 16; const int SpeechRecognizer::kNoSpeechTimeoutSec = 8; +const int SpeechRecognizer::kEndpointerEstimationTimeMs = 300; // Provides a simple interface to encode raw audio using the Speex codec. class SpeexEncoder { @@ -102,7 +110,8 @@ SpeechRecognizer::SpeechRecognizer(Delegate* delegate, int caller_id) : delegate_(delegate), caller_id_(caller_id), encoder_(new SpeexEncoder()), - endpointer_(kAudioSampleRate) { + endpointer_(kAudioSampleRate), + audio_level_(0.0f) { endpointer_.set_speech_input_complete_silence_length( base::Time::kMicrosecondsPerSecond / 2); endpointer_.set_long_speech_input_complete_silence_length( @@ -259,18 +268,20 @@ void SpeechRecognizer::HandleOnData(string* data) { int num_samples = data->length() / sizeof(short); encoder_->Encode(samples, num_samples, &audio_buffers_); - endpointer_.ProcessAudio(samples, num_samples); + float rms; + endpointer_.ProcessAudio(samples, num_samples, &rms); delete data; num_samples_recorded_ += num_samples; - // Check if we have gathered enough audio for the endpointer to do environment - // estimation and should move on to detect speech/end of speech. - if (endpointer_.IsEstimatingEnvironment() && - num_samples_recorded_ >= (kEndpointerEstimationTimeMs * - kAudioSampleRate) / 1000) { - endpointer_.SetUserInputMode(); - delegate_->DidCompleteEnvironmentEstimation(caller_id_); - return; + if (endpointer_.IsEstimatingEnvironment()) { + // Check if we have gathered enough audio for the endpointer to do + // environment estimation and should move on to detect speech/end of speech. 
+ if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * + kAudioSampleRate) / 1000) { + endpointer_.SetUserInputMode(); + delegate_->DidCompleteEnvironmentEstimation(caller_id_); + } + return; // No more processing since we are still estimating environment. } // Check if we have waited too long without hearing any speech. @@ -280,6 +291,17 @@ void SpeechRecognizer::HandleOnData(string* data) { return; } + // Calculate the input volume to display in the UI, smoothing towards the + // new level. + float level = (rms - kAudioMeterMinDb) / kAudioMeterDbRange;
+ level = std::min(std::max(0.0f, level), 1.0f);
+ if (level > audio_level_) {
+ audio_level_ += (level - audio_level_) * kUpSmoothingFactor;
+ } else {
+ audio_level_ += (level - audio_level_) * kDownSmoothingFactor;
+ }
+ delegate_->SetInputVolume(caller_id_, audio_level_); + if (endpointer_.speech_input_complete()) { StopRecording(); } diff --git a/chrome/browser/speech/speech_recognizer.h b/chrome/browser/speech/speech_recognizer.h index 4a18cb1..7e154ac 100644 --- a/chrome/browser/speech/speech_recognizer.h +++ b/chrome/browser/speech/speech_recognizer.h @@ -63,6 +63,11 @@ class SpeechRecognizer // recognition UI once this callback is received. virtual void DidCompleteEnvironmentEstimation(int caller_id) = 0; + // Informs of a change in the captured audio level, useful if displaying + // a microphone volume indicator while recording. + // The value of |volume| is in the [0.0, 1.0] range. + virtual void SetInputVolume(int caller_id, float volume) = 0; + protected: virtual ~Delegate() {} }; @@ -98,6 +103,7 @@ class SpeechRecognizer static const int kNumAudioChannels; static const int kNumBitsPerAudioSample; static const int kNoSpeechTimeoutSec; + static const int kEndpointerEstimationTimeMs; private: void ReleaseAudioBuffers(); @@ -120,6 +126,7 @@ class SpeechRecognizer scoped_ptr<SpeexEncoder> encoder_; Endpointer endpointer_; int num_samples_recorded_; + float audio_level_; DISALLOW_COPY_AND_ASSIGN(SpeechRecognizer); }; diff --git a/chrome/browser/speech/speech_recognizer_unittest.cc b/chrome/browser/speech/speech_recognizer_unittest.cc index 9e1cf07..e049558 100644 --- a/chrome/browser/speech/speech_recognizer_unittest.cc +++ b/chrome/browser/speech/speech_recognizer_unittest.cc @@ -15,10 +15,6 @@ using media::AudioInputController; using media::TestAudioInputController; using media::TestAudioInputControllerFactory; -namespace { -const int kAudioPacketLengthBytes = 1000; -} - namespace speech_input { class SpeechRecognizerTest : public SpeechRecognizerDelegate, @@ -31,7 +27,8 @@ class SpeechRecognizerTest : public SpeechRecognizerDelegate, recording_complete_(false), recognition_complete_(false), result_received_(false), - error_(SpeechRecognizer::RECOGNIZER_NO_ERROR) { + 
error_(SpeechRecognizer::RECOGNIZER_NO_ERROR), + volume_(-1.0f) { int audio_packet_length_bytes = (SpeechRecognizer::kAudioSampleRate * SpeechRecognizer::kAudioPacketIntervalMs * @@ -67,6 +64,10 @@ class SpeechRecognizerTest : public SpeechRecognizerDelegate, error_ = error; } + virtual void SetInputVolume(int caller_id, float volume) { + volume_ = volume; + } + // testing::Test methods. virtual void SetUp() { URLFetcher::set_factory(&url_fetcher_factory_); @@ -78,6 +79,12 @@ class SpeechRecognizerTest : public SpeechRecognizerDelegate, AudioInputController::set_factory(NULL); } + void FillPacketWithTestWaveform() { + // Fill the input with a simple pattern, a 125Hz sawtooth waveform. + for (size_t i = 0; i < audio_packet_.size(); ++i) + audio_packet_[i] = static_cast<uint8>(i); + } + protected: MessageLoopForIO message_loop_; ChromeThread io_thread_; @@ -89,6 +96,7 @@ class SpeechRecognizerTest : public SpeechRecognizerDelegate, TestURLFetcherFactory url_fetcher_factory_; TestAudioInputControllerFactory audio_input_controller_factory_; std::vector<uint8> audio_packet_; + float volume_; }; TEST_F(SpeechRecognizerTest, StopNoData) { @@ -237,9 +245,8 @@ TEST_F(SpeechRecognizerTest, NoSpeechCallbackNotIssued) { controller->event_handler()->OnData(controller, &audio_packet_[0], audio_packet_.size()); } - // Fill the rest of input with a simple pattern, a 125Hz sawtooth waveform. - for (size_t i = 0; i < audio_packet_.size(); ++i) - audio_packet_[i] = static_cast<uint8>(i); + + FillPacketWithTestWaveform(); for (int i = 0; i < num_packets / 2; ++i) { controller->event_handler()->OnData(controller, &audio_packet_[0], audio_packet_.size()); @@ -252,4 +259,44 @@ TEST_F(SpeechRecognizerTest, NoSpeechCallbackNotIssued) { recognizer_->CancelRecognition(); } +TEST_F(SpeechRecognizerTest, SetInputVolumeCallback) { + // Start recording and give a lot of packets with audio samples set to zero + // and then some more with reasonably loud audio samples. 
Check that we don't + // get the callback during estimation phase, then get zero for the silence + // samples and proper volume for the loud audio. + EXPECT_TRUE(recognizer_->StartRecording()); + TestAudioInputController* controller = + audio_input_controller_factory_.controller(); + ASSERT_TRUE(controller); + controller = audio_input_controller_factory_.controller(); + ASSERT_TRUE(controller); + + // Feed some samples to begin with for the endpointer to do noise estimation. + int num_packets = SpeechRecognizer::kEndpointerEstimationTimeMs / + SpeechRecognizer::kAudioPacketIntervalMs; + for (int i = 0; i < num_packets; ++i) { + controller->event_handler()->OnData(controller, &audio_packet_[0], + audio_packet_.size()); + } + MessageLoop::current()->RunAllPending(); + EXPECT_EQ(-1.0f, volume_); // No audio volume set yet. + + // The vector is already filled with zero value samples on create. + controller->event_handler()->OnData(controller, &audio_packet_[0], + audio_packet_.size()); + MessageLoop::current()->RunAllPending(); + EXPECT_EQ(0, volume_); + + FillPacketWithTestWaveform(); + controller->event_handler()->OnData(controller, &audio_packet_[0], + audio_packet_.size()); + MessageLoop::current()->RunAllPending(); + EXPECT_FLOAT_EQ(0.9f, volume_); + + EXPECT_EQ(SpeechRecognizer::RECOGNIZER_NO_ERROR, error_); + EXPECT_FALSE(recording_complete_); + EXPECT_FALSE(recognition_complete_); + recognizer_->CancelRecognition(); +} + } // namespace speech_input |