author     satish@chromium.org <satish@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2010-09-16 12:07:57 +0000
committer  satish@chromium.org <satish@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2010-09-16 12:07:57 +0000
commit     fc89d8ae5916652cce7a00f5adac4d0e812b5c16 (patch)
tree       d728195ba6ca3244adb47ff9e21dc971abb5f625 /chrome
parent     fda165787a252f0fd424f7137619f0bf0c1482dd (diff)
Show a volume indicator as audio is being recorded.
Per UX input from Cole, this matches the implementation in the Android voice actions app.

Changes in this CL:
- Instead of the old mic icon, use the recently added mic-volume-empty, mic-volume-full and mask images for the volume indicator.
- Extended the endpointer code to return the audio RMS level (copied from the original source).
- SpeechRecognizer receives the above calculated RMS level and computes a volume level in the [0.0-1.0] range.
- SpeechInputManager receives the above computed volume level and passes it to SpeechInputBubbleController for display, which in turn passes it to SpeechInputBubble.
- SpeechInputBubbleBase creates the appropriate Skia bitmap for the volume indicator and passes it to the platform-specific code for display.
- As part of the above SpeechInputBubbleController addition for volume level, all calls received by it that are handled in the UI thread now go through a single function, for simplicity.

BUG=53598
TEST=Updated existing tests. Also tested manually: use speech input and verify that the audio level shown in the UI changes appropriately as the mic is moved nearer and farther.

Review URL: http://codereview.chromium.org/3384005

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@59638 0039d316-1c4b-4281-b951-d872f2087c98
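The message above describes the full path from raw audio to the on-screen meter: the endpointer reports each frame's RMS in dB relative to its running noise estimate, SpeechRecognizer maps the 10-35 dB band of that value onto a clamped [0.0, 1.0] level with asymmetric smoothing, and the bubble code draws it. The following stand-alone sketch illustrates that arithmetic using the constants visible in the diff (a -120 dB floor, kAudioMeterMinDb = 10, kAudioMeterDbRange = 25, smoothing factors 0.9 up / 0.4 down); it is an illustration of the computation, not the Chromium code itself.

#include <algorithm>
#include <cmath>
#include <cstdio>

// Frame RMS expressed in dB relative to the endpointer's noise estimate,
// floored at -120 dB (mirrors the rms_out computation added in this CL).
float RmsToDb(float rms, float noise_level) {
  if (noise_level <= 0.0f || rms / noise_level <= 0.000001f)
    return -120.0f;
  return 20.0f * std::log10(rms / noise_level);
}

// Map dB onto a [0, 1] meter level and smooth towards it, rising faster
// (factor 0.9) than it falls (factor 0.4), as SpeechRecognizer now does.
float NextMeterLevel(float previous, float rms_db) {
  const float kAudioMeterMinDb = 10.0f;
  const float kAudioMeterDbRange = 25.0f;
  float level = (rms_db - kAudioMeterMinDb) / kAudioMeterDbRange;
  level = std::min(std::max(0.0f, level), 1.0f);
  const float factor = (level > previous) ? 0.9f : 0.4f;
  return previous + (level - previous) * factor;
}

int main() {
  const float noise_level = 10.0f;  // Arbitrary noise estimate for the example.
  // Two frames roughly 30 dB above the noise floor, then two silent frames.
  const float frame_rms[] = {316.0f, 316.0f, 0.0f, 0.0f};
  float meter = 0.0f;
  for (float rms : frame_rms) {
    meter = NextMeterLevel(meter, RmsToDb(rms, noise_level));
    std::printf("%.3f\n", meter);  // 0.720, 0.792, 0.475, 0.285
  }
  return 0;
}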
Diffstat (limited to 'chrome')
-rw-r--r--  chrome/app/theme/speech_input_recording.png                        bin 2592 -> 0 bytes
-rw-r--r--  chrome/app/theme/theme_resources.grd                                 1
-rw-r--r--  chrome/browser/cocoa/speech_input_window_controller.mm               4
-rw-r--r--  chrome/browser/speech/endpointer/endpointer.cc                       6
-rw-r--r--  chrome/browser/speech/endpointer/endpointer.h                        3
-rw-r--r--  chrome/browser/speech/endpointer/endpointer_unittest.cc              4
-rw-r--r--  chrome/browser/speech/endpointer/energy_endpointer.cc               11
-rw-r--r--  chrome/browser/speech/endpointer/energy_endpointer.h                 4
-rw-r--r--  chrome/browser/speech/speech_input_bubble.cc                        62
-rw-r--r--  chrome/browser/speech/speech_input_bubble.h                         20
-rw-r--r--  chrome/browser/speech/speech_input_bubble_controller.cc             96
-rw-r--r--  chrome/browser/speech/speech_input_bubble_controller.h              17
-rw-r--r--  chrome/browser/speech/speech_input_bubble_controller_unittest.cc     3
-rw-r--r--  chrome/browser/speech/speech_input_bubble_gtk.cc                     8
-rw-r--r--  chrome/browser/speech/speech_input_bubble_mac.mm                     6
-rw-r--r--  chrome/browser/speech/speech_input_bubble_views.cc                  15
-rw-r--r--  chrome/browser/speech/speech_input_manager.cc                        8
-rw-r--r--  chrome/browser/speech/speech_recognizer.cc                          44
-rw-r--r--  chrome/browser/speech/speech_recognizer.h                            7
-rw-r--r--  chrome/browser/speech/speech_recognizer_unittest.cc                 63
20 files changed, 301 insertions, 81 deletions
diff --git a/chrome/app/theme/speech_input_recording.png b/chrome/app/theme/speech_input_recording.png
deleted file mode 100644
index 3644c2e..0000000
--- a/chrome/app/theme/speech_input_recording.png
+++ /dev/null
Binary files differ
diff --git a/chrome/app/theme/theme_resources.grd b/chrome/app/theme/theme_resources.grd
index 4f627ea..1a20a67 100644
--- a/chrome/app/theme/theme_resources.grd
+++ b/chrome/app/theme/theme_resources.grd
@@ -423,7 +423,6 @@
<include name="IDR_SPEECH_INPUT_MIC_EMPTY" file="speech_input_mic_empty.png" type="BINDATA" />
<include name="IDR_SPEECH_INPUT_MIC_FULL" file="speech_input_mic_full.png" type="BINDATA" />
<include name="IDR_SPEECH_INPUT_MIC_MASK" file="speech_input_mic_mask.png" type="BINDATA" />
- <include name="IDR_SPEECH_INPUT_RECORDING" file="speech_input_recording.png" type="BINDATA" />
<include name="IDR_SPEECH_INPUT_PROCESSING" file="speech_input_processing.png" type="BINDATA" />
<if expr="pp_ifdef('_google_chrome')">
<include name="IDR_WIZARD_ICON" file="google_chrome/wizard_icon.png" type="BINDATA" />
diff --git a/chrome/browser/cocoa/speech_input_window_controller.mm b/chrome/browser/cocoa/speech_input_window_controller.mm
index 24d7cfe..c3dae85 100644
--- a/chrome/browser/cocoa/speech_input_window_controller.mm
+++ b/chrome/browser/cocoa/speech_input_window_controller.mm
@@ -44,7 +44,7 @@ const int kBubbleHorizontalMargin = 15; // Space on either sides of controls.
NSWindow* window = [self window];
[[self bubble] setArrowLocation:info_bubble::kTopLeft];
NSImage* icon = ResourceBundle::GetSharedInstance().GetNSImageNamed(
- IDR_SPEECH_INPUT_RECORDING);
+ IDR_SPEECH_INPUT_MIC_EMPTY);
[iconImage_ setImage:icon];
[iconImage_ setNeedsDisplay:YES];
@@ -76,7 +76,7 @@ const int kBubbleHorizontalMargin = 15; // Space on either sides of controls.
int newWidth = size.width;
NSImage* icon = ResourceBundle::GetSharedInstance().GetNSImageNamed(
- IDR_SPEECH_INPUT_RECORDING);
+ IDR_SPEECH_INPUT_MIC_EMPTY);
size = [icon size];
newHeight += size.height + kBubbleControlVerticalSpacing;
if (newWidth < size.width)
diff --git a/chrome/browser/speech/endpointer/endpointer.cc b/chrome/browser/speech/endpointer/endpointer.cc
index 57a4f65..c30e1f2 100644
--- a/chrome/browser/speech/endpointer/endpointer.cc
+++ b/chrome/browser/speech/endpointer/endpointer.cc
@@ -87,7 +87,8 @@ EpStatus Endpointer::Status(int64 *time) {
return energy_endpointer_.Status(time);
}
-EpStatus Endpointer::ProcessAudio(const int16* audio_data, int num_samples) {
+EpStatus Endpointer::ProcessAudio(const int16* audio_data, int num_samples,
+ float* rms_out) {
EpStatus ep_status = EP_PRE_SPEECH;
// Process the input data in blocks of frame_size_, dropping any incomplete
@@ -98,7 +99,8 @@ EpStatus Endpointer::ProcessAudio(const int16* audio_data, int num_samples) {
// Have the endpointer process the frame.
energy_endpointer_.ProcessAudioFrame(audio_frame_time_us_,
audio_data + sample_index,
- frame_size_);
+ frame_size_,
+ rms_out);
sample_index += frame_size_;
audio_frame_time_us_ += (frame_size_ * Time::kMicrosecondsPerSecond) /
sample_rate_;
diff --git a/chrome/browser/speech/endpointer/endpointer.h b/chrome/browser/speech/endpointer/endpointer.h
index e83aed5..8af6016 100644
--- a/chrome/browser/speech/endpointer/endpointer.h
+++ b/chrome/browser/speech/endpointer/endpointer.h
@@ -60,7 +60,8 @@ class Endpointer {
// Process a segment of audio, which may be more than one frame.
// The status of the last frame will be returned.
- EpStatus ProcessAudio(const int16* audio_data, int num_samples);
+ EpStatus ProcessAudio(const int16* audio_data, int num_samples,
+ float* rms_out);
// Get the status of the endpointer.
EpStatus Status(int64 *time_us);
diff --git a/chrome/browser/speech/endpointer/endpointer_unittest.cc b/chrome/browser/speech/endpointer/endpointer_unittest.cc
index b49a6a6..bbdc572 100644
--- a/chrome/browser/speech/endpointer/endpointer_unittest.cc
+++ b/chrome/browser/speech/endpointer/endpointer_unittest.cc
@@ -74,7 +74,7 @@ class EnergyEndpointerFrameProcessor : public FrameProcessor {
: endpointer_(endpointer) {}
EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) {
- endpointer_->ProcessAudioFrame(time, samples, kFrameSize);
+ endpointer_->ProcessAudioFrame(time, samples, kFrameSize, NULL);
int64 ep_time;
return endpointer_->Status(&ep_time);
}
@@ -117,7 +117,7 @@ class EndpointerFrameProcessor : public FrameProcessor {
: endpointer_(endpointer) {}
EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) {
- endpointer_->ProcessAudio(samples, kFrameSize);
+ endpointer_->ProcessAudio(samples, kFrameSize, NULL);
int64 ep_time;
return endpointer_->Status(&ep_time);
}
diff --git a/chrome/browser/speech/endpointer/energy_endpointer.cc b/chrome/browser/speech/endpointer/energy_endpointer.cc
index 44ca4dd..f6ce46f 100644
--- a/chrome/browser/speech/endpointer/energy_endpointer.cc
+++ b/chrome/browser/speech/endpointer/energy_endpointer.cc
@@ -213,7 +213,8 @@ void EnergyEndpointer::SetUserInputMode() {
void EnergyEndpointer::ProcessAudioFrame(int64 time_us,
const int16* samples,
- int num_samples) {
+ int num_samples,
+ float* rms_out) {
endpointer_time_us_ = time_us;
float rms = RMS(samples, num_samples);
@@ -310,13 +311,19 @@ void EnergyEndpointer::ProcessAudioFrame(int64 time_us,
}
// Set a floor
- if (decision_threshold_ <params_.min_decision_threshold())
+ if (decision_threshold_ < params_.min_decision_threshold())
decision_threshold_ = params_.min_decision_threshold();
}
// Update speech and noise levels.
UpdateLevels(rms);
++frame_counter_;
+
+ if (rms_out) {
+ *rms_out = -120.0;
+ if ((noise_level_ > 0.0) && ((rms / noise_level_ ) > 0.000001))
+ *rms_out = static_cast<float>(20.0 * log10(rms / noise_level_));
+ }
}
void EnergyEndpointer::UpdateLevels(float rms) {
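For orientation, the rms_out value introduced above is the frame RMS expressed in decibels relative to the endpointer's running noise estimate: rms_out = 20 * log10(rms / noise_level_). A frame at the noise floor therefore reports 0 dB, a frame ten times the noise floor reports 20 dB, and any ratio at or below 1e-6 (or a non-positive noise estimate) is floored at -120 dB. SpeechRecognizer, further down in this CL, maps the 10 dB to 35 dB portion of this value onto the visible meter range.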
diff --git a/chrome/browser/speech/endpointer/energy_endpointer.h b/chrome/browser/speech/endpointer/energy_endpointer.h
index cd461be..de79e76 100644
--- a/chrome/browser/speech/endpointer/energy_endpointer.h
+++ b/chrome/browser/speech/endpointer/energy_endpointer.h
@@ -78,7 +78,9 @@ class EnergyEndpointer {
// Computes the next input frame and modifies EnergyEndpointer status as
// appropriate based on the computation.
- void ProcessAudioFrame(int64 time_us, const int16* samples, int num_samples);
+ void ProcessAudioFrame(int64 time_us,
+ const int16* samples, int num_samples,
+ float* rms_out);
// Returns the current state of the EnergyEndpointer and the time
// corresponding to the most recently computed frame.
diff --git a/chrome/browser/speech/speech_input_bubble.cc b/chrome/browser/speech/speech_input_bubble.cc
index 1c4b85f..be5d7c9 100644
--- a/chrome/browser/speech/speech_input_bubble.cc
+++ b/chrome/browser/speech/speech_input_bubble.cc
@@ -2,13 +2,21 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+#include "app/resource_bundle.h"
#include "chrome/browser/tab_contents/tab_contents.h"
#include "chrome/browser/speech/speech_input_bubble.h"
+#include "gfx/canvas_skia.h"
#include "gfx/rect.h"
+#include "grit/generated_resources.h"
+#include "grit/theme_resources.h"
SpeechInputBubble::FactoryMethod SpeechInputBubble::factory_ = NULL;
const int SpeechInputBubble::kBubbleTargetOffsetX = 5;
+SkBitmap* SpeechInputBubbleBase::mic_empty_ = NULL;
+SkBitmap* SpeechInputBubbleBase::mic_full_ = NULL;
+SkBitmap* SpeechInputBubbleBase::mic_mask_ = NULL;
+
SpeechInputBubble* SpeechInputBubble::Create(TabContents* tab_contents,
Delegate* delegate,
const gfx::Rect& element_rect) {
@@ -24,6 +32,31 @@ SpeechInputBubble* SpeechInputBubble::Create(TabContents* tab_contents,
SpeechInputBubbleBase::SpeechInputBubbleBase()
: display_mode_(DISPLAY_MODE_RECORDING) {
+ if (!mic_empty_) { // Static variables.
+ mic_empty_ = ResourceBundle::GetSharedInstance().GetBitmapNamed(
+ IDR_SPEECH_INPUT_MIC_EMPTY);
+ mic_full_ = ResourceBundle::GetSharedInstance().GetBitmapNamed(
+ IDR_SPEECH_INPUT_MIC_FULL);
+ mic_mask_ = ResourceBundle::GetSharedInstance().GetBitmapNamed(
+ IDR_SPEECH_INPUT_MIC_MASK);
+ }
+
+ // Instance variables.
+ mic_image_.reset(new SkBitmap());
+ mic_image_->setConfig(SkBitmap::kARGB_8888_Config, mic_empty_->width(),
+ mic_empty_->height());
+ mic_image_->allocPixels();
+
+ buffer_image_.reset(new SkBitmap());
+ buffer_image_->setConfig(SkBitmap::kARGB_8888_Config, mic_empty_->width(),
+ mic_empty_->height());
+ buffer_image_->allocPixels();
+}
+
+SpeechInputBubbleBase::~SpeechInputBubbleBase() {
+ // This destructor is added to make sure members such as the scoped_ptr
+ // get destroyed here and the derived classes don't have to care about such
+ // member variables which they don't use.
}
void SpeechInputBubbleBase::SetRecordingMode() {
@@ -41,3 +74,32 @@ void SpeechInputBubbleBase::SetMessage(const string16& text) {
display_mode_ = DISPLAY_MODE_MESSAGE;
UpdateLayout();
}
+
+void SpeechInputBubbleBase::SetInputVolume(float volume) {
+ mic_image_->eraseARGB(0, 0, 0, 0);
+ buffer_image_->eraseARGB(0, 0, 0, 0);
+
+ int width = mic_image_->width();
+ int height = mic_image_->height();
+ SkCanvas canvas(*mic_image_);
+ SkCanvas buffer_canvas(*buffer_image_);
+
+ // The 'full volume' mic image is drawn clipped to the current volume level,
+ // and a gradient mask is applied over it with the 'multiply' compositing
+ // operator to show soft edges at the top.
+ buffer_canvas.save();
+ SkScalar clip_top = ((1.0f - volume) * height * 3) / 2.0f - height / 2.0f;
+ buffer_canvas.clipRect(SkRect::MakeLTRB(0, clip_top,
+ SkIntToScalar(width), SkIntToScalar(height)));
+ buffer_canvas.drawBitmap(*mic_full_, 0, 0);
+ buffer_canvas.restore();
+ SkPaint multiply_paint;
+ multiply_paint.setXfermode(SkXfermode::Create(SkXfermode::kMultiply_Mode));
+ buffer_canvas.drawBitmap(*mic_mask_, 0, clip_top, &multiply_paint);
+
+ // Draw the empty volume image first and the current volume image on top.
+ canvas.drawBitmap(*mic_empty_, 0, 0);
+ canvas.drawBitmap(*buffer_image_.get(), 0, 0);
+
+ SetImage(*mic_image_.get());
+}
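A quick worked example of the clipping arithmetic above: clip_top = (1 - volume) * 1.5 * height - 0.5 * height, so the meter spans slightly more than the image and saturates before the extremes. Assuming, say, a 48 px tall mic image, volume 0.0 gives clip_top = 48 and the 'full' bitmap is clipped away entirely, volume 0.5 gives clip_top = 12 so the bottom three quarters show, and any volume of 2/3 or more gives a non-positive clip_top, i.e. the full-volume bitmap is completely visible. The gradient mask drawn at y = clip_top with the multiply transfer mode then softens the hard clip edge.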
diff --git a/chrome/browser/speech/speech_input_bubble.h b/chrome/browser/speech/speech_input_bubble.h
index bfb88a5..73032b5 100644
--- a/chrome/browser/speech/speech_input_bubble.h
+++ b/chrome/browser/speech/speech_input_bubble.h
@@ -6,11 +6,13 @@
#define CHROME_BROWSER_SPEECH_SPEECH_INPUT_BUBBLE_H_
#pragma once
+#include "base/scoped_ptr.h"
#include "base/string16.h"
namespace gfx {
class Rect;
}
+class SkBitmap;
class TabContents;
// SpeechInputBubble displays a popup info bubble during speech recognition,
@@ -91,6 +93,9 @@ class SpeechInputBubble {
// |Delegate::InfoBubbleFocusChanged| as well.
virtual void Hide() = 0;
+ // Updates the current captured audio volume displayed on screen.
+ virtual void SetInputVolume(float volume) = 0;
+
// The horizontal distance between the start of the html widget and the speech
// bubble's arrow.
static const int kBubbleTargetOffsetX;
@@ -112,16 +117,23 @@ class SpeechInputBubbleBase : public SpeechInputBubble {
};
SpeechInputBubbleBase();
+ virtual ~SpeechInputBubbleBase();
// SpeechInputBubble methods
virtual void SetRecordingMode();
virtual void SetRecognizingMode();
virtual void SetMessage(const string16& text);
+ virtual void SetInputVolume(float volume);
protected:
// Updates the platform specific UI layout for the current display mode.
virtual void UpdateLayout() = 0;
+ // Sets the given image as the image to display in the speech bubble.
+ // TODO(satish): Make the SetRecognizingMode call use this to show an
+ // animation while waiting for results.
+ virtual void SetImage(const SkBitmap& image) = 0;
+
DisplayMode display_mode() {
return display_mode_;
}
@@ -133,6 +145,14 @@ class SpeechInputBubbleBase : public SpeechInputBubble {
private:
DisplayMode display_mode_;
string16 message_text_; // Text displayed in DISPLAY_MODE_MESSAGE
+ // The current microphone image with volume level indication.
+ scoped_ptr<SkBitmap> mic_image_;
+ // A temporary buffer image used in creating the above mic image.
+ scoped_ptr<SkBitmap> buffer_image_;
+
+ static SkBitmap* mic_full_; // Mic image with full volume.
+ static SkBitmap* mic_empty_; // Mic image with zero volume.
+ static SkBitmap* mic_mask_; // Gradient mask used by the volume indicator.
};
// This typedef is to workaround the issue with certain versions of
diff --git a/chrome/browser/speech/speech_input_bubble_controller.cc b/chrome/browser/speech/speech_input_bubble_controller.cc
index 08647d4..bf1f923 100644
--- a/chrome/browser/speech/speech_input_bubble_controller.cc
+++ b/chrome/browser/speech/speech_input_bubble_controller.cc
@@ -46,77 +46,79 @@ void SpeechInputBubbleController::CreateBubble(int caller_id,
}
void SpeechInputBubbleController::CloseBubble(int caller_id) {
- if (!ChromeThread::CurrentlyOn(ChromeThread::UI)) {
- ChromeThread::PostTask(
- ChromeThread::UI, FROM_HERE,
- NewRunnableMethod(this, &SpeechInputBubbleController::CloseBubble,
- caller_id));
- return;
- }
- DCHECK(ChromeThread::CurrentlyOn(ChromeThread::UI));
-
- if (current_bubble_caller_id_ == caller_id)
- current_bubble_caller_id_ = 0;
- delete bubbles_[caller_id];
- bubbles_.erase(caller_id);
+ ProcessRequestInUiThread(caller_id, REQUEST_CLOSE, string16(), 0);
}
void SpeechInputBubbleController::SetBubbleRecordingMode(int caller_id) {
- if (!ChromeThread::CurrentlyOn(ChromeThread::UI)) {
- ChromeThread::PostTask(ChromeThread::UI, FROM_HERE, NewRunnableMethod(
- this, &SpeechInputBubbleController::SetBubbleRecordingMode,
- caller_id));
- return;
- }
- SetBubbleRecordingModeOrMessage(caller_id, string16());
+ ProcessRequestInUiThread(caller_id, REQUEST_SET_RECORDING_MODE,
+ string16(), 0);
}
void SpeechInputBubbleController::SetBubbleRecognizingMode(int caller_id) {
- if (!ChromeThread::CurrentlyOn(ChromeThread::UI)) {
- ChromeThread::PostTask(ChromeThread::UI, FROM_HERE, NewRunnableMethod(
- this, &SpeechInputBubbleController::SetBubbleRecognizingMode,
- caller_id));
- return;
- }
- DCHECK(ChromeThread::CurrentlyOn(ChromeThread::UI));
- // The bubble may have been closed before we got a chance to process this
- // request. So check before proceeding.
- if (!bubbles_.count(caller_id))
- return;
+ ProcessRequestInUiThread(caller_id, REQUEST_SET_RECOGNIZING_MODE,
+ string16(), 0);
+}
- bubbles_[caller_id]->SetRecognizingMode();
+void SpeechInputBubbleController::SetBubbleInputVolume(int caller_id,
+ float volume) {
+ ProcessRequestInUiThread(caller_id, REQUEST_SET_INPUT_VOLUME, string16(),
+ volume);
}
void SpeechInputBubbleController::SetBubbleMessage(int caller_id,
const string16& text) {
+ ProcessRequestInUiThread(caller_id, REQUEST_SET_MESSAGE, text, 0);
+}
+
+void SpeechInputBubbleController::ProcessRequestInUiThread(
+ int caller_id, RequestType type, const string16& text, float volume) {
if (!ChromeThread::CurrentlyOn(ChromeThread::UI)) {
ChromeThread::PostTask(ChromeThread::UI, FROM_HERE, NewRunnableMethod(
- this, &SpeechInputBubbleController::SetBubbleMessage,
- caller_id, text));
+ this, &SpeechInputBubbleController::ProcessRequestInUiThread,
+ caller_id, type, text, volume));
return;
}
- SetBubbleRecordingModeOrMessage(caller_id, text);
-}
-
-void SpeechInputBubbleController::SetBubbleRecordingModeOrMessage(
- int caller_id, const string16& text) {
DCHECK(ChromeThread::CurrentlyOn(ChromeThread::UI));
// The bubble may have been closed before we got a chance to process this
// request. So check before proceeding.
if (!bubbles_.count(caller_id))
return;
- if (current_bubble_caller_id_ && current_bubble_caller_id_ != caller_id)
- bubbles_[current_bubble_caller_id_]->Hide();
+ bool change_active_bubble = (type == REQUEST_SET_RECORDING_MODE ||
+ type == REQUEST_SET_MESSAGE);
+ if (change_active_bubble) {
+ if (current_bubble_caller_id_ && current_bubble_caller_id_ != caller_id)
+ bubbles_[current_bubble_caller_id_]->Hide();
+ current_bubble_caller_id_ = caller_id;
+ }
- current_bubble_caller_id_ = caller_id;
SpeechInputBubble* bubble = bubbles_[caller_id];
- if (text.empty()) {
- bubble->SetRecordingMode();
- } else {
- bubble->SetMessage(text);
+ switch (type) {
+ case REQUEST_SET_RECORDING_MODE:
+ bubble->SetRecordingMode();
+ break;
+ case REQUEST_SET_RECOGNIZING_MODE:
+ bubble->SetRecognizingMode();
+ break;
+ case REQUEST_SET_MESSAGE:
+ bubble->SetMessage(text);
+ break;
+ case REQUEST_SET_INPUT_VOLUME:
+ bubble->SetInputVolume(volume);
+ break;
+ case REQUEST_CLOSE:
+ if (current_bubble_caller_id_ == caller_id)
+ current_bubble_caller_id_ = 0;
+ delete bubble;
+ bubbles_.erase(caller_id);
+ break;
+ default:
+ NOTREACHED();
+ break;
}
- bubble->Show();
+
+ if (change_active_bubble)
+ bubble->Show();
}
void SpeechInputBubbleController::InfoBubbleButtonClicked(
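The refactor above collapses four separate UI-thread hops into one: each public method packages its arguments as a (RequestType, text, volume) triple and forwards to ProcessRequestInUiThread, which reposts itself to the UI thread when needed, re-checks that the bubble still exists, and then dispatches on the type. The sketch below is a minimal stand-alone model of that shape, with a plain task queue standing in for ChromeThread::PostTask; the names and types here are illustrative, not the Chromium API.

#include <cstdio>
#include <functional>
#include <map>
#include <queue>
#include <string>

enum RequestType { REQUEST_SET_RECORDING_MODE, REQUEST_SET_MESSAGE,
                   REQUEST_SET_INPUT_VOLUME, REQUEST_CLOSE };

class BubbleControllerModel {
 public:
  // Public entry points only package their arguments; all policy lives in
  // Process(), which always runs on the (simulated) UI thread.
  void SetRecordingMode(int id) { Post(id, REQUEST_SET_RECORDING_MODE, "", 0); }
  void SetMessage(int id, const std::string& text) {
    Post(id, REQUEST_SET_MESSAGE, text, 0);
  }
  void SetInputVolume(int id, float volume) {
    Post(id, REQUEST_SET_INPUT_VOLUME, "", volume);
  }
  void Close(int id) { Post(id, REQUEST_CLOSE, "", 0); }

  // Stand-in for the UI thread's message loop draining posted tasks.
  void RunUiThreadTasks() {
    while (!ui_tasks_.empty()) { ui_tasks_.front()(); ui_tasks_.pop(); }
  }

 private:
  void Post(int id, RequestType type, std::string text, float volume) {
    ui_tasks_.push([=] { Process(id, type, text, volume); });
  }

  void Process(int id, RequestType type, const std::string& text,
               float volume) {
    if (!bubbles_.count(id))  // The bubble may have been closed already.
      return;
    switch (type) {
      case REQUEST_SET_RECORDING_MODE: std::puts("recording"); break;
      case REQUEST_SET_MESSAGE: std::printf("message: %s\n", text.c_str()); break;
      case REQUEST_SET_INPUT_VOLUME: std::printf("volume: %.2f\n", volume); break;
      case REQUEST_CLOSE: bubbles_.erase(id); break;
    }
  }

  std::map<int, bool> bubbles_{{1, true}};  // Fake registry of open bubbles.
  std::queue<std::function<void()>> ui_tasks_;
};

int main() {
  BubbleControllerModel controller;
  controller.SetInputVolume(1, 0.42f);
  controller.Close(1);
  controller.SetInputVolume(1, 0.8f);  // Ignored: the bubble is gone by then.
  controller.RunUiThreadTasks();       // Prints "volume: 0.42".
  return 0;
}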
diff --git a/chrome/browser/speech/speech_input_bubble_controller.h b/chrome/browser/speech/speech_input_bubble_controller.h
index 2117b24..0a20333 100644
--- a/chrome/browser/speech/speech_input_bubble_controller.h
+++ b/chrome/browser/speech/speech_input_bubble_controller.h
@@ -64,6 +64,9 @@ class SpeechInputBubbleController
// bubble is hidden, |Show| must be called to make it appear on screen.
void SetBubbleMessage(int caller_id, const string16& text);
+ // Updates the current captured audio volume displayed on screen.
+ void SetBubbleInputVolume(int caller_id, float volume);
+
void CloseBubble(int caller_id);
// SpeechInputBubble::Delegate methods.
@@ -71,10 +74,22 @@ class SpeechInputBubbleController
virtual void InfoBubbleFocusChanged();
private:
+ // The various calls received by this object and handled in the UI thread.
+ enum RequestType {
+ REQUEST_SET_RECORDING_MODE,
+ REQUEST_SET_RECOGNIZING_MODE,
+ REQUEST_SET_MESSAGE,
+ REQUEST_SET_INPUT_VOLUME,
+ REQUEST_CLOSE,
+ };
+
void InvokeDelegateButtonClicked(int caller_id,
SpeechInputBubble::Button button);
void InvokeDelegateFocusChanged(int caller_id);
- void SetBubbleRecordingModeOrMessage(int caller_id, const string16& text);
+ void ProcessRequestInUiThread(int caller_id,
+ RequestType type,
+ const string16& text,
+ float volume);
// Only accessed in the IO thread.
Delegate* delegate_;
diff --git a/chrome/browser/speech/speech_input_bubble_controller_unittest.cc b/chrome/browser/speech/speech_input_bubble_controller_unittest.cc
index 59eac15..bc479c8 100644
--- a/chrome/browser/speech/speech_input_bubble_controller_unittest.cc
+++ b/chrome/browser/speech/speech_input_bubble_controller_unittest.cc
@@ -8,6 +8,8 @@
#include "gfx/rect.h"
#include "testing/gtest/include/gtest/gtest.h"
+class SkBitmap;
+
namespace speech_input {
// A mock bubble class which fakes a focus change or recognition cancel by the
@@ -49,6 +51,7 @@ class MockSpeechInputBubble : public SpeechInputBubbleBase {
virtual void Show() {}
virtual void Hide() {}
virtual void UpdateLayout() {}
+ virtual void SetImage(const SkBitmap&) {}
private:
static BubbleType type_;
diff --git a/chrome/browser/speech/speech_input_bubble_gtk.cc b/chrome/browser/speech/speech_input_bubble_gtk.cc
index db61256..7564ac4 100644
--- a/chrome/browser/speech/speech_input_bubble_gtk.cc
+++ b/chrome/browser/speech/speech_input_bubble_gtk.cc
@@ -47,6 +47,7 @@ class SpeechInputBubbleGtk
virtual void Show();
virtual void Hide();
virtual void UpdateLayout();
+ virtual void SetImage(const SkBitmap& image);
CHROMEGTK_CALLBACK_0(SpeechInputBubbleGtk, void, OnCancelClicked);
CHROMEGTK_CALLBACK_0(SpeechInputBubbleGtk, void, OnTryAgainClicked);
@@ -181,7 +182,7 @@ void SpeechInputBubbleGtk::UpdateLayout() {
gtk_label_set_text(GTK_LABEL(label_),
l10n_util::GetStringUTF8(IDS_SPEECH_INPUT_BUBBLE_HEADING).c_str());
SkBitmap* image = ResourceBundle::GetSharedInstance().GetBitmapNamed(
- display_mode() == DISPLAY_MODE_RECORDING ? IDR_SPEECH_INPUT_RECORDING :
+ display_mode() == DISPLAY_MODE_RECORDING ? IDR_SPEECH_INPUT_MIC_EMPTY :
IDR_SPEECH_INPUT_PROCESSING);
GdkPixbuf* pixbuf = gfx::GdkPixbufFromSkBitmap(image);
gtk_image_set_from_pixbuf(GTK_IMAGE(icon_), pixbuf);
@@ -191,6 +192,11 @@ void SpeechInputBubbleGtk::UpdateLayout() {
}
}
+void SpeechInputBubbleGtk::SetImage(const SkBitmap& image) {
+ // TODO(satish): Implement.
+ NOTREACHED();
+}
+
} // namespace
SpeechInputBubble* SpeechInputBubble::CreateNativeBubble(
diff --git a/chrome/browser/speech/speech_input_bubble_mac.mm b/chrome/browser/speech/speech_input_bubble_mac.mm
index fd01ca2..f22db5f 100644
--- a/chrome/browser/speech/speech_input_bubble_mac.mm
+++ b/chrome/browser/speech/speech_input_bubble_mac.mm
@@ -25,6 +25,7 @@ class SpeechInputBubbleImpl : public SpeechInputBubbleBase {
virtual void Show();
virtual void Hide();
virtual void UpdateLayout();
+ virtual void SetImage(const SkBitmap& image);
private:
scoped_nsobject<SpeechInputWindowController> window_;
@@ -55,6 +56,11 @@ SpeechInputBubbleImpl::~SpeechInputBubbleImpl() {
[window_.get() close];
}
+void SpeechInputBubbleImpl::SetImage(const SkBitmap& image) {
+ // TODO(satish): Implement.
+ NOTREACHED();
+}
+
void SpeechInputBubbleImpl::Show() {
// TODO(satish): Implement.
NOTREACHED();
diff --git a/chrome/browser/speech/speech_input_bubble_views.cc b/chrome/browser/speech/speech_input_bubble_views.cc
index a3dd616..d41c79b 100644
--- a/chrome/browser/speech/speech_input_bubble_views.cc
+++ b/chrome/browser/speech/speech_input_bubble_views.cc
@@ -40,6 +40,7 @@ class ContentView
void UpdateLayout(SpeechInputBubbleBase::DisplayMode mode,
const string16& message_text);
+ void SetImage(const SkBitmap& image);
// views::ButtonListener methods.
virtual void ButtonPressed(views::Button* source, const views::Event& event);
@@ -78,7 +79,7 @@ ContentView::ContentView(SpeechInputBubbleDelegate* delegate)
icon_ = new views::ImageView();
icon_->SetImage(*ResourceBundle::GetSharedInstance().GetBitmapNamed(
- IDR_SPEECH_INPUT_RECORDING));
+ IDR_SPEECH_INPUT_MIC_EMPTY));
icon_->SetHorizontalAlignment(views::ImageView::CENTER);
AddChildView(icon_);
@@ -104,10 +105,14 @@ void ContentView::UpdateLayout(SpeechInputBubbleBase::DisplayMode mode,
} else {
icon_->SetImage(*ResourceBundle::GetSharedInstance().GetBitmapNamed(
(mode == SpeechInputBubbleBase::DISPLAY_MODE_RECORDING) ?
- IDR_SPEECH_INPUT_RECORDING : IDR_SPEECH_INPUT_PROCESSING));
+ IDR_SPEECH_INPUT_MIC_EMPTY : IDR_SPEECH_INPUT_PROCESSING));
}
}
+void ContentView::SetImage(const SkBitmap& image) {
+ icon_->SetImage(image);
+}
+
void ContentView::ButtonPressed(views::Button* source,
const views::Event& event) {
if (source == cancel_) {
@@ -203,6 +208,7 @@ class SpeechInputBubbleImpl
// SpeechInputBubbleBase methods.
virtual void UpdateLayout();
+ virtual void SetImage(const SkBitmap& image);
// Returns the screen rectangle to use as the info bubble's target.
// |element_rect| is the html element's bounds in page coordinates.
@@ -324,6 +330,11 @@ void SpeechInputBubbleImpl::UpdateLayout() {
info_bubble_->SizeToContents();
}
+void SpeechInputBubbleImpl::SetImage(const SkBitmap& image) {
+ if (bubble_content_)
+ bubble_content_->SetImage(image);
+}
+
} // namespace
SpeechInputBubble* SpeechInputBubble::CreateNativeBubble(
diff --git a/chrome/browser/speech/speech_input_manager.cc b/chrome/browser/speech/speech_input_manager.cc
index 4c2bdca..a029bb4 100644
--- a/chrome/browser/speech/speech_input_manager.cc
+++ b/chrome/browser/speech/speech_input_manager.cc
@@ -40,6 +40,7 @@ class SpeechInputManagerImpl : public SpeechInputManager,
virtual void OnRecognizerError(int caller_id,
SpeechRecognizer::ErrorCode error);
virtual void DidCompleteEnvironmentEstimation(int caller_id);
+ virtual void SetInputVolume(int caller_id, float volume);
// SpeechInputBubbleController::Delegate methods.
virtual void InfoBubbleButtonClicked(int caller_id,
@@ -205,6 +206,13 @@ void SpeechInputManagerImpl::DidCompleteEnvironmentEstimation(int caller_id) {
bubble_controller_->SetBubbleRecordingMode(caller_id);
}
+void SpeechInputManagerImpl::SetInputVolume(int caller_id, float volume) {
+ DCHECK(HasPendingRequest(caller_id));
+ DCHECK_EQ(recording_caller_id_, caller_id);
+
+ bubble_controller_->SetBubbleInputVolume(caller_id, volume);
+}
+
void SpeechInputManagerImpl::CancelRecognitionAndInformDelegate(int caller_id) {
SpeechInputManagerDelegate* cur_delegate = GetDelegate(caller_id);
CancelRecognition(caller_id);
diff --git a/chrome/browser/speech/speech_recognizer.cc b/chrome/browser/speech/speech_recognizer.cc
index 800b044..6eed2f5 100644
--- a/chrome/browser/speech/speech_recognizer.cc
+++ b/chrome/browser/speech/speech_recognizer.cc
@@ -28,7 +28,14 @@ const int kMaxSpeexFrameLength = 110; // (44kbps rate sampled at 32kHz).
// make sure it is within the byte range.
COMPILE_ASSERT(kMaxSpeexFrameLength <= 0xFF, invalidLength);
-const int kEndpointerEstimationTimeMs = 300;
+// The following constants are related to the volume level indicator shown in
+// the UI for recorded audio.
+// Multiplier used when new volume is greater than previous level.
+const float kUpSmoothingFactor = 0.9f;
+// Multiplier used when new volume is lesser than previous level.
+const float kDownSmoothingFactor = 0.4f;
+const float kAudioMeterMinDb = 10.0f; // Lower bar for volume meter.
+const float kAudioMeterDbRange = 25.0f;
} // namespace
namespace speech_input {
@@ -38,6 +45,7 @@ const int SpeechRecognizer::kAudioPacketIntervalMs = 100;
const int SpeechRecognizer::kNumAudioChannels = 1;
const int SpeechRecognizer::kNumBitsPerAudioSample = 16;
const int SpeechRecognizer::kNoSpeechTimeoutSec = 8;
+const int SpeechRecognizer::kEndpointerEstimationTimeMs = 300;
// Provides a simple interface to encode raw audio using the Speex codec.
class SpeexEncoder {
@@ -102,7 +110,8 @@ SpeechRecognizer::SpeechRecognizer(Delegate* delegate, int caller_id)
: delegate_(delegate),
caller_id_(caller_id),
encoder_(new SpeexEncoder()),
- endpointer_(kAudioSampleRate) {
+ endpointer_(kAudioSampleRate),
+ audio_level_(0.0f) {
endpointer_.set_speech_input_complete_silence_length(
base::Time::kMicrosecondsPerSecond / 2);
endpointer_.set_long_speech_input_complete_silence_length(
@@ -259,18 +268,20 @@ void SpeechRecognizer::HandleOnData(string* data) {
int num_samples = data->length() / sizeof(short);
encoder_->Encode(samples, num_samples, &audio_buffers_);
- endpointer_.ProcessAudio(samples, num_samples);
+ float rms;
+ endpointer_.ProcessAudio(samples, num_samples, &rms);
delete data;
num_samples_recorded_ += num_samples;
- // Check if we have gathered enough audio for the endpointer to do environment
- // estimation and should move on to detect speech/end of speech.
- if (endpointer_.IsEstimatingEnvironment() &&
- num_samples_recorded_ >= (kEndpointerEstimationTimeMs *
- kAudioSampleRate) / 1000) {
- endpointer_.SetUserInputMode();
- delegate_->DidCompleteEnvironmentEstimation(caller_id_);
- return;
+ if (endpointer_.IsEstimatingEnvironment()) {
+ // Check if we have gathered enough audio for the endpointer to do
+ // environment estimation and should move on to detect speech/end of speech.
+ if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs *
+ kAudioSampleRate) / 1000) {
+ endpointer_.SetUserInputMode();
+ delegate_->DidCompleteEnvironmentEstimation(caller_id_);
+ }
+ return; // No more processing since we are still estimating environment.
}
// Check if we have waited too long without hearing any speech.
@@ -280,6 +291,17 @@ void SpeechRecognizer::HandleOnData(string* data) {
return;
}
+ // Calculate the input volume to display in the UI, smoothing towards the
+ // new level.
+ float level = (rms - kAudioMeterMinDb) / kAudioMeterDbRange;
+ level = std::min(std::max(0.0f, level), 1.0f);
+ if (level > audio_level_) {
+ audio_level_ += (level - audio_level_) * kUpSmoothingFactor;
+ } else {
+ audio_level_ += (level - audio_level_) * kDownSmoothingFactor;
+ }
+ delegate_->SetInputVolume(caller_id_, audio_level_);
+
if (endpointer_.speech_input_complete()) {
StopRecording();
}
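One consequence of the clamping and smoothing above, which the new SetInputVolumeCallback unit test further down relies on: after environment estimation, a packet of all-zero samples has an RMS at or below the 1e-6 ratio floor, so the endpointer reports -120 dB, the level clamps to 0.0 and the delegate keeps seeing 0 (EXPECT_EQ(0, volume_)). The sawtooth packet sits well above the 35 dB top of the meter range, so the level clamps to 1.0 and the first smoothed value is 0.0 + (1.0 - 0.0) * kUpSmoothingFactor = 0.9, exactly what EXPECT_FLOAT_EQ(0.9f, volume_) checks.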
diff --git a/chrome/browser/speech/speech_recognizer.h b/chrome/browser/speech/speech_recognizer.h
index 4a18cb1..7e154ac 100644
--- a/chrome/browser/speech/speech_recognizer.h
+++ b/chrome/browser/speech/speech_recognizer.h
@@ -63,6 +63,11 @@ class SpeechRecognizer
// recognition UI once this callback is received.
virtual void DidCompleteEnvironmentEstimation(int caller_id) = 0;
+ // Informs of a change in the captured audio level, useful if displaying
+ // a microphone volume indicator while recording.
+ // The value of |volume| is in the [0.0, 1.0] range.
+ virtual void SetInputVolume(int caller_id, float volume) = 0;
+
protected:
virtual ~Delegate() {}
};
@@ -98,6 +103,7 @@ class SpeechRecognizer
static const int kNumAudioChannels;
static const int kNumBitsPerAudioSample;
static const int kNoSpeechTimeoutSec;
+ static const int kEndpointerEstimationTimeMs;
private:
void ReleaseAudioBuffers();
@@ -120,6 +126,7 @@ class SpeechRecognizer
scoped_ptr<SpeexEncoder> encoder_;
Endpointer endpointer_;
int num_samples_recorded_;
+ float audio_level_;
DISALLOW_COPY_AND_ASSIGN(SpeechRecognizer);
};
diff --git a/chrome/browser/speech/speech_recognizer_unittest.cc b/chrome/browser/speech/speech_recognizer_unittest.cc
index 9e1cf07..e049558 100644
--- a/chrome/browser/speech/speech_recognizer_unittest.cc
+++ b/chrome/browser/speech/speech_recognizer_unittest.cc
@@ -15,10 +15,6 @@ using media::AudioInputController;
using media::TestAudioInputController;
using media::TestAudioInputControllerFactory;
-namespace {
-const int kAudioPacketLengthBytes = 1000;
-}
-
namespace speech_input {
class SpeechRecognizerTest : public SpeechRecognizerDelegate,
@@ -31,7 +27,8 @@ class SpeechRecognizerTest : public SpeechRecognizerDelegate,
recording_complete_(false),
recognition_complete_(false),
result_received_(false),
- error_(SpeechRecognizer::RECOGNIZER_NO_ERROR) {
+ error_(SpeechRecognizer::RECOGNIZER_NO_ERROR),
+ volume_(-1.0f) {
int audio_packet_length_bytes =
(SpeechRecognizer::kAudioSampleRate *
SpeechRecognizer::kAudioPacketIntervalMs *
@@ -67,6 +64,10 @@ class SpeechRecognizerTest : public SpeechRecognizerDelegate,
error_ = error;
}
+ virtual void SetInputVolume(int caller_id, float volume) {
+ volume_ = volume;
+ }
+
// testing::Test methods.
virtual void SetUp() {
URLFetcher::set_factory(&url_fetcher_factory_);
@@ -78,6 +79,12 @@ class SpeechRecognizerTest : public SpeechRecognizerDelegate,
AudioInputController::set_factory(NULL);
}
+ void FillPacketWithTestWaveform() {
+ // Fill the input with a simple pattern, a 125Hz sawtooth waveform.
+ for (size_t i = 0; i < audio_packet_.size(); ++i)
+ audio_packet_[i] = static_cast<uint8>(i);
+ }
+
protected:
MessageLoopForIO message_loop_;
ChromeThread io_thread_;
@@ -89,6 +96,7 @@ class SpeechRecognizerTest : public SpeechRecognizerDelegate,
TestURLFetcherFactory url_fetcher_factory_;
TestAudioInputControllerFactory audio_input_controller_factory_;
std::vector<uint8> audio_packet_;
+ float volume_;
};
TEST_F(SpeechRecognizerTest, StopNoData) {
@@ -237,9 +245,8 @@ TEST_F(SpeechRecognizerTest, NoSpeechCallbackNotIssued) {
controller->event_handler()->OnData(controller, &audio_packet_[0],
audio_packet_.size());
}
- // Fill the rest of input with a simple pattern, a 125Hz sawtooth waveform.
- for (size_t i = 0; i < audio_packet_.size(); ++i)
- audio_packet_[i] = static_cast<uint8>(i);
+
+ FillPacketWithTestWaveform();
for (int i = 0; i < num_packets / 2; ++i) {
controller->event_handler()->OnData(controller, &audio_packet_[0],
audio_packet_.size());
@@ -252,4 +259,44 @@ TEST_F(SpeechRecognizerTest, NoSpeechCallbackNotIssued) {
recognizer_->CancelRecognition();
}
+TEST_F(SpeechRecognizerTest, SetInputVolumeCallback) {
+ // Start recording and give a lot of packets with audio samples set to zero
+ // and then some more with reasonably loud audio samples. Check that we don't
+ // get the callback during estimation phase, then get zero for the silence
+ // samples and proper volume for the loud audio.
+ EXPECT_TRUE(recognizer_->StartRecording());
+ TestAudioInputController* controller =
+ audio_input_controller_factory_.controller();
+ ASSERT_TRUE(controller);
+ controller = audio_input_controller_factory_.controller();
+ ASSERT_TRUE(controller);
+
+ // Feed some samples to begin with for the endpointer to do noise estimation.
+ int num_packets = SpeechRecognizer::kEndpointerEstimationTimeMs /
+ SpeechRecognizer::kAudioPacketIntervalMs;
+ for (int i = 0; i < num_packets; ++i) {
+ controller->event_handler()->OnData(controller, &audio_packet_[0],
+ audio_packet_.size());
+ }
+ MessageLoop::current()->RunAllPending();
+ EXPECT_EQ(-1.0f, volume_); // No audio volume set yet.
+
+ // The vector is already filled with zero value samples on create.
+ controller->event_handler()->OnData(controller, &audio_packet_[0],
+ audio_packet_.size());
+ MessageLoop::current()->RunAllPending();
+ EXPECT_EQ(0, volume_);
+
+ FillPacketWithTestWaveform();
+ controller->event_handler()->OnData(controller, &audio_packet_[0],
+ audio_packet_.size());
+ MessageLoop::current()->RunAllPending();
+ EXPECT_FLOAT_EQ(0.9f, volume_);
+
+ EXPECT_EQ(SpeechRecognizer::RECOGNIZER_NO_ERROR, error_);
+ EXPECT_FALSE(recording_complete_);
+ EXPECT_FALSE(recognition_complete_);
+ recognizer_->CancelRecognition();
+}
+
} // namespace speech_input