Move core pieces of speech from chrome to content.

TBR=satish Review URL: http://codereview.chromium.org/6591024 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@76165 0039d316-1c4b-4281-b951-d872f2087c98
author: jam@chromium.org <jam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-02-26 18:46:15 +0000
committer: jam@chromium.org <jam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-02-26 18:46:15 +0000
commit: 50fab53bddb2c3cb24d5682c913a03226ccf49ef (patch)
tree: bb04af83ca5f2be010e32c2e10cfd245117a4847 /content/browser/speech/endpointer/endpointer.h
parent: 5c557f37629dc12dfd99e8fb55c235c8c46a8098 (diff)
download: chromium_src-50fab53bddb2c3cb24d5682c913a03226ccf49ef.zip
chromium_src-50fab53bddb2c3cb24d5682c913a03226ccf49ef.tar.gz
chromium_src-50fab53bddb2c3cb24d5682c913a03226ccf49ef.tar.bz2
1 files changed, 148 insertions, 0 deletions
diff --git a/content/browser/speech/endpointer/endpointer.h b/content/browser/speech/endpointer/endpointer.h
new file mode 100644
index 0000000..be4bd65
--- /dev/null
+++ b/content/browser/speech/endpointer/endpointer.h
@@ -0,0 +1,148 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
+#define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
+
+#include "base/basictypes.h"
+#include "content/browser/speech/endpointer/energy_endpointer.h"
+
+class EpStatus;  // NOTE(review): used by value below; confirm full definition.
+
+namespace speech_input {
+
+// A simple interface to the underlying energy-endpointer implementation. This
+// class lets callers provide audio as it is being recorded and lets them poll
+// to find when the user has stopped speaking.
+//
+// There are two events that may trigger the end of speech:
+//
+// speechInputPossiblyComplete event:
+//
+// Signals that silence/noise has been detected for a *short* amount of
+// time after some speech has been detected. It can be used for low latency
+// UI feedback. To disable it, set its silence length to a large value.
+//
+// speechInputComplete event:
+//
+// This event is intended to signal end of input and to stop recording.
+// The amount of time to wait after speech is set by
+// speech_input_complete_silence_length_us_ and optionally two other
+// parameters (see below).
+// This time can be held constant, or can change as more speech is detected.
+// In the latter case, the time changes after a set amount of time from the
+// *beginning* of speech.  This is motivated by the expectation that there
+// will be two distinct types of inputs: short search queries and longer
+// dictation style input.
+//
+// Three parameters are used to define the piecewise constant timeout function.
+// The timeout length is speech_input_complete_silence_length_us_ until
+// long_speech_length_us_, when it changes to
+// long_speech_input_complete_silence_length_us_.
+class Endpointer {
+ public:
+  explicit Endpointer(int sample_rate);
+
+  // Start the endpointer. This should be called at the beginning of a session.
+  void StartSession();
+
+  // Stop the endpointer.
+  void EndSession();
+
+  // Start environment estimation. Audio will be used for environment estimation
+  // i.e. noise level estimation.
+  void SetEnvironmentEstimationMode();
+
+  // Start user input. This should be called when the user indicates start of
+  // input, e.g. by pressing a button.
+  void SetUserInputMode();
+
+  // Process a segment of audio, which may be more than one frame.
+  // The status of the last frame will be returned.
+  EpStatus ProcessAudio(const int16* audio_data, int num_samples,
+                        float* rms_out);
+
+  // Get the status of the endpointer.
+  EpStatus Status(int64 *time_us);
+
+  // Returns true if the endpointer detected reasonable audio levels above
+  // background noise which could be user speech, false if not.
+  bool DidStartReceivingSpeech() const {
+    return speech_previously_detected_;
+  }
+
+  bool IsEstimatingEnvironment() const {  // Forwards to energy_endpointer_.
+    return energy_endpointer_.estimating_environment();
+  }
+
+  void set_speech_input_complete_silence_length(int64 time_us) {
+    speech_input_complete_silence_length_us_ = time_us;
+  }
+
+  void set_long_speech_input_complete_silence_length(int64 time_us) {
+    long_speech_input_complete_silence_length_us_ = time_us;
+  }
+
+  void set_speech_input_possibly_complete_silence_length(int64 time_us) {
+    speech_input_possibly_complete_silence_length_us_ = time_us;
+  }
+
+  void set_long_speech_length(int64 time_us) {
+    long_speech_length_us_ = time_us;
+  }
+
+  bool speech_input_complete() const {
+    return speech_input_complete_;
+  }
+
+ private:
+  // Reset internal states. Helper method common to initial input utterance
+  // and following input utterances.
+  void Reset();
+
+  // Minimum allowable length of speech input.
+  int64 speech_input_minimum_length_us_;
+
+  // The speechInputPossiblyComplete event signals that silence/noise has been
+  // detected for a *short* amount of time after some speech has been detected.
+  // This property specifies the time period.
+  int64 speech_input_possibly_complete_silence_length_us_;
+
+  // The speechInputComplete event signals that silence/noise has been
+  // detected for a *long* amount of time after some speech has been detected.
+  // This property specifies the time period.
+  int64 speech_input_complete_silence_length_us_;
+
+  // Same as above, this specifies the required silence period after speech
+  // detection. This period is used instead of
+  // speech_input_complete_silence_length_us_ when the utterance is longer than
+  // long_speech_length_us_. This parameter is optional.
+  int64 long_speech_input_complete_silence_length_us_;
+
+  // The period of time after which the endpointer should consider
+  // long_speech_input_complete_silence_length_us_ as a valid silence period
+  // instead of speech_input_complete_silence_length_us_. This parameter is
+  // optional.
+  int64 long_speech_length_us_;
+
+  // First speech onset time, used in determination of speech complete timeout.
+  int64 speech_start_time_us_;
+
+  // Most recent end time, used in determination of speech complete timeout.
+  int64 speech_end_time_us_;
+
+  int64 audio_frame_time_us_;
+  EpStatus old_ep_status_;  // Presumably the previous status; TODO confirm.
+  bool waiting_for_speech_possibly_complete_timeout_;
+  bool waiting_for_speech_complete_timeout_;
+  bool speech_previously_detected_;  // Exposed via DidStartReceivingSpeech().
+  bool speech_input_complete_;  // Exposed via speech_input_complete().
+  EnergyEndpointer energy_endpointer_;  // Underlying implementation.
+  int sample_rate_;  // Presumably the constructor's sample_rate; TODO confirm.
+  int32 frame_size_;
+};
+
+}  // namespace speech_input
+
+#endif  // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
author	jam@chromium.org <jam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-02-26 18:46:15 +0000
committer	jam@chromium.org <jam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-02-26 18:46:15 +0000
commit	50fab53bddb2c3cb24d5682c913a03226ccf49ef (patch)
tree	bb04af83ca5f2be010e32c2e10cfd245117a4847 /content/browser/speech/endpointer/endpointer.h
parent	5c557f37629dc12dfd99e8fb55c235c8c46a8098 (diff)
download	chromium_src-50fab53bddb2c3cb24d5682c913a03226ccf49ef.zip chromium_src-50fab53bddb2c3cb24d5682c913a03226ccf49ef.tar.gz chromium_src-50fab53bddb2c3cb24d5682c913a03226ccf49ef.tar.bz2