SIMD optimizations of the exponentially-weighted moving average computation in AudioPowerMonitor.

Benchmark tests show a 445% speed-up using the SSE implementation over the original code in AudioPowerMonitor::Scan(). Also worth noting: a minor change to the non-SIMD code allowed a 30% speed-up over the original!

TEST=media_unittests; also manually confirmed that the tab audio indicator still works properly.

Review URL: https://codereview.chromium.org/84563002

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@237268 0039d316-1c4b-4281-b951-d872f2087c98
author: miu@chromium.org <miu@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-11-26 05:52:06 +0000
committer: miu@chromium.org <miu@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-11-26 05:52:06 +0000
commit: d1bb9acf8f0c223159f10a168e35a486d9ba1dd4 (patch)
tree: a89d5eecd5edd921391ef8cfdd5fe5256d5392ec /media
parent: 979b4cb73c1a4332a02047afb960ffe59c015b6c (diff)
download: chromium_src-d1bb9acf8f0c223159f10a168e35a486d9ba1dd4.zip
chromium_src-d1bb9acf8f0c223159f10a168e35a486d9ba1dd4.tar.gz
chromium_src-d1bb9acf8f0c223159f10a168e35a486d9ba1dd4.tar.bz2
7 files changed, 506 insertions, 20 deletions
diff --git a/media/audio/audio_power_monitor.cc b/media/audio/audio_power_monitor.cc
index d8b9436..6536f46 100644
--- a/media/audio/audio_power_monitor.cc
+++ b/media/audio/audio_power_monitor.cc
@@ -11,6 +11,7 @@
 #include "base/logging.h"
 #include "base/time/time.h"
 #include "media/base/audio_bus.h"
+#include "media/base/vector_math.h"
 
 namespace media {
 
@@ -36,30 +37,19 @@ void AudioPowerMonitor::Scan(const AudioBus& buffer, int num_frames) {
     return;
 
   // Calculate a new average power by applying a first-order low-pass filter
-  // over the audio samples in |buffer|.
-  //
-  // TODO(miu): Implement optimized SSE/NEON to more efficiently compute the
-  // results (in media/base/vector_math) in soon-upcoming change.
+  // (a.k.a. an exponentially-weighted moving average) over the audio samples in
+  // each channel in |buffer|.
   float sum_power = 0.0f;
   for (int i = 0; i < num_channels; ++i) {
-    float average_power_this_channel = average_power_;
-    bool clipped = false;
-    const float* p = buffer.channel(i);
-    const float* const end_of_samples = p + num_frames;
-    for (; p < end_of_samples; ++p) {
-      const float sample = *p;
-      const float sample_squared = sample * sample;
-      clipped |= (sample_squared > 1.0f);
-      average_power_this_channel +=
-          (sample_squared - average_power_this_channel) * sample_weight_;
-    }
+    const std::pair<float, float> ewma_and_max = vector_math::EWMAAndMaxPower(
+        average_power_, buffer.channel(i), num_frames, sample_weight_);
     // If data in audio buffer is garbage, ignore its effect on the result.
-    if (base::IsNaN(average_power_this_channel)) {
-      average_power_this_channel = average_power_;
-      clipped = false;
+    if (!base::IsFinite(ewma_and_max.first)) {
+      sum_power += average_power_;
+    } else {
+      sum_power += ewma_and_max.first;
+      has_clipped_ |= (ewma_and_max.second > 1.0f);
     }
-    sum_power += average_power_this_channel;
-    has_clipped_ |= clipped;
   }
 
   // Update accumulated results, with clamping for sanity.
diff --git a/media/base/simd/vector_math_sse.cc b/media/base/simd/vector_math_sse.cc
index 39bcaa0..c212122 100644
--- a/media/base/simd/vector_math_sse.cc
+++ b/media/base/simd/vector_math_sse.cc
@@ -4,6 +4,8 @@
 
 #include "media/base/vector_math_testing.h"
 
+#include <algorithm>
+
 #include <xmmintrin.h>  // NOLINT
 
 namespace media {
@@ -35,5 +37,82 @@ void FMAC_SSE(const float src[], float scale, int len, float dest[]) {
     dest[i] += src[i] * scale;
 }
 
+// Convenience macro to extract float 0 through 3 from the vector |a|.  This is
+// needed because compilers other than clang don't support access via
+// operator[]().
+#define EXTRACT_FLOAT(a, i) \
+    (i == 0 ? \
+         _mm_cvtss_f32(a) : \
+         _mm_cvtss_f32(_mm_shuffle_ps(a, a, i)))
+
+std::pair<float, float> EWMAAndMaxPower_SSE(
+    float initial_value, const float src[], int len, float smoothing_factor) {
+  // When the recurrence is unrolled, we see that we can split it into 4
+  // separate lanes of evaluation:
+  //
+  // y[n] = a(S[n]^2) + (1-a)(y[n-1])
+  //      = a(S[n]^2) + (1-a)^1(aS[n-1]^2) + (1-a)^2(aS[n-2]^2) + ...
+  //      = z[n] + (1-a)^1(z[n-1]) + (1-a)^2(z[n-2]) + (1-a)^3(z[n-3])
+  //
+  // where z[n] = a(S[n]^2) + (1-a)^4(z[n-4]) + (1-a)^8(z[n-8]) + ...
+  //
+  // Thus, the strategy here is to compute z[n], z[n-1], z[n-2], and z[n-3] in
+  // each of the 4 lanes, and then combine them to give y[n].
+
+  const int rem = len % 4;
+  const int last_index = len - rem;
+
+  const __m128 smoothing_factor_x4 = _mm_set_ps1(smoothing_factor);
+  const float weight_prev = 1.0f - smoothing_factor;
+  const __m128 weight_prev_x4 = _mm_set_ps1(weight_prev);
+  const __m128 weight_prev_squared_x4 =
+      _mm_mul_ps(weight_prev_x4, weight_prev_x4);
+  const __m128 weight_prev_4th_x4 =
+      _mm_mul_ps(weight_prev_squared_x4, weight_prev_squared_x4);
+
+  // Compute z[n], z[n-1], z[n-2], and z[n-3] in parallel in lanes 3, 2, 1 and
+  // 0, respectively.
+  __m128 max_x4 = _mm_setzero_ps();
+  __m128 ewma_x4 = _mm_setr_ps(0.0f, 0.0f, 0.0f, initial_value);
+  int i;
+  for (i = 0; i < last_index; i += 4) {
+    ewma_x4 = _mm_mul_ps(ewma_x4, weight_prev_4th_x4);
+    const __m128 sample_x4 = _mm_load_ps(src + i);
+    const __m128 sample_squared_x4 = _mm_mul_ps(sample_x4, sample_x4);
+    max_x4 = _mm_max_ps(max_x4, sample_squared_x4);
+    // Note: The compiler optimizes this to a single multiply-and-accumulate
+    // instruction:
+    ewma_x4 = _mm_add_ps(ewma_x4,
+                         _mm_mul_ps(sample_squared_x4, smoothing_factor_x4));
+  }
+
+  // y[n] = z[n] + (1-a)^1(z[n-1]) + (1-a)^2(z[n-2]) + (1-a)^3(z[n-3])
+  float ewma = EXTRACT_FLOAT(ewma_x4, 3);
+  ewma_x4 = _mm_mul_ps(ewma_x4, weight_prev_x4);
+  ewma += EXTRACT_FLOAT(ewma_x4, 2);
+  ewma_x4 = _mm_mul_ps(ewma_x4, weight_prev_x4);
+  ewma += EXTRACT_FLOAT(ewma_x4, 1);
+  ewma_x4 = _mm_mul_ss(ewma_x4, weight_prev_x4);
+  ewma += EXTRACT_FLOAT(ewma_x4, 0);
+
+  // Fold the maximums together to get the overall maximum.
+  max_x4 = _mm_max_ps(max_x4,
+                      _mm_shuffle_ps(max_x4, max_x4, _MM_SHUFFLE(3, 3, 1, 1)));
+  max_x4 = _mm_max_ss(max_x4, _mm_shuffle_ps(max_x4, max_x4, 2));
+
+  std::pair<float, float> result(ewma, EXTRACT_FLOAT(max_x4, 0));
+
+  // Handle remaining values at the end of |src|.
+  for (; i < len; ++i) {
+    result.first *= weight_prev;
+    const float sample = src[i];
+    const float sample_squared = sample * sample;
+    result.first += sample_squared * smoothing_factor;
+    result.second = std::max(result.second, sample_squared);
+  }
+
+  return result;
+}
+
 }  // namespace vector_math
 }  // namespace media
diff --git a/media/base/vector_math.cc b/media/base/vector_math.cc
index de946ca..32584f5 100644
--- a/media/base/vector_math.cc
+++ b/media/base/vector_math.cc
@@ -5,6 +5,8 @@
 #include "media/base/vector_math.h"
 #include "media/base/vector_math_testing.h"
 
+#include <algorithm>
+
 #include "base/cpu.h"
 #include "base/logging.h"
 #include "build/build_config.h"
@@ -23,33 +25,42 @@ namespace vector_math {
 #if defined(__SSE__)
 #define FMAC_FUNC FMAC_SSE
 #define FMUL_FUNC FMUL_SSE
+#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_SSE
 void Initialize() {}
 #else
 // X86 CPU detection required.  Functions will be set by Initialize().
 // TODO(dalecurtis): Once Chrome moves to an SSE baseline this can be removed.
 #define FMAC_FUNC g_fmac_proc_
 #define FMUL_FUNC g_fmul_proc_
+#define EWMAAndMaxPower_FUNC g_ewma_power_proc_
 
 typedef void (*MathProc)(const float src[], float scale, int len, float dest[]);
 static MathProc g_fmac_proc_ = NULL;
 static MathProc g_fmul_proc_ = NULL;
+typedef std::pair<float, float> (*EWMAAndMaxPowerProc)(
+    float initial_value, const float src[], int len, float smoothing_factor);
+static EWMAAndMaxPowerProc g_ewma_power_proc_ = NULL;
 
 void Initialize() {
   CHECK(!g_fmac_proc_);
   CHECK(!g_fmul_proc_);
+  CHECK(!g_ewma_power_proc_);
   const bool kUseSSE = base::CPU().has_sse();
   g_fmac_proc_ = kUseSSE ? FMAC_SSE : FMAC_C;
   g_fmul_proc_ = kUseSSE ? FMUL_SSE : FMUL_C;
+  g_ewma_power_proc_ = kUseSSE ? EWMAAndMaxPower_SSE : EWMAAndMaxPower_C;
 }
 #endif
 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
 #define FMAC_FUNC FMAC_NEON
 #define FMUL_FUNC FMUL_NEON
+#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_NEON
 void Initialize() {}
 #else
 // Unknown architecture.
 #define FMAC_FUNC FMAC_C
 #define FMUL_FUNC FMUL_C
+#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_C
 void Initialize() {}
 #endif
 
@@ -77,6 +88,27 @@ void FMUL_C(const float src[], float scale, int len, float dest[]) {
     dest[i] = src[i] * scale;
 }
 
+std::pair<float, float> EWMAAndMaxPower(
+    float initial_value, const float src[], int len, float smoothing_factor) {
+  // Ensure |src| is 16-byte aligned.
+  DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(src) & (kRequiredAlignment - 1));
+  return EWMAAndMaxPower_FUNC(initial_value, src, len, smoothing_factor);
+}
+
+std::pair<float, float> EWMAAndMaxPower_C(
+    float initial_value, const float src[], int len, float smoothing_factor) {
+  std::pair<float, float> result(initial_value, 0.0f);
+  const float weight_prev = 1.0f - smoothing_factor;
+  for (int i = 0; i < len; ++i) {
+    result.first *= weight_prev;
+    const float sample = src[i];
+    const float sample_squared = sample * sample;
+    result.first += sample_squared * smoothing_factor;
+    result.second = std::max(result.second, sample_squared);
+  }
+  return result;
+}
+
 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
 void FMAC_NEON(const float src[], float scale, int len, float dest[]) {
   const int rem = len % 4;
@@ -103,6 +135,71 @@ void FMUL_NEON(const float src[], float scale, int len, float dest[]) {
   for (int i = last_index; i < len; ++i)
     dest[i] = src[i] * scale;
 }
+
+std::pair<float, float> EWMAAndMaxPower_NEON(
+    float initial_value, const float src[], int len, float smoothing_factor) {
+  // When the recurrence is unrolled, we see that we can split it into 4
+  // separate lanes of evaluation:
+  //
+  // y[n] = a(S[n]^2) + (1-a)(y[n-1])
+  //      = a(S[n]^2) + (1-a)^1(aS[n-1]^2) + (1-a)^2(aS[n-2]^2) + ...
+  //      = z[n] + (1-a)^1(z[n-1]) + (1-a)^2(z[n-2]) + (1-a)^3(z[n-3])
+  //
+  // where z[n] = a(S[n]^2) + (1-a)^4(z[n-4]) + (1-a)^8(z[n-8]) + ...
+  //
+  // Thus, the strategy here is to compute z[n], z[n-1], z[n-2], and z[n-3] in
+  // each of the 4 lanes, and then combine them to give y[n].
+
+  const int rem = len % 4;
+  const int last_index = len - rem;
+
+  const float32x4_t smoothing_factor_x4 = vdupq_n_f32(smoothing_factor);
+  const float weight_prev = 1.0f - smoothing_factor;
+  const float32x4_t weight_prev_x4 = vdupq_n_f32(weight_prev);
+  const float32x4_t weight_prev_squared_x4 =
+      vmulq_f32(weight_prev_x4, weight_prev_x4);
+  const float32x4_t weight_prev_4th_x4 =
+      vmulq_f32(weight_prev_squared_x4, weight_prev_squared_x4);
+
+  // Compute z[n], z[n-1], z[n-2], and z[n-3] in parallel in lanes 3, 2, 1 and
+  // 0, respectively.
+  float32x4_t max_x4 = vdupq_n_f32(0.0f);
+  float32x4_t ewma_x4 = vsetq_lane_f32(initial_value, vdupq_n_f32(0.0f), 3);
+  int i;
+  for (i = 0; i < last_index; i += 4) {
+    ewma_x4 = vmulq_f32(ewma_x4, weight_prev_4th_x4);
+    const float32x4_t sample_x4 = vld1q_f32(src + i);
+    const float32x4_t sample_squared_x4 = vmulq_f32(sample_x4, sample_x4);
+    max_x4 = vmaxq_f32(max_x4, sample_squared_x4);
+    ewma_x4 = vmlaq_f32(ewma_x4, sample_squared_x4, smoothing_factor_x4);
+  }
+
+  // y[n] = z[n] + (1-a)^1(z[n-1]) + (1-a)^2(z[n-2]) + (1-a)^3(z[n-3])
+  float ewma = vgetq_lane_f32(ewma_x4, 3);
+  ewma_x4 = vmulq_f32(ewma_x4, weight_prev_x4);
+  ewma += vgetq_lane_f32(ewma_x4, 2);
+  ewma_x4 = vmulq_f32(ewma_x4, weight_prev_x4);
+  ewma += vgetq_lane_f32(ewma_x4, 1);
+  ewma_x4 = vmulq_f32(ewma_x4, weight_prev_x4);
+  ewma += vgetq_lane_f32(ewma_x4, 0);
+
+  // Fold the maximums together to get the overall maximum.
+  float32x2_t max_x2 = vpmax_f32(vget_low_f32(max_x4), vget_high_f32(max_x4));
+  max_x2 = vpmax_f32(max_x2, max_x2);
+
+  std::pair<float, float> result(ewma, vget_lane_f32(max_x2, 0));
+
+  // Handle remaining values at the end of |src|.
+  for (; i < len; ++i) {
+    result.first *= weight_prev;
+    const float sample = src[i];
+    const float sample_squared = sample * sample;
+    result.first += sample_squared * smoothing_factor;
+    result.second = std::max(result.second, sample_squared);
+  }
+
+  return result;
+}
 #endif
 
 }  // namespace vector_math
diff --git a/media/base/vector_math.h b/media/base/vector_math.h
index 4764f0b..a4dea37 100644
--- a/media/base/vector_math.h
+++ b/media/base/vector_math.h
@@ -5,6 +5,8 @@
 #ifndef MEDIA_BASE_VECTOR_MATH_H_
 #define MEDIA_BASE_VECTOR_MATH_H_
 
+#include <utility>
+
 #include "media/base/media_export.h"
 
 namespace media {
@@ -26,6 +28,16 @@ MEDIA_EXPORT void FMAC(const float src[], float scale, int len, float dest[]);
 // |dest| must be aligned by kRequiredAlignment.
 MEDIA_EXPORT void FMUL(const float src[], float scale, int len, float dest[]);
 
+// Computes the exponentially-weighted moving average power of a signal by
+// iterating the recurrence:
+//
+//   y[-1] = initial_value
+//   y[n] = smoothing_factor * src[n]^2 + (1-smoothing_factor) * y[n-1]
+//
+// Returns the final average power and the maximum squared element value.
+MEDIA_EXPORT std::pair<float, float> EWMAAndMaxPower(
+    float initial_value, const float src[], int len, float smoothing_factor);
+
 }  // namespace vector_math
 }  // namespace media
 
diff --git a/media/base/vector_math_perftest.cc b/media/base/vector_math_perftest.cc
index 88ac551..9742f2e 100644
--- a/media/base/vector_math_perftest.cc
+++ b/media/base/vector_math_perftest.cc
@@ -17,6 +17,7 @@ using std::fill;
 namespace media {
 
 static const int kBenchmarkIterations = 200000;
+static const int kEWMABenchmarkIterations = 50000;
 static const float kScale = 0.5;
 static const int kVectorSize = 8192;
 
@@ -53,6 +54,25 @@ class VectorMathPerfTest : public testing::Test {
                            true);
   }
 
+  void RunBenchmark(
+      std::pair<float, float> (*fn)(float, const float[], int, float),
+      int len,
+      const std::string& test_name,
+      const std::string& trace_name) {
+    TimeTicks start = TimeTicks::HighResNow();
+    for (int i = 0; i < kEWMABenchmarkIterations; ++i) {
+      fn(0.5f, input_vector_.get(), len, 0.1f);
+    }
+    double total_time_milliseconds =
+        (TimeTicks::HighResNow() - start).InMillisecondsF();
+    perf_test::PrintResult(test_name,
+                           "",
+                           trace_name,
+                           kEWMABenchmarkIterations / total_time_milliseconds,
+                           "runs/ms",
+                           true);
+  }
+
  protected:
   scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_vector_;
   scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> output_vector_;
@@ -122,4 +142,40 @@ TEST_F(VectorMathPerfTest, FMUL) {
 
 #undef FMUL_FUNC
 
+#if defined(ARCH_CPU_X86_FAMILY)
+#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_SSE
+#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
+#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_NEON
+#endif
+
+// Benchmark for each optimized vector_math::EWMAAndMaxPower() method.
+TEST_F(VectorMathPerfTest, EWMAAndMaxPower) {
+  // Benchmark EWMAAndMaxPower_C().
+  RunBenchmark(vector_math::EWMAAndMaxPower_C,
+               kVectorSize,
+               "vector_math_ewma_and_max_power",
+               "unoptimized");
+#if defined(EWMAAndMaxPower_FUNC)
+#if defined(ARCH_CPU_X86_FAMILY)
+  ASSERT_TRUE(base::CPU().has_sse());
+#endif
+  // Benchmark EWMAAndMaxPower_FUNC() with unaligned size.
+  ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment /
+                                 sizeof(float)), 0U);
+  RunBenchmark(vector_math::EWMAAndMaxPower_FUNC,
+               kVectorSize - 1,
+               "vector_math_ewma_and_max_power",
+               "optimized_unaligned");
+  // Benchmark EWMAAndMaxPower_FUNC() with aligned size.
+  ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),
+            0U);
+  RunBenchmark(vector_math::EWMAAndMaxPower_FUNC,
+               kVectorSize,
+               "vector_math_ewma_and_max_power",
+               "optimized_aligned");
+#endif
+}
+
+#undef EWMAAndMaxPower_FUNC
+
 } // namespace media
diff --git a/media/base/vector_math_testing.h b/media/base/vector_math_testing.h
index 02d14f8..b0b30440 100644
--- a/media/base/vector_math_testing.h
+++ b/media/base/vector_math_testing.h
@@ -5,6 +5,8 @@
 #ifndef MEDIA_BASE_VECTOR_MATH_TESTING_H_
 #define MEDIA_BASE_VECTOR_MATH_TESTING_H_
 
+#include <utility>
+
 #include "build/build_config.h"
 #include "media/base/media_export.h"
 
@@ -14,12 +16,16 @@ namespace vector_math {
 // Optimized versions exposed for testing.  See vector_math.h for details.
 MEDIA_EXPORT void FMAC_C(const float src[], float scale, int len, float dest[]);
 MEDIA_EXPORT void FMUL_C(const float src[], float scale, int len, float dest[]);
+MEDIA_EXPORT std::pair<float, float> EWMAAndMaxPower_C(
+    float initial_value, const float src[], int len, float smoothing_factor);
 
 #if defined(ARCH_CPU_X86_FAMILY)
 MEDIA_EXPORT void FMAC_SSE(const float src[], float scale, int len,
                            float dest[]);
 MEDIA_EXPORT void FMUL_SSE(const float src[], float scale, int len,
                            float dest[]);
+MEDIA_EXPORT std::pair<float, float> EWMAAndMaxPower_SSE(
+    float initial_value, const float src[], int len, float smoothing_factor);
 #endif
 
 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
@@ -27,6 +33,8 @@ MEDIA_EXPORT void FMAC_NEON(const float src[], float scale, int len,
                             float dest[]);
 MEDIA_EXPORT void FMUL_NEON(const float src[], float scale, int len,
                             float dest[]);
+MEDIA_EXPORT std::pair<float, float> EWMAAndMaxPower_NEON(
+    float initial_value, const float src[], int len, float smoothing_factor);
 #endif
 
 }  // namespace vector_math
diff --git a/media/base/vector_math_unittest.cc b/media/base/vector_math_unittest.cc
index 32e5ea4..f8278ce 100644
--- a/media/base/vector_math_unittest.cc
+++ b/media/base/vector_math_unittest.cc
@@ -138,4 +138,248 @@ TEST_F(VectorMathTest, FMUL) {
 #endif
 }
 
+namespace {
+
+class EWMATestScenario {
+ public:
+  EWMATestScenario(float initial_value, const float src[], int len,
+                   float smoothing_factor)
+      : initial_value_(initial_value),
+        data_(static_cast<float*>(
+            len == 0 ? NULL :
+            base::AlignedAlloc(len * sizeof(float),
+                               vector_math::kRequiredAlignment))),
+        data_len_(len),
+        smoothing_factor_(smoothing_factor),
+        expected_final_avg_(initial_value),
+        expected_max_(0.0f) {
+    if (data_len_ > 0)
+      memcpy(data_.get(), src, len * sizeof(float));
+  }
+
+  // Copy constructor and assignment operator for ::testing::Values(...).
+  EWMATestScenario(const EWMATestScenario& other) { *this = other; }
+  EWMATestScenario& operator=(const EWMATestScenario& other) {
+    this->initial_value_ = other.initial_value_;
+    this->smoothing_factor_ = other.smoothing_factor_;
+    if (other.data_len_ == 0) {
+      this->data_.reset();
+    } else {
+      this->data_.reset(static_cast<float*>(
+        base::AlignedAlloc(other.data_len_ * sizeof(float),
+                           vector_math::kRequiredAlignment)));
+      memcpy(this->data_.get(), other.data_.get(),
+             other.data_len_ * sizeof(float));
+    }
+    this->data_len_ = other.data_len_;
+    this->expected_final_avg_ = other.expected_final_avg_;
+    this->expected_max_ = other.expected_max_;
+    return *this;
+  }
+
+  EWMATestScenario ScaledBy(float scale) const {
+    EWMATestScenario result(*this);
+    float* p = result.data_.get();
+    float* const p_end = p + result.data_len_;
+    for (; p < p_end; ++p)
+      *p *= scale;
+    return result;
+  }
+
+  EWMATestScenario WithImpulse(float value, int offset) const {
+    EWMATestScenario result(*this);
+    result.data_.get()[offset] = value;
+    return result;
+  }
+
+  EWMATestScenario HasExpectedResult(float final_avg_value,
+                                     float max_value) const {
+    EWMATestScenario result(*this);
+    result.expected_final_avg_ = final_avg_value;
+    result.expected_max_ = max_value;
+    return result;
+  }
+
+  void RunTest() const {
+    {
+      SCOPED_TRACE("EWMAAndMaxPower");
+      const std::pair<float, float>& result = vector_math::EWMAAndMaxPower(
+          initial_value_, data_.get(), data_len_, smoothing_factor_);
+      EXPECT_NEAR(expected_final_avg_, result.first, 0.0000001f);
+      EXPECT_NEAR(expected_max_, result.second, 0.0000001f);
+    }
+
+    {
+      SCOPED_TRACE("EWMAAndMaxPower_C");
+      const std::pair<float, float>& result = vector_math::EWMAAndMaxPower_C(
+          initial_value_, data_.get(), data_len_, smoothing_factor_);
+      EXPECT_NEAR(expected_final_avg_, result.first, 0.0000001f);
+      EXPECT_NEAR(expected_max_, result.second, 0.0000001f);
+    }
+
+#if defined(ARCH_CPU_X86_FAMILY)
+    {
+      ASSERT_TRUE(base::CPU().has_sse());
+      SCOPED_TRACE("EWMAAndMaxPower_SSE");
+      const std::pair<float, float>& result = vector_math::EWMAAndMaxPower_SSE(
+          initial_value_, data_.get(), data_len_, smoothing_factor_);
+      EXPECT_NEAR(expected_final_avg_, result.first, 0.0000001f);
+      EXPECT_NEAR(expected_max_, result.second, 0.0000001f);
+    }
+#endif
+
+#if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
+    {
+      SCOPED_TRACE("EWMAAndMaxPower_NEON");
+      const std::pair<float, float>& result = vector_math::EWMAAndMaxPower_NEON(
+          initial_value_, data_.get(), data_len_, smoothing_factor_);
+      EXPECT_NEAR(expected_final_avg_, result.first, 0.0000001f);
+      EXPECT_NEAR(expected_max_, result.second, 0.0000001f);
+    }
+#endif
+  }
+
+ private:
+  float initial_value_;
+  scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> data_;
+  int data_len_;
+  float smoothing_factor_;
+  float expected_final_avg_;
+  float expected_max_;
+};
+
+}  // namespace
+
+typedef testing::TestWithParam<EWMATestScenario> VectorMathEWMAAndMaxPowerTest;
+
+TEST_P(VectorMathEWMAAndMaxPowerTest, Correctness) {
+  GetParam().RunTest();
+}
+
+static const float kZeros[] = {  // 32 zeros
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+static const float kOnes[] = {  // 32 ones
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+};
+
+static const float kCheckerboard[] = {  // 32 alternating 0, 1
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+};
+
+static const float kInverseCheckerboard[] = {  // 32 alternating 1, 0
+  1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+  1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0
+};
+
+INSTANTIATE_TEST_CASE_P(
+    Scenarios, VectorMathEWMAAndMaxPowerTest,
+    ::testing::Values(
+         // Zero-length input: Result should equal initial value.
+         EWMATestScenario(0.0f, NULL, 0, 0.0f).HasExpectedResult(0.0f, 0.0f),
+         EWMATestScenario(1.0f, NULL, 0, 0.0f).HasExpectedResult(1.0f, 0.0f),
+
+         // Smoothing factor of zero: Samples have no effect on result.
+         EWMATestScenario(0.0f, kOnes, 32, 0.0f).HasExpectedResult(0.0f, 1.0f),
+         EWMATestScenario(1.0f, kZeros, 32, 0.0f).HasExpectedResult(1.0f, 0.0f),
+
+         // Smoothing factor of one: Result = last sample squared.
+         EWMATestScenario(0.0f, kCheckerboard, 32, 1.0f)
+             .ScaledBy(2.0f)
+             .HasExpectedResult(4.0f, 4.0f),
+         EWMATestScenario(1.0f, kInverseCheckerboard, 32, 1.0f)
+             .ScaledBy(2.0f)
+             .HasExpectedResult(0.0f, 4.0f),
+
+         // Smoothing factor of 1/4, muted signal.
+         EWMATestScenario(1.0f, kZeros, 1, 0.25f)
+             .HasExpectedResult(powf(0.75, 1.0f), 0.0f),
+         EWMATestScenario(1.0f, kZeros, 2, 0.25f)
+             .HasExpectedResult(powf(0.75, 2.0f), 0.0f),
+         EWMATestScenario(1.0f, kZeros, 3, 0.25f)
+             .HasExpectedResult(powf(0.75, 3.0f), 0.0f),
+         EWMATestScenario(1.0f, kZeros, 12, 0.25f)
+             .HasExpectedResult(powf(0.75, 12.0f), 0.0f),
+         EWMATestScenario(1.0f, kZeros, 13, 0.25f)
+             .HasExpectedResult(powf(0.75, 13.0f), 0.0f),
+         EWMATestScenario(1.0f, kZeros, 14, 0.25f)
+             .HasExpectedResult(powf(0.75, 14.0f), 0.0f),
+         EWMATestScenario(1.0f, kZeros, 15, 0.25f)
+             .HasExpectedResult(powf(0.75, 15.0f), 0.0f),
+
+         // Smoothing factor of 1/4, constant full-amplitude signal.
+         EWMATestScenario(0.0f, kOnes, 1, 0.25f).HasExpectedResult(0.25f, 1.0f),
+         EWMATestScenario(0.0f, kOnes, 2, 0.25f)
+             .HasExpectedResult(0.4375f, 1.0f),
+         EWMATestScenario(0.0f, kOnes, 3, 0.25f)
+             .HasExpectedResult(0.578125f, 1.0f),
+         EWMATestScenario(0.0f, kOnes, 12, 0.25f)
+             .HasExpectedResult(0.96832365f, 1.0f),
+         EWMATestScenario(0.0f, kOnes, 13, 0.25f)
+             .HasExpectedResult(0.97624274f, 1.0f),
+         EWMATestScenario(0.0f, kOnes, 14, 0.25f)
+             .HasExpectedResult(0.98218205f, 1.0f),
+         EWMATestScenario(0.0f, kOnes, 15, 0.25f)
+             .HasExpectedResult(0.98663654f, 1.0f),
+
+         // Smoothing factor of 1/4, checkerboard signal.
+         EWMATestScenario(0.0f, kCheckerboard, 1, 0.25f)
+             .HasExpectedResult(0.0f, 0.0f),
+         EWMATestScenario(0.0f, kCheckerboard, 2, 0.25f)
+             .HasExpectedResult(0.25f, 1.0f),
+         EWMATestScenario(0.0f, kCheckerboard, 3, 0.25f)
+             .HasExpectedResult(0.1875f, 1.0f),
+         EWMATestScenario(0.0f, kCheckerboard, 12, 0.25f)
+             .HasExpectedResult(0.55332780f, 1.0f),
+         EWMATestScenario(0.0f, kCheckerboard, 13, 0.25f)
+             .HasExpectedResult(0.41499585f, 1.0f),
+         EWMATestScenario(0.0f, kCheckerboard, 14, 0.25f)
+             .HasExpectedResult(0.56124689f, 1.0f),
+         EWMATestScenario(0.0f, kCheckerboard, 15, 0.25f)
+             .HasExpectedResult(0.42093517f, 1.0f),
+
+         // Smoothing factor of 1/4, inverse checkerboard signal.
+         EWMATestScenario(0.0f, kInverseCheckerboard, 1, 0.25f)
+             .HasExpectedResult(0.25f, 1.0f),
+         EWMATestScenario(0.0f, kInverseCheckerboard, 2, 0.25f)
+             .HasExpectedResult(0.1875f, 1.0f),
+         EWMATestScenario(0.0f, kInverseCheckerboard, 3, 0.25f)
+             .HasExpectedResult(0.390625f, 1.0f),
+         EWMATestScenario(0.0f, kInverseCheckerboard, 12, 0.25f)
+             .HasExpectedResult(0.41499585f, 1.0f),
+         EWMATestScenario(0.0f, kInverseCheckerboard, 13, 0.25f)
+             .HasExpectedResult(0.56124689f, 1.0f),
+         EWMATestScenario(0.0f, kInverseCheckerboard, 14, 0.25f)
+             .HasExpectedResult(0.42093517f, 1.0f),
+         EWMATestScenario(0.0f, kInverseCheckerboard, 15, 0.25f)
+             .HasExpectedResult(0.56570137f, 1.0f),
+
+         // Smoothing factor of 1/4, impulse signal.
+         EWMATestScenario(0.0f, kZeros, 3, 0.25f)
+             .WithImpulse(2.0f, 0)
+             .HasExpectedResult(0.562500f, 4.0f),
+         EWMATestScenario(0.0f, kZeros, 3, 0.25f)
+             .WithImpulse(2.0f, 1)
+             .HasExpectedResult(0.75f, 4.0f),
+         EWMATestScenario(0.0f, kZeros, 3, 0.25f)
+             .WithImpulse(2.0f, 2)
+             .HasExpectedResult(1.0f, 4.0f),
+         EWMATestScenario(0.0f, kZeros, 32, 0.25f)
+             .WithImpulse(2.0f, 0)
+             .HasExpectedResult(0.00013394f, 4.0f),
+         EWMATestScenario(0.0f, kZeros, 32, 0.25f)
+             .WithImpulse(2.0f, 1)
+             .HasExpectedResult(0.00017858f, 4.0f),
+         EWMATestScenario(0.0f, kZeros, 32, 0.25f)
+             .WithImpulse(2.0f, 2)
+             .HasExpectedResult(0.00023811f, 4.0f),
+         EWMATestScenario(0.0f, kZeros, 32, 0.25f)
+             .WithImpulse(2.0f, 3)
+             .HasExpectedResult(0.00031748f, 4.0f)
+    ));
+
 }  // namespace media
author	miu@chromium.org <miu@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2013-11-26 05:52:06 +0000
committer	miu@chromium.org <miu@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2013-11-26 05:52:06 +0000
commit	d1bb9acf8f0c223159f10a168e35a486d9ba1dd4 (patch)
tree	a89d5eecd5edd921391ef8cfdd5fe5256d5392ec /media
parent	979b4cb73c1a4332a02047afb960ffe59c015b6c (diff)
download	chromium_src-d1bb9acf8f0c223159f10a168e35a486d9ba1dd4.zip chromium_src-d1bb9acf8f0c223159f10a168e35a486d9ba1dd4.tar.gz chromium_src-d1bb9acf8f0c223159f10a168e35a486d9ba1dd4.tar.bz2