summary | refs | log | tree | commit | diff | stats
path: root/media
diff options
context:
space:
mode:
author miu@chromium.org <miu@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-11-26 05:52:06 +0000
committer miu@chromium.org <miu@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-11-26 05:52:06 +0000
commit d1bb9acf8f0c223159f10a168e35a486d9ba1dd4 (patch)
tree a89d5eecd5edd921391ef8cfdd5fe5256d5392ec /media
parent 979b4cb73c1a4332a02047afb960ffe59c015b6c (diff)
download chromium_src-d1bb9acf8f0c223159f10a168e35a486d9ba1dd4.zip
chromium_src-d1bb9acf8f0c223159f10a168e35a486d9ba1dd4.tar.gz
chromium_src-d1bb9acf8f0c223159f10a168e35a486d9ba1dd4.tar.bz2
SIMD optimizations of exp weighted moving average computation in AudioPowerMonitor.
Benchmark tests show a 445% speed-up using the SSE implementation over the original code in AudioPowerMonitor::Scan(). Also worth noting: A minor change to the non-SIMD code allowed a 30% speed-up over the original!

TEST=media_unittests; and manually confirmed tab audio indicator still works properly
Review URL: https://codereview.chromium.org/84563002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@237268 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'media')
-rw-r--r-- media/audio/audio_power_monitor.cc | 30
-rw-r--r-- media/base/simd/vector_math_sse.cc | 79
-rw-r--r-- media/base/vector_math.cc | 97
-rw-r--r-- media/base/vector_math.h | 12
-rw-r--r-- media/base/vector_math_perftest.cc | 56
-rw-r--r-- media/base/vector_math_testing.h | 8
-rw-r--r-- media/base/vector_math_unittest.cc | 244
7 files changed, 506 insertions, 20 deletions
diff --git a/media/audio/audio_power_monitor.cc b/media/audio/audio_power_monitor.cc
index d8b9436..6536f46 100644
--- a/media/audio/audio_power_monitor.cc
+++ b/media/audio/audio_power_monitor.cc
@@ -11,6 +11,7 @@
#include "base/logging.h"
#include "base/time/time.h"
#include "media/base/audio_bus.h"
+#include "media/base/vector_math.h"
namespace media {
@@ -36,30 +37,19 @@ void AudioPowerMonitor::Scan(const AudioBus& buffer, int num_frames) {
return;
// Calculate a new average power by applying a first-order low-pass filter
- // over the audio samples in |buffer|.
- //
- // TODO(miu): Implement optimized SSE/NEON to more efficiently compute the
- // results (in media/base/vector_math) in soon-upcoming change.
+ // (a.k.a. an exponentially-weighted moving average) over the audio samples in
+ // each channel in |buffer|.
float sum_power = 0.0f;
for (int i = 0; i < num_channels; ++i) {
- float average_power_this_channel = average_power_;
- bool clipped = false;
- const float* p = buffer.channel(i);
- const float* const end_of_samples = p + num_frames;
- for (; p < end_of_samples; ++p) {
- const float sample = *p;
- const float sample_squared = sample * sample;
- clipped |= (sample_squared > 1.0f);
- average_power_this_channel +=
- (sample_squared - average_power_this_channel) * sample_weight_;
- }
+ const std::pair<float, float> ewma_and_max = vector_math::EWMAAndMaxPower(
+ average_power_, buffer.channel(i), num_frames, sample_weight_);
// If data in audio buffer is garbage, ignore its effect on the result.
- if (base::IsNaN(average_power_this_channel)) {
- average_power_this_channel = average_power_;
- clipped = false;
+ if (!base::IsFinite(ewma_and_max.first)) {
+ sum_power += average_power_;
+ } else {
+ sum_power += ewma_and_max.first;
+ has_clipped_ |= (ewma_and_max.second > 1.0f);
}
- sum_power += average_power_this_channel;
- has_clipped_ |= clipped;
}
// Update accumulated results, with clamping for sanity.
diff --git a/media/base/simd/vector_math_sse.cc b/media/base/simd/vector_math_sse.cc
index 39bcaa0..c212122 100644
--- a/media/base/simd/vector_math_sse.cc
+++ b/media/base/simd/vector_math_sse.cc
@@ -4,6 +4,8 @@
#include "media/base/vector_math_testing.h"
+#include <algorithm>
+
#include <xmmintrin.h> // NOLINT
namespace media {
@@ -35,5 +37,82 @@ void FMAC_SSE(const float src[], float scale, int len, float dest[]) {
dest[i] += src[i] * scale;
}
+// Convenience macro to extract float 0 through 3 from the vector |a|. This is
+// needed because compilers other than clang don't support access via
+// operator[]().
+#define EXTRACT_FLOAT(a, i) \
+ (i == 0 ? \
+ _mm_cvtss_f32(a) : \
+ _mm_cvtss_f32(_mm_shuffle_ps(a, a, i)))
+
+std::pair<float, float> EWMAAndMaxPower_SSE(
+ float initial_value, const float src[], int len, float smoothing_factor) {
+ // When the recurrence is unrolled, we see that we can split it into 4
+ // separate lanes of evaluation:
+ //
+ // y[n] = a(S[n]^2) + (1-a)(y[n-1])
+ // = a(S[n]^2) + (1-a)^1(aS[n-1]^2) + (1-a)^2(aS[n-2]^2) + ...
+ // = z[n] + (1-a)^1(z[n-1]) + (1-a)^2(z[n-2]) + (1-a)^3(z[n-3])
+ //
+ // where z[n] = a(S[n]^2) + (1-a)^4(z[n-4]) + (1-a)^8(z[n-8]) + ...
+ //
+ // Thus, the strategy here is to compute z[n], z[n-1], z[n-2], and z[n-3] in
+ // each of the 4 lanes, and then combine them to give y[n].
+
+ const int rem = len % 4;
+ const int last_index = len - rem;
+
+ const __m128 smoothing_factor_x4 = _mm_set_ps1(smoothing_factor);
+ const float weight_prev = 1.0f - smoothing_factor;
+ const __m128 weight_prev_x4 = _mm_set_ps1(weight_prev);
+ const __m128 weight_prev_squared_x4 =
+ _mm_mul_ps(weight_prev_x4, weight_prev_x4);
+ const __m128 weight_prev_4th_x4 =
+ _mm_mul_ps(weight_prev_squared_x4, weight_prev_squared_x4);
+
+ // Compute z[n], z[n-1], z[n-2], and z[n-3] in parallel in lanes 3, 2, 1 and
+ // 0, respectively.
+ __m128 max_x4 = _mm_setzero_ps();
+ __m128 ewma_x4 = _mm_setr_ps(0.0f, 0.0f, 0.0f, initial_value);
+ int i;
+ for (i = 0; i < last_index; i += 4) {
+ ewma_x4 = _mm_mul_ps(ewma_x4, weight_prev_4th_x4);
+ const __m128 sample_x4 = _mm_load_ps(src + i);
+ const __m128 sample_squared_x4 = _mm_mul_ps(sample_x4, sample_x4);
+ max_x4 = _mm_max_ps(max_x4, sample_squared_x4);
+ // Note: On targets with FMA support, the compiler may fuse this into a
+ // single multiply-and-accumulate instruction:
+ ewma_x4 = _mm_add_ps(ewma_x4,
+ _mm_mul_ps(sample_squared_x4, smoothing_factor_x4));
+ }
+
+ // y[n] = z[n] + (1-a)^1(z[n-1]) + (1-a)^2(z[n-2]) + (1-a)^3(z[n-3])
+ float ewma = EXTRACT_FLOAT(ewma_x4, 3);
+ ewma_x4 = _mm_mul_ps(ewma_x4, weight_prev_x4);
+ ewma += EXTRACT_FLOAT(ewma_x4, 2);
+ ewma_x4 = _mm_mul_ps(ewma_x4, weight_prev_x4);
+ ewma += EXTRACT_FLOAT(ewma_x4, 1);
+ ewma_x4 = _mm_mul_ss(ewma_x4, weight_prev_x4);
+ ewma += EXTRACT_FLOAT(ewma_x4, 0);
+
+ // Fold the maximums together to get the overall maximum.
+ max_x4 = _mm_max_ps(max_x4,
+ _mm_shuffle_ps(max_x4, max_x4, _MM_SHUFFLE(3, 3, 1, 1)));
+ max_x4 = _mm_max_ss(max_x4, _mm_shuffle_ps(max_x4, max_x4, 2));
+
+ std::pair<float, float> result(ewma, EXTRACT_FLOAT(max_x4, 0));
+
+ // Handle remaining values at the end of |src|.
+ for (; i < len; ++i) {
+ result.first *= weight_prev;
+ const float sample = src[i];
+ const float sample_squared = sample * sample;
+ result.first += sample_squared * smoothing_factor;
+ result.second = std::max(result.second, sample_squared);
+ }
+
+ return result;
+}
+
} // namespace vector_math
} // namespace media
diff --git a/media/base/vector_math.cc b/media/base/vector_math.cc
index de946ca..32584f5 100644
--- a/media/base/vector_math.cc
+++ b/media/base/vector_math.cc
@@ -5,6 +5,8 @@
#include "media/base/vector_math.h"
#include "media/base/vector_math_testing.h"
+#include <algorithm>
+
#include "base/cpu.h"
#include "base/logging.h"
#include "build/build_config.h"
@@ -23,33 +25,42 @@ namespace vector_math {
#if defined(__SSE__)
#define FMAC_FUNC FMAC_SSE
#define FMUL_FUNC FMUL_SSE
+#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_SSE
void Initialize() {}
#else
// X86 CPU detection required. Functions will be set by Initialize().
// TODO(dalecurtis): Once Chrome moves to an SSE baseline this can be removed.
#define FMAC_FUNC g_fmac_proc_
#define FMUL_FUNC g_fmul_proc_
+#define EWMAAndMaxPower_FUNC g_ewma_power_proc_
typedef void (*MathProc)(const float src[], float scale, int len, float dest[]);
static MathProc g_fmac_proc_ = NULL;
static MathProc g_fmul_proc_ = NULL;
+typedef std::pair<float, float> (*EWMAAndMaxPowerProc)(
+ float initial_value, const float src[], int len, float smoothing_factor);
+static EWMAAndMaxPowerProc g_ewma_power_proc_ = NULL;
void Initialize() {
CHECK(!g_fmac_proc_);
CHECK(!g_fmul_proc_);
+ CHECK(!g_ewma_power_proc_);
const bool kUseSSE = base::CPU().has_sse();
g_fmac_proc_ = kUseSSE ? FMAC_SSE : FMAC_C;
g_fmul_proc_ = kUseSSE ? FMUL_SSE : FMUL_C;
+ g_ewma_power_proc_ = kUseSSE ? EWMAAndMaxPower_SSE : EWMAAndMaxPower_C;
}
#endif
#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
#define FMAC_FUNC FMAC_NEON
#define FMUL_FUNC FMUL_NEON
+#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_NEON
void Initialize() {}
#else
// Unknown architecture.
#define FMAC_FUNC FMAC_C
#define FMUL_FUNC FMUL_C
+#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_C
void Initialize() {}
#endif
@@ -77,6 +88,27 @@ void FMUL_C(const float src[], float scale, int len, float dest[]) {
dest[i] = src[i] * scale;
}
+std::pair<float, float> EWMAAndMaxPower(
+ float initial_value, const float src[], int len, float smoothing_factor) {
+ // Ensure |src| is 16-byte aligned.
+ DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(src) & (kRequiredAlignment - 1));
+ return EWMAAndMaxPower_FUNC(initial_value, src, len, smoothing_factor);
+}
+
+std::pair<float, float> EWMAAndMaxPower_C(
+ float initial_value, const float src[], int len, float smoothing_factor) {
+ std::pair<float, float> result(initial_value, 0.0f);
+ const float weight_prev = 1.0f - smoothing_factor;
+ for (int i = 0; i < len; ++i) {
+ result.first *= weight_prev;
+ const float sample = src[i];
+ const float sample_squared = sample * sample;
+ result.first += sample_squared * smoothing_factor;
+ result.second = std::max(result.second, sample_squared);
+ }
+ return result;
+}
+
#if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
void FMAC_NEON(const float src[], float scale, int len, float dest[]) {
const int rem = len % 4;
@@ -103,6 +135,71 @@ void FMUL_NEON(const float src[], float scale, int len, float dest[]) {
for (int i = last_index; i < len; ++i)
dest[i] = src[i] * scale;
}
+
+std::pair<float, float> EWMAAndMaxPower_NEON(
+ float initial_value, const float src[], int len, float smoothing_factor) {
+ // When the recurrence is unrolled, we see that we can split it into 4
+ // separate lanes of evaluation:
+ //
+ // y[n] = a(S[n]^2) + (1-a)(y[n-1])
+ // = a(S[n]^2) + (1-a)^1(aS[n-1]^2) + (1-a)^2(aS[n-2]^2) + ...
+ // = z[n] + (1-a)^1(z[n-1]) + (1-a)^2(z[n-2]) + (1-a)^3(z[n-3])
+ //
+ // where z[n] = a(S[n]^2) + (1-a)^4(z[n-4]) + (1-a)^8(z[n-8]) + ...
+ //
+ // Thus, the strategy here is to compute z[n], z[n-1], z[n-2], and z[n-3] in
+ // each of the 4 lanes, and then combine them to give y[n].
+
+ const int rem = len % 4;
+ const int last_index = len - rem;
+
+ const float32x4_t smoothing_factor_x4 = vdupq_n_f32(smoothing_factor);
+ const float weight_prev = 1.0f - smoothing_factor;
+ const float32x4_t weight_prev_x4 = vdupq_n_f32(weight_prev);
+ const float32x4_t weight_prev_squared_x4 =
+ vmulq_f32(weight_prev_x4, weight_prev_x4);
+ const float32x4_t weight_prev_4th_x4 =
+ vmulq_f32(weight_prev_squared_x4, weight_prev_squared_x4);
+
+ // Compute z[n], z[n-1], z[n-2], and z[n-3] in parallel in lanes 3, 2, 1 and
+ // 0, respectively.
+ float32x4_t max_x4 = vdupq_n_f32(0.0f);
+ float32x4_t ewma_x4 = vsetq_lane_f32(initial_value, vdupq_n_f32(0.0f), 3);
+ int i;
+ for (i = 0; i < last_index; i += 4) {
+ ewma_x4 = vmulq_f32(ewma_x4, weight_prev_4th_x4);
+ const float32x4_t sample_x4 = vld1q_f32(src + i);
+ const float32x4_t sample_squared_x4 = vmulq_f32(sample_x4, sample_x4);
+ max_x4 = vmaxq_f32(max_x4, sample_squared_x4);
+ ewma_x4 = vmlaq_f32(ewma_x4, sample_squared_x4, smoothing_factor_x4);
+ }
+
+ // y[n] = z[n] + (1-a)^1(z[n-1]) + (1-a)^2(z[n-2]) + (1-a)^3(z[n-3])
+ float ewma = vgetq_lane_f32(ewma_x4, 3);
+ ewma_x4 = vmulq_f32(ewma_x4, weight_prev_x4);
+ ewma += vgetq_lane_f32(ewma_x4, 2);
+ ewma_x4 = vmulq_f32(ewma_x4, weight_prev_x4);
+ ewma += vgetq_lane_f32(ewma_x4, 1);
+ ewma_x4 = vmulq_f32(ewma_x4, weight_prev_x4);
+ ewma += vgetq_lane_f32(ewma_x4, 0);
+
+ // Fold the maximums together to get the overall maximum.
+ float32x2_t max_x2 = vpmax_f32(vget_low_f32(max_x4), vget_high_f32(max_x4));
+ max_x2 = vpmax_f32(max_x2, max_x2);
+
+ std::pair<float, float> result(ewma, vget_lane_f32(max_x2, 0));
+
+ // Handle remaining values at the end of |src|.
+ for (; i < len; ++i) {
+ result.first *= weight_prev;
+ const float sample = src[i];
+ const float sample_squared = sample * sample;
+ result.first += sample_squared * smoothing_factor;
+ result.second = std::max(result.second, sample_squared);
+ }
+
+ return result;
+}
#endif
} // namespace vector_math
diff --git a/media/base/vector_math.h b/media/base/vector_math.h
index 4764f0b..a4dea37 100644
--- a/media/base/vector_math.h
+++ b/media/base/vector_math.h
@@ -5,6 +5,8 @@
#ifndef MEDIA_BASE_VECTOR_MATH_H_
#define MEDIA_BASE_VECTOR_MATH_H_
+#include <utility>
+
#include "media/base/media_export.h"
namespace media {
@@ -26,6 +28,16 @@ MEDIA_EXPORT void FMAC(const float src[], float scale, int len, float dest[]);
// |dest| must be aligned by kRequiredAlignment.
MEDIA_EXPORT void FMUL(const float src[], float scale, int len, float dest[]);
+// Computes the exponentially-weighted moving average power of a signal by
+// iterating the recurrence:
+//
+// y[-1] = initial_value
+// y[n] = smoothing_factor * src[n]^2 + (1-smoothing_factor) * y[n-1]
+//
+// Returns the final average power and the maximum squared element value.
+MEDIA_EXPORT std::pair<float, float> EWMAAndMaxPower(
+ float initial_value, const float src[], int len, float smoothing_factor);
+
} // namespace vector_math
} // namespace media
diff --git a/media/base/vector_math_perftest.cc b/media/base/vector_math_perftest.cc
index 88ac551..9742f2e 100644
--- a/media/base/vector_math_perftest.cc
+++ b/media/base/vector_math_perftest.cc
@@ -17,6 +17,7 @@ using std::fill;
namespace media {
static const int kBenchmarkIterations = 200000;
+static const int kEWMABenchmarkIterations = 50000;
static const float kScale = 0.5;
static const int kVectorSize = 8192;
@@ -53,6 +54,25 @@ class VectorMathPerfTest : public testing::Test {
true);
}
+ void RunBenchmark(
+ std::pair<float, float> (*fn)(float, const float[], int, float),
+ int len,
+ const std::string& test_name,
+ const std::string& trace_name) {
+ TimeTicks start = TimeTicks::HighResNow();
+ for (int i = 0; i < kEWMABenchmarkIterations; ++i) {
+ fn(0.5f, input_vector_.get(), len, 0.1f);
+ }
+ double total_time_milliseconds =
+ (TimeTicks::HighResNow() - start).InMillisecondsF();
+ perf_test::PrintResult(test_name,
+ "",
+ trace_name,
+ kEWMABenchmarkIterations / total_time_milliseconds,
+ "runs/ms",
+ true);
+ }
+
protected:
scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_vector_;
scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> output_vector_;
@@ -122,4 +142,40 @@ TEST_F(VectorMathPerfTest, FMUL) {
#undef FMUL_FUNC
+#if defined(ARCH_CPU_X86_FAMILY)
+#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_SSE
+#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
+#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_NEON
+#endif
+
+// Benchmark for each optimized vector_math::EWMAAndMaxPower() method.
+TEST_F(VectorMathPerfTest, EWMAAndMaxPower) {
+ // Benchmark EWMAAndMaxPower_C().
+ RunBenchmark(vector_math::EWMAAndMaxPower_C,
+ kVectorSize,
+ "vector_math_ewma_and_max_power",
+ "unoptimized");
+#if defined(EWMAAndMaxPower_FUNC)
+#if defined(ARCH_CPU_X86_FAMILY)
+ ASSERT_TRUE(base::CPU().has_sse());
+#endif
+ // Benchmark EWMAAndMaxPower_FUNC() with unaligned size.
+ ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment /
+ sizeof(float)), 0U);
+ RunBenchmark(vector_math::EWMAAndMaxPower_FUNC,
+ kVectorSize - 1,
+ "vector_math_ewma_and_max_power",
+ "optimized_unaligned");
+ // Benchmark EWMAAndMaxPower_FUNC() with aligned size.
+ ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),
+ 0U);
+ RunBenchmark(vector_math::EWMAAndMaxPower_FUNC,
+ kVectorSize,
+ "vector_math_ewma_and_max_power",
+ "optimized_aligned");
+#endif
+}
+
+#undef EWMAAndMaxPower_FUNC
+
} // namespace media
diff --git a/media/base/vector_math_testing.h b/media/base/vector_math_testing.h
index 02d14f8..b0b30440 100644
--- a/media/base/vector_math_testing.h
+++ b/media/base/vector_math_testing.h
@@ -5,6 +5,8 @@
#ifndef MEDIA_BASE_VECTOR_MATH_TESTING_H_
#define MEDIA_BASE_VECTOR_MATH_TESTING_H_
+#include <utility>
+
#include "build/build_config.h"
#include "media/base/media_export.h"
@@ -14,12 +16,16 @@ namespace vector_math {
// Optimized versions exposed for testing. See vector_math.h for details.
MEDIA_EXPORT void FMAC_C(const float src[], float scale, int len, float dest[]);
MEDIA_EXPORT void FMUL_C(const float src[], float scale, int len, float dest[]);
+MEDIA_EXPORT std::pair<float, float> EWMAAndMaxPower_C(
+ float initial_value, const float src[], int len, float smoothing_factor);
#if defined(ARCH_CPU_X86_FAMILY)
MEDIA_EXPORT void FMAC_SSE(const float src[], float scale, int len,
float dest[]);
MEDIA_EXPORT void FMUL_SSE(const float src[], float scale, int len,
float dest[]);
+MEDIA_EXPORT std::pair<float, float> EWMAAndMaxPower_SSE(
+ float initial_value, const float src[], int len, float smoothing_factor);
#endif
#if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
@@ -27,6 +33,8 @@ MEDIA_EXPORT void FMAC_NEON(const float src[], float scale, int len,
float dest[]);
MEDIA_EXPORT void FMUL_NEON(const float src[], float scale, int len,
float dest[]);
+MEDIA_EXPORT std::pair<float, float> EWMAAndMaxPower_NEON(
+ float initial_value, const float src[], int len, float smoothing_factor);
#endif
} // namespace vector_math
diff --git a/media/base/vector_math_unittest.cc b/media/base/vector_math_unittest.cc
index 32e5ea4..f8278ce 100644
--- a/media/base/vector_math_unittest.cc
+++ b/media/base/vector_math_unittest.cc
@@ -138,4 +138,248 @@ TEST_F(VectorMathTest, FMUL) {
#endif
}
+namespace {
+
+class EWMATestScenario {
+ public:
+ EWMATestScenario(float initial_value, const float src[], int len,
+ float smoothing_factor)
+ : initial_value_(initial_value),
+ data_(static_cast<float*>(
+ len == 0 ? NULL :
+ base::AlignedAlloc(len * sizeof(float),
+ vector_math::kRequiredAlignment))),
+ data_len_(len),
+ smoothing_factor_(smoothing_factor),
+ expected_final_avg_(initial_value),
+ expected_max_(0.0f) {
+ if (data_len_ > 0)
+ memcpy(data_.get(), src, len * sizeof(float));
+ }
+
+ // Copy constructor and assignment operator for ::testing::Values(...).
+ EWMATestScenario(const EWMATestScenario& other) { *this = other; }
+ EWMATestScenario& operator=(const EWMATestScenario& other) {
+ this->initial_value_ = other.initial_value_;
+ this->smoothing_factor_ = other.smoothing_factor_;
+ if (other.data_len_ == 0) {
+ this->data_.reset();
+ } else {
+ this->data_.reset(static_cast<float*>(
+ base::AlignedAlloc(other.data_len_ * sizeof(float),
+ vector_math::kRequiredAlignment)));
+ memcpy(this->data_.get(), other.data_.get(),
+ other.data_len_ * sizeof(float));
+ }
+ this->data_len_ = other.data_len_;
+ this->expected_final_avg_ = other.expected_final_avg_;
+ this->expected_max_ = other.expected_max_;
+ return *this;
+ }
+
+ EWMATestScenario ScaledBy(float scale) const {
+ EWMATestScenario result(*this);
+ float* p = result.data_.get();
+ float* const p_end = p + result.data_len_;
+ for (; p < p_end; ++p)
+ *p *= scale;
+ return result;
+ }
+
+ EWMATestScenario WithImpulse(float value, int offset) const {
+ EWMATestScenario result(*this);
+ result.data_.get()[offset] = value;
+ return result;
+ }
+
+ EWMATestScenario HasExpectedResult(float final_avg_value,
+ float max_value) const {
+ EWMATestScenario result(*this);
+ result.expected_final_avg_ = final_avg_value;
+ result.expected_max_ = max_value;
+ return result;
+ }
+
+ void RunTest() const {
+ {
+ SCOPED_TRACE("EWMAAndMaxPower");
+ const std::pair<float, float>& result = vector_math::EWMAAndMaxPower(
+ initial_value_, data_.get(), data_len_, smoothing_factor_);
+ EXPECT_NEAR(expected_final_avg_, result.first, 0.0000001f);
+ EXPECT_NEAR(expected_max_, result.second, 0.0000001f);
+ }
+
+ {
+ SCOPED_TRACE("EWMAAndMaxPower_C");
+ const std::pair<float, float>& result = vector_math::EWMAAndMaxPower_C(
+ initial_value_, data_.get(), data_len_, smoothing_factor_);
+ EXPECT_NEAR(expected_final_avg_, result.first, 0.0000001f);
+ EXPECT_NEAR(expected_max_, result.second, 0.0000001f);
+ }
+
+#if defined(ARCH_CPU_X86_FAMILY)
+ {
+ ASSERT_TRUE(base::CPU().has_sse());
+ SCOPED_TRACE("EWMAAndMaxPower_SSE");
+ const std::pair<float, float>& result = vector_math::EWMAAndMaxPower_SSE(
+ initial_value_, data_.get(), data_len_, smoothing_factor_);
+ EXPECT_NEAR(expected_final_avg_, result.first, 0.0000001f);
+ EXPECT_NEAR(expected_max_, result.second, 0.0000001f);
+ }
+#endif
+
+#if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
+ {
+ SCOPED_TRACE("EWMAAndMaxPower_NEON");
+ const std::pair<float, float>& result = vector_math::EWMAAndMaxPower_NEON(
+ initial_value_, data_.get(), data_len_, smoothing_factor_);
+ EXPECT_NEAR(expected_final_avg_, result.first, 0.0000001f);
+ EXPECT_NEAR(expected_max_, result.second, 0.0000001f);
+ }
+#endif
+ }
+
+ private:
+ float initial_value_;
+ scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> data_;
+ int data_len_;
+ float smoothing_factor_;
+ float expected_final_avg_;
+ float expected_max_;
+};
+
+} // namespace
+
+typedef testing::TestWithParam<EWMATestScenario> VectorMathEWMAAndMaxPowerTest;
+
+TEST_P(VectorMathEWMAAndMaxPowerTest, Correctness) {
+ GetParam().RunTest();
+}
+
+static const float kZeros[] = { // 32 zeros
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+static const float kOnes[] = { // 32 ones
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+};
+
+static const float kCheckerboard[] = { // 32 alternating 0, 1
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+};
+
+static const float kInverseCheckerboard[] = { // 32 alternating 1, 0
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0
+};
+
+INSTANTIATE_TEST_CASE_P(
+ Scenarios, VectorMathEWMAAndMaxPowerTest,
+ ::testing::Values(
+ // Zero-length input: Result should equal initial value.
+ EWMATestScenario(0.0f, NULL, 0, 0.0f).HasExpectedResult(0.0f, 0.0f),
+ EWMATestScenario(1.0f, NULL, 0, 0.0f).HasExpectedResult(1.0f, 0.0f),
+
+ // Smoothing factor of zero: Samples have no effect on result.
+ EWMATestScenario(0.0f, kOnes, 32, 0.0f).HasExpectedResult(0.0f, 1.0f),
+ EWMATestScenario(1.0f, kZeros, 32, 0.0f).HasExpectedResult(1.0f, 0.0f),
+
+ // Smoothing factor of one: Result = last sample squared.
+ EWMATestScenario(0.0f, kCheckerboard, 32, 1.0f)
+ .ScaledBy(2.0f)
+ .HasExpectedResult(4.0f, 4.0f),
+ EWMATestScenario(1.0f, kInverseCheckerboard, 32, 1.0f)
+ .ScaledBy(2.0f)
+ .HasExpectedResult(0.0f, 4.0f),
+
+ // Smoothing factor of 1/4, muted signal.
+ EWMATestScenario(1.0f, kZeros, 1, 0.25f)
+ .HasExpectedResult(powf(0.75, 1.0f), 0.0f),
+ EWMATestScenario(1.0f, kZeros, 2, 0.25f)
+ .HasExpectedResult(powf(0.75, 2.0f), 0.0f),
+ EWMATestScenario(1.0f, kZeros, 3, 0.25f)
+ .HasExpectedResult(powf(0.75, 3.0f), 0.0f),
+ EWMATestScenario(1.0f, kZeros, 12, 0.25f)
+ .HasExpectedResult(powf(0.75, 12.0f), 0.0f),
+ EWMATestScenario(1.0f, kZeros, 13, 0.25f)
+ .HasExpectedResult(powf(0.75, 13.0f), 0.0f),
+ EWMATestScenario(1.0f, kZeros, 14, 0.25f)
+ .HasExpectedResult(powf(0.75, 14.0f), 0.0f),
+ EWMATestScenario(1.0f, kZeros, 15, 0.25f)
+ .HasExpectedResult(powf(0.75, 15.0f), 0.0f),
+
+ // Smoothing factor of 1/4, constant full-amplitude signal.
+ EWMATestScenario(0.0f, kOnes, 1, 0.25f).HasExpectedResult(0.25f, 1.0f),
+ EWMATestScenario(0.0f, kOnes, 2, 0.25f)
+ .HasExpectedResult(0.4375f, 1.0f),
+ EWMATestScenario(0.0f, kOnes, 3, 0.25f)
+ .HasExpectedResult(0.578125f, 1.0f),
+ EWMATestScenario(0.0f, kOnes, 12, 0.25f)
+ .HasExpectedResult(0.96832365f, 1.0f),
+ EWMATestScenario(0.0f, kOnes, 13, 0.25f)
+ .HasExpectedResult(0.97624274f, 1.0f),
+ EWMATestScenario(0.0f, kOnes, 14, 0.25f)
+ .HasExpectedResult(0.98218205f, 1.0f),
+ EWMATestScenario(0.0f, kOnes, 15, 0.25f)
+ .HasExpectedResult(0.98663654f, 1.0f),
+
+ // Smoothing factor of 1/4, checkerboard signal.
+ EWMATestScenario(0.0f, kCheckerboard, 1, 0.25f)
+ .HasExpectedResult(0.0f, 0.0f),
+ EWMATestScenario(0.0f, kCheckerboard, 2, 0.25f)
+ .HasExpectedResult(0.25f, 1.0f),
+ EWMATestScenario(0.0f, kCheckerboard, 3, 0.25f)
+ .HasExpectedResult(0.1875f, 1.0f),
+ EWMATestScenario(0.0f, kCheckerboard, 12, 0.25f)
+ .HasExpectedResult(0.55332780f, 1.0f),
+ EWMATestScenario(0.0f, kCheckerboard, 13, 0.25f)
+ .HasExpectedResult(0.41499585f, 1.0f),
+ EWMATestScenario(0.0f, kCheckerboard, 14, 0.25f)
+ .HasExpectedResult(0.56124689f, 1.0f),
+ EWMATestScenario(0.0f, kCheckerboard, 15, 0.25f)
+ .HasExpectedResult(0.42093517f, 1.0f),
+
+ // Smoothing factor of 1/4, inverse checkerboard signal.
+ EWMATestScenario(0.0f, kInverseCheckerboard, 1, 0.25f)
+ .HasExpectedResult(0.25f, 1.0f),
+ EWMATestScenario(0.0f, kInverseCheckerboard, 2, 0.25f)
+ .HasExpectedResult(0.1875f, 1.0f),
+ EWMATestScenario(0.0f, kInverseCheckerboard, 3, 0.25f)
+ .HasExpectedResult(0.390625f, 1.0f),
+ EWMATestScenario(0.0f, kInverseCheckerboard, 12, 0.25f)
+ .HasExpectedResult(0.41499585f, 1.0f),
+ EWMATestScenario(0.0f, kInverseCheckerboard, 13, 0.25f)
+ .HasExpectedResult(0.56124689f, 1.0f),
+ EWMATestScenario(0.0f, kInverseCheckerboard, 14, 0.25f)
+ .HasExpectedResult(0.42093517f, 1.0f),
+ EWMATestScenario(0.0f, kInverseCheckerboard, 15, 0.25f)
+ .HasExpectedResult(0.56570137f, 1.0f),
+
+ // Smoothing factor of 1/4, impulse signal.
+ EWMATestScenario(0.0f, kZeros, 3, 0.25f)
+ .WithImpulse(2.0f, 0)
+ .HasExpectedResult(0.562500f, 4.0f),
+ EWMATestScenario(0.0f, kZeros, 3, 0.25f)
+ .WithImpulse(2.0f, 1)
+ .HasExpectedResult(0.75f, 4.0f),
+ EWMATestScenario(0.0f, kZeros, 3, 0.25f)
+ .WithImpulse(2.0f, 2)
+ .HasExpectedResult(1.0f, 4.0f),
+ EWMATestScenario(0.0f, kZeros, 32, 0.25f)
+ .WithImpulse(2.0f, 0)
+ .HasExpectedResult(0.00013394f, 4.0f),
+ EWMATestScenario(0.0f, kZeros, 32, 0.25f)
+ .WithImpulse(2.0f, 1)
+ .HasExpectedResult(0.00017858f, 4.0f),
+ EWMATestScenario(0.0f, kZeros, 32, 0.25f)
+ .WithImpulse(2.0f, 2)
+ .HasExpectedResult(0.00023811f, 4.0f),
+ EWMATestScenario(0.0f, kZeros, 32, 0.25f)
+ .WithImpulse(2.0f, 3)
+ .HasExpectedResult(0.00031748f, 4.0f)
+ ));
+
} // namespace media