diff options
author | miu@chromium.org <miu@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-11-26 05:52:06 +0000 |
---|---|---|
committer | miu@chromium.org <miu@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-11-26 05:52:06 +0000 |
commit | d1bb9acf8f0c223159f10a168e35a486d9ba1dd4 (patch) | |
tree | a89d5eecd5edd921391ef8cfdd5fe5256d5392ec /media | |
parent | 979b4cb73c1a4332a02047afb960ffe59c015b6c (diff) | |
download | chromium_src-d1bb9acf8f0c223159f10a168e35a486d9ba1dd4.zip chromium_src-d1bb9acf8f0c223159f10a168e35a486d9ba1dd4.tar.gz chromium_src-d1bb9acf8f0c223159f10a168e35a486d9ba1dd4.tar.bz2 |
SIMD optimizations of the exponentially-weighted moving average (EWMA) computation in AudioPowerMonitor.
Benchmark tests show a 445% speed-up using the SSE implementation over the original code in AudioPowerMonitor::Scan(). Also worth noting: A minor change to the non-SIMD code allowed a 30% speed-up over the original!
TEST=media_unittests; and manually confirmed tab audio indicator still works properly
Review URL: https://codereview.chromium.org/84563002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@237268 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'media')
-rw-r--r-- | media/audio/audio_power_monitor.cc | 30 | ||||
-rw-r--r-- | media/base/simd/vector_math_sse.cc | 79 | ||||
-rw-r--r-- | media/base/vector_math.cc | 97 | ||||
-rw-r--r-- | media/base/vector_math.h | 12 | ||||
-rw-r--r-- | media/base/vector_math_perftest.cc | 56 | ||||
-rw-r--r-- | media/base/vector_math_testing.h | 8 | ||||
-rw-r--r-- | media/base/vector_math_unittest.cc | 244 |
7 files changed, 506 insertions, 20 deletions
diff --git a/media/audio/audio_power_monitor.cc b/media/audio/audio_power_monitor.cc index d8b9436..6536f46 100644 --- a/media/audio/audio_power_monitor.cc +++ b/media/audio/audio_power_monitor.cc @@ -11,6 +11,7 @@ #include "base/logging.h" #include "base/time/time.h" #include "media/base/audio_bus.h" +#include "media/base/vector_math.h" namespace media { @@ -36,30 +37,19 @@ void AudioPowerMonitor::Scan(const AudioBus& buffer, int num_frames) { return; // Calculate a new average power by applying a first-order low-pass filter - // over the audio samples in |buffer|. - // - // TODO(miu): Implement optimized SSE/NEON to more efficiently compute the - // results (in media/base/vector_math) in soon-upcoming change. + // (a.k.a. an exponentially-weighted moving average) over the audio samples in + // each channel in |buffer|. float sum_power = 0.0f; for (int i = 0; i < num_channels; ++i) { - float average_power_this_channel = average_power_; - bool clipped = false; - const float* p = buffer.channel(i); - const float* const end_of_samples = p + num_frames; - for (; p < end_of_samples; ++p) { - const float sample = *p; - const float sample_squared = sample * sample; - clipped |= (sample_squared > 1.0f); - average_power_this_channel += - (sample_squared - average_power_this_channel) * sample_weight_; - } + const std::pair<float, float> ewma_and_max = vector_math::EWMAAndMaxPower( + average_power_, buffer.channel(i), num_frames, sample_weight_); // If data in audio buffer is garbage, ignore its effect on the result. - if (base::IsNaN(average_power_this_channel)) { - average_power_this_channel = average_power_; - clipped = false; + if (!base::IsFinite(ewma_and_max.first)) { + sum_power += average_power_; + } else { + sum_power += ewma_and_max.first; + has_clipped_ |= (ewma_and_max.second > 1.0f); } - sum_power += average_power_this_channel; - has_clipped_ |= clipped; } // Update accumulated results, with clamping for sanity. 
diff --git a/media/base/simd/vector_math_sse.cc b/media/base/simd/vector_math_sse.cc index 39bcaa0..c212122 100644 --- a/media/base/simd/vector_math_sse.cc +++ b/media/base/simd/vector_math_sse.cc @@ -4,6 +4,8 @@ #include "media/base/vector_math_testing.h" +#include <algorithm> + #include <xmmintrin.h> // NOLINT namespace media { @@ -35,5 +37,82 @@ void FMAC_SSE(const float src[], float scale, int len, float dest[]) { dest[i] += src[i] * scale; } +// Convenience macro to extract float 0 through 3 from the vector |a|. This is +// needed because compilers other than clang don't support access via +// operator[](). +#define EXTRACT_FLOAT(a, i) \ + (i == 0 ? \ + _mm_cvtss_f32(a) : \ + _mm_cvtss_f32(_mm_shuffle_ps(a, a, i))) + +std::pair<float, float> EWMAAndMaxPower_SSE( + float initial_value, const float src[], int len, float smoothing_factor) { + // When the recurrence is unrolled, we see that we can split it into 4 + // separate lanes of evaluation: + // + // y[n] = a(S[n]^2) + (1-a)(y[n-1]) + // = a(S[n]^2) + (1-a)^1(aS[n-1]^2) + (1-a)^2(aS[n-2]^2) + ... + // = z[n] + (1-a)^1(z[n-1]) + (1-a)^2(z[n-2]) + (1-a)^3(z[n-3]) + // + // where z[n] = a(S[n]^2) + (1-a)^4(z[n-4]) + (1-a)^8(z[n-8]) + ... + // + // Thus, the strategy here is to compute z[n], z[n-1], z[n-2], and z[n-3] in + // each of the 4 lanes, and then combine them to give y[n]. + + const int rem = len % 4; + const int last_index = len - rem; + + const __m128 smoothing_factor_x4 = _mm_set_ps1(smoothing_factor); + const float weight_prev = 1.0f - smoothing_factor; + const __m128 weight_prev_x4 = _mm_set_ps1(weight_prev); + const __m128 weight_prev_squared_x4 = + _mm_mul_ps(weight_prev_x4, weight_prev_x4); + const __m128 weight_prev_4th_x4 = + _mm_mul_ps(weight_prev_squared_x4, weight_prev_squared_x4); + + // Compute z[n], z[n-1], z[n-2], and z[n-3] in parallel in lanes 3, 2, 1 and + // 0, respectively. 
+ __m128 max_x4 = _mm_setzero_ps(); + __m128 ewma_x4 = _mm_setr_ps(0.0f, 0.0f, 0.0f, initial_value); + int i; + for (i = 0; i < last_index; i += 4) { + ewma_x4 = _mm_mul_ps(ewma_x4, weight_prev_4th_x4); + const __m128 sample_x4 = _mm_load_ps(src + i); + const __m128 sample_squared_x4 = _mm_mul_ps(sample_x4, sample_x4); + max_x4 = _mm_max_ps(max_x4, sample_squared_x4); + // Note: The compiler optimizes this to a single multiply-and-accumulate + // instruction: + ewma_x4 = _mm_add_ps(ewma_x4, + _mm_mul_ps(sample_squared_x4, smoothing_factor_x4)); + } + + // y[n] = z[n] + (1-a)^1(z[n-1]) + (1-a)^2(z[n-2]) + (1-a)^3(z[n-3]) + float ewma = EXTRACT_FLOAT(ewma_x4, 3); + ewma_x4 = _mm_mul_ps(ewma_x4, weight_prev_x4); + ewma += EXTRACT_FLOAT(ewma_x4, 2); + ewma_x4 = _mm_mul_ps(ewma_x4, weight_prev_x4); + ewma += EXTRACT_FLOAT(ewma_x4, 1); + ewma_x4 = _mm_mul_ss(ewma_x4, weight_prev_x4); + ewma += EXTRACT_FLOAT(ewma_x4, 0); + + // Fold the maximums together to get the overall maximum. + max_x4 = _mm_max_ps(max_x4, + _mm_shuffle_ps(max_x4, max_x4, _MM_SHUFFLE(3, 3, 1, 1))); + max_x4 = _mm_max_ss(max_x4, _mm_shuffle_ps(max_x4, max_x4, 2)); + + std::pair<float, float> result(ewma, EXTRACT_FLOAT(max_x4, 0)); + + // Handle remaining values at the end of |src|. 
+ for (; i < len; ++i) { + result.first *= weight_prev; + const float sample = src[i]; + const float sample_squared = sample * sample; + result.first += sample_squared * smoothing_factor; + result.second = std::max(result.second, sample_squared); + } + + return result; +} + } // namespace vector_math } // namespace media diff --git a/media/base/vector_math.cc b/media/base/vector_math.cc index de946ca..32584f5 100644 --- a/media/base/vector_math.cc +++ b/media/base/vector_math.cc @@ -5,6 +5,8 @@ #include "media/base/vector_math.h" #include "media/base/vector_math_testing.h" +#include <algorithm> + #include "base/cpu.h" #include "base/logging.h" #include "build/build_config.h" @@ -23,33 +25,42 @@ namespace vector_math { #if defined(__SSE__) #define FMAC_FUNC FMAC_SSE #define FMUL_FUNC FMUL_SSE +#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_SSE void Initialize() {} #else // X86 CPU detection required. Functions will be set by Initialize(). // TODO(dalecurtis): Once Chrome moves to an SSE baseline this can be removed. #define FMAC_FUNC g_fmac_proc_ #define FMUL_FUNC g_fmul_proc_ +#define EWMAAndMaxPower_FUNC g_ewma_power_proc_ typedef void (*MathProc)(const float src[], float scale, int len, float dest[]); static MathProc g_fmac_proc_ = NULL; static MathProc g_fmul_proc_ = NULL; +typedef std::pair<float, float> (*EWMAAndMaxPowerProc)( + float initial_value, const float src[], int len, float smoothing_factor); +static EWMAAndMaxPowerProc g_ewma_power_proc_ = NULL; void Initialize() { CHECK(!g_fmac_proc_); CHECK(!g_fmul_proc_); + CHECK(!g_ewma_power_proc_); const bool kUseSSE = base::CPU().has_sse(); g_fmac_proc_ = kUseSSE ? FMAC_SSE : FMAC_C; g_fmul_proc_ = kUseSSE ? FMUL_SSE : FMUL_C; + g_ewma_power_proc_ = kUseSSE ? 
EWMAAndMaxPower_SSE : EWMAAndMaxPower_C; } #endif #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) #define FMAC_FUNC FMAC_NEON #define FMUL_FUNC FMUL_NEON +#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_NEON void Initialize() {} #else // Unknown architecture. #define FMAC_FUNC FMAC_C #define FMUL_FUNC FMUL_C +#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_C void Initialize() {} #endif @@ -77,6 +88,27 @@ void FMUL_C(const float src[], float scale, int len, float dest[]) { dest[i] = src[i] * scale; } +std::pair<float, float> EWMAAndMaxPower( + float initial_value, const float src[], int len, float smoothing_factor) { + // Ensure |src| is 16-byte aligned. + DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(src) & (kRequiredAlignment - 1)); + return EWMAAndMaxPower_FUNC(initial_value, src, len, smoothing_factor); +} + +std::pair<float, float> EWMAAndMaxPower_C( + float initial_value, const float src[], int len, float smoothing_factor) { + std::pair<float, float> result(initial_value, 0.0f); + const float weight_prev = 1.0f - smoothing_factor; + for (int i = 0; i < len; ++i) { + result.first *= weight_prev; + const float sample = src[i]; + const float sample_squared = sample * sample; + result.first += sample_squared * smoothing_factor; + result.second = std::max(result.second, sample_squared); + } + return result; +} + #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) void FMAC_NEON(const float src[], float scale, int len, float dest[]) { const int rem = len % 4; @@ -103,6 +135,71 @@ void FMUL_NEON(const float src[], float scale, int len, float dest[]) { for (int i = last_index; i < len; ++i) dest[i] = src[i] * scale; } + +std::pair<float, float> EWMAAndMaxPower_NEON( + float initial_value, const float src[], int len, float smoothing_factor) { + // When the recurrence is unrolled, we see that we can split it into 4 + // separate lanes of evaluation: + // + // y[n] = a(S[n]^2) + (1-a)(y[n-1]) + // = a(S[n]^2) + (1-a)^1(aS[n-1]^2) + (1-a)^2(aS[n-2]^2) + ... 
+ // = z[n] + (1-a)^1(z[n-1]) + (1-a)^2(z[n-2]) + (1-a)^3(z[n-3]) + // + // where z[n] = a(S[n]^2) + (1-a)^4(z[n-4]) + (1-a)^8(z[n-8]) + ... + // + // Thus, the strategy here is to compute z[n], z[n-1], z[n-2], and z[n-3] in + // each of the 4 lanes, and then combine them to give y[n]. + + const int rem = len % 4; + const int last_index = len - rem; + + const float32x4_t smoothing_factor_x4 = vdupq_n_f32(smoothing_factor); + const float weight_prev = 1.0f - smoothing_factor; + const float32x4_t weight_prev_x4 = vdupq_n_f32(weight_prev); + const float32x4_t weight_prev_squared_x4 = + vmulq_f32(weight_prev_x4, weight_prev_x4); + const float32x4_t weight_prev_4th_x4 = + vmulq_f32(weight_prev_squared_x4, weight_prev_squared_x4); + + // Compute z[n], z[n-1], z[n-2], and z[n-3] in parallel in lanes 3, 2, 1 and + // 0, respectively. + float32x4_t max_x4 = vdupq_n_f32(0.0f); + float32x4_t ewma_x4 = vsetq_lane_f32(initial_value, vdupq_n_f32(0.0f), 3); + int i; + for (i = 0; i < last_index; i += 4) { + ewma_x4 = vmulq_f32(ewma_x4, weight_prev_4th_x4); + const float32x4_t sample_x4 = vld1q_f32(src + i); + const float32x4_t sample_squared_x4 = vmulq_f32(sample_x4, sample_x4); + max_x4 = vmaxq_f32(max_x4, sample_squared_x4); + ewma_x4 = vmlaq_f32(ewma_x4, sample_squared_x4, smoothing_factor_x4); + } + + // y[n] = z[n] + (1-a)^1(z[n-1]) + (1-a)^2(z[n-2]) + (1-a)^3(z[n-3]) + float ewma = vgetq_lane_f32(ewma_x4, 3); + ewma_x4 = vmulq_f32(ewma_x4, weight_prev_x4); + ewma += vgetq_lane_f32(ewma_x4, 2); + ewma_x4 = vmulq_f32(ewma_x4, weight_prev_x4); + ewma += vgetq_lane_f32(ewma_x4, 1); + ewma_x4 = vmulq_f32(ewma_x4, weight_prev_x4); + ewma += vgetq_lane_f32(ewma_x4, 0); + + // Fold the maximums together to get the overall maximum. + float32x2_t max_x2 = vpmax_f32(vget_low_f32(max_x4), vget_high_f32(max_x4)); + max_x2 = vpmax_f32(max_x2, max_x2); + + std::pair<float, float> result(ewma, vget_lane_f32(max_x2, 0)); + + // Handle remaining values at the end of |src|. 
+ for (; i < len; ++i) { + result.first *= weight_prev; + const float sample = src[i]; + const float sample_squared = sample * sample; + result.first += sample_squared * smoothing_factor; + result.second = std::max(result.second, sample_squared); + } + + return result; +} #endif } // namespace vector_math diff --git a/media/base/vector_math.h b/media/base/vector_math.h index 4764f0b..a4dea37 100644 --- a/media/base/vector_math.h +++ b/media/base/vector_math.h @@ -5,6 +5,8 @@ #ifndef MEDIA_BASE_VECTOR_MATH_H_ #define MEDIA_BASE_VECTOR_MATH_H_ +#include <utility> + #include "media/base/media_export.h" namespace media { @@ -26,6 +28,16 @@ MEDIA_EXPORT void FMAC(const float src[], float scale, int len, float dest[]); // |dest| must be aligned by kRequiredAlignment. MEDIA_EXPORT void FMUL(const float src[], float scale, int len, float dest[]); +// Computes the exponentially-weighted moving average power of a signal by +// iterating the recurrence: +// +// y[-1] = initial_value +// y[n] = smoothing_factor * src[n]^2 + (1-smoothing_factor) * y[n-1] +// +// Returns the final average power and the maximum squared element value. 
+MEDIA_EXPORT std::pair<float, float> EWMAAndMaxPower( + float initial_value, const float src[], int len, float smoothing_factor); + } // namespace vector_math } // namespace media diff --git a/media/base/vector_math_perftest.cc b/media/base/vector_math_perftest.cc index 88ac551..9742f2e 100644 --- a/media/base/vector_math_perftest.cc +++ b/media/base/vector_math_perftest.cc @@ -17,6 +17,7 @@ using std::fill; namespace media { static const int kBenchmarkIterations = 200000; +static const int kEWMABenchmarkIterations = 50000; static const float kScale = 0.5; static const int kVectorSize = 8192; @@ -53,6 +54,25 @@ class VectorMathPerfTest : public testing::Test { true); } + void RunBenchmark( + std::pair<float, float> (*fn)(float, const float[], int, float), + int len, + const std::string& test_name, + const std::string& trace_name) { + TimeTicks start = TimeTicks::HighResNow(); + for (int i = 0; i < kEWMABenchmarkIterations; ++i) { + fn(0.5f, input_vector_.get(), len, 0.1f); + } + double total_time_milliseconds = + (TimeTicks::HighResNow() - start).InMillisecondsF(); + perf_test::PrintResult(test_name, + "", + trace_name, + kEWMABenchmarkIterations / total_time_milliseconds, + "runs/ms", + true); + } + protected: scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_vector_; scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> output_vector_; @@ -122,4 +142,40 @@ TEST_F(VectorMathPerfTest, FMUL) { #undef FMUL_FUNC +#if defined(ARCH_CPU_X86_FAMILY) +#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_SSE +#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) +#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_NEON +#endif + +// Benchmark for each optimized vector_math::EWMAAndMaxPower() method. +TEST_F(VectorMathPerfTest, EWMAAndMaxPower) { + // Benchmark EWMAAndMaxPower_C(). 
+ RunBenchmark(vector_math::EWMAAndMaxPower_C, + kVectorSize, + "vector_math_ewma_and_max_power", + "unoptimized"); +#if defined(EWMAAndMaxPower_FUNC) +#if defined(ARCH_CPU_X86_FAMILY) + ASSERT_TRUE(base::CPU().has_sse()); +#endif + // Benchmark EWMAAndMaxPower_FUNC() with unaligned size. + ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / + sizeof(float)), 0U); + RunBenchmark(vector_math::EWMAAndMaxPower_FUNC, + kVectorSize - 1, + "vector_math_ewma_and_max_power", + "optimized_unaligned"); + // Benchmark EWMAAndMaxPower_FUNC() with aligned size. + ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), + 0U); + RunBenchmark(vector_math::EWMAAndMaxPower_FUNC, + kVectorSize, + "vector_math_ewma_and_max_power", + "optimized_aligned"); +#endif +} + +#undef EWMAAndMaxPower_FUNC + } // namespace media diff --git a/media/base/vector_math_testing.h b/media/base/vector_math_testing.h index 02d14f8..b0b30440 100644 --- a/media/base/vector_math_testing.h +++ b/media/base/vector_math_testing.h @@ -5,6 +5,8 @@ #ifndef MEDIA_BASE_VECTOR_MATH_TESTING_H_ #define MEDIA_BASE_VECTOR_MATH_TESTING_H_ +#include <utility> + #include "build/build_config.h" #include "media/base/media_export.h" @@ -14,12 +16,16 @@ namespace vector_math { // Optimized versions exposed for testing. See vector_math.h for details. 
MEDIA_EXPORT void FMAC_C(const float src[], float scale, int len, float dest[]); MEDIA_EXPORT void FMUL_C(const float src[], float scale, int len, float dest[]); +MEDIA_EXPORT std::pair<float, float> EWMAAndMaxPower_C( + float initial_value, const float src[], int len, float smoothing_factor); #if defined(ARCH_CPU_X86_FAMILY) MEDIA_EXPORT void FMAC_SSE(const float src[], float scale, int len, float dest[]); MEDIA_EXPORT void FMUL_SSE(const float src[], float scale, int len, float dest[]); +MEDIA_EXPORT std::pair<float, float> EWMAAndMaxPower_SSE( + float initial_value, const float src[], int len, float smoothing_factor); #endif #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) @@ -27,6 +33,8 @@ MEDIA_EXPORT void FMAC_NEON(const float src[], float scale, int len, float dest[]); MEDIA_EXPORT void FMUL_NEON(const float src[], float scale, int len, float dest[]); +MEDIA_EXPORT std::pair<float, float> EWMAAndMaxPower_NEON( + float initial_value, const float src[], int len, float smoothing_factor); #endif } // namespace vector_math diff --git a/media/base/vector_math_unittest.cc b/media/base/vector_math_unittest.cc index 32e5ea4..f8278ce 100644 --- a/media/base/vector_math_unittest.cc +++ b/media/base/vector_math_unittest.cc @@ -138,4 +138,248 @@ TEST_F(VectorMathTest, FMUL) { #endif } +namespace { + +class EWMATestScenario { + public: + EWMATestScenario(float initial_value, const float src[], int len, + float smoothing_factor) + : initial_value_(initial_value), + data_(static_cast<float*>( + len == 0 ? NULL : + base::AlignedAlloc(len * sizeof(float), + vector_math::kRequiredAlignment))), + data_len_(len), + smoothing_factor_(smoothing_factor), + expected_final_avg_(initial_value), + expected_max_(0.0f) { + if (data_len_ > 0) + memcpy(data_.get(), src, len * sizeof(float)); + } + + // Copy constructor and assignment operator for ::testing::Values(...). 
+ EWMATestScenario(const EWMATestScenario& other) { *this = other; } + EWMATestScenario& operator=(const EWMATestScenario& other) { + this->initial_value_ = other.initial_value_; + this->smoothing_factor_ = other.smoothing_factor_; + if (other.data_len_ == 0) { + this->data_.reset(); + } else { + this->data_.reset(static_cast<float*>( + base::AlignedAlloc(other.data_len_ * sizeof(float), + vector_math::kRequiredAlignment))); + memcpy(this->data_.get(), other.data_.get(), + other.data_len_ * sizeof(float)); + } + this->data_len_ = other.data_len_; + this->expected_final_avg_ = other.expected_final_avg_; + this->expected_max_ = other.expected_max_; + return *this; + } + + EWMATestScenario ScaledBy(float scale) const { + EWMATestScenario result(*this); + float* p = result.data_.get(); + float* const p_end = p + result.data_len_; + for (; p < p_end; ++p) + *p *= scale; + return result; + } + + EWMATestScenario WithImpulse(float value, int offset) const { + EWMATestScenario result(*this); + result.data_.get()[offset] = value; + return result; + } + + EWMATestScenario HasExpectedResult(float final_avg_value, + float max_value) const { + EWMATestScenario result(*this); + result.expected_final_avg_ = final_avg_value; + result.expected_max_ = max_value; + return result; + } + + void RunTest() const { + { + SCOPED_TRACE("EWMAAndMaxPower"); + const std::pair<float, float>& result = vector_math::EWMAAndMaxPower( + initial_value_, data_.get(), data_len_, smoothing_factor_); + EXPECT_NEAR(expected_final_avg_, result.first, 0.0000001f); + EXPECT_NEAR(expected_max_, result.second, 0.0000001f); + } + + { + SCOPED_TRACE("EWMAAndMaxPower_C"); + const std::pair<float, float>& result = vector_math::EWMAAndMaxPower_C( + initial_value_, data_.get(), data_len_, smoothing_factor_); + EXPECT_NEAR(expected_final_avg_, result.first, 0.0000001f); + EXPECT_NEAR(expected_max_, result.second, 0.0000001f); + } + +#if defined(ARCH_CPU_X86_FAMILY) + { + ASSERT_TRUE(base::CPU().has_sse()); + 
SCOPED_TRACE("EWMAAndMaxPower_SSE"); + const std::pair<float, float>& result = vector_math::EWMAAndMaxPower_SSE( + initial_value_, data_.get(), data_len_, smoothing_factor_); + EXPECT_NEAR(expected_final_avg_, result.first, 0.0000001f); + EXPECT_NEAR(expected_max_, result.second, 0.0000001f); + } +#endif + +#if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) + { + SCOPED_TRACE("EWMAAndMaxPower_NEON"); + const std::pair<float, float>& result = vector_math::EWMAAndMaxPower_NEON( + initial_value_, data_.get(), data_len_, smoothing_factor_); + EXPECT_NEAR(expected_final_avg_, result.first, 0.0000001f); + EXPECT_NEAR(expected_max_, result.second, 0.0000001f); + } +#endif + } + + private: + float initial_value_; + scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> data_; + int data_len_; + float smoothing_factor_; + float expected_final_avg_; + float expected_max_; +}; + +} // namespace + +typedef testing::TestWithParam<EWMATestScenario> VectorMathEWMAAndMaxPowerTest; + +TEST_P(VectorMathEWMAAndMaxPowerTest, Correctness) { + GetParam().RunTest(); +} + +static const float kZeros[] = { // 32 zeros + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +static const float kOnes[] = { // 32 ones + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +}; + +static const float kCheckerboard[] = { // 32 alternating 0, 1 + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 +}; + +static const float kInverseCheckerboard[] = { // 32 alternating 1, 0 + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 +}; + +INSTANTIATE_TEST_CASE_P( + Scenarios, VectorMathEWMAAndMaxPowerTest, + ::testing::Values( + // Zero-length input: Result should equal initial value. 
+ EWMATestScenario(0.0f, NULL, 0, 0.0f).HasExpectedResult(0.0f, 0.0f), + EWMATestScenario(1.0f, NULL, 0, 0.0f).HasExpectedResult(1.0f, 0.0f), + + // Smoothing factor of zero: Samples have no effect on result. + EWMATestScenario(0.0f, kOnes, 32, 0.0f).HasExpectedResult(0.0f, 1.0f), + EWMATestScenario(1.0f, kZeros, 32, 0.0f).HasExpectedResult(1.0f, 0.0f), + + // Smoothing factor of one: Result = last sample squared. + EWMATestScenario(0.0f, kCheckerboard, 32, 1.0f) + .ScaledBy(2.0f) + .HasExpectedResult(4.0f, 4.0f), + EWMATestScenario(1.0f, kInverseCheckerboard, 32, 1.0f) + .ScaledBy(2.0f) + .HasExpectedResult(0.0f, 4.0f), + + // Smoothing factor of 1/4, muted signal. + EWMATestScenario(1.0f, kZeros, 1, 0.25f) + .HasExpectedResult(powf(0.75, 1.0f), 0.0f), + EWMATestScenario(1.0f, kZeros, 2, 0.25f) + .HasExpectedResult(powf(0.75, 2.0f), 0.0f), + EWMATestScenario(1.0f, kZeros, 3, 0.25f) + .HasExpectedResult(powf(0.75, 3.0f), 0.0f), + EWMATestScenario(1.0f, kZeros, 12, 0.25f) + .HasExpectedResult(powf(0.75, 12.0f), 0.0f), + EWMATestScenario(1.0f, kZeros, 13, 0.25f) + .HasExpectedResult(powf(0.75, 13.0f), 0.0f), + EWMATestScenario(1.0f, kZeros, 14, 0.25f) + .HasExpectedResult(powf(0.75, 14.0f), 0.0f), + EWMATestScenario(1.0f, kZeros, 15, 0.25f) + .HasExpectedResult(powf(0.75, 15.0f), 0.0f), + + // Smoothing factor of 1/4, constant full-amplitude signal. + EWMATestScenario(0.0f, kOnes, 1, 0.25f).HasExpectedResult(0.25f, 1.0f), + EWMATestScenario(0.0f, kOnes, 2, 0.25f) + .HasExpectedResult(0.4375f, 1.0f), + EWMATestScenario(0.0f, kOnes, 3, 0.25f) + .HasExpectedResult(0.578125f, 1.0f), + EWMATestScenario(0.0f, kOnes, 12, 0.25f) + .HasExpectedResult(0.96832365f, 1.0f), + EWMATestScenario(0.0f, kOnes, 13, 0.25f) + .HasExpectedResult(0.97624274f, 1.0f), + EWMATestScenario(0.0f, kOnes, 14, 0.25f) + .HasExpectedResult(0.98218205f, 1.0f), + EWMATestScenario(0.0f, kOnes, 15, 0.25f) + .HasExpectedResult(0.98663654f, 1.0f), + + // Smoothing factor of 1/4, checkerboard signal. 
+ EWMATestScenario(0.0f, kCheckerboard, 1, 0.25f) + .HasExpectedResult(0.0f, 0.0f), + EWMATestScenario(0.0f, kCheckerboard, 2, 0.25f) + .HasExpectedResult(0.25f, 1.0f), + EWMATestScenario(0.0f, kCheckerboard, 3, 0.25f) + .HasExpectedResult(0.1875f, 1.0f), + EWMATestScenario(0.0f, kCheckerboard, 12, 0.25f) + .HasExpectedResult(0.55332780f, 1.0f), + EWMATestScenario(0.0f, kCheckerboard, 13, 0.25f) + .HasExpectedResult(0.41499585f, 1.0f), + EWMATestScenario(0.0f, kCheckerboard, 14, 0.25f) + .HasExpectedResult(0.56124689f, 1.0f), + EWMATestScenario(0.0f, kCheckerboard, 15, 0.25f) + .HasExpectedResult(0.42093517f, 1.0f), + + // Smoothing factor of 1/4, inverse checkerboard signal. + EWMATestScenario(0.0f, kInverseCheckerboard, 1, 0.25f) + .HasExpectedResult(0.25f, 1.0f), + EWMATestScenario(0.0f, kInverseCheckerboard, 2, 0.25f) + .HasExpectedResult(0.1875f, 1.0f), + EWMATestScenario(0.0f, kInverseCheckerboard, 3, 0.25f) + .HasExpectedResult(0.390625f, 1.0f), + EWMATestScenario(0.0f, kInverseCheckerboard, 12, 0.25f) + .HasExpectedResult(0.41499585f, 1.0f), + EWMATestScenario(0.0f, kInverseCheckerboard, 13, 0.25f) + .HasExpectedResult(0.56124689f, 1.0f), + EWMATestScenario(0.0f, kInverseCheckerboard, 14, 0.25f) + .HasExpectedResult(0.42093517f, 1.0f), + EWMATestScenario(0.0f, kInverseCheckerboard, 15, 0.25f) + .HasExpectedResult(0.56570137f, 1.0f), + + // Smoothing factor of 1/4, impulse signal. 
+ EWMATestScenario(0.0f, kZeros, 3, 0.25f) + .WithImpulse(2.0f, 0) + .HasExpectedResult(0.562500f, 4.0f), + EWMATestScenario(0.0f, kZeros, 3, 0.25f) + .WithImpulse(2.0f, 1) + .HasExpectedResult(0.75f, 4.0f), + EWMATestScenario(0.0f, kZeros, 3, 0.25f) + .WithImpulse(2.0f, 2) + .HasExpectedResult(1.0f, 4.0f), + EWMATestScenario(0.0f, kZeros, 32, 0.25f) + .WithImpulse(2.0f, 0) + .HasExpectedResult(0.00013394f, 4.0f), + EWMATestScenario(0.0f, kZeros, 32, 0.25f) + .WithImpulse(2.0f, 1) + .HasExpectedResult(0.00017858f, 4.0f), + EWMATestScenario(0.0f, kZeros, 32, 0.25f) + .WithImpulse(2.0f, 2) + .HasExpectedResult(0.00023811f, 4.0f), + EWMATestScenario(0.0f, kZeros, 32, 0.25f) + .WithImpulse(2.0f, 3) + .HasExpectedResult(0.00031748f, 4.0f) + )); + } // namespace media |