diff options
author | dalecurtis@google.com <dalecurtis@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-09-26 20:12:44 +0000 |
---|---|---|
committer | dalecurtis@google.com <dalecurtis@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-09-26 20:12:44 +0000 |
commit | 176316bd19b6b2472444b23a9b7069b6cd69c33e (patch) | |
tree | 7ffd421756475a5bce034e3220f3425492887300 /media/base/sinc_resampler.h | |
parent | 8e7ea1d6b76da317a70949868176a5d795994a2f (diff) | |
download | chromium_src-176316bd19b6b2472444b23a9b7069b6cd69c33e.zip chromium_src-176316bd19b6b2472444b23a9b7069b6cd69c33e.tar.gz chromium_src-176316bd19b6b2472444b23a9b7069b6cd69c33e.tar.bz2 |
Add ARM NEON intrinsic optimizations for SincResampler.
On an Exynos board these yielded a ~2.3x speedup:
Benchmarking 50000000 iterations:
Convolve_C took 5682.71ms.
Convolve_NEON (unaligned) took 2451.18ms; which is 2.32x faster than Convolve_C.
Convolve_NEON (aligned) took 2397.01ms; which is 2.37x faster than Convolve_C and 1.02x faster than Convolve_NEON (unaligned).
BUG=none
TEST=try bot, fischman.
Review URL: https://codereview.chromium.org/10960023
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@158870 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'media/base/sinc_resampler.h')
-rw-r--r-- | media/base/sinc_resampler.h | 9 |
1 files changed, 6 insertions, 3 deletions
diff --git a/media/base/sinc_resampler.h b/media/base/sinc_resampler.h index ef2f176..604192f 100644 --- a/media/base/sinc_resampler.h +++ b/media/base/sinc_resampler.h @@ -9,7 +9,6 @@ #include "base/gtest_prod_util.h" #include "base/memory/aligned_memory.h" #include "base/memory/scoped_ptr.h" -#include "build/build_config.h" #include "media/base/media_export.h" namespace media { @@ -45,8 +44,9 @@ class MEDIA_EXPORT SincResampler { void InitializeKernel(); // Compute convolution of |k1| and |k2| over |input_ptr|, resultant sums are - // linearly interpolated using |kernel_interpolation_factor|. The underlying - // implementation is chosen at run time based on SSE support. + // linearly interpolated using |kernel_interpolation_factor|. On x86, the + // underlying implementation is chosen at run time based on SSE support. On + // ARM, NEON support is chosen at compile time based on compilation flags. static float Convolve(const float* input_ptr, const float* k1, const float* k2, double kernel_interpolation_factor); static float Convolve_C(const float* input_ptr, const float* k1, @@ -54,6 +54,9 @@ class MEDIA_EXPORT SincResampler { static float Convolve_SSE(const float* input_ptr, const float* k1, const float* k2, double kernel_interpolation_factor); + static float Convolve_NEON(const float* input_ptr, const float* k1, + const float* k2, + double kernel_interpolation_factor); // The ratio of input / output sample rates. double io_sample_rate_ratio_; |