summary refs log tree commit diff stats
path: root/media/base/vector_math.cc
diff options
context:
space:
mode:
authordalecurtis@google.com <dalecurtis@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2013-04-23 18:39:48 +0000
committerdalecurtis@google.com <dalecurtis@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2013-04-23 18:39:48 +0000
commit7218e4ead141aaf8c3369705e683b54e9abcdaeb (patch)
treec66d0ab8ba44ddf2d0ac48dee3f502fbf44664b4 /media/base/vector_math.cc
parent8b257c9762e9051c1d4e794900d1084fd81913cd (diff)
downloadchromium_src-7218e4ead141aaf8c3369705e683b54e9abcdaeb.zip
chromium_src-7218e4ead141aaf8c3369705e683b54e9abcdaeb.tar.gz
chromium_src-7218e4ead141aaf8c3369705e683b54e9abcdaeb.tar.bz2
Add NEON optimizations for FMAC and FMUL.
Benchmarks show 2.60x and 2.77x speedups for FMAC and FMUL, respectively. BUG=none TEST=media_unittests Review URL: https://codereview.chromium.org/14188032 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@195855 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'media/base/vector_math.cc')
-rw-r--r--media/base/vector_math.cc37
1 files changed, 37 insertions, 0 deletions
diff --git a/media/base/vector_math.cc b/media/base/vector_math.cc
index f534d92..603ae0b 100644
--- a/media/base/vector_math.cc
+++ b/media/base/vector_math.cc
@@ -7,6 +7,11 @@
#include "base/cpu.h"
#include "base/logging.h"
+#include "build/build_config.h"
+
+#if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
+#include <arm_neon.h>
+#endif
namespace media {
namespace vector_math {
@@ -31,6 +36,8 @@ void FMAC(const float src[], float scale, int len, float dest[]) {
static const VectorFMACProc kVectorFMACProc =
base::CPU().has_sse() ? FMAC_SSE : FMAC_C;
#endif
+#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
+ static const VectorFMACProc kVectorFMACProc = FMAC_NEON;
#else
static const VectorFMACProc kVectorFMACProc = FMAC_C;
#endif
@@ -63,6 +70,8 @@ void FMUL(const float src[], float scale, int len, float dest[]) {
static const VectorFMULProc kVectorFMULProc =
base::CPU().has_sse() ? FMUL_SSE : FMUL_C;
#endif
+#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
+ static const VectorFMULProc kVectorFMULProc = FMUL_NEON;
#else
static const VectorFMULProc kVectorFMULProc = FMUL_C;
#endif
@@ -75,5 +84,33 @@ void FMUL_C(const float src[], float scale, int len, float dest[]) {
dest[i] = src[i] * scale;
}
+#if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
+// NEON version of FMAC: performs dest[i] += src[i] * scale for each i in
+// [0, len). Elements are processed four at a time with NEON intrinsics; any
+// leftover elements are handled by a scalar loop.
+void FMAC_NEON(const float src[], float scale, int len, float dest[]) {
+  // Count of leading elements covered by whole four-float vectors.
+  const int vector_len = len - (len % 4);
+  // |scale| replicated into all four lanes.
+  const float32x4_t scale_x4 = vmovq_n_f32(scale);
+
+  int i = 0;
+  for (; i < vector_len; i += 4) {
+    const float32x4_t dest_x4 = vld1q_f32(dest + i);
+    const float32x4_t src_x4 = vld1q_f32(src + i);
+    // Multiply-accumulate: dest_x4 + src_x4 * scale_x4, stored back to dest.
+    vst1q_f32(dest + i, vmlaq_f32(dest_x4, src_x4, scale_x4));
+  }
+
+  // Scalar pass over the trailing elements that do not fill a NEON vector.
+  for (; i < len; ++i)
+    dest[i] += src[i] * scale;
+}
+
+// NEON version of FMUL: performs dest[i] = src[i] * scale for each i in
+// [0, len). Elements are processed four at a time with NEON intrinsics; any
+// leftover elements are handled by a scalar loop.
+void FMUL_NEON(const float src[], float scale, int len, float dest[]) {
+  // Count of leading elements covered by whole four-float vectors.
+  const int vector_len = len - (len % 4);
+  // |scale| replicated into all four lanes.
+  const float32x4_t scale_x4 = vmovq_n_f32(scale);
+
+  int i = 0;
+  while (i < vector_len) {
+    const float32x4_t scaled_x4 = vmulq_f32(vld1q_f32(src + i), scale_x4);
+    vst1q_f32(dest + i, scaled_x4);
+    i += 4;
+  }
+
+  // Scalar pass over the trailing elements that do not fill a NEON vector.
+  for (; i < len; ++i)
+    dest[i] = src[i] * scale;
+}
+#endif
+
} // namespace vector_math
} // namespace media