2 files changed, 34 insertions, 32 deletions
diff --git a/media/base/sinc_resampler.cc b/media/base/sinc_resampler.cc
index 09ff49d..00f9314 100644
--- a/media/base/sinc_resampler.cc
+++ b/media/base/sinc_resampler.cc
@@ -57,6 +57,9 @@ SincResampler::SincResampler(double io_sample_rate_ratio, const ReadCB& read_cb)
           base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))),
       input_buffer_(static_cast<float*>(
           base::AlignedAlloc(sizeof(float) * kBufferSize, 16))),
+#if defined(ARCH_CPU_X86_FAMILY) && !defined(__SSE__)
+      convolve_proc_(base::CPU().has_sse() ? Convolve_SSE : Convolve_C),
+#endif
       // Setup various region pointers in the buffer (see diagram above).
       r0_(input_buffer_.get() + kKernelSize / 2),
       r1_(input_buffer_.get()),
@@ -136,6 +139,22 @@ void SincResampler::InitializeKernel() {
   }
 }
 
+// If we know the minimum architecture avoid function hopping for CPU detection.
+#if defined(ARCH_CPU_X86_FAMILY)
+#if defined(__SSE__)
+#define CONVOLVE_FUNC Convolve_SSE
+#else
+// X86 CPU detection required.  |convolve_proc_| will be set upon construction.
+// TODO(dalecurtis): Once Chrome moves to a SSE baseline this can be removed.
+#define CONVOLVE_FUNC convolve_proc_
+#endif
+#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
+#define CONVOLVE_FUNC Convolve_NEON
+#else
+// Unknown architecture.
+#define CONVOLVE_FUNC Convolve_C
+#endif
+
 void SincResampler::Resample(float* destination, int frames) {
   int remaining_frames = frames;
 
@@ -161,12 +180,17 @@ void SincResampler::Resample(float* destination, int frames) {
       float* k1 = kernel_storage_.get() + offset_idx * kKernelSize;
       float* k2 = k1 + kKernelSize;
 
+      // Ensure |k1|, |k2| are 16-byte aligned for SIMD usage.  Should always be
+      // true so long as kKernelSize is a multiple of 16.
+      DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k1) & 0x0F);
+      DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k2) & 0x0F);
+
       // Initialize input pointer based on quantized |virtual_source_idx_|.
       float* input_ptr = r1_ + source_idx;
 
       // Figure out how much to weight each kernel's "convolution".
       double kernel_interpolation_factor = virtual_offset_idx - offset_idx;
-      *destination++ = Convolve(
+      *destination++ = CONVOLVE_FUNC(
           input_ptr, k1, k2, kernel_interpolation_factor);
 
       // Advance the virtual index.
@@ -190,6 +214,8 @@ void SincResampler::Resample(float* destination, int frames) {
   }
 }
 
+#undef CONVOLVE_FUNC
+
 int SincResampler::ChunkSize() const {
   return kBlockSize / io_sample_rate_ratio_;
 }
@@ -200,35 +226,6 @@ void SincResampler::Flush() {
   memset(input_buffer_.get(), 0, sizeof(*input_buffer_.get()) * kBufferSize);
 }
 
-float SincResampler::Convolve(const float* input_ptr, const float* k1,
-                              const float* k2,
-                              double kernel_interpolation_factor) {
-  // Ensure |k1|, |k2| are 16-byte aligned for SSE usage.  Should always be true
-  // so long as kKernelSize is a multiple of 16.
-  DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k1) & 0x0F);
-  DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k2) & 0x0F);
-
-  // Rely on function level static initialization to keep ConvolveProc selection
-  // thread safe.
-  typedef float (*ConvolveProc)(const float* src, const float* k1,
-                                const float* k2,
-                                double kernel_interpolation_factor);
-#if defined(ARCH_CPU_X86_FAMILY)
-#if defined(__SSE__)
-  static const ConvolveProc kConvolveProc = Convolve_SSE;
-#else
-  static const ConvolveProc kConvolveProc =
-      base::CPU().has_sse() ? Convolve_SSE : Convolve_C;
-#endif
-#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
-  static const ConvolveProc kConvolveProc = Convolve_NEON;
-#else
-  static const ConvolveProc kConvolveProc = Convolve_C;
-#endif
-
-  return kConvolveProc(input_ptr, k1, k2, kernel_interpolation_factor);
-}
-
 float SincResampler::Convolve_C(const float* input_ptr, const float* k1,
                                 const float* k2,
                                 double kernel_interpolation_factor) {
diff --git a/media/base/sinc_resampler.h b/media/base/sinc_resampler.h
index f4eaf5f..7b1dfaa 100644
--- a/media/base/sinc_resampler.h
+++ b/media/base/sinc_resampler.h
@@ -73,8 +73,6 @@ class MEDIA_EXPORT SincResampler {
   // linearly interpolated using |kernel_interpolation_factor|.  On x86, the
   // underlying implementation is chosen at run time based on SSE support.  On
   // ARM, NEON support is chosen at compile time based on compilation flags.
-  static float Convolve(const float* input_ptr, const float* k1,
-                        const float* k2, double kernel_interpolation_factor);
   static float Convolve_C(const float* input_ptr, const float* k1,
                           const float* k2, double kernel_interpolation_factor);
 #if defined(ARCH_CPU_X86_FAMILY)
@@ -108,6 +106,13 @@ class MEDIA_EXPORT SincResampler {
   // Data from the source is copied into this buffer for each processing pass.
   scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_buffer_;
 
+  // Stores the runtime selection of which Convolve function to use.
+#if defined(ARCH_CPU_X86_FAMILY) && !defined(__SSE__)
+  typedef float (*ConvolveProc)(const float*, const float*, const float*,
+                                double);
+  const ConvolveProc convolve_proc_;
+#endif
+
   // Pointers to the various regions inside |input_buffer_|.  See the diagram at
   // the top of the .cc file for more information.
   float* const r0_;