diff options
-rw-r--r-- | media/base/sinc_resampler.cc | 57 | ||||
-rw-r--r-- | media/base/sinc_resampler.h | 9 |
2 files changed, 34 insertions, 32 deletions
diff --git a/media/base/sinc_resampler.cc b/media/base/sinc_resampler.cc index 09ff49d..00f9314 100644 --- a/media/base/sinc_resampler.cc +++ b/media/base/sinc_resampler.cc @@ -57,6 +57,9 @@ SincResampler::SincResampler(double io_sample_rate_ratio, const ReadCB& read_cb) base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))), input_buffer_(static_cast<float*>( base::AlignedAlloc(sizeof(float) * kBufferSize, 16))), +#if defined(ARCH_CPU_X86_FAMILY) && !defined(__SSE__) + convolve_proc_(base::CPU().has_sse() ? Convolve_SSE : Convolve_C), +#endif // Setup various region pointers in the buffer (see diagram above). r0_(input_buffer_.get() + kKernelSize / 2), r1_(input_buffer_.get()), @@ -136,6 +139,22 @@ void SincResampler::InitializeKernel() { } } +// If we know the minimum architecture avoid function hopping for CPU detection. +#if defined(ARCH_CPU_X86_FAMILY) +#if defined(__SSE__) +#define CONVOLVE_FUNC Convolve_SSE +#else +// X86 CPU detection required. |convolve_proc_| will be set upon construction. +// TODO(dalecurtis): Once Chrome moves to a SSE baseline this can be removed. +#define CONVOLVE_FUNC convolve_proc_ +#endif +#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) +#define CONVOLVE_FUNC Convolve_NEON +#else +// Unknown architecture. +#define CONVOLVE_FUNC Convolve_C +#endif + void SincResampler::Resample(float* destination, int frames) { int remaining_frames = frames; @@ -161,12 +180,17 @@ void SincResampler::Resample(float* destination, int frames) { float* k1 = kernel_storage_.get() + offset_idx * kKernelSize; float* k2 = k1 + kKernelSize; + // Ensure |k1|, |k2| are 16-byte aligned for SIMD usage. Should always be + // true so long as kKernelSize is a multiple of 16. + DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k1) & 0x0F); + DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k2) & 0x0F); + // Initialize input pointer based on quantized |virtual_source_idx_|. float* input_ptr = r1_ + source_idx; // Figure out how much to weight each kernel's "convolution". double kernel_interpolation_factor = virtual_offset_idx - offset_idx; - *destination++ = Convolve( + *destination++ = CONVOLVE_FUNC( input_ptr, k1, k2, kernel_interpolation_factor); // Advance the virtual index. @@ -190,6 +214,8 @@ void SincResampler::Resample(float* destination, int frames) { } } +#undef CONVOLVE_FUNC + int SincResampler::ChunkSize() const { return kBlockSize / io_sample_rate_ratio_; } @@ -200,35 +226,6 @@ void SincResampler::Flush() { memset(input_buffer_.get(), 0, sizeof(*input_buffer_.get()) * kBufferSize); } -float SincResampler::Convolve(const float* input_ptr, const float* k1, - const float* k2, - double kernel_interpolation_factor) { - // Ensure |k1|, |k2| are 16-byte aligned for SSE usage. Should always be true - // so long as kKernelSize is a multiple of 16. - DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k1) & 0x0F); - DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k2) & 0x0F); - - // Rely on function level static initialization to keep ConvolveProc selection - // thread safe. - typedef float (*ConvolveProc)(const float* src, const float* k1, - const float* k2, - double kernel_interpolation_factor); -#if defined(ARCH_CPU_X86_FAMILY) -#if defined(__SSE__) - static const ConvolveProc kConvolveProc = Convolve_SSE; -#else - static const ConvolveProc kConvolveProc = - base::CPU().has_sse() ? Convolve_SSE : Convolve_C; -#endif -#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) - static const ConvolveProc kConvolveProc = Convolve_NEON; -#else - static const ConvolveProc kConvolveProc = Convolve_C; -#endif - - return kConvolveProc(input_ptr, k1, k2, kernel_interpolation_factor); -} - float SincResampler::Convolve_C(const float* input_ptr, const float* k1, const float* k2, double kernel_interpolation_factor) { diff --git a/media/base/sinc_resampler.h b/media/base/sinc_resampler.h index f4eaf5f..7b1dfaa 100644 --- a/media/base/sinc_resampler.h +++ b/media/base/sinc_resampler.h @@ -73,8 +73,6 @@ class MEDIA_EXPORT SincResampler { // linearly interpolated using |kernel_interpolation_factor|. On x86, the // underlying implementation is chosen at run time based on SSE support. On // ARM, NEON support is chosen at compile time based on compilation flags. - static float Convolve(const float* input_ptr, const float* k1, - const float* k2, double kernel_interpolation_factor); static float Convolve_C(const float* input_ptr, const float* k1, const float* k2, double kernel_interpolation_factor); #if defined(ARCH_CPU_X86_FAMILY) @@ -108,6 +106,13 @@ class MEDIA_EXPORT SincResampler { // Data from the source is copied into this buffer for each processing pass. scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_buffer_; + // Stores the runtime selection of which Convolve function to use. +#if defined(ARCH_CPU_X86_FAMILY) && !defined(__SSE__) + typedef float (*ConvolveProc)(const float*, const float*, const float*, + double); + const ConvolveProc convolve_proc_; +#endif + // Pointers to the various regions inside |input_buffer_|. See the diagram at // the top of the .cc file for more information. float* const r0_; |