diff options
Diffstat (limited to 'media/base/audio_renderer_mixer.cc')
-rw-r--r-- | media/base/audio_renderer_mixer.cc | 66 |
1 files changed, 56 insertions, 10 deletions
diff --git a/media/base/audio_renderer_mixer.cc b/media/base/audio_renderer_mixer.cc index 6d23faa..1ca2f39 100644 --- a/media/base/audio_renderer_mixer.cc +++ b/media/base/audio_renderer_mixer.cc @@ -4,9 +4,15 @@ #include "media/base/audio_renderer_mixer.h" +#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__) +#include <xmmintrin.h> +#endif + #include "base/bind.h" #include "base/bind_helpers.h" +#include "base/cpu.h" #include "base/logging.h" +#include "base/memory/aligned_memory.h" #include "media/audio/audio_util.h" #include "media/base/limits.h" @@ -42,7 +48,7 @@ AudioRendererMixer::~AudioRendererMixer() { // Clean up |mixer_input_audio_data_|. for (size_t i = 0; i < mixer_input_audio_data_.size(); ++i) - delete [] mixer_input_audio_data_[i]; + base::AlignedFree(mixer_input_audio_data_[i]); mixer_input_audio_data_.clear(); // Ensures that all mixer inputs have stopped themselves prior to destruction @@ -84,11 +90,12 @@ void AudioRendererMixer::ProvideInput(const std::vector<float*>& audio_data, // Allocate staging area for each mixer input's audio data on first call. We // won't know how much to allocate until here because of resampling. if (mixer_input_audio_data_.size() == 0) { - // TODO(dalecurtis): If we switch to AVX/SSE optimization, we'll need to - // allocate these on 32-byte boundaries and ensure they're sized % 32 bytes. mixer_input_audio_data_.reserve(audio_data.size()); - for (size_t i = 0; i < audio_data.size(); ++i) - mixer_input_audio_data_.push_back(new float[number_of_frames]); + for (size_t i = 0; i < audio_data.size(); ++i) { + // Allocate audio data with a 16-byte alignment for SSE optimizations. + mixer_input_audio_data_.push_back(static_cast<float*>( + base::AlignedAlloc(sizeof(float) * number_of_frames, 16))); + } mixer_input_audio_data_size_ = number_of_frames; } @@ -120,12 +127,9 @@ void AudioRendererMixer::ProvideInput(const std::vector<float*>& audio_data, continue; // Volume adjust and mix each mixer input into |audio_data| after rendering. - // TODO(dalecurtis): Optimize with NEON/SSE/AVX vector_fmac from FFmpeg. for (size_t j = 0; j < audio_data.size(); ++j) { - float* dest = audio_data[j]; - float* source = mixer_input_audio_data_[j]; - for (int k = 0; k < frames_filled; ++k) - dest[k] += source[k] * static_cast<float>(volume); + VectorFMAC( + mixer_input_audio_data_[j], volume, frames_filled, audio_data[j]); } // No need to clamp values as InterleaveFloatToInt() will take care of this @@ -143,4 +147,46 @@ void AudioRendererMixer::OnRenderError() { } } +void AudioRendererMixer::VectorFMAC(const float src[], float scale, int len, + float dest[]) { + // Rely on function level static initialization to keep VectorFMACProc + // selection thread safe. + typedef void (*VectorFMACProc)(const float src[], float scale, int len, + float dest[]); +#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__) + static const VectorFMACProc kVectorFMACProc = + base::CPU().has_sse() ? VectorFMAC_SSE : VectorFMAC_C; +#else + static const VectorFMACProc kVectorFMACProc = VectorFMAC_C; +#endif + + return kVectorFMACProc(src, scale, len, dest); +} + +void AudioRendererMixer::VectorFMAC_C(const float src[], float scale, int len, + float dest[]) { + for (int i = 0; i < len; ++i) + dest[i] += src[i] * scale; +} + +#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__) +void AudioRendererMixer::VectorFMAC_SSE(const float src[], float scale, int len, + float dest[]) { + // Ensure |src| and |dest| are 16-byte aligned. + DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(src) & 0x0F); + DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(dest) & 0x0F); + + __m128 m_scale = _mm_set_ps1(scale); + int rem = len % 4; + for (int i = 0; i < len - rem; i += 4) { + _mm_store_ps(dest + i, _mm_add_ps(_mm_load_ps(dest + i), + _mm_mul_ps(_mm_load_ps(src + i), m_scale))); + } + + // Handle any remaining values that wouldn't fit in an SSE pass. + if (rem) + VectorFMAC_C(src + len - rem, scale, rem, dest + len - rem); +} +#endif + } // namespace media |