summaryrefslogtreecommitdiffstats
path: root/media/base/audio_renderer_mixer.cc
diff options
context:
space:
mode:
Diffstat (limited to 'media/base/audio_renderer_mixer.cc')
-rw-r--r--media/base/audio_renderer_mixer.cc66
1 files changed, 56 insertions, 10 deletions
diff --git a/media/base/audio_renderer_mixer.cc b/media/base/audio_renderer_mixer.cc
index 6d23faa..1ca2f39 100644
--- a/media/base/audio_renderer_mixer.cc
+++ b/media/base/audio_renderer_mixer.cc
@@ -4,9 +4,15 @@
#include "media/base/audio_renderer_mixer.h"
+#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
+#include <xmmintrin.h>
+#endif
+
#include "base/bind.h"
#include "base/bind_helpers.h"
+#include "base/cpu.h"
#include "base/logging.h"
+#include "base/memory/aligned_memory.h"
#include "media/audio/audio_util.h"
#include "media/base/limits.h"
@@ -42,7 +48,7 @@ AudioRendererMixer::~AudioRendererMixer() {
// Clean up |mixer_input_audio_data_|.
for (size_t i = 0; i < mixer_input_audio_data_.size(); ++i)
- delete [] mixer_input_audio_data_[i];
+ base::AlignedFree(mixer_input_audio_data_[i]);
mixer_input_audio_data_.clear();
// Ensures that all mixer inputs have stopped themselves prior to destruction
@@ -84,11 +90,12 @@ void AudioRendererMixer::ProvideInput(const std::vector<float*>& audio_data,
// Allocate staging area for each mixer input's audio data on first call. We
// won't know how much to allocate until here because of resampling.
if (mixer_input_audio_data_.size() == 0) {
- // TODO(dalecurtis): If we switch to AVX/SSE optimization, we'll need to
- // allocate these on 32-byte boundaries and ensure they're sized % 32 bytes.
mixer_input_audio_data_.reserve(audio_data.size());
- for (size_t i = 0; i < audio_data.size(); ++i)
- mixer_input_audio_data_.push_back(new float[number_of_frames]);
+ for (size_t i = 0; i < audio_data.size(); ++i) {
+ // Allocate audio data with a 16-byte alignment for SSE optimizations.
+ mixer_input_audio_data_.push_back(static_cast<float*>(
+ base::AlignedAlloc(sizeof(float) * number_of_frames, 16)));
+ }
mixer_input_audio_data_size_ = number_of_frames;
}
@@ -120,12 +127,9 @@ void AudioRendererMixer::ProvideInput(const std::vector<float*>& audio_data,
continue;
// Volume adjust and mix each mixer input into |audio_data| after rendering.
- // TODO(dalecurtis): Optimize with NEON/SSE/AVX vector_fmac from FFmpeg.
for (size_t j = 0; j < audio_data.size(); ++j) {
- float* dest = audio_data[j];
- float* source = mixer_input_audio_data_[j];
- for (int k = 0; k < frames_filled; ++k)
- dest[k] += source[k] * static_cast<float>(volume);
+ VectorFMAC(
+ mixer_input_audio_data_[j], volume, frames_filled, audio_data[j]);
}
// No need to clamp values as InterleaveFloatToInt() will take care of this
@@ -143,4 +147,46 @@ void AudioRendererMixer::OnRenderError() {
}
}
+// Multiplies each of the first |len| values of |src| by |scale| and adds the
+// result into the matching element of |dest|, forwarding to either the SSE or
+// the plain C implementation.
+void AudioRendererMixer::VectorFMAC(const float src[], float scale, int len,
+                                    float dest[]) {
+  // Common signature of the implementations that can be selected.
+  typedef void (*FMACProc)(const float src[], float scale, int len,
+                           float dest[]);
+
+  // The implementation is picked once, when the function level static below
+  // is initialized; that initialization is what is relied upon to keep the
+  // selection thread safe.
+#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
+  static const FMACProc kSelectedProc =
+      base::CPU().has_sse() ? VectorFMAC_SSE : VectorFMAC_C;
+#else
+  static const FMACProc kSelectedProc = VectorFMAC_C;
+#endif
+
+  kSelectedProc(src, scale, len, dest);
+}
+
+// Portable scalar implementation: for each of the first |len| elements,
+// dest[i] += src[i] * scale. Does nothing when |len| is zero or negative.
+void AudioRendererMixer::VectorFMAC_C(const float src[], float scale, int len,
+                                      float dest[]) {
+  // Walk both buffers in lockstep, counting down the values left to process.
+  for (int remaining = len; remaining > 0; --remaining, ++src, ++dest)
+    *dest += *src * scale;
+}
+
+#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
+// SSE implementation of dest[i] += src[i] * scale for the first |len| values.
+// Values are processed four at a time; anything left over is handed to
+// VectorFMAC_C().
+void AudioRendererMixer::VectorFMAC_SSE(const float src[], float scale, int len,
+                                        float dest[]) {
+  // The aligned load/store intrinsics used below need |src| and |dest| to
+  // start on a 16-byte boundary.
+  DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(src) & 0x0F);
+  DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(dest) & 0x0F);
+
+  // Split |len| into a body that is a multiple of four floats and a tail of
+  // |tail_len| leftover values.
+  const int tail_len = len % 4;
+  const int body_len = len - tail_len;
+
+  // |scale| replicated into all four lanes.
+  const __m128 scale_x4 = _mm_set_ps1(scale);
+  for (int i = 0; i < body_len; i += 4) {
+    const __m128 scaled = _mm_mul_ps(_mm_load_ps(src + i), scale_x4);
+    const __m128 mixed = _mm_add_ps(_mm_load_ps(dest + i), scaled);
+    _mm_store_ps(dest + i, mixed);
+  }
+
+  // Handle any remaining values that wouldn't fit in an SSE pass.
+  if (tail_len != 0)
+    VectorFMAC_C(src + body_len, scale, tail_len, dest + body_len);
+}
+#endif
+
} // namespace media