diff options
author | dalecurtis@chromium.org <dalecurtis@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-04-08 21:30:37 +0000 |
---|---|---|
committer | dalecurtis@chromium.org <dalecurtis@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-04-08 21:30:37 +0000 |
commit | 7ed91e9093602872c62eba397fc1be8741089157 (patch) | |
tree | 6c4c2c6878ed51ca2e90586f11ea3887835c5e8c /media | |
parent | 13bfbc41308953ebea4952ff504037a261cfdd3c (diff) | |
download | chromium_src-7ed91e9093602872c62eba397fc1be8741089157.zip chromium_src-7ed91e9093602872c62eba397fc1be8741089157.tar.gz chromium_src-7ed91e9093602872c62eba397fc1be8741089157.tar.bz2 |
Add vector_math::FMUL. Replace audio_util::AdjustVolume.
Removes the integer-based volume adjustment code from the
melting pot that is audio_util in favor of an
AudioBus::Scale() method which works on float.
The method is backed by a new vector_math::FMUL()
function, which is SSE-optimized. Benchmarks put it on par
with the vector_math::FMAC() function.
Benchmarking 200000 iterations:
FMUL_C took 1962.52ms.
FMUL_SSE (unaligned size) took 493.03ms; which is 3.98x faster than FMUL_C.
FMUL_SSE (aligned size) took 491.79ms; which is 3.99x faster than FMUL_C and 1.00x faster than FMUL_SSE (unaligned size).
BUG=120319, 171540, 226447
TEST=new media_unittests.
Review URL: https://chromiumcodereview.appspot.com/13726011
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@192905 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'media')
23 files changed, 221 insertions, 244 deletions
diff --git a/media/audio/android/opensles_output.cc b/media/audio/android/opensles_output.cc index f98baaf..36a1408 100644 --- a/media/audio/android/opensles_output.cc +++ b/media/audio/android/opensles_output.cc @@ -5,7 +5,6 @@ #include "media/audio/android/opensles_output.h" #include "base/logging.h" -#include "media/audio/audio_util.h" #include "media/audio/android/audio_manager_android.h" namespace media { @@ -267,16 +266,10 @@ void OpenSLESOutputStream::FillBufferQueue() { DCHECK_LE(static_cast<size_t>(num_filled_bytes), buffer_size_bytes_); // Note: If this ever changes to output raw float the data must be clipped and // sanitized since it may come from an untrusted source such as NaCl. + audio_bus_->Scale(volume_); audio_bus_->ToInterleaved( frames_filled, format_.bitsPerSample / 8, audio_data_[active_queue_]); - // Perform in-place, software-volume adjustments. - media::AdjustVolume(audio_data_[active_queue_], - num_filled_bytes, - format_.numChannels, - format_.bitsPerSample / 8, - volume_); - // Enqueue the buffer for playback. 
SLresult err = (*simple_buffer_queue_)->Enqueue( simple_buffer_queue_, diff --git a/media/audio/audio_util.cc b/media/audio/audio_util.cc index 351937e..8758fdc 100644 --- a/media/audio/audio_util.cc +++ b/media/audio/audio_util.cc @@ -14,86 +14,17 @@ #include "media/audio/audio_util.h" -#include <algorithm> -#include <limits> - -#include "base/basictypes.h" #include "base/command_line.h" -#include "base/logging.h" #include "base/string_number_conversions.h" #include "base/time.h" -#include "media/audio/audio_parameters.h" -#include "media/base/audio_bus.h" #include "media/base/media_switches.h" -#if defined(OS_MACOSX) -#include "media/audio/mac/audio_low_latency_input_mac.h" -#include "media/audio/mac/audio_low_latency_output_mac.h" -#elif defined(OS_WIN) +#if defined(OS_WIN) #include "base/win/windows_version.h" -#include "media/audio/audio_manager_base.h" -#include "media/audio/win/audio_low_latency_input_win.h" -#include "media/audio/win/audio_low_latency_output_win.h" -#include "media/audio/win/core_audio_util_win.h" -#include "media/base/limits.h" #endif namespace media { -// TODO(fbarchard): Convert to intrinsics for better efficiency. -template<class Fixed> -static int ScaleChannel(int channel, int volume) { - return static_cast<int>((static_cast<Fixed>(channel) * volume) >> 16); -} - -template<class Format, class Fixed, int bias> -static void AdjustVolume(Format* buf_out, - int sample_count, - int fixed_volume) { - for (int i = 0; i < sample_count; ++i) { - buf_out[i] = static_cast<Format>(ScaleChannel<Fixed>(buf_out[i] - bias, - fixed_volume) + bias); - } -} - -// AdjustVolume() does an in place audio sample change. 
-bool AdjustVolume(void* buf, - size_t buflen, - int channels, - int bytes_per_sample, - float volume) { - DCHECK(buf); - if (volume < 0.0f || volume > 1.0f) - return false; - if (volume == 1.0f) { - return true; - } else if (volume == 0.0f) { - memset(buf, 0, buflen); - return true; - } - if (channels > 0 && channels <= 8 && bytes_per_sample > 0) { - int sample_count = buflen / bytes_per_sample; - const int fixed_volume = static_cast<int>(volume * 65536); - if (bytes_per_sample == 1) { - AdjustVolume<uint8, int32, 128>(reinterpret_cast<uint8*>(buf), - sample_count, - fixed_volume); - return true; - } else if (bytes_per_sample == 2) { - AdjustVolume<int16, int32, 0>(reinterpret_cast<int16*>(buf), - sample_count, - fixed_volume); - return true; - } else if (bytes_per_sample == 4) { - AdjustVolume<int32, int64, 0>(reinterpret_cast<int32*>(buf), - sample_count, - fixed_volume); - return true; - } - } - return false; -} - // Returns user buffer size as specified on the command line or 0 if no buffer // size has been specified. int GetUserBufferSize() { diff --git a/media/audio/audio_util.h b/media/audio/audio_util.h index 9fdbde8..a11c327 100644 --- a/media/audio/audio_util.h +++ b/media/audio/audio_util.h @@ -5,39 +5,11 @@ #ifndef MEDIA_AUDIO_AUDIO_UTIL_H_ #define MEDIA_AUDIO_AUDIO_UTIL_H_ -#include <string> - #include "base/basictypes.h" -#include "media/base/channel_layout.h" +#include "build/build_config.h" #include "media/base/media_export.h" -namespace base { -class SharedMemory; -} - namespace media { -class AudioBus; - -// For all audio functions 3 audio formats are supported: -// 8 bits unsigned 0 to 255. -// 16 bit signed (little endian). -// 32 bit signed (little endian) - -// AdjustVolume() does a software volume adjustment of a sample buffer. -// The samples are multiplied by the volume, which should range from -// 0.0 (mute) to 1.0 (full volume). -// Using software allows each audio and video to have its own volume without -// affecting the master volume. 
-// In the future the function may be used to adjust the sample format to -// simplify hardware requirements and to support a wider variety of input -// formats. -// The buffer is modified in-place to avoid memory management, as this -// function may be called in performance critical code. -MEDIA_EXPORT bool AdjustVolume(void* buf, - size_t buflen, - int channels, - int bytes_per_sample, - float volume); // Returns user buffer size as specified on the command line or 0 if no buffer // size has been specified. diff --git a/media/audio/audio_util_unittest.cc b/media/audio/audio_util_unittest.cc deleted file mode 100644 index 679243c..0000000 --- a/media/audio/audio_util_unittest.cc +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/basictypes.h" -#include "media/audio/audio_util.h" -#include "testing/gtest/include/gtest/gtest.h" - -// Number of samples in each audio array. -static const size_t kNumberOfSamples = 4; - -namespace media { - -TEST(AudioUtilTest, AdjustVolume_u8) { - // Test AdjustVolume() on 8 bit samples. - uint8 samples_u8[kNumberOfSamples] = { 4, 0x40, 0x80, 0xff }; - uint8 expected_u8[kNumberOfSamples] = { (4 - 128) / 2 + 128, - (0x40 - 128) / 2 + 128, - (0x80 - 128) / 2 + 128, - (0xff - 128) / 2 + 128 }; - bool result_u8 = media::AdjustVolume(samples_u8, sizeof(samples_u8), - 1, // channels. - sizeof(samples_u8[0]), - 0.5f); - EXPECT_TRUE(result_u8); - int expected_test = memcmp(samples_u8, expected_u8, sizeof(expected_u8)); - EXPECT_EQ(0, expected_test); -} - -TEST(AudioUtilTest, AdjustVolume_s16) { - // Test AdjustVolume() on 16 bit samples. - int16 samples_s16[kNumberOfSamples] = { -4, 0x40, -32768, 123 }; - int16 expected_s16[kNumberOfSamples] = { -1, 0x10, -8192, 30 }; - bool result_s16 = media::AdjustVolume(samples_s16, sizeof(samples_s16), - 2, // channels. 
- sizeof(samples_s16[0]), - 0.25f); - EXPECT_TRUE(result_s16); - int expected_test = memcmp(samples_s16, expected_s16, sizeof(expected_s16)); - EXPECT_EQ(0, expected_test); -} - -TEST(AudioUtilTest, AdjustVolume_s16_zero) { - // Test AdjustVolume() on 16 bit samples. - int16 samples_s16[kNumberOfSamples] = { -4, 0x40, -32768, 123 }; - int16 expected_s16[kNumberOfSamples] = { 0, 0, 0, 0 }; - bool result_s16 = media::AdjustVolume(samples_s16, sizeof(samples_s16), - 2, // channels. - sizeof(samples_s16[0]), - 0.0f); - EXPECT_TRUE(result_s16); - int expected_test = memcmp(samples_s16, expected_s16, sizeof(expected_s16)); - EXPECT_EQ(0, expected_test); -} - -TEST(AudioUtilTest, AdjustVolume_s16_one) { - // Test AdjustVolume() on 16 bit samples. - int16 samples_s16[kNumberOfSamples] = { -4, 0x40, -32768, 123 }; - int16 expected_s16[kNumberOfSamples] = { -4, 0x40, -32768, 123 }; - bool result_s16 = media::AdjustVolume(samples_s16, sizeof(samples_s16), - 2, // channels. - sizeof(samples_s16[0]), - 1.0f); - EXPECT_TRUE(result_s16); - int expected_test = memcmp(samples_s16, expected_s16, sizeof(expected_s16)); - EXPECT_EQ(0, expected_test); -} - -TEST(AudioUtilTest, AdjustVolume_s32) { - // Test AdjustVolume() on 32 bit samples. - int32 samples_s32[kNumberOfSamples] = { -4, 0x40, -32768, 123 }; - int32 expected_s32[kNumberOfSamples] = { -1, 0x10, -8192, 30 }; - bool result_s32 = media::AdjustVolume(samples_s32, sizeof(samples_s32), - 4, // channels. 
- sizeof(samples_s32[0]), - 0.25f); - EXPECT_TRUE(result_s32); - int expected_test = memcmp(samples_s32, expected_s32, sizeof(expected_s32)); - EXPECT_EQ(0, expected_test); -} - -} // namespace media diff --git a/media/audio/linux/alsa_output.cc b/media/audio/linux/alsa_output.cc index ea3d1b6..4615d01 100644 --- a/media/audio/linux/alsa_output.cc +++ b/media/audio/linux/alsa_output.cc @@ -42,7 +42,6 @@ #include "base/message_loop.h" #include "base/stl_util.h" #include "base/time.h" -#include "media/audio/audio_util.h" #include "media/audio/linux/alsa_util.h" #include "media/audio/linux/alsa_wrapper.h" #include "media/audio/linux/audio_manager_linux.h" @@ -379,15 +378,10 @@ void AlsaPcmOutputStream::BufferPacket(bool* source_exhausted) { // Note: If this ever changes to output raw float the data must be clipped // and sanitized since it may come from an untrusted source such as NaCl. + output_bus->Scale(volume_); output_bus->ToInterleaved( frames_filled, bytes_per_sample_, packet->GetWritableData()); - media::AdjustVolume(packet->GetWritableData(), - packet_size, - output_bus->channels(), - bytes_per_sample_, - volume_); - if (packet_size > 0) { packet->SetDataSize(packet_size); // Add the packet to the buffer. 
diff --git a/media/audio/mac/audio_auhal_mac.cc b/media/audio/mac/audio_auhal_mac.cc index 20df6f5..b2a8828 100644 --- a/media/audio/mac/audio_auhal_mac.cc +++ b/media/audio/mac/audio_auhal_mac.cc @@ -178,8 +178,6 @@ void AUHALStream::Stop() { void AUHALStream::SetVolume(double volume) { volume_ = static_cast<float>(volume); - - // TODO(crogers): set volume property } void AUHALStream::GetVolume(double* volume) { @@ -251,6 +249,7 @@ OSStatus AUHALStream::Render( input_bus_.get(), output_bus_.get(), AudioBuffersState(0, hardware_pending_bytes)); + output_bus_->Scale(volume_); } return noErr; diff --git a/media/audio/mac/audio_low_latency_output_mac.cc b/media/audio/mac/audio_low_latency_output_mac.cc index 4b82bef..592a6a1 100644 --- a/media/audio/mac/audio_low_latency_output_mac.cc +++ b/media/audio/mac/audio_low_latency_output_mac.cc @@ -10,7 +10,6 @@ #include "base/command_line.h" #include "base/logging.h" #include "base/mac/mac_logging.h" -#include "media/audio/audio_util.h" #include "media/audio/mac/audio_manager_mac.h" #include "media/base/media_switches.h" @@ -284,16 +283,9 @@ OSStatus AUAudioOutputStream::Render(UInt32 number_of_frames, // Note: If this ever changes to output raw float the data must be clipped and // sanitized since it may come from an untrusted source such as NaCl. + audio_bus_->Scale(volume_); audio_bus_->ToInterleaved( frames_filled, format_.mBitsPerChannel / 8, audio_data); - uint32 filled = frames_filled * format_.mBytesPerFrame; - - // Perform in-place, software-volume adjustments. 
- media::AdjustVolume(audio_data, - filled, - audio_bus_->channels(), - format_.mBitsPerChannel / 8, - volume_); return noErr; } diff --git a/media/audio/pulse/pulse_output.cc b/media/audio/pulse/pulse_output.cc index eccf463..1734954 100644 --- a/media/audio/pulse/pulse_output.cc +++ b/media/audio/pulse/pulse_output.cc @@ -9,7 +9,6 @@ #include "base/message_loop.h" #include "media/audio/audio_manager_base.h" #include "media/audio/audio_parameters.h" -#include "media/audio/audio_util.h" #include "media/audio/pulse/pulse_util.h" namespace media { @@ -143,10 +142,9 @@ void PulseAudioOutputStream::FulfillWriteRequest(size_t requested_bytes) { // Note: If this ever changes to output raw float the data must be clipped // and sanitized since it may come from an untrusted source such as NaCl. + audio_bus_->Scale(volume_); audio_bus_->ToInterleaved( audio_bus_->frames(), params_.bits_per_sample() / 8, buffer); - media::AdjustVolume(buffer, bytes_to_fill, params_.channels(), - params_.bits_per_sample() / 8, volume_); if (pa_stream_write(pa_stream_, buffer, bytes_to_fill, NULL, 0LL, PA_SEEK_RELATIVE) < 0) { diff --git a/media/audio/pulse/pulse_unified.cc b/media/audio/pulse/pulse_unified.cc index 791a3a8..87bb6ae 100644 --- a/media/audio/pulse/pulse_unified.cc +++ b/media/audio/pulse/pulse_unified.cc @@ -8,7 +8,6 @@ #include "base/time.h" #include "media/audio/audio_manager_base.h" #include "media/audio/audio_parameters.h" -#include "media/audio/audio_util.h" #include "media/audio/pulse/pulse_util.h" #include "media/base/seekable_buffer.h" @@ -175,10 +174,9 @@ void PulseAudioUnifiedStream::WriteData(size_t requested_bytes) { // Note: If this ever changes to output raw float the data must be clipped // and sanitized since it may come from an untrusted source such as NaCl. 
+ output_bus_->Scale(volume_); output_bus_->ToInterleaved( output_bus_->frames(), params_.bits_per_sample() / 8, buffer); - media::AdjustVolume(buffer, requested_bytes, params_.channels(), - params_.bits_per_sample() / 8, volume_); if (pa_stream_write(output_stream_, buffer, requested_bytes, NULL, 0LL, PA_SEEK_RELATIVE) < 0) { diff --git a/media/audio/win/audio_low_latency_output_win.cc b/media/audio/win/audio_low_latency_output_win.cc index b5a4c47..04b5dd2 100644 --- a/media/audio/win/audio_low_latency_output_win.cc +++ b/media/audio/win/audio_low_latency_output_win.cc @@ -13,7 +13,6 @@ #include "base/metrics/histogram.h" #include "base/utf_string_conversions.h" #include "base/win/scoped_propvariant.h" -#include "media/audio/audio_util.h" #include "media/audio/win/audio_manager_win.h" #include "media/audio/win/avrt_wrapper_win.h" #include "media/audio/win/core_audio_util_win.h" @@ -581,15 +580,10 @@ void WASAPIAudioOutputStream::RenderAudioFromSource( // clipped and sanitized since it may come from an untrusted // source such as NaCl. const int bytes_per_sample = format_.Format.wBitsPerSample >> 3; + audio_bus_->Scale(volume_); audio_bus_->ToInterleaved( frames_filled, bytes_per_sample, audio_data); - // Perform in-place, software-volume adjustments. - media::AdjustVolume(audio_data, - num_filled_bytes, - audio_bus_->channels(), - bytes_per_sample, - volume_); // Release the buffer space acquired in the GetBuffer() call. // Render silence if we were not able to fill up the buffer totally. 
diff --git a/media/audio/win/audio_unified_win.cc b/media/audio/win/audio_unified_win.cc index e7877f5..59b98d0 100644 --- a/media/audio/win/audio_unified_win.cc +++ b/media/audio/win/audio_unified_win.cc @@ -9,7 +9,6 @@ #include "base/debug/trace_event.h" #include "base/time.h" #include "base/win/scoped_com_initializer.h" -#include "media/audio/audio_util.h" #include "media/audio/win/audio_manager_win.h" #include "media/audio/win/avrt_wrapper_win.h" #include "media/audio/win/core_audio_util_win.h" @@ -529,16 +528,10 @@ void WASAPIUnifiedStream::Run() { // Convert the audio bus content to interleaved integer data using // |audio_data| as destination. + render_bus_->Scale(volume_); render_bus_->ToInterleaved( packet_size_frames_, bytes_per_sample, audio_data); - // Perform in-place, software-volume adjustments. - media::AdjustVolume(audio_data, - frames_filled * format_.Format.nBlockAlign, - render_bus_->channels(), - bytes_per_sample, - volume_); - // Release the buffer space acquired in the GetBuffer() call. audio_render_client_->ReleaseBuffer(packet_size_frames_, 0); DLOG_IF(ERROR, FAILED(hr)) << "Failed to release render buffer"; diff --git a/media/audio/win/waveout_output_win.cc b/media/audio/win/waveout_output_win.cc index f42d948..9b03c31 100644 --- a/media/audio/win/waveout_output_win.cc +++ b/media/audio/win/waveout_output_win.cc @@ -13,7 +13,6 @@ #include "base/debug/trace_event.h" #include "base/logging.h" #include "media/audio/audio_io.h" -#include "media/audio/audio_util.h" #include "media/audio/win/audio_manager_win.h" namespace media { @@ -360,14 +359,11 @@ void PCMWaveOutAudioOutputStream::QueueNextPacket(WAVEHDR *buffer) { if (used <= buffer_size_) { // Note: If this ever changes to output raw float the data must be clipped // and sanitized since it may come from an untrusted source such as NaCl. 
+ audio_bus_->Scale(volume_); audio_bus_->ToInterleaved( frames_filled, format_.Format.wBitsPerSample / 8, buffer->lpData); buffer->dwBufferLength = used * format_.Format.nChannels / channels_; - media::AdjustVolume(buffer->lpData, used, - format_.Format.nChannels, - format_.Format.wBitsPerSample >> 3, - volume_); } else { HandleError(0); return; diff --git a/media/base/audio_bus.cc b/media/base/audio_bus.cc index 0dc3300..43dd341 100644 --- a/media/base/audio_bus.cc +++ b/media/base/audio_bus.cc @@ -9,6 +9,7 @@ #include "base/logging.h" #include "media/audio/audio_parameters.h" #include "media/base/limits.h" +#include "media/base/vector_math.h" namespace media { @@ -307,4 +308,13 @@ void AudioBus::CopyTo(AudioBus* dest) const { memcpy(dest->channel(i), channel(i), sizeof(*channel(i)) * frames()); } +void AudioBus::Scale(float volume) { + if (volume > 0 && volume != 1) { + for (int i = 0; i < channels(); ++i) + vector_math::FMUL(channel(i), volume, frames(), channel(i)); + } else if (volume == 0) { + Zero(); + } +} + } // namespace media diff --git a/media/base/audio_bus.h b/media/base/audio_bus.h index 9d9563c..61a53ed 100644 --- a/media/base/audio_bus.h +++ b/media/base/audio_bus.h @@ -92,6 +92,10 @@ class MEDIA_EXPORT AudioBus { void ZeroFrames(int frames); void ZeroFramesPartial(int start_frame, int frames); + // Scale internal channel values by |volume| >= 0. If an invalid value + // is provided, no adjustment is done. 
+ void Scale(float volume); + private: friend struct base::DefaultDeleter<AudioBus>; ~AudioBus(); diff --git a/media/base/audio_bus_unittest.cc b/media/base/audio_bus_unittest.cc index 8c3a59f..562d760 100644 --- a/media/base/audio_bus_unittest.cc +++ b/media/base/audio_bus_unittest.cc @@ -356,4 +356,35 @@ TEST_F(AudioBusTest, ToInterleavedPartial) { kPartialFrames * sizeof(*kTestVectorInt16) * kTestVectorChannels), 0); } +TEST_F(AudioBusTest, Scale) { + scoped_ptr<AudioBus> bus = AudioBus::Create(kChannels, kFrameCount); + + // Fill the bus with dummy data. + static const float kFillValue = 1; + for (int i = 0; i < bus->channels(); ++i) + std::fill(bus->channel(i), bus->channel(i) + bus->frames(), kFillValue); + + // Adjust by an invalid volume and ensure volume is unchanged. + bus->Scale(-1); + for (int i = 0; i < bus->channels(); ++i) { + SCOPED_TRACE("Invalid Scale"); + VerifyValue(bus->channel(i), bus->frames(), kFillValue); + } + + // Verify correct volume adjustment. + static const float kVolume = 0.5; + bus->Scale(kVolume); + for (int i = 0; i < bus->channels(); ++i) { + SCOPED_TRACE("Half Scale"); + VerifyValue(bus->channel(i), bus->frames(), kFillValue * kVolume); + } + + // Verify zero volume case. 
+ bus->Scale(0); + for (int i = 0; i < bus->channels(); ++i) { + SCOPED_TRACE("Zero Scale"); + VerifyValue(bus->channel(i), bus->frames(), 0); + } +} + } // namespace media diff --git a/media/base/audio_converter.cc b/media/base/audio_converter.cc index 5fda460..ec74e03 100644 --- a/media/base/audio_converter.cc +++ b/media/base/audio_converter.cc @@ -178,11 +178,18 @@ void AudioConverter::SourceCallback(int fifo_frame_delay, AudioBus* dest) { if (it == transform_inputs_.begin()) { if (volume == 1.0f) { mixer_input_audio_bus_->CopyTo(temp_dest); - continue; + } else if (volume > 0) { + for (int i = 0; i < mixer_input_audio_bus_->channels(); ++i) { + vector_math::FMUL( + mixer_input_audio_bus_->channel(i), volume, + mixer_input_audio_bus_->frames(), temp_dest->channel(i)); + } + } else { + // Zero |temp_dest| otherwise, so we're mixing into a clean buffer. + temp_dest->Zero(); } - // Zero |temp_dest| otherwise, so we're mixing into a clean buffer. - temp_dest->Zero(); + continue; } // Volume adjust and mix each mixer input into |temp_dest| after rendering. diff --git a/media/base/simd/vector_math_sse.cc b/media/base/simd/vector_math_sse.cc index 5cc2df9..39bcaa0 100644 --- a/media/base/simd/vector_math_sse.cc +++ b/media/base/simd/vector_math_sse.cc @@ -9,6 +9,18 @@ namespace media { namespace vector_math { +void FMUL_SSE(const float src[], float scale, int len, float dest[]) { + const int rem = len % 4; + const int last_index = len - rem; + __m128 m_scale = _mm_set_ps1(scale); + for (int i = 0; i < last_index; i += 4) + _mm_store_ps(dest + i, _mm_mul_ps(_mm_load_ps(src + i), m_scale)); + + // Handle any remaining values that wouldn't fit in an SSE pass. 
+ for (int i = last_index; i < len; ++i) + dest[i] = src[i] * scale; +} + void FMAC_SSE(const float src[], float scale, int len, float dest[]) { const int rem = len % 4; const int last_index = len - rem; diff --git a/media/base/vector_math.cc b/media/base/vector_math.cc index 96f94d9..f534d92 100644 --- a/media/base/vector_math.cc +++ b/media/base/vector_math.cc @@ -16,14 +16,18 @@ void FMAC(const float src[], float scale, int len, float dest[]) { DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(src) & (kRequiredAlignment - 1)); DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(dest) & (kRequiredAlignment - 1)); - // Rely on function level static initialization to keep VectorFMACProc - // selection thread safe. typedef void (*VectorFMACProc)(const float src[], float scale, int len, float dest[]); -#if defined(ARCH_CPU_X86_FAMILY) + + // No NaCl code uses the SSE functionality of AudioBus and plumbing the -msse + // built library is non-trivial, so simply disable for now. iOS lies about + // its architecture, so we need to exclude it here. +#if defined(ARCH_CPU_X86_FAMILY) && !defined(OS_NACL) && !defined(OS_IOS) #if defined(__SSE__) static const VectorFMACProc kVectorFMACProc = FMAC_SSE; #else + // TODO(dalecurtis): Remove function level static initialization, it's not + // thread safe: http://crbug.com/224662. static const VectorFMACProc kVectorFMACProc = base::CPU().has_sse() ? FMAC_SSE : FMAC_C; #endif @@ -39,5 +43,37 @@ void FMAC_C(const float src[], float scale, int len, float dest[]) { dest[i] += src[i] * scale; } +void FMUL(const float src[], float scale, int len, float dest[]) { + // Ensure |src| and |dest| are 16-byte aligned. 
+ DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(src) & (kRequiredAlignment - 1)); + DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(dest) & (kRequiredAlignment - 1)); + + typedef void (*VectorFMULProc)(const float src[], float scale, int len, + float dest[]); + + // No NaCl code uses the SSE functionality of AudioBus and plumbing the -msse + // built library is non-trivial, so simply disable for now. iOS lies about + // its architecture, so we need to exclude it here. +#if defined(ARCH_CPU_X86_FAMILY) && !defined(OS_NACL) && !defined(OS_IOS) +#if defined(__SSE__) + static const VectorFMULProc kVectorFMULProc = FMUL_SSE; +#else + // TODO(dalecurtis): Remove function level static initialization, it's not + // thread safe: http://crbug.com/224662. + static const VectorFMULProc kVectorFMULProc = + base::CPU().has_sse() ? FMUL_SSE : FMUL_C; +#endif +#else + static const VectorFMULProc kVectorFMULProc = FMUL_C; +#endif + + return kVectorFMULProc(src, scale, len, dest); +} + +void FMUL_C(const float src[], float scale, int len, float dest[]) { + for (int i = 0; i < len; ++i) + dest[i] = src[i] * scale; +} + } // namespace vector_math } // namespace media diff --git a/media/base/vector_math.h b/media/base/vector_math.h index 10c3039..2618687 100644 --- a/media/base/vector_math.h +++ b/media/base/vector_math.h @@ -17,6 +17,10 @@ enum { kRequiredAlignment = 16 }; // |src| and |dest| must be aligned by kRequiredAlignment. MEDIA_EXPORT void FMAC(const float src[], float scale, int len, float dest[]); +// Multiply each element of |src| by |scale| and store in |dest|. |src| and +// |dest| must be aligned by kRequiredAlignment. 
+MEDIA_EXPORT void FMUL(const float src[], float scale, int len, float dest[]); + } // namespace vector_math } // namespace media diff --git a/media/base/vector_math_testing.h b/media/base/vector_math_testing.h index 503ca6a..1fecf4f 100644 --- a/media/base/vector_math_testing.h +++ b/media/base/vector_math_testing.h @@ -11,13 +11,15 @@ namespace media { namespace vector_math { -// Optimized versions of FMAC() function exposed for testing. See vector_math.h -// for details. +// Optimized versions exposed for testing. See vector_math.h for details. MEDIA_EXPORT void FMAC_C(const float src[], float scale, int len, float dest[]); +MEDIA_EXPORT void FMUL_C(const float src[], float scale, int len, float dest[]); #if defined(ARCH_CPU_X86_FAMILY) MEDIA_EXPORT void FMAC_SSE(const float src[], float scale, int len, float dest[]); +MEDIA_EXPORT void FMUL_SSE(const float src[], float scale, int len, + float dest[]); #endif } // namespace vector_math diff --git a/media/base/vector_math_unittest.cc b/media/base/vector_math_unittest.cc index e64c7c9..414998d 100644 --- a/media/base/vector_math_unittest.cc +++ b/media/base/vector_math_unittest.cc @@ -103,6 +103,38 @@ TEST_F(VectorMathTest, FMAC) { #endif } +// Ensure each optimized vector_math::FMUL() method returns the same value. 
+TEST_F(VectorMathTest, FMUL) { + static const float kResult = kInputFillValue * kScale; + + { + SCOPED_TRACE("FMUL"); + FillTestVectors(kInputFillValue, kOutputFillValue); + vector_math::FMUL( + input_vector.get(), kScale, kVectorSize, output_vector.get()); + VerifyOutput(kResult); + } + + { + SCOPED_TRACE("FMUL_C"); + FillTestVectors(kInputFillValue, kOutputFillValue); + vector_math::FMUL_C( + input_vector.get(), kScale, kVectorSize, output_vector.get()); + VerifyOutput(kResult); + } + +#if defined(ARCH_CPU_X86_FAMILY) + { + ASSERT_TRUE(base::CPU().has_sse()); + SCOPED_TRACE("FMUL_SSE"); + FillTestVectors(kInputFillValue, kOutputFillValue); + vector_math::FMUL_SSE( + input_vector.get(), kScale, kVectorSize, output_vector.get()); + VerifyOutput(kResult); + } +#endif +} + // Benchmark for each optimized vector_math::FMAC() method. Original benchmarks // were run with --vector-fmac-iterations=200000. TEST_F(VectorMathTest, FMACBenchmark) { @@ -156,4 +188,58 @@ TEST_F(VectorMathTest, FMACBenchmark) { #endif } + +// Benchmark for each optimized vector_math::FMUL() method. Original benchmarks +// were run with --vector-math-iterations=200000. +TEST_F(VectorMathTest, FMULBenchmark) { + static const int kBenchmarkIterations = BenchmarkIterations(); + + printf("Benchmarking %d iterations:\n", kBenchmarkIterations); + + // Benchmark FMUL_C(). + FillTestVectors(kInputFillValue, kOutputFillValue); + TimeTicks start = TimeTicks::HighResNow(); + for (int i = 0; i < kBenchmarkIterations; ++i) { + vector_math::FMUL_C( + input_vector.get(), kScale, kVectorSize, output_vector.get()); + } + double total_time_c_ms = (TimeTicks::HighResNow() - start).InMillisecondsF(); + printf("FMUL_C took %.2fms.\n", total_time_c_ms); + +#if defined(ARCH_CPU_X86_FAMILY) + ASSERT_TRUE(base::CPU().has_sse()); + + // Benchmark FMUL_SSE() with unaligned size. 
+ ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / + sizeof(float)), 0U); + FillTestVectors(kInputFillValue, kOutputFillValue); + start = TimeTicks::HighResNow(); + for (int j = 0; j < kBenchmarkIterations; ++j) { + vector_math::FMUL_SSE( + input_vector.get(), kScale, kVectorSize - 1, output_vector.get()); + } + double total_time_sse_unaligned_ms = + (TimeTicks::HighResNow() - start).InMillisecondsF(); + printf("FMUL_SSE (unaligned size) took %.2fms; which is %.2fx faster than" + " FMUL_C.\n", total_time_sse_unaligned_ms, + total_time_c_ms / total_time_sse_unaligned_ms); + + // Benchmark FMUL_SSE() with aligned size. + ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), + 0U); + FillTestVectors(kInputFillValue, kOutputFillValue); + start = TimeTicks::HighResNow(); + for (int j = 0; j < kBenchmarkIterations; ++j) { + vector_math::FMUL_SSE( + input_vector.get(), kScale, kVectorSize, output_vector.get()); + } + double total_time_sse_aligned_ms = + (TimeTicks::HighResNow() - start).InMillisecondsF(); + printf("FMUL_SSE (aligned size) took %.2fms; which is %.2fx faster than" + " FMUL_C and %.2fx faster than FMUL_SSE (unaligned size).\n", + total_time_sse_aligned_ms, total_time_c_ms / total_time_sse_aligned_ms, + total_time_sse_unaligned_ms / total_time_sse_aligned_ms); +#endif +} + } // namespace media diff --git a/media/media.gyp b/media/media.gyp index 386211f..48c6bc0 100644 --- a/media/media.gyp +++ b/media/media.gyp @@ -283,8 +283,6 @@ 'base/stream_parser.h', 'base/stream_parser_buffer.cc', 'base/stream_parser_buffer.h', - 'base/vector_math.cc', - 'base/vector_math.h', 'base/video_decoder.cc', 'base/video_decoder.h', 'base/video_decoder_config.cc', @@ -551,6 +549,7 @@ ['include', '^base/audio_bus\\.'], ['include', '^base/channel_layout\\.'], ['include', '^base/media_stub\\.cc$'], + ['include', '^base/vector_math\\.'], ], 'link_settings': { 'libraries': [ @@ -889,7 +888,6 @@ 'audio/audio_output_proxy_unittest.cc', 
'audio/audio_parameters_unittest.cc', 'audio/audio_silence_detector_unittest.cc', - 'audio/audio_util_unittest.cc', 'audio/cross_process_notification_unittest.cc', 'audio/fake_audio_consumer_unittest.cc', 'audio/ios/audio_manager_ios_unittest.cc', @@ -1265,6 +1263,13 @@ 'sources': [ '<@(shared_memory_support_sources)', ], + 'conditions': [ + [ 'target_arch == "ia32" or target_arch == "x64"', { + 'dependencies': [ + 'media_sse', + ], + }], + ], }, { 'target_name': 'yuv_convert', diff --git a/media/shared_memory_support.gypi b/media/shared_memory_support.gypi index f91edf5..0f1c53a 100644 --- a/media/shared_memory_support.gypi +++ b/media/shared_memory_support.gypi @@ -18,6 +18,8 @@ 'base/channel_layout.h', 'base/limits.h', 'base/media_export.h', + 'base/vector_math.cc', + 'base/vector_math.h', ], }, } |