Split AudioRendererMixer::VectorFMAC into VectorMath library.

Prepare the future home for more optimized vector math routines, specifically FMUL for volume adjustment. Additionally the FMAC operation will be used for browser side mixing as well as channel upmixing and downmixing. BUG=none TEST=unittests. Review URL: https://chromiumcodereview.appspot.com/10868037 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@153122 0039d316-1c4b-4281-b951-d872f2087c98
author: dalecurtis@chromium.org <dalecurtis@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-08-24 00:40:46 +0000
committer: dalecurtis@chromium.org <dalecurtis@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-08-24 00:40:46 +0000
commit: 9c2b29e0d642712e780818dddab5a965b4fd96d4 (patch)
tree: 4d9ae01ab0c5c86cd1d7b4e2abbdd40db3ba1190
parent: b94ebd1f6e245d9f9f3d9a2fc933581eb8f508bc (diff)
download: chromium_src-9c2b29e0d642712e780818dddab5a965b4fd96d4.zip
chromium_src-9c2b29e0d642712e780818dddab5a965b4fd96d4.tar.gz
chromium_src-9c2b29e0d642712e780818dddab5a965b4fd96d4.tar.bz2
8 files changed, 267 insertions, 196 deletions
diff --git a/media/base/audio_renderer_mixer.cc b/media/base/audio_renderer_mixer.cc
index b9ec259..e48b171 100644
--- a/media/base/audio_renderer_mixer.cc
+++ b/media/base/audio_renderer_mixer.cc
@@ -4,17 +4,12 @@
 
 #include "media/base/audio_renderer_mixer.h"
 
-#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
-#include <xmmintrin.h>
-#endif
-
 #include "base/bind.h"
 #include "base/bind_helpers.h"
-#include "base/cpu.h"
 #include "base/logging.h"
-#include "base/memory/aligned_memory.h"
 #include "media/audio/audio_util.h"
 #include "media/base/limits.h"
+#include "media/base/vector_math.h"
 
 namespace media {
 
@@ -117,8 +112,9 @@ void AudioRendererMixer::ProvideInput(AudioBus* audio_bus) {
 
     // Volume adjust and mix each mixer input into |audio_bus| after rendering.
     for (int i = 0; i < audio_bus->channels(); ++i) {
-       VectorFMAC(mixer_input_audio_bus_->channel(i), volume, frames_filled,
-                  audio_bus->channel(i));
+      vector_math::FMAC(
+          mixer_input_audio_bus_->channel(i), volume, frames_filled,
+          audio_bus->channel(i));
     }
   }
 }
@@ -133,46 +129,4 @@ void AudioRendererMixer::OnRenderError() {
   }
 }
 
-void AudioRendererMixer::VectorFMAC(const float src[], float scale, int len,
-                                    float dest[]) {
-  // Rely on function level static initialization to keep VectorFMACProc
-  // selection thread safe.
-  typedef void (*VectorFMACProc)(const float src[], float scale, int len,
-                                 float dest[]);
-#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
-  static const VectorFMACProc kVectorFMACProc =
-      base::CPU().has_sse() ? VectorFMAC_SSE : VectorFMAC_C;
-#else
-  static const VectorFMACProc kVectorFMACProc = VectorFMAC_C;
-#endif
-
-  return kVectorFMACProc(src, scale, len, dest);
-}
-
-void AudioRendererMixer::VectorFMAC_C(const float src[], float scale, int len,
-                                      float dest[]) {
-  for (int i = 0; i < len; ++i)
-    dest[i] += src[i] * scale;
-}
-
-#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
-void AudioRendererMixer::VectorFMAC_SSE(const float src[], float scale, int len,
-                                        float dest[]) {
-  // Ensure |src| and |dest| are 16-byte aligned.
-  DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(src) & 0x0F);
-  DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(dest) & 0x0F);
-
-  __m128 m_scale = _mm_set_ps1(scale);
-  int rem = len % 4;
-  for (int i = 0; i < len - rem; i += 4) {
-    _mm_store_ps(dest + i, _mm_add_ps(_mm_load_ps(dest + i),
-                 _mm_mul_ps(_mm_load_ps(src + i), m_scale)));
-  }
-
-  // Handle any remaining values that wouldn't fit in an SSE pass.
-  if (rem)
-    VectorFMAC_C(src + len - rem, scale, rem, dest + len - rem);
-}
-#endif
-
 }  // namespace media
diff --git a/media/base/audio_renderer_mixer.h b/media/base/audio_renderer_mixer.h
index 75b7d85..c595c0d 100644
--- a/media/base/audio_renderer_mixer.h
+++ b/media/base/audio_renderer_mixer.h
@@ -7,7 +7,6 @@
 
 #include <set>
 
-#include "base/gtest_prod_util.h"
 #include "base/synchronization/lock.h"
 #include "media/base/audio_renderer_mixer_input.h"
 #include "media/base/audio_renderer_sink.h"
@@ -33,9 +32,6 @@ class MEDIA_EXPORT AudioRendererMixer
   void RemoveMixerInput(const scoped_refptr<AudioRendererMixerInput>& input);
 
  private:
-  FRIEND_TEST_ALL_PREFIXES(AudioRendererMixerTest, VectorFMAC);
-  FRIEND_TEST_ALL_PREFIXES(AudioRendererMixerTest, VectorFMACBenchmark);
-
   // AudioRendererSink::RenderCallback implementation.
   virtual int Render(AudioBus* audio_bus,
                      int audio_delay_milliseconds) OVERRIDE;
@@ -46,14 +42,6 @@ class MEDIA_EXPORT AudioRendererMixer
   // by MultiChannelResampler when more data is necessary.
   void ProvideInput(AudioBus* audio_bus);
 
-  // Multiply each element of |src| (up to |len|) by |scale| and add to |dest|.
-  static void VectorFMAC(const float src[], float scale, int len, float dest[]);
-  static void VectorFMAC_C(const float src[], float scale, int len,
-                           float dest[]);
-  // SSE optimized VectorFMAC, requires |src|, |dest| to be 16-byte aligned.
-  static void VectorFMAC_SSE(const float src[], float scale, int len,
-                             float dest[]);
-
   // Output sink for this mixer.
   scoped_refptr<AudioRendererSink> audio_sink_;
 
diff --git a/media/base/audio_renderer_mixer_unittest.cc b/media/base/audio_renderer_mixer_unittest.cc
index 35917b8..a37cd69 100644
--- a/media/base/audio_renderer_mixer_unittest.cc
+++ b/media/base/audio_renderer_mixer_unittest.cc
@@ -8,11 +8,8 @@
 
 #include "base/bind.h"
 #include "base/bind_helpers.h"
-#include "base/command_line.h"
-#include "base/memory/aligned_memory.h"
 #include "base/memory/scoped_ptr.h"
 #include "base/memory/scoped_vector.h"
-#include "base/string_number_conversions.h"
 #include "media/base/audio_renderer_mixer.h"
 #include "media/base/audio_renderer_mixer_input.h"
 #include "media/base/fake_audio_render_callback.h"
@@ -36,137 +33,6 @@ static const int kSampleRate = 48000;
 // Number of full sine wave cycles for each Render() call.
 static const int kSineCycles = 4;
 
-// Command line switch for runtime adjustment of VectorFMACBenchmark iterations.
-static const char kVectorFMACIterations[] = "vector-fmac-iterations";
-
-// Test parameters for VectorFMAC tests.
-static const float kScale = 0.5;
-static const float kInputFillValue = 1.0;
-static const float kOutputFillValue = 3.0;
-
-// Ensure various optimized VectorFMAC() methods return the same value.
-TEST(AudioRendererMixerTest, VectorFMAC) {
-  // Initialize a dummy mixer.
-  scoped_refptr<MockAudioRendererSink> sink = new MockAudioRendererSink();
-  EXPECT_CALL(*sink, Start());
-  EXPECT_CALL(*sink, Stop());
-  AudioParameters params(
-      AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout, kSampleRate,
-      kBitsPerChannel, kHighLatencyBufferSize);
-  AudioRendererMixer mixer(params, params, sink);
-
-  // Initialize input and output vectors.
-  scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_vector(
-      static_cast<float*>(
-          base::AlignedAlloc(sizeof(float) * kHighLatencyBufferSize, 16)));
-  scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> output_vector(
-      static_cast<float*>(
-          base::AlignedAlloc(sizeof(float) * kHighLatencyBufferSize, 16)));
-
-  // Setup input and output vectors.
-  std::fill(input_vector.get(), input_vector.get() + kHighLatencyBufferSize,
-            kInputFillValue);
-  std::fill(output_vector.get(), output_vector.get() + kHighLatencyBufferSize,
-            kOutputFillValue);
-  mixer.VectorFMAC_C(
-      input_vector.get(), kScale, kHighLatencyBufferSize, output_vector.get());
-  for(int i = 0; i < kHighLatencyBufferSize; ++i) {
-    ASSERT_FLOAT_EQ(output_vector.get()[i],
-                    kInputFillValue * kScale + kOutputFillValue);
-  }
-
-#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
-  // Reset vectors, and try with SSE.
-  std::fill(output_vector.get(), output_vector.get() + kHighLatencyBufferSize,
-            kOutputFillValue);
-  mixer.VectorFMAC_SSE(
-      input_vector.get(), kScale, kHighLatencyBufferSize, output_vector.get());
-  for(int i = 0; i < kHighLatencyBufferSize; ++i) {
-    ASSERT_FLOAT_EQ(output_vector.get()[i],
-                    kInputFillValue * kScale + kOutputFillValue);
-  }
-#endif
-}
-
-// Benchmark for the various VectorFMAC() methods.  Make sure to build with
-// branding=Chrome so that DCHECKs are compiled out when benchmarking.  Original
-// benchmarks were run with --vector-fmac-iterations=200000.
-TEST(AudioRendererMixerTest, VectorFMACBenchmark) {
-  // Initialize a dummy mixer.
-  scoped_refptr<MockAudioRendererSink> sink = new MockAudioRendererSink();
-  EXPECT_CALL(*sink, Start());
-  EXPECT_CALL(*sink, Stop());
-  AudioParameters params(
-      AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout, kSampleRate,
-      kBitsPerChannel, kHighLatencyBufferSize);
-  AudioRendererMixer mixer(params, params, sink);
-
-  // Initialize input and output vectors.
-  scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_vector(
-      static_cast<float*>(
-          base::AlignedAlloc(sizeof(float) * kHighLatencyBufferSize, 16)));
-  scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> output_vector(
-      static_cast<float*>(
-          base::AlignedAlloc(sizeof(float) * kHighLatencyBufferSize, 16)));
-
-  // Retrieve benchmark iterations from command line.
-  int vector_fmac_iterations = 10;
-  std::string iterations(CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
-      kVectorFMACIterations));
-  if (!iterations.empty())
-    base::StringToInt(iterations, &vector_fmac_iterations);
-
-  printf("Benchmarking %d iterations:\n", vector_fmac_iterations);
-
-  // Benchmark VectorFMAC_C().
-  std::fill(input_vector.get(), input_vector.get() + kHighLatencyBufferSize,
-            kInputFillValue);
-  std::fill(output_vector.get(), output_vector.get() + kHighLatencyBufferSize,
-            kOutputFillValue);
-  base::TimeTicks start = base::TimeTicks::HighResNow();
-  for (int i = 0; i < vector_fmac_iterations; ++i) {
-    mixer.VectorFMAC_C(input_vector.get(), static_cast<float>(M_PI),
-                       kHighLatencyBufferSize, output_vector.get());
-  }
-  double total_time_c_ms =
-      (base::TimeTicks::HighResNow() - start).InMillisecondsF();
-  printf("VectorFMAC_C took %.2fms.\n", total_time_c_ms);
-
-#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
-  // Benchmark VectorFMAC_SSE() with unaligned size; I.e., size % 4 != 0.
-  ASSERT_NE((kHighLatencyBufferSize - 1) % 4, 0);
-  std::fill(output_vector.get(), output_vector.get() + kHighLatencyBufferSize,
-            kOutputFillValue);
-  start = base::TimeTicks::HighResNow();
-  for (int j = 0; j < vector_fmac_iterations; ++j) {
-    mixer.VectorFMAC_SSE(input_vector.get(), M_PI, kHighLatencyBufferSize - 1,
-                         output_vector.get());
-  }
-  double total_time_sse_unaligned_ms =
-      (base::TimeTicks::HighResNow() - start).InMillisecondsF();
-  printf("VectorFMAC_SSE (unaligned size) took %.2fms; which is %.2fx faster"
-         " than VectorFMAC_C.\n", total_time_sse_unaligned_ms,
-         total_time_c_ms / total_time_sse_unaligned_ms);
-
-  // Benchmark VectorFMAC_SSE() with aligned size; I.e., size % 4 == 0.
-  ASSERT_EQ(kHighLatencyBufferSize % 4, 0);
-  std::fill(output_vector.get(), output_vector.get() + kHighLatencyBufferSize,
-            kOutputFillValue);
-  start = base::TimeTicks::HighResNow();
-  for (int j = 0; j < vector_fmac_iterations; ++j) {
-    mixer.VectorFMAC_SSE(input_vector.get(), M_PI, kHighLatencyBufferSize,
-                         output_vector.get());
-  }
-  double total_time_sse_aligned_ms =
-      (base::TimeTicks::HighResNow() - start).InMillisecondsF();
-  printf("VectorFMAC_SSE (aligned size) took %.2fms; which is %.2fx faster than"
-         " VectorFMAC_C and %.2fx faster than VectorFMAC_SSE (unaligned size)."
-         "\n",
-         total_time_sse_aligned_ms, total_time_c_ms / total_time_sse_aligned_ms,
-         total_time_sse_unaligned_ms / total_time_sse_aligned_ms);
-#endif
-}
-
 // Tuple of <input sampling rate, output sampling rate, epsilon>.
 typedef std::tr1::tuple<int, int, double> AudioRendererMixerTestData;
 class AudioRendererMixerTest
diff --git a/media/base/vector_math.cc b/media/base/vector_math.cc
new file mode 100644
index 0000000..edd95cd
--- /dev/null
+++ b/media/base/vector_math.cc
@@ -0,0 +1,59 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "media/base/vector_math.h"
+#include "media/base/vector_math_testing.h"
+
+#include "base/cpu.h"
+#include "base/logging.h"
+#include "build/build_config.h"
+
+#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
+#include <xmmintrin.h>
+#endif
+
+namespace media {
+namespace vector_math {
+
+void FMAC(const float src[], float scale, int len, float dest[]) {
+  // Ensure |src| and |dest| are aligned to kRequiredAlignment bytes.
+  DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(src) & (kRequiredAlignment - 1));
+  DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(dest) & (kRequiredAlignment - 1));
+
+  // Rely on function level static initialization to keep VectorFMACProc
+  // selection thread safe.
+  typedef void (*VectorFMACProc)(const float src[], float scale, int len,
+                                 float dest[]);
+#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
+  static const VectorFMACProc kVectorFMACProc =
+      base::CPU().has_sse() ? FMAC_SSE : FMAC_C;
+#else
+  static const VectorFMACProc kVectorFMACProc = FMAC_C;
+#endif
+
+  return kVectorFMACProc(src, scale, len, dest);
+}
+
+void FMAC_C(const float src[], float scale, int len, float dest[]) {
+  for (int i = 0; i < len; ++i)
+    dest[i] += src[i] * scale;
+}
+
+#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
+void FMAC_SSE(const float src[], float scale, int len, float dest[]) {
+  __m128 m_scale = _mm_set_ps1(scale);
+  int rem = len % 4;
+  for (int i = 0; i < len - rem; i += 4) {
+    _mm_store_ps(dest + i, _mm_add_ps(_mm_load_ps(dest + i),
+                 _mm_mul_ps(_mm_load_ps(src + i), m_scale)));
+  }
+
+  // Handle any remaining values that wouldn't fit in an SSE pass.
+  if (rem)
+    FMAC_C(src + len - rem, scale, rem, dest + len - rem);
+}
+#endif
+
+}  // namespace vector_math
+}  // namespace media
diff --git a/media/base/vector_math.h b/media/base/vector_math.h
new file mode 100644
index 0000000..10c3039
--- /dev/null
+++ b/media/base/vector_math.h
@@ -0,0 +1,23 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef MEDIA_BASE_VECTOR_MATH_H_
+#define MEDIA_BASE_VECTOR_MATH_H_
+
+#include "media/base/media_export.h"
+
+namespace media {
+namespace vector_math {
+
+// Required alignment for inputs and outputs to all vector math functions.
+enum { kRequiredAlignment = 16 };
+
+// Multiply each element of |src| (up to |len|) by |scale| and add to |dest|.
+// |src| and |dest| must be aligned by kRequiredAlignment.
+MEDIA_EXPORT void FMAC(const float src[], float scale, int len, float dest[]);
+
+}  // namespace vector_math
+}  // namespace media
+
+#endif  // MEDIA_BASE_VECTOR_MATH_H_
diff --git a/media/base/vector_math_testing.h b/media/base/vector_math_testing.h
new file mode 100644
index 0000000..d364b74
--- /dev/null
+++ b/media/base/vector_math_testing.h
@@ -0,0 +1,22 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef MEDIA_BASE_VECTOR_MATH_TESTING_H_
+#define MEDIA_BASE_VECTOR_MATH_TESTING_H_
+
+#include "media/base/media_export.h"
+
+namespace media {
+namespace vector_math {
+
+// Implementations selected by FMAC(), exposed for testing.  FMAC_SSE is only
+// defined on x86 builds with SSE enabled.  See vector_math.h for details.
+MEDIA_EXPORT void FMAC_C(const float src[], float scale, int len, float dest[]);
+MEDIA_EXPORT void FMAC_SSE(const float src[], float scale, int len,
+                           float dest[]);
+
+}  // namespace vector_math
+}  // namespace media
+
+#endif  // MEDIA_BASE_VECTOR_MATH_TESTING_H_
diff --git a/media/base/vector_math_unittest.cc b/media/base/vector_math_unittest.cc
new file mode 100644
index 0000000..153378e
--- /dev/null
+++ b/media/base/vector_math_unittest.cc
@@ -0,0 +1,155 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// MSVC++ requires this to be set before any other includes to get M_PI.
+#define _USE_MATH_DEFINES
+#include <cmath>
+
+#include "base/command_line.h"
+#include "base/memory/aligned_memory.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/string_number_conversions.h"
+#include "base/time.h"
+#include "media/base/vector_math.h"
+#include "media/base/vector_math_testing.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+using base::TimeTicks;
+using std::fill;
+
+// Command line switch for runtime adjustment of benchmark iterations.
+static const char kBenchmarkIterations[] = "vector-math-iterations";
+static const int kDefaultIterations = 10;
+
+// Default test values.
+static const float kScale = 0.5;
+static const float kInputFillValue = 1.0;
+static const float kOutputFillValue = 3.0;
+
+namespace media {
+
+class VectorMathTest : public testing::Test {
+ public:
+  static const int kVectorSize = 8192;
+
+  VectorMathTest() {
+    // Initialize input and output vectors.
+    input_vector.reset(static_cast<float*>(base::AlignedAlloc(
+        sizeof(float) * kVectorSize, vector_math::kRequiredAlignment)));
+    output_vector.reset(static_cast<float*>(base::AlignedAlloc(
+        sizeof(float) * kVectorSize, vector_math::kRequiredAlignment)));
+  }
+
+  void FillTestVectors(float input, float output) {
+    // Setup input and output vectors.
+    fill(input_vector.get(), input_vector.get() + kVectorSize, input);
+    fill(output_vector.get(), output_vector.get() + kVectorSize, output);
+  }
+
+  void VerifyOutput(float value) {
+    for (int i = 0; i < kVectorSize; ++i)
+      ASSERT_FLOAT_EQ(output_vector.get()[i], value);
+  }
+
+  int BenchmarkIterations() {
+    int vector_math_iterations = kDefaultIterations;
+    std::string iterations(
+        CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
+            kBenchmarkIterations));
+    if (!iterations.empty())
+      base::StringToInt(iterations, &vector_math_iterations);
+    return vector_math_iterations;
+  }
+
+ protected:
+  int benchmark_iterations;
+  scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_vector;
+  scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> output_vector;
+
+  DISALLOW_COPY_AND_ASSIGN(VectorMathTest);
+};
+
+// Ensure each optimized vector_math::FMAC() method returns the same value.
+TEST_F(VectorMathTest, FMAC) {
+  static const float kResult = kInputFillValue * kScale + kOutputFillValue;
+
+  {
+    SCOPED_TRACE("FMAC");
+    FillTestVectors(kInputFillValue, kOutputFillValue);
+    vector_math::FMAC(
+        input_vector.get(), kScale, kVectorSize, output_vector.get());
+    VerifyOutput(kResult);
+  }
+
+  {
+    SCOPED_TRACE("FMAC_C");
+    FillTestVectors(kInputFillValue, kOutputFillValue);
+    vector_math::FMAC_C(
+        input_vector.get(), kScale, kVectorSize, output_vector.get());
+    VerifyOutput(kResult);
+  }
+
+#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
+  {
+    SCOPED_TRACE("FMAC_SSE");
+    FillTestVectors(kInputFillValue, kOutputFillValue);
+    vector_math::FMAC_SSE(
+        input_vector.get(), kScale, kVectorSize, output_vector.get());
+    VerifyOutput(kResult);
+  }
+#endif
+}
+
+// Benchmark for each optimized vector_math::FMAC() method.  Original benchmarks
+// were run with --vector-math-iterations=200000.
+TEST_F(VectorMathTest, FMACBenchmark) {
+  static const int kBenchmarkIterations = BenchmarkIterations();
+
+  printf("Benchmarking %d iterations:\n", kBenchmarkIterations);
+
+  // Benchmark FMAC_C().
+  FillTestVectors(kInputFillValue, kOutputFillValue);
+  TimeTicks start = TimeTicks::HighResNow();
+  for (int i = 0; i < kBenchmarkIterations; ++i) {
+    vector_math::FMAC_C(
+        input_vector.get(), kScale, kVectorSize, output_vector.get());
+  }
+  double total_time_c_ms = (TimeTicks::HighResNow() - start).InMillisecondsF();
+  printf("FMAC_C took %.2fms.\n", total_time_c_ms);
+
+#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
+  // Benchmark FMAC_SSE() with unaligned size.
+  ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment /
+            sizeof(float)), 0U);
+  FillTestVectors(kInputFillValue, kOutputFillValue);
+  start = TimeTicks::HighResNow();
+  for (int j = 0; j < kBenchmarkIterations; ++j) {
+    vector_math::FMAC_SSE(
+        input_vector.get(), kScale, kVectorSize - 1, output_vector.get());
+  }
+  double total_time_sse_unaligned_ms =
+      (TimeTicks::HighResNow() - start).InMillisecondsF();
+  printf("FMAC_SSE (unaligned size) took %.2fms; which is %.2fx faster than"
+         " FMAC_C.\n", total_time_sse_unaligned_ms,
+         total_time_c_ms / total_time_sse_unaligned_ms);
+
+  // Benchmark FMAC_SSE() with aligned size.
+  ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),
+            0U);
+  FillTestVectors(kInputFillValue, kOutputFillValue);
+  start = TimeTicks::HighResNow();
+  for (int j = 0; j < kBenchmarkIterations; ++j) {
+    vector_math::FMAC_SSE(
+        input_vector.get(), kScale, kVectorSize, output_vector.get());
+  }
+  double total_time_sse_aligned_ms =
+      (TimeTicks::HighResNow() - start).InMillisecondsF();
+  printf("FMAC_SSE (aligned size) took %.2fms; which is %.2fx faster than"
+         " FMAC_C and %.2fx faster than FMAC_SSE (unaligned size).\n",
+         total_time_sse_aligned_ms, total_time_c_ms / total_time_sse_aligned_ms,
+         total_time_sse_unaligned_ms / total_time_sse_aligned_ms);
+#endif
+}
+
+}  // namespace media
diff --git a/media/media.gyp b/media/media.gyp
index 86c1a61..582c23e 100644
--- a/media/media.gyp
+++ b/media/media.gyp
@@ -207,6 +207,8 @@
         'base/stream_parser.h',
         'base/stream_parser_buffer.cc',
         'base/stream_parser_buffer.h',
+        'base/vector_math.cc',
+        'base/vector_math.h',
         'base/video_decoder.cc',
         'base/video_decoder.h',
         'base/video_decoder_config.cc',
@@ -707,6 +709,8 @@
         'base/sinc_resampler_unittest.cc',
         'base/test_data_util.cc',
         'base/test_data_util.h',
+        'base/vector_math_testing.h',
+        'base/vector_math_unittest.cc',
         'base/video_frame_unittest.cc',
         'base/video_util_unittest.cc',
         'base/yuv_convert_unittest.cc',
author	dalecurtis@chromium.org <dalecurtis@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-08-24 00:40:46 +0000
committer	dalecurtis@chromium.org <dalecurtis@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-08-24 00:40:46 +0000
commit	9c2b29e0d642712e780818dddab5a965b4fd96d4 (patch)
tree	4d9ae01ab0c5c86cd1d7b4e2abbdd40db3ba1190
parent	b94ebd1f6e245d9f9f3d9a2fc933581eb8f508bc (diff)
download	chromium_src-9c2b29e0d642712e780818dddab5a965b4fd96d4.zip chromium_src-9c2b29e0d642712e780818dddab5a965b4fd96d4.tar.gz chromium_src-9c2b29e0d642712e780818dddab5a965b4fd96d4.tar.bz2