SIMD implementation of Convolver for Lanczos filter etc.

Replace the current convolver functions (horizontal/vertical) with SSE2 intrinsic versions. Performance has not been carefully tuned to the optimum in this patch, but it should still easily beat the C version. BUG=62820 TEST=unittest and image_operation_bench. Review URL: http://codereview.chromium.org/6334070 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@77527 0039d316-1c4b-4281-b951-d872f2087c98
author: jiesun@chromium.org <jiesun@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-03-09 21:55:38 +0000
committer: jiesun@chromium.org <jiesun@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-03-09 21:55:38 +0000
commit: 3c2632de729773052b5561575db4e81017aff538 (patch)
tree: 77d5a9c10da3330445f81db20fb88b785d457885 /skia/ext/convolver.h
parent: 7c48dafd4f686038dde05650c029995e77013aa0 (diff)
download: chromium_src-3c2632de729773052b5561575db4e81017aff538.zip
chromium_src-3c2632de729773052b5561575db4e81017aff538.tar.gz
chromium_src-3c2632de729773052b5561575db4e81017aff538.tar.bz2
1 files changed, 21 insertions, 2 deletions
diff --git a/skia/ext/convolver.h b/skia/ext/convolver.h
index 04d6fe5..cedd8fa 100644
--- a/skia/ext/convolver.h
+++ b/skia/ext/convolver.h
@@ -10,6 +10,14 @@
 #include <vector>
 
 #include "base/basictypes.h"
+#include "base/cpu.h"
+
+#if defined(ARCH_CPU_X86_FAMILY)
+#if defined(__x86_64__) || defined(_M_X64) || defined(__SSE2__) || _M_IX86_FP==2
+// The compiler supports SSE2 instructions when building for this target.
+#define SIMD_SSE2 1
+#endif
+#endif
 
 // avoid confusion with Mac OS X's math library (Carbon)
 #if defined(__APPLE__)
@@ -98,6 +106,17 @@ class ConvolutionFilter1D {
     return &filter_values_[filter.data_location];
   }
 
+
+  inline void PaddingForSIMD(int padding_count) {
+    // Appends |padding_count| zero-valued dummy coefficients after the last
+    // filter's coefficients so that SIMD loads of 8 or 16 bytes at a time do
+    // not access invalid memory. The coefficients are not aligned for now
+    // because the <vector> implementation is opaque. This must be called
+    // only after all |AddFilter| calls have been made.
+    for (int i = 0; i < padding_count; ++i)
+      filter_values_.push_back(static_cast<Fixed>(0));
+  }
+
  private:
   struct FilterInstance {
     // Offset within filter_values for this instance of the filter.
@@ -146,8 +165,8 @@ void BGRAConvolve2D(const unsigned char* source_data,
                     const ConvolutionFilter1D& xfilter,
                     const ConvolutionFilter1D& yfilter,
                     int output_byte_row_stride,
-                    unsigned char* output);
-
+                    unsigned char* output,
+                    bool use_sse2);
 }  // namespace skia
 
 #endif  // SKIA_EXT_CONVOLVER_H_
author	jiesun@chromium.org <jiesun@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-03-09 21:55:38 +0000
committer	jiesun@chromium.org <jiesun@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-03-09 21:55:38 +0000
commit	3c2632de729773052b5561575db4e81017aff538 (patch)
tree	77d5a9c10da3330445f81db20fb88b785d457885 /skia/ext/convolver.h
parent	7c48dafd4f686038dde05650c029995e77013aa0 (diff)
download	chromium_src-3c2632de729773052b5561575db4e81017aff538.zip chromium_src-3c2632de729773052b5561575db4e81017aff538.tar.gz chromium_src-3c2632de729773052b5561575db4e81017aff538.tar.bz2