diff options
author | jiesun@chromium.org <jiesun@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-03-09 21:55:38 +0000 |
---|---|---|
committer | jiesun@chromium.org <jiesun@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-03-09 21:55:38 +0000 |
commit | 3c2632de729773052b5561575db4e81017aff538 (patch) | |
tree | 77d5a9c10da3330445f81db20fb88b785d457885 /skia/ext/convolver.h | |
parent | 7c48dafd4f686038dde05650c029995e77013aa0 (diff) | |
download | chromium_src-3c2632de729773052b5561575db4e81017aff538.zip chromium_src-3c2632de729773052b5561575db4e81017aff538.tar.gz chromium_src-3c2632de729773052b5561575db4e81017aff538.tar.bz2 |
SIMD implementation of Convolver for Lanczos filter etc.
Replace the current convolver functions (horizontal/vertical) with SSE2 intrinsic versions. Performance has not been carefully tuned in this patch, but it should still easily beat the C version.
BUG=62820
TEST=unittest and image_operation_bench.
Review URL: http://codereview.chromium.org/6334070
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@77527 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'skia/ext/convolver.h')
-rw-r--r-- | skia/ext/convolver.h | 23 |
1 files changed, 21 insertions, 2 deletions
diff --git a/skia/ext/convolver.h b/skia/ext/convolver.h index 04d6fe5..cedd8fa 100644 --- a/skia/ext/convolver.h +++ b/skia/ext/convolver.h @@ -10,6 +10,14 @@ #include <vector> #include "base/basictypes.h" +#include "base/cpu.h" + +#if defined(ARCH_CPU_X86_FAMILY) +#if defined(__x86_64__) || defined(_M_X64) || defined(__SSE2__) || _M_IX86_FP==2 +// This is where we had compiler support for SSE2 instructions. +#define SIMD_SSE2 1 +#endif +#endif // avoid confusion with Mac OS X's math library (Carbon) #if defined(__APPLE__) @@ -98,6 +106,17 @@ class ConvolutionFilter1D { return &filter_values_[filter.data_location]; } + + inline void PaddingForSIMD(int padding_count) { + // Padding |padding_count| of more dummy coefficients after the coefficients + // of last filter to prevent SIMD instructions which load 8 or 16 bytes + // together to access invalid memory areas. We are not trying to align the + // coefficients right now due to the opaqueness of <vector> implementation. + // This has to be done after all |AddFilter| calls. + for (int i = 0; i < padding_count; ++i) + filter_values_.push_back(static_cast<Fixed>(0)); + } + private: struct FilterInstance { // Offset within filter_values for this instance of the filter. @@ -146,8 +165,8 @@ void BGRAConvolve2D(const unsigned char* source_data, const ConvolutionFilter1D& xfilter, const ConvolutionFilter1D& yfilter, int output_byte_row_stride, - unsigned char* output); - + unsigned char* output, + bool use_sse2); } // namespace skia #endif // SKIA_EXT_CONVOLVER_H_ |