diff options
author | hubbe@chromium.org <hubbe@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-03-16 15:54:06 +0000 |
---|---|---|
committer | hubbe@chromium.org <hubbe@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-03-16 15:54:06 +0000 |
commit | 03fc359f9d80b21fd069daf923d5e1bd3ddd9dfd (patch) | |
tree | d5d4e9c468d559b8bced0dec530be80245aa4517 /skia/ext | |
parent | 4e8566ab87f717d73e282327f91a0c07f771810b (diff) | |
download | chromium_src-03fc359f9d80b21fd069daf923d5e1bd3ddd9dfd.zip chromium_src-03fc359f9d80b21fd069daf923d5e1bd3ddd9dfd.tar.gz chromium_src-03fc359f9d80b21fd069daf923d5e1bd3ddd9dfd.tar.bz2 |
Attempt #2 at enabling SSE2 and fixing buffer overruns.
BUG=181072
Review URL: https://chromiumcodereview.appspot.com/12636007
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@188582 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'skia/ext')
-rw-r--r-- | skia/ext/convolver.cc | 21 | ||||
-rw-r--r-- | skia/ext/convolver.h | 3 | ||||
-rw-r--r-- | skia/ext/convolver_unittest.cc | 31 |
3 files changed, 39 insertions, 16 deletions
diff --git a/skia/ext/convolver.cc b/skia/ext/convolver.cc index ee9d056..47e3711 100644 --- a/skia/ext/convolver.cc +++ b/skia/ext/convolver.cc @@ -765,6 +765,18 @@ void BGRAConvolve2D(const unsigned char* source_data, // We need to check which is the last line to convolve before we advance 4 // lines in one iteration. int last_filter_offset, last_filter_length; + + // SSE2 can access up to 3 extra pixels past the end of the + // buffer. At the bottom of the image, we have to be careful + // not to access data past the end of the buffer. Normally + // we fall back to the C++ implementation for the last row. + // If the last row is less than 3 pixels wide, we may have to fall + // back to the C++ version for more rows. Compute how many + // rows we need to avoid the SSE implementation for here. + filter_x.FilterForValue(filter_x.num_values() - 1, &last_filter_offset, + &last_filter_length); + int avoid_sse_rows = 1 + 3/(last_filter_offset + last_filter_length); + filter_y.FilterForValue(num_output_rows - 1, &last_filter_offset, &last_filter_length); @@ -775,7 +787,8 @@ void BGRAConvolve2D(const unsigned char* source_data, // Generate output rows until we have enough to run the current filter. if (use_sse2) { while (next_x_row < filter_offset + filter_length) { - if (next_x_row + 3 < last_filter_offset + last_filter_length - 1) { + if (next_x_row + 3 < last_filter_offset + last_filter_length - + avoid_sse_rows) { const unsigned char* src[4]; unsigned char* out_row[4]; for (int i = 0; i < 4; ++i) { @@ -785,9 +798,9 @@ void BGRAConvolve2D(const unsigned char* source_data, ConvolveHorizontally4_SSE2(src, filter_x, out_row); next_x_row += 4; } else { - // For the last row, SSE2 load possibly to access data beyond the - // image area. therefore we use C version here. - if (next_x_row == last_filter_offset + last_filter_length - 1) { + // Check if we need to avoid SSE2 for this row. + if (next_x_row >= last_filter_offset + last_filter_length - + avoid_sse_rows) { if (source_has_alpha) { ConvolveHorizontally<true>( &source_data[next_x_row * source_byte_row_stride], diff --git a/skia/ext/convolver.h b/skia/ext/convolver.h index 14974e5..9101a5e 100644 --- a/skia/ext/convolver.h +++ b/skia/ext/convolver.h @@ -15,8 +15,7 @@ #if defined(ARCH_CPU_X86_FAMILY) #if defined(__x86_64__) || defined(_M_X64) || defined(__SSE2__) || _M_IX86_FP==2 // This is where we had compiler support for SSE2 instructions. -// FIXME: Known buggy, so disabling for M22. -// #define SIMD_SSE2 1 +#define SIMD_SSE2 1 #endif #endif diff --git a/skia/ext/convolver_unittest.cc b/skia/ext/convolver_unittest.cc index f61b685..6bf09ee 100644 --- a/skia/ext/convolver_unittest.cc +++ b/skia/ext/convolver_unittest.cc @@ -215,7 +215,15 @@ TEST(Convolver, SIMDVerification) { base::CPU cpu; if (!cpu.has_sse2()) return; - int source_sizes[][2] = { {1920, 1080}, {720, 480}, {1377, 523}, {325, 241} }; + int source_sizes[][2] = { + {1,1}, {1,2}, {1,3}, {1,4}, {1,5}, + {2,1}, {2,2}, {2,3}, {2,4}, {2,5}, + {3,1}, {3,2}, {3,3}, {3,4}, {3,5}, + {4,1}, {4,2}, {4,3}, {4,4}, {4,5}, + {1920, 1080}, + {720, 480}, + {1377, 523}, + {325, 241} }; int dest_sizes[][2] = { {1280, 1024}, {480, 270}, {177, 123} }; float filter[] = { 0.05f, -0.15f, 0.6f, 0.6f, -0.15f, 0.05f }; @@ -226,23 +234,26 @@ TEST(Convolver, SIMDVerification) { unsigned int source_width = source_sizes[i][0]; unsigned int source_height = source_sizes[i][1]; for (unsigned int j = 0; j < arraysize(dest_sizes); ++j) { - unsigned int dest_width = source_sizes[j][0]; - unsigned int dest_height = source_sizes[j][1]; + unsigned int dest_width = dest_sizes[j][0]; + unsigned int dest_height = dest_sizes[j][1]; // Preparing convolve coefficients. ConvolutionFilter1D x_filter, y_filter; for (unsigned int p = 0; p < dest_width; ++p) { unsigned int offset = source_width * p / dest_width; - if (offset > source_width - arraysize(filter)) - offset = source_width - arraysize(filter); - x_filter.AddFilter(offset, filter, arraysize(filter)); + EXPECT_LT(offset, source_width); + x_filter.AddFilter(offset, filter, + std::min<int>(arraysize(filter), + source_width - offset)); } + x_filter.PaddingForSIMD(8); for (unsigned int p = 0; p < dest_height; ++p) { unsigned int offset = source_height * p / dest_height; - if (offset > source_height - arraysize(filter)) - offset = source_height - arraysize(filter); - y_filter.AddFilter(offset, filter, arraysize(filter)); + y_filter.AddFilter(offset, filter, + std::min<int>(arraysize(filter), + source_height - offset)); } + y_filter.PaddingForSIMD(8); // Allocate input and output skia bitmap. SkBitmap source, result_c, result_sse; @@ -259,7 +270,7 @@ TEST(Convolver, SIMDVerification) { // Randomize source bitmap for testing. unsigned char* src_ptr = static_cast<unsigned char*>(source.getPixels()); for (int y = 0; y < source.height(); y++) { - for (int x = 0; x < source.rowBytes(); x++) + for (unsigned int x = 0; x < source.rowBytes(); x++) src_ptr[x] = rand() % 255; src_ptr += source.rowBytes(); } |