summary | refs | log | tree | commit | diff | stats
path: root/skia/ext
diff options
context:
space:
mode:
author: hubbe@chromium.org <hubbe@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-03-16 15:54:06 +0000
committer: hubbe@chromium.org <hubbe@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-03-16 15:54:06 +0000
commit: 03fc359f9d80b21fd069daf923d5e1bd3ddd9dfd (patch)
tree: d5d4e9c468d559b8bced0dec530be80245aa4517 /skia/ext
parent: 4e8566ab87f717d73e282327f91a0c07f771810b (diff)
download: chromium_src-03fc359f9d80b21fd069daf923d5e1bd3ddd9dfd.zip
chromium_src-03fc359f9d80b21fd069daf923d5e1bd3ddd9dfd.tar.gz
chromium_src-03fc359f9d80b21fd069daf923d5e1bd3ddd9dfd.tar.bz2
Attempt #2 at enabling SSE2 and fixing buffer overruns.
BUG=181072
Review URL: https://chromiumcodereview.appspot.com/12636007
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@188582 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'skia/ext')
-rw-r--r-- skia/ext/convolver.cc 21
-rw-r--r-- skia/ext/convolver.h 3
-rw-r--r-- skia/ext/convolver_unittest.cc 31
3 files changed, 39 insertions, 16 deletions
diff --git a/skia/ext/convolver.cc b/skia/ext/convolver.cc
index ee9d056..47e3711 100644
--- a/skia/ext/convolver.cc
+++ b/skia/ext/convolver.cc
@@ -765,6 +765,18 @@ void BGRAConvolve2D(const unsigned char* source_data,
// We need to check which is the last line to convolve before we advance 4
// lines in one iteration.
int last_filter_offset, last_filter_length;
+
+ // SSE2 can access up to 3 extra pixels past the end of the
+ // buffer. At the bottom of the image, we have to be careful
+ // not to access data past the end of the buffer. Normally
+ // we fall back to the C++ implementation for the last row.
+ // If the last row is less than 3 pixels wide, we may have to fall
+ // back to the C++ version for more rows. Compute how many
+ // rows we need to avoid the SSE implementation for here.
+ filter_x.FilterForValue(filter_x.num_values() - 1, &last_filter_offset,
+ &last_filter_length);
+ int avoid_sse_rows = 1 + 3/(last_filter_offset + last_filter_length);
+
filter_y.FilterForValue(num_output_rows - 1, &last_filter_offset,
&last_filter_length);
@@ -775,7 +787,8 @@ void BGRAConvolve2D(const unsigned char* source_data,
// Generate output rows until we have enough to run the current filter.
if (use_sse2) {
while (next_x_row < filter_offset + filter_length) {
- if (next_x_row + 3 < last_filter_offset + last_filter_length - 1) {
+ if (next_x_row + 3 < last_filter_offset + last_filter_length -
+ avoid_sse_rows) {
const unsigned char* src[4];
unsigned char* out_row[4];
for (int i = 0; i < 4; ++i) {
@@ -785,9 +798,9 @@ void BGRAConvolve2D(const unsigned char* source_data,
ConvolveHorizontally4_SSE2(src, filter_x, out_row);
next_x_row += 4;
} else {
- // For the last row, SSE2 load possibly to access data beyond the
- // image area. therefore we use C version here.
- if (next_x_row == last_filter_offset + last_filter_length - 1) {
+ // Check if we need to avoid SSE2 for this row.
+ if (next_x_row >= last_filter_offset + last_filter_length -
+ avoid_sse_rows) {
if (source_has_alpha) {
ConvolveHorizontally<true>(
&source_data[next_x_row * source_byte_row_stride],
diff --git a/skia/ext/convolver.h b/skia/ext/convolver.h
index 14974e5..9101a5e 100644
--- a/skia/ext/convolver.h
+++ b/skia/ext/convolver.h
@@ -15,8 +15,7 @@
#if defined(ARCH_CPU_X86_FAMILY)
#if defined(__x86_64__) || defined(_M_X64) || defined(__SSE2__) || _M_IX86_FP==2
// This is where we had compiler support for SSE2 instructions.
-// FIXME: Known buggy, so disabling for M22.
-// #define SIMD_SSE2 1
+#define SIMD_SSE2 1
#endif
#endif
diff --git a/skia/ext/convolver_unittest.cc b/skia/ext/convolver_unittest.cc
index f61b685..6bf09ee 100644
--- a/skia/ext/convolver_unittest.cc
+++ b/skia/ext/convolver_unittest.cc
@@ -215,7 +215,15 @@ TEST(Convolver, SIMDVerification) {
base::CPU cpu;
if (!cpu.has_sse2()) return;
- int source_sizes[][2] = { {1920, 1080}, {720, 480}, {1377, 523}, {325, 241} };
+ int source_sizes[][2] = {
+ {1,1}, {1,2}, {1,3}, {1,4}, {1,5},
+ {2,1}, {2,2}, {2,3}, {2,4}, {2,5},
+ {3,1}, {3,2}, {3,3}, {3,4}, {3,5},
+ {4,1}, {4,2}, {4,3}, {4,4}, {4,5},
+ {1920, 1080},
+ {720, 480},
+ {1377, 523},
+ {325, 241} };
int dest_sizes[][2] = { {1280, 1024}, {480, 270}, {177, 123} };
float filter[] = { 0.05f, -0.15f, 0.6f, 0.6f, -0.15f, 0.05f };
@@ -226,23 +234,26 @@ TEST(Convolver, SIMDVerification) {
unsigned int source_width = source_sizes[i][0];
unsigned int source_height = source_sizes[i][1];
for (unsigned int j = 0; j < arraysize(dest_sizes); ++j) {
- unsigned int dest_width = source_sizes[j][0];
- unsigned int dest_height = source_sizes[j][1];
+ unsigned int dest_width = dest_sizes[j][0];
+ unsigned int dest_height = dest_sizes[j][1];
// Preparing convolve coefficients.
ConvolutionFilter1D x_filter, y_filter;
for (unsigned int p = 0; p < dest_width; ++p) {
unsigned int offset = source_width * p / dest_width;
- if (offset > source_width - arraysize(filter))
- offset = source_width - arraysize(filter);
- x_filter.AddFilter(offset, filter, arraysize(filter));
+ EXPECT_LT(offset, source_width);
+ x_filter.AddFilter(offset, filter,
+ std::min<int>(arraysize(filter),
+ source_width - offset));
}
+ x_filter.PaddingForSIMD(8);
for (unsigned int p = 0; p < dest_height; ++p) {
unsigned int offset = source_height * p / dest_height;
- if (offset > source_height - arraysize(filter))
- offset = source_height - arraysize(filter);
- y_filter.AddFilter(offset, filter, arraysize(filter));
+ y_filter.AddFilter(offset, filter,
+ std::min<int>(arraysize(filter),
+ source_height - offset));
}
+ y_filter.PaddingForSIMD(8);
// Allocate input and output skia bitmap.
SkBitmap source, result_c, result_sse;
@@ -259,7 +270,7 @@ TEST(Convolver, SIMDVerification) {
// Randomize source bitmap for testing.
unsigned char* src_ptr = static_cast<unsigned char*>(source.getPixels());
for (int y = 0; y < source.height(); y++) {
- for (int x = 0; x < source.rowBytes(); x++)
+ for (unsigned int x = 0; x < source.rowBytes(); x++)
src_ptr[x] = rand() % 255;
src_ptr += source.rowBytes();
}