summaryrefslogtreecommitdiffstats
path: root/media
diff options
context:
space:
mode:
authorhclam@chromium.org <hclam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-10-17 13:36:05 +0000
committerhclam@chromium.org <hclam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-10-17 13:36:05 +0000
commitdc88452ea0a9189c1197889bf4a66d8237f9a05b (patch)
treef8562306963eb9ae5f5d9fd56711391cdf09105b /media
parente4f9f9bd81b6dc2ca82f5b8913f3a49c616c1dda (diff)
downloadchromium_src-dc88452ea0a9189c1197889bf4a66d8237f9a05b.zip
chromium_src-dc88452ea0a9189c1197889bf4a66d8237f9a05b.tar.gz
chromium_src-dc88452ea0a9189c1197889bf4a66d8237f9a05b.tar.bz2
Fix out-of-bounds access with ScaleYUVToRGB32.
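The FilterYUVRows functions had out-of-bounds (OOB) problems: their do-while loops always processed whole 8-byte (C, MMX) or 16-byte (SSE2) groups, overrunning rows whose width is not a multiple of the group size. Fixed in this patch. BUG=99480 TEST=media_unittests --gtest_filter=YUV* Review URL: http://codereview.chromium.org/8230014 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@105798 0039d316-1c4b-4281-b951-d872f2087c98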
Diffstat (limited to 'media')
-rw-r--r--media/base/simd/filter_yuv_c.cc13
-rw-r--r--media/base/simd/filter_yuv_mmx.cc77
-rw-r--r--media/base/simd/filter_yuv_sse2.cc83
-rw-r--r--media/base/yuv_convert_unittest.cc115
4 files changed, 228 insertions, 60 deletions
diff --git a/media/base/simd/filter_yuv_c.cc b/media/base/simd/filter_yuv_c.cc
index 95ae01a..f292d21 100644
--- a/media/base/simd/filter_yuv_c.cc
+++ b/media/base/simd/filter_yuv_c.cc
@@ -11,7 +11,9 @@ void FilterYUVRows_C(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
int y1_fraction = source_y_fraction;
int y0_fraction = 256 - y1_fraction;
uint8* end = ybuf + source_width;
- do {
+ uint8* rounded_end = ybuf + (source_width & ~7);
+
+ while (ybuf < rounded_end) {
ybuf[0] = (y0_ptr[0] * y0_fraction + y1_ptr[0] * y1_fraction) >> 8;
ybuf[1] = (y0_ptr[1] * y0_fraction + y1_ptr[1] * y1_fraction) >> 8;
ybuf[2] = (y0_ptr[2] * y0_fraction + y1_ptr[2] * y1_fraction) >> 8;
@@ -23,7 +25,14 @@ void FilterYUVRows_C(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
y0_ptr += 8;
y1_ptr += 8;
ybuf += 8;
- } while (ybuf < end);
+ }
+
+ while (ybuf < end) {
+ ybuf[0] = (y0_ptr[0] * y0_fraction + y1_ptr[0] * y1_fraction) >> 8;
+ ++ybuf;
+ ++y0_ptr;
+ ++y1_ptr;
+ }
}
} // namespace media
diff --git a/media/base/simd/filter_yuv_mmx.cc b/media/base/simd/filter_yuv_mmx.cc
index 77698dc..09d62e3 100644
--- a/media/base/simd/filter_yuv_mmx.cc
+++ b/media/base/simd/filter_yuv_mmx.cc
@@ -20,35 +20,56 @@ namespace media {
#pragma warning(disable: 4799)
#endif
-void FilterYUVRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
- int source_width, int source_y_fraction) {
+void FilterYUVRows_MMX(uint8* dest,
+ const uint8* src0,
+ const uint8* src1,
+ int width,
+ int fraction) {
+ int pixel = 0;
+
+ // Process the unaligned bytes first.
+ int unaligned_width =
+ (8 - (reinterpret_cast<uintptr_t>(dest) & 7)) & 7;
+ while (pixel < width && pixel < unaligned_width) {
+ dest[pixel] = (src0[pixel] * (256 - fraction) +
+ src1[pixel] * fraction) >> 8;
+ ++pixel;
+ }
+
__m64 zero = _mm_setzero_si64();
- __m64 y1_fraction = _mm_set1_pi16(source_y_fraction);
- __m64 y0_fraction = _mm_set1_pi16(256 - source_y_fraction);
-
- const __m64* y0_ptr64 = reinterpret_cast<const __m64*>(y0_ptr);
- const __m64* y1_ptr64 = reinterpret_cast<const __m64*>(y1_ptr);
- __m64* dest64 = reinterpret_cast<__m64*>(ybuf);
- __m64* end64 = reinterpret_cast<__m64*>(ybuf + source_width);
-
- do {
- __m64 y0 = *y0_ptr64++;
- __m64 y1 = *y1_ptr64++;
- __m64 y2 = _mm_unpackhi_pi8(y0, zero);
- __m64 y3 = _mm_unpackhi_pi8(y1, zero);
- y0 = _mm_unpacklo_pi8(y0, zero);
- y1 = _mm_unpacklo_pi8(y1, zero);
- y0 = _mm_mullo_pi16(y0, y0_fraction);
- y1 = _mm_mullo_pi16(y1, y1_fraction);
- y2 = _mm_mullo_pi16(y2, y0_fraction);
- y3 = _mm_mullo_pi16(y3, y1_fraction);
- y0 = _mm_add_pi16(y0, y1);
- y2 = _mm_add_pi16(y2, y3);
- y0 = _mm_srli_pi16(y0, 8);
- y2 = _mm_srli_pi16(y2, 8);
- y0 = _mm_packs_pu16(y0, y2);
- *dest64++ = y0;
- } while (dest64 < end64);
+ __m64 src1_fraction = _mm_set1_pi16(fraction);
+ __m64 src0_fraction = _mm_set1_pi16(256 - fraction);
+ const __m64* src0_64 = reinterpret_cast<const __m64*>(src0 + pixel);
+ const __m64* src1_64 = reinterpret_cast<const __m64*>(src1 + pixel);
+ __m64* dest64 = reinterpret_cast<__m64*>(dest + pixel);
+ __m64* end64 = reinterpret_cast<__m64*>(
+ reinterpret_cast<uintptr_t>(dest + width) & ~7);
+
+ while (dest64 < end64) {
+ __m64 src0 = *src0_64++;
+ __m64 src1 = *src1_64++;
+ __m64 src2 = _mm_unpackhi_pi8(src0, zero);
+ __m64 src3 = _mm_unpackhi_pi8(src1, zero);
+ src0 = _mm_unpacklo_pi8(src0, zero);
+ src1 = _mm_unpacklo_pi8(src1, zero);
+ src0 = _mm_mullo_pi16(src0, src0_fraction);
+ src1 = _mm_mullo_pi16(src1, src1_fraction);
+ src2 = _mm_mullo_pi16(src2, src0_fraction);
+ src3 = _mm_mullo_pi16(src3, src1_fraction);
+ src0 = _mm_add_pi16(src0, src1);
+ src2 = _mm_add_pi16(src2, src3);
+ src0 = _mm_srli_pi16(src0, 8);
+ src2 = _mm_srli_pi16(src2, 8);
+ src0 = _mm_packs_pu16(src0, src2);
+ *dest64++ = src0;
+ pixel += 8;
+ }
+
+ while (pixel < width) {
+ dest[pixel] = (src0[pixel] * (256 - fraction) +
+ src1[pixel] * fraction) >> 8;
+ ++pixel;
+ }
}
#if defined(COMPILER_MSVC)
diff --git a/media/base/simd/filter_yuv_sse2.cc b/media/base/simd/filter_yuv_sse2.cc
index 137ac94..84dba5a 100644
--- a/media/base/simd/filter_yuv_sse2.cc
+++ b/media/base/simd/filter_yuv_sse2.cc
@@ -13,37 +13,60 @@
namespace media {
-void FilterYUVRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
- int source_width, int source_y_fraction) {
+void FilterYUVRows_SSE2(uint8* dest,
+ const uint8* src0,
+ const uint8* src1,
+ int width,
+ int fraction) {
+ int pixel = 0;
+
+ // Process the unaligned bytes first.
+ int unaligned_width =
+ (16 - (reinterpret_cast<uintptr_t>(dest) & 15)) & 15;
+ while (pixel < width && pixel < unaligned_width) {
+ dest[pixel] = (src0[pixel] * (256 - fraction) +
+ src1[pixel] * fraction) >> 8;
+ ++pixel;
+ }
+
__m128i zero = _mm_setzero_si128();
- __m128i y1_fraction = _mm_set1_epi16(source_y_fraction);
- __m128i y0_fraction = _mm_set1_epi16(256 - source_y_fraction);
-
- const __m128i* y0_ptr128 = reinterpret_cast<const __m128i*>(y0_ptr);
- const __m128i* y1_ptr128 = reinterpret_cast<const __m128i*>(y1_ptr);
- __m128i* dest128 = reinterpret_cast<__m128i*>(ybuf);
- __m128i* end128 = reinterpret_cast<__m128i*>(ybuf + source_width);
-
- do {
- __m128i y0 = _mm_loadu_si128(y0_ptr128);
- __m128i y1 = _mm_loadu_si128(y1_ptr128);
- __m128i y2 = _mm_unpackhi_epi8(y0, zero);
- __m128i y3 = _mm_unpackhi_epi8(y1, zero);
- y0 = _mm_unpacklo_epi8(y0, zero);
- y1 = _mm_unpacklo_epi8(y1, zero);
- y0 = _mm_mullo_epi16(y0, y0_fraction);
- y1 = _mm_mullo_epi16(y1, y1_fraction);
- y2 = _mm_mullo_epi16(y2, y0_fraction);
- y3 = _mm_mullo_epi16(y3, y1_fraction);
- y0 = _mm_add_epi16(y0, y1);
- y2 = _mm_add_epi16(y2, y3);
- y0 = _mm_srli_epi16(y0, 8);
- y2 = _mm_srli_epi16(y2, 8);
- y0 = _mm_packus_epi16(y0, y2);
- *dest128++ = y0;
- ++y0_ptr128;
- ++y1_ptr128;
- } while (dest128 < end128);
+ __m128i src1_fraction = _mm_set1_epi16(fraction);
+ __m128i src0_fraction = _mm_set1_epi16(256 - fraction);
+ const __m128i* src0_128 =
+ reinterpret_cast<const __m128i*>(src0 + pixel);
+ const __m128i* src1_128 =
+ reinterpret_cast<const __m128i*>(src1 + pixel);
+ __m128i* dest128 = reinterpret_cast<__m128i*>(dest + pixel);
+ __m128i* end128 = reinterpret_cast<__m128i*>(
+ reinterpret_cast<uintptr_t>(dest + width) & ~15);
+
+ while (dest128 < end128) {
+ __m128i src0 = _mm_loadu_si128(src0_128);
+ __m128i src1 = _mm_loadu_si128(src1_128);
+ __m128i src2 = _mm_unpackhi_epi8(src0, zero);
+ __m128i src3 = _mm_unpackhi_epi8(src1, zero);
+ src0 = _mm_unpacklo_epi8(src0, zero);
+ src1 = _mm_unpacklo_epi8(src1, zero);
+ src0 = _mm_mullo_epi16(src0, src0_fraction);
+ src1 = _mm_mullo_epi16(src1, src1_fraction);
+ src2 = _mm_mullo_epi16(src2, src0_fraction);
+ src3 = _mm_mullo_epi16(src3, src1_fraction);
+ src0 = _mm_add_epi16(src0, src1);
+ src2 = _mm_add_epi16(src2, src3);
+ src0 = _mm_srli_epi16(src0, 8);
+ src2 = _mm_srli_epi16(src2, 8);
+ src0 = _mm_packus_epi16(src0, src2);
+ *dest128++ = src0;
+ ++src0_128;
+ ++src1_128;
+ pixel += 16;
+ }
+
+ while (pixel < width) {
+ dest[pixel] = (src0[pixel] * (256 - fraction) +
+ src1[pixel] * fraction) >> 8;
+ ++pixel;
+ }
}
} // namespace media
diff --git a/media/base/yuv_convert_unittest.cc b/media/base/yuv_convert_unittest.cc
index 9f1a850..9db998b 100644
--- a/media/base/yuv_convert_unittest.cc
+++ b/media/base/yuv_convert_unittest.cc
@@ -10,6 +10,7 @@
#include "media/base/djb2.h"
#include "media/base/simd/convert_rgb_to_yuv.h"
#include "media/base/simd/convert_yuv_to_rgb.h"
+#include "media/base/simd/filter_yuv.h"
#include "media/base/yuv_convert.h"
#include "testing/gtest/include/gtest/gtest.h"
@@ -645,6 +646,120 @@ TEST(YUVConvertTest, LinearScaleYUVToRGB32Row_SSE) {
kWidth * kBpp));
}
+TEST(YUVConvertTest, FilterYUVRows_C_OutOfBounds) {
+ scoped_array<uint8> src(new uint8[16]);
+ scoped_array<uint8> dst(new uint8[16]);
+
+ memset(src.get(), 0xff, 16);
+ memset(dst.get(), 0, 16);
+
+ media::FilterYUVRows_C(dst.get(), src.get(), src.get(), 1, 255);
+
+ EXPECT_EQ(255u, dst[0]);
+ for (int i = 1; i < 16; ++i) {
+ EXPECT_EQ(0u, dst[i]) << " not equal at " << i;
+ }
+}
+
+TEST(YUVConvertTest, FilterYUVRows_MMX_OutOfBounds) {
+ if (!media::hasMMX()) {
+ LOG(WARNING) << "System not supported. Test skipped.";
+ return;
+ }
+
+ scoped_array<uint8> src(new uint8[16]);
+ scoped_array<uint8> dst(new uint8[16]);
+
+ memset(src.get(), 0xff, 16);
+ memset(dst.get(), 0, 16);
+
+ media::FilterYUVRows_MMX(dst.get(), src.get(), src.get(), 1, 255);
+ media::EmptyRegisterState();
+
+ EXPECT_EQ(255u, dst[0]);
+ for (int i = 1; i < 16; ++i) {
+ EXPECT_EQ(0u, dst[i]);
+ }
+}
+
+TEST(YUVConvertTest, FilterYUVRows_SSE2_OutOfBounds) {
+ if (!media::hasSSE2()) {
+ LOG(WARNING) << "System not supported. Test skipped.";
+ return;
+ }
+
+ scoped_array<uint8> src(new uint8[16]);
+ scoped_array<uint8> dst(new uint8[16]);
+
+ memset(src.get(), 0xff, 16);
+ memset(dst.get(), 0, 16);
+
+ media::FilterYUVRows_SSE2(dst.get(), src.get(), src.get(), 1, 255);
+
+ EXPECT_EQ(255u, dst[0]);
+ for (int i = 1; i < 16; ++i) {
+ EXPECT_EQ(0u, dst[i]);
+ }
+}
+
+TEST(YUVConvertTest, FilterYUVRows_MMX_UnalignedDestination) {
+ if (!media::hasMMX()) {
+ LOG(WARNING) << "System not supported. Test skipped.";
+ return;
+ }
+
+ const int kSize = 32;
+ scoped_array<uint8> src(new uint8[kSize]);
+ scoped_array<uint8> dst_sample(new uint8[kSize]);
+ scoped_array<uint8> dst(new uint8[kSize]);
+
+ memset(dst_sample.get(), 0, kSize);
+ memset(dst.get(), 0, kSize);
+ for (int i = 0; i < kSize; ++i)
+ src[i] = 100 + i;
+
+ media::FilterYUVRows_C(dst_sample.get(),
+ src.get(), src.get(), 17, 128);
+
+ // Generate an unaligned output address.
+ uint8* dst_ptr =
+ reinterpret_cast<uint8*>(
+ (reinterpret_cast<uintptr_t>(dst.get() + 8) & ~7) + 1);
+ media::FilterYUVRows_MMX(dst_ptr, src.get(), src.get(), 17, 128);
+ media::EmptyRegisterState();
+
+ EXPECT_EQ(0, memcmp(dst_sample.get(), dst_ptr, 17));
+}
+
+TEST(YUVConvertTest, FilterYUVRows_SSE2_UnalignedDestination) {
+ if (!media::hasSSE2()) {
+ LOG(WARNING) << "System not supported. Test skipped.";
+ return;
+ }
+
+ const int kSize = 64;
+ scoped_array<uint8> src(new uint8[kSize]);
+ scoped_array<uint8> dst_sample(new uint8[kSize]);
+ scoped_array<uint8> dst(new uint8[kSize]);
+
+ memset(dst_sample.get(), 0, kSize);
+ memset(dst.get(), 0, kSize);
+ for (int i = 0; i < kSize; ++i)
+ src[i] = 100 + i;
+
+ media::FilterYUVRows_C(dst_sample.get(),
+ src.get(), src.get(), 37, 128);
+
+ // Generate an unaligned output address.
+ uint8* dst_ptr =
+ reinterpret_cast<uint8*>(
+ (reinterpret_cast<uintptr_t>(dst.get() + 16) & ~15) + 1);
+ media::FilterYUVRows_SSE2(dst_ptr, src.get(), src.get(), 37, 128);
+ media::EmptyRegisterState();
+
+ EXPECT_EQ(0, memcmp(dst_sample.get(), dst_ptr, 37));
+}
+
#if defined(ARCH_CPU_X86_64)
TEST(YUVConvertTest, ScaleYUVToRGB32Row_SSE2_X64) {