Fix out-of-bounds access in ScaleYUVToRGB32.

The FilterYUVRows functions had out-of-bounds (OOB) access problems, which are fixed in this patch. BUG=99480 TEST=media_unittests --gtest_filter=YUV* Review URL: http://codereview.chromium.org/8230014 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@105798 0039d316-1c4b-4281-b951-d872f2087c98
author: hclam@chromium.org <hclam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-10-17 13:36:05 +0000
committer: hclam@chromium.org <hclam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-10-17 13:36:05 +0000
commit: dc88452ea0a9189c1197889bf4a66d8237f9a05b (patch)
tree: f8562306963eb9ae5f5d9fd56711391cdf09105b /media
parent: e4f9f9bd81b6dc2ca82f5b8913f3a49c616c1dda (diff)
download: chromium_src-dc88452ea0a9189c1197889bf4a66d8237f9a05b.zip
chromium_src-dc88452ea0a9189c1197889bf4a66d8237f9a05b.tar.gz
chromium_src-dc88452ea0a9189c1197889bf4a66d8237f9a05b.tar.bz2
4 files changed, 228 insertions, 60 deletions
diff --git a/media/base/simd/filter_yuv_c.cc b/media/base/simd/filter_yuv_c.cc
index 95ae01a..f292d21 100644
--- a/media/base/simd/filter_yuv_c.cc
+++ b/media/base/simd/filter_yuv_c.cc
@@ -11,7 +11,9 @@ void FilterYUVRows_C(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
   int y1_fraction = source_y_fraction;
   int y0_fraction = 256 - y1_fraction;
   uint8* end = ybuf + source_width;
-  do {
+  uint8* rounded_end = ybuf + (source_width & ~7);
+
+  while (ybuf < rounded_end) {
     ybuf[0] = (y0_ptr[0] * y0_fraction + y1_ptr[0] * y1_fraction) >> 8;
     ybuf[1] = (y0_ptr[1] * y0_fraction + y1_ptr[1] * y1_fraction) >> 8;
     ybuf[2] = (y0_ptr[2] * y0_fraction + y1_ptr[2] * y1_fraction) >> 8;
@@ -23,7 +25,14 @@ void FilterYUVRows_C(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
     y0_ptr += 8;
     y1_ptr += 8;
     ybuf += 8;
-  } while (ybuf < end);
+  }
+
+  while (ybuf < end) {
+    ybuf[0] = (y0_ptr[0] * y0_fraction + y1_ptr[0] * y1_fraction) >> 8;
+    ++ybuf;
+    ++y0_ptr;
+    ++y1_ptr;
+  }
 }
 
 }  // namespace media
diff --git a/media/base/simd/filter_yuv_mmx.cc b/media/base/simd/filter_yuv_mmx.cc
index 77698dc..09d62e3 100644
--- a/media/base/simd/filter_yuv_mmx.cc
+++ b/media/base/simd/filter_yuv_mmx.cc
@@ -20,35 +20,56 @@ namespace media {
 #pragma warning(disable: 4799)
 #endif
 
-void FilterYUVRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
-                       int source_width, int source_y_fraction) {
+void FilterYUVRows_MMX(uint8* dest,
+                       const uint8* src0,
+                       const uint8* src1,
+                       int width,
+                       int fraction) {
+  int pixel = 0;
+
+  // Process the unaligned bytes first.
+  int unaligned_width =
+      (8 - (reinterpret_cast<uintptr_t>(dest) & 7)) & 7;
+  while (pixel < width && pixel < unaligned_width) {
+    dest[pixel] = (src0[pixel] * (256 - fraction) +
+                   src1[pixel] * fraction) >> 8;
+    ++pixel;
+  }
+
   __m64 zero = _mm_setzero_si64();
-  __m64 y1_fraction = _mm_set1_pi16(source_y_fraction);
-  __m64 y0_fraction = _mm_set1_pi16(256 - source_y_fraction);
-
-  const __m64* y0_ptr64 = reinterpret_cast<const __m64*>(y0_ptr);
-  const __m64* y1_ptr64 = reinterpret_cast<const __m64*>(y1_ptr);
-  __m64* dest64 = reinterpret_cast<__m64*>(ybuf);
-  __m64* end64 = reinterpret_cast<__m64*>(ybuf + source_width);
-
-  do {
-    __m64 y0 = *y0_ptr64++;
-    __m64 y1 = *y1_ptr64++;
-    __m64 y2 = _mm_unpackhi_pi8(y0, zero);
-    __m64 y3 = _mm_unpackhi_pi8(y1, zero);
-    y0 = _mm_unpacklo_pi8(y0, zero);
-    y1 = _mm_unpacklo_pi8(y1, zero);
-    y0 = _mm_mullo_pi16(y0, y0_fraction);
-    y1 = _mm_mullo_pi16(y1, y1_fraction);
-    y2 = _mm_mullo_pi16(y2, y0_fraction);
-    y3 = _mm_mullo_pi16(y3, y1_fraction);
-    y0 = _mm_add_pi16(y0, y1);
-    y2 = _mm_add_pi16(y2, y3);
-    y0 = _mm_srli_pi16(y0, 8);
-    y2 = _mm_srli_pi16(y2, 8);
-    y0 = _mm_packs_pu16(y0, y2);
-    *dest64++ = y0;
-  } while (dest64 < end64);
+  __m64 src1_fraction = _mm_set1_pi16(fraction);
+  __m64 src0_fraction = _mm_set1_pi16(256 - fraction);
+  const __m64* src0_64 = reinterpret_cast<const __m64*>(src0 + pixel);
+  const __m64* src1_64 = reinterpret_cast<const __m64*>(src1 + pixel);
+  __m64* dest64 = reinterpret_cast<__m64*>(dest + pixel);
+  __m64* end64 = reinterpret_cast<__m64*>(
+      reinterpret_cast<uintptr_t>(dest + width) & ~7);
+
+  while (dest64 < end64) {
+    __m64 src0 = *src0_64++;
+    __m64 src1 = *src1_64++;
+    __m64 src2 = _mm_unpackhi_pi8(src0, zero);
+    __m64 src3 = _mm_unpackhi_pi8(src1, zero);
+    src0 = _mm_unpacklo_pi8(src0, zero);
+    src1 = _mm_unpacklo_pi8(src1, zero);
+    src0 = _mm_mullo_pi16(src0, src0_fraction);
+    src1 = _mm_mullo_pi16(src1, src1_fraction);
+    src2 = _mm_mullo_pi16(src2, src0_fraction);
+    src3 = _mm_mullo_pi16(src3, src1_fraction);
+    src0 = _mm_add_pi16(src0, src1);
+    src2 = _mm_add_pi16(src2, src3);
+    src0 = _mm_srli_pi16(src0, 8);
+    src2 = _mm_srli_pi16(src2, 8);
+    src0 = _mm_packs_pu16(src0, src2);
+    *dest64++ = src0;
+    pixel += 8;
+  }
+
+  while (pixel < width) {
+    dest[pixel] = (src0[pixel] * (256 - fraction) +
+                   src1[pixel] * fraction) >> 8;
+    ++pixel;
+  }
 }
 
 #if defined(COMPILER_MSVC)
diff --git a/media/base/simd/filter_yuv_sse2.cc b/media/base/simd/filter_yuv_sse2.cc
index 137ac94..84dba5a 100644
--- a/media/base/simd/filter_yuv_sse2.cc
+++ b/media/base/simd/filter_yuv_sse2.cc
@@ -13,37 +13,60 @@
 
 namespace media {
 
-void FilterYUVRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
-                        int source_width, int source_y_fraction) {
+void FilterYUVRows_SSE2(uint8* dest,
+                        const uint8* src0,
+                        const uint8* src1,
+                        int width,
+                        int fraction) {
+  int pixel = 0;
+
+  // Process the unaligned bytes first.
+  int unaligned_width =
+      (16 - (reinterpret_cast<uintptr_t>(dest) & 15)) & 15;
+  while (pixel < width && pixel < unaligned_width) {
+    dest[pixel] = (src0[pixel] * (256 - fraction) +
+                   src1[pixel] * fraction) >> 8;
+    ++pixel;
+  }
+
   __m128i zero = _mm_setzero_si128();
-  __m128i y1_fraction = _mm_set1_epi16(source_y_fraction);
-  __m128i y0_fraction = _mm_set1_epi16(256 - source_y_fraction);
-
-  const __m128i* y0_ptr128 = reinterpret_cast<const __m128i*>(y0_ptr);
-  const __m128i* y1_ptr128 = reinterpret_cast<const __m128i*>(y1_ptr);
-  __m128i* dest128 = reinterpret_cast<__m128i*>(ybuf);
-  __m128i* end128 = reinterpret_cast<__m128i*>(ybuf + source_width);
-
-  do {
-    __m128i y0 = _mm_loadu_si128(y0_ptr128);
-    __m128i y1 = _mm_loadu_si128(y1_ptr128);
-    __m128i y2 = _mm_unpackhi_epi8(y0, zero);
-    __m128i y3 = _mm_unpackhi_epi8(y1, zero);
-    y0 = _mm_unpacklo_epi8(y0, zero);
-    y1 = _mm_unpacklo_epi8(y1, zero);
-    y0 = _mm_mullo_epi16(y0, y0_fraction);
-    y1 = _mm_mullo_epi16(y1, y1_fraction);
-    y2 = _mm_mullo_epi16(y2, y0_fraction);
-    y3 = _mm_mullo_epi16(y3, y1_fraction);
-    y0 = _mm_add_epi16(y0, y1);
-    y2 = _mm_add_epi16(y2, y3);
-    y0 = _mm_srli_epi16(y0, 8);
-    y2 = _mm_srli_epi16(y2, 8);
-    y0 = _mm_packus_epi16(y0, y2);
-    *dest128++ = y0;
-    ++y0_ptr128;
-    ++y1_ptr128;
-  } while (dest128 < end128);
+  __m128i src1_fraction = _mm_set1_epi16(fraction);
+  __m128i src0_fraction = _mm_set1_epi16(256 - fraction);
+  const __m128i* src0_128 =
+      reinterpret_cast<const __m128i*>(src0 + pixel);
+  const __m128i* src1_128 =
+      reinterpret_cast<const __m128i*>(src1 + pixel);
+  __m128i* dest128 = reinterpret_cast<__m128i*>(dest + pixel);
+  __m128i* end128 = reinterpret_cast<__m128i*>(
+      reinterpret_cast<uintptr_t>(dest + width) & ~15);
+
+  while (dest128 < end128) {
+    __m128i src0 = _mm_loadu_si128(src0_128);
+    __m128i src1 = _mm_loadu_si128(src1_128);
+    __m128i src2 = _mm_unpackhi_epi8(src0, zero);
+    __m128i src3 = _mm_unpackhi_epi8(src1, zero);
+    src0 = _mm_unpacklo_epi8(src0, zero);
+    src1 = _mm_unpacklo_epi8(src1, zero);
+    src0 = _mm_mullo_epi16(src0, src0_fraction);
+    src1 = _mm_mullo_epi16(src1, src1_fraction);
+    src2 = _mm_mullo_epi16(src2, src0_fraction);
+    src3 = _mm_mullo_epi16(src3, src1_fraction);
+    src0 = _mm_add_epi16(src0, src1);
+    src2 = _mm_add_epi16(src2, src3);
+    src0 = _mm_srli_epi16(src0, 8);
+    src2 = _mm_srli_epi16(src2, 8);
+    src0 = _mm_packus_epi16(src0, src2);
+    *dest128++ = src0;
+    ++src0_128;
+    ++src1_128;
+    pixel += 16;
+  }
+
+  while (pixel < width) {
+    dest[pixel] = (src0[pixel] * (256 - fraction) +
+                   src1[pixel] * fraction) >> 8;
+    ++pixel;
+  }
 }
 
 }  // namespace media
diff --git a/media/base/yuv_convert_unittest.cc b/media/base/yuv_convert_unittest.cc
index 9f1a850..9db998b 100644
--- a/media/base/yuv_convert_unittest.cc
+++ b/media/base/yuv_convert_unittest.cc
@@ -10,6 +10,7 @@
 #include "media/base/djb2.h"
 #include "media/base/simd/convert_rgb_to_yuv.h"
 #include "media/base/simd/convert_yuv_to_rgb.h"
+#include "media/base/simd/filter_yuv.h"
 #include "media/base/yuv_convert.h"
 #include "testing/gtest/include/gtest/gtest.h"
 
@@ -645,6 +646,120 @@ TEST(YUVConvertTest, LinearScaleYUVToRGB32Row_SSE) {
                       kWidth * kBpp));
 }
 
+TEST(YUVConvertTest, FilterYUVRows_C_OutOfBounds) {
+  scoped_array<uint8> src(new uint8[16]);
+  scoped_array<uint8> dst(new uint8[16]);
+
+  memset(src.get(), 0xff, 16);
+  memset(dst.get(), 0, 16);
+
+  media::FilterYUVRows_C(dst.get(), src.get(), src.get(), 1, 255);
+
+  EXPECT_EQ(255u, dst[0]);
+  for (int i = 1; i < 16; ++i) {
+    EXPECT_EQ(0u, dst[i]) << " not equal at " << i;
+  }
+}
+
+TEST(YUVConvertTest, FilterYUVRows_MMX_OutOfBounds) {
+  if (!media::hasMMX()) {
+    LOG(WARNING) << "System not supported. Test skipped.";
+    return;
+  }
+
+  scoped_array<uint8> src(new uint8[16]);
+  scoped_array<uint8> dst(new uint8[16]);
+
+  memset(src.get(), 0xff, 16);
+  memset(dst.get(), 0, 16);
+
+  media::FilterYUVRows_MMX(dst.get(), src.get(), src.get(), 1, 255);
+  media::EmptyRegisterState();
+
+  EXPECT_EQ(255u, dst[0]);
+  for (int i = 1; i < 16; ++i) {
+    EXPECT_EQ(0u, dst[i]);
+  }
+}
+
+TEST(YUVConvertTest, FilterYUVRows_SSE2_OutOfBounds) {
+  if (!media::hasSSE2()) {
+    LOG(WARNING) << "System not supported. Test skipped.";
+    return;
+  }
+
+  scoped_array<uint8> src(new uint8[16]);
+  scoped_array<uint8> dst(new uint8[16]);
+
+  memset(src.get(), 0xff, 16);
+  memset(dst.get(), 0, 16);
+
+  media::FilterYUVRows_SSE2(dst.get(), src.get(), src.get(), 1, 255);
+
+  EXPECT_EQ(255u, dst[0]);
+  for (int i = 1; i < 16; ++i) {
+    EXPECT_EQ(0u, dst[i]);
+  }
+}
+
+TEST(YUVConvertTest, FilterYUVRows_MMX_UnalignedDestination) {
+  if (!media::hasMMX()) {
+    LOG(WARNING) << "System not supported. Test skipped.";
+    return;
+  }
+
+  const int kSize = 32;
+  scoped_array<uint8> src(new uint8[kSize]);
+  scoped_array<uint8> dst_sample(new uint8[kSize]);
+  scoped_array<uint8> dst(new uint8[kSize]);
+
+  memset(dst_sample.get(), 0, kSize);
+  memset(dst.get(), 0, kSize);
+  for (int i = 0; i < kSize; ++i)
+    src[i] = 100 + i;
+
+  media::FilterYUVRows_C(dst_sample.get(),
+                         src.get(), src.get(), 17, 128);
+
+  // Generate an unaligned output address.
+  uint8* dst_ptr =
+      reinterpret_cast<uint8*>(
+          (reinterpret_cast<uintptr_t>(dst.get() + 8) & ~7) + 1);
+  media::FilterYUVRows_MMX(dst_ptr, src.get(), src.get(), 17, 128);
+  media::EmptyRegisterState();
+
+  EXPECT_EQ(0, memcmp(dst_sample.get(), dst_ptr, 17));
+}
+
+TEST(YUVConvertTest, FilterYUVRows_SSE2_UnalignedDestination) {
+  if (!media::hasSSE2()) {
+    LOG(WARNING) << "System not supported. Test skipped.";
+    return;
+  }
+
+  const int kSize = 64;
+  scoped_array<uint8> src(new uint8[kSize]);
+  scoped_array<uint8> dst_sample(new uint8[kSize]);
+  scoped_array<uint8> dst(new uint8[kSize]);
+
+  memset(dst_sample.get(), 0, kSize);
+  memset(dst.get(), 0, kSize);
+  for (int i = 0; i < kSize; ++i)
+    src[i] = 100 + i;
+
+  media::FilterYUVRows_C(dst_sample.get(),
+                         src.get(), src.get(), 37, 128);
+
+  // Generate an unaligned output address.
+  uint8* dst_ptr =
+      reinterpret_cast<uint8*>(
+          (reinterpret_cast<uintptr_t>(dst.get() + 16) & ~15) + 1);
+  media::FilterYUVRows_SSE2(dst_ptr, src.get(), src.get(), 37, 128);
+  media::EmptyRegisterState();
+
+  EXPECT_EQ(0, memcmp(dst_sample.get(), dst_ptr, 37));
+}
+
 #if defined(ARCH_CPU_X86_64)
 
 TEST(YUVConvertTest, ScaleYUVToRGB32Row_SSE2_X64) {
author	hclam@chromium.org <hclam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-10-17 13:36:05 +0000
committer	hclam@chromium.org <hclam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-10-17 13:36:05 +0000
commit	dc88452ea0a9189c1197889bf4a66d8237f9a05b (patch)
tree	f8562306963eb9ae5f5d9fd56711391cdf09105b /media
parent	e4f9f9bd81b6dc2ca82f5b8913f3a49c616c1dda (diff)
download	chromium_src-dc88452ea0a9189c1197889bf4a66d8237f9a05b.zip chromium_src-dc88452ea0a9189c1197889bf4a66d8237f9a05b.tar.gz chromium_src-dc88452ea0a9189c1197889bf4a66d8237f9a05b.tar.bz2