diff options
author | hclam@chromium.org <hclam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-10-17 13:36:05 +0000 |
---|---|---|
committer | hclam@chromium.org <hclam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-10-17 13:36:05 +0000 |
commit | dc88452ea0a9189c1197889bf4a66d8237f9a05b (patch) | |
tree | f8562306963eb9ae5f5d9fd56711391cdf09105b /media | |
parent | e4f9f9bd81b6dc2ca82f5b8913f3a49c616c1dda (diff) | |
download | chromium_src-dc88452ea0a9189c1197889bf4a66d8237f9a05b.zip chromium_src-dc88452ea0a9189c1197889bf4a66d8237f9a05b.tar.gz chromium_src-dc88452ea0a9189c1197889bf4a66d8237f9a05b.tar.bz2 |
Fix out-of-bounds access with ScaleYUVToRGB32.
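The FilterYUVRows functions had out-of-bounds (OOB) access problems: each one processed a row in whole 8- or 16-byte groups inside a do-while loop, so a width that was not a multiple of the group size was read and written past its end. This patch fixes that.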
BUG=99480
TEST=media_unittests --gtest_filter=YUV*
Review URL: http://codereview.chromium.org/8230014
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@105798 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'media')
-rw-r--r-- | media/base/simd/filter_yuv_c.cc | 13 | ||||
-rw-r--r-- | media/base/simd/filter_yuv_mmx.cc | 77 | ||||
-rw-r--r-- | media/base/simd/filter_yuv_sse2.cc | 83 | ||||
-rw-r--r-- | media/base/yuv_convert_unittest.cc | 115 |
4 files changed, 228 insertions, 60 deletions
diff --git a/media/base/simd/filter_yuv_c.cc b/media/base/simd/filter_yuv_c.cc index 95ae01a..f292d21 100644 --- a/media/base/simd/filter_yuv_c.cc +++ b/media/base/simd/filter_yuv_c.cc @@ -11,7 +11,9 @@ void FilterYUVRows_C(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, int y1_fraction = source_y_fraction; int y0_fraction = 256 - y1_fraction; uint8* end = ybuf + source_width; - do { + uint8* rounded_end = ybuf + (source_width & ~7); + + while (ybuf < rounded_end) { ybuf[0] = (y0_ptr[0] * y0_fraction + y1_ptr[0] * y1_fraction) >> 8; ybuf[1] = (y0_ptr[1] * y0_fraction + y1_ptr[1] * y1_fraction) >> 8; ybuf[2] = (y0_ptr[2] * y0_fraction + y1_ptr[2] * y1_fraction) >> 8; @@ -23,7 +25,14 @@ void FilterYUVRows_C(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, y0_ptr += 8; y1_ptr += 8; ybuf += 8; - } while (ybuf < end); + } + + while (ybuf < end) { + ybuf[0] = (y0_ptr[0] * y0_fraction + y1_ptr[0] * y1_fraction) >> 8; + ++ybuf; + ++y0_ptr; + ++y1_ptr; + } } } // namespace media diff --git a/media/base/simd/filter_yuv_mmx.cc b/media/base/simd/filter_yuv_mmx.cc index 77698dc..09d62e3 100644 --- a/media/base/simd/filter_yuv_mmx.cc +++ b/media/base/simd/filter_yuv_mmx.cc @@ -20,35 +20,56 @@ namespace media { #pragma warning(disable: 4799) #endif -void FilterYUVRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, - int source_width, int source_y_fraction) { +void FilterYUVRows_MMX(uint8* dest, + const uint8* src0, + const uint8* src1, + int width, + int fraction) { + int pixel = 0; + + // Process the unaligned bytes first. 
+ int unaligned_width = + (8 - (reinterpret_cast<uintptr_t>(dest) & 7)) & 7; + while (pixel < width && pixel < unaligned_width) { + dest[pixel] = (src0[pixel] * (256 - fraction) + + src1[pixel] * fraction) >> 8; + ++pixel; + } + __m64 zero = _mm_setzero_si64(); - __m64 y1_fraction = _mm_set1_pi16(source_y_fraction); - __m64 y0_fraction = _mm_set1_pi16(256 - source_y_fraction); - - const __m64* y0_ptr64 = reinterpret_cast<const __m64*>(y0_ptr); - const __m64* y1_ptr64 = reinterpret_cast<const __m64*>(y1_ptr); - __m64* dest64 = reinterpret_cast<__m64*>(ybuf); - __m64* end64 = reinterpret_cast<__m64*>(ybuf + source_width); - - do { - __m64 y0 = *y0_ptr64++; - __m64 y1 = *y1_ptr64++; - __m64 y2 = _mm_unpackhi_pi8(y0, zero); - __m64 y3 = _mm_unpackhi_pi8(y1, zero); - y0 = _mm_unpacklo_pi8(y0, zero); - y1 = _mm_unpacklo_pi8(y1, zero); - y0 = _mm_mullo_pi16(y0, y0_fraction); - y1 = _mm_mullo_pi16(y1, y1_fraction); - y2 = _mm_mullo_pi16(y2, y0_fraction); - y3 = _mm_mullo_pi16(y3, y1_fraction); - y0 = _mm_add_pi16(y0, y1); - y2 = _mm_add_pi16(y2, y3); - y0 = _mm_srli_pi16(y0, 8); - y2 = _mm_srli_pi16(y2, 8); - y0 = _mm_packs_pu16(y0, y2); - *dest64++ = y0; - } while (dest64 < end64); + __m64 src1_fraction = _mm_set1_pi16(fraction); + __m64 src0_fraction = _mm_set1_pi16(256 - fraction); + const __m64* src0_64 = reinterpret_cast<const __m64*>(src0 + pixel); + const __m64* src1_64 = reinterpret_cast<const __m64*>(src1 + pixel); + __m64* dest64 = reinterpret_cast<__m64*>(dest + pixel); + __m64* end64 = reinterpret_cast<__m64*>( + reinterpret_cast<uintptr_t>(dest + width) & ~7); + + while (dest64 < end64) { + __m64 src0 = *src0_64++; + __m64 src1 = *src1_64++; + __m64 src2 = _mm_unpackhi_pi8(src0, zero); + __m64 src3 = _mm_unpackhi_pi8(src1, zero); + src0 = _mm_unpacklo_pi8(src0, zero); + src1 = _mm_unpacklo_pi8(src1, zero); + src0 = _mm_mullo_pi16(src0, src0_fraction); + src1 = _mm_mullo_pi16(src1, src1_fraction); + src2 = _mm_mullo_pi16(src2, src0_fraction); + src3 = 
_mm_mullo_pi16(src3, src1_fraction); + src0 = _mm_add_pi16(src0, src1); + src2 = _mm_add_pi16(src2, src3); + src0 = _mm_srli_pi16(src0, 8); + src2 = _mm_srli_pi16(src2, 8); + src0 = _mm_packs_pu16(src0, src2); + *dest64++ = src0; + pixel += 8; + } + + while (pixel < width) { + dest[pixel] = (src0[pixel] * (256 - fraction) + + src1[pixel] * fraction) >> 8; + ++pixel; + } } #if defined(COMPILER_MSVC) diff --git a/media/base/simd/filter_yuv_sse2.cc b/media/base/simd/filter_yuv_sse2.cc index 137ac94..84dba5a 100644 --- a/media/base/simd/filter_yuv_sse2.cc +++ b/media/base/simd/filter_yuv_sse2.cc @@ -13,37 +13,60 @@ namespace media { -void FilterYUVRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, - int source_width, int source_y_fraction) { +void FilterYUVRows_SSE2(uint8* dest, + const uint8* src0, + const uint8* src1, + int width, + int fraction) { + int pixel = 0; + + // Process the unaligned bytes first. + int unaligned_width = + (16 - (reinterpret_cast<uintptr_t>(dest) & 15)) & 15; + while (pixel < width && pixel < unaligned_width) { + dest[pixel] = (src0[pixel] * (256 - fraction) + + src1[pixel] * fraction) >> 8; + ++pixel; + } + __m128i zero = _mm_setzero_si128(); - __m128i y1_fraction = _mm_set1_epi16(source_y_fraction); - __m128i y0_fraction = _mm_set1_epi16(256 - source_y_fraction); - - const __m128i* y0_ptr128 = reinterpret_cast<const __m128i*>(y0_ptr); - const __m128i* y1_ptr128 = reinterpret_cast<const __m128i*>(y1_ptr); - __m128i* dest128 = reinterpret_cast<__m128i*>(ybuf); - __m128i* end128 = reinterpret_cast<__m128i*>(ybuf + source_width); - - do { - __m128i y0 = _mm_loadu_si128(y0_ptr128); - __m128i y1 = _mm_loadu_si128(y1_ptr128); - __m128i y2 = _mm_unpackhi_epi8(y0, zero); - __m128i y3 = _mm_unpackhi_epi8(y1, zero); - y0 = _mm_unpacklo_epi8(y0, zero); - y1 = _mm_unpacklo_epi8(y1, zero); - y0 = _mm_mullo_epi16(y0, y0_fraction); - y1 = _mm_mullo_epi16(y1, y1_fraction); - y2 = _mm_mullo_epi16(y2, y0_fraction); - y3 = _mm_mullo_epi16(y3, 
y1_fraction); - y0 = _mm_add_epi16(y0, y1); - y2 = _mm_add_epi16(y2, y3); - y0 = _mm_srli_epi16(y0, 8); - y2 = _mm_srli_epi16(y2, 8); - y0 = _mm_packus_epi16(y0, y2); - *dest128++ = y0; - ++y0_ptr128; - ++y1_ptr128; - } while (dest128 < end128); + __m128i src1_fraction = _mm_set1_epi16(fraction); + __m128i src0_fraction = _mm_set1_epi16(256 - fraction); + const __m128i* src0_128 = + reinterpret_cast<const __m128i*>(src0 + pixel); + const __m128i* src1_128 = + reinterpret_cast<const __m128i*>(src1 + pixel); + __m128i* dest128 = reinterpret_cast<__m128i*>(dest + pixel); + __m128i* end128 = reinterpret_cast<__m128i*>( + reinterpret_cast<uintptr_t>(dest + width) & ~15); + + while (dest128 < end128) { + __m128i src0 = _mm_loadu_si128(src0_128); + __m128i src1 = _mm_loadu_si128(src1_128); + __m128i src2 = _mm_unpackhi_epi8(src0, zero); + __m128i src3 = _mm_unpackhi_epi8(src1, zero); + src0 = _mm_unpacklo_epi8(src0, zero); + src1 = _mm_unpacklo_epi8(src1, zero); + src0 = _mm_mullo_epi16(src0, src0_fraction); + src1 = _mm_mullo_epi16(src1, src1_fraction); + src2 = _mm_mullo_epi16(src2, src0_fraction); + src3 = _mm_mullo_epi16(src3, src1_fraction); + src0 = _mm_add_epi16(src0, src1); + src2 = _mm_add_epi16(src2, src3); + src0 = _mm_srli_epi16(src0, 8); + src2 = _mm_srli_epi16(src2, 8); + src0 = _mm_packus_epi16(src0, src2); + *dest128++ = src0; + ++src0_128; + ++src1_128; + pixel += 16; + } + + while (pixel < width) { + dest[pixel] = (src0[pixel] * (256 - fraction) + + src1[pixel] * fraction) >> 8; + ++pixel; + } } } // namespace media diff --git a/media/base/yuv_convert_unittest.cc b/media/base/yuv_convert_unittest.cc index 9f1a850..9db998b 100644 --- a/media/base/yuv_convert_unittest.cc +++ b/media/base/yuv_convert_unittest.cc @@ -10,6 +10,7 @@ #include "media/base/djb2.h" #include "media/base/simd/convert_rgb_to_yuv.h" #include "media/base/simd/convert_yuv_to_rgb.h" +#include "media/base/simd/filter_yuv.h" #include "media/base/yuv_convert.h" #include 
"testing/gtest/include/gtest/gtest.h" @@ -645,6 +646,120 @@ TEST(YUVConvertTest, LinearScaleYUVToRGB32Row_SSE) { kWidth * kBpp)); } +TEST(YUVConvertTest, FilterYUVRows_C_OutOfBounds) { + scoped_array<uint8> src(new uint8[16]); + scoped_array<uint8> dst(new uint8[16]); + + memset(src.get(), 0xff, 16); + memset(dst.get(), 0, 16); + + media::FilterYUVRows_C(dst.get(), src.get(), src.get(), 1, 255); + + EXPECT_EQ(255u, dst[0]); + for (int i = 1; i < 16; ++i) { + EXPECT_EQ(0u, dst[i]) << " not equal at " << i; + } +} + +TEST(YUVConvertTest, FilterYUVRows_MMX_OutOfBounds) { + if (!media::hasMMX()) { + LOG(WARNING) << "System not supported. Test skipped."; + return; + } + + scoped_array<uint8> src(new uint8[16]); + scoped_array<uint8> dst(new uint8[16]); + + memset(src.get(), 0xff, 16); + memset(dst.get(), 0, 16); + + media::FilterYUVRows_MMX(dst.get(), src.get(), src.get(), 1, 255); + media::EmptyRegisterState(); + + EXPECT_EQ(255u, dst[0]); + for (int i = 1; i < 16; ++i) { + EXPECT_EQ(0u, dst[i]); + } +} + +TEST(YUVConvertTest, FilterYUVRows_SSE2_OutOfBounds) { + if (!media::hasSSE2()) { + LOG(WARNING) << "System not supported. Test skipped."; + return; + } + + scoped_array<uint8> src(new uint8[16]); + scoped_array<uint8> dst(new uint8[16]); + + memset(src.get(), 0xff, 16); + memset(dst.get(), 0, 16); + + media::FilterYUVRows_SSE2(dst.get(), src.get(), src.get(), 1, 255); + + EXPECT_EQ(255u, dst[0]); + for (int i = 1; i < 16; ++i) { + EXPECT_EQ(0u, dst[i]); + } +} + +TEST(YUVConvertTest, FilterYUVRows_MMX_UnalignedDestination) { + if (!media::hasMMX()) { + LOG(WARNING) << "System not supported. 
Test skipped."; + return; + } + + const int kSize = 32; + scoped_array<uint8> src(new uint8[kSize]); + scoped_array<uint8> dst_sample(new uint8[kSize]); + scoped_array<uint8> dst(new uint8[kSize]); + + memset(dst_sample.get(), 0, kSize); + memset(dst.get(), 0, kSize); + for (int i = 0; i < kSize; ++i) + src[i] = 100 + i; + + media::FilterYUVRows_C(dst_sample.get(), + src.get(), src.get(), 17, 128); + + // Generate an unaligned output address. + uint8* dst_ptr = + reinterpret_cast<uint8*>( + (reinterpret_cast<uintptr_t>(dst.get() + 8) & ~7) + 1); + media::FilterYUVRows_MMX(dst_ptr, src.get(), src.get(), 17, 128); + media::EmptyRegisterState(); + + EXPECT_EQ(0, memcmp(dst_sample.get(), dst_ptr, 17)); +} + +TEST(YUVConvertTest, FilterYUVRows_SSE2_UnalignedDestination) { + if (!media::hasSSE2()) { + LOG(WARNING) << "System not supported. Test skipped."; + return; + } + + const int kSize = 64; + scoped_array<uint8> src(new uint8[kSize]); + scoped_array<uint8> dst_sample(new uint8[kSize]); + scoped_array<uint8> dst(new uint8[kSize]); + + memset(dst_sample.get(), 0, kSize); + memset(dst.get(), 0, kSize); + for (int i = 0; i < kSize; ++i) + src[i] = 100 + i; + + media::FilterYUVRows_C(dst_sample.get(), + src.get(), src.get(), 37, 128); + + // Generate an unaligned output address. + uint8* dst_ptr = + reinterpret_cast<uint8*>( + (reinterpret_cast<uintptr_t>(dst.get() + 16) & ~15) + 1); + media::FilterYUVRows_SSE2(dst_ptr, src.get(), src.get(), 37, 128); + media::EmptyRegisterState(); + + EXPECT_EQ(0, memcmp(dst_sample.get(), dst_ptr, 37)); +} + #if defined(ARCH_CPU_X86_64) TEST(YUVConvertTest, ScaleYUVToRGB32Row_SSE2_X64) { |