diff options
author | fbarchard@chromium.org <fbarchard@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-04-22 01:00:19 +0000 |
---|---|---|
committer | fbarchard@chromium.org <fbarchard@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-04-22 01:00:19 +0000 |
commit | 8436c68ac1388c9e9df6fe823e927c4e3fffd643 (patch) | |
tree | f14dc86c407707d751983b2fec02b93963165f98 /media | |
parent | 9334377d11086b7445abe08b8950b183bb98b396 (diff) | |
download | chromium_src-8436c68ac1388c9e9df6fe823e927c4e3fffd643.zip chromium_src-8436c68ac1388c9e9df6fe823e927c4e3fffd643.tar.gz chromium_src-8436c68ac1388c9e9df6fe823e927c4e3fffd643.tar.bz2 |
Revert 45265 - Speed up vertical filtering using v = a+(b-a)*x formula
BUG=42064
TEST=unittests should still pass
Review URL: http://codereview.chromium.org/1733004
TBR=jamesr@chromium.org
Review URL: http://codereview.chromium.org/1718007
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@45270 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'media')
-rw-r--r-- | media/base/yuv_convert.cc | 77 | ||||
-rw-r--r-- | media/base/yuv_convert.h | 6 | ||||
-rw-r--r-- | media/tools/scaler_bench/scaler_bench.cc | 6 |
3 files changed, 39 insertions, 50 deletions
diff --git a/media/base/yuv_convert.cc b/media/base/yuv_convert.cc index 0f6170d..569f8b8 100644 --- a/media/base/yuv_convert.cc +++ b/media/base/yuv_convert.cc @@ -70,12 +70,14 @@ void ConvertYUVToRGB32(const uint8* y_buf, #if USE_SSE2 // FilterRows combines two rows of the image using linear interpolation. -// Blends 8 pixels at a time. +// SSE2 version blends 8 pixels at a time. static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, int source_width, int source_y_fraction) { __m128i zero = _mm_setzero_si128(); __m128i y1_fraction = _mm_set1_epi16( static_cast<uint16>(source_y_fraction >> 8)); + __m128i y0_fraction = _mm_set1_epi16( + static_cast<uint16>(256 - (source_y_fraction >> 8))); uint8* end = ybuf + source_width; if (ybuf < end) { @@ -84,69 +86,64 @@ static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, __m128i y1 = _mm_loadl_epi64(reinterpret_cast<__m128i const*>(y1_ptr)); y0 = _mm_unpacklo_epi8(y0, zero); y1 = _mm_unpacklo_epi8(y1, zero); - y1 = _mm_sub_epi16(y1, y0); + y0 = _mm_mullo_epi16(y0, y0_fraction); y1 = _mm_mullo_epi16(y1, y1_fraction); - y1 = _mm_srai_epi16(y1, 8); - y1 = _mm_add_epi16(y1, y0); - y1 = _mm_packus_epi16(y1, y1); - _mm_storel_epi64(reinterpret_cast<__m128i *>(ybuf), y1); + y0 = _mm_add_epi16(y0, y1); // 8.8 fixed point result + y0 = _mm_srli_epi16(y0, 8); + y0 = _mm_packus_epi16(y0, y0); + _mm_storel_epi64(reinterpret_cast<__m128i *>(ybuf), y0); y0_ptr += 8; y1_ptr += 8; ybuf += 8; } while (ybuf < end); } } + #elif USE_MMX +// MMX version blends 4 pixels at a time. 
static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, int source_width, int source_y_fraction) { __m64 zero = _mm_setzero_si64(); __m64 y1_fraction = _mm_set1_pi16( - static_cast<int16>(source_y_fraction >> 8)); + static_cast<int16>(source_y_fraction >> 8)); + __m64 y0_fraction = _mm_set1_pi16( + static_cast<int16>(256 - (source_y_fraction >> 8))); uint8* end = ybuf + source_width; if (ybuf < end) { do { - __m64 y2 = *reinterpret_cast<const __m64 *>(y0_ptr); - __m64 y3 = *reinterpret_cast<const __m64 *>(y1_ptr); - __m64 y0 = _mm_unpacklo_pi8(y2, zero); - __m64 y1 = _mm_unpacklo_pi8(y3, zero); - y2 = _mm_unpackhi_pi8(y2, zero); - y3 = _mm_unpackhi_pi8(y3, zero); - y1 = _mm_sub_pi16(y1, y0); - y3 = _mm_sub_pi16(y3, y2); + __m64 y0 = _mm_cvtsi32_si64(*reinterpret_cast<const int *>(y0_ptr)); + __m64 y1 = _mm_cvtsi32_si64(*reinterpret_cast<const int *>(y1_ptr)); + y0 = _mm_unpacklo_pi8(y0, zero); + y1 = _mm_unpacklo_pi8(y1, zero); + y0 = _mm_mullo_pi16(y0, y0_fraction); y1 = _mm_mullo_pi16(y1, y1_fraction); - y3 = _mm_mullo_pi16(y3, y1_fraction); - y1 = _mm_srai_pi16(y1, 8); - y3 = _mm_srai_pi16(y3, 8); - y1 = _mm_add_pi16(y1, y0); - y3 = _mm_add_pi16(y3, y2); - y0 = _mm_packs_pu16(y1, y3); - *reinterpret_cast<__m64 *>(ybuf) = y0; - y0_ptr += 8; - y1_ptr += 8; - ybuf += 8; + y0 = _mm_add_pi16(y0, y1); // 8.8 fixed point result + y0 = _mm_srli_pi16(y0, 8); + y0 = _mm_packs_pu16(y0, y0); + *reinterpret_cast<int *>(ybuf) = _mm_cvtsi64_si32(y0); + y0_ptr += 4; + y1_ptr += 4; + ybuf += 4; } while (ybuf < end); } } #else // no MMX or SSE2 - +// C version blends 4 pixels at a time. 
static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, int source_width, int source_y_fraction) { - int y1_fraction = (source_y_fraction >> 8); + int y1_fraction = source_y_fraction >> 8; + int y0_fraction = 256 - (source_y_fraction >> 8); uint8* end = ybuf + source_width; if (ybuf < end) { do { - ybuf[0] = y0_ptr[0] + (((y1_ptr[0] - y0_ptr[0]) * y1_fraction) >> 8); - ybuf[1] = y0_ptr[1] + (((y1_ptr[1] - y0_ptr[1]) * y1_fraction) >> 8); - ybuf[2] = y0_ptr[2] + (((y1_ptr[2] - y0_ptr[2]) * y1_fraction) >> 8); - ybuf[3] = y0_ptr[3] + (((y1_ptr[3] - y0_ptr[3]) * y1_fraction) >> 8); - ybuf[4] = y0_ptr[4] + (((y1_ptr[4] - y0_ptr[4]) * y1_fraction) >> 8); - ybuf[5] = y0_ptr[5] + (((y1_ptr[5] - y0_ptr[5]) * y1_fraction) >> 8); - ybuf[6] = y0_ptr[6] + (((y1_ptr[6] - y0_ptr[6]) * y1_fraction) >> 8); - ybuf[7] = y0_ptr[7] + (((y1_ptr[7] - y0_ptr[7]) * y1_fraction) >> 8); - y0_ptr += 8; - y1_ptr += 8; - ybuf += 8; + ybuf[0] = (y0_ptr[0] * (y0_fraction) + y1_ptr[0] * (y1_fraction)) >> 8; + ybuf[1] = (y0_ptr[1] * (y0_fraction) + y1_ptr[1] * (y1_fraction)) >> 8; + ybuf[2] = (y0_ptr[2] * (y0_fraction) + y1_ptr[2] * (y1_fraction)) >> 8; + ybuf[3] = (y0_ptr[3] * (y0_fraction) + y1_ptr[3] * (y1_fraction)) >> 8; + y0_ptr += 4; + y1_ptr += 4; + ybuf += 4; } while (ybuf < end); } } @@ -264,7 +261,7 @@ void ScaleYUVToRGB32(const uint8* y_buf, const uint8* v_ptr = v0_ptr; // Apply vertical filtering if necessary. // TODO(fbarchard): Remove memcpy when not necessary. 
- if (filter & media::FILTER_BILINEAR_V) { + if (filter == media::FILTER_BILINEAR) { if (yscale_fixed != kFractionMax && source_y_fraction && ((source_y + 1) < source_height)) { FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction); @@ -292,7 +289,7 @@ void ScaleYUVToRGB32(const uint8* y_buf, FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, dest_pixel, width); } else { - if (filter & FILTER_BILINEAR_H) + if (filter == FILTER_BILINEAR) LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, dest_pixel, width, source_dx); else { diff --git a/media/base/yuv_convert.h b/media/base/yuv_convert.h index 24a2c4e..d55337d 100644 --- a/media/base/yuv_convert.h +++ b/media/base/yuv_convert.h @@ -31,10 +31,8 @@ enum Rotate { // Filter affects how scaling looks. enum ScaleFilter { - FILTER_NONE = 0, // No filter (point sampled). - FILTER_BILINEAR_H = 1, // Bilinear horizontal filter. - FILTER_BILINEAR_V = 2, // Bilinear vertical filter. - FILTER_BILINEAR = 3, // Bilinear filter. + FILTER_NONE, // No filter (point sampled). + FILTER_BILINEAR, // Bilinear filter. }; // Convert a frame of YUV to 32 bit ARGB. diff --git a/media/tools/scaler_bench/scaler_bench.cc b/media/tools/scaler_bench/scaler_bench.cc index 60417ad..a50efa0 100644 --- a/media/tools/scaler_bench/scaler_bench.cc +++ b/media/tools/scaler_bench/scaler_bench.cc @@ -200,12 +200,6 @@ int main(int argc, const char** argv) { << "ms/frame" << std::endl; std::cout << "No filtering: " << BenchmarkFilter(media::FILTER_NONE) << "ms/frame" << std::endl; - std::cout << "Bilinear Vertical: " - << BenchmarkFilter(media::FILTER_BILINEAR_V) - << "ms/frame" << std::endl; - std::cout << "Bilinear Horizontal: " - << BenchmarkFilter(media::FILTER_BILINEAR_H) - << "ms/frame" << std::endl; std::cout << "Bilinear: " << BenchmarkFilter(media::FILTER_BILINEAR) << "ms/frame" << std::endl; |