diff options
author | fbarchard@chromium.org <fbarchard@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-04-13 01:01:58 +0000 |
---|---|---|
committer | fbarchard@chromium.org <fbarchard@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-04-13 01:01:58 +0000 |
commit | 74608b644e75307c910fa8b411e8e9cc7c731fae (patch) | |
tree | 5a9fa339ad77305739d3515951d0a5ab9f2f1b2a /media | |
parent | 0e39aedc261f2398a5b3fa6ecc18507574ef0e6a (diff) | |
download | chromium_src-74608b644e75307c910fa8b411e8e9cc7c731fae.zip chromium_src-74608b644e75307c910fa8b411e8e9cc7c731fae.tar.gz chromium_src-74608b644e75307c910fa8b411e8e9cc7c731fae.tar.bz2 |
Make vertical filtering conditional on the fraction/scale factor
BUG=19113
TEST=Scale by large factors and look for visible lines on every nth row. The output should look smooth and be faster than before.
Review URL: http://codereview.chromium.org/1521023
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@44307 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'media')
-rw-r--r-- | media/base/yuv_convert.cc | 101 | ||||
-rw-r--r-- | media/base/yuv_row.h | 2 |
2 files changed, 71 insertions, 32 deletions
diff --git a/media/base/yuv_convert.cc b/media/base/yuv_convert.cc index 8ff8b1f..fc77ef6 100644 --- a/media/base/yuv_convert.cc +++ b/media/base/yuv_convert.cc @@ -20,14 +20,18 @@ // Header for low level row functions. #include "media/base/yuv_row.h" -#if USE_SSE +#if USE_MMX #if defined(_MSC_VER) #include <intrin.h> #else -#include <emmintrin.h> +#include <mmintrin.h> #endif #endif +#if USE_SSE +#include <emmintrin.h> +#endif + namespace media { // 16.16 fixed point arithmetic. @@ -63,11 +67,12 @@ void ConvertYUVToRGB32(const uint8* y_buf, EMMS(); } +#if USE_MMX +#if USE_SSE // FilterRows combines two rows of the image using linear interpolation. -// 4 pixels are blended at a time. +// SSE2 version blends 8 pixels at a time. static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, int width, int scaled_y_fraction) { -#if USE_SSE __m128i zero = _mm_setzero_si128(); __m128i y1_fraction = _mm_set1_epi16( static_cast<unsigned short>(scaled_y_fraction >> 8)); @@ -92,30 +97,60 @@ static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, ybuf += 8; } while (ybuf < end); } +} + #else - int y0_fraction = kFractionMax - 1 - scaled_y_fraction; - int y1_fraction = scaled_y_fraction; +// MMX version blends 4 pixels at a time. 
+static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, + int width, int scaled_y_fraction) { + __m64 zero = _mm_setzero_si64(); + __m64 y1_fraction = _mm_set1_pi16( + static_cast<short>(scaled_y_fraction >> 8)); + __m64 y0_fraction = _mm_set1_pi16( + static_cast<short>((scaled_y_fraction >> 8) ^ 255)); + uint8* end = ybuf + width; - while (ybuf < end) { - ybuf[0] = (y0_ptr[0] * (y0_fraction) + - y1_ptr[0] * (y1_fraction)) >> kFractionBits; - ybuf[1] = (y0_ptr[1] * (y0_fraction) + - y1_ptr[1] * (y1_fraction)) >> kFractionBits; - ybuf[2] = (y0_ptr[2] * (y0_fraction) + - y1_ptr[2] * (y1_fraction)) >> kFractionBits; - ybuf[3] = (y0_ptr[3] * (y0_fraction) + - y1_ptr[3] * (y1_fraction)) >> kFractionBits; - y0_ptr += 4; - y1_ptr += 4; - ybuf += 4; + if (ybuf < end) { + do { + __m64 y0 = _mm_cvtsi32_si64(*reinterpret_cast<const int *>(y0_ptr)); + __m64 y1 = _mm_cvtsi32_si64(*reinterpret_cast<const int *>(y1_ptr)); + y0 = _mm_unpacklo_pi8 (y0, zero); + y1 = _mm_unpacklo_pi8 (y1, zero); + y0 = _mm_mullo_pi16(y0, y0_fraction); + y1 = _mm_mullo_pi16(y1, y1_fraction); + y0 = _mm_add_pi16(y0, y1); // 8.8 fixed point result + y0 = _mm_srli_pi16(y0, 8); + y0 = _mm_packs_pu16(y0, y0); + *reinterpret_cast<int *>(ybuf) = _mm_cvtsi64_si32(y0); + y0_ptr += 4; + y1_ptr += 4; + ybuf += 4; + } while (ybuf < end); } -#endif +} + +#endif // USE_SSE +#else // no MMX or SSE +// C version blends 4 pixels at a time. +static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, + int width, int scaled_y_fraction) { + int y0_fraction = 65535 - scaled_y_fraction; + int y1_fraction = scaled_y_fraction; + uint8* end = ybuf + width; + if (ybuf < end) { + do { - // Value at |ybuf[width]| must be the same as at |ybuf[width-1]|. 
- if (width > 1) { - end[0] = end[-1]; + ybuf[0] = (y0_ptr[0] * (y0_fraction) + y1_ptr[0] * (y1_fraction)) >> 16; + ybuf[1] = (y0_ptr[1] * (y0_fraction) + y1_ptr[1] * (y1_fraction)) >> 16; + ybuf[2] = (y0_ptr[2] * (y0_fraction) + y1_ptr[2] * (y1_fraction)) >> 16; + ybuf[3] = (y0_ptr[3] * (y0_fraction) + y1_ptr[3] * (y1_fraction)) >> 16; + y0_ptr += 4; + y1_ptr += 4; + ybuf += 4; + } while (ybuf < end); } } +#endif // USE_MMX // Scale a frame of YUV to 32 bit ARGB. void ScaleYUVToRGB32(const uint8* y_buf, @@ -198,8 +233,8 @@ void ScaleYUVToRGB32(const uint8* y_buf, } } - // Need padding in the end because FilterRows() may override up to 7 - // pixels after the end. + // Need padding because FilterRows() may write up to 15 extra pixels + // after the end for SSE2 version. uint8 ybuf[kFilterBufferSize + 16]; uint8 ubuf[kFilterBufferSize / 2 + 16]; uint8 vbuf[kFilterBufferSize / 2 + 16]; @@ -224,15 +259,17 @@ void ScaleYUVToRGB32(const uint8* y_buf, const uint8* y_ptr = y0_ptr; const uint8* u_ptr = u0_ptr; const uint8* v_ptr = v0_ptr; - // TODO(sergeyu): Avoid filtering when fraction is 0. - if (filter == media::FILTER_BILINEAR && y + 1 < scaled_height) { - FilterRows(ybuf, y0_ptr, y1_ptr, width, scaled_y_fraction); - y_ptr = ybuf; - - if ((y >> y_shift) + 1 < scaled_height >> y_shift) { - FilterRows(ubuf, u0_ptr, u1_ptr, width / 2, scaled_uv_fraction); + // Apply vertical filtering if necessary. 
+ if (filter == media::FILTER_BILINEAR && yscale_fixed != kFractionMax) { + if (scaled_y_fraction && ((y + 1) < scaled_height)) { + FilterRows(ybuf, y0_ptr, y1_ptr, width, scaled_y_fraction); + y_ptr = ybuf; + } + if (scaled_uv_fraction && + (((y >> y_shift) + 1) < (scaled_height >> y_shift))) { + FilterRows(ubuf, u0_ptr, u1_ptr, (width + 1) / 2, scaled_uv_fraction); u_ptr = ubuf; - FilterRows(vbuf, v0_ptr, v1_ptr, width / 2, scaled_uv_fraction); + FilterRows(vbuf, v0_ptr, v1_ptr, (width + 1) / 2, scaled_uv_fraction); v_ptr = vbuf; } } diff --git a/media/base/yuv_row.h b/media/base/yuv_row.h index 5392628..e87f76e 100644 --- a/media/base/yuv_row.h +++ b/media/base/yuv_row.h @@ -79,11 +79,13 @@ void LinearScaleYUVToRGB32Row(const uint8* y_buf, #endif #endif +#if !defined(USE_SSE) #if defined(__SSE2__) || defined(_MSC_VER) #define USE_SSE 1 #else #define USE_SSE 0 #endif +#endif // x64 uses MMX2 (SSE) so emms is not required. #if USE_MMX && !defined(ARCH_CPU_X86_64) |