diff options
author | dhollowa@chromium.org <dhollowa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-09-14 01:08:22 +0000 |
---|---|---|
committer | dhollowa@chromium.org <dhollowa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-09-14 01:08:22 +0000 |
commit | 3227bd548c98f90ecabfcd4e3ccc3785ee9409d8 (patch) | |
tree | 4a06e64c741c94e8665bc1c25ffd142a75fd39f4 /media | |
parent | 7758c234c30291995c01b79887b52a471362fcc3 (diff) | |
download | chromium_src-3227bd548c98f90ecabfcd4e3ccc3785ee9409d8.zip chromium_src-3227bd548c98f90ecabfcd4e3ccc3785ee9409d8.tar.gz chromium_src-3227bd548c98f90ecabfcd4e3ccc3785ee9409d8.tar.bz2 |
Revert 100915 - Rewrite color space conversions suite using YASM
Failed on the official Linux 32-bit builder. (i486?)
http://build.chromium.org/p/chromium.chrome/builders/Google%20Chrome%20Linux/builds/11240/steps/compile/logs/stdio
In file included from media/base/yuv_convert.cc:33:
/usr/lib/gcc/i486-linux-gnu/4.4.3/include/emmintrin.h:32:3:error: #error "SSE2 instruction set not enabled"
make: *** [out/Release/obj.target/yuv_convert/media/base/yuv_convert.o] Error 1
Besides rewriting the assembly code in YASM, this change also does
runtime selection of routines depending on processor support.
There are a couple of advantages to this YASM version.
1. Performance improvement
Using YASM we can push less onto the stack; this results in a ~5%
improvement on 32-bit systems.
2. MMX and SSE versions are separate
The SSE version uses movntq while the MMX version uses movq.
3. 64-bit version works universally
This means that the 64-bit YASM code compiles and runs on 64-bit
windows, mac and linux, while previously it only ran on
linux 64.
4. Handling PIC much better
PIC code builds and runs on 32-bit and 64-bit systems.
This is better than before, when it only worked on 32-bit mac.
5. Easier maintenance
This code uses x86inc.asm for a bunch of nice macros. Having
just one YASM version is better than having inline VC assembly,
and we don't have to worry about inline assembly having different
register names on mac and linux.
I tested on 32-bit linux, 32-bit linux with PIC, and 64-bit linux
that this code is at least as fast as before.
BUG=None
TEST=media_unittests runs and all passing
Review URL: http://codereview.chromium.org/7858003
TBR=hclam@chromium.org
Review URL: http://codereview.chromium.org/7887025
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@101008 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'media')
28 files changed, 191 insertions, 1916 deletions
diff --git a/media/base/cpu_features.h b/media/base/cpu_features.h index 0878385..c2762d8 100644 --- a/media/base/cpu_features.h +++ b/media/base/cpu_features.h @@ -10,12 +10,6 @@ namespace media { -// Returns true if CPU has MMX support. -bool hasMMX(); - -// Returns true if CPU has SSE support. -bool hasSSE(); - // Returns true if CPU has SSE2 support. bool hasSSE2(); diff --git a/media/base/cpu_features_x86.cc b/media/base/cpu_features_x86.cc index 4fb9304..bf7d05d 100644 --- a/media/base/cpu_features_x86.cc +++ b/media/base/cpu_features_x86.cc @@ -48,16 +48,6 @@ static inline void getcpuid(int info_type, int info[4]) { } #endif -bool hasMMX() { - // TODO(hclam): Acutually checks it. - return true; -} - -bool hasSSE() { - // TODO(hclam): Actually checks it. - return true; -} - bool hasSSE2() { #if defined(ARCH_CPU_X86_64) /* All x86_64 machines have SSE2, so don't even bother checking. */ diff --git a/media/base/simd/convert_rgb_to_yuv_x86.cc b/media/base/simd/convert_rgb_to_yuv_x86.cc deleted file mode 100644 index 2bd6930..0000000 --- a/media/base/simd/convert_rgb_to_yuv_x86.cc +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "media/base/simd/convert_rgb_to_yuv.h" - -#include "build/build_config.h" -#include "media/base/cpu_features.h" -#include "media/base/simd/convert_rgb_to_yuv_ssse3.h" - -namespace media { - -void ConvertRGB32ToYUV_SSSE3(const uint8* rgbframe, - uint8* yplane, - uint8* uplane, - uint8* vplane, - int width, - int height, - int rgbstride, - int ystride, - int uvstride) { -#ifdef ENABLE_SUBSAMPLING - for (; height >= 2; height -= 2) { - ConvertARGBToYUVEven_SSSE3(rgbframe, yplane, uplane, vplane, width); - rgbframe += rgbstride; - yplane += ystride; - - ConvertARGBToYUVOdd_SSSE3(rgbframe, yplane, uplane, vplane, width); - rgbframe += rgbstride; - yplane += ystride; - - uplane += uvstride; - vplane += uvstride; - } - - if (height) - ConvertARGBToYUVEven_SSSE3(rgbframe, yplane, uplane, vplane, width); -#else - for (; height >= 2; height -= 2) { - ConvertARGBToYUVRow_SSSE3(rgbframe, yplane, uplane, vplane, width); - rgbframe += rgbstride; - yplane += ystride; - - ConvertARGBToYUVRow_SSSE3(rgbframe, yplane, NULL, NULL, width); - rgbframe += rgbstride; - yplane += ystride; - - uplane += uvstride; - vplane += uvstride; - } - - if (height) - ConvertARGBToYUVRow_SSSE3(rgbframe, yplane, uplane, vplane, width); -#endif -} - -void ConvertRGB24ToYUV_SSSE3(const uint8* rgbframe, - uint8* yplane, - uint8* uplane, - uint8* vplane, - int width, - int height, - int rgbstride, - int ystride, - int uvstride) { -#ifdef ENABLE_SUBSAMPLING - for (; height >= 2; height -= 2) { - ConvertRGBToYUVEven_SSSE3(rgbframe, yplane, uplane, vplane, width); - rgbframe += rgbstride; - yplane += ystride; - - ConvertRGBToYUVOdd_SSSE3(rgbframe, yplane, uplane, vplane, width); - rgbframe += rgbstride; - yplane += ystride; - - uplane += uvstride; - vplane += uvstride; - } - - if (height) - ConvertRGBToYUVEven_SSSE3(rgbframe, yplane, uplane, vplane, width); -#else - for (; height >= 2; height -= 2) { - ConvertRGBToYUVRow_SSSE3(rgbframe, yplane, uplane, vplane, width); - rgbframe += rgbstride; - 
yplane += ystride; - - ConvertRGBToYUVRow_SSSE3(rgbframe, yplane, NULL, NULL, width); - rgbframe += rgbstride; - yplane += ystride; - - uplane += uvstride; - vplane += uvstride; - } - - if (height) - ConvertRGBToYUVRow_SSSE3(rgbframe, yplane, uplane, vplane, width); -#endif -} - -} // namespace media diff --git a/media/base/simd/convert_yuv_to_rgb.h b/media/base/simd/convert_yuv_to_rgb.h deleted file mode 100644 index 5f3df2c6..0000000 --- a/media/base/simd/convert_yuv_to_rgb.h +++ /dev/null @@ -1,150 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef MEDIA_BASE_SIMD_CONVERT_YUV_TO_RGB_H_ -#define MEDIA_BASE_SIMD_CONVERT_YUV_TO_RGB_H_ - -#include "base/basictypes.h" -#include "media/base/yuv_convert.h" - -namespace media { - -typedef void (*ConvertYUVToRGB32Proc)(const uint8*, - const uint8*, - const uint8*, - uint8*, - int, - int, - int, - int, - int, - YUVType); - -void ConvertYUVToRGB32_C(const uint8* yplane, - const uint8* uplane, - const uint8* vplane, - uint8* rgbframe, - int width, - int height, - int ystride, - int uvstride, - int rgbstride, - YUVType yuv_type); - -void ConvertYUVToRGB32_SSE(const uint8* yplane, - const uint8* uplane, - const uint8* vplane, - uint8* rgbframe, - int width, - int height, - int ystride, - int uvstride, - int rgbstride, - YUVType yuv_type); - -void ConvertYUVToRGB32_MMX(const uint8* yplane, - const uint8* uplane, - const uint8* vplane, - uint8* rgbframe, - int width, - int height, - int ystride, - int uvstride, - int rgbstride, - YUVType yuv_type); - -} // namespace media - -// Assembly functions are declared without namespace. 
-extern "C" { - -typedef void (*ConvertYUVToRGB32RowProc)(const uint8*, - const uint8*, - const uint8*, - uint8*, - int); -typedef void (*ScaleYUVToRGB32RowProc)(const uint8*, - const uint8*, - const uint8*, - uint8*, - int, - int); - -void ConvertYUVToRGB32Row_C(const uint8* yplane, - const uint8* uplane, - const uint8* vplane, - uint8* rgbframe, - int width); - -void ConvertYUVToRGB32Row_MMX(const uint8* yplane, - const uint8* uplane, - const uint8* vplane, - uint8* rgbframe, - int width); - -void ConvertYUVToRGB32Row_SSE(const uint8* yplane, - const uint8* uplane, - const uint8* vplane, - uint8* rgbframe, - int width); - -void ScaleYUVToRGB32Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - -void ScaleYUVToRGB32Row_MMX(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - -void ScaleYUVToRGB32Row_SSE(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - -void ScaleYUVToRGB32Row_SSE2_X64(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - -void LinearScaleYUVToRGB32Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - -void LinearScaleYUVToRGB32Row_MMX(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - -void LinearScaleYUVToRGB32Row_SSE(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - -void LinearScaleYUVToRGB32Row_MMX_X64(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - -} - -#endif // MEDIA_BASE_SIMD_CONVERT_YUV_TO_RGB_H_ diff --git a/media/base/simd/convert_yuv_to_rgb_c.cc b/media/base/simd/convert_yuv_to_rgb_c.cc deleted file mode 100644 index 
f8e70b2..0000000 --- a/media/base/simd/convert_yuv_to_rgb_c.cc +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "media/base/simd/convert_yuv_to_rgb.h" -// TODO(hclam): Shouldn't depend on yuv_row.h. -#include "media/base/yuv_row.h" - -#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x))) -#define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \ - (((x) + (y)) > 32767 ? 32767 : ((x) + (y)))) - -static inline void YUVPixel(uint8 y, - uint8 u, - uint8 v, - uint8* rgb_buf) { - - int b = kCoefficientsRgbY[256+u][0]; - int g = kCoefficientsRgbY[256+u][1]; - int r = kCoefficientsRgbY[256+u][2]; - int a = kCoefficientsRgbY[256+u][3]; - - b = paddsw(b, kCoefficientsRgbY[512+v][0]); - g = paddsw(g, kCoefficientsRgbY[512+v][1]); - r = paddsw(r, kCoefficientsRgbY[512+v][2]); - a = paddsw(a, kCoefficientsRgbY[512+v][3]); - - b = paddsw(b, kCoefficientsRgbY[y][0]); - g = paddsw(g, kCoefficientsRgbY[y][1]); - r = paddsw(r, kCoefficientsRgbY[y][2]); - a = paddsw(a, kCoefficientsRgbY[y][3]); - - b >>= 6; - g >>= 6; - r >>= 6; - a >>= 6; - - *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) | - (packuswb(g) << 8) | - (packuswb(r) << 16) | - (packuswb(a) << 24); -} - -extern "C" { - -void ConvertYUVToRGB32Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { - for (int x = 0; x < width; x += 2) { - uint8 u = u_buf[x >> 1]; - uint8 v = v_buf[x >> 1]; - uint8 y0 = y_buf[x]; - YUVPixel(y0, u, v, rgb_buf); - if ((x + 1) < width) { - uint8 y1 = y_buf[x + 1]; - YUVPixel(y1, u, v, rgb_buf + 4); - } - rgb_buf += 8; // Advance 2 pixels. - } -} - -// 16.16 fixed point is used. A shift by 16 isolates the integer. -// A shift by 17 is used to further subsample the chrominence channels. -// & 0xffff isolates the fixed point fraction. 
>> 2 to get the upper 2 bits, -// for 1/65536 pixel accurate interpolation. -void ScaleYUVToRGB32Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - int x = 0; - for (int i = 0; i < width; i += 2) { - int y = y_buf[x >> 16]; - int u = u_buf[(x >> 17)]; - int v = v_buf[(x >> 17)]; - YUVPixel(y, u, v, rgb_buf); - x += source_dx; - if ((i + 1) < width) { - y = y_buf[x >> 16]; - YUVPixel(y, u, v, rgb_buf+4); - x += source_dx; - } - rgb_buf += 8; - } -} - -void LinearScaleYUVToRGB32Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - int x = 0; - if (source_dx >= 0x20000) { - x = 32768; - } - for (int i = 0; i < width; i += 2) { - int y0 = y_buf[x >> 16]; - int y1 = y_buf[(x >> 16) + 1]; - int u0 = u_buf[(x >> 17)]; - int u1 = u_buf[(x >> 17) + 1]; - int v0 = v_buf[(x >> 17)]; - int v1 = v_buf[(x >> 17) + 1]; - int y_frac = (x & 65535); - int uv_frac = ((x >> 1) & 65535); - int y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; - int u = (uv_frac * u1 + (uv_frac ^ 65535) * u0) >> 16; - int v = (uv_frac * v1 + (uv_frac ^ 65535) * v0) >> 16; - YUVPixel(y, u, v, rgb_buf); - x += source_dx; - if ((i + 1) < width) { - y0 = y_buf[x >> 16]; - y1 = y_buf[(x >> 16) + 1]; - y_frac = (x & 65535); - y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; - YUVPixel(y, u, v, rgb_buf+4); - x += source_dx; - } - rgb_buf += 8; - } -} - -} - -namespace media { - -void ConvertYUVToRGB32_C(const uint8* yplane, - const uint8* uplane, - const uint8* vplane, - uint8* rgbframe, - int width, - int height, - int ystride, - int uvstride, - int rgbstride, - YUVType yuv_type) { - unsigned int y_shift = yuv_type; - for (int y = 0; y < height; ++y) { - uint8* rgb_row = rgbframe + y * rgbstride; - const uint8* y_ptr = yplane + y * ystride; - const uint8* u_ptr = uplane + (y >> y_shift) * uvstride; - const uint8* v_ptr = vplane + (y >> y_shift) * uvstride; - - 
ConvertYUVToRGB32Row_C(y_ptr, - u_ptr, - v_ptr, - rgb_row, - width); - } -} - -} // namespace media diff --git a/media/base/simd/convert_yuv_to_rgb_mmx.asm b/media/base/simd/convert_yuv_to_rgb_mmx.asm deleted file mode 100644 index e044474..0000000 --- a/media/base/simd/convert_yuv_to_rgb_mmx.asm +++ /dev/null @@ -1,22 +0,0 @@ -; Copyright (c) 2011 The Chromium Authors. All rights reserved. -; Use of this source code is governed by a BSD-style license that can be -; found in the LICENSE file. - -%include "x86inc.asm" - -; -; This file uses MMX instructions. -; - SECTION_TEXT - CPU MMX - -; Use movq to save the output. -%define MOVQ movq - -; extern "C" void ConvertYUVToRGB32Row_MMX(const uint8* y_buf, -; const uint8* u_buf, -; const uint8* v_buf, -; uint8* rgb_buf, -; int width); -%define SYMBOL ConvertYUVToRGB32Row_MMX -%include "convert_yuv_to_rgb_mmx.inc" diff --git a/media/base/simd/convert_yuv_to_rgb_mmx.inc b/media/base/simd/convert_yuv_to_rgb_mmx.inc deleted file mode 100644 index b9555ce..0000000 --- a/media/base/simd/convert_yuv_to_rgb_mmx.inc +++ /dev/null @@ -1,119 +0,0 @@ -; Copyright (c) 2011 The Chromium Authors. All rights reserved. -; Use of this source code is governed by a BSD-style license that can be -; found in the LICENSE file. - - global mangle(SYMBOL) PRIVATE - align function_align - -; Non-PIC code is the fastest so use this if possible. 
-%ifndef PIC -mangle(SYMBOL): - %assign stack_offset 0 - PROLOGUE 5, 7, 3, Y, U, V, ARGB, WIDTH, TEMPU, TEMPV - extern mangle(kCoefficientsRgbY) - jmp .convertend - -.convertloop: - movzx TEMPUd, BYTE [Uq] - add Uq, 1 - movzx TEMPVd, BYTE [Vq] - add Vq, 1 - movq mm0, [mangle(kCoefficientsRgbY) + 2048 + 8 * TEMPUq] - movzx TEMPUd, BYTE [Yq] - paddsw mm0, [mangle(kCoefficientsRgbY) + 4096 + 8 * TEMPVq] - movzx TEMPVd, BYTE [Yq + 1] - movq mm1, [mangle(kCoefficientsRgbY) + 8 * TEMPUq] - add Yq, 2 - movq mm2, [mangle(kCoefficientsRgbY) + 8 * TEMPVq] - paddsw mm1, mm0 - paddsw mm2, mm0 - psraw mm1, 6 - psraw mm2, 6 - packuswb mm1, mm2 - MOVQ [ARGBq], mm1 - add ARGBq, 8 - -.convertend: - sub WIDTHq, 2 - jns .convertloop - - ; If number of pixels is odd then compute it. - and WIDTHq, 1 - jz .convertdone - - movzx TEMPUd, BYTE [Uq] - movq mm0, [mangle(kCoefficientsRgbY) + 2048 + 8 * TEMPUq] - movzx TEMPVd, BYTE [Vq] - paddsw mm0, [mangle(kCoefficientsRgbY) + 4096 + 8 * TEMPVq] - movzx TEMPUd, BYTE [Yq] - movq mm1, [mangle(kCoefficientsRgbY) + 8 * TEMPUq] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - movd [ARGBq], mm1 - -.convertdone: - RET -%endif - -; With PIC code we need to load the address of mangle(kCoefficientsRgbY). -; This code is slower than the above version. -%ifdef PIC -mangle(SYMBOL): - %assign stack_offset 0 - PROLOGUE 5, 7, 3, Y, U, V, ARGB, WIDTH, TEMP, TABLE - - extern mangle(kCoefficientsRgbY) - LOAD_SYM TABLEq, mangle(kCoefficientsRgbY) - - jmp .convertend - -.convertloop: - movzx TEMPd, BYTE [Uq] - movq mm0, [TABLEq + 2048 + 8 * TEMPq] - add Uq, 1 - - movzx TEMPd, BYTE [Vq] - paddsw mm0, [TABLEq + 4096 + 8 * TEMPq] - add Vq, 1 - - movzx TEMPd, BYTE [Yq] - movq mm1, [TABLEq + 8 * TEMPq] - - movzx TEMPd, BYTE [Yq + 1] - movq mm2, [TABLEq + 8 * TEMPq] - add Yq, 2 - - ; Add UV components to Y component. - paddsw mm1, mm0 - paddsw mm2, mm0 - - ; Down shift and then pack. 
- psraw mm1, 6 - psraw mm2, 6 - packuswb mm1, mm2 - MOVQ [ARGBq], mm1 - add ARGBq, 8 - -.convertend: - sub WIDTHq, 2 - jns .convertloop - - ; If number of pixels is odd then compute it. - and WIDTHq, 1 - jz .convertdone - - movzx TEMPd, BYTE [Uq] - movq mm0, [TABLEq + 2048 + 8 * TEMPq] - movzx TEMPd, BYTE [Vq] - paddsw mm0, [TABLEq + 4096 + 8 * TEMPq] - movzx TEMPd, BYTE [Yq] - movq mm1, [TABLEq + 8 * TEMPq] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - movd [ARGBq], mm1 - -.convertdone: - RET -%endif diff --git a/media/base/simd/convert_yuv_to_rgb_sse.asm b/media/base/simd/convert_yuv_to_rgb_sse.asm deleted file mode 100644 index 28d2214..0000000 --- a/media/base/simd/convert_yuv_to_rgb_sse.asm +++ /dev/null @@ -1,40 +0,0 @@ -; Copyright (c) 2011 The Chromium Authors. All rights reserved. -; Use of this source code is governed by a BSD-style license that can be -; found in the LICENSE file. - -%include "x86inc.asm" - -; -; This file uses MMX and SSE instructions. -; - SECTION_TEXT - CPU MMX, SSE - -; Use SSE instruction movntq can write faster. -%define MOVQ movntq - -; -; extern "C" void ConvertYUVToRGB32Row_SSE(const uint8* y_buf, -; const uint8* u_buf, -; const uint8* v_buf, -; uint8* rgb_buf, -; int width); -%define SYMBOL ConvertYUVToRGB32Row_SSE -%include "convert_yuv_to_rgb_mmx.inc" - -; void ScaleYUVToRGB32Row_MMX(const uint8* y_buf, -; const uint8* u_buf, -; const uint8* v_buf, -; uint8* rgb_buf, -; int width, -; int source_dx); -%define SYMBOL ScaleYUVToRGB32Row_SSE -%include "scale_yuv_to_rgb_mmx.inc" - -; void LinearScaleYUVToRGB32Row_MMX(const uint8* y_buf, -; const uint8* u_buf, -; const uint8* v_buf, -; uint8* rgb_buf, -; int width, -; int source_dx); - diff --git a/media/base/simd/convert_yuv_to_rgb_x86.cc b/media/base/simd/convert_yuv_to_rgb_x86.cc deleted file mode 100644 index 3e03ef9..0000000 --- a/media/base/simd/convert_yuv_to_rgb_x86.cc +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#if defined(_MSC_VER) -#include <intrin.h> -#else -#include <mmintrin.h> -#endif - -#include "media/base/cpu_features.h" -#include "media/base/simd/convert_yuv_to_rgb.h" -#include "media/base/yuv_convert.h" - -namespace media { - -void ConvertYUVToRGB32_MMX(const uint8* yplane, - const uint8* uplane, - const uint8* vplane, - uint8* rgbframe, - int width, - int height, - int ystride, - int uvstride, - int rgbstride, - YUVType yuv_type) { - unsigned int y_shift = yuv_type; - for (int y = 0; y < height; ++y) { - uint8* rgb_row = rgbframe + y * rgbstride; - const uint8* y_ptr = yplane + y * ystride; - const uint8* u_ptr = uplane + (y >> y_shift) * uvstride; - const uint8* v_ptr = vplane + (y >> y_shift) * uvstride; - - ConvertYUVToRGB32Row_MMX(y_ptr, - u_ptr, - v_ptr, - rgb_row, - width); - } - - _mm_empty(); -} - -void ConvertYUVToRGB32_SSE(const uint8* yplane, - const uint8* uplane, - const uint8* vplane, - uint8* rgbframe, - int width, - int height, - int ystride, - int uvstride, - int rgbstride, - YUVType yuv_type) { - unsigned int y_shift = yuv_type; - for (int y = 0; y < height; ++y) { - uint8* rgb_row = rgbframe + y * rgbstride; - const uint8* y_ptr = yplane + y * ystride; - const uint8* u_ptr = uplane + (y >> y_shift) * uvstride; - const uint8* v_ptr = vplane + (y >> y_shift) * uvstride; - - ConvertYUVToRGB32Row_SSE(y_ptr, - u_ptr, - v_ptr, - rgb_row, - width); - } - - _mm_empty(); -} - -} // namespace media diff --git a/media/base/simd/filter_yuv.h b/media/base/simd/filter_yuv.h deleted file mode 100644 index 5a9cf11..0000000 --- a/media/base/simd/filter_yuv.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef MEDIA_BASE_SIMD_FILTER_YUV_H_ -#define MEDIA_BASE_SIMD_FILTER_YUV_H_ - -#include "base/basictypes.h" - -namespace media { - -typedef void (*FilterYUVRowsProc)(uint8*, - const uint8*, - const uint8*, - int, - int); - -void FilterYUVRows_C(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, - int source_width, int source_y_fraction); - -void FilterYUVRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, - int source_width, int source_y_fraction); - -void FilterYUVRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, - int source_width, int source_y_fraction); - -} // namespace media - -#endif // MEDIA_BASE_SIMD_FILTER_YUV_H_ diff --git a/media/base/simd/filter_yuv_c.cc b/media/base/simd/filter_yuv_c.cc deleted file mode 100644 index 95ae01a..0000000 --- a/media/base/simd/filter_yuv_c.cc +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "media/base/simd/filter_yuv.h" - -namespace media { - -void FilterYUVRows_C(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, - int source_width, int source_y_fraction) { - int y1_fraction = source_y_fraction; - int y0_fraction = 256 - y1_fraction; - uint8* end = ybuf + source_width; - do { - ybuf[0] = (y0_ptr[0] * y0_fraction + y1_ptr[0] * y1_fraction) >> 8; - ybuf[1] = (y0_ptr[1] * y0_fraction + y1_ptr[1] * y1_fraction) >> 8; - ybuf[2] = (y0_ptr[2] * y0_fraction + y1_ptr[2] * y1_fraction) >> 8; - ybuf[3] = (y0_ptr[3] * y0_fraction + y1_ptr[3] * y1_fraction) >> 8; - ybuf[4] = (y0_ptr[4] * y0_fraction + y1_ptr[4] * y1_fraction) >> 8; - ybuf[5] = (y0_ptr[5] * y0_fraction + y1_ptr[5] * y1_fraction) >> 8; - ybuf[6] = (y0_ptr[6] * y0_fraction + y1_ptr[6] * y1_fraction) >> 8; - ybuf[7] = (y0_ptr[7] * y0_fraction + y1_ptr[7] * y1_fraction) >> 8; - y0_ptr += 8; - y1_ptr += 8; - ybuf += 8; - } while (ybuf < end); -} - -} // namespace media diff --git a/media/base/simd/filter_yuv_mmx.cc b/media/base/simd/filter_yuv_mmx.cc deleted file mode 100644 index 77698dc..0000000 --- a/media/base/simd/filter_yuv_mmx.cc +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#if defined(_MSC_VER) -#include <intrin.h> -#else -#include <mmintrin.h> -#include <emmintrin.h> -#endif - -#include "build/build_config.h" -#include "media/base/simd/filter_yuv.h" - -namespace media { - -#if defined(COMPILER_MSVC) -// Warning 4799 is about calling emms before the function exits. -// We calls emms in a frame level so suppress this warning. 
-#pragma warning(disable: 4799) -#endif - -void FilterYUVRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, - int source_width, int source_y_fraction) { - __m64 zero = _mm_setzero_si64(); - __m64 y1_fraction = _mm_set1_pi16(source_y_fraction); - __m64 y0_fraction = _mm_set1_pi16(256 - source_y_fraction); - - const __m64* y0_ptr64 = reinterpret_cast<const __m64*>(y0_ptr); - const __m64* y1_ptr64 = reinterpret_cast<const __m64*>(y1_ptr); - __m64* dest64 = reinterpret_cast<__m64*>(ybuf); - __m64* end64 = reinterpret_cast<__m64*>(ybuf + source_width); - - do { - __m64 y0 = *y0_ptr64++; - __m64 y1 = *y1_ptr64++; - __m64 y2 = _mm_unpackhi_pi8(y0, zero); - __m64 y3 = _mm_unpackhi_pi8(y1, zero); - y0 = _mm_unpacklo_pi8(y0, zero); - y1 = _mm_unpacklo_pi8(y1, zero); - y0 = _mm_mullo_pi16(y0, y0_fraction); - y1 = _mm_mullo_pi16(y1, y1_fraction); - y2 = _mm_mullo_pi16(y2, y0_fraction); - y3 = _mm_mullo_pi16(y3, y1_fraction); - y0 = _mm_add_pi16(y0, y1); - y2 = _mm_add_pi16(y2, y3); - y0 = _mm_srli_pi16(y0, 8); - y2 = _mm_srli_pi16(y2, 8); - y0 = _mm_packs_pu16(y0, y2); - *dest64++ = y0; - } while (dest64 < end64); -} - -#if defined(COMPILER_MSVC) -#pragma warning(default: 4799) -#endif - -} // namespace media diff --git a/media/base/simd/filter_yuv_sse2.cc b/media/base/simd/filter_yuv_sse2.cc deleted file mode 100644 index 137ac94..0000000 --- a/media/base/simd/filter_yuv_sse2.cc +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#if defined(_MSC_VER) -#include <intrin.h> -#else -#include <mmintrin.h> -#include <emmintrin.h> -#endif - -#include "media/base/simd/filter_yuv.h" - -namespace media { - -void FilterYUVRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, - int source_width, int source_y_fraction) { - __m128i zero = _mm_setzero_si128(); - __m128i y1_fraction = _mm_set1_epi16(source_y_fraction); - __m128i y0_fraction = _mm_set1_epi16(256 - source_y_fraction); - - const __m128i* y0_ptr128 = reinterpret_cast<const __m128i*>(y0_ptr); - const __m128i* y1_ptr128 = reinterpret_cast<const __m128i*>(y1_ptr); - __m128i* dest128 = reinterpret_cast<__m128i*>(ybuf); - __m128i* end128 = reinterpret_cast<__m128i*>(ybuf + source_width); - - do { - __m128i y0 = _mm_loadu_si128(y0_ptr128); - __m128i y1 = _mm_loadu_si128(y1_ptr128); - __m128i y2 = _mm_unpackhi_epi8(y0, zero); - __m128i y3 = _mm_unpackhi_epi8(y1, zero); - y0 = _mm_unpacklo_epi8(y0, zero); - y1 = _mm_unpacklo_epi8(y1, zero); - y0 = _mm_mullo_epi16(y0, y0_fraction); - y1 = _mm_mullo_epi16(y1, y1_fraction); - y2 = _mm_mullo_epi16(y2, y0_fraction); - y3 = _mm_mullo_epi16(y3, y1_fraction); - y0 = _mm_add_epi16(y0, y1); - y2 = _mm_add_epi16(y2, y3); - y0 = _mm_srli_epi16(y0, 8); - y2 = _mm_srli_epi16(y2, 8); - y0 = _mm_packus_epi16(y0, y2); - *dest128++ = y0; - ++y0_ptr128; - ++y1_ptr128; - } while (dest128 < end128); -} - -} // namespace media diff --git a/media/base/simd/linear_scale_yuv_to_rgb_mmx.asm b/media/base/simd/linear_scale_yuv_to_rgb_mmx.asm deleted file mode 100644 index 7f7e0e8..0000000 --- a/media/base/simd/linear_scale_yuv_to_rgb_mmx.asm +++ /dev/null @@ -1,23 +0,0 @@ -; Copyright (c) 2011 The Chromium Authors. All rights reserved. -; Use of this source code is governed by a BSD-style license that can be -; found in the LICENSE file. - -%include "x86inc.asm" - -; -; This file uses MMX instructions. -; - SECTION_TEXT - CPU MMX - -; Use movq to save the output. 
-%define MOVQ movq - -; void LinearScaleYUVToRGB32Row_MMX(const uint8* y_buf, -; const uint8* u_buf, -; const uint8* v_buf, -; uint8* rgb_buf, -; int width, -; int source_dx); -%define SYMBOL LinearScaleYUVToRGB32Row_MMX -%include "linear_scale_yuv_to_rgb_mmx.inc" diff --git a/media/base/simd/linear_scale_yuv_to_rgb_mmx.inc b/media/base/simd/linear_scale_yuv_to_rgb_mmx.inc deleted file mode 100644 index 91c06a5..0000000 --- a/media/base/simd/linear_scale_yuv_to_rgb_mmx.inc +++ /dev/null @@ -1,166 +0,0 @@ -; Copyright (c) 2011 The Chromium Authors. All rights reserved. -; Use of this source code is governed by a BSD-style license that can be -; found in the LICENSE file. - - global mangle(SYMBOL) PRIVATE - align function_align - -mangle(SYMBOL): - %assign stack_offset 0 - - extern mangle(kCoefficientsRgbY) - -; Parameters are in the following order: -; 1. Y plane -; 2. U plane -; 3. V plane -; 4. ARGB frame -; 5. Width -; 6. Source dx - -PROLOGUE 6, 7, 3, Y, R0, R1, ARGB, R2, R3, TEMP - -%if gprsize == 8 -%define WORD_SIZE QWORD -%else -%define WORD_SIZE DWORD -%endif - -; Define register aliases. -%define Xq R1q ; Current X position -%define COMPLq R2q ; Component A value -%define COMPLd R2d ; Component A value -%define U_ARG_REGq R0q ; U plane address argument -%define V_ARG_REGq R1q ; V plane address argument -%define SOURCE_DX_ARG_REGq R3q ; Source dx argument -%define WIDTH_ARG_REGq R2q ; Width argument - -%ifdef PIC -; PIC code shared COMPR, U and V with the same register. Need to be careful in the -; code they don't mix up. This allows R3q to be used for YUV table. -%define COMPRq R0q ; Component B value -%define COMPRd R0d ; Component B value -%define Uq R0q ; U plane address -%define Vq R0q ; V plane address -%define U_PLANE WORD_SIZE [rsp + 3 * gprsize] -%define TABLE R3q ; Address of the table -%else -; Non-PIC code defines. 
-%define COMPRq R3q ; Component B value -%define COMPRd R3d ; Component B value -%define Uq R0q ; U plane address -%define Vq R3q ; V plane address -%define TABLE mangle(kCoefficientsRgbY) -%endif - -; Defines for stack variables. These are used in both PIC and non-PIC code. -%define V_PLANE WORD_SIZE [rsp + 2 * gprsize] -%define SOURCE_DX WORD_SIZE [rsp + gprsize] -%define SOURCE_WIDTH WORD_SIZE [rsp] - -; Handle stack variables differently for PIC and non-PIC code. - -%ifdef PIC -; Define stack usage for PIC code. PIC code push U plane onto stack. - PUSH U_ARG_REGq - PUSH V_ARG_REGq - PUSH SOURCE_DX_ARG_REGq - imul WIDTH_ARG_REGq, SOURCE_DX_ARG_REGq ; source_width = width * source_dx - PUSH WIDTH_ARG_REGq - -; Load the address of kCoefficientsRgbY into TABLE - mov TEMPq, SOURCE_DX_ARG_REGq ; Need to save source_dx first - LOAD_SYM TABLE, mangle(kCoefficientsRgbY) -%define SOURCE_DX_ARG_REGq TEMPq ; Overwrite SOURCE_DX_ARG_REGq to TEMPq -%else -; Define stack usage. Non-PIC code just push 3 registers to stack. - PUSH V_ARG_REGq - PUSH SOURCE_DX_ARG_REGq - imul WIDTH_ARG_REGq, SOURCE_DX_ARG_REGq ; source_width = width * source_dx - PUSH WIDTH_ARG_REGq -%endif - -%macro EPILOGUE 0 -%ifdef PIC - ADD rsp, 4 * gprsize -%else - ADD rsp, 3 * gprsize -%endif -%endmacro - - xor Xq, Xq ; x = 0 - cmp SOURCE_DX_ARG_REGq, 0x20000 - jl .lscaleend - mov Xq, 0x8000 ; x = 0.5 for 1/2 or less - jmp .lscaleend - -.lscaleloop: -%ifdef PIC - mov Uq, U_PLANE ; PIC code saves U_PLANE on stack. -%endif - -; Define macros for scaling YUV components since they are reused. 
-%macro SCALEUV 1 - mov TEMPq, Xq - sar TEMPq, 0x11 - movzx COMPLd, BYTE [%1 + TEMPq] - movzx COMPRd, BYTE [%1 + TEMPq + 1] - mov TEMPq, Xq - and TEMPq, 0x1fffe - imul COMPRq, TEMPq - xor TEMPq, 0x1fffe - imul COMPLq, TEMPq - add COMPLq, COMPRq - shr COMPLq, 17 -%endmacro - SCALEUV Uq ; Use the above macro to scale U - movq mm0, [TABLE + 2048 + 8 * COMPLq] - - mov Vq, V_PLANE ; Read V address from stack - SCALEUV Vq ; Use the above macro to scale V - paddsw mm0, [TABLE + 4096 + 8 * COMPLq] - -%macro SCALEY 0 - mov TEMPq, Xq - sar TEMPq, 0x10 - movzx COMPLd, BYTE [Yq + TEMPq] - movzx COMPRd, BYTE [Yq + TEMPq + 1] - mov TEMPq, Xq - add Xq, SOURCE_DX ; Add source_dx from stack - and TEMPq, 0xffff - imul COMPRq, TEMPq - xor TEMPq, 0xffff - imul COMPLq, TEMPq - add COMPLq, COMPRq - shr COMPLq, 16 -%endmacro - SCALEY ; Use the above macro to scale Y1 - movq mm1, [TABLE + 8 * COMPLq] - - cmp Xq, SOURCE_WIDTH ; Compare source_width from stack - jge .lscalelastpixel - - SCALEY ; Use the above macro to sacle Y2 - movq mm2, [TABLE + 8 * COMPLq] - - paddsw mm1, mm0 - paddsw mm2, mm0 - psraw mm1, 0x6 - psraw mm2, 0x6 - packuswb mm1, mm2 - MOVQ [ARGBq], mm1 - add ARGBq, 0x8 - -.lscaleend: - cmp Xq, SOURCE_WIDTH ; Compare source_width from stack - jl .lscaleloop - EPILOGUE - RET - -.lscalelastpixel: - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - movd [ARGBq], mm1 - EPILOGUE - RET diff --git a/media/base/simd/linear_scale_yuv_to_rgb_mmx_x64.asm b/media/base/simd/linear_scale_yuv_to_rgb_mmx_x64.asm deleted file mode 100644 index db7854457..0000000 --- a/media/base/simd/linear_scale_yuv_to_rgb_mmx_x64.asm +++ /dev/null @@ -1,142 +0,0 @@ -; Copyright (c) 2011 The Chromium Authors. All rights reserved. -; Use of this source code is governed by a BSD-style license that can be -; found in the LICENSE file. - -%include "x86inc.asm" - -; -; This file uses MMX instructions. 
-; - SECTION_TEXT - CPU MMX - -%define SYMBOL LinearScaleYUVToRGB32Row_MMX_X64 - global mangle(SYMBOL) PRIVATE - align function_align - -mangle(SYMBOL): - %assign stack_offset 0 - extern mangle(kCoefficientsRgbY) - -; Parameters are in the following order: -; 1. Y plane -; 2. U plane -; 3. V plane -; 4. ARGB frame -; 5. Width -; 6. Source dx - -PROLOGUE 6, 7, 3, Y, U, V, ARGB, WIDTH, SOURCE_DX, COMPL - -%define TABLEq r10 -%define Xq r11 -%define INDEXq r12 -%define COMPRd r13d -%define COMPRq r13 -%define FRACTIONq r14 - - PUSH TABLEq - PUSH Xq - PUSH INDEXq - PUSH COMPRq - PUSH FRACTIONq - -%macro EPILOGUE 0 - POP FRACTIONq - POP COMPRq - POP INDEXq - POP Xq - POP TABLEq -%endmacro - - LOAD_SYM TABLEq, mangle(kCoefficientsRgbY) - - imul WIDTHq, SOURCE_DXq ; source_width = width * source_dx - xor Xq, Xq ; x = 0 - cmp SOURCE_DXq, 0x20000 - jl .lscaleend - mov Xq, 0x8000 ; x = 0.5 for 1/2 or less - jmp .lscaleend - -.lscaleloop: - ; Interpolate U - mov INDEXq, Xq - sar INDEXq, 0x11 - movzx COMPLd, BYTE [Uq + INDEXq] - movzx COMPRd, BYTE [Uq + INDEXq + 1] - mov FRACTIONq, Xq - and FRACTIONq, 0x1fffe - imul COMPRq, FRACTIONq - xor FRACTIONq, 0x1fffe - imul COMPLq, FRACTIONq - add COMPLq, COMPRq - shr COMPLq, 17 - movq mm0, [TABLEq + 2048 + 8 * COMPLq] - - ; Interpolate V - movzx COMPLd, BYTE [Vq + INDEXq] - movzx COMPRd, BYTE [Vq + INDEXq + 1] - ; Trick here to imul COMPL first then COMPR. - ; Saves two instruction. :) - imul COMPLq, FRACTIONq - xor FRACTIONq, 0x1fffe - imul COMPRq, FRACTIONq - add COMPLq, COMPRq - shr COMPLq, 17 - paddsw mm0, [TABLEq + 4096 + 8 * COMPLq] - - ; Interpolate first Y1. - lea INDEXq, [Xq + SOURCE_DXq] ; INDEXq now points to next pixel. - ; Xq points to current pixel. 
- mov FRACTIONq, Xq - sar Xq, 0x10 - movzx COMPLd, BYTE [Yq + Xq] - movzx COMPRd, BYTE [Yq + Xq + 1] - and FRACTIONq, 0xffff - imul COMPRq, FRACTIONq - xor FRACTIONq, 0xffff - imul COMPLq, FRACTIONq - add COMPLq, COMPRq - shr COMPLq, 16 - movq mm1, [TABLEq + 8 * COMPLq] - - ; Interpolate Y2 if available. - cmp INDEXq, WIDTHq - jge .lscalelastpixel - - lea Xq, [INDEXq + SOURCE_DXq] ; Xq points to next pixel. - ; INDEXq points to current pixel. - mov FRACTIONq, INDEXq - sar INDEXq, 0x10 - movzx COMPLd, BYTE [Yq + INDEXq] - movzx COMPRd, BYTE [Yq + INDEXq + 1] - and FRACTIONq, 0xffff - imul COMPRq, FRACTIONq - xor FRACTIONq, 0xffff - imul COMPLq, FRACTIONq - add COMPLq, COMPRq - shr COMPLq, 16 - movq mm2, [TABLEq + 8 * COMPLq] - - paddsw mm1, mm0 - paddsw mm2, mm0 - psraw mm1, 0x6 - psraw mm2, 0x6 - packuswb mm1, mm2 - movntq [ARGBq], mm1 - add ARGBq, 0x8 - -.lscaleend: - cmp Xq, WIDTHq - jl .lscaleloop - jmp .epilogue - -.lscalelastpixel: - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - movd [ARGBq], mm1 - -.epilogue - EPILOGUE - RET diff --git a/media/base/simd/linear_scale_yuv_to_rgb_sse.asm b/media/base/simd/linear_scale_yuv_to_rgb_sse.asm deleted file mode 100644 index 847911c..0000000 --- a/media/base/simd/linear_scale_yuv_to_rgb_sse.asm +++ /dev/null @@ -1,23 +0,0 @@ -; Copyright (c) 2011 The Chromium Authors. All rights reserved. -; Use of this source code is governed by a BSD-style license that can be -; found in the LICENSE file. - -%include "x86inc.asm" - -; -; This file uses MMX and SSE instructions. -; - SECTION_TEXT - CPU MMX, SSE - -; Use movq to save the output. 
-%define MOVQ movntq - -; void LinearScaleYUVToRGB32Row_SSE(const uint8* y_buf, -; const uint8* u_buf, -; const uint8* v_buf, -; uint8* rgb_buf, -; int width, -; int source_dx); -%define SYMBOL LinearScaleYUVToRGB32Row_SSE -%include "linear_scale_yuv_to_rgb_mmx.inc" diff --git a/media/base/simd/scale_yuv_to_rgb_mmx.asm b/media/base/simd/scale_yuv_to_rgb_mmx.asm deleted file mode 100644 index 6a83757..0000000 --- a/media/base/simd/scale_yuv_to_rgb_mmx.asm +++ /dev/null @@ -1,23 +0,0 @@ -; Copyright (c) 2011 The Chromium Authors. All rights reserved. -; Use of this source code is governed by a BSD-style license that can be -; found in the LICENSE file. - -%include "x86inc.asm" - -; -; This file uses MMX instructions. -; - SECTION_TEXT - CPU MMX - -; Use movq to save the output. -%define MOVQ movq - -; void ScaleYUVToRGB32Row_MMX(const uint8* y_buf, -; const uint8* u_buf, -; const uint8* v_buf, -; uint8* rgb_buf, -; int width, -; int source_dx); -%define SYMBOL ScaleYUVToRGB32Row_MMX -%include "scale_yuv_to_rgb_mmx.inc" diff --git a/media/base/simd/scale_yuv_to_rgb_mmx.inc b/media/base/simd/scale_yuv_to_rgb_mmx.inc deleted file mode 100644 index 94c101c..0000000 --- a/media/base/simd/scale_yuv_to_rgb_mmx.inc +++ /dev/null @@ -1,115 +0,0 @@ -; Copyright (c) 2011 The Chromium Authors. All rights reserved. -; Use of this source code is governed by a BSD-style license that can be -; found in the LICENSE file. - - global mangle(SYMBOL) PRIVATE - align function_align - -mangle(SYMBOL): - %assign stack_offset 0 - - extern mangle(kCoefficientsRgbY) - -; Parameters are in the following order: -; 1. Y plane -; 2. U plane -; 3. V plane -; 4. ARGB frame -; 5. Width -; 6. Source dx - -PROLOGUE 6, 7, 3, Y, U, V, ARGB, R1, R2, TEMP - -%ifdef ARCH_X86_64 -%define WORD_SIZE QWORD -%else -%define WORD_SIZE DWORD -%endif - -%ifdef PIC - PUSH R1q ; Width -%endif - PUSH R2q ; Source dx - -%define SOURCE_DX WORD_SIZE [rsp] - -; PIC code. 
-%ifdef PIC - LOAD_SYM R1q, mangle(kCoefficientsRgbY) -%define WIDTH WORD_SIZE [rsp + gprsize] -%define TABLE R1q -%define Xq R2q - -; Non-PIC code. -%else -%define WIDTH R1q -%define TABLE mangle(kCoefficientsRgbY) -%define Xq R2q -%endif - - ; Set Xq index to 0. - xor Xq, Xq - jmp .scaleend - -.scaleloop: - ; TABLE can either be a register or a symbol depending on this is - ; PIC or not. - mov TEMPq, Xq - sar TEMPq, 17 - movzx TEMPd, BYTE [Uq + TEMPq] - movq mm0, [TABLE + 2048 + 8 * TEMPq] - mov TEMPq, Xq - sar TEMPq, 17 - movzx TEMPd, BYTE [Vq + TEMPq] - paddsw mm0, [TABLE + 4096 + 8 * TEMPq] - mov TEMPq, Xq - add Xq, SOURCE_DX - sar TEMPq, 16 - movzx TEMPd, BYTE [Yq + TEMPq] - movq mm1, [TABLE + 8 * TEMPq] - mov TEMPq, Xq - add Xq, SOURCE_DX - sar TEMPq, 16 - movzx TEMPd, BYTE [Yq + TEMPq] - movq mm2, [TABLE + 8 * TEMPq] - paddsw mm1, mm0 - paddsw mm2, mm0 - psraw mm1, 6 - psraw mm2, 6 - packuswb mm1, mm2 - MOVQ QWORD [ARGBq], mm1 - add ARGBq, 8 - -.scaleend: - ; WIDTH can either be a register or memory depending on this is - ; PIC or not. - sub WIDTH, 2 - jns .scaleloop - - and WIDTH, 1 ; odd number of pixels? - jz .scaledone - - mov TEMPq, Xq - sar TEMPq, 17 - movzx TEMPd, BYTE [Uq + TEMPq] - movq mm0, [TABLE + 2048 + 8 * TEMPq] - mov TEMPq, Xq - sar TEMPq, 17 - movzx TEMPd, BYTE [Vq + TEMPq] - paddsw mm0, [TABLE + 4096 + 8 * TEMPq] - mov TEMPq, Xq - sar TEMPq, 16 - movzx TEMPd, BYTE [Yq + TEMPq] - movq mm1, [TABLE + 8 * TEMPq] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - movd DWORD [ARGBq], mm1 - -.scaledone: -%ifdef PIC - ADD rsp, 2 * gprsize -%else - ADD rsp, gprsize -%endif - RET diff --git a/media/base/simd/scale_yuv_to_rgb_sse.asm b/media/base/simd/scale_yuv_to_rgb_sse.asm deleted file mode 100644 index bdd5625..0000000 --- a/media/base/simd/scale_yuv_to_rgb_sse.asm +++ /dev/null @@ -1,31 +0,0 @@ -; Copyright (c) 2011 The Chromium Authors. All rights reserved. 
-; Use of this source code is governed by a BSD-style license that can be -; found in the LICENSE file. - -%include "x86inc.asm" - -; -; This file uses MMX and SSE instructions. -; - SECTION_TEXT - CPU MMX, SSE - -; Use movq to save the output. -%define MOVQ movntq - -; void ScaleYUVToRGB32Row_SSE(const uint8* y_buf, -; const uint8* u_buf, -; const uint8* v_buf, -; uint8* rgb_buf, -; int width, -; int source_dx); -%define SYMBOL ScaleYUVToRGB32Row_SSE -%include "scale_yuv_to_rgb_mmx.inc" - -; void LinearScaleYUVToRGB32Row_SSE(const uint8* y_buf, -; const uint8* u_buf, -; const uint8* v_buf, -; uint8* rgb_buf, -; int width, -; int source_dx); - diff --git a/media/base/simd/scale_yuv_to_rgb_sse2_x64.asm b/media/base/simd/scale_yuv_to_rgb_sse2_x64.asm deleted file mode 100644 index e021457..0000000 --- a/media/base/simd/scale_yuv_to_rgb_sse2_x64.asm +++ /dev/null @@ -1,109 +0,0 @@ -; Copyright (c) 2011 The Chromium Authors. All rights reserved. -; Use of this source code is governed by a BSD-style license that can be -; found in the LICENSE file. - -%include "x86inc.asm" - -; -; This file uses MMX, SSE2 and instructions. -; - SECTION_TEXT - CPU SSE2 - -; void ScaleYUVToRGB32Row_SSE2_X64(const uint8* y_buf, -; const uint8* u_buf, -; const uint8* v_buf, -; uint8* rgb_buf, -; int width, -; int source_dx); -%define SYMBOL ScaleYUVToRGB32Row_SSE2_X64 - - global mangle(SYMBOL) PRIVATE - align function_align - -mangle(SYMBOL): - %assign stack_offset 0 - extern mangle(kCoefficientsRgbY) - -; Parameters are in the following order: -; 1. Y plane -; 2. U plane -; 3. V plane -; 4. ARGB frame -; 5. Width -; 6. Source dx - -PROLOGUE 6, 7, 3, Y, U, V, ARGB, WIDTH, SOURCE_DX, COMP - -%define TABLEq r10 -%define Xq r11 -%define INDEXq r12 - PUSH r10 - PUSH r11 - PUSH r12 - - LOAD_SYM TABLEq, mangle(kCoefficientsRgbY) - - ; Set Xq index to 0. - xor Xq, Xq - jmp .scaleend - -.scaleloop: - ; Read UV pixels. 
- mov INDEXq, Xq - sar INDEXq, 17 - movzx COMPd, BYTE [Uq + INDEXq] - movq xmm0, [TABLEq + 2048 + 8 * COMPq] - movzx COMPd, BYTE [Vq + INDEXq] - movq xmm1, [TABLEq + 4096 + 8 * COMPq] - - ; Read first Y pixel. - lea INDEXq, [Xq + SOURCE_DXq] ; INDEXq nows points to next pixel. - sar Xq, 16 - movzx COMPd, BYTE [Yq + Xq] - paddsw xmm0, xmm1 ; Hide a ADD after memory load. - movq xmm1, [TABLEq + 8 * COMPq] - - ; Read next Y pixel. - lea Xq, [INDEXq + SOURCE_DXq] ; Xq now points to next pixel. - sar INDEXq, 16 - movzx COMPd, BYTE [Yq + INDEXq] - movq xmm2, [TABLEq + 8 * COMPq] - paddsw xmm1, xmm0 - paddsw xmm2, xmm0 - shufps xmm1, xmm2, 0x44 ; Join two pixels into one XMM register - psraw xmm1, 6 - packuswb xmm1, xmm1 - movq QWORD [ARGBq], xmm1 - add ARGBq, 8 - -.scaleend: - sub WIDTHq, 2 - jns .scaleloop - - and WIDTHq, 1 ; odd number of pixels? - jz .scaledone - - ; Read U V components. - mov INDEXq, Xq - sar INDEXq, 17 - movzx COMPd, BYTE [Uq + INDEXq] - movq xmm0, [TABLEq + 2048 + 8 * COMPq] - movzx COMPd, BYTE [Vq + INDEXq] - paddsw xmm0, [TABLEq + 4096 + 8 * COMPq] - - ; Read one Y component. - mov INDEXq, Xq - sar INDEXq, 16 - movzx COMPd, BYTE [Yq + INDEXq] - movq xmm1, [TABLEq + 8 * COMPq] - paddsw xmm1, xmm0 - psraw xmm1, 6 - packuswb xmm1, xmm1 - movd DWORD [ARGBq], xmm1 - -.scaledone: - POP r12 - POP r11 - POP r10 - RET diff --git a/media/base/simd/x86inc.asm b/media/base/simd/x86inc.asm index 5e0ca20..956b999 100644 --- a/media/base/simd/x86inc.asm +++ b/media/base/simd/x86inc.asm @@ -95,14 +95,11 @@ %ifdef WIN64 %define PIC %elifndef ARCH_X86_64 -; For chromium we may build PIC code even for 32 bits system. -%ifndef CHROMIUM ; x86_32 doesn't require PIC. ; Some distros prefer shared objects to be PIC, but nothing breaks if ; the code contains a few textrels, so we'll skip that complexity. 
%undef PIC %endif -%endif %ifdef PIC default rel %endif @@ -950,11 +947,6 @@ AVX_INSTR pfmul, 1, 0 ;============================================================================= %ifdef CHROMIUM -; Always build PIC code on Mac for Chromium. -%ifdef MACHO -%define PIC -%endif - ; ; LOAD_SYM %1 (reg), %2 (sym) ; Copies the address to a local symbol to the specified register. diff --git a/media/base/yuv_convert.cc b/media/base/yuv_convert.cc index 22f1a24..cbf7f57 100644 --- a/media/base/yuv_convert.cc +++ b/media/base/yuv_convert.cc @@ -17,94 +17,151 @@ #include "media/base/yuv_convert.h" -#include "base/logging.h" #include "build/build_config.h" #include "media/base/cpu_features.h" #include "media/base/simd/convert_rgb_to_yuv.h" -#include "media/base/simd/convert_yuv_to_rgb.h" -#include "media/base/simd/filter_yuv.h" #include "media/base/yuv_convert_internal.h" #include "media/base/yuv_row.h" -#if defined(ARCH_CPU_X86_FAMILY) +#if USE_MMX #if defined(_MSC_VER) #include <intrin.h> #else -#include <emmintrin.h> #include <mmintrin.h> #endif #endif +#if USE_SSE2 +#include <emmintrin.h> +#endif + namespace media { -static FilterYUVRowsProc ChooseFilterYUVRowsProc() { -#if defined(ARCH_CPU_X86_FAMILY) - if (hasSSE2()) - return &FilterYUVRows_SSE2; - if (hasMMX()) - return &FilterYUVRows_MMX; -#endif - return &FilterYUVRows_C; -} +// 16.16 fixed point arithmetic +const int kFractionBits = 16; +const int kFractionMax = 1 << kFractionBits; +const int kFractionMask = ((1 << kFractionBits) - 1); -static ConvertYUVToRGB32RowProc ChooseConvertYUVToRGB32RowProc() { -#if defined(ARCH_CPU_X86_FAMILY) - if (hasSSE()) - return &ConvertYUVToRGB32Row_SSE; - if (hasMMX()) - return &ConvertYUVToRGB32Row_MMX; -#endif - return &ConvertYUVToRGB32Row_C; -} +// Convert a frame of YUV to 32 bit ARGB. 
+void ConvertYUVToRGB32(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width, + int height, + int y_pitch, + int uv_pitch, + int rgb_pitch, + YUVType yuv_type) { + unsigned int y_shift = yuv_type; + for (int y = 0; y < height; ++y) { + uint8* rgb_row = rgb_buf + y * rgb_pitch; + const uint8* y_ptr = y_buf + y * y_pitch; + const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch; + const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch; + + FastConvertYUVToRGB32Row(y_ptr, + u_ptr, + v_ptr, + rgb_row, + width); + } -static ScaleYUVToRGB32RowProc ChooseScaleYUVToRGB32RowProc() { -#if defined(ARCH_CPU_X86_FAMILY) -#if defined(ARCH_CPU_X86_64) - // Use 64-bits version if possible. - return &ScaleYUVToRGB32Row_SSE2_X64; -#endif - // Choose the best one on 32-bits system. - if (hasSSE()) - return &ScaleYUVToRGB32Row_SSE; - if (hasMMX()) - return &ScaleYUVToRGB32Row_MMX; -#endif - return &ScaleYUVToRGB32Row_C; + // MMX used for FastConvertYUVToRGB32Row requires emms instruction. + EMMS(); } -static ScaleYUVToRGB32RowProc ChooseLinearScaleYUVToRGB32RowProc() { -#if defined(ARCH_CPU_X86_FAMILY) -#if defined(ARCH_CPU_X86_64) - // Use 64-bits version if possible. - return &LinearScaleYUVToRGB32Row_MMX_X64; -#endif - // 32-bits system. - if (hasSSE()) - return &LinearScaleYUVToRGB32Row_SSE; - if (hasMMX()) - return &LinearScaleYUVToRGB32Row_MMX; -#endif - return &LinearScaleYUVToRGB32Row_C; +#if USE_SSE2 +// FilterRows combines two rows of the image using linear interpolation. 
+// SSE2 version does 16 pixels at a time + +static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, + int source_width, int source_y_fraction) { + __m128i zero = _mm_setzero_si128(); + __m128i y1_fraction = _mm_set1_epi16(source_y_fraction); + __m128i y0_fraction = _mm_set1_epi16(256 - source_y_fraction); + + const __m128i* y0_ptr128 = reinterpret_cast<const __m128i*>(y0_ptr); + const __m128i* y1_ptr128 = reinterpret_cast<const __m128i*>(y1_ptr); + __m128i* dest128 = reinterpret_cast<__m128i*>(ybuf); + __m128i* end128 = reinterpret_cast<__m128i*>(ybuf + source_width); + + do { + __m128i y0 = _mm_loadu_si128(y0_ptr128); + __m128i y1 = _mm_loadu_si128(y1_ptr128); + __m128i y2 = _mm_unpackhi_epi8(y0, zero); + __m128i y3 = _mm_unpackhi_epi8(y1, zero); + y0 = _mm_unpacklo_epi8(y0, zero); + y1 = _mm_unpacklo_epi8(y1, zero); + y0 = _mm_mullo_epi16(y0, y0_fraction); + y1 = _mm_mullo_epi16(y1, y1_fraction); + y2 = _mm_mullo_epi16(y2, y0_fraction); + y3 = _mm_mullo_epi16(y3, y1_fraction); + y0 = _mm_add_epi16(y0, y1); + y2 = _mm_add_epi16(y2, y3); + y0 = _mm_srli_epi16(y0, 8); + y2 = _mm_srli_epi16(y2, 8); + y0 = _mm_packus_epi16(y0, y2); + *dest128++ = y0; + ++y0_ptr128; + ++y1_ptr128; + } while (dest128 < end128); } - -// Empty SIMD registers state after using them. 
-void EmptyRegisterState() { -#if defined(ARCH_CPU_X86_FAMILY) - static bool checked = false; - static bool has_mmx = false; - if (!checked) { - has_mmx = hasMMX(); - checked = true; - } - if (has_mmx) - _mm_empty(); -#endif +#elif USE_MMX +// MMX version does 8 pixels at a time +static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, + int source_width, int source_y_fraction) { + __m64 zero = _mm_setzero_si64(); + __m64 y1_fraction = _mm_set1_pi16(source_y_fraction); + __m64 y0_fraction = _mm_set1_pi16(256 - source_y_fraction); + + const __m64* y0_ptr64 = reinterpret_cast<const __m64*>(y0_ptr); + const __m64* y1_ptr64 = reinterpret_cast<const __m64*>(y1_ptr); + __m64* dest64 = reinterpret_cast<__m64*>(ybuf); + __m64* end64 = reinterpret_cast<__m64*>(ybuf + source_width); + + do { + __m64 y0 = *y0_ptr64++; + __m64 y1 = *y1_ptr64++; + __m64 y2 = _mm_unpackhi_pi8(y0, zero); + __m64 y3 = _mm_unpackhi_pi8(y1, zero); + y0 = _mm_unpacklo_pi8(y0, zero); + y1 = _mm_unpacklo_pi8(y1, zero); + y0 = _mm_mullo_pi16(y0, y0_fraction); + y1 = _mm_mullo_pi16(y1, y1_fraction); + y2 = _mm_mullo_pi16(y2, y0_fraction); + y3 = _mm_mullo_pi16(y3, y1_fraction); + y0 = _mm_add_pi16(y0, y1); + y2 = _mm_add_pi16(y2, y3); + y0 = _mm_srli_pi16(y0, 8); + y2 = _mm_srli_pi16(y2, 8); + y0 = _mm_packs_pu16(y0, y2); + *dest64++ = y0; + } while (dest64 < end64); +} +#else // no MMX or SSE2 +// C version does 8 at a time to mimic MMX code +static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, + int source_width, int source_y_fraction) { + int y1_fraction = source_y_fraction; + int y0_fraction = 256 - y1_fraction; + uint8* end = ybuf + source_width; + do { + ybuf[0] = (y0_ptr[0] * y0_fraction + y1_ptr[0] * y1_fraction) >> 8; + ybuf[1] = (y0_ptr[1] * y0_fraction + y1_ptr[1] * y1_fraction) >> 8; + ybuf[2] = (y0_ptr[2] * y0_fraction + y1_ptr[2] * y1_fraction) >> 8; + ybuf[3] = (y0_ptr[3] * y0_fraction + y1_ptr[3] * y1_fraction) >> 8; + ybuf[4] = (y0_ptr[4] * 
y0_fraction + y1_ptr[4] * y1_fraction) >> 8; + ybuf[5] = (y0_ptr[5] * y0_fraction + y1_ptr[5] * y1_fraction) >> 8; + ybuf[6] = (y0_ptr[6] * y0_fraction + y1_ptr[6] * y1_fraction) >> 8; + ybuf[7] = (y0_ptr[7] * y0_fraction + y1_ptr[7] * y1_fraction) >> 8; + y0_ptr += 8; + y1_ptr += 8; + ybuf += 8; + } while (ybuf < end); } +#endif -// 16.16 fixed point arithmetic -const int kFractionBits = 16; -const int kFractionMax = 1 << kFractionBits; -const int kFractionMask = ((1 << kFractionBits) - 1); // Scale a frame of YUV to 32 bit ARGB. void ScaleYUVToRGB32(const uint8* y_buf, @@ -121,20 +178,6 @@ void ScaleYUVToRGB32(const uint8* y_buf, YUVType yuv_type, Rotate view_rotate, ScaleFilter filter) { - static FilterYUVRowsProc filter_proc = NULL; - static ConvertYUVToRGB32RowProc convert_proc = NULL; - static ScaleYUVToRGB32RowProc scale_proc = NULL; - static ScaleYUVToRGB32RowProc linear_scale_proc = NULL; - - if (!filter_proc) - filter_proc = ChooseFilterYUVRowsProc(); - if (!convert_proc) - convert_proc = ChooseConvertYUVToRGB32RowProc(); - if (!scale_proc) - scale_proc = ChooseScaleYUVToRGB32RowProc(); - if (!linear_scale_proc) - linear_scale_proc = ChooseLinearScaleYUVToRGB32RowProc(); - // Handle zero sized sources and destinations. 
if ((yuv_type == YV12 && (source_width < 2 || source_height < 2)) || (yuv_type == YV16 && (source_width < 2 || source_height < 1)) || @@ -182,6 +225,9 @@ void ScaleYUVToRGB32(const uint8* y_buf, int source_dx = source_width * kFractionMax / width; int source_dy = source_height * kFractionMax / height; +#if USE_MMX && defined(_MSC_VER) + int source_dx_uv = source_dx; +#endif if ((view_rotate == ROTATE_90) || (view_rotate == ROTATE_270)) { @@ -194,6 +240,9 @@ void ScaleYUVToRGB32(const uint8* y_buf, int original_dx = source_dx; int original_dy = source_dy; source_dx = ((original_dy >> kFractionBits) * y_pitch) << kFractionBits; +#if USE_MMX && defined(_MSC_VER) + source_dx_uv = ((original_dy >> kFractionBits) * uv_pitch) << kFractionBits; +#endif source_dy = original_dx; if (view_rotate == ROTATE_90) { y_pitch = -1; @@ -245,7 +294,7 @@ void ScaleYUVToRGB32(const uint8* y_buf, if (filter & media::FILTER_BILINEAR_V) { if (yscale_fixed != kFractionMax && source_y_fraction && ((source_y + 1) < source_height)) { - filter_proc(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction); + FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction); } else { memcpy(ybuf, y0_ptr, source_width); } @@ -255,8 +304,8 @@ void ScaleYUVToRGB32(const uint8* y_buf, if (yscale_fixed != kFractionMax && source_uv_fraction && (((source_y >> y_shift) + 1) < (source_height >> y_shift))) { - filter_proc(ubuf, u0_ptr, u1_ptr, uv_source_width, source_uv_fraction); - filter_proc(vbuf, v0_ptr, v1_ptr, uv_source_width, source_uv_fraction); + FilterRows(ubuf, u0_ptr, u1_ptr, uv_source_width, source_uv_fraction); + FilterRows(vbuf, v0_ptr, v1_ptr, uv_source_width, source_uv_fraction); } else { memcpy(ubuf, u0_ptr, uv_source_width); memcpy(vbuf, v0_ptr, uv_source_width); @@ -267,17 +316,41 @@ void ScaleYUVToRGB32(const uint8* y_buf, vbuf[uv_source_width] = vbuf[uv_source_width - 1]; } if (source_dx == kFractionMax) { // Not scaled - convert_proc(y_ptr, u_ptr, v_ptr, dest_pixel, width); + 
FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, + dest_pixel, width); } else { if (filter & FILTER_BILINEAR_H) { - linear_scale_proc(y_ptr, u_ptr, v_ptr, dest_pixel, width, source_dx); - } else { - scale_proc(y_ptr, u_ptr, v_ptr, dest_pixel, width, source_dx); + LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, + dest_pixel, width, source_dx); + } else { +// Specialized scalers and rotation. +#if USE_MMX && defined(_MSC_VER) + if (width == (source_width * 2)) { + DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, + dest_pixel, width); + } else if ((source_dx & kFractionMask) == 0) { + // Scaling by integer scale factor. ie half. + ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, + dest_pixel, width, + source_dx >> kFractionBits); + } else if (source_dx_uv == source_dx) { // Not rotated. + ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, + dest_pixel, width, source_dx); + } else { + RotateConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, + dest_pixel, width, + source_dx >> kFractionBits, + source_dx_uv >> kFractionBits); + } +#else + ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, + dest_pixel, width, source_dx); +#endif } } } - - EmptyRegisterState(); + // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms. + EMMS(); } void ConvertRGB32ToYUV(const uint8* rgbframe, @@ -298,9 +371,7 @@ void ConvertRGB32ToYUV(const uint8* rgbframe, convert_proc = &ConvertRGB32ToYUV_C; #else // For x86 processors, check if SSSE3 (or SSE2) is supported. 
- if (hasSSSE3()) - convert_proc = &ConvertRGB32ToYUV_SSSE3; - else if (hasSSE2()) + if (hasSSE2()) convert_proc = &ConvertRGB32ToYUV_SSE2; else convert_proc = &ConvertRGB32ToYUV_C; @@ -320,21 +391,8 @@ void ConvertRGB24ToYUV(const uint8* rgbframe, int rgbstride, int ystride, int uvstride) { -#if defined(ARCH_CPU_ARM_FAMILY) ConvertRGB24ToYUV_C(rgbframe, yplane, uplane, vplane, width, height, rgbstride, ystride, uvstride); -#else - static void (*convert_proc)(const uint8*, uint8*, uint8*, uint8*, - int, int, int, int, int) = NULL; - if (!convert_proc) { - if (hasSSSE3()) - convert_proc = &ConvertRGB24ToYUV_SSSE3; - else - convert_proc = &ConvertRGB24ToYUV_C; - } - convert_proc(rgbframe, yplane, uplane, vplane, width, height, - rgbstride, ystride, uvstride); -#endif } void ConvertYUY2ToYUV(const uint8* src, @@ -345,34 +403,4 @@ void ConvertYUY2ToYUV(const uint8* src, int height) { ConvertYUY2ToYUV_C(src, yplane, uplane, vplane, width, height); } - -void ConvertYUVToRGB32(const uint8* yplane, - const uint8* uplane, - const uint8* vplane, - uint8* rgbframe, - int width, - int height, - int ystride, - int uvstride, - int rgbstride, - YUVType yuv_type) { -#if defined(ARCH_CPU_ARM_FAMILY) - ConvertYUVToRGB32_C(yplane, uplane, vplane, rgbframe, - width, height, ystride, uvstride, rgbstride, yuv_type); -#else - static ConvertYUVToRGB32Proc convert_proc = NULL; - if (!convert_proc) { - if (hasSSE()) - convert_proc = &ConvertYUVToRGB32_SSE; - else if (hasMMX()) - convert_proc = &ConvertYUVToRGB32_MMX; - else - convert_proc = &ConvertYUVToRGB32_C; - } - - convert_proc(yplane, uplane, vplane, rgbframe, - width, height, ystride, uvstride, rgbstride, yuv_type); -#endif -} - } // namespace media diff --git a/media/base/yuv_convert.h b/media/base/yuv_convert.h index 95b1780..20ba0aa 100644 --- a/media/base/yuv_convert.h +++ b/media/base/yuv_convert.h @@ -94,10 +94,6 @@ void ConvertYUY2ToYUV(const uint8* src, int width, int height); -// Empty SIMD register state after calling 
optimized scaler functions. -// This method is only used in unit test after calling SIMD functions. -void EmptyRegisterState(); - } // namespace media #endif // MEDIA_BASE_YUV_CONVERT_H_ diff --git a/media/base/yuv_convert_internal.h b/media/base/yuv_convert_internal.h index 7be14c4..80776aa 100644 --- a/media/base/yuv_convert_internal.h +++ b/media/base/yuv_convert_internal.h @@ -13,15 +13,15 @@ namespace media { // SSE2 version of converting RGBA to YV12. -void ConvertRGB32ToYUV_SSE2(const uint8* rgbframe, - uint8* yplane, - uint8* uplane, - uint8* vplane, - int width, - int height, - int rgbstride, - int ystride, - int uvstride); +extern void ConvertRGB32ToYUV_SSE2(const uint8* rgbframe, + uint8* yplane, + uint8* uplane, + uint8* vplane, + int width, + int height, + int rgbstride, + int ystride, + int uvstride); // This is a C reference implementation of the above routine. // This method should only be used in unit test. diff --git a/media/base/yuv_convert_unittest.cc b/media/base/yuv_convert_unittest.cc index 5de3b11..380897e 100644 --- a/media/base/yuv_convert_unittest.cc +++ b/media/base/yuv_convert_unittest.cc @@ -8,7 +8,6 @@ #include "base/path_service.h" #include "media/base/cpu_features.h" #include "media/base/djb2.h" -#include "media/base/simd/convert_yuv_to_rgb.h" #include "media/base/yuv_convert.h" #include "media/base/yuv_convert_internal.h" #include "media/base/yuv_row.h" @@ -377,7 +376,17 @@ TEST(YUVConvertTest, RGB32ToYUV_SSE2_MatchReference) { scoped_array<uint8> yuv_converted_bytes(new uint8[kYUV12Size]); scoped_array<uint8> yuv_reference_bytes(new uint8[kYUV12Size]); - ReadYV12Data(&yuv_bytes); + // Read YUV reference data from file. 
+ FilePath yuv_url; + EXPECT_TRUE(PathService::Get(base::DIR_SOURCE_ROOT, &yuv_url)); + yuv_url = yuv_url.Append(FILE_PATH_LITERAL("media")) + .Append(FILE_PATH_LITERAL("test")) + .Append(FILE_PATH_LITERAL("data")) + .Append(FILE_PATH_LITERAL("bali_640x360_P420.yuv")); + EXPECT_EQ(static_cast<int>(kYUV12Size), + file_util::ReadFile(yuv_url, + reinterpret_cast<char*>(yuv_bytes.get()), + static_cast<int>(kYUV12Size))); // Convert a frame of YUV to 32 bit ARGB. media::ConvertYUVToRGB32( @@ -450,241 +459,4 @@ TEST(YUVConvertTest, RGB32ToYUV_SSE2_MatchReference) { // Make sure there's no difference from the reference. EXPECT_EQ(0, error); } - -TEST(YUVConvertTest, ConvertYUVToRGB32Row_MMX) { - if (!media::hasMMX()) { - LOG(WARNING) << "System not supported. Test skipped."; - return; - } - - scoped_array<uint8> yuv_bytes(new uint8[kYUV12Size]); - scoped_array<uint8> rgb_bytes_reference(new uint8[kRGBSize]); - scoped_array<uint8> rgb_bytes_converted(new uint8[kRGBSize]); - ReadYV12Data(&yuv_bytes); - - const int kWidth = 167; - ConvertYUVToRGB32Row_C(yuv_bytes.get(), - yuv_bytes.get() + kSourceUOffset, - yuv_bytes.get() + kSourceVOffset, - rgb_bytes_reference.get(), - kWidth); - ConvertYUVToRGB32Row_MMX(yuv_bytes.get(), - yuv_bytes.get() + kSourceUOffset, - yuv_bytes.get() + kSourceVOffset, - rgb_bytes_converted.get(), - kWidth); - media::EmptyRegisterState(); - EXPECT_EQ(0, memcmp(rgb_bytes_reference.get(), - rgb_bytes_converted.get(), - kWidth * kBpp)); -} - -TEST(YUVConvertTest, ConvertYUVToRGB32Row_SSE) { - if (!media::hasSSE()) { - LOG(WARNING) << "System not supported. 
Test skipped."; - return; - } - - scoped_array<uint8> yuv_bytes(new uint8[kYUV12Size]); - scoped_array<uint8> rgb_bytes_reference(new uint8[kRGBSize]); - scoped_array<uint8> rgb_bytes_converted(new uint8[kRGBSize]); - ReadYV12Data(&yuv_bytes); - - const int kWidth = 167; - ConvertYUVToRGB32Row_C(yuv_bytes.get(), - yuv_bytes.get() + kSourceUOffset, - yuv_bytes.get() + kSourceVOffset, - rgb_bytes_reference.get(), - kWidth); - ConvertYUVToRGB32Row_SSE(yuv_bytes.get(), - yuv_bytes.get() + kSourceUOffset, - yuv_bytes.get() + kSourceVOffset, - rgb_bytes_converted.get(), - kWidth); - media::EmptyRegisterState(); - EXPECT_EQ(0, memcmp(rgb_bytes_reference.get(), - rgb_bytes_converted.get(), - kWidth * kBpp)); -} - -TEST(YUVConvertTest, ScaleYUVToRGB32Row_MMX) { - if (!media::hasMMX()) { - LOG(WARNING) << "System not supported. Test skipped."; - return; - } - - scoped_array<uint8> yuv_bytes(new uint8[kYUV12Size]); - scoped_array<uint8> rgb_bytes_reference(new uint8[kRGBSize]); - scoped_array<uint8> rgb_bytes_converted(new uint8[kRGBSize]); - ReadYV12Data(&yuv_bytes); - - const int kWidth = 167; - const int kSourceDx = 80000; // This value means a scale down. - ScaleYUVToRGB32Row_C(yuv_bytes.get(), - yuv_bytes.get() + kSourceUOffset, - yuv_bytes.get() + kSourceVOffset, - rgb_bytes_reference.get(), - kWidth, - kSourceDx); - ScaleYUVToRGB32Row_MMX(yuv_bytes.get(), - yuv_bytes.get() + kSourceUOffset, - yuv_bytes.get() + kSourceVOffset, - rgb_bytes_converted.get(), - kWidth, - kSourceDx); - media::EmptyRegisterState(); - EXPECT_EQ(0, memcmp(rgb_bytes_reference.get(), - rgb_bytes_converted.get(), - kWidth * kBpp)); -} - -TEST(YUVConvertTest, ScaleYUVToRGB32Row_SSE) { - if (!media::hasSSE()) { - LOG(WARNING) << "System not supported. 
Test skipped."; - return; - } - - scoped_array<uint8> yuv_bytes(new uint8[kYUV12Size]); - scoped_array<uint8> rgb_bytes_reference(new uint8[kRGBSize]); - scoped_array<uint8> rgb_bytes_converted(new uint8[kRGBSize]); - ReadYV12Data(&yuv_bytes); - - const int kWidth = 167; - const int kSourceDx = 80000; // This value means a scale down. - ScaleYUVToRGB32Row_C(yuv_bytes.get(), - yuv_bytes.get() + kSourceUOffset, - yuv_bytes.get() + kSourceVOffset, - rgb_bytes_reference.get(), - kWidth, - kSourceDx); - ScaleYUVToRGB32Row_SSE(yuv_bytes.get(), - yuv_bytes.get() + kSourceUOffset, - yuv_bytes.get() + kSourceVOffset, - rgb_bytes_converted.get(), - kWidth, - kSourceDx); - media::EmptyRegisterState(); - EXPECT_EQ(0, memcmp(rgb_bytes_reference.get(), - rgb_bytes_converted.get(), - kWidth * kBpp)); -} - -TEST(YUVConvertTest, LinearScaleYUVToRGB32Row_MMX) { - if (!media::hasMMX()) { - LOG(WARNING) << "System not supported. Test skipped."; - return; - } - - scoped_array<uint8> yuv_bytes(new uint8[kYUV12Size]); - scoped_array<uint8> rgb_bytes_reference(new uint8[kRGBSize]); - scoped_array<uint8> rgb_bytes_converted(new uint8[kRGBSize]); - ReadYV12Data(&yuv_bytes); - - const int kWidth = 167; - const int kSourceDx = 80000; // This value means a scale down. - LinearScaleYUVToRGB32Row_C(yuv_bytes.get(), - yuv_bytes.get() + kSourceUOffset, - yuv_bytes.get() + kSourceVOffset, - rgb_bytes_reference.get(), - kWidth, - kSourceDx); - LinearScaleYUVToRGB32Row_MMX(yuv_bytes.get(), - yuv_bytes.get() + kSourceUOffset, - yuv_bytes.get() + kSourceVOffset, - rgb_bytes_converted.get(), - kWidth, - kSourceDx); - media::EmptyRegisterState(); - EXPECT_EQ(0, memcmp(rgb_bytes_reference.get(), - rgb_bytes_converted.get(), - kWidth * kBpp)); -} - -TEST(YUVConvertTest, LinearScaleYUVToRGB32Row_SSE) { - if (!media::hasSSE()) { - LOG(WARNING) << "System not supported. 
Test skipped."; - return; - } - - scoped_array<uint8> yuv_bytes(new uint8[kYUV12Size]); - scoped_array<uint8> rgb_bytes_reference(new uint8[kRGBSize]); - scoped_array<uint8> rgb_bytes_converted(new uint8[kRGBSize]); - ReadYV12Data(&yuv_bytes); - - const int kWidth = 167; - const int kSourceDx = 80000; // This value means a scale down. - LinearScaleYUVToRGB32Row_C(yuv_bytes.get(), - yuv_bytes.get() + kSourceUOffset, - yuv_bytes.get() + kSourceVOffset, - rgb_bytes_reference.get(), - kWidth, - kSourceDx); - LinearScaleYUVToRGB32Row_SSE(yuv_bytes.get(), - yuv_bytes.get() + kSourceUOffset, - yuv_bytes.get() + kSourceVOffset, - rgb_bytes_converted.get(), - kWidth, - kSourceDx); - media::EmptyRegisterState(); - EXPECT_EQ(0, memcmp(rgb_bytes_reference.get(), - rgb_bytes_converted.get(), - kWidth * kBpp)); -} - -#if defined(ARCH_CPU_X86_64) - -TEST(YUVConvertTest, ScaleYUVToRGB32Row_SSE2_X64) { - scoped_array<uint8> yuv_bytes(new uint8[kYUV12Size]); - scoped_array<uint8> rgb_bytes_reference(new uint8[kRGBSize]); - scoped_array<uint8> rgb_bytes_converted(new uint8[kRGBSize]); - ReadYV12Data(&yuv_bytes); - - const int kWidth = 167; - const int kSourceDx = 80000; // This value means a scale down. 
- ScaleYUVToRGB32Row_C(yuv_bytes.get(), - yuv_bytes.get() + kSourceUOffset, - yuv_bytes.get() + kSourceVOffset, - rgb_bytes_reference.get(), - kWidth, - kSourceDx); - ScaleYUVToRGB32Row_SSE2_X64(yuv_bytes.get(), - yuv_bytes.get() + kSourceUOffset, - yuv_bytes.get() + kSourceVOffset, - rgb_bytes_converted.get(), - kWidth, - kSourceDx); - media::EmptyRegisterState(); - EXPECT_EQ(0, memcmp(rgb_bytes_reference.get(), - rgb_bytes_converted.get(), - kWidth * kBpp)); -} - -TEST(YUVConvertTest, LinearScaleYUVToRGB32Row_MMX_X64) { - scoped_array<uint8> yuv_bytes(new uint8[kYUV12Size]); - scoped_array<uint8> rgb_bytes_reference(new uint8[kRGBSize]); - scoped_array<uint8> rgb_bytes_converted(new uint8[kRGBSize]); - ReadYV12Data(&yuv_bytes); - - const int kWidth = 167; - const int kSourceDx = 80000; // This value means a scale down. - LinearScaleYUVToRGB32Row_C(yuv_bytes.get(), - yuv_bytes.get() + kSourceUOffset, - yuv_bytes.get() + kSourceVOffset, - rgb_bytes_reference.get(), - kWidth, - kSourceDx); - LinearScaleYUVToRGB32Row_MMX_X64(yuv_bytes.get(), - yuv_bytes.get() + kSourceUOffset, - yuv_bytes.get() + kSourceVOffset, - rgb_bytes_converted.get(), - kWidth, - kSourceDx); - media::EmptyRegisterState(); - EXPECT_EQ(0, memcmp(rgb_bytes_reference.get(), - rgb_bytes_converted.get(), - kWidth * kBpp)); -} - -#endif // defined(ARCH_CPU_X86_64) - -#endif // defined(ARCH_CPU_X86_FAMILY) +#endif diff --git a/media/base/yuv_row_posix.cc b/media/base/yuv_row_posix.cc index f839de8..2217f38 100644 --- a/media/base/yuv_row_posix.cc +++ b/media/base/yuv_row_posix.cc @@ -920,3 +920,4 @@ void LinearScaleYUVToRGB32Row(const uint8* y_buf, #endif // USE_MMX } // extern "C" + diff --git a/media/media.gyp b/media/media.gyp index 60a192ab..8be1290 100644 --- a/media/media.gyp +++ b/media/media.gyp @@ -345,12 +345,7 @@ 'conditions': [ [ 'target_arch == "ia32" or target_arch == "x64"', { 'dependencies': [ - 'yuv_convert_simd_x86', - ], - }], - [ 'target_arch == "arm"', { - 'dependencies': [ - 
'yuv_convert_simd_arm', + 'yuv_convert_sse2', ], }], ], @@ -366,45 +361,15 @@ ], }, { - 'target_name': 'yuv_convert_simd_x86', + 'target_name': 'yuv_convert_sse2', 'type': 'static_library', 'include_dirs': [ '..', ], - 'sources': [ - 'base/yuv_convert_sse2.cc', - 'base/simd/convert_rgb_to_yuv_x86.cc', - 'base/simd/convert_rgb_to_yuv_ssse3.asm', - 'base/simd/convert_rgb_to_yuv_ssse3.inc', - 'base/simd/convert_yuv_to_rgb_c.cc', - 'base/simd/convert_yuv_to_rgb_x86.cc', - 'base/simd/convert_yuv_to_rgb_mmx.asm', - 'base/simd/convert_yuv_to_rgb_mmx.inc', - 'base/simd/convert_yuv_to_rgb_sse.asm', - 'base/simd/filter_yuv.h', - 'base/simd/filter_yuv_c.cc', - 'base/simd/filter_yuv_mmx.cc', - 'base/simd/filter_yuv_sse2.cc', - 'base/simd/linear_scale_yuv_to_rgb_mmx.asm', - 'base/simd/linear_scale_yuv_to_rgb_mmx.inc', - 'base/simd/linear_scale_yuv_to_rgb_sse.asm', - 'base/simd/scale_yuv_to_rgb_mmx.asm', - 'base/simd/scale_yuv_to_rgb_mmx.inc', - 'base/simd/scale_yuv_to_rgb_sse.asm', - ], 'conditions': [ - [ 'target_arch == "x64"', { - # Source files optimized for X64 systems. - 'sources': [ - 'base/simd/linear_scale_yuv_to_rgb_mmx_x64.asm', - 'base/simd/scale_yuv_to_rgb_sse2_x64.asm', - ], - }], [ 'os_posix == 1 and OS != "mac"', { 'cflags': [ '-msse2', - '-msse3', - '-mssse3', ], }], [ 'OS == "mac"', { @@ -467,6 +432,10 @@ }, }], ], + 'sources': [ + 'base/yuv_convert_sse2.cc', + 'base/simd/convert_rgb_to_yuv.cc', + ], 'variables': { 'yasm_output_path': '<(SHARED_INTERMEDIATE_DIR)/media', }, @@ -475,18 +444,6 @@ ], }, { - 'target_name': 'yuv_convert_simd_arm', - 'type': 'static_library', - 'include_dirs': [ - '..', - ], - 'sources': [ - 'base/simd/convert_yuv_to_rgb_c.cc', - 'base/simd/filter_yuv.h', - 'base/simd/filter_yuv_c.cc', - ], - }, - { 'target_name': 'ffmpeg_unittests', 'type': 'executable', 'dependencies': [ |