diff options
author | hclam@chromium.org <hclam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-09-16 17:34:03 +0000 |
---|---|---|
committer | hclam@chromium.org <hclam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-09-16 17:34:03 +0000 |
commit | 4538076777f3d6264fc1901ebffae527e2f08061 (patch) | |
tree | f170eaec08267b2a0792a0cba123c539c18b1fe6 /media | |
parent | ac8c1809e2fec0e52396b6b6ab5506d817412a23 (diff) | |
download | chromium_src-4538076777f3d6264fc1901ebffae527e2f08061.zip chromium_src-4538076777f3d6264fc1901ebffae527e2f08061.tar.gz chromium_src-4538076777f3d6264fc1901ebffae527e2f08061.tar.bz2 |
Reorganize YUV scalers (Continued)
After rewriting assembly code in YASM we can finally move all these
different versions of files into the same folder, i.e. media/base/simd.
After this change the main entry point will be:
yuv_convert.cc and yuv_convert.h
It then calls into the internal functions under media/base/simd.
After this change I'll move all the color conversion files to
media/csc.
BUG=None
TEST=Tree is green
Review URL: http://codereview.chromium.org/7888012
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@101507 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'media')
-rw-r--r-- | media/base/simd/convert_rgb_to_yuv.h | 46 | ||||
-rw-r--r-- | media/base/simd/convert_rgb_to_yuv_c.cc (renamed from media/base/yuv_convert_c.cc) | 29 | ||||
-rw-r--r-- | media/base/simd/convert_rgb_to_yuv_sse2.cc (renamed from media/base/yuv_convert_sse2.cc) | 10 | ||||
-rw-r--r-- | media/base/simd/convert_rgb_to_yuv_ssse3.cc (renamed from media/base/simd/convert_rgb_to_yuv.cc) | 0 | ||||
-rw-r--r-- | media/base/simd/convert_rgb_to_yuv_x86.cc | 101 | ||||
-rw-r--r-- | media/base/simd/convert_yuv_to_rgb.h | 2 | ||||
-rw-r--r-- | media/base/simd/convert_yuv_to_rgb_c.cc | 3 | ||||
-rw-r--r-- | media/base/simd/yuv_to_rgb_table.cc (renamed from media/base/yuv_row_table.cc) | 2 | ||||
-rw-r--r-- | media/base/simd/yuv_to_rgb_table.h | 26 | ||||
-rw-r--r-- | media/base/yuv_convert.cc | 21 | ||||
-rw-r--r-- | media/base/yuv_convert_internal.h | 70 | ||||
-rw-r--r-- | media/base/yuv_convert_unittest.cc | 3 | ||||
-rw-r--r-- | media/base/yuv_row.h | 126 | ||||
-rw-r--r-- | media/base/yuv_row_posix.cc | 922 | ||||
-rw-r--r-- | media/base/yuv_row_win.cc | 589 | ||||
-rw-r--r-- | media/media.gyp | 18 |
16 files changed, 111 insertions, 1857 deletions
diff --git a/media/base/simd/convert_rgb_to_yuv.h b/media/base/simd/convert_rgb_to_yuv.h index e16fa51..03fe114 100644 --- a/media/base/simd/convert_rgb_to_yuv.h +++ b/media/base/simd/convert_rgb_to_yuv.h @@ -35,6 +35,52 @@ void ConvertRGB24ToYUV_SSSE3(const uint8* rgbframe, int ystride, int uvstride); +// SSE2 version of converting RGBA to YV12. +void ConvertRGB32ToYUV_SSE2(const uint8* rgbframe, + uint8* yplane, + uint8* uplane, + uint8* vplane, + int width, + int height, + int rgbstride, + int ystride, + int uvstride); + +// This is a C reference implementation of the above routine. +// This method should only be used in unit test. +// TODO(hclam): Should use this as the C version of RGB to YUV. +void ConvertRGB32ToYUV_SSE2_Reference(const uint8* rgbframe, + uint8* yplane, + uint8* uplane, + uint8* vplane, + int width, + int height, + int rgbstride, + int ystride, + int uvstride); + +// C version of converting RGBA to YV12. +void ConvertRGB32ToYUV_C(const uint8* rgbframe, + uint8* yplane, + uint8* uplane, + uint8* vplane, + int width, + int height, + int rgbstride, + int ystride, + int uvstride); + +// C version of converting RGB24 to YV12. +void ConvertRGB24ToYUV_C(const uint8* rgbframe, + uint8* yplane, + uint8* uplane, + uint8* vplane, + int width, + int height, + int rgbstride, + int ystride, + int uvstride); + } // namespace media #endif // MEDIA_BASE_SIMD_CONVERT_RGB_TO_YUV_H_ diff --git a/media/base/yuv_convert_c.cc b/media/base/simd/convert_rgb_to_yuv_c.cc index 39ec3cf..ae4c731 100644 --- a/media/base/yuv_convert_c.cc +++ b/media/base/simd/convert_rgb_to_yuv_c.cc @@ -2,8 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
-#include "media/base/yuv_convert.h" -#include "media/base/yuv_convert_internal.h" +#include "media/base/simd/convert_rgb_to_yuv.h" namespace media { @@ -80,30 +79,4 @@ void ConvertRGB24ToYUV_C(const uint8* rgbframe, } } -void ConvertYUY2ToYUV_C(const uint8* src, - uint8* yplane, - uint8* uplane, - uint8* vplane, - int width, - int height) { - for (int i = 0; i < height / 2; ++i) { - for (int j = 0; j < (width / 2); ++j) { - yplane[0] = src[0]; - *uplane = src[1]; - yplane[1] = src[2]; - *vplane = src[3]; - src += 4; - yplane += 2; - uplane++; - vplane++; - } - for (int j = 0; j < (width / 2); ++j) { - yplane[0] = src[0]; - yplane[1] = src[2]; - src += 4; - yplane += 2; - } - } -} - } // namespace media diff --git a/media/base/yuv_convert_sse2.cc b/media/base/simd/convert_rgb_to_yuv_sse2.cc index 9ecef5f..bb803f6 100644 --- a/media/base/yuv_convert_sse2.cc +++ b/media/base/simd/convert_rgb_to_yuv_sse2.cc @@ -2,11 +2,11 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
-#include "media/base/yuv_convert.h" -#include "media/base/yuv_convert_internal.h" -#include "media/base/yuv_row.h" +#include "build/build_config.h" +#include "media/base/simd/convert_rgb_to_yuv.h" +#include "media/base/simd/yuv_to_rgb_table.h" -#if defined(_MSC_VER) +#if defined(COMPILER_MSVC) #include <intrin.h> #else #include <mmintrin.h> @@ -85,6 +85,8 @@ static inline void ConvertRGBToYUV_V2H2(const uint8* rgb_buf_1, int sum_r = 0; int r, g, b; + + CONVERT_Y(rgb_buf_1, y_buf_1); CONVERT_Y(rgb_buf_1, y_buf_1); CONVERT_Y(rgb_buf_2, y_buf_2); diff --git a/media/base/simd/convert_rgb_to_yuv.cc b/media/base/simd/convert_rgb_to_yuv_ssse3.cc index 2bd6930..2bd6930 100644 --- a/media/base/simd/convert_rgb_to_yuv.cc +++ b/media/base/simd/convert_rgb_to_yuv_ssse3.cc diff --git a/media/base/simd/convert_rgb_to_yuv_x86.cc b/media/base/simd/convert_rgb_to_yuv_x86.cc deleted file mode 100644 index 2bd6930..0000000 --- a/media/base/simd/convert_rgb_to_yuv_x86.cc +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "media/base/simd/convert_rgb_to_yuv.h" - -#include "build/build_config.h" -#include "media/base/cpu_features.h" -#include "media/base/simd/convert_rgb_to_yuv_ssse3.h" - -namespace media { - -void ConvertRGB32ToYUV_SSSE3(const uint8* rgbframe, - uint8* yplane, - uint8* uplane, - uint8* vplane, - int width, - int height, - int rgbstride, - int ystride, - int uvstride) { -#ifdef ENABLE_SUBSAMPLING - for (; height >= 2; height -= 2) { - ConvertARGBToYUVEven_SSSE3(rgbframe, yplane, uplane, vplane, width); - rgbframe += rgbstride; - yplane += ystride; - - ConvertARGBToYUVOdd_SSSE3(rgbframe, yplane, uplane, vplane, width); - rgbframe += rgbstride; - yplane += ystride; - - uplane += uvstride; - vplane += uvstride; - } - - if (height) - ConvertARGBToYUVEven_SSSE3(rgbframe, yplane, uplane, vplane, width); -#else - for (; height >= 2; height -= 2) { - ConvertARGBToYUVRow_SSSE3(rgbframe, yplane, uplane, vplane, width); - rgbframe += rgbstride; - yplane += ystride; - - ConvertARGBToYUVRow_SSSE3(rgbframe, yplane, NULL, NULL, width); - rgbframe += rgbstride; - yplane += ystride; - - uplane += uvstride; - vplane += uvstride; - } - - if (height) - ConvertARGBToYUVRow_SSSE3(rgbframe, yplane, uplane, vplane, width); -#endif -} - -void ConvertRGB24ToYUV_SSSE3(const uint8* rgbframe, - uint8* yplane, - uint8* uplane, - uint8* vplane, - int width, - int height, - int rgbstride, - int ystride, - int uvstride) { -#ifdef ENABLE_SUBSAMPLING - for (; height >= 2; height -= 2) { - ConvertRGBToYUVEven_SSSE3(rgbframe, yplane, uplane, vplane, width); - rgbframe += rgbstride; - yplane += ystride; - - ConvertRGBToYUVOdd_SSSE3(rgbframe, yplane, uplane, vplane, width); - rgbframe += rgbstride; - yplane += ystride; - - uplane += uvstride; - vplane += uvstride; - } - - if (height) - ConvertRGBToYUVEven_SSSE3(rgbframe, yplane, uplane, vplane, width); -#else - for (; height >= 2; height -= 2) { - ConvertRGBToYUVRow_SSSE3(rgbframe, yplane, uplane, vplane, width); - rgbframe += rgbstride; - 
yplane += ystride; - - ConvertRGBToYUVRow_SSSE3(rgbframe, yplane, NULL, NULL, width); - rgbframe += rgbstride; - yplane += ystride; - - uplane += uvstride; - vplane += uvstride; - } - - if (height) - ConvertRGBToYUVRow_SSSE3(rgbframe, yplane, uplane, vplane, width); -#endif -} - -} // namespace media diff --git a/media/base/simd/convert_yuv_to_rgb.h b/media/base/simd/convert_yuv_to_rgb.h index 5f3df2c6..ff7d8ec 100644 --- a/media/base/simd/convert_yuv_to_rgb.h +++ b/media/base/simd/convert_yuv_to_rgb.h @@ -145,6 +145,6 @@ void LinearScaleYUVToRGB32Row_MMX_X64(const uint8* y_buf, int width, int source_dx); -} +} // extern "C" #endif // MEDIA_BASE_SIMD_CONVERT_YUV_TO_RGB_H_ diff --git a/media/base/simd/convert_yuv_to_rgb_c.cc b/media/base/simd/convert_yuv_to_rgb_c.cc index f8e70b2..c403984 100644 --- a/media/base/simd/convert_yuv_to_rgb_c.cc +++ b/media/base/simd/convert_yuv_to_rgb_c.cc @@ -3,8 +3,7 @@ // found in the LICENSE file. #include "media/base/simd/convert_yuv_to_rgb.h" -// TODO(hclam): Shouldn't depend on yuv_row.h. -#include "media/base/yuv_row.h" +#include "media/base/simd/yuv_to_rgb_table.h" #define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x))) #define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \ diff --git a/media/base/yuv_row_table.cc b/media/base/simd/yuv_to_rgb_table.cc index 296380b..f998e85 100644 --- a/media/base/yuv_row_table.cc +++ b/media/base/simd/yuv_to_rgb_table.cc @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "media/base/yuv_row.h" +#include "media/base/simd/yuv_to_rgb_table.h" extern "C" { diff --git a/media/base/simd/yuv_to_rgb_table.h b/media/base/simd/yuv_to_rgb_table.h new file mode 100644 index 0000000..0c43a7a --- /dev/null +++ b/media/base/simd/yuv_to_rgb_table.h @@ -0,0 +1,26 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Defines convertion table from YUV to RGB. + +#ifndef MEDIA_BASE_SIMD_YUV_TO_RGB_TABLE_H_ +#define MEDIA_BASE_SIMD_YUV_TO_RGB_TABLE_H_ + +#include "base/basictypes.h" +#include "build/build_config.h" + +extern "C" { + +#if defined(COMPILER_MSVC) +#define SIMD_ALIGNED(var) __declspec(align(16)) var +#else +#define SIMD_ALIGNED(var) var __attribute__((aligned(16))) +#endif + +// Align the table to 16-bytes to allow faster reading. +extern SIMD_ALIGNED(int16 kCoefficientsRgbY[768][4]); + +} // extern "C" + +#endif // MEDIA_BASE_SIMD_YUV_TO_RGB_TABLE_H_ diff --git a/media/base/yuv_convert.cc b/media/base/yuv_convert.cc index 7b4586a..2fdb798 100644 --- a/media/base/yuv_convert.cc +++ b/media/base/yuv_convert.cc @@ -23,8 +23,6 @@ #include "media/base/simd/convert_rgb_to_yuv.h" #include "media/base/simd/convert_yuv_to_rgb.h" #include "media/base/simd/filter_yuv.h" -#include "media/base/yuv_convert_internal.h" -#include "media/base/yuv_row.h" #if defined(ARCH_CPU_X86_FAMILY) #if defined(COMPILER_MSVC) @@ -326,7 +324,24 @@ void ConvertYUY2ToYUV(const uint8* src, uint8* vplane, int width, int height) { - ConvertYUY2ToYUV_C(src, yplane, uplane, vplane, width, height); + for (int i = 0; i < height / 2; ++i) { + for (int j = 0; j < (width / 2); ++j) { + yplane[0] = src[0]; + *uplane = src[1]; + yplane[1] = src[2]; + *vplane = src[3]; + src += 4; + yplane += 2; + uplane++; + vplane++; + } + for (int j = 0; j < (width / 2); ++j) { + yplane[0] = src[0]; + yplane[1] = src[2]; + src += 4; + yplane += 2; + } + } } void ConvertYUVToRGB32(const uint8* yplane, diff --git a/media/base/yuv_convert_internal.h b/media/base/yuv_convert_internal.h deleted file mode 100644 index 7be14c4..0000000 --- a/media/base/yuv_convert_internal.h +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// This file defines the YUV conversion functions for each specific -// optimization. - -#ifndef MEDIA_BASE_YUV_CONVERT_INTERNAL_H_ -#define MEDIA_BASE_YUV_CONVERT_INTERNAL_H_ - -#include "base/basictypes.h" - -namespace media { - -// SSE2 version of converting RGBA to YV12. -void ConvertRGB32ToYUV_SSE2(const uint8* rgbframe, - uint8* yplane, - uint8* uplane, - uint8* vplane, - int width, - int height, - int rgbstride, - int ystride, - int uvstride); - -// This is a C reference implementation of the above routine. -// This method should only be used in unit test. -void ConvertRGB32ToYUV_SSE2_Reference(const uint8* rgbframe, - uint8* yplane, - uint8* uplane, - uint8* vplane, - int width, - int height, - int rgbstride, - int ystride, - int uvstride); - -// C version of converting RGBA to YV12. -void ConvertRGB32ToYUV_C(const uint8* rgbframe, - uint8* yplane, - uint8* uplane, - uint8* vplane, - int width, - int height, - int rgbstride, - int ystride, - int uvstride); - -// C version of converting RGB24 to YV12. -void ConvertRGB24ToYUV_C(const uint8* rgbframe, - uint8* yplane, - uint8* uplane, - uint8* vplane, - int width, - int height, - int rgbstride, - int ystride, - int uvstride); - -// C version of converting YUY2 to YV12. 
-void ConvertYUY2ToYUV_C(const uint8* src, - uint8* yplane, - uint8* uplane, - uint8* vplane, - int width, - int height); - -} // namespace media - -#endif // MEDIA_BASE_YUV_CONVERT_INTERNAL_H_ diff --git a/media/base/yuv_convert_unittest.cc b/media/base/yuv_convert_unittest.cc index 83bcfd1..9f1a850 100644 --- a/media/base/yuv_convert_unittest.cc +++ b/media/base/yuv_convert_unittest.cc @@ -8,10 +8,9 @@ #include "base/path_service.h" #include "media/base/cpu_features.h" #include "media/base/djb2.h" +#include "media/base/simd/convert_rgb_to_yuv.h" #include "media/base/simd/convert_yuv_to_rgb.h" #include "media/base/yuv_convert.h" -#include "media/base/yuv_convert_internal.h" -#include "media/base/yuv_row.h" #include "testing/gtest/include/gtest/gtest.h" // Size of raw image. diff --git a/media/base/yuv_row.h b/media/base/yuv_row.h deleted file mode 100644 index 9a74d29..0000000 --- a/media/base/yuv_row.h +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// yuv_row internal functions to handle YUV conversion and scaling to RGB. -// These functions are used from both yuv_convert.cc and yuv_scale.cc. - -// TODO(fbarchard): Write function that can handle rotation and scaling. - -#ifndef MEDIA_BASE_YUV_ROW_H_ -#define MEDIA_BASE_YUV_ROW_H_ - -#include "base/basictypes.h" - -extern "C" { -// Can only do 1x. -// This is the second fastest of the scalers. -void FastConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); - -// Can do 1x, half size or any scale down by an integer amount. -// Step can be negative (mirroring, rotate 180). -// This is the third fastest of the scalers. 
-void ConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int step); - -// Rotate is like Convert, but applies different step to Y versus U and V. -// This allows rotation by 90 or 270, by stepping by stride. -// This is the forth fastest of the scalers. -void RotateConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int ystep, - int uvstep); - -// Doubler does 4 pixels at a time. Each pixel is replicated. -// This is the fastest of the scalers. -void DoubleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); - -// Handles arbitrary scaling up or down. -// Mirroring is supported, but not 90 or 270 degree rotation. -// Chroma is under sampled every 2 pixels for performance. -void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - -// Handles arbitrary scaling up or down with bilinear filtering. -// Mirroring is supported, but not 90 or 270 degree rotation. -// Chroma is under sampled every 2 pixels for performance. -// This is the slowest of the scalers. -void LinearScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - -void FastConvertRGB32ToYUVRow(const uint8* rgb_buf_1, - const uint8* rgb_buf_2, - uint8* y_buf_1, - uint8* y_buf_2, - uint8* u_buf, - uint8* v_buf, - int width); - -#if defined(_MSC_VER) -#define SIMD_ALIGNED(var) __declspec(align(16)) var -#else -#define SIMD_ALIGNED(var) var __attribute__((aligned(16))) -#endif -extern SIMD_ALIGNED(int16 kCoefficientsRgbY[768][4]); - -// Method to force C version. 
-//#define USE_MMX 0 -//#define USE_SSE2 0 - -#if !defined(USE_MMX) -// Windows, Mac and Linux/BSD use MMX -#if defined(__MMX__) || defined(_MSC_VER) -#define USE_MMX 1 -#else -#define USE_MMX 0 -#endif -#endif - -#if !defined(USE_SSE2) -#if defined(__SSE2__) || defined(ARCH_CPU_X86_64) || _M_IX86_FP==2 -#define USE_SSE2 1 -#else -#define USE_SSE2 0 -#endif -#endif - -// x64 uses MMX2 (SSE) so emms is not required. -// Warning C4799: function has no EMMS instruction. -// EMMS() is slow and should be called by the calling function once per image. -#if USE_MMX && !defined(ARCH_CPU_X86_64) -#if defined(_MSC_VER) -#define EMMS() __asm emms -#pragma warning(disable: 4799) -#else -#define EMMS() asm("emms") -#endif -#else -#define EMMS() -#endif - -} // extern "C" - -#endif // MEDIA_BASE_YUV_ROW_H_ diff --git a/media/base/yuv_row_posix.cc b/media/base/yuv_row_posix.cc deleted file mode 100644 index f839de8..0000000 --- a/media/base/yuv_row_posix.cc +++ /dev/null @@ -1,922 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "media/base/yuv_row.h" - -#ifndef NDEBUG -#include "base/logging.h" -#else -#define DCHECK(a) -#endif - -extern "C" { - -#if USE_SSE2 && defined(ARCH_CPU_X86_64) - -// AMD64 ABI uses register paremters. 
-void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi - const uint8* u_buf, // rsi - const uint8* v_buf, // rdx - uint8* rgb_buf, // rcx - int width) { // r8 - asm( - "jmp convertend\n" -"convertloop:" - "movzb (%1),%%r10\n" - "add $0x1,%1\n" - "movzb (%2),%%r11\n" - "add $0x1,%2\n" - "movq 2048(%5,%%r10,8),%%xmm0\n" - "movzb (%0),%%r10\n" - "movq 4096(%5,%%r11,8),%%xmm1\n" - "movzb 0x1(%0),%%r11\n" - "paddsw %%xmm1,%%xmm0\n" - "movq (%5,%%r10,8),%%xmm2\n" - "add $0x2,%0\n" - "movq (%5,%%r11,8),%%xmm3\n" - "paddsw %%xmm0,%%xmm2\n" - "paddsw %%xmm0,%%xmm3\n" - "shufps $0x44,%%xmm3,%%xmm2\n" - "psraw $0x6,%%xmm2\n" - "packuswb %%xmm2,%%xmm2\n" - "movq %%xmm2,0x0(%3)\n" - "add $0x8,%3\n" -"convertend:" - "sub $0x2,%4\n" - "jns convertloop\n" - -"convertnext:" - "add $0x1,%4\n" - "js convertdone\n" - - "movzb (%1),%%r10\n" - "movq 2048(%5,%%r10,8),%%xmm0\n" - "movzb (%2),%%r10\n" - "movq 4096(%5,%%r10,8),%%xmm1\n" - "paddsw %%xmm1,%%xmm0\n" - "movzb (%0),%%r10\n" - "movq (%5,%%r10,8),%%xmm1\n" - "paddsw %%xmm0,%%xmm1\n" - "psraw $0x6,%%xmm1\n" - "packuswb %%xmm1,%%xmm1\n" - "movd %%xmm1,0x0(%3)\n" -"convertdone:" - : - : "r"(y_buf), // %0 - "r"(u_buf), // %1 - "r"(v_buf), // %2 - "r"(rgb_buf), // %3 - "r"(width), // %4 - "r" (kCoefficientsRgbY) // %5 - : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3" -); -} - -void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi - const uint8* u_buf, // rsi - const uint8* v_buf, // rdx - uint8* rgb_buf, // rcx - int width, // r8 - int source_dx) { // r9 - asm( - "xor %%r11,%%r11\n" - "sub $0x2,%4\n" - "js scalenext\n" - -"scaleloop:" - "mov %%r11,%%r10\n" - "sar $0x11,%%r10\n" - "movzb (%1,%%r10,1),%%rax\n" - "movq 2048(%5,%%rax,8),%%xmm0\n" - "movzb (%2,%%r10,1),%%rax\n" - "movq 4096(%5,%%rax,8),%%xmm1\n" - "lea (%%r11,%6),%%r10\n" - "sar $0x10,%%r11\n" - "movzb (%0,%%r11,1),%%rax\n" - "paddsw %%xmm1,%%xmm0\n" - "movq (%5,%%rax,8),%%xmm1\n" - "lea (%%r10,%6),%%r11\n" - "sar $0x10,%%r10\n" - "movzb (%0,%%r10,1),%%rax\n" - 
"movq (%5,%%rax,8),%%xmm2\n" - "paddsw %%xmm0,%%xmm1\n" - "paddsw %%xmm0,%%xmm2\n" - "shufps $0x44,%%xmm2,%%xmm1\n" - "psraw $0x6,%%xmm1\n" - "packuswb %%xmm1,%%xmm1\n" - "movq %%xmm1,0x0(%3)\n" - "add $0x8,%3\n" - "sub $0x2,%4\n" - "jns scaleloop\n" - -"scalenext:" - "add $0x1,%4\n" - "js scaledone\n" - - "mov %%r11,%%r10\n" - "sar $0x11,%%r10\n" - "movzb (%1,%%r10,1),%%rax\n" - "movq 2048(%5,%%rax,8),%%xmm0\n" - "movzb (%2,%%r10,1),%%rax\n" - "movq 4096(%5,%%rax,8),%%xmm1\n" - "paddsw %%xmm1,%%xmm0\n" - "sar $0x10,%%r11\n" - "movzb (%0,%%r11,1),%%rax\n" - "movq (%5,%%rax,8),%%xmm1\n" - "paddsw %%xmm0,%%xmm1\n" - "psraw $0x6,%%xmm1\n" - "packuswb %%xmm1,%%xmm1\n" - "movd %%xmm1,0x0(%3)\n" - -"scaledone:" - : - : "r"(y_buf), // %0 - "r"(u_buf), // %1 - "r"(v_buf), // %2 - "r"(rgb_buf), // %3 - "r"(width), // %4 - "r" (kCoefficientsRgbY), // %5 - "r"(static_cast<long>(source_dx)) // %6 - : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2" -); -} - -void LinearScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - asm( - "xor %%r11,%%r11\n" // x = 0 - "sub $0x2,%4\n" - "js .lscalenext\n" - "cmp $0x20000,%6\n" // if source_dx >= 2.0 - "jl .lscalehalf\n" - "mov $0x8000,%%r11\n" // x = 0.5 for 1/2 or less -".lscalehalf:" - -".lscaleloop:" - "mov %%r11,%%r10\n" - "sar $0x11,%%r10\n" - - "movzb (%1, %%r10, 1), %%r13 \n" - "movzb 1(%1, %%r10, 1), %%r14 \n" - "mov %%r11, %%rax \n" - "and $0x1fffe, %%rax \n" - "imul %%rax, %%r14 \n" - "xor $0x1fffe, %%rax \n" - "imul %%rax, %%r13 \n" - "add %%r14, %%r13 \n" - "shr $17, %%r13 \n" - "movq 2048(%5,%%r13,8), %%xmm0\n" - - "movzb (%2, %%r10, 1), %%r13 \n" - "movzb 1(%2, %%r10, 1), %%r14 \n" - "mov %%r11, %%rax \n" - "and $0x1fffe, %%rax \n" - "imul %%rax, %%r14 \n" - "xor $0x1fffe, %%rax \n" - "imul %%rax, %%r13 \n" - "add %%r14, %%r13 \n" - "shr $17, %%r13 \n" - "movq 4096(%5,%%r13,8), %%xmm1\n" - - "mov %%r11, %%rax \n" - "lea 
(%%r11,%6),%%r10\n" - "sar $0x10,%%r11\n" - "paddsw %%xmm1,%%xmm0\n" - - "movzb (%0, %%r11, 1), %%r13 \n" - "movzb 1(%0, %%r11, 1), %%r14 \n" - "and $0xffff, %%rax \n" - "imul %%rax, %%r14 \n" - "xor $0xffff, %%rax \n" - "imul %%rax, %%r13 \n" - "add %%r14, %%r13 \n" - "shr $16, %%r13 \n" - "movq (%5,%%r13,8),%%xmm1\n" - - "mov %%r10, %%rax \n" - "lea (%%r10,%6),%%r11\n" - "sar $0x10,%%r10\n" - - "movzb (%0,%%r10,1), %%r13 \n" - "movzb 1(%0,%%r10,1), %%r14 \n" - "and $0xffff, %%rax \n" - "imul %%rax, %%r14 \n" - "xor $0xffff, %%rax \n" - "imul %%rax, %%r13 \n" - "add %%r14, %%r13 \n" - "shr $16, %%r13 \n" - "movq (%5,%%r13,8),%%xmm2\n" - - "paddsw %%xmm0,%%xmm1\n" - "paddsw %%xmm0,%%xmm2\n" - "shufps $0x44,%%xmm2,%%xmm1\n" - "psraw $0x6,%%xmm1\n" - "packuswb %%xmm1,%%xmm1\n" - "movq %%xmm1,0x0(%3)\n" - "add $0x8,%3\n" - "sub $0x2,%4\n" - "jns .lscaleloop\n" - -".lscalenext:" - "add $0x1,%4\n" - "js .lscaledone\n" - - "mov %%r11,%%r10\n" - "sar $0x11,%%r10\n" - - "movzb (%1,%%r10,1), %%r13 \n" - "movq 2048(%5,%%r13,8),%%xmm0\n" - - "movzb (%2,%%r10,1), %%r13 \n" - "movq 4096(%5,%%r13,8),%%xmm1\n" - - "paddsw %%xmm1,%%xmm0\n" - "sar $0x10,%%r11\n" - - "movzb (%0,%%r11,1), %%r13 \n" - "movq (%5,%%r13,8),%%xmm1\n" - - "paddsw %%xmm0,%%xmm1\n" - "psraw $0x6,%%xmm1\n" - "packuswb %%xmm1,%%xmm1\n" - "movd %%xmm1,0x0(%3)\n" - -".lscaledone:" - : - : "r"(y_buf), // %0 - "r"(u_buf), // %1 - "r"(v_buf), // %2 - "r"(rgb_buf), // %3 - "r"(width), // %4 - "r" (kCoefficientsRgbY), // %5 - "r"(static_cast<long>(source_dx)) // %6 - : "memory", "r10", "r11", "r13", "r14", "rax", "xmm0", "xmm1", "xmm2" -); -} - -#elif USE_MMX && !defined(ARCH_CPU_X86_64) && !defined(__PIC__) - -// PIC version is slower because less registers are available, so -// non-PIC is used on platforms where it is possible. 
- -void FastConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); - asm( - ".text\n" - ".global FastConvertYUVToRGB32Row\n" -"FastConvertYUVToRGB32Row:\n" - "pusha\n" - "mov 0x24(%esp),%edx\n" - "mov 0x28(%esp),%edi\n" - "mov 0x2c(%esp),%esi\n" - "mov 0x30(%esp),%ebp\n" - "mov 0x34(%esp),%ecx\n" - "jmp convertend\n" - -"convertloop:" - "movzbl (%edi),%eax\n" - "add $0x1,%edi\n" - "movzbl (%esi),%ebx\n" - "add $0x1,%esi\n" - "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" - "movzbl (%edx),%eax\n" - "paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n" - "movzbl 0x1(%edx),%ebx\n" - "movq kCoefficientsRgbY(,%eax,8),%mm1\n" - "add $0x2,%edx\n" - "movq kCoefficientsRgbY(,%ebx,8),%mm2\n" - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%ebp)\n" - "add $0x8,%ebp\n" -"convertend:" - "sub $0x2,%ecx\n" - "jns convertloop\n" - - "and $0x1,%ecx\n" - "je convertdone\n" - - "movzbl (%edi),%eax\n" - "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" - "movzbl (%esi),%eax\n" - "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n" - "movzbl (%edx),%eax\n" - "movq kCoefficientsRgbY(,%eax,8),%mm1\n" - "paddsw %mm0,%mm1\n" - "psraw $0x6,%mm1\n" - "packuswb %mm1,%mm1\n" - "movd %mm1,0x0(%ebp)\n" -"convertdone:" - "popa\n" - "ret\n" -); - - -void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - asm( - ".text\n" - ".global ScaleYUVToRGB32Row\n" -"ScaleYUVToRGB32Row:\n" - "pusha\n" - "mov 0x24(%esp),%edx\n" - "mov 0x28(%esp),%edi\n" - "mov 0x2c(%esp),%esi\n" - "mov 0x30(%esp),%ebp\n" - "mov 0x34(%esp),%ecx\n" - "xor %ebx,%ebx\n" - "jmp scaleend\n" - -"scaleloop:" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%edi,%eax,1),%eax\n" - "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%esi,%eax,1),%eax\n" - "paddsw 
kCoefficientsRgbY+4096(,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%eax\n" - "movq kCoefficientsRgbY(,%eax,8),%mm1\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%eax\n" - "movq kCoefficientsRgbY(,%eax,8),%mm2\n" - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%ebp)\n" - "add $0x8,%ebp\n" -"scaleend:" - "sub $0x2,%ecx\n" - "jns scaleloop\n" - - "and $0x1,%ecx\n" - "je scaledone\n" - - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%edi,%eax,1),%eax\n" - "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%esi,%eax,1),%eax\n" - "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%eax\n" - "movq kCoefficientsRgbY(,%eax,8),%mm1\n" - "paddsw %mm0,%mm1\n" - "psraw $0x6,%mm1\n" - "packuswb %mm1,%mm1\n" - "movd %mm1,0x0(%ebp)\n" - -"scaledone:" - "popa\n" - "ret\n" -); - -void LinearScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - asm( - ".text\n" - ".global LinearScaleYUVToRGB32Row\n" -"LinearScaleYUVToRGB32Row:\n" - "pusha\n" - "mov 0x24(%esp),%edx\n" - "mov 0x28(%esp),%edi\n" - "mov 0x30(%esp),%ebp\n" - - // source_width = width * source_dx + ebx - "mov 0x34(%esp), %ecx\n" - "imull 0x38(%esp), %ecx\n" - "mov %ecx, 0x34(%esp)\n" - - "mov 0x38(%esp), %ecx\n" - "xor %ebx,%ebx\n" // x = 0 - "cmp $0x20000,%ecx\n" // if source_dx >= 2.0 - "jl .lscaleend\n" - "mov $0x8000,%ebx\n" // x = 0.5 for 1/2 or less - "jmp .lscaleend\n" - -".lscaleloop:" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - - "movzbl (%edi,%eax,1),%ecx\n" - "movzbl 1(%edi,%eax,1),%esi\n" - "mov %ebx,%eax\n" - "andl $0x1fffe, %eax \n" - "imul %eax, %esi \n" - "xorl $0x1fffe, %eax \n" - "imul %eax, %ecx \n" - "addl %esi, %ecx \n" 
- "shrl $17, %ecx \n" - "movq kCoefficientsRgbY+2048(,%ecx,8),%mm0\n" - - "mov 0x2c(%esp),%esi\n" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - - "movzbl (%esi,%eax,1),%ecx\n" - "movzbl 1(%esi,%eax,1),%esi\n" - "mov %ebx,%eax\n" - "andl $0x1fffe, %eax \n" - "imul %eax, %esi \n" - "xorl $0x1fffe, %eax \n" - "imul %eax, %ecx \n" - "addl %esi, %ecx \n" - "shrl $17, %ecx \n" - "paddsw kCoefficientsRgbY+4096(,%ecx,8),%mm0\n" - - "mov %ebx,%eax\n" - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%ecx\n" - "movzbl 1(%edx,%eax,1),%esi\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "andl $0xffff, %eax \n" - "imul %eax, %esi \n" - "xorl $0xffff, %eax \n" - "imul %eax, %ecx \n" - "addl %esi, %ecx \n" - "shrl $16, %ecx \n" - "movq kCoefficientsRgbY(,%ecx,8),%mm1\n" - - "cmp 0x34(%esp), %ebx\n" - "jge .lscalelastpixel\n" - - "mov %ebx,%eax\n" - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%ecx\n" - "movzbl 1(%edx,%eax,1),%esi\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "andl $0xffff, %eax \n" - "imul %eax, %esi \n" - "xorl $0xffff, %eax \n" - "imul %eax, %ecx \n" - "addl %esi, %ecx \n" - "shrl $16, %ecx \n" - "movq kCoefficientsRgbY(,%ecx,8),%mm2\n" - - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%ebp)\n" - "add $0x8,%ebp\n" - -".lscaleend:" - "cmp 0x34(%esp), %ebx\n" - "jl .lscaleloop\n" - "popa\n" - "ret\n" - -".lscalelastpixel:" - "paddsw %mm0, %mm1\n" - "psraw $6, %mm1\n" - "packuswb %mm1, %mm1\n" - "movd %mm1, (%ebp)\n" - "popa\n" - "ret\n" -); - -#elif USE_MMX && !defined(ARCH_CPU_X86_64) && defined(__PIC__) - -extern void PICConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int16 *kCoefficientsRgbY); - asm( - ".text\n" -#if defined(OS_MACOSX) -"_PICConvertYUVToRGB32Row:\n" -#else -"PICConvertYUVToRGB32Row:\n" -#endif - "pusha\n" - "mov 0x24(%esp),%edx\n" - "mov 0x28(%esp),%edi\n" - "mov 0x2c(%esp),%esi\n" - "mov 
0x30(%esp),%ebp\n" - "mov 0x38(%esp),%ecx\n" - - "jmp .Lconvertend\n" - -".Lconvertloop:" - "movzbl (%edi),%eax\n" - "add $0x1,%edi\n" - "movzbl (%esi),%ebx\n" - "add $0x1,%esi\n" - "movq 2048(%ecx,%eax,8),%mm0\n" - "movzbl (%edx),%eax\n" - "paddsw 4096(%ecx,%ebx,8),%mm0\n" - "movzbl 0x1(%edx),%ebx\n" - "movq 0(%ecx,%eax,8),%mm1\n" - "add $0x2,%edx\n" - "movq 0(%ecx,%ebx,8),%mm2\n" - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%ebp)\n" - "add $0x8,%ebp\n" -".Lconvertend:" - "subl $0x2,0x34(%esp)\n" - "jns .Lconvertloop\n" - - "andl $0x1,0x34(%esp)\n" - "je .Lconvertdone\n" - - "movzbl (%edi),%eax\n" - "movq 2048(%ecx,%eax,8),%mm0\n" - "movzbl (%esi),%eax\n" - "paddsw 4096(%ecx,%eax,8),%mm0\n" - "movzbl (%edx),%eax\n" - "movq 0(%ecx,%eax,8),%mm1\n" - "paddsw %mm0,%mm1\n" - "psraw $0x6,%mm1\n" - "packuswb %mm1,%mm1\n" - "movd %mm1,0x0(%ebp)\n" -".Lconvertdone:\n" - "popa\n" - "ret\n" -); - -void FastConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { - PICConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, - &kCoefficientsRgbY[0][0]); -} - -extern void PICScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx, - int16 *kCoefficientsRgbY); - - asm( - ".text\n" -#if defined(OS_MACOSX) -"_PICScaleYUVToRGB32Row:\n" -#else -"PICScaleYUVToRGB32Row:\n" -#endif - "pusha\n" - "mov 0x24(%esp),%edx\n" - "mov 0x28(%esp),%edi\n" - "mov 0x2c(%esp),%esi\n" - "mov 0x30(%esp),%ebp\n" - "mov 0x3c(%esp),%ecx\n" - "xor %ebx,%ebx\n" - "jmp Lscaleend\n" - -"Lscaleloop:" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%edi,%eax,1),%eax\n" - "movq 2048(%ecx,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%esi,%eax,1),%eax\n" - "paddsw 4096(%ecx,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "sar $0x10,%eax\n" - "movzbl 
(%edx,%eax,1),%eax\n" - "movq 0(%ecx,%eax,8),%mm1\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%eax\n" - "movq 0(%ecx,%eax,8),%mm2\n" - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%ebp)\n" - "add $0x8,%ebp\n" -"Lscaleend:" - "subl $0x2,0x34(%esp)\n" - "jns Lscaleloop\n" - - "andl $0x1,0x34(%esp)\n" - "je Lscaledone\n" - - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%edi,%eax,1),%eax\n" - "movq 2048(%ecx,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%esi,%eax,1),%eax\n" - "paddsw 4096(%ecx,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%eax\n" - "movq 0(%ecx,%eax,8),%mm1\n" - "paddsw %mm0,%mm1\n" - "psraw $0x6,%mm1\n" - "packuswb %mm1,%mm1\n" - "movd %mm1,0x0(%ebp)\n" - -"Lscaledone:" - "popa\n" - "ret\n" -); - - -void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - PICScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, source_dx, - &kCoefficientsRgbY[0][0]); -} - -void PICLinearScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx, - int16 *kCoefficientsRgbY); - asm( - ".text\n" -#if defined(OS_MACOSX) -"_PICLinearScaleYUVToRGB32Row:\n" -#else -"PICLinearScaleYUVToRGB32Row:\n" -#endif - "pusha\n" - "mov 0x24(%esp),%edx\n" - "mov 0x30(%esp),%ebp\n" - "mov 0x34(%esp),%ecx\n" - "mov 0x3c(%esp),%edi\n" - "xor %ebx,%ebx\n" - - // source_width = width * source_dx + ebx - "mov 0x34(%esp), %ecx\n" - "imull 0x38(%esp), %ecx\n" - "mov %ecx, 0x34(%esp)\n" - - "mov 0x38(%esp), %ecx\n" - "xor %ebx,%ebx\n" // x = 0 - "cmp $0x20000,%ecx\n" // if source_dx >= 2.0 - "jl .lscaleend\n" - "mov $0x8000,%ebx\n" // x = 0.5 for 1/2 or less - "jmp .lscaleend\n" - -".lscaleloop:" - "mov 0x28(%esp),%esi\n" - "mov %ebx,%eax\n" - "sar 
$0x11,%eax\n" - - "movzbl (%esi,%eax,1),%ecx\n" - "movzbl 1(%esi,%eax,1),%esi\n" - "mov %ebx,%eax\n" - "andl $0x1fffe, %eax \n" - "imul %eax, %esi \n" - "xorl $0x1fffe, %eax \n" - "imul %eax, %ecx \n" - "addl %esi, %ecx \n" - "shrl $17, %ecx \n" - "movq 2048(%edi,%ecx,8),%mm0\n" - - "mov 0x2c(%esp),%esi\n" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - - "movzbl (%esi,%eax,1),%ecx\n" - "movzbl 1(%esi,%eax,1),%esi\n" - "mov %ebx,%eax\n" - "andl $0x1fffe, %eax \n" - "imul %eax, %esi \n" - "xorl $0x1fffe, %eax \n" - "imul %eax, %ecx \n" - "addl %esi, %ecx \n" - "shrl $17, %ecx \n" - "paddsw 4096(%edi,%ecx,8),%mm0\n" - - "mov %ebx,%eax\n" - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%ecx\n" - "movzbl 1(%edx,%eax,1),%esi\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "andl $0xffff, %eax \n" - "imul %eax, %esi \n" - "xorl $0xffff, %eax \n" - "imul %eax, %ecx \n" - "addl %esi, %ecx \n" - "shrl $16, %ecx \n" - "movq (%edi,%ecx,8),%mm1\n" - - "cmp 0x34(%esp), %ebx\n" - "jge .lscalelastpixel\n" - - "mov %ebx,%eax\n" - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%ecx\n" - "movzbl 1(%edx,%eax,1),%esi\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "andl $0xffff, %eax \n" - "imul %eax, %esi \n" - "xorl $0xffff, %eax \n" - "imul %eax, %ecx \n" - "addl %esi, %ecx \n" - "shrl $16, %ecx \n" - "movq (%edi,%ecx,8),%mm2\n" - - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%ebp)\n" - "add $0x8,%ebp\n" - -".lscaleend:" - "cmp %ebx, 0x34(%esp)\n" - "jg .lscaleloop\n" - "popa\n" - "ret\n" - -".lscalelastpixel:" - "paddsw %mm0, %mm1\n" - "psraw $6, %mm1\n" - "packuswb %mm1, %mm1\n" - "movd %mm1, (%ebp)\n" - "popa\n" - "ret\n" -); - -void LinearScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - PICLinearScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, source_dx, - &kCoefficientsRgbY[0][0]); -} - -#else // USE_MMX - 
-// C reference code that mimic the YUV assembly. -#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x))) -#define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \ - (((x) + (y)) > 32767 ? 32767 : ((x) + (y)))) - -static inline void YuvPixel(uint8 y, - uint8 u, - uint8 v, - uint8* rgb_buf) { - - int b = kCoefficientsRgbY[256+u][0]; - int g = kCoefficientsRgbY[256+u][1]; - int r = kCoefficientsRgbY[256+u][2]; - int a = kCoefficientsRgbY[256+u][3]; - - b = paddsw(b, kCoefficientsRgbY[512+v][0]); - g = paddsw(g, kCoefficientsRgbY[512+v][1]); - r = paddsw(r, kCoefficientsRgbY[512+v][2]); - a = paddsw(a, kCoefficientsRgbY[512+v][3]); - - b = paddsw(b, kCoefficientsRgbY[y][0]); - g = paddsw(g, kCoefficientsRgbY[y][1]); - r = paddsw(r, kCoefficientsRgbY[y][2]); - a = paddsw(a, kCoefficientsRgbY[y][3]); - - b >>= 6; - g >>= 6; - r >>= 6; - a >>= 6; - - *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) | - (packuswb(g) << 8) | - (packuswb(r) << 16) | - (packuswb(a) << 24); -} - -void FastConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { - for (int x = 0; x < width; x += 2) { - uint8 u = u_buf[x >> 1]; - uint8 v = v_buf[x >> 1]; - uint8 y0 = y_buf[x]; - YuvPixel(y0, u, v, rgb_buf); - if ((x + 1) < width) { - uint8 y1 = y_buf[x + 1]; - YuvPixel(y1, u, v, rgb_buf + 4); - } - rgb_buf += 8; // Advance 2 pixels. - } -} - -// 16.16 fixed point is used. A shift by 16 isolates the integer. -// A shift by 17 is used to further subsample the chrominence channels. -// & 0xffff isolates the fixed point fraction. >> 2 to get the upper 2 bits, -// for 1/65536 pixel accurate interpolation. 
-void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - int x = 0; - for (int i = 0; i < width; i += 2) { - int y = y_buf[x >> 16]; - int u = u_buf[(x >> 17)]; - int v = v_buf[(x >> 17)]; - YuvPixel(y, u, v, rgb_buf); - x += source_dx; - if ((i + 1) < width) { - y = y_buf[x >> 16]; - YuvPixel(y, u, v, rgb_buf+4); - x += source_dx; - } - rgb_buf += 8; - } -} - -void LinearScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - int x = 0; - if (source_dx >= 0x20000) { - x = 32768; - } - for (int i = 0; i < width; i += 2) { - int y0 = y_buf[x >> 16]; - int y1 = y_buf[(x >> 16) + 1]; - int u0 = u_buf[(x >> 17)]; - int u1 = u_buf[(x >> 17) + 1]; - int v0 = v_buf[(x >> 17)]; - int v1 = v_buf[(x >> 17) + 1]; - int y_frac = (x & 65535); - int uv_frac = ((x >> 1) & 65535); - int y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; - int u = (uv_frac * u1 + (uv_frac ^ 65535) * u0) >> 16; - int v = (uv_frac * v1 + (uv_frac ^ 65535) * v0) >> 16; - YuvPixel(y, u, v, rgb_buf); - x += source_dx; - if ((i + 1) < width) { - y0 = y_buf[x >> 16]; - y1 = y_buf[(x >> 16) + 1]; - y_frac = (x & 65535); - y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; - YuvPixel(y, u, v, rgb_buf+4); - x += source_dx; - } - rgb_buf += 8; - } -} - -#endif // USE_MMX -} // extern "C" diff --git a/media/base/yuv_row_win.cc b/media/base/yuv_row_win.cc deleted file mode 100644 index b5049a5..0000000 --- a/media/base/yuv_row_win.cc +++ /dev/null @@ -1,589 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "media/base/yuv_row.h" - -#define kCoefficientsRgbU kCoefficientsRgbY + 2048 -#define kCoefficientsRgbV kCoefficientsRgbY + 4096 - -extern "C" { - -#if USE_MMX -__declspec(naked) -void FastConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - mov esi, [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - jmp convertend - - convertloop : - movzx eax, byte ptr [edi] - add edi, 1 - movzx ebx, byte ptr [esi] - add esi, 1 - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [edx] - paddsw mm0, [kCoefficientsRgbV + 8 * ebx] - movzx ebx, byte ptr [edx + 1] - movq mm1, [kCoefficientsRgbY + 8 * eax] - add edx, 2 - movq mm2, [kCoefficientsRgbY + 8 * ebx] - paddsw mm1, mm0 - paddsw mm2, mm0 - psraw mm1, 6 - psraw mm2, 6 - packuswb mm1, mm2 - movntq [ebp], mm1 - add ebp, 8 - convertend : - sub ecx, 2 - jns convertloop - - and ecx, 1 // odd number of pixels? 
- jz convertdone - - movzx eax, byte ptr [edi] - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [esi] - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - movzx eax, byte ptr [edx] - movq mm1, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - movd [ebp], mm1 - convertdone : - - popad - ret - } -} - -__declspec(naked) -void ConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int step) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - mov esi, [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - mov ebx, [esp + 32 + 24] // step - jmp wend - - wloop : - movzx eax, byte ptr [edi] - add edi, ebx - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [esi] - add esi, ebx - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - movzx eax, byte ptr [edx] - add edx, ebx - movq mm1, [kCoefficientsRgbY + 8 * eax] - movzx eax, byte ptr [edx] - add edx, ebx - movq mm2, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - paddsw mm2, mm0 - psraw mm1, 6 - psraw mm2, 6 - packuswb mm1, mm2 - movntq [ebp], mm1 - add ebp, 8 - wend : - sub ecx, 2 - jns wloop - - and ecx, 1 // odd number of pixels? 
- jz wdone - - movzx eax, byte ptr [edi] - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [esi] - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - movzx eax, byte ptr [edx] - movq mm1, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - movd [ebp], mm1 - wdone : - - popad - ret - } -} - -__declspec(naked) -void RotateConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int ystep, - int uvstep) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - mov esi, [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - jmp wend - - wloop : - movzx eax, byte ptr [edi] - mov ebx, [esp + 32 + 28] // uvstep - add edi, ebx - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [esi] - add esi, ebx - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - movzx eax, byte ptr [edx] - mov ebx, [esp + 32 + 24] // ystep - add edx, ebx - movq mm1, [kCoefficientsRgbY + 8 * eax] - movzx eax, byte ptr [edx] - add edx, ebx - movq mm2, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - paddsw mm2, mm0 - psraw mm1, 6 - psraw mm2, 6 - packuswb mm1, mm2 - movntq [ebp], mm1 - add ebp, 8 - wend : - sub ecx, 2 - jns wloop - - and ecx, 1 // odd number of pixels? 
- jz wdone - - movzx eax, byte ptr [edi] - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [esi] - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - movzx eax, byte ptr [edx] - movq mm1, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - movd [ebp], mm1 - wdone : - - popad - ret - } -} - -__declspec(naked) -void DoubleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - mov esi, [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - jmp wend - - wloop : - movzx eax, byte ptr [edi] - add edi, 1 - movzx ebx, byte ptr [esi] - add esi, 1 - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [edx] - paddsw mm0, [kCoefficientsRgbV + 8 * ebx] - movq mm1, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - punpckldq mm1, mm1 - movntq [ebp], mm1 - - movzx ebx, byte ptr [edx + 1] - add edx, 2 - paddsw mm0, [kCoefficientsRgbY + 8 * ebx] - psraw mm0, 6 - packuswb mm0, mm0 - punpckldq mm0, mm0 - movntq [ebp+8], mm0 - add ebp, 16 - wend : - sub ecx, 4 - jns wloop - - add ecx, 4 - jz wdone - - movzx eax, byte ptr [edi] - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [esi] - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - movzx eax, byte ptr [edx] - movq mm1, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - jmp wend1 - - wloop1 : - movd [ebp], mm1 - add ebp, 4 - wend1 : - sub ecx, 1 - jns wloop1 - wdone : - popad - ret - } -} - -// This version does general purpose scaling by any amount, up or down. -// The only thing it can not do it rotation by 90 or 270. -// For performance the chroma is under sampled, reducing cost of a 3x -// 1080p scale from 8.4 ms to 5.4 ms. 
-__declspec(naked) -void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - mov esi, [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - xor ebx, ebx // x - jmp scaleend - - scaleloop : - mov eax, ebx - sar eax, 17 - movzx eax, byte ptr [edi + eax] - movq mm0, [kCoefficientsRgbU + 8 * eax] - mov eax, ebx - sar eax, 17 - movzx eax, byte ptr [esi + eax] - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - mov eax, ebx - add ebx, [esp + 32 + 24] // x += source_dx - sar eax, 16 - movzx eax, byte ptr [edx + eax] - movq mm1, [kCoefficientsRgbY + 8 * eax] - mov eax, ebx - add ebx, [esp + 32 + 24] // x += source_dx - sar eax, 16 - movzx eax, byte ptr [edx + eax] - movq mm2, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - paddsw mm2, mm0 - psraw mm1, 6 - psraw mm2, 6 - packuswb mm1, mm2 - movntq [ebp], mm1 - add ebp, 8 - scaleend : - sub ecx, 2 - jns scaleloop - - and ecx, 1 // odd number of pixels? 
- jz scaledone - - mov eax, ebx - sar eax, 17 - movzx eax, byte ptr [edi + eax] - movq mm0, [kCoefficientsRgbU + 8 * eax] - mov eax, ebx - sar eax, 17 - movzx eax, byte ptr [esi + eax] - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - mov eax, ebx - sar eax, 16 - movzx eax, byte ptr [edx + eax] - movq mm1, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - movd [ebp], mm1 - - scaledone : - popad - ret - } -} - -__declspec(naked) -void LinearScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - // [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - imul ecx, [esp + 32 + 24] // source_dx - mov [esp + 32 + 20], ecx // source_width = width * source_dx - mov ecx, [esp + 32 + 24] // source_dx - xor ebx, ebx // x = 0 - cmp ecx, 0x20000 - jl lscaleend - mov ebx, 0x8000 // x = 0.5 for 1/2 or less - jmp lscaleend -lscaleloop: - mov eax, ebx - sar eax, 0x11 - - movzx ecx, byte ptr [edi + eax] - movzx esi, byte ptr [edi + eax + 1] - mov eax, ebx - and eax, 0x1fffe - imul esi, eax - xor eax, 0x1fffe - imul ecx, eax - add ecx, esi - shr ecx, 17 - movq mm0, [kCoefficientsRgbU + 8 * ecx] - - mov esi, [esp + 32 + 12] - mov eax, ebx - sar eax, 0x11 - - movzx ecx, byte ptr [esi + eax] - movzx esi, byte ptr [esi + eax + 1] - mov eax, ebx - and eax, 0x1fffe - imul esi, eax - xor eax, 0x1fffe - imul ecx, eax - add ecx, esi - shr ecx, 17 - paddsw mm0, [kCoefficientsRgbV + 8 * ecx] - - mov eax, ebx - sar eax, 0x10 - movzx ecx, byte ptr [edx + eax] - movzx esi, byte ptr [1 + edx + eax] - mov eax, ebx - add ebx, [esp + 32 + 24] - and eax, 0xffff - imul esi, eax - xor eax, 0xffff - imul ecx, eax - add ecx, esi - shr ecx, 16 - movq mm1, [kCoefficientsRgbY + 8 * ecx] - - cmp ebx, [esp + 32 + 20] - jge lscalelastpixel - - mov eax, ebx - sar eax, 0x10 - movzx ecx, 
byte ptr [edx + eax] - movzx esi, byte ptr [edx + eax + 1] - mov eax, ebx - add ebx, [esp + 32 + 24] - and eax, 0xffff - imul esi, eax - xor eax, 0xffff - imul ecx, eax - add ecx, esi - shr ecx, 16 - movq mm2, [kCoefficientsRgbY + 8 * ecx] - - paddsw mm1, mm0 - paddsw mm2, mm0 - psraw mm1, 0x6 - psraw mm2, 0x6 - packuswb mm1, mm2 - movntq [ebp], mm1 - add ebp, 0x8 - -lscaleend: - cmp ebx, [esp + 32 + 20] - jl lscaleloop - popad - ret - -lscalelastpixel: - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - movd [ebp], mm1 - popad - ret - }; -} -#else // USE_MMX - -// C reference code that mimic the YUV assembly. -#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x))) -#define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \ - (((x) + (y)) > 32767 ? 32767 : ((x) + (y)))) - -static inline void YuvPixel(uint8 y, - uint8 u, - uint8 v, - uint8* rgb_buf) { - - int b = kCoefficientsRgbY[256+u][0]; - int g = kCoefficientsRgbY[256+u][1]; - int r = kCoefficientsRgbY[256+u][2]; - int a = kCoefficientsRgbY[256+u][3]; - - b = paddsw(b, kCoefficientsRgbY[512+v][0]); - g = paddsw(g, kCoefficientsRgbY[512+v][1]); - r = paddsw(r, kCoefficientsRgbY[512+v][2]); - a = paddsw(a, kCoefficientsRgbY[512+v][3]); - - b = paddsw(b, kCoefficientsRgbY[y][0]); - g = paddsw(g, kCoefficientsRgbY[y][1]); - r = paddsw(r, kCoefficientsRgbY[y][2]); - a = paddsw(a, kCoefficientsRgbY[y][3]); - - b >>= 6; - g >>= 6; - r >>= 6; - a >>= 6; - - *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) | - (packuswb(g) << 8) | - (packuswb(r) << 16) | - (packuswb(a) << 24); -} - -#if TEST_MMX_YUV -static inline void YuvPixel(uint8 y, - uint8 u, - uint8 v, - uint8* rgb_buf) { - - __asm { - movzx eax, u - movq mm0, [kCoefficientsRgbY+2048 + 8 * eax] - movzx eax, v - paddsw mm0, [kCoefficientsRgbY+4096 + 8 * eax] - movzx eax, y - movq mm1, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - mov eax, rgb_buf - movd [eax], mm1 - emms - } -} -#endif - -void 
FastConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { - for (int x = 0; x < width; x += 2) { - uint8 u = u_buf[x >> 1]; - uint8 v = v_buf[x >> 1]; - uint8 y0 = y_buf[x]; - YuvPixel(y0, u, v, rgb_buf); - if ((x + 1) < width) { - uint8 y1 = y_buf[x + 1]; - YuvPixel(y1, u, v, rgb_buf + 4); - } - rgb_buf += 8; // Advance 2 pixels. - } -} - -// 16.16 fixed point is used. A shift by 16 isolates the integer. -// A shift by 17 is used to further subsample the chrominence channels. -// & 0xffff isolates the fixed point fraction. >> 2 to get the upper 2 bits, -// for 1/65536 pixel accurate interpolation. -void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - int x = 0; - for (int i = 0; i < width; i += 2) { - int y = y_buf[x >> 16]; - int u = u_buf[(x >> 17)]; - int v = v_buf[(x >> 17)]; - YuvPixel(y, u, v, rgb_buf); - x += source_dx; - if ((i + 1) < width) { - y = y_buf[x >> 16]; - YuvPixel(y, u, v, rgb_buf+4); - x += source_dx; - } - rgb_buf += 8; - } -} - -void LinearScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - int x = 0; - if (source_dx >= 0x20000) { - x = 32768; - } - for (int i = 0; i < width; i += 2) { - int y0 = y_buf[x >> 16]; - int y1 = y_buf[(x >> 16) + 1]; - int u0 = u_buf[(x >> 17)]; - int u1 = u_buf[(x >> 17) + 1]; - int v0 = v_buf[(x >> 17)]; - int v1 = v_buf[(x >> 17) + 1]; - int y_frac = (x & 65535); - int uv_frac = ((x >> 1) & 65535); - int y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; - int u = (uv_frac * u1 + (uv_frac ^ 65535) * u0) >> 16; - int v = (uv_frac * v1 + (uv_frac ^ 65535) * v0) >> 16; - YuvPixel(y, u, v, rgb_buf); - x += source_dx; - if ((i + 1) < width) { - y0 = y_buf[x >> 16]; - y1 = y_buf[(x >> 16) + 1]; - y_frac = (x & 65535); - y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; - YuvPixel(y, u, v, 
rgb_buf+4); - x += source_dx; - } - rgb_buf += 8; - } -} - -#endif // USE_MMX -} // extern "C" - diff --git a/media/media.gyp b/media/media.gyp index de685ea..7af199e 100644 --- a/media/media.gyp +++ b/media/media.gyp @@ -357,12 +357,6 @@ 'sources': [ 'base/yuv_convert.cc', 'base/yuv_convert.h', - 'base/yuv_convert_internal.h', - 'base/yuv_convert_c.cc', - 'base/yuv_row_win.cc', - 'base/yuv_row_posix.cc', - 'base/yuv_row_table.cc', - 'base/yuv_row.h', ], }, { @@ -372,9 +366,10 @@ '..', ], 'sources': [ - 'base/yuv_convert_sse2.cc', - 'base/simd/convert_rgb_to_yuv_x86.cc', + 'base/simd/convert_rgb_to_yuv_c.cc', + 'base/simd/convert_rgb_to_yuv_sse2.cc', 'base/simd/convert_rgb_to_yuv_ssse3.asm', + 'base/simd/convert_rgb_to_yuv_ssse3.cc', 'base/simd/convert_rgb_to_yuv_ssse3.inc', 'base/simd/convert_yuv_to_rgb_c.cc', 'base/simd/convert_yuv_to_rgb_x86.cc', @@ -391,6 +386,8 @@ 'base/simd/scale_yuv_to_rgb_mmx.asm', 'base/simd/scale_yuv_to_rgb_mmx.inc', 'base/simd/scale_yuv_to_rgb_sse.asm', + 'base/simd/yuv_to_rgb_table.cc', + 'base/simd/yuv_to_rgb_table.h', ], 'conditions': [ [ 'target_arch == "x64"', { @@ -481,9 +478,14 @@ '..', ], 'sources': [ + 'base/simd/convert_rgb_to_yuv_c.cc', + 'base/simd/convert_rgb_to_yuv.h', 'base/simd/convert_yuv_to_rgb_c.cc', + 'base/simd/convert_yuv_to_rgb.h', 'base/simd/filter_yuv.h', 'base/simd/filter_yuv_c.cc', + 'base/simd/yuv_to_rgb_table.cc', + 'base/simd/yuv_to_rgb_table.h', ], }, { |