summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--media/base/yuv_convert_unittest.cc55
-rw-r--r--media/base/yuv_row.h3
-rw-r--r--media/base/yuv_row_linux.cc78
3 files changed, 70 insertions, 66 deletions
diff --git a/media/base/yuv_convert_unittest.cc b/media/base/yuv_convert_unittest.cc
index d75488a..00152b6 100644
--- a/media/base/yuv_convert_unittest.cc
+++ b/media/base/yuv_convert_unittest.cc
@@ -33,7 +33,6 @@ static const size_t kRGBSize = kWidth * kHeight * kBpp;
static const size_t kRGBSizeConverted = kWidth * kHeight * kBpp;
// Set to 100 to time ConvertYUVToRGB32.
-// This will take approximately 40 to 200 ms.
static const int kTestTimes = 1;
TEST(YUVConvertTest, YV12) {
@@ -99,16 +98,18 @@ TEST(YUVConvertTest, YV16) {
reinterpret_cast<char*>(yuv_bytes.get()),
static_cast<int>(kYUV16Size)));
- // Convert a frame of YUV to 32 bit ARGB.
- media::ConvertYUVToRGB32(yuv_bytes.get(), // Y
- yuv_bytes.get() + kWidth * kHeight, // U
- yuv_bytes.get() + kWidth * kHeight * 3 / 2, // V
- rgb_converted_bytes.get(), // RGB output
- kWidth, kHeight, // Dimensions
- kWidth, // YStride
- kWidth / 2, // UVStride
- kWidth * kBpp, // RGBStride
- media::YV16);
+ for (int i = 0; i < kTestTimes; ++i) {
+ // Convert a frame of YUV to 32 bit ARGB.
+ media::ConvertYUVToRGB32(yuv_bytes.get(), // Y
+ yuv_bytes.get() + kWidth * kHeight, // U
+ yuv_bytes.get() + kWidth * kHeight * 3 / 2, // V
+ rgb_converted_bytes.get(), // RGB output
+ kWidth, kHeight, // Dimensions
+ kWidth, // YStride
+ kWidth / 2, // UVStride
+ kWidth * kBpp, // RGBStride
+ media::YV16);
+ }
unsigned int rgb_hash = DJB2Hash(rgb_converted_bytes.get(), kRGBSizeConverted,
kDJB2HashSeed);
@@ -143,17 +144,19 @@ TEST(YuvScaleTest, YV12) {
const size_t size_of_rgb_scaled = kScaledWidth * kScaledHeight * kBpp;
scoped_array<uint8> rgb_scaled_bytes(new uint8[size_of_rgb_scaled]);
- media::ScaleYUVToRGB32(yuv_bytes.get(), // Y
+ for (int i = 0; i < kTestTimes; ++i) {
+ media::ScaleYUVToRGB32(yuv_bytes.get(), // Y
yuv_bytes.get() + kWidth * kHeight, // U
yuv_bytes.get() + kWidth * kHeight * 5 / 4, // V
- rgb_scaled_bytes.get(), // Rgb output
- kWidth, kHeight, // Dimensions
- kScaledWidth, kScaledHeight, // Dimensions
- kWidth, // YStride
- kWidth / 2, // UvStride
- kScaledWidth * kBpp, // RgbStride
+ rgb_scaled_bytes.get(), // Rgb output
+ kWidth, kHeight, // Dimensions
+ kScaledWidth, kScaledHeight, // Dimensions
+ kWidth, // YStride
+ kWidth / 2, // UvStride
+ kScaledWidth * kBpp, // RgbStride
media::YV12,
media::ROTATE_0);
+ }
unsigned int rgb_hash = DJB2Hash(rgb_scaled_bytes.get(), size_of_rgb_scaled,
kDJB2HashSeed);
@@ -188,17 +191,19 @@ TEST(YuvScaleTest, YV16) {
const size_t size_of_rgb_scaled = kScaledWidth * kScaledHeight * kBpp;
scoped_array<uint8> rgb_scaled_bytes(new uint8[size_of_rgb_scaled]);
- media::ScaleYUVToRGB32(yuv_bytes.get(), // Y
+ for (int i = 0; i < kTestTimes; ++i) {
+ media::ScaleYUVToRGB32(yuv_bytes.get(), // Y
yuv_bytes.get() + kWidth * kHeight, // U
yuv_bytes.get() + kWidth * kHeight * 3 / 2, // V
- rgb_scaled_bytes.get(), // Rgb output
- kWidth, kHeight, // Dimensions
- kScaledWidth, kScaledHeight, // Dimensions
- kWidth, // YStride
- kWidth / 2, // UvStride
- kScaledWidth * kBpp, // RgbStride
+ rgb_scaled_bytes.get(), // Rgb output
+ kWidth, kHeight, // Dimensions
+ kScaledWidth, kScaledHeight, // Dimensions
+ kWidth, // YStride
+ kWidth / 2, // UvStride
+ kScaledWidth * kBpp, // RgbStride
media::YV16,
media::ROTATE_0);
+ }
unsigned int rgb_hash = DJB2Hash(rgb_scaled_bytes.get(), size_of_rgb_scaled,
kDJB2HashSeed);
diff --git a/media/base/yuv_row.h b/media/base/yuv_row.h
index 31f1788..36f8f6e 100644
--- a/media/base/yuv_row.h
+++ b/media/base/yuv_row.h
@@ -71,7 +71,8 @@ void ScaleYUVToRGB32Row(const uint8* y_buf,
#endif
#endif
-#if USE_MMX
+// x64 uses SSE2 (XMM registers) instead of MMX, so emms is not required.
+#if USE_MMX && !defined(ARCH_CPU_X86_64)
#if defined(_MSC_VER)
#define EMMS() __asm emms
#else
diff --git a/media/base/yuv_row_linux.cc b/media/base/yuv_row_linux.cc
index 5825960..b99c7bf 100644
--- a/media/base/yuv_row_linux.cc
+++ b/media/base/yuv_row_linux.cc
@@ -263,19 +263,19 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
"add $0x1,%rsi\n"
"movzb (%rdx),%r11\n"
"add $0x1,%rdx\n"
- "movq kCoefficientsRgbU(,%r10,8),%mm0\n"
+ "movq kCoefficientsRgbU(,%r10,8),%xmm0\n"
"movzb (%rdi),%r10\n"
- "paddsw kCoefficientsRgbV(,%r11,8),%mm0\n"
+ "paddsw kCoefficientsRgbV(,%r11,8),%xmm0\n"
"movzb 0x1(%rdi),%r11\n"
- "movq kCoefficientsRgbY(,%r10,8),%mm1\n"
+ "movq kCoefficientsRgbY(,%r10,8),%xmm1\n"
"add $0x2,%rdi\n"
- "movq kCoefficientsRgbY(,%r11,8),%mm2\n"
- "paddsw %mm0,%mm1\n"
- "paddsw %mm0,%mm2\n"
- "psraw $0x6,%mm1\n"
- "psraw $0x6,%mm2\n"
- "packuswb %mm2,%mm1\n"
- "movntq %mm1,0x0(%rcx)\n"
+ "movq kCoefficientsRgbY(,%r11,8),%xmm2\n"
+ "paddsw %xmm0,%xmm1\n"
+ "paddsw %xmm0,%xmm2\n"
+ "psraw $0x6,%xmm1\n"
+ "psraw $0x6,%xmm2\n"
+ "packuswb %xmm2,%xmm1\n"
+ "movq %xmm1,0x0(%rcx)\n"
"add $0x8,%rcx\n"
"convertend:"
"sub $0x2,%r8\n"
@@ -285,15 +285,15 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
"je convertdone\n"
"movzb (%rsi),%r10\n"
- "movq kCoefficientsRgbU(,%r10,8),%mm0\n"
+ "movq kCoefficientsRgbU(,%r10,8),%xmm0\n"
"movzb (%rdx),%r10\n"
- "paddsw kCoefficientsRgbV(,%r10,8),%mm0\n"
+ "paddsw kCoefficientsRgbV(,%r10,8),%xmm0\n"
"movzb (%rdi),%r10\n"
- "movq kCoefficientsRgbY(,%r10,8),%mm1\n"
- "paddsw %mm0,%mm1\n"
- "psraw $0x6,%mm1\n"
- "packuswb %mm1,%mm1\n"
- "movd %mm1,0x0(%rcx)\n"
+ "movq kCoefficientsRgbY(,%r10,8),%xmm1\n"
+ "paddsw %xmm0,%xmm1\n"
+ "psraw $0x6,%xmm1\n"
+ "packuswb %xmm1,%xmm1\n"
+ "movd %xmm1,0x0(%rcx)\n"
"convertdone:"
"ret\n"
);
@@ -316,25 +316,23 @@ void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi
"mov %r11,%r10\n"
"sar $0x5,%r10\n"
"movzb (%rsi,%r10,1),%rax\n"
- "movq kCoefficientsRgbU(,%rax,8),%mm0\n"
+ "movq kCoefficientsRgbU(,%rax,8),%xmm0\n"
"movzb (%rdx,%r10,1),%rax\n"
- "paddsw kCoefficientsRgbV(,%rax,8),%mm0\n"
- "mov %r11,%r10\n"
- "add %r9,%r11\n"
- "sar $0x4,%r10\n"
- "movzb (%rdi,%r10,1),%rax\n"
- "movq kCoefficientsRgbY(,%rax,8),%mm1\n"
- "mov %r11,%r10\n"
- "add %r9,%r11\n"
+ "paddsw kCoefficientsRgbV(,%rax,8),%xmm0\n"
+ "lea (%r11,%r9),%r10\n"
+ "sar $0x4,%r11\n"
+ "movzb (%rdi,%r11,1),%rax\n"
+ "movq kCoefficientsRgbY(,%rax,8),%xmm1\n"
+ "lea (%r10,%r9),%r11\n"
"sar $0x4,%r10\n"
"movzb (%rdi,%r10,1),%rax\n"
- "movq kCoefficientsRgbY(,%rax,8),%mm2\n"
- "paddsw %mm0,%mm1\n"
- "paddsw %mm0,%mm2\n"
- "psraw $0x6,%mm1\n"
- "psraw $0x6,%mm2\n"
- "packuswb %mm2,%mm1\n"
- "movntq %mm1,0x0(%rcx)\n"
+ "movq kCoefficientsRgbY(,%rax,8),%xmm2\n"
+ "paddsw %xmm0,%xmm1\n"
+ "paddsw %xmm0,%xmm2\n"
+ "psraw $0x6,%xmm1\n"
+ "psraw $0x6,%xmm2\n"
+ "packuswb %xmm2,%xmm1\n"
+ "movq %xmm1,0x0(%rcx)\n"
"add $0x8,%rcx\n"
"scaleend:"
"sub $0x2,%r8\n"
@@ -346,16 +344,16 @@ void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi
"mov %r11,%r10\n"
"sar $0x5,%r10\n"
"movzb (%rsi,%r10,1),%rax\n"
- "movq kCoefficientsRgbU(,%rax,8),%mm0\n"
+ "movq kCoefficientsRgbU(,%rax,8),%xmm0\n"
"movzb (%rdx,%r10,1),%rax\n"
- "paddsw kCoefficientsRgbV(,%rax,8),%mm0\n"
+ "paddsw kCoefficientsRgbV(,%rax,8),%xmm0\n"
"sar $0x4,%r11\n"
"movzb (%rdi,%r11,1),%rax\n"
- "movq kCoefficientsRgbY(,%rax,8),%mm1\n"
- "paddsw %mm0,%mm1\n"
- "psraw $0x6,%mm1\n"
- "packuswb %mm1,%mm1\n"
- "movd %mm1,0x0(%rcx)\n"
+ "movq kCoefficientsRgbY(,%rax,8),%xmm1\n"
+ "paddsw %xmm0,%xmm1\n"
+ "psraw $0x6,%xmm1\n"
+ "packuswb %xmm1,%xmm1\n"
+ "movd %xmm1,0x0(%rcx)\n"
"scaledone:"
"ret\n"