diff options
-rw-r--r-- | media/base/yuv_convert_unittest.cc | 55 | ||||
-rw-r--r-- | media/base/yuv_row.h | 3 | ||||
-rw-r--r-- | media/base/yuv_row_linux.cc | 78 |
3 files changed, 70 insertions, 66 deletions
diff --git a/media/base/yuv_convert_unittest.cc b/media/base/yuv_convert_unittest.cc index d75488a..00152b6 100644 --- a/media/base/yuv_convert_unittest.cc +++ b/media/base/yuv_convert_unittest.cc @@ -33,7 +33,6 @@ static const size_t kRGBSize = kWidth * kHeight * kBpp; static const size_t kRGBSizeConverted = kWidth * kHeight * kBpp; // Set to 100 to time ConvertYUVToRGB32. -// This will take approximately 40 to 200 ms. static const int kTestTimes = 1; TEST(YUVConvertTest, YV12) { @@ -99,16 +98,18 @@ TEST(YUVConvertTest, YV16) { reinterpret_cast<char*>(yuv_bytes.get()), static_cast<int>(kYUV16Size))); - // Convert a frame of YUV to 32 bit ARGB. - media::ConvertYUVToRGB32(yuv_bytes.get(), // Y - yuv_bytes.get() + kWidth * kHeight, // U - yuv_bytes.get() + kWidth * kHeight * 3 / 2, // V - rgb_converted_bytes.get(), // RGB output - kWidth, kHeight, // Dimensions - kWidth, // YStride - kWidth / 2, // UVStride - kWidth * kBpp, // RGBStride - media::YV16); + for (int i = 0; i < kTestTimes; ++i) { + // Convert a frame of YUV to 32 bit ARGB. + media::ConvertYUVToRGB32(yuv_bytes.get(), // Y + yuv_bytes.get() + kWidth * kHeight, // U + yuv_bytes.get() + kWidth * kHeight * 3 / 2, // V + rgb_converted_bytes.get(), // RGB output + kWidth, kHeight, // Dimensions + kWidth, // YStride + kWidth / 2, // UVStride + kWidth * kBpp, // RGBStride + media::YV16); + } unsigned int rgb_hash = DJB2Hash(rgb_converted_bytes.get(), kRGBSizeConverted, kDJB2HashSeed); @@ -143,17 +144,19 @@ TEST(YuvScaleTest, YV12) { const size_t size_of_rgb_scaled = kScaledWidth * kScaledHeight * kBpp; scoped_array<uint8> rgb_scaled_bytes(new uint8[size_of_rgb_scaled]); - media::ScaleYUVToRGB32(yuv_bytes.get(), // Y + for (int i = 0; i < kTestTimes; ++i) { + media::ScaleYUVToRGB32(yuv_bytes.get(), // Y yuv_bytes.get() + kWidth * kHeight, // U yuv_bytes.get() + kWidth * kHeight * 5 / 4, // V - rgb_scaled_bytes.get(), // Rgb output - kWidth, kHeight, // Dimensions - kScaledWidth, kScaledHeight, // Dimensions - kWidth, // YStride - kWidth / 2, // UvStride - kScaledWidth * kBpp, // RgbStride + rgb_scaled_bytes.get(), // Rgb output + kWidth, kHeight, // Dimensions + kScaledWidth, kScaledHeight, // Dimensions + kWidth, // YStride + kWidth / 2, // UvStride + kScaledWidth * kBpp, // RgbStride media::YV12, media::ROTATE_0); + } unsigned int rgb_hash = DJB2Hash(rgb_scaled_bytes.get(), size_of_rgb_scaled, kDJB2HashSeed); @@ -188,17 +191,19 @@ TEST(YuvScaleTest, YV16) { const size_t size_of_rgb_scaled = kScaledWidth * kScaledHeight * kBpp; scoped_array<uint8> rgb_scaled_bytes(new uint8[size_of_rgb_scaled]); - media::ScaleYUVToRGB32(yuv_bytes.get(), // Y + for (int i = 0; i < kTestTimes; ++i) { + media::ScaleYUVToRGB32(yuv_bytes.get(), // Y yuv_bytes.get() + kWidth * kHeight, // U yuv_bytes.get() + kWidth * kHeight * 3 / 2, // V - rgb_scaled_bytes.get(), // Rgb output - kWidth, kHeight, // Dimensions - kScaledWidth, kScaledHeight, // Dimensions - kWidth, // YStride - kWidth / 2, // UvStride - kScaledWidth * kBpp, // RgbStride + rgb_scaled_bytes.get(), // Rgb output + kWidth, kHeight, // Dimensions + kScaledWidth, kScaledHeight, // Dimensions + kWidth, // YStride + kWidth / 2, // UvStride + kScaledWidth * kBpp, // RgbStride media::YV16, media::ROTATE_0); + } unsigned int rgb_hash = DJB2Hash(rgb_scaled_bytes.get(), size_of_rgb_scaled, kDJB2HashSeed); diff --git a/media/base/yuv_row.h b/media/base/yuv_row.h index 31f1788..36f8f6e 100644 --- a/media/base/yuv_row.h +++ b/media/base/yuv_row.h @@ -71,7 +71,8 @@ void ScaleYUVToRGB32Row(const uint8* y_buf, #endif #endif -#if USE_MMX +// x64 uses MMX2 (SSE) so emms is not required. +#if USE_MMX && !defined(ARCH_CPU_X86_64) #if defined(_MSC_VER) #define EMMS() __asm emms #else diff --git a/media/base/yuv_row_linux.cc b/media/base/yuv_row_linux.cc index 5825960..b99c7bf 100644 --- a/media/base/yuv_row_linux.cc +++ b/media/base/yuv_row_linux.cc @@ -263,19 +263,19 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi "add $0x1,%rsi\n" "movzb (%rdx),%r11\n" "add $0x1,%rdx\n" - "movq kCoefficientsRgbU(,%r10,8),%mm0\n" + "movq kCoefficientsRgbU(,%r10,8),%xmm0\n" "movzb (%rdi),%r10\n" - "paddsw kCoefficientsRgbV(,%r11,8),%mm0\n" + "paddsw kCoefficientsRgbV(,%r11,8),%xmm0\n" "movzb 0x1(%rdi),%r11\n" - "movq kCoefficientsRgbY(,%r10,8),%mm1\n" + "movq kCoefficientsRgbY(,%r10,8),%xmm1\n" "add $0x2,%rdi\n" - "movq kCoefficientsRgbY(,%r11,8),%mm2\n" - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%rcx)\n" + "movq kCoefficientsRgbY(,%r11,8),%xmm2\n" + "paddsw %xmm0,%xmm1\n" + "paddsw %xmm0,%xmm2\n" + "psraw $0x6,%xmm1\n" + "psraw $0x6,%xmm2\n" + "packuswb %xmm2,%xmm1\n" + "movq %xmm1,0x0(%rcx)\n" "add $0x8,%rcx\n" "convertend:" "sub $0x2,%r8\n" @@ -285,15 +285,15 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi "je convertdone\n" "movzb (%rsi),%r10\n" - "movq kCoefficientsRgbU(,%r10,8),%mm0\n" + "movq kCoefficientsRgbU(,%r10,8),%xmm0\n" "movzb (%rdx),%r10\n" - "paddsw kCoefficientsRgbV(,%r10,8),%mm0\n" + "paddsw kCoefficientsRgbV(,%r10,8),%xmm0\n" "movzb (%rdi),%r10\n" - "movq kCoefficientsRgbY(,%r10,8),%mm1\n" - "paddsw %mm0,%mm1\n" - "psraw $0x6,%mm1\n" - "packuswb %mm1,%mm1\n" - "movd %mm1,0x0(%rcx)\n" + "movq kCoefficientsRgbY(,%r10,8),%xmm1\n" + "paddsw %xmm0,%xmm1\n" + "psraw $0x6,%xmm1\n" + "packuswb %xmm1,%xmm1\n" + "movd %xmm1,0x0(%rcx)\n" "convertdone:" "ret\n" ); @@ -316,25 +316,23 @@ void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi "mov %r11,%r10\n" "sar $0x5,%r10\n" "movzb (%rsi,%r10,1),%rax\n" - "movq kCoefficientsRgbU(,%rax,8),%mm0\n" + "movq kCoefficientsRgbU(,%rax,8),%xmm0\n" "movzb (%rdx,%r10,1),%rax\n" - "paddsw kCoefficientsRgbV(,%rax,8),%mm0\n" - "mov %r11,%r10\n" - "add %r9,%r11\n" - "sar $0x4,%r10\n" - "movzb (%rdi,%r10,1),%rax\n" - "movq kCoefficientsRgbY(,%rax,8),%mm1\n" - "mov %r11,%r10\n" - "add %r9,%r11\n" + "paddsw kCoefficientsRgbV(,%rax,8),%xmm0\n" + "lea (%r11,%r9),%r10\n" + "sar $0x4,%r11\n" + "movzb (%rdi,%r11,1),%rax\n" + "movq kCoefficientsRgbY(,%rax,8),%xmm1\n" + "lea (%r10,%r9),%r11\n" "sar $0x4,%r10\n" "movzb (%rdi,%r10,1),%rax\n" - "movq kCoefficientsRgbY(,%rax,8),%mm2\n" - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%rcx)\n" + "movq kCoefficientsRgbY(,%rax,8),%xmm2\n" + "paddsw %xmm0,%xmm1\n" + "paddsw %xmm0,%xmm2\n" + "psraw $0x6,%xmm1\n" + "psraw $0x6,%xmm2\n" + "packuswb %xmm2,%xmm1\n" + "movq %xmm1,0x0(%rcx)\n" "add $0x8,%rcx\n" "scaleend:" "sub $0x2,%r8\n" @@ -346,16 +344,16 @@ void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi "mov %r11,%r10\n" "sar $0x5,%r10\n" "movzb (%rsi,%r10,1),%rax\n" - "movq kCoefficientsRgbU(,%rax,8),%mm0\n" + "movq kCoefficientsRgbU(,%rax,8),%xmm0\n" "movzb (%rdx,%r10,1),%rax\n" - "paddsw kCoefficientsRgbV(,%rax,8),%mm0\n" + "paddsw kCoefficientsRgbV(,%rax,8),%xmm0\n" "sar $0x4,%r11\n" "movzb (%rdi,%r11,1),%rax\n" - "movq kCoefficientsRgbY(,%rax,8),%mm1\n" - "paddsw %mm0,%mm1\n" - "psraw $0x6,%mm1\n" - "packuswb %mm1,%mm1\n" - "movd %mm1,0x0(%rcx)\n" + "movq kCoefficientsRgbY(,%rax,8),%xmm1\n" + "paddsw %xmm0,%xmm1\n" + "psraw $0x6,%xmm1\n" + "packuswb %xmm1,%xmm1\n" + "movd %xmm1,0x0(%rcx)\n" "scaledone:" "ret\n" |