diff options
Diffstat (limited to 'media/base/yuv_row_linux.cc')
-rw-r--r-- | media/base/yuv_row_linux.cc | 158 |
1 files changed, 119 insertions, 39 deletions
diff --git a/media/base/yuv_row_linux.cc b/media/base/yuv_row_linux.cc index 9410c2f..5825960 100644 --- a/media/base/yuv_row_linux.cc +++ b/media/base/yuv_row_linux.cc @@ -245,45 +245,123 @@ MMX_ALIGNED(int16 kCoefficientsRgbV[256][4]) = { #undef RGBV #undef MMX_ALIGNED -// TODO(fbarchard): Use the following function instead of -// pure assembly to help make code more portable to 64 bit -// and Mac, which has different labels. -// no-gcse eliminates the frame pointer, freeing up ebp. - -#if defined(FUTURE_64BIT_VERSION) -void __attribute__((optimize("O2", "no-gcse"))) - NewFastConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { +#if defined(ARCH_CPU_X86_64) + +void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi + const uint8* u_buf, // rsi + const uint8* v_buf, // rdx + uint8* rgb_buf, // rcx + int width); // r8 + asm( - "shr %4\n" -"1:\n" - "movzb (%1),%%eax\n" - "add $0x1,%1\n" - "movzb (%2),%%ebx\n" - "add $0x1,%2\n" - "movq kCoefficientsRgbU(,%%eax,8),%%mm0\n" - "movzb (%0),%%eax\n" - "paddsw kCoefficientsRgbV(,%%ebx,8),%%mm0\n" - "movzb 0x1(%0),%%ebx\n" - "movq kCoefficientsRgbY(,%%eax,8),%%mm1\n" - "add $0x2,%0\n" - "movq kCoefficientsRgbY(,%%ebx,8),%%mm2\n" - "paddsw %%mm0,%%mm1\n" - "paddsw %%mm0,%%mm2\n" - "psraw $0x6,%%mm1\n" - "psraw $0x6,%%mm2\n" - "packuswb %%mm2,%%mm1\n" - "movntq %%mm1,0x0(%3)\n" - "add $0x8,%3\n" - "sub $0x1,%4\n" - "jne 1b\n" - : : "r"(y_buf),"r"(u_buf),"r"(v_buf),"r"(rgb_buf),"r"(width) - : "eax","ebx"); -} -#endif + ".global FastConvertYUVToRGB32Row\n" +"FastConvertYUVToRGB32Row:\n" + "jmp convertend\n" + +"convertloop:" + "movzb (%rsi),%r10\n" + "add $0x1,%rsi\n" + "movzb (%rdx),%r11\n" + "add $0x1,%rdx\n" + "movq kCoefficientsRgbU(,%r10,8),%mm0\n" + "movzb (%rdi),%r10\n" + "paddsw kCoefficientsRgbV(,%r11,8),%mm0\n" + "movzb 0x1(%rdi),%r11\n" + "movq kCoefficientsRgbY(,%r10,8),%mm1\n" + "add $0x2,%rdi\n" + "movq kCoefficientsRgbY(,%r11,8),%mm2\n" + "paddsw %mm0,%mm1\n" + "paddsw %mm0,%mm2\n" + "psraw $0x6,%mm1\n" + "psraw $0x6,%mm2\n" + "packuswb %mm2,%mm1\n" + "movntq %mm1,0x0(%rcx)\n" + "add $0x8,%rcx\n" +"convertend:" + "sub $0x2,%r8\n" + "jns convertloop\n" + + "and $0x1,%r8\n" + "je convertdone\n" + + "movzb (%rsi),%r10\n" + "movq kCoefficientsRgbU(,%r10,8),%mm0\n" + "movzb (%rdx),%r10\n" + "paddsw kCoefficientsRgbV(,%r10,8),%mm0\n" + "movzb (%rdi),%r10\n" + "movq kCoefficientsRgbY(,%r10,8),%mm1\n" + "paddsw %mm0,%mm1\n" + "psraw $0x6,%mm1\n" + "packuswb %mm1,%mm1\n" + "movd %mm1,0x0(%rcx)\n" +"convertdone:" + "ret\n" +); + + +void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi + const uint8* u_buf, // rsi + const uint8* v_buf, // rdx + uint8* rgb_buf, // rcx + int width, // r8 + int scaled_dx); // r9 + + asm( + ".global ScaleYUVToRGB32Row\n" +"ScaleYUVToRGB32Row:\n" + "xor %r11,%r11\n" + "jmp scaleend\n" + +"scaleloop:" + "mov %r11,%r10\n" + "sar $0x5,%r10\n" + "movzb (%rsi,%r10,1),%rax\n" + "movq kCoefficientsRgbU(,%rax,8),%mm0\n" + "movzb (%rdx,%r10,1),%rax\n" + "paddsw kCoefficientsRgbV(,%rax,8),%mm0\n" + "mov %r11,%r10\n" + "add %r9,%r11\n" + "sar $0x4,%r10\n" + "movzb (%rdi,%r10,1),%rax\n" + "movq kCoefficientsRgbY(,%rax,8),%mm1\n" + "mov %r11,%r10\n" + "add %r9,%r11\n" + "sar $0x4,%r10\n" + "movzb (%rdi,%r10,1),%rax\n" + "movq kCoefficientsRgbY(,%rax,8),%mm2\n" + "paddsw %mm0,%mm1\n" + "paddsw %mm0,%mm2\n" + "psraw $0x6,%mm1\n" + "psraw $0x6,%mm2\n" + "packuswb %mm2,%mm1\n" + "movntq %mm1,0x0(%rcx)\n" + "add $0x8,%rcx\n" +"scaleend:" + "sub $0x2,%r8\n" + "jns scaleloop\n" + + "and $0x1,%r8\n" + "je scaledone\n" + + "mov %r11,%r10\n" + "sar $0x5,%r10\n" + "movzb (%rsi,%r10,1),%rax\n" + "movq kCoefficientsRgbU(,%rax,8),%mm0\n" + "movzb (%rdx,%r10,1),%rax\n" + "paddsw kCoefficientsRgbV(,%rax,8),%mm0\n" + "sar $0x4,%r11\n" + "movzb (%rdi,%r11,1),%rax\n" + "movq kCoefficientsRgbY(,%rax,8),%mm1\n" + "paddsw %mm0,%mm1\n" + "psraw $0x6,%mm1\n" + "packuswb %mm1,%mm1\n" + "movd %mm1,0x0(%rcx)\n" + +"scaledone:" + "ret\n" +); + +#else void FastConvertYUVToRGB32Row(const uint8* y_buf, const uint8* u_buf, @@ -418,6 +496,8 @@ void ScaleYUVToRGB32Row(const uint8* y_buf, "ret\n" ); +#endif + #else // USE_MMX // Reference version of YUV converter. @@ -601,6 +681,6 @@ void ScaleYUVToRGB32Row(const uint8* y_buf, scaled_x += scaled_dx; } } -#endif // USE_MMX +#endif // USE_MMX } // extern "C" |