diff options
author | fbarchard@chromium.org <fbarchard@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-14 20:46:29 +0000 |
---|---|---|
committer | fbarchard@chromium.org <fbarchard@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-14 20:46:29 +0000 |
commit | 61fc302c4fda79bf3fcc5572c5fa5163a2590e08 (patch) | |
tree | 693f419f3893249e7a6db051655353c7b970a982 /media/base/yuv_row_linux.cc | |
parent | 2c6b40896d0e00da22bb251a63b504e9e28c97a3 (diff) | |
download | chromium_src-61fc302c4fda79bf3fcc5572c5fa5163a2590e08.zip chromium_src-61fc302c4fda79bf3fcc5572c5fa5163a2590e08.tar.gz chromium_src-61fc302c4fda79bf3fcc5572c5fa5163a2590e08.tar.bz2 |
Use lea to remove 2 instructions from the inner loop of YUV Scale.
Use SSE2 (xmm registers) instead of MMX (mm registers) so that emms is no longer needed.
The unittest contains code that times YUV scale and convert by running them 100 times (about 1 second in total), which is useful for catching performance issues; that timing code is currently disabled.
BUG=23263
TEST=media_unittest should pass and run much faster: 28% faster than the 32-bit version.
Review URL: http://codereview.chromium.org/273047
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@29019 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'media/base/yuv_row_linux.cc')
-rw-r--r-- | media/base/yuv_row_linux.cc | 78 |
1 files changed, 38 insertions, 40 deletions
diff --git a/media/base/yuv_row_linux.cc b/media/base/yuv_row_linux.cc index 5825960..b99c7bf 100644 --- a/media/base/yuv_row_linux.cc +++ b/media/base/yuv_row_linux.cc @@ -263,19 +263,19 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi "add $0x1,%rsi\n" "movzb (%rdx),%r11\n" "add $0x1,%rdx\n" - "movq kCoefficientsRgbU(,%r10,8),%mm0\n" + "movq kCoefficientsRgbU(,%r10,8),%xmm0\n" "movzb (%rdi),%r10\n" - "paddsw kCoefficientsRgbV(,%r11,8),%mm0\n" + "paddsw kCoefficientsRgbV(,%r11,8),%xmm0\n" "movzb 0x1(%rdi),%r11\n" - "movq kCoefficientsRgbY(,%r10,8),%mm1\n" + "movq kCoefficientsRgbY(,%r10,8),%xmm1\n" "add $0x2,%rdi\n" - "movq kCoefficientsRgbY(,%r11,8),%mm2\n" - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%rcx)\n" + "movq kCoefficientsRgbY(,%r11,8),%xmm2\n" + "paddsw %xmm0,%xmm1\n" + "paddsw %xmm0,%xmm2\n" + "psraw $0x6,%xmm1\n" + "psraw $0x6,%xmm2\n" + "packuswb %xmm2,%xmm1\n" + "movq %xmm1,0x0(%rcx)\n" "add $0x8,%rcx\n" "convertend:" "sub $0x2,%r8\n" @@ -285,15 +285,15 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi "je convertdone\n" "movzb (%rsi),%r10\n" - "movq kCoefficientsRgbU(,%r10,8),%mm0\n" + "movq kCoefficientsRgbU(,%r10,8),%xmm0\n" "movzb (%rdx),%r10\n" - "paddsw kCoefficientsRgbV(,%r10,8),%mm0\n" + "paddsw kCoefficientsRgbV(,%r10,8),%xmm0\n" "movzb (%rdi),%r10\n" - "movq kCoefficientsRgbY(,%r10,8),%mm1\n" - "paddsw %mm0,%mm1\n" - "psraw $0x6,%mm1\n" - "packuswb %mm1,%mm1\n" - "movd %mm1,0x0(%rcx)\n" + "movq kCoefficientsRgbY(,%r10,8),%xmm1\n" + "paddsw %xmm0,%xmm1\n" + "psraw $0x6,%xmm1\n" + "packuswb %xmm1,%xmm1\n" + "movd %xmm1,0x0(%rcx)\n" "convertdone:" "ret\n" ); @@ -316,25 +316,23 @@ void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi "mov %r11,%r10\n" "sar $0x5,%r10\n" "movzb (%rsi,%r10,1),%rax\n" - "movq kCoefficientsRgbU(,%rax,8),%mm0\n" + "movq kCoefficientsRgbU(,%rax,8),%xmm0\n" "movzb (%rdx,%r10,1),%rax\n" - "paddsw 
kCoefficientsRgbV(,%rax,8),%mm0\n" - "mov %r11,%r10\n" - "add %r9,%r11\n" - "sar $0x4,%r10\n" - "movzb (%rdi,%r10,1),%rax\n" - "movq kCoefficientsRgbY(,%rax,8),%mm1\n" - "mov %r11,%r10\n" - "add %r9,%r11\n" + "paddsw kCoefficientsRgbV(,%rax,8),%xmm0\n" + "lea (%r11,%r9),%r10\n" + "sar $0x4,%r11\n" + "movzb (%rdi,%r11,1),%rax\n" + "movq kCoefficientsRgbY(,%rax,8),%xmm1\n" + "lea (%r10,%r9),%r11\n" "sar $0x4,%r10\n" "movzb (%rdi,%r10,1),%rax\n" - "movq kCoefficientsRgbY(,%rax,8),%mm2\n" - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%rcx)\n" + "movq kCoefficientsRgbY(,%rax,8),%xmm2\n" + "paddsw %xmm0,%xmm1\n" + "paddsw %xmm0,%xmm2\n" + "psraw $0x6,%xmm1\n" + "psraw $0x6,%xmm2\n" + "packuswb %xmm2,%xmm1\n" + "movq %xmm1,0x0(%rcx)\n" "add $0x8,%rcx\n" "scaleend:" "sub $0x2,%r8\n" @@ -346,16 +344,16 @@ void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi "mov %r11,%r10\n" "sar $0x5,%r10\n" "movzb (%rsi,%r10,1),%rax\n" - "movq kCoefficientsRgbU(,%rax,8),%mm0\n" + "movq kCoefficientsRgbU(,%rax,8),%xmm0\n" "movzb (%rdx,%r10,1),%rax\n" - "paddsw kCoefficientsRgbV(,%rax,8),%mm0\n" + "paddsw kCoefficientsRgbV(,%rax,8),%xmm0\n" "sar $0x4,%r11\n" "movzb (%rdi,%r11,1),%rax\n" - "movq kCoefficientsRgbY(,%rax,8),%mm1\n" - "paddsw %mm0,%mm1\n" - "psraw $0x6,%mm1\n" - "packuswb %mm1,%mm1\n" - "movd %mm1,0x0(%rcx)\n" + "movq kCoefficientsRgbY(,%rax,8),%xmm1\n" + "paddsw %xmm0,%xmm1\n" + "psraw $0x6,%xmm1\n" + "packuswb %xmm1,%xmm1\n" + "movd %xmm1,0x0(%rcx)\n" "scaledone:" "ret\n" |