summary | refs | log | tree | commit | diff | stats
path: root/media/base/yuv_row_linux.cc
diff options
context:
space:
mode:
author fbarchard@chromium.org <fbarchard@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-10-14 20:46:29 +0000
committer fbarchard@chromium.org <fbarchard@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-10-14 20:46:29 +0000
commit 61fc302c4fda79bf3fcc5572c5fa5163a2590e08 (patch)
tree 693f419f3893249e7a6db051655353c7b970a982 /media/base/yuv_row_linux.cc
parent 2c6b40896d0e00da22bb251a63b504e9e28c97a3 (diff)
download chromium_src-61fc302c4fda79bf3fcc5572c5fa5163a2590e08.zip
chromium_src-61fc302c4fda79bf3fcc5572c5fa5163a2590e08.tar.gz
chromium_src-61fc302c4fda79bf3fcc5572c5fa5163a2590e08.tar.bz2
Use lea to remove 2 instructions from the inner loop of YUV Scale.
Use SSE2 (XMM registers) instead of MMX registers to remove the need for emms. The unittest has code to time YUV scale and convert 100 times so that it takes about 1 second, which is useful for catching performance issues, but that code is disabled.
BUG=23263
TEST=media_unittest should pass and run much faster: 28% faster than the 32-bit version.
Review URL: http://codereview.chromium.org/273047
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@29019 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'media/base/yuv_row_linux.cc')
-rw-r--r-- media/base/yuv_row_linux.cc 78
1 files changed, 38 insertions, 40 deletions
diff --git a/media/base/yuv_row_linux.cc b/media/base/yuv_row_linux.cc
index 5825960..b99c7bf 100644
--- a/media/base/yuv_row_linux.cc
+++ b/media/base/yuv_row_linux.cc
@@ -263,19 +263,19 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
"add $0x1,%rsi\n"
"movzb (%rdx),%r11\n"
"add $0x1,%rdx\n"
- "movq kCoefficientsRgbU(,%r10,8),%mm0\n"
+ "movq kCoefficientsRgbU(,%r10,8),%xmm0\n"
"movzb (%rdi),%r10\n"
- "paddsw kCoefficientsRgbV(,%r11,8),%mm0\n"
+ "paddsw kCoefficientsRgbV(,%r11,8),%xmm0\n"
"movzb 0x1(%rdi),%r11\n"
- "movq kCoefficientsRgbY(,%r10,8),%mm1\n"
+ "movq kCoefficientsRgbY(,%r10,8),%xmm1\n"
"add $0x2,%rdi\n"
- "movq kCoefficientsRgbY(,%r11,8),%mm2\n"
- "paddsw %mm0,%mm1\n"
- "paddsw %mm0,%mm2\n"
- "psraw $0x6,%mm1\n"
- "psraw $0x6,%mm2\n"
- "packuswb %mm2,%mm1\n"
- "movntq %mm1,0x0(%rcx)\n"
+ "movq kCoefficientsRgbY(,%r11,8),%xmm2\n"
+ "paddsw %xmm0,%xmm1\n"
+ "paddsw %xmm0,%xmm2\n"
+ "psraw $0x6,%xmm1\n"
+ "psraw $0x6,%xmm2\n"
+ "packuswb %xmm2,%xmm1\n"
+ "movq %xmm1,0x0(%rcx)\n"
"add $0x8,%rcx\n"
"convertend:"
"sub $0x2,%r8\n"
@@ -285,15 +285,15 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
"je convertdone\n"
"movzb (%rsi),%r10\n"
- "movq kCoefficientsRgbU(,%r10,8),%mm0\n"
+ "movq kCoefficientsRgbU(,%r10,8),%xmm0\n"
"movzb (%rdx),%r10\n"
- "paddsw kCoefficientsRgbV(,%r10,8),%mm0\n"
+ "paddsw kCoefficientsRgbV(,%r10,8),%xmm0\n"
"movzb (%rdi),%r10\n"
- "movq kCoefficientsRgbY(,%r10,8),%mm1\n"
- "paddsw %mm0,%mm1\n"
- "psraw $0x6,%mm1\n"
- "packuswb %mm1,%mm1\n"
- "movd %mm1,0x0(%rcx)\n"
+ "movq kCoefficientsRgbY(,%r10,8),%xmm1\n"
+ "paddsw %xmm0,%xmm1\n"
+ "psraw $0x6,%xmm1\n"
+ "packuswb %xmm1,%xmm1\n"
+ "movd %xmm1,0x0(%rcx)\n"
"convertdone:"
"ret\n"
);
@@ -316,25 +316,23 @@ void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi
"mov %r11,%r10\n"
"sar $0x5,%r10\n"
"movzb (%rsi,%r10,1),%rax\n"
- "movq kCoefficientsRgbU(,%rax,8),%mm0\n"
+ "movq kCoefficientsRgbU(,%rax,8),%xmm0\n"
"movzb (%rdx,%r10,1),%rax\n"
- "paddsw kCoefficientsRgbV(,%rax,8),%mm0\n"
- "mov %r11,%r10\n"
- "add %r9,%r11\n"
- "sar $0x4,%r10\n"
- "movzb (%rdi,%r10,1),%rax\n"
- "movq kCoefficientsRgbY(,%rax,8),%mm1\n"
- "mov %r11,%r10\n"
- "add %r9,%r11\n"
+ "paddsw kCoefficientsRgbV(,%rax,8),%xmm0\n"
+ "lea (%r11,%r9),%r10\n"
+ "sar $0x4,%r11\n"
+ "movzb (%rdi,%r11,1),%rax\n"
+ "movq kCoefficientsRgbY(,%rax,8),%xmm1\n"
+ "lea (%r10,%r9),%r11\n"
"sar $0x4,%r10\n"
"movzb (%rdi,%r10,1),%rax\n"
- "movq kCoefficientsRgbY(,%rax,8),%mm2\n"
- "paddsw %mm0,%mm1\n"
- "paddsw %mm0,%mm2\n"
- "psraw $0x6,%mm1\n"
- "psraw $0x6,%mm2\n"
- "packuswb %mm2,%mm1\n"
- "movntq %mm1,0x0(%rcx)\n"
+ "movq kCoefficientsRgbY(,%rax,8),%xmm2\n"
+ "paddsw %xmm0,%xmm1\n"
+ "paddsw %xmm0,%xmm2\n"
+ "psraw $0x6,%xmm1\n"
+ "psraw $0x6,%xmm2\n"
+ "packuswb %xmm2,%xmm1\n"
+ "movq %xmm1,0x0(%rcx)\n"
"add $0x8,%rcx\n"
"scaleend:"
"sub $0x2,%r8\n"
@@ -346,16 +344,16 @@ void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi
"mov %r11,%r10\n"
"sar $0x5,%r10\n"
"movzb (%rsi,%r10,1),%rax\n"
- "movq kCoefficientsRgbU(,%rax,8),%mm0\n"
+ "movq kCoefficientsRgbU(,%rax,8),%xmm0\n"
"movzb (%rdx,%r10,1),%rax\n"
- "paddsw kCoefficientsRgbV(,%rax,8),%mm0\n"
+ "paddsw kCoefficientsRgbV(,%rax,8),%xmm0\n"
"sar $0x4,%r11\n"
"movzb (%rdi,%r11,1),%rax\n"
- "movq kCoefficientsRgbY(,%rax,8),%mm1\n"
- "paddsw %mm0,%mm1\n"
- "psraw $0x6,%mm1\n"
- "packuswb %mm1,%mm1\n"
- "movd %mm1,0x0(%rcx)\n"
+ "movq kCoefficientsRgbY(,%rax,8),%xmm1\n"
+ "paddsw %xmm0,%xmm1\n"
+ "psraw $0x6,%xmm1\n"
+ "packuswb %xmm1,%xmm1\n"
+ "movd %xmm1,0x0(%rcx)\n"
"scaledone:"
"ret\n"