summary refs log tree commit diff stats
path: root/media
diff options
context:
space:
mode:
author: fbarchard@chromium.org <fbarchard@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-03-24 01:32:35 +0000
committer: fbarchard@chromium.org <fbarchard@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-03-24 01:32:35 +0000
commit: 53561c634662489d47ac28a1a6ce3e287690bb57 (patch)
tree: 45acd54d3f864e8e75873d47c504536e5135f0cb /media
parent: 63ca5d56f715b60b331c10dae70245af20cd9003 (diff)
download: chromium_src-53561c634662489d47ac28a1a6ce3e287690bb57.zip
chromium_src-53561c634662489d47ac28a1a6ce3e287690bb57.tar.gz
chromium_src-53561c634662489d47ac28a1a6ce3e287690bb57.tar.bz2
Reordering the instructions and using movzx improved performance on the Pentium 4 without impacting performance on the Core 2 architecture.
Performance goes from 2.050 ms to 1.742 ms per frame.

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@12333 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'media')
-rw-r--r-- media/base/yuv_convert.cc 58
1 files changed, 21 insertions, 37 deletions
diff --git a/media/base/yuv_convert.cc b/media/base/yuv_convert.cc
index 4b044d4..b930dac 100644
--- a/media/base/yuv_convert.cc
+++ b/media/base/yuv_convert.cc
@@ -330,40 +330,32 @@ void ConvertYV12ToRGB32Row(const uint8* y_buf,
size_t width) {
__asm {
pushad
-
mov edx, [esp + 32 + 4] // Y
mov edi, [esp + 32 + 8] // U
mov esi, [esp + 32 + 12] // V
mov ebp, [esp + 32 + 16] // rgb
mov ecx, [esp + 32 + 20] // width
shr ecx, 1
- xor eax, eax
- xor ebx, ebx
wloop :
- mov al, [edi]
- mov bl, [esi]
+ movzx eax, byte ptr [edi]
+ add edi, 1
+ movzx ebx, byte ptr [esi]
+ add esi, 1
movq mm0, [coefficients_RGB_U + 8 * eax]
+ movzx eax, byte ptr [edx]
paddsw mm0, [coefficients_RGB_V + 8 * ebx]
-
- mov al, [edx]
- mov bl, [edx + 1]
+ movzx ebx, byte ptr [edx + 1]
movq mm1, [coefficients_RGB_Y + 8 * eax]
+ add edx, 2
movq mm2, [coefficients_RGB_Y + 8 * ebx]
-
paddsw mm1, mm0
paddsw mm2, mm0
psraw mm1, 6
psraw mm2, 6
packuswb mm1, mm2
-
movntq [ebp], mm1 // NOLINT
-
add ebp, 8
- add edx, 2
- add edi, 1
- add esi, 1
-
sub ecx, 1
jnz wloop
@@ -390,16 +382,17 @@ void ConvertYV12ToRGB32Row(const uint8* y_buf,
"mov 0x30(%esp),%ebp\n"
"mov 0x34(%esp),%ecx\n"
"shr %ecx\n"
- "xor %eax,%eax\n"
- "xor %ebx,%ebx\n"
"1:\n"
- "mov (%edi),%al\n"
- "mov (%esi),%bl\n"
+ "movzx byte ptr (%edi),%eax\n"
+ "add $0x1,%edi\n"
+ "movzx byte ptr (%esi),%ebx\n"
+ "add $0x1,%esi\n"
"movq coefficients_RGB_U(,%eax,8),%mm0\n"
+ "movzx byte ptr (%edx),%eax\n"
"paddsw coefficients_RGB_V(,%ebx,8),%mm0\n"
- "mov (%edx),%al\n"
- "mov 0x1(%edx),%bl\n"
+ "movzx byte ptr 0x1(%edx),%ebx\n"
"movq coefficients_RGB_Y(,%eax,8),%mm1\n"
+ "add $0x2,%edx\n"
"movq coefficients_RGB_Y(,%ebx,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
@@ -408,16 +401,10 @@ void ConvertYV12ToRGB32Row(const uint8* y_buf,
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
- "add $0x2,%edx\n"
- "add $0x1,%edi\n"
- "add $0x1,%esi\n"
"sub $0x1,%ecx\n"
"jne 1b\n"
"popa\n"
"ret\n"
- "nop\n" // pad function to 0x70 bytes
- "nop\n"
- "nop\n"
);
#else
@@ -441,13 +428,16 @@ void ConvertYV12ToRGB32Row(const uint8* y_buf,
"xor %eax,%eax\n"
"xor %ebx,%ebx\n"
"1:\n"
- "mov (%edi),%al\n"
- "mov (%esi),%bl\n"
+ "movzx byte ptr (%edi),%eax\n"
+ "add $0x1,%edi\n"
+ "movzx byte ptr (%esi),%ebx\n"
+ "add $0x1,%esi\n"
"movq _coefficients_RGB_U(,%eax,8),%mm0\n"
+ "movzx byte ptr (%edx),%eax\n"
"paddsw _coefficients_RGB_V(,%ebx,8),%mm0\n"
- "mov (%edx),%al\n"
- "mov 0x1(%edx),%bl\n"
+ "movzx byte ptr 0x1(%edx),%ebx\n"
"movq _coefficients_RGB_Y(,%eax,8),%mm1\n"
+ "add $0x2,%edx\n"
"movq _coefficients_RGB_Y(,%ebx,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
@@ -456,16 +446,10 @@ void ConvertYV12ToRGB32Row(const uint8* y_buf,
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
- "add $0x2,%edx\n"
- "add $0x1,%edi\n"
- "add $0x1,%esi\n"
"sub $0x1,%ecx\n"
"jne 1b\n"
"popa\n"
"ret\n"
- "nop\n" // pad function to 0x70 bytes
- "nop\n"
- "nop\n"
);
#endif // MSC_VER