diff options
author | fbarchard@chromium.org <fbarchard@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-09-01 01:04:13 +0000 |
---|---|---|
committer | fbarchard@chromium.org <fbarchard@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-09-01 01:04:13 +0000 |
commit | 24267bb2ef971593b3b11178320dc4d248940b56 (patch) | |
tree | 40f1c0da0e0f0fcc8ac6406bbb82ff1ecfc44126 /media/base/yuv_row_win.cc | |
parent | 4be85a0d5e0f68724f4e44eb560fcd95c21b40e8 (diff) | |
download | chromium_src-24267bb2ef971593b3b11178320dc4d248940b56.zip chromium_src-24267bb2ef971593b3b11178320dc4d248940b56.tar.gz chromium_src-24267bb2ef971593b3b11178320dc4d248940b56.tar.bz2 |
mmx for linux yuv convert function.
BUG=18449,20718
TEST=play a video on linux without scaling and it should go 4 times faster than with scaling.
Review URL: http://codereview.chromium.org/174442
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@25001 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'media/base/yuv_row_win.cc')
-rw-r--r-- | media/base/yuv_row_win.cc | 114 |
1 files changed, 53 insertions, 61 deletions
diff --git a/media/base/yuv_row_win.cc b/media/base/yuv_row_win.cc index 53dadc4..e6f15bb 100644 --- a/media/base/yuv_row_win.cc +++ b/media/base/yuv_row_win.cc @@ -4,11 +4,7 @@ #include "media/base/yuv_row.h" -// Enable bilinear filtering by turning on the following macro. -// #define MEDIA_BILINEAR_FILTER 1 - -namespace media { - +extern "C" { #define RGBY(i) { \ static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ @@ -32,8 +28,7 @@ namespace media { #define MMX_ALIGNED(var) __declspec(align(16)) var -extern "C" { -MMX_ALIGNED(int16 coefficients_RGB_Y[256][4]) = { +MMX_ALIGNED(int16 kCoefficientsRgbY[256][4]) = { RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03), RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07), RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B), @@ -100,7 +95,7 @@ MMX_ALIGNED(int16 coefficients_RGB_Y[256][4]) = { RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), RGBY(0xFF), }; -MMX_ALIGNED(int16 coefficients_RGB_U[256][4]) = { +MMX_ALIGNED(int16 kCoefficientsRgbU[256][4]) = { RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03), RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07), RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B), @@ -167,7 +162,7 @@ MMX_ALIGNED(int16 coefficients_RGB_U[256][4]) = { RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF), }; -MMX_ALIGNED(int16 coefficients_RGB_V[256][4]) = { +MMX_ALIGNED(int16 kCoefficientsRgbV[256][4]) = { RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03), RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07), RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B), @@ -233,7 +228,6 @@ MMX_ALIGNED(int16 coefficients_RGB_V[256][4]) = { RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB), RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF), }; -} // extern "C" #undef RGBHY #undef RGBY @@ -258,20 +252,20 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, mov esi, [esp + 32 + 12] // V mov ebp, [esp + 32 + 16] // rgb mov ecx, [esp + 32 + 20] // width - jmp wend + jmp convertend - wloop : + convertloop : movzx eax, byte ptr [edi] add edi, 1 movzx ebx, byte ptr [esi] add esi, 1 - movq mm0, [coefficients_RGB_U + 8 * eax] + movq mm0, [kCoefficientsRgbU + 8 * eax] movzx eax, byte ptr [edx] - paddsw mm0, [coefficients_RGB_V + 8 * ebx] + paddsw mm0, [kCoefficientsRgbV + 8 * ebx] movzx ebx, byte ptr [edx + 1] - movq mm1, [coefficients_RGB_Y + 8 * eax] + movq mm1, [kCoefficientsRgbY + 8 * eax] add edx, 2 - movq mm2, [coefficients_RGB_Y + 8 * ebx] + movq mm2, [kCoefficientsRgbY + 8 * ebx] paddsw mm1, mm0 paddsw mm2, mm0 psraw mm1, 6 @@ -279,24 +273,24 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, packuswb mm1, mm2 movntq [ebp], mm1 add ebp, 8 - wend : + convertend : sub ecx, 2 - jns wloop + jns convertloop and ecx, 1 // odd number of pixels? - jz wdone + jz convertdone movzx eax, byte ptr [edi] - movq mm0, [coefficients_RGB_U + 8 * eax] + movq mm0, [kCoefficientsRgbU + 8 * eax] movzx eax, byte ptr [esi] - paddsw mm0, [coefficients_RGB_V + 8 * eax] + paddsw mm0, [kCoefficientsRgbV + 8 * eax] movzx eax, byte ptr [edx] - movq mm1, [coefficients_RGB_Y + 8 * eax] + movq mm1, [kCoefficientsRgbY + 8 * eax] paddsw mm1, mm0 psraw mm1, 6 packuswb mm1, mm1 movd [ebp], mm1 - wdone : + convertdone : popad ret @@ -323,16 +317,16 @@ void ConvertYUVToRGB32Row(const uint8* y_buf, wloop : movzx eax, byte ptr [edi] add edi, ebx - movq mm0, [coefficients_RGB_U + 8 * eax] + movq mm0, [kCoefficientsRgbU + 8 * eax] movzx eax, byte ptr [esi] add esi, ebx - paddsw mm0, [coefficients_RGB_V + 8 * eax] + paddsw mm0, [kCoefficientsRgbV + 8 * eax] movzx eax, byte ptr [edx] add edx, ebx - movq mm1, [coefficients_RGB_Y + 8 * eax] + movq mm1, [kCoefficientsRgbY + 8 * eax] movzx eax, byte ptr [edx] add edx, ebx - movq mm2, [coefficients_RGB_Y + 8 * eax] + movq mm2, [kCoefficientsRgbY + 8 * eax] paddsw mm1, mm0 paddsw mm2, mm0 psraw mm1, 6 @@ -348,11 +342,11 @@ void ConvertYUVToRGB32Row(const uint8* y_buf, jz wdone movzx eax, byte ptr [edi] - movq mm0, [coefficients_RGB_U + 8 * eax] + movq mm0, [kCoefficientsRgbU + 8 * eax] movzx eax, byte ptr [esi] - paddsw mm0, [coefficients_RGB_V + 8 * eax] + paddsw mm0, [kCoefficientsRgbV + 8 * eax] movzx eax, byte ptr [edx] - movq mm1, [coefficients_RGB_Y + 8 * eax] + movq mm1, [kCoefficientsRgbY + 8 * eax] paddsw mm1, mm0 psraw mm1, 6 packuswb mm1, mm1 @@ -385,17 +379,17 @@ void RotateConvertYUVToRGB32Row(const uint8* y_buf, movzx eax, byte ptr [edi] mov ebx, [esp + 32 + 28] // uvstep add edi, ebx - movq mm0, [coefficients_RGB_U + 8 * eax] + movq mm0, [kCoefficientsRgbU + 8 * eax] movzx eax, byte ptr [esi] add esi, ebx - paddsw mm0, [coefficients_RGB_V + 8 * eax] + paddsw mm0, [kCoefficientsRgbV + 8 * eax] movzx eax, byte ptr [edx] mov ebx, [esp + 32 + 24] // ystep add edx, ebx - movq mm1, [coefficients_RGB_Y + 8 * eax] + movq mm1, [kCoefficientsRgbY + 8 * eax] movzx eax, byte ptr [edx] add edx, ebx - movq mm2, [coefficients_RGB_Y + 8 * eax] + movq mm2, [kCoefficientsRgbY + 8 * eax] paddsw mm1, mm0 paddsw mm2, mm0 psraw mm1, 6 @@ -411,11 +405,11 @@ void RotateConvertYUVToRGB32Row(const uint8* y_buf, jz wdone movzx eax, byte ptr [edi] - movq mm0, [coefficients_RGB_U + 8 * eax] + movq mm0, [kCoefficientsRgbU + 8 * eax] movzx eax, byte ptr [esi] - paddsw mm0, [coefficients_RGB_V + 8 * eax] + paddsw mm0, [kCoefficientsRgbV + 8 * eax] movzx eax, byte ptr [edx] - movq mm1, [coefficients_RGB_Y + 8 * eax] + movq mm1, [kCoefficientsRgbY + 8 * eax] paddsw mm1, mm0 psraw mm1, 6 packuswb mm1, mm1 @@ -447,10 +441,10 @@ void DoubleYUVToRGB32Row(const uint8* y_buf, add edi, 1 movzx ebx, byte ptr [esi] add esi, 1 - movq mm0, [coefficients_RGB_U + 8 * eax] + movq mm0, [kCoefficientsRgbU + 8 * eax] movzx eax, byte ptr [edx] - paddsw mm0, [coefficients_RGB_V + 8 * ebx] - movq mm1, [coefficients_RGB_Y + 8 * eax] + paddsw mm0, [kCoefficientsRgbV + 8 * ebx] + movq mm1, [kCoefficientsRgbY + 8 * eax] paddsw mm1, mm0 psraw mm1, 6 packuswb mm1, mm1 @@ -459,7 +453,7 @@ void DoubleYUVToRGB32Row(const uint8* y_buf, movzx ebx, byte ptr [edx + 1] add edx, 2 - paddsw mm0, [coefficients_RGB_Y + 8 * ebx] + paddsw mm0, [kCoefficientsRgbY + 8 * ebx] psraw mm0, 6 packuswb mm0, mm0 punpckldq mm0, mm0 @@ -473,11 +467,11 @@ void DoubleYUVToRGB32Row(const uint8* y_buf, jz wdone movzx eax, byte ptr [edi] - movq mm0, [coefficients_RGB_U + 8 * eax] + movq mm0, [kCoefficientsRgbU + 8 * eax] movzx eax, byte ptr [esi] - paddsw mm0, [coefficients_RGB_V + 8 * eax] + paddsw mm0, [kCoefficientsRgbV + 8 * eax] movzx eax, byte ptr [edx] - movq mm1, [coefficients_RGB_Y + 8 * eax] + movq mm1, [kCoefficientsRgbY + 8 * eax] paddsw mm1, mm0 psraw mm1, 6 packuswb mm1, mm1 @@ -514,27 +508,27 @@ void ScaleYUVToRGB32Row(const uint8* y_buf, mov ebp, [esp + 32 + 16] // rgb mov ecx, [esp + 32 + 20] // width xor ebx, ebx // x - jmp wend + jmp scaleend - wloop : + scaleloop : mov eax, ebx sar eax, 5 movzx eax, byte ptr [edi + eax] - movq mm0, [coefficients_RGB_U + 8 * eax] + movq mm0, [kCoefficientsRgbU + 8 * eax] mov eax, ebx sar eax, 5 movzx eax, byte ptr [esi + eax] - paddsw mm0, [coefficients_RGB_V + 8 * eax] + paddsw mm0, [kCoefficientsRgbV + 8 * eax] mov eax, ebx add ebx, [esp + 32 + 24] // x += dx sar eax, 4 movzx eax, byte ptr [edx + eax] - movq mm1, [coefficients_RGB_Y + 8 * eax] + movq mm1, [kCoefficientsRgbY + 8 * eax] mov eax, ebx add ebx, [esp + 32 + 24] // x += dx sar eax, 4 movzx eax, byte ptr [edx + eax] - movq mm2, [coefficients_RGB_Y + 8 * eax] + movq mm2, [kCoefficientsRgbY + 8 * eax] paddsw mm1, mm0 paddsw mm2, mm0 psraw mm1, 6 @@ -542,29 +536,29 @@ void ScaleYUVToRGB32Row(const uint8* y_buf, packuswb mm1, mm2 movntq [ebp], mm1 add ebp, 8 - wend : + scaleend : sub ecx, 2 - jns wloop + jns scaleloop and ecx, 1 // odd number of pixels? - jz wdone + jz scaledone mov eax, ebx sar eax, 5 movzx eax, byte ptr [edi + eax] - movq mm0, [coefficients_RGB_U + 8 * eax] + movq mm0, [kCoefficientsRgbU + 8 * eax] mov eax, ebx sar eax, 5 movzx eax, byte ptr [esi + eax] - paddsw mm0, [coefficients_RGB_V + 8 * eax] + paddsw mm0, [kCoefficientsRgbV + 8 * eax] mov eax, ebx sar eax, 4 movzx eax, byte ptr [edx + eax] - movq mm1, [coefficients_RGB_Y + 8 * eax] + movq mm1, [kCoefficientsRgbY + 8 * eax] mov eax, ebx sar eax, 4 movzx eax, byte ptr [edx + eax] - movq mm2, [coefficients_RGB_Y + 8 * eax] + movq mm2, [kCoefficientsRgbY + 8 * eax] paddsw mm1, mm0 paddsw mm2, mm0 psraw mm1, 6 @@ -572,12 +566,10 @@ void ScaleYUVToRGB32Row(const uint8* y_buf, packuswb mm1, mm2 movd [ebp], mm1 - wdone : - + scaledone : popad ret } } - -} // namespace media +} // extern "C" |