diff options
author | hclam@chromium.org <hclam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-02-01 20:59:30 +0000 |
---|---|---|
committer | hclam@chromium.org <hclam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-02-01 20:59:30 +0000 |
commit | 699158b7034b3027ce0fb11ef4e134294bc6d6ce (patch) | |
tree | 9e57535c1edcfbac7eea8cbc1bddf184497d2da9 /media | |
parent | 9c99d1ce15520712578e2b7e9102c909e4ab379c (diff) | |
download | chromium_src-699158b7034b3027ce0fb11ef4e134294bc6d6ce.zip chromium_src-699158b7034b3027ce0fb11ef4e134294bc6d6ce.tar.gz chromium_src-699158b7034b3027ce0fb11ef4e134294bc6d6ce.tar.bz2 |
RGB to YUV conversion using SSE2
This code uses SSE2 intrinsics with the feature of 2x2 subsampling for U and V.
Performance compared to a pure C version is about 20% faster with better
quality.
BUG=None
TEST=None
Review URL: http://codereview.chromium.org/6268018
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@73339 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'media')
-rw-r--r-- | media/base/yuv_convert.h | 10 | ||||
-rw-r--r-- | media/base/yuv_convert_c.cc | 50 | ||||
-rw-r--r-- | media/base/yuv_convert_sse2.cc | 189 | ||||
-rw-r--r-- | media/base/yuv_row.h | 9 | ||||
-rw-r--r-- | media/base/yuv_row_table.cc | 224 | ||||
-rw-r--r-- | media/media.gyp | 1 | ||||
-rw-r--r-- | media/tools/scaler_bench/scaler_bench.cc | 31 |
7 files changed, 514 insertions, 0 deletions
diff --git a/media/base/yuv_convert.h b/media/base/yuv_convert.h
index 24a2c4e..ab908b2 100644
--- a/media/base/yuv_convert.h
+++ b/media/base/yuv_convert.h
@@ -67,6 +67,16 @@ void ScaleYUVToRGB32(const uint8* yplane,
                      Rotate view_rotate,
                      ScaleFilter filter);
 
+void ConvertRGB32ToYUV(const uint8* rgbframe,  // Packed RGB32, 4 bytes/pixel.
+                       uint8* yplane,          // Full-resolution Y output.
+                       uint8* uplane,          // 2x2-subsampled U output.
+                       uint8* vplane,          // 2x2-subsampled V output.
+                       int width,              // Frame width in pixels.
+                       int height,             // Frame height in rows.
+                       int rgbstride,          // Bytes per |rgbframe| row.
+                       int ystride,            // Bytes per |yplane| row.
+                       int uvstride);          // Bytes per U/V row.
+
 }  // namespace media
 
 #endif  // MEDIA_BASE_YUV_CONVERT_H_
diff --git a/media/base/yuv_convert_c.cc b/media/base/yuv_convert_c.cc
new file mode 100644
index 0000000..b5d8345
--- /dev/null
+++ b/media/base/yuv_convert_c.cc
@@ -0,0 +1,50 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "media/base/yuv_convert.h"
+
+namespace media {
+
+// Clamps |x| to the range of an unsigned byte, [0, 255].
+static int clip_byte(int x) {
+  if (x > 255)
+    return 255;
+  return x < 0 ? 0 : x;
+}
+
+// Converts a packed RGB32 frame to planar YUV. Y is written for every pixel;
+// U and V are sampled from the top-left pixel of each 2x2 block.
+void ConvertRGB32ToYUV(const uint8* rgbframe,
+                       uint8* yplane,
+                       uint8* uplane,
+                       uint8* vplane,
+                       int width,
+                       int height,
+                       int rgbstride,
+                       int ystride,
+                       int uvstride) {
+  for (int i = 0; i < height; ++i) {
+    for (int j = 0; j < width; ++j) {
+      // RGB32 is 4 bytes per pixel; pixel[0] is blue, [1] green, [2] red.
+      const uint8* pixel = rgbframe + 4 * j;
+      yplane[j] = clip_byte(((pixel[2] * 66 + pixel[1] * 129 +
+                              pixel[0] * 25 + 128) >> 8) + 16);
+      if (i % 2 == 0 && j % 2 == 0) {
+        uplane[j / 2] = clip_byte(((pixel[2] * -38 + pixel[1] * -74 +
+                                    pixel[0] * 112 + 128) >> 8) + 128);
+        vplane[j / 2] = clip_byte(((pixel[2] * 112 + pixel[1] * -94 +
+                                    pixel[0] * -18 + 128) >> 8) + 128);
+      }
+    }
+
+    rgbframe += rgbstride;
+    yplane += ystride;
+    if (i % 2 == 0) {  // The chroma planes advance once per pair of rows.
+      uplane += uvstride;
+      vplane += uvstride;
+    }
+  }
+}
+
+}  // namespace media
diff --git a/media/base/yuv_convert_sse2.cc b/media/base/yuv_convert_sse2.cc
new file mode 100644
index 0000000..2c1beae
--- /dev/null
+++ b/media/base/yuv_convert_sse2.cc
@@ -0,0 +1,189 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "media/base/yuv_convert.h"
+#include "media/base/yuv_row.h"
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#else
+#include <mmintrin.h>
+#include <emmintrin.h>
+#endif
+
+namespace media {
+
+// This is the final offset for the conversion from signed yuv values to
+// unsigned values. It is arranged so that offset of 16 is applied to Y
+// components and 128 is added to UV components for 2 pixels.
+SIMD_ALIGNED(const int16 kYuvOffset[8]) = {16, 0, 128, 128, 16, 0, 128, 128};
+
+// Converts a pair of RGB32 rows, |width| pixels each, into two rows of Y and
+// one row each of 2x2-averaged U and V. |width| is expected to be even.
+void FastConvertRGB32ToYUVRow(const uint8* rgb_buf_1,
+                              const uint8* rgb_buf_2,
+                              uint8* y_buf_1,
+                              uint8* y_buf_2,
+                              uint8* u_buf,
+                              uint8* v_buf,
+                              int width) {
+  const uint64* r_table = reinterpret_cast<uint64*>(kCoefficientsYuvR);
+  const uint64* g_table = reinterpret_cast<uint64*>(kCoefficientsYuvR + 256);
+  const uint64* b_table = reinterpret_cast<uint64*>(kCoefficientsYuvR + 512);
+  uint16* y_row_1 = reinterpret_cast<uint16*>(y_buf_1);
+  uint16* y_row_2 = reinterpret_cast<uint16*>(y_buf_2);
+  const uint32* rgb_row_1 = reinterpret_cast<const uint32*>(rgb_buf_1);
+  const uint32* rgb_row_2 = reinterpret_cast<const uint32*>(rgb_buf_2);
+
+  SIMD_ALIGNED(int8 output_stage[8]);
+  __m128i offset = _mm_load_si128(reinterpret_cast<const __m128i*>(kYuvOffset));
+  // Every 16-bit lane is 1, so _mm_madd_epi16 by it adds adjacent lane pairs.
+  const __m128i unity = _mm_set1_epi16(1);
+
+  for (int i = 0; i < width; i += 2) {
+    // Load the first pixel (a).
+    unsigned int pixel = *rgb_row_1++;
+    __m128i b_comp_a = _mm_loadl_epi64(
+        reinterpret_cast<const __m128i*>(b_table + (pixel & 0xFF)));
+    pixel >>= 8;
+    __m128i g_comp_a = _mm_loadl_epi64(
+        reinterpret_cast<const __m128i*>(g_table + (pixel & 0xFF)));
+    pixel >>= 8;
+    __m128i r_comp_a = _mm_loadl_epi64(
+        reinterpret_cast<const __m128i*>(r_table + (pixel & 0xFF)));
+
+    // Load the first pixel (c) in the second row.
+    pixel = *rgb_row_2++;
+    __m128i b_comp_c = _mm_loadl_epi64(
+        reinterpret_cast<const __m128i*>(b_table + (pixel & 0xFF)));
+    pixel >>= 8;
+    __m128i g_comp_c = _mm_loadl_epi64(
+        reinterpret_cast<const __m128i*>(g_table + (pixel & 0xFF)));
+    pixel >>= 8;
+    __m128i r_comp_c = _mm_loadl_epi64(
+        reinterpret_cast<const __m128i*>(r_table + (pixel & 0xFF)));
+
+    // Pack two pixels into one register.
+    __m128i b_comp_ac = _mm_unpacklo_epi64(b_comp_a, b_comp_c);
+    __m128i g_comp_ac = _mm_unpacklo_epi64(g_comp_a, g_comp_c);
+    __m128i r_comp_ac = _mm_unpacklo_epi64(r_comp_a, r_comp_c);
+
+    // Add the coefficients together.
+    //                  127                  0
+    // |yuv_ac| will be (Vc Uc 0 Yc Va Ua 0 Ya).
+    __m128i yuv_ac = _mm_adds_epi16(r_comp_ac,
+                                    _mm_adds_epi16(g_comp_ac, b_comp_ac));
+
+    // Right shift 6 bits to perform divide by 64 and then add the offset.
+    yuv_ac = _mm_srai_epi16(yuv_ac, 6);
+    yuv_ac = _mm_adds_epi16(yuv_ac, offset);
+
+    // Now perform on the second column on pixel (b).
+    pixel = *rgb_row_1++;
+    __m128i b_comp_b = _mm_loadl_epi64(
+        reinterpret_cast<const __m128i*>(b_table + (pixel & 0xFF)));
+    pixel >>= 8;
+    __m128i g_comp_b = _mm_loadl_epi64(
+        reinterpret_cast<const __m128i*>(g_table + (pixel & 0xFF)));
+    pixel >>= 8;
+    __m128i r_comp_b = _mm_loadl_epi64(
+        reinterpret_cast<const __m128i*>(r_table + (pixel & 0xFF)));
+
+    // Load the second pixel (d) in the second row.
+    pixel = *rgb_row_2++;
+    __m128i b_comp_d = _mm_loadl_epi64(
+        reinterpret_cast<const __m128i*>(b_table + (pixel & 0xFF)));
+    pixel >>= 8;
+    __m128i g_comp_d = _mm_loadl_epi64(
+        reinterpret_cast<const __m128i*>(g_table + (pixel & 0xFF)));
+    pixel >>= 8;
+    __m128i r_comp_d = _mm_loadl_epi64(
+        reinterpret_cast<const __m128i*>(r_table + (pixel & 0xFF)));
+
+    // Pack two pixels into one register.
+    __m128i b_comp_bd = _mm_unpacklo_epi64(b_comp_b, b_comp_d);
+    __m128i g_comp_bd = _mm_unpacklo_epi64(g_comp_b, g_comp_d);
+    __m128i r_comp_bd = _mm_unpacklo_epi64(r_comp_b, r_comp_d);
+
+    // Add the coefficients together.
+    //                  127                  0
+    // |yuv_bd| will be (Vd Ud 0 Yd Vb Ub 0 Yb).
+    __m128i yuv_bd = _mm_adds_epi16(r_comp_bd,
+                                    _mm_adds_epi16(g_comp_bd, b_comp_bd));
+
+    // Right shift 6 bits to perform divide by 64 and then add the offset.
+    yuv_bd = _mm_srai_epi16(yuv_bd, 6);
+    yuv_bd = _mm_adds_epi16(yuv_bd, offset);
+
+    // |yuv_row_1| will have (Vb Va Ub Ua 0 0 Yb Ya) and
+    // |yuv_row_2| will have (Vd Vc Ud Uc 0 0 Yd Yc).
+    __m128i yuv_row_1 = _mm_unpacklo_epi16(yuv_ac, yuv_bd);
+    __m128i yuv_row_2 = _mm_unpackhi_epi16(yuv_ac, yuv_bd);
+
+    // |y_comp| will have (0 0 0 0 Yd Yc Yb Ya).
+    __m128i y_comp = _mm_unpacklo_epi32(yuv_row_1, yuv_row_2);
+
+    // Down size to 8 bits.
+    y_comp = _mm_packus_epi16(y_comp, y_comp);
+
+    // |uv_comp| will have (Vd Vc Vb Va Ud Uc Ub Ua).
+    __m128i uv_comp = _mm_unpackhi_epi32(yuv_row_1, yuv_row_2);
+
+    // |uv_comp| will have (Vc + Vd, Va + Vb, Uc + Ud, Ua + Ub).
+    uv_comp = _mm_madd_epi16(uv_comp, unity);
+
+    // Pack |uv_comp| into 16 bit signed integers.
+    uv_comp = _mm_packs_epi32(uv_comp, uv_comp);
+
+    // And then do a multiply-add again. |uv_comp| will have 4 32-bit integers.
+    uv_comp = _mm_madd_epi16(uv_comp, unity);
+
+    // Do a right shift to perform divide by 4.
+    uv_comp = _mm_srai_epi32(uv_comp, 2);
+
+    // And then pack twice to form 2 8-bit unsigned integers of U and V.
+    uv_comp = _mm_packs_epi32(uv_comp, uv_comp);
+    uv_comp = _mm_packus_epi16(uv_comp, uv_comp);
+
+    // And then finally pack the output.
+    __m128i output = _mm_unpacklo_epi32(y_comp, uv_comp);
+
+    // Store the output.
+    _mm_storel_epi64(reinterpret_cast<__m128i*>(output_stage), output);
+
+    *y_row_1++ = *reinterpret_cast<uint16*>(output_stage);
+    *y_row_2++ = *reinterpret_cast<uint16*>(output_stage + 2);
+    *u_buf++ = output_stage[4];
+    *v_buf++ = output_stage[5];
+  }
+}
+
+// Converts an RGB32 frame to planar YUV, two rows per iteration, using SSE2.
+// TODO(hclam): Add code to do runtime SSE2 detection.
+void ConvertRGB32ToYUV(const uint8* rgbframe,
+                       uint8* yplane,
+                       uint8* uplane,
+                       uint8* vplane,
+                       int width,
+                       int height,
+                       int rgbstride,
+                       int ystride,
+                       int uvstride) {
+  // Round |width| down to an even value; a trailing odd column is skipped.
+  width = (width / 2) * 2;
+  for (int i = 0; i < height; i += 2) {
+    // Odd |height|: the last row has no partner, so reuse it as its own
+    // partner; nothing past the end of the frame is then read or written.
+    const bool has_next = i + 1 < height;
+    FastConvertRGB32ToYUVRow(rgbframe, rgbframe + (has_next ? rgbstride : 0),
+                             yplane, yplane + (has_next ? ystride : 0),
+                             uplane, vplane, width);
+    rgbframe += 2 * rgbstride;
+    yplane += 2 * ystride;
+    uplane += uvstride;
+    vplane += uvstride;
+  }
+}
+
+}  // namespace media
diff --git a/media/base/yuv_row.h b/media/base/yuv_row.h
index 0a2990b..2414ccf 100644
--- a/media/base/yuv_row.h
+++ b/media/base/yuv_row.h
@@ -71,12 +71,21 @@ void LinearScaleYUVToRGB32Row(const uint8* y_buf,
                               int width,
                               int source_dx);
 
+void FastConvertRGB32ToYUVRow(const uint8* rgb_buf_1,  // First RGB32 row.
+                              const uint8* rgb_buf_2,  // Second RGB32 row.
+                              uint8* y_buf_1,          // Y for the first row.
+                              uint8* y_buf_2,          // Y for the second row.
+                              uint8* u_buf,            // U, one per 2x2 block.
+                              uint8* v_buf,            // V, one per 2x2 block.
+                              int width);              // Pixels per row.
+
 #if defined(_MSC_VER)
 #define SIMD_ALIGNED(var) __declspec(align(16)) var
 #else
 #define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
 #endif
 extern SIMD_ALIGNED(int16 kCoefficientsRgbY[768][4]);
+extern SIMD_ALIGNED(int16 kCoefficientsYuvR[768][4]);
 
 // Method to force C version.
//#define USE_MMX 0 diff --git a/media/base/yuv_row_table.cc b/media/base/yuv_row_table.cc index 296380b..20d8c59 100644 --- a/media/base/yuv_row_table.cc +++ b/media/base/yuv_row_table.cc @@ -27,6 +27,27 @@ extern "C" { 0 \ } +#define YUVR(i) { \ + static_cast<int16>(0.254 * 64 * i + 0.5), \ + 0, \ + static_cast<int16>(-0.148 * 64 * i + 0.5), \ + static_cast<int16>(0.439 * 64 * i + 0.5), \ +} + +#define YUVG(i) { \ + static_cast<int16>(0.504 * 64 * i + 0.5), \ + 0, \ + static_cast<int16>(-0.291 * 64 * i + 0.5), \ + static_cast<int16>(-0.368 * 64 * i + 0.5), \ +} + +#define YUVB(i) { \ + static_cast<int16>(0.098 * 64 * i + 0.5), \ + 0, \ + static_cast<int16>(0.439 * 64 * i + 0.5), \ + static_cast<int16>(-0.071 * 64 * i + 0.5), \ +} + SIMD_ALIGNED(int16 kCoefficientsRgbY[256 * 3][4]) = { RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03), RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07), @@ -226,8 +247,211 @@ SIMD_ALIGNED(int16 kCoefficientsRgbY[256 * 3][4]) = { RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF), }; +SIMD_ALIGNED(int16 kCoefficientsYuvR[256 * 3][4]) = { + // R table. 
+ YUVR(0x00), YUVR(0x01), YUVR(0x02), YUVR(0x03), + YUVR(0x04), YUVR(0x05), YUVR(0x06), YUVR(0x07), + YUVR(0x08), YUVR(0x09), YUVR(0x0A), YUVR(0x0B), + YUVR(0x0C), YUVR(0x0D), YUVR(0x0E), YUVR(0x0F), + YUVR(0x10), YUVR(0x11), YUVR(0x12), YUVR(0x13), + YUVR(0x14), YUVR(0x15), YUVR(0x16), YUVR(0x17), + YUVR(0x18), YUVR(0x19), YUVR(0x1A), YUVR(0x1B), + YUVR(0x1C), YUVR(0x1D), YUVR(0x1E), YUVR(0x1F), + YUVR(0x20), YUVR(0x21), YUVR(0x22), YUVR(0x23), + YUVR(0x24), YUVR(0x25), YUVR(0x26), YUVR(0x27), + YUVR(0x28), YUVR(0x29), YUVR(0x2A), YUVR(0x2B), + YUVR(0x2C), YUVR(0x2D), YUVR(0x2E), YUVR(0x2F), + YUVR(0x30), YUVR(0x31), YUVR(0x32), YUVR(0x33), + YUVR(0x34), YUVR(0x35), YUVR(0x36), YUVR(0x37), + YUVR(0x38), YUVR(0x39), YUVR(0x3A), YUVR(0x3B), + YUVR(0x3C), YUVR(0x3D), YUVR(0x3E), YUVR(0x3F), + YUVR(0x40), YUVR(0x41), YUVR(0x42), YUVR(0x43), + YUVR(0x44), YUVR(0x45), YUVR(0x46), YUVR(0x47), + YUVR(0x48), YUVR(0x49), YUVR(0x4A), YUVR(0x4B), + YUVR(0x4C), YUVR(0x4D), YUVR(0x4E), YUVR(0x4F), + YUVR(0x50), YUVR(0x51), YUVR(0x52), YUVR(0x53), + YUVR(0x54), YUVR(0x55), YUVR(0x56), YUVR(0x57), + YUVR(0x58), YUVR(0x59), YUVR(0x5A), YUVR(0x5B), + YUVR(0x5C), YUVR(0x5D), YUVR(0x5E), YUVR(0x5F), + YUVR(0x60), YUVR(0x61), YUVR(0x62), YUVR(0x63), + YUVR(0x64), YUVR(0x65), YUVR(0x66), YUVR(0x67), + YUVR(0x68), YUVR(0x69), YUVR(0x6A), YUVR(0x6B), + YUVR(0x6C), YUVR(0x6D), YUVR(0x6E), YUVR(0x6F), + YUVR(0x70), YUVR(0x71), YUVR(0x72), YUVR(0x73), + YUVR(0x74), YUVR(0x75), YUVR(0x76), YUVR(0x77), + YUVR(0x78), YUVR(0x79), YUVR(0x7A), YUVR(0x7B), + YUVR(0x7C), YUVR(0x7D), YUVR(0x7E), YUVR(0x7F), + YUVR(0x80), YUVR(0x81), YUVR(0x82), YUVR(0x83), + YUVR(0x84), YUVR(0x85), YUVR(0x86), YUVR(0x87), + YUVR(0x88), YUVR(0x89), YUVR(0x8A), YUVR(0x8B), + YUVR(0x8C), YUVR(0x8D), YUVR(0x8E), YUVR(0x8F), + YUVR(0x90), YUVR(0x91), YUVR(0x92), YUVR(0x93), + YUVR(0x94), YUVR(0x95), YUVR(0x96), YUVR(0x97), + YUVR(0x98), YUVR(0x99), YUVR(0x9A), YUVR(0x9B), + YUVR(0x9C), YUVR(0x9D), YUVR(0x9E), YUVR(0x9F), 
+ YUVR(0xA0), YUVR(0xA1), YUVR(0xA2), YUVR(0xA3), + YUVR(0xA4), YUVR(0xA5), YUVR(0xA6), YUVR(0xA7), + YUVR(0xA8), YUVR(0xA9), YUVR(0xAA), YUVR(0xAB), + YUVR(0xAC), YUVR(0xAD), YUVR(0xAE), YUVR(0xAF), + YUVR(0xB0), YUVR(0xB1), YUVR(0xB2), YUVR(0xB3), + YUVR(0xB4), YUVR(0xB5), YUVR(0xB6), YUVR(0xB7), + YUVR(0xB8), YUVR(0xB9), YUVR(0xBA), YUVR(0xBB), + YUVR(0xBC), YUVR(0xBD), YUVR(0xBE), YUVR(0xBF), + YUVR(0xC0), YUVR(0xC1), YUVR(0xC2), YUVR(0xC3), + YUVR(0xC4), YUVR(0xC5), YUVR(0xC6), YUVR(0xC7), + YUVR(0xC8), YUVR(0xC9), YUVR(0xCA), YUVR(0xCB), + YUVR(0xCC), YUVR(0xCD), YUVR(0xCE), YUVR(0xCF), + YUVR(0xD0), YUVR(0xD1), YUVR(0xD2), YUVR(0xD3), + YUVR(0xD4), YUVR(0xD5), YUVR(0xD6), YUVR(0xD7), + YUVR(0xD8), YUVR(0xD9), YUVR(0xDA), YUVR(0xDB), + YUVR(0xDC), YUVR(0xDD), YUVR(0xDE), YUVR(0xDF), + YUVR(0xE0), YUVR(0xE1), YUVR(0xE2), YUVR(0xE3), + YUVR(0xE4), YUVR(0xE5), YUVR(0xE6), YUVR(0xE7), + YUVR(0xE8), YUVR(0xE9), YUVR(0xEA), YUVR(0xEB), + YUVR(0xEC), YUVR(0xED), YUVR(0xEE), YUVR(0xEF), + YUVR(0xF0), YUVR(0xF1), YUVR(0xF2), YUVR(0xF3), + YUVR(0xF4), YUVR(0xF5), YUVR(0xF6), YUVR(0xF7), + YUVR(0xF8), YUVR(0xF9), YUVR(0xFA), YUVR(0xFB), + YUVR(0xFC), YUVR(0xFD), YUVR(0xFE), YUVR(0xFF), + + // G table. 
+ YUVG(0x00), YUVG(0x01), YUVG(0x02), YUVG(0x03), + YUVG(0x04), YUVG(0x05), YUVG(0x06), YUVG(0x07), + YUVG(0x08), YUVG(0x09), YUVG(0x0A), YUVG(0x0B), + YUVG(0x0C), YUVG(0x0D), YUVG(0x0E), YUVG(0x0F), + YUVG(0x10), YUVG(0x11), YUVG(0x12), YUVG(0x13), + YUVG(0x14), YUVG(0x15), YUVG(0x16), YUVG(0x17), + YUVG(0x18), YUVG(0x19), YUVG(0x1A), YUVG(0x1B), + YUVG(0x1C), YUVG(0x1D), YUVG(0x1E), YUVG(0x1F), + YUVG(0x20), YUVG(0x21), YUVG(0x22), YUVG(0x23), + YUVG(0x24), YUVG(0x25), YUVG(0x26), YUVG(0x27), + YUVG(0x28), YUVG(0x29), YUVG(0x2A), YUVG(0x2B), + YUVG(0x2C), YUVG(0x2D), YUVG(0x2E), YUVG(0x2F), + YUVG(0x30), YUVG(0x31), YUVG(0x32), YUVG(0x33), + YUVG(0x34), YUVG(0x35), YUVG(0x36), YUVG(0x37), + YUVG(0x38), YUVG(0x39), YUVG(0x3A), YUVG(0x3B), + YUVG(0x3C), YUVG(0x3D), YUVG(0x3E), YUVG(0x3F), + YUVG(0x40), YUVG(0x41), YUVG(0x42), YUVG(0x43), + YUVG(0x44), YUVG(0x45), YUVG(0x46), YUVG(0x47), + YUVG(0x48), YUVG(0x49), YUVG(0x4A), YUVG(0x4B), + YUVG(0x4C), YUVG(0x4D), YUVG(0x4E), YUVG(0x4F), + YUVG(0x50), YUVG(0x51), YUVG(0x52), YUVG(0x53), + YUVG(0x54), YUVG(0x55), YUVG(0x56), YUVG(0x57), + YUVG(0x58), YUVG(0x59), YUVG(0x5A), YUVG(0x5B), + YUVG(0x5C), YUVG(0x5D), YUVG(0x5E), YUVG(0x5F), + YUVG(0x60), YUVG(0x61), YUVG(0x62), YUVG(0x63), + YUVG(0x64), YUVG(0x65), YUVG(0x66), YUVG(0x67), + YUVG(0x68), YUVG(0x69), YUVG(0x6A), YUVG(0x6B), + YUVG(0x6C), YUVG(0x6D), YUVG(0x6E), YUVG(0x6F), + YUVG(0x70), YUVG(0x71), YUVG(0x72), YUVG(0x73), + YUVG(0x74), YUVG(0x75), YUVG(0x76), YUVG(0x77), + YUVG(0x78), YUVG(0x79), YUVG(0x7A), YUVG(0x7B), + YUVG(0x7C), YUVG(0x7D), YUVG(0x7E), YUVG(0x7F), + YUVG(0x80), YUVG(0x81), YUVG(0x82), YUVG(0x83), + YUVG(0x84), YUVG(0x85), YUVG(0x86), YUVG(0x87), + YUVG(0x88), YUVG(0x89), YUVG(0x8A), YUVG(0x8B), + YUVG(0x8C), YUVG(0x8D), YUVG(0x8E), YUVG(0x8F), + YUVG(0x90), YUVG(0x91), YUVG(0x92), YUVG(0x93), + YUVG(0x94), YUVG(0x95), YUVG(0x96), YUVG(0x97), + YUVG(0x98), YUVG(0x99), YUVG(0x9A), YUVG(0x9B), + YUVG(0x9C), YUVG(0x9D), YUVG(0x9E), YUVG(0x9F), 
+ YUVG(0xA0), YUVG(0xA1), YUVG(0xA2), YUVG(0xA3), + YUVG(0xA4), YUVG(0xA5), YUVG(0xA6), YUVG(0xA7), + YUVG(0xA8), YUVG(0xA9), YUVG(0xAA), YUVG(0xAB), + YUVG(0xAC), YUVG(0xAD), YUVG(0xAE), YUVG(0xAF), + YUVG(0xB0), YUVG(0xB1), YUVG(0xB2), YUVG(0xB3), + YUVG(0xB4), YUVG(0xB5), YUVG(0xB6), YUVG(0xB7), + YUVG(0xB8), YUVG(0xB9), YUVG(0xBA), YUVG(0xBB), + YUVG(0xBC), YUVG(0xBD), YUVG(0xBE), YUVG(0xBF), + YUVG(0xC0), YUVG(0xC1), YUVG(0xC2), YUVG(0xC3), + YUVG(0xC4), YUVG(0xC5), YUVG(0xC6), YUVG(0xC7), + YUVG(0xC8), YUVG(0xC9), YUVG(0xCA), YUVG(0xCB), + YUVG(0xCC), YUVG(0xCD), YUVG(0xCE), YUVG(0xCF), + YUVG(0xD0), YUVG(0xD1), YUVG(0xD2), YUVG(0xD3), + YUVG(0xD4), YUVG(0xD5), YUVG(0xD6), YUVG(0xD7), + YUVG(0xD8), YUVG(0xD9), YUVG(0xDA), YUVG(0xDB), + YUVG(0xDC), YUVG(0xDD), YUVG(0xDE), YUVG(0xDF), + YUVG(0xE0), YUVG(0xE1), YUVG(0xE2), YUVG(0xE3), + YUVG(0xE4), YUVG(0xE5), YUVG(0xE6), YUVG(0xE7), + YUVG(0xE8), YUVG(0xE9), YUVG(0xEA), YUVG(0xEB), + YUVG(0xEC), YUVG(0xED), YUVG(0xEE), YUVG(0xEF), + YUVG(0xF0), YUVG(0xF1), YUVG(0xF2), YUVG(0xF3), + YUVG(0xF4), YUVG(0xF5), YUVG(0xF6), YUVG(0xF7), + YUVG(0xF8), YUVG(0xF9), YUVG(0xFA), YUVG(0xFB), + YUVG(0xFC), YUVG(0xFD), YUVG(0xFE), YUVG(0xFF), + + // B table. 
+ YUVB(0x00), YUVB(0x01), YUVB(0x02), YUVB(0x03), + YUVB(0x04), YUVB(0x05), YUVB(0x06), YUVB(0x07), + YUVB(0x08), YUVB(0x09), YUVB(0x0A), YUVB(0x0B), + YUVB(0x0C), YUVB(0x0D), YUVB(0x0E), YUVB(0x0F), + YUVB(0x10), YUVB(0x11), YUVB(0x12), YUVB(0x13), + YUVB(0x14), YUVB(0x15), YUVB(0x16), YUVB(0x17), + YUVB(0x18), YUVB(0x19), YUVB(0x1A), YUVB(0x1B), + YUVB(0x1C), YUVB(0x1D), YUVB(0x1E), YUVB(0x1F), + YUVB(0x20), YUVB(0x21), YUVB(0x22), YUVB(0x23), + YUVB(0x24), YUVB(0x25), YUVB(0x26), YUVB(0x27), + YUVB(0x28), YUVB(0x29), YUVB(0x2A), YUVB(0x2B), + YUVB(0x2C), YUVB(0x2D), YUVB(0x2E), YUVB(0x2F), + YUVB(0x30), YUVB(0x31), YUVB(0x32), YUVB(0x33), + YUVB(0x34), YUVB(0x35), YUVB(0x36), YUVB(0x37), + YUVB(0x38), YUVB(0x39), YUVB(0x3A), YUVB(0x3B), + YUVB(0x3C), YUVB(0x3D), YUVB(0x3E), YUVB(0x3F), + YUVB(0x40), YUVB(0x41), YUVB(0x42), YUVB(0x43), + YUVB(0x44), YUVB(0x45), YUVB(0x46), YUVB(0x47), + YUVB(0x48), YUVB(0x49), YUVB(0x4A), YUVB(0x4B), + YUVB(0x4C), YUVB(0x4D), YUVB(0x4E), YUVB(0x4F), + YUVB(0x50), YUVB(0x51), YUVB(0x52), YUVB(0x53), + YUVB(0x54), YUVB(0x55), YUVB(0x56), YUVB(0x57), + YUVB(0x58), YUVB(0x59), YUVB(0x5A), YUVB(0x5B), + YUVB(0x5C), YUVB(0x5D), YUVB(0x5E), YUVB(0x5F), + YUVB(0x60), YUVB(0x61), YUVB(0x62), YUVB(0x63), + YUVB(0x64), YUVB(0x65), YUVB(0x66), YUVB(0x67), + YUVB(0x68), YUVB(0x69), YUVB(0x6A), YUVB(0x6B), + YUVB(0x6C), YUVB(0x6D), YUVB(0x6E), YUVB(0x6F), + YUVB(0x70), YUVB(0x71), YUVB(0x72), YUVB(0x73), + YUVB(0x74), YUVB(0x75), YUVB(0x76), YUVB(0x77), + YUVB(0x78), YUVB(0x79), YUVB(0x7A), YUVB(0x7B), + YUVB(0x7C), YUVB(0x7D), YUVB(0x7E), YUVB(0x7F), + YUVB(0x80), YUVB(0x81), YUVB(0x82), YUVB(0x83), + YUVB(0x84), YUVB(0x85), YUVB(0x86), YUVB(0x87), + YUVB(0x88), YUVB(0x89), YUVB(0x8A), YUVB(0x8B), + YUVB(0x8C), YUVB(0x8D), YUVB(0x8E), YUVB(0x8F), + YUVB(0x90), YUVB(0x91), YUVB(0x92), YUVB(0x93), + YUVB(0x94), YUVB(0x95), YUVB(0x96), YUVB(0x97), + YUVB(0x98), YUVB(0x99), YUVB(0x9A), YUVB(0x9B), + YUVB(0x9C), YUVB(0x9D), YUVB(0x9E), YUVB(0x9F), 
+ YUVB(0xA0), YUVB(0xA1), YUVB(0xA2), YUVB(0xA3), + YUVB(0xA4), YUVB(0xA5), YUVB(0xA6), YUVB(0xA7), + YUVB(0xA8), YUVB(0xA9), YUVB(0xAA), YUVB(0xAB), + YUVB(0xAC), YUVB(0xAD), YUVB(0xAE), YUVB(0xAF), + YUVB(0xB0), YUVB(0xB1), YUVB(0xB2), YUVB(0xB3), + YUVB(0xB4), YUVB(0xB5), YUVB(0xB6), YUVB(0xB7), + YUVB(0xB8), YUVB(0xB9), YUVB(0xBA), YUVB(0xBB), + YUVB(0xBC), YUVB(0xBD), YUVB(0xBE), YUVB(0xBF), + YUVB(0xC0), YUVB(0xC1), YUVB(0xC2), YUVB(0xC3), + YUVB(0xC4), YUVB(0xC5), YUVB(0xC6), YUVB(0xC7), + YUVB(0xC8), YUVB(0xC9), YUVB(0xCA), YUVB(0xCB), + YUVB(0xCC), YUVB(0xCD), YUVB(0xCE), YUVB(0xCF), + YUVB(0xD0), YUVB(0xD1), YUVB(0xD2), YUVB(0xD3), + YUVB(0xD4), YUVB(0xD5), YUVB(0xD6), YUVB(0xD7), + YUVB(0xD8), YUVB(0xD9), YUVB(0xDA), YUVB(0xDB), + YUVB(0xDC), YUVB(0xDD), YUVB(0xDE), YUVB(0xDF), + YUVB(0xE0), YUVB(0xE1), YUVB(0xE2), YUVB(0xE3), + YUVB(0xE4), YUVB(0xE5), YUVB(0xE6), YUVB(0xE7), + YUVB(0xE8), YUVB(0xE9), YUVB(0xEA), YUVB(0xEB), + YUVB(0xEC), YUVB(0xED), YUVB(0xEE), YUVB(0xEF), + YUVB(0xF0), YUVB(0xF1), YUVB(0xF2), YUVB(0xF3), + YUVB(0xF4), YUVB(0xF5), YUVB(0xF6), YUVB(0xF7), + YUVB(0xF8), YUVB(0xF9), YUVB(0xFA), YUVB(0xFB), + YUVB(0xFC), YUVB(0xFD), YUVB(0xFE), YUVB(0xFF), +}; + #undef RGBY #undef RGBU #undef RGBV +#undef YUVR +#undef YUVG +#undef YUVB } // extern "C" diff --git a/media/media.gyp b/media/media.gyp index e2cdafd..baba862 100644 --- a/media/media.gyp +++ b/media/media.gyp @@ -113,6 +113,7 @@ 'base/video_frame.h', 'base/yuv_convert.cc', 'base/yuv_convert.h', + 'base/yuv_convert_sse2.cc', 'base/yuv_row_win.cc', 'base/yuv_row_posix.cc', 'base/yuv_row_table.cc', diff --git a/media/tools/scaler_bench/scaler_bench.cc b/media/tools/scaler_bench/scaler_bench.cc index 3457d7c..f9be6c6 100644 --- a/media/tools/scaler_bench/scaler_bench.cc +++ b/media/tools/scaler_bench/scaler_bench.cc @@ -11,6 +11,7 @@ #include "base/command_line.h" #include "base/scoped_vector.h" +#include "base/scoped_ptr.h" #include "base/string_number_conversions.h" #include 
"base/time.h" #include "media/base/video_frame.h" @@ -86,6 +87,34 @@ double BenchmarkSkia() { return static_cast<double>((end - start).InMilliseconds()) / num_frames; } +double BenchmarkRGBToYUV() { + int rgb_stride = source_width * 4; + scoped_array<uint8> rgb_frame(new uint8[rgb_stride * source_height]); + + int y_stride = source_width; + int uv_stride = source_width / 2; + scoped_array<uint8> y_plane(new uint8[y_stride * source_height]); + scoped_array<uint8> u_plane(new uint8[uv_stride * source_height / 2]); + scoped_array<uint8> v_plane(new uint8[uv_stride * source_height / 2]); + + TimeTicks start = TimeTicks::HighResNow(); + + for (int i = 0; i < num_frames; ++i) { + media::ConvertRGB32ToYUV(rgb_frame.get(), + y_plane.get(), + u_plane.get(), + v_plane.get(), + source_width, + source_height, + rgb_stride, + y_stride, + uv_stride); + } + + TimeTicks end = TimeTicks::HighResNow(); + return static_cast<double>((end - start).InMilliseconds()) / num_frames; +} + double BenchmarkFilter(media::ScaleFilter filter) { std::vector<scoped_refptr<VideoFrame> > source_frames; std::vector<scoped_refptr<VideoFrame> > dest_frames; @@ -198,6 +227,8 @@ int main(int argc, const char** argv) { std::cout << "Skia: " << BenchmarkSkia() << "ms/frame" << std::endl; + std::cout << "RGB To YUV: " << BenchmarkRGBToYUV() + << "ms/frame" << std::endl; std::cout << "No filtering: " << BenchmarkFilter(media::FILTER_NONE) << "ms/frame" << std::endl; std::cout << "Bilinear Vertical: " |