summaryrefslogtreecommitdiffstats
path: root/media
diff options
context:
space:
mode:
authorhclam@chromium.org <hclam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-02-01 20:59:30 +0000
committerhclam@chromium.org <hclam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-02-01 20:59:30 +0000
commit699158b7034b3027ce0fb11ef4e134294bc6d6ce (patch)
tree9e57535c1edcfbac7eea8cbc1bddf184497d2da9 /media
parent9c99d1ce15520712578e2b7e9102c909e4ab379c (diff)
downloadchromium_src-699158b7034b3027ce0fb11ef4e134294bc6d6ce.zip
chromium_src-699158b7034b3027ce0fb11ef4e134294bc6d6ce.tar.gz
chromium_src-699158b7034b3027ce0fb11ef4e134294bc6d6ce.tar.bz2
RGB to YUV conversion using SSE2
This code uses SSE2 intrinsics and performs 2x2 subsampling for U and V. It is about 20% faster than a pure C version and produces better quality output. BUG=None TEST=None Review URL: http://codereview.chromium.org/6268018 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@73339 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'media')
-rw-r--r--media/base/yuv_convert.h10
-rw-r--r--media/base/yuv_convert_c.cc50
-rw-r--r--media/base/yuv_convert_sse2.cc189
-rw-r--r--media/base/yuv_row.h9
-rw-r--r--media/base/yuv_row_table.cc224
-rw-r--r--media/media.gyp1
-rw-r--r--media/tools/scaler_bench/scaler_bench.cc31
7 files changed, 514 insertions, 0 deletions
diff --git a/media/base/yuv_convert.h b/media/base/yuv_convert.h
index 24a2c4e..ab908b2 100644
--- a/media/base/yuv_convert.h
+++ b/media/base/yuv_convert.h
@@ -67,6 +67,16 @@ void ScaleYUVToRGB32(const uint8* yplane,
Rotate view_rotate,
ScaleFilter filter);
+void ConvertRGB32ToYUV(const uint8* rgbframe,  // Input frame, 4 bytes per pixel; converted to planar Y, U and V.
+ uint8* yplane,  // Output Y plane, one byte per pixel.
+ uint8* uplane,  // Output U plane, one byte per 2x2 block of pixels.
+ uint8* vplane,  // Output V plane, one byte per 2x2 block of pixels.
+ int width,  // Frame width in pixels.
+ int height,  // Frame height in rows.
+ int rgbstride,  // Byte offset between consecutive rows of |rgbframe|.
+ int ystride,  // Byte offset between consecutive rows of |yplane|.
+ int uvstride);  // Byte offset between consecutive rows of |uplane| and |vplane|.
+
} // namespace media
#endif // MEDIA_BASE_YUV_CONVERT_H_
diff --git a/media/base/yuv_convert_c.cc b/media/base/yuv_convert_c.cc
new file mode 100644
index 0000000..b5d8345
--- /dev/null
+++ b/media/base/yuv_convert_c.cc
@@ -0,0 +1,50 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "media/base/yuv_convert.h"
+
+namespace media {
+
+// Saturates |x| to the range of an unsigned byte, [0, 255].
+static int clip_byte(int x) {
+  if (x < 0)
+    return 0;
+  return x > 255 ? 255 : x;
+}
+
+// Portable C implementation: converts a 32-bit RGB frame to planar YUV with
+// U and V subsampled 2x2.
+//
+// Every input pixel is 4 bytes: byte 0 is used as blue, byte 1 as green and
+// byte 2 as red; byte 3 is ignored. One Y byte is written per pixel. U and V
+// are taken from the top-left pixel of each 2x2 block (even row, even
+// column), so one U and one V byte is written per block.
+//
+// |rgbstride|, |ystride| and |uvstride| are the byte offsets between
+// consecutive rows of the respective buffers.
+void ConvertRGB32ToYUV(const uint8* rgbframe,
+                       uint8* yplane,
+                       uint8* uplane,
+                       uint8* vplane,
+                       int width,
+                       int height,
+                       int rgbstride,
+                       int ystride,
+                       int uvstride) {
+  for (int i = 0; i < height; ++i) {
+    // Chroma is only sampled on even rows.
+    const bool is_chroma_row = (i % 2 == 0);
+
+    for (int j = 0; j < width; ++j) {
+      // Since the input pixel format is RGB32, there are 4 bytes per pixel.
+      const uint8* pixel = rgbframe + 4 * j;
+      const int b = pixel[0];
+      const int g = pixel[1];
+      const int r = pixel[2];
+
+      // Fixed-point coefficients scaled by 256; +128 rounds before the shift.
+      yplane[j] = clip_byte(((r * 66 + g * 129 + b * 25 + 128) >> 8) + 16);
+
+      if (is_chroma_row && j % 2 == 0) {
+        uplane[j / 2] =
+            clip_byte(((r * -38 + g * -74 + b * 112 + 128) >> 8) + 128);
+        // The -18 term belongs to the blue channel. It was previously applied
+        // to green a second time, which left V independent of blue.
+        vplane[j / 2] =
+            clip_byte(((r * 112 + g * -94 + b * -18 + 128) >> 8) + 128);
+      }
+    }
+
+    rgbframe += rgbstride;
+    yplane += ystride;
+    // Advance the chroma rows once per pair of input rows.
+    if (is_chroma_row) {
+      uplane += uvstride;
+      vplane += uvstride;
+    }
+  }
+}
+
+} // namespace media
diff --git a/media/base/yuv_convert_sse2.cc b/media/base/yuv_convert_sse2.cc
new file mode 100644
index 0000000..2c1beae
--- /dev/null
+++ b/media/base/yuv_convert_sse2.cc
@@ -0,0 +1,189 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "media/base/yuv_convert.h"
+#include "media/base/yuv_row.h"
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#else
+#include <mmintrin.h>
+#include <emmintrin.h>
+#endif
+
+namespace media {
+
+// Per-lane offsets added after the fixed-point sums have been shifted down.
+// Each pixel occupies four 16-bit lanes laid out as (Y, unused, U, V): 16 is
+// added to Y and 128 to U and V. The pattern is repeated for 2 pixels.
+SIMD_ALIGNED(const int16 kYuvOffset[8]) = {16, 0, 128, 128, 16, 0, 128, 128};
+
+void FastConvertRGB32ToYUVRow(const uint8* rgb_buf_1,  // First input row; each pixel is read as one uint32.
+ const uint8* rgb_buf_2,  // Second input row, paired with |rgb_buf_1| to form 2x2 blocks.
+ uint8* y_buf_1,  // Receives one Y byte per pixel of |rgb_buf_1|.
+ uint8* y_buf_2,  // Receives one Y byte per pixel of |rgb_buf_2|.
+ uint8* u_buf,  // Receives one U byte per 2x2 block.
+ uint8* v_buf,  // Receives one V byte per 2x2 block.
+ int width) {  // Pixels per row; 2 are consumed per iteration, so an odd value processes one extra pixel.
+ const uint64* r_table = reinterpret_cast<uint64*>(kCoefficientsYuvR);  // Rows 0-255; each row is 4 int16 values read as one uint64.
+ const uint64* g_table = reinterpret_cast<uint64*>(kCoefficientsYuvR + 256);  // Rows 256-511.
+ const uint64* b_table = reinterpret_cast<uint64*>(kCoefficientsYuvR + 512);  // Rows 512-767.
+ uint16* y_row_1 = reinterpret_cast<uint16*>(y_buf_1);  // Y is stored two pixels (2 bytes) at a time.
+ uint16* y_row_2 = reinterpret_cast<uint16*>(y_buf_2);
+ const uint32* rgb_row_1 = reinterpret_cast<const uint32*>(rgb_buf_1);
+ const uint32* rgb_row_2 = reinterpret_cast<const uint32*>(rgb_buf_2);
+
+ SIMD_ALIGNED(int8 output_stage[8]);  // Scratch for the packed result bytes: Ya Yb Yc Yd U V U V.
+ __m128i offset = _mm_load_si128(reinterpret_cast<const __m128i*>(kYuvOffset));
+
+ for (int i = 0; i < width; i += 2) {  // Each iteration converts one 2x2 block: a, b in row 1 and c, d in row 2.
+ // Load the first pixel (a). The low byte indexes the B table, the next the G table, then the R table.
+ register unsigned int pixel = *rgb_row_1++;
+ __m128i b_comp_a = _mm_loadl_epi64(
+ reinterpret_cast<const __m128i*>(b_table + (pixel & 0xFF)));
+ pixel >>= 8;
+ __m128i g_comp_a = _mm_loadl_epi64(
+ reinterpret_cast<const __m128i*>(g_table + (pixel & 0xFF)));
+ pixel >>= 8;
+ __m128i r_comp_a = _mm_loadl_epi64(
+ reinterpret_cast<const __m128i*>(r_table + (pixel & 0xFF)));
+
+ // Load the first pixel (c) in the second row.
+ pixel = *rgb_row_2++;
+ __m128i b_comp_c = _mm_loadl_epi64(
+ reinterpret_cast<const __m128i*>(b_table + (pixel & 0xFF)));
+ pixel >>= 8;
+ __m128i g_comp_c = _mm_loadl_epi64(
+ reinterpret_cast<const __m128i*>(g_table + (pixel & 0xFF)));
+ pixel >>= 8;
+ __m128i r_comp_c = _mm_loadl_epi64(
+ reinterpret_cast<const __m128i*>(r_table + (pixel & 0xFF)));
+
+ // Pack two pixels into one register: pixel (a) in the low 64 bits, pixel (c) in the high 64 bits.
+ __m128i b_comp_ac = _mm_unpacklo_epi64(b_comp_a, b_comp_c);
+ __m128i g_comp_ac = _mm_unpacklo_epi64(g_comp_a, g_comp_c);
+ __m128i r_comp_ac = _mm_unpacklo_epi64(r_comp_a, r_comp_c);
+
+ // Add the R, G and B coefficients together, per 16-bit lane.
+ // Bit 127 (leftmost) down to bit 0 (rightmost):
+ // |yuv_ac| will be (Vc Uc 0 Yc Va Ua 0 Ya).
+ __m128i yuv_ac = _mm_adds_epi16(r_comp_ac,
+ _mm_adds_epi16(g_comp_ac, b_comp_ac));
+
+ // Right shift 6 bits to divide by 64 (the scale of the table entries) and then add the offset.
+ yuv_ac = _mm_srai_epi16(yuv_ac, 6);
+ yuv_ac = _mm_adds_epi16(yuv_ac, offset);
+
+ // Now do the same for the second column, starting with pixel (b) in the first row.
+ pixel = *rgb_row_1++;
+ __m128i b_comp_b = _mm_loadl_epi64(
+ reinterpret_cast<const __m128i*>(b_table + (pixel & 0xFF)));
+ pixel >>= 8;
+ __m128i g_comp_b = _mm_loadl_epi64(
+ reinterpret_cast<const __m128i*>(g_table + (pixel & 0xFF)));
+ pixel >>= 8;
+ __m128i r_comp_b = _mm_loadl_epi64(
+ reinterpret_cast<const __m128i*>(r_table + (pixel & 0xFF)));
+
+ // Load the second pixel (d) in the second row.
+ pixel = *rgb_row_2++;
+ __m128i b_comp_d = _mm_loadl_epi64(
+ reinterpret_cast<const __m128i*>(b_table + (pixel & 0xFF)));
+ pixel >>= 8;
+ __m128i g_comp_d = _mm_loadl_epi64(
+ reinterpret_cast<const __m128i*>(g_table + (pixel & 0xFF)));
+ pixel >>= 8;
+ __m128i r_comp_d = _mm_loadl_epi64(
+ reinterpret_cast<const __m128i*>(r_table + (pixel & 0xFF)));
+
+ // Pack two pixels into one register: pixel (b) in the low 64 bits, pixel (d) in the high 64 bits.
+ __m128i b_comp_bd = _mm_unpacklo_epi64(b_comp_b, b_comp_d);
+ __m128i g_comp_bd = _mm_unpacklo_epi64(g_comp_b, g_comp_d);
+ __m128i r_comp_bd = _mm_unpacklo_epi64(r_comp_b, r_comp_d);
+
+ // Add the R, G and B coefficients together, per 16-bit lane.
+ // Bit 127 (leftmost) down to bit 0 (rightmost):
+ // |yuv_bd| will be (Vd Ud 0 Yd Vb Ub 0 Yb).
+ __m128i yuv_bd = _mm_adds_epi16(r_comp_bd,
+ _mm_adds_epi16(g_comp_bd, b_comp_bd));
+
+ // Right shift 6 bits to divide by 64 (the scale of the table entries) and then add the offset.
+ yuv_bd = _mm_srai_epi16(yuv_bd, 6);
+ yuv_bd = _mm_adds_epi16(yuv_bd, offset);
+
+ // Interleave the two columns. |yuv_row_1| will have (Vb Va Ub Ua 0 0 Yb Ya) and
+ // |yuv_row_2| will have (Vd Vc Ud Uc 0 0 Yd Yc).
+ __m128i yuv_row_1 = _mm_unpacklo_epi16(yuv_ac, yuv_bd);
+ __m128i yuv_row_2 = _mm_unpackhi_epi16(yuv_ac, yuv_bd);
+
+ // |y_comp| will have (0 0 0 0 Yd Yc Yb Ya) as 16-bit lanes.
+ __m128i y_comp = _mm_unpacklo_epi32(yuv_row_1, yuv_row_2);
+
+ // Down size to 8 bits with unsigned saturation; the low 4 bytes are Ya Yb Yc Yd.
+ y_comp = _mm_packus_epi16(y_comp, y_comp);
+
+ // |uv_comp| will have (Vd Vc Vb Va Ud Uc Ub Ua) as 16-bit lanes.
+ __m128i uv_comp = _mm_unpackhi_epi32(yuv_row_1, yuv_row_2);
+
+ // Generate |unity| to become (1 1 1 1 1 1 1 1); the value does not depend on the loop iteration.
+ __m128i unity = _mm_cmpeq_epi16(offset, offset);
+ unity = _mm_srli_epi16(unity, 15);
+
+ // Multiply by 1 and add adjacent lanes: |uv_comp| will have (Vc + Vd, Va + Vb, Uc + Ud, Ua + Ub) as 32-bit lanes.
+ uv_comp = _mm_madd_epi16(uv_comp, unity);
+
+ // Pack |uv_comp| into 16 bit signed integers.
+ uv_comp = _mm_packs_epi32(uv_comp, uv_comp);
+
+ // And then do a multiply-add again. |uv_comp| will have 4 32-bit integers, alternating the U sum and the V sum of the block.
+ uv_comp = _mm_madd_epi16(uv_comp, unity);
+
+ // Do a right shift to divide by 4, giving the average over the four pixels.
+ uv_comp = _mm_srai_epi32(uv_comp, 2);
+
+ // And then pack twice to form 2 8-bit unsigned integers of U and V.
+ uv_comp = _mm_packs_epi32(uv_comp, uv_comp);
+ uv_comp = _mm_packus_epi16(uv_comp, uv_comp);
+
+ // And then finally pack the output; the low 8 bytes become Ya Yb Yc Yd U V U V.
+ __m128i output = _mm_unpacklo_epi32(y_comp, uv_comp);
+
+ // Store the low 8 bytes of the output into the scratch buffer.
+ _mm_storel_epi64(reinterpret_cast<__m128i*>(output_stage), output);
+
+ *y_row_1++ = *reinterpret_cast<uint16*>(output_stage);  // Ya, Yb.
+ *y_row_2++ = *reinterpret_cast<uint16*>(output_stage + 2);  // Yc, Yd.
+ *u_buf++ = output_stage[4];  // Averaged U of the block.
+ *v_buf++ = output_stage[5];  // Averaged V of the block.
+ }
+}
+
+// Converts a 32-bit RGB frame to planar YUV two rows at a time using
+// FastConvertRGB32ToYUVRow(). U and V are subsampled 2x2, so each pair of
+// input rows produces two rows of Y and one row each of U and V.
+//
+// |rgbstride|, |ystride| and |uvstride| are the byte offsets between
+// consecutive rows of the respective buffers.
+//
+// TODO(hclam): Add code to do runtime SSE2 detection.
+void ConvertRGB32ToYUV(const uint8* rgbframe,
+                       uint8* yplane,
+                       uint8* uplane,
+                       uint8* vplane,
+                       int width,
+                       int height,
+                       int rgbstride,
+                       int ystride,
+                       int uvstride) {
+  // The row converter consumes pixels in pairs, so make sure |width| is a
+  // multiple of 2. The last column of an odd-width frame is not converted.
+  width = (width / 2) * 2;
+  for (int i = 0; i < height; i += 2) {
+    // An odd |height| leaves the last row without a partner. Pair it with
+    // itself so that nothing past the end of |rgbframe| is read and nothing
+    // past the end of |yplane| is written. The duplicated row produces
+    // identical Y bytes, so storing it twice to the same place is harmless.
+    const bool has_second_row = (i + 1 < height);
+    const uint8* rgb_row_2 = has_second_row ? rgbframe + rgbstride : rgbframe;
+    uint8* y_row_2 = has_second_row ? yplane + ystride : yplane;
+
+    FastConvertRGB32ToYUVRow(rgbframe,
+                             rgb_row_2,
+                             yplane,
+                             y_row_2,
+                             uplane,
+                             vplane,
+                             width);
+    rgbframe += 2 * rgbstride;
+    yplane += 2 * ystride;
+    uplane += uvstride;
+    vplane += uvstride;
+  }
+}
+
+} // namespace media
diff --git a/media/base/yuv_row.h b/media/base/yuv_row.h
index 0a2990b..2414ccf 100644
--- a/media/base/yuv_row.h
+++ b/media/base/yuv_row.h
@@ -71,12 +71,21 @@ void LinearScaleYUVToRGB32Row(const uint8* y_buf,
int width,
int source_dx);
+void FastConvertRGB32ToYUVRow(const uint8* rgb_buf_1,  // First input row, 4 bytes per pixel; converts a pair of RGB32 rows to YUV.
+ const uint8* rgb_buf_2,  // Second input row, 4 bytes per pixel.
+ uint8* y_buf_1,  // Output Y for the first row, one byte per pixel.
+ uint8* y_buf_2,  // Output Y for the second row, one byte per pixel.
+ uint8* u_buf,  // Output U, one byte per 2x2 block of pixels.
+ uint8* v_buf,  // Output V, one byte per 2x2 block of pixels.
+ int width);  // Number of pixels in each row.
+
#if defined(_MSC_VER)
#define SIMD_ALIGNED(var) __declspec(align(16)) var
#else
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
#endif
extern SIMD_ALIGNED(int16 kCoefficientsRgbY[768][4]);
+extern SIMD_ALIGNED(int16 kCoefficientsYuvR[768][4]);  // RGB -> YUV coefficients: 256 rows for R, then 256 for G, then 256 for B.
// Method to force C version.
//#define USE_MMX 0
diff --git a/media/base/yuv_row_table.cc b/media/base/yuv_row_table.cc
index 296380b..20d8c59 100644
--- a/media/base/yuv_row_table.cc
+++ b/media/base/yuv_row_table.cc
@@ -27,6 +27,27 @@ extern "C" {
0 \
}
+#define YUVR(i) { /* Red-channel terms for channel value i, scaled by 64, as lanes (Y, unused, U, V). */ \
+ static_cast<int16>(0.254 * 64 * i + 0.5), /* Y term. */ \
+ 0, /* Unused lane. */ \
+ static_cast<int16>(-0.148 * 64 * i + 0.5), /* U term. NOTE(review): +0.5 then truncation does not round negative products to nearest (i = 1 gives -8, not -9) - confirm intended. */ \
+ static_cast<int16>(0.439 * 64 * i + 0.5), /* V term. */ \
+}
+
+#define YUVG(i) { /* Green-channel terms for channel value i, scaled by 64, as lanes (Y, unused, U, V). */ \
+ static_cast<int16>(0.504 * 64 * i + 0.5), /* Y term. */ \
+ 0, /* Unused lane. */ \
+ static_cast<int16>(-0.291 * 64 * i + 0.5), /* U term. NOTE(review): +0.5 then truncation does not round negative products to nearest - confirm intended. */ \
+ static_cast<int16>(-0.368 * 64 * i + 0.5), /* V term; same rounding caveat as the U term. */ \
+}
+
+#define YUVB(i) { /* Blue-channel terms for channel value i, scaled by 64, as lanes (Y, unused, U, V). */ \
+ static_cast<int16>(0.098 * 64 * i + 0.5), /* Y term. */ \
+ 0, /* Unused lane. */ \
+ static_cast<int16>(0.439 * 64 * i + 0.5), /* U term. */ \
+ static_cast<int16>(-0.071 * 64 * i + 0.5), /* V term. NOTE(review): +0.5 then truncation does not round negative products to nearest - confirm intended. */ \
+}
+
SIMD_ALIGNED(int16 kCoefficientsRgbY[256 * 3][4]) = {
RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03),
RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07),
@@ -226,8 +247,211 @@ SIMD_ALIGNED(int16 kCoefficientsRgbY[256 * 3][4]) = {
RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF),
};
+SIMD_ALIGNED(int16 kCoefficientsYuvR[256 * 3][4]) = {  // RGB -> YUV coefficients: three 256-row tables, each indexed by an 8-bit channel value.
+ // R table: rows 0-255, built with YUVR.
+ YUVR(0x00), YUVR(0x01), YUVR(0x02), YUVR(0x03),
+ YUVR(0x04), YUVR(0x05), YUVR(0x06), YUVR(0x07),
+ YUVR(0x08), YUVR(0x09), YUVR(0x0A), YUVR(0x0B),
+ YUVR(0x0C), YUVR(0x0D), YUVR(0x0E), YUVR(0x0F),
+ YUVR(0x10), YUVR(0x11), YUVR(0x12), YUVR(0x13),
+ YUVR(0x14), YUVR(0x15), YUVR(0x16), YUVR(0x17),
+ YUVR(0x18), YUVR(0x19), YUVR(0x1A), YUVR(0x1B),
+ YUVR(0x1C), YUVR(0x1D), YUVR(0x1E), YUVR(0x1F),
+ YUVR(0x20), YUVR(0x21), YUVR(0x22), YUVR(0x23),
+ YUVR(0x24), YUVR(0x25), YUVR(0x26), YUVR(0x27),
+ YUVR(0x28), YUVR(0x29), YUVR(0x2A), YUVR(0x2B),
+ YUVR(0x2C), YUVR(0x2D), YUVR(0x2E), YUVR(0x2F),
+ YUVR(0x30), YUVR(0x31), YUVR(0x32), YUVR(0x33),
+ YUVR(0x34), YUVR(0x35), YUVR(0x36), YUVR(0x37),
+ YUVR(0x38), YUVR(0x39), YUVR(0x3A), YUVR(0x3B),
+ YUVR(0x3C), YUVR(0x3D), YUVR(0x3E), YUVR(0x3F),
+ YUVR(0x40), YUVR(0x41), YUVR(0x42), YUVR(0x43),
+ YUVR(0x44), YUVR(0x45), YUVR(0x46), YUVR(0x47),
+ YUVR(0x48), YUVR(0x49), YUVR(0x4A), YUVR(0x4B),
+ YUVR(0x4C), YUVR(0x4D), YUVR(0x4E), YUVR(0x4F),
+ YUVR(0x50), YUVR(0x51), YUVR(0x52), YUVR(0x53),
+ YUVR(0x54), YUVR(0x55), YUVR(0x56), YUVR(0x57),
+ YUVR(0x58), YUVR(0x59), YUVR(0x5A), YUVR(0x5B),
+ YUVR(0x5C), YUVR(0x5D), YUVR(0x5E), YUVR(0x5F),
+ YUVR(0x60), YUVR(0x61), YUVR(0x62), YUVR(0x63),
+ YUVR(0x64), YUVR(0x65), YUVR(0x66), YUVR(0x67),
+ YUVR(0x68), YUVR(0x69), YUVR(0x6A), YUVR(0x6B),
+ YUVR(0x6C), YUVR(0x6D), YUVR(0x6E), YUVR(0x6F),
+ YUVR(0x70), YUVR(0x71), YUVR(0x72), YUVR(0x73),
+ YUVR(0x74), YUVR(0x75), YUVR(0x76), YUVR(0x77),
+ YUVR(0x78), YUVR(0x79), YUVR(0x7A), YUVR(0x7B),
+ YUVR(0x7C), YUVR(0x7D), YUVR(0x7E), YUVR(0x7F),
+ YUVR(0x80), YUVR(0x81), YUVR(0x82), YUVR(0x83),
+ YUVR(0x84), YUVR(0x85), YUVR(0x86), YUVR(0x87),
+ YUVR(0x88), YUVR(0x89), YUVR(0x8A), YUVR(0x8B),
+ YUVR(0x8C), YUVR(0x8D), YUVR(0x8E), YUVR(0x8F),
+ YUVR(0x90), YUVR(0x91), YUVR(0x92), YUVR(0x93),
+ YUVR(0x94), YUVR(0x95), YUVR(0x96), YUVR(0x97),
+ YUVR(0x98), YUVR(0x99), YUVR(0x9A), YUVR(0x9B),
+ YUVR(0x9C), YUVR(0x9D), YUVR(0x9E), YUVR(0x9F),
+ YUVR(0xA0), YUVR(0xA1), YUVR(0xA2), YUVR(0xA3),
+ YUVR(0xA4), YUVR(0xA5), YUVR(0xA6), YUVR(0xA7),
+ YUVR(0xA8), YUVR(0xA9), YUVR(0xAA), YUVR(0xAB),
+ YUVR(0xAC), YUVR(0xAD), YUVR(0xAE), YUVR(0xAF),
+ YUVR(0xB0), YUVR(0xB1), YUVR(0xB2), YUVR(0xB3),
+ YUVR(0xB4), YUVR(0xB5), YUVR(0xB6), YUVR(0xB7),
+ YUVR(0xB8), YUVR(0xB9), YUVR(0xBA), YUVR(0xBB),
+ YUVR(0xBC), YUVR(0xBD), YUVR(0xBE), YUVR(0xBF),
+ YUVR(0xC0), YUVR(0xC1), YUVR(0xC2), YUVR(0xC3),
+ YUVR(0xC4), YUVR(0xC5), YUVR(0xC6), YUVR(0xC7),
+ YUVR(0xC8), YUVR(0xC9), YUVR(0xCA), YUVR(0xCB),
+ YUVR(0xCC), YUVR(0xCD), YUVR(0xCE), YUVR(0xCF),
+ YUVR(0xD0), YUVR(0xD1), YUVR(0xD2), YUVR(0xD3),
+ YUVR(0xD4), YUVR(0xD5), YUVR(0xD6), YUVR(0xD7),
+ YUVR(0xD8), YUVR(0xD9), YUVR(0xDA), YUVR(0xDB),
+ YUVR(0xDC), YUVR(0xDD), YUVR(0xDE), YUVR(0xDF),
+ YUVR(0xE0), YUVR(0xE1), YUVR(0xE2), YUVR(0xE3),
+ YUVR(0xE4), YUVR(0xE5), YUVR(0xE6), YUVR(0xE7),
+ YUVR(0xE8), YUVR(0xE9), YUVR(0xEA), YUVR(0xEB),
+ YUVR(0xEC), YUVR(0xED), YUVR(0xEE), YUVR(0xEF),
+ YUVR(0xF0), YUVR(0xF1), YUVR(0xF2), YUVR(0xF3),
+ YUVR(0xF4), YUVR(0xF5), YUVR(0xF6), YUVR(0xF7),
+ YUVR(0xF8), YUVR(0xF9), YUVR(0xFA), YUVR(0xFB),
+ YUVR(0xFC), YUVR(0xFD), YUVR(0xFE), YUVR(0xFF),
+
+ // G table: rows 256-511, built with YUVG.
+ YUVG(0x00), YUVG(0x01), YUVG(0x02), YUVG(0x03),
+ YUVG(0x04), YUVG(0x05), YUVG(0x06), YUVG(0x07),
+ YUVG(0x08), YUVG(0x09), YUVG(0x0A), YUVG(0x0B),
+ YUVG(0x0C), YUVG(0x0D), YUVG(0x0E), YUVG(0x0F),
+ YUVG(0x10), YUVG(0x11), YUVG(0x12), YUVG(0x13),
+ YUVG(0x14), YUVG(0x15), YUVG(0x16), YUVG(0x17),
+ YUVG(0x18), YUVG(0x19), YUVG(0x1A), YUVG(0x1B),
+ YUVG(0x1C), YUVG(0x1D), YUVG(0x1E), YUVG(0x1F),
+ YUVG(0x20), YUVG(0x21), YUVG(0x22), YUVG(0x23),
+ YUVG(0x24), YUVG(0x25), YUVG(0x26), YUVG(0x27),
+ YUVG(0x28), YUVG(0x29), YUVG(0x2A), YUVG(0x2B),
+ YUVG(0x2C), YUVG(0x2D), YUVG(0x2E), YUVG(0x2F),
+ YUVG(0x30), YUVG(0x31), YUVG(0x32), YUVG(0x33),
+ YUVG(0x34), YUVG(0x35), YUVG(0x36), YUVG(0x37),
+ YUVG(0x38), YUVG(0x39), YUVG(0x3A), YUVG(0x3B),
+ YUVG(0x3C), YUVG(0x3D), YUVG(0x3E), YUVG(0x3F),
+ YUVG(0x40), YUVG(0x41), YUVG(0x42), YUVG(0x43),
+ YUVG(0x44), YUVG(0x45), YUVG(0x46), YUVG(0x47),
+ YUVG(0x48), YUVG(0x49), YUVG(0x4A), YUVG(0x4B),
+ YUVG(0x4C), YUVG(0x4D), YUVG(0x4E), YUVG(0x4F),
+ YUVG(0x50), YUVG(0x51), YUVG(0x52), YUVG(0x53),
+ YUVG(0x54), YUVG(0x55), YUVG(0x56), YUVG(0x57),
+ YUVG(0x58), YUVG(0x59), YUVG(0x5A), YUVG(0x5B),
+ YUVG(0x5C), YUVG(0x5D), YUVG(0x5E), YUVG(0x5F),
+ YUVG(0x60), YUVG(0x61), YUVG(0x62), YUVG(0x63),
+ YUVG(0x64), YUVG(0x65), YUVG(0x66), YUVG(0x67),
+ YUVG(0x68), YUVG(0x69), YUVG(0x6A), YUVG(0x6B),
+ YUVG(0x6C), YUVG(0x6D), YUVG(0x6E), YUVG(0x6F),
+ YUVG(0x70), YUVG(0x71), YUVG(0x72), YUVG(0x73),
+ YUVG(0x74), YUVG(0x75), YUVG(0x76), YUVG(0x77),
+ YUVG(0x78), YUVG(0x79), YUVG(0x7A), YUVG(0x7B),
+ YUVG(0x7C), YUVG(0x7D), YUVG(0x7E), YUVG(0x7F),
+ YUVG(0x80), YUVG(0x81), YUVG(0x82), YUVG(0x83),
+ YUVG(0x84), YUVG(0x85), YUVG(0x86), YUVG(0x87),
+ YUVG(0x88), YUVG(0x89), YUVG(0x8A), YUVG(0x8B),
+ YUVG(0x8C), YUVG(0x8D), YUVG(0x8E), YUVG(0x8F),
+ YUVG(0x90), YUVG(0x91), YUVG(0x92), YUVG(0x93),
+ YUVG(0x94), YUVG(0x95), YUVG(0x96), YUVG(0x97),
+ YUVG(0x98), YUVG(0x99), YUVG(0x9A), YUVG(0x9B),
+ YUVG(0x9C), YUVG(0x9D), YUVG(0x9E), YUVG(0x9F),
+ YUVG(0xA0), YUVG(0xA1), YUVG(0xA2), YUVG(0xA3),
+ YUVG(0xA4), YUVG(0xA5), YUVG(0xA6), YUVG(0xA7),
+ YUVG(0xA8), YUVG(0xA9), YUVG(0xAA), YUVG(0xAB),
+ YUVG(0xAC), YUVG(0xAD), YUVG(0xAE), YUVG(0xAF),
+ YUVG(0xB0), YUVG(0xB1), YUVG(0xB2), YUVG(0xB3),
+ YUVG(0xB4), YUVG(0xB5), YUVG(0xB6), YUVG(0xB7),
+ YUVG(0xB8), YUVG(0xB9), YUVG(0xBA), YUVG(0xBB),
+ YUVG(0xBC), YUVG(0xBD), YUVG(0xBE), YUVG(0xBF),
+ YUVG(0xC0), YUVG(0xC1), YUVG(0xC2), YUVG(0xC3),
+ YUVG(0xC4), YUVG(0xC5), YUVG(0xC6), YUVG(0xC7),
+ YUVG(0xC8), YUVG(0xC9), YUVG(0xCA), YUVG(0xCB),
+ YUVG(0xCC), YUVG(0xCD), YUVG(0xCE), YUVG(0xCF),
+ YUVG(0xD0), YUVG(0xD1), YUVG(0xD2), YUVG(0xD3),
+ YUVG(0xD4), YUVG(0xD5), YUVG(0xD6), YUVG(0xD7),
+ YUVG(0xD8), YUVG(0xD9), YUVG(0xDA), YUVG(0xDB),
+ YUVG(0xDC), YUVG(0xDD), YUVG(0xDE), YUVG(0xDF),
+ YUVG(0xE0), YUVG(0xE1), YUVG(0xE2), YUVG(0xE3),
+ YUVG(0xE4), YUVG(0xE5), YUVG(0xE6), YUVG(0xE7),
+ YUVG(0xE8), YUVG(0xE9), YUVG(0xEA), YUVG(0xEB),
+ YUVG(0xEC), YUVG(0xED), YUVG(0xEE), YUVG(0xEF),
+ YUVG(0xF0), YUVG(0xF1), YUVG(0xF2), YUVG(0xF3),
+ YUVG(0xF4), YUVG(0xF5), YUVG(0xF6), YUVG(0xF7),
+ YUVG(0xF8), YUVG(0xF9), YUVG(0xFA), YUVG(0xFB),
+ YUVG(0xFC), YUVG(0xFD), YUVG(0xFE), YUVG(0xFF),
+
+ // B table: rows 512-767, built with YUVB.
+ YUVB(0x00), YUVB(0x01), YUVB(0x02), YUVB(0x03),
+ YUVB(0x04), YUVB(0x05), YUVB(0x06), YUVB(0x07),
+ YUVB(0x08), YUVB(0x09), YUVB(0x0A), YUVB(0x0B),
+ YUVB(0x0C), YUVB(0x0D), YUVB(0x0E), YUVB(0x0F),
+ YUVB(0x10), YUVB(0x11), YUVB(0x12), YUVB(0x13),
+ YUVB(0x14), YUVB(0x15), YUVB(0x16), YUVB(0x17),
+ YUVB(0x18), YUVB(0x19), YUVB(0x1A), YUVB(0x1B),
+ YUVB(0x1C), YUVB(0x1D), YUVB(0x1E), YUVB(0x1F),
+ YUVB(0x20), YUVB(0x21), YUVB(0x22), YUVB(0x23),
+ YUVB(0x24), YUVB(0x25), YUVB(0x26), YUVB(0x27),
+ YUVB(0x28), YUVB(0x29), YUVB(0x2A), YUVB(0x2B),
+ YUVB(0x2C), YUVB(0x2D), YUVB(0x2E), YUVB(0x2F),
+ YUVB(0x30), YUVB(0x31), YUVB(0x32), YUVB(0x33),
+ YUVB(0x34), YUVB(0x35), YUVB(0x36), YUVB(0x37),
+ YUVB(0x38), YUVB(0x39), YUVB(0x3A), YUVB(0x3B),
+ YUVB(0x3C), YUVB(0x3D), YUVB(0x3E), YUVB(0x3F),
+ YUVB(0x40), YUVB(0x41), YUVB(0x42), YUVB(0x43),
+ YUVB(0x44), YUVB(0x45), YUVB(0x46), YUVB(0x47),
+ YUVB(0x48), YUVB(0x49), YUVB(0x4A), YUVB(0x4B),
+ YUVB(0x4C), YUVB(0x4D), YUVB(0x4E), YUVB(0x4F),
+ YUVB(0x50), YUVB(0x51), YUVB(0x52), YUVB(0x53),
+ YUVB(0x54), YUVB(0x55), YUVB(0x56), YUVB(0x57),
+ YUVB(0x58), YUVB(0x59), YUVB(0x5A), YUVB(0x5B),
+ YUVB(0x5C), YUVB(0x5D), YUVB(0x5E), YUVB(0x5F),
+ YUVB(0x60), YUVB(0x61), YUVB(0x62), YUVB(0x63),
+ YUVB(0x64), YUVB(0x65), YUVB(0x66), YUVB(0x67),
+ YUVB(0x68), YUVB(0x69), YUVB(0x6A), YUVB(0x6B),
+ YUVB(0x6C), YUVB(0x6D), YUVB(0x6E), YUVB(0x6F),
+ YUVB(0x70), YUVB(0x71), YUVB(0x72), YUVB(0x73),
+ YUVB(0x74), YUVB(0x75), YUVB(0x76), YUVB(0x77),
+ YUVB(0x78), YUVB(0x79), YUVB(0x7A), YUVB(0x7B),
+ YUVB(0x7C), YUVB(0x7D), YUVB(0x7E), YUVB(0x7F),
+ YUVB(0x80), YUVB(0x81), YUVB(0x82), YUVB(0x83),
+ YUVB(0x84), YUVB(0x85), YUVB(0x86), YUVB(0x87),
+ YUVB(0x88), YUVB(0x89), YUVB(0x8A), YUVB(0x8B),
+ YUVB(0x8C), YUVB(0x8D), YUVB(0x8E), YUVB(0x8F),
+ YUVB(0x90), YUVB(0x91), YUVB(0x92), YUVB(0x93),
+ YUVB(0x94), YUVB(0x95), YUVB(0x96), YUVB(0x97),
+ YUVB(0x98), YUVB(0x99), YUVB(0x9A), YUVB(0x9B),
+ YUVB(0x9C), YUVB(0x9D), YUVB(0x9E), YUVB(0x9F),
+ YUVB(0xA0), YUVB(0xA1), YUVB(0xA2), YUVB(0xA3),
+ YUVB(0xA4), YUVB(0xA5), YUVB(0xA6), YUVB(0xA7),
+ YUVB(0xA8), YUVB(0xA9), YUVB(0xAA), YUVB(0xAB),
+ YUVB(0xAC), YUVB(0xAD), YUVB(0xAE), YUVB(0xAF),
+ YUVB(0xB0), YUVB(0xB1), YUVB(0xB2), YUVB(0xB3),
+ YUVB(0xB4), YUVB(0xB5), YUVB(0xB6), YUVB(0xB7),
+ YUVB(0xB8), YUVB(0xB9), YUVB(0xBA), YUVB(0xBB),
+ YUVB(0xBC), YUVB(0xBD), YUVB(0xBE), YUVB(0xBF),
+ YUVB(0xC0), YUVB(0xC1), YUVB(0xC2), YUVB(0xC3),
+ YUVB(0xC4), YUVB(0xC5), YUVB(0xC6), YUVB(0xC7),
+ YUVB(0xC8), YUVB(0xC9), YUVB(0xCA), YUVB(0xCB),
+ YUVB(0xCC), YUVB(0xCD), YUVB(0xCE), YUVB(0xCF),
+ YUVB(0xD0), YUVB(0xD1), YUVB(0xD2), YUVB(0xD3),
+ YUVB(0xD4), YUVB(0xD5), YUVB(0xD6), YUVB(0xD7),
+ YUVB(0xD8), YUVB(0xD9), YUVB(0xDA), YUVB(0xDB),
+ YUVB(0xDC), YUVB(0xDD), YUVB(0xDE), YUVB(0xDF),
+ YUVB(0xE0), YUVB(0xE1), YUVB(0xE2), YUVB(0xE3),
+ YUVB(0xE4), YUVB(0xE5), YUVB(0xE6), YUVB(0xE7),
+ YUVB(0xE8), YUVB(0xE9), YUVB(0xEA), YUVB(0xEB),
+ YUVB(0xEC), YUVB(0xED), YUVB(0xEE), YUVB(0xEF),
+ YUVB(0xF0), YUVB(0xF1), YUVB(0xF2), YUVB(0xF3),
+ YUVB(0xF4), YUVB(0xF5), YUVB(0xF6), YUVB(0xF7),
+ YUVB(0xF8), YUVB(0xF9), YUVB(0xFA), YUVB(0xFB),
+ YUVB(0xFC), YUVB(0xFD), YUVB(0xFE), YUVB(0xFF),
+};
+
#undef RGBY
#undef RGBU
#undef RGBV
+#undef YUVR
+#undef YUVG
+#undef YUVB
} // extern "C"
diff --git a/media/media.gyp b/media/media.gyp
index e2cdafd..baba862 100644
--- a/media/media.gyp
+++ b/media/media.gyp
@@ -113,6 +113,7 @@
'base/video_frame.h',
'base/yuv_convert.cc',
'base/yuv_convert.h',
+ 'base/yuv_convert_sse2.cc',
'base/yuv_row_win.cc',
'base/yuv_row_posix.cc',
'base/yuv_row_table.cc',
diff --git a/media/tools/scaler_bench/scaler_bench.cc b/media/tools/scaler_bench/scaler_bench.cc
index 3457d7c..f9be6c6 100644
--- a/media/tools/scaler_bench/scaler_bench.cc
+++ b/media/tools/scaler_bench/scaler_bench.cc
@@ -11,6 +11,7 @@
#include "base/command_line.h"
#include "base/scoped_vector.h"
+#include "base/scoped_ptr.h"
#include "base/string_number_conversions.h"
#include "base/time.h"
#include "media/base/video_frame.h"
@@ -86,6 +87,34 @@ double BenchmarkSkia() {
return static_cast<double>((end - start).InMilliseconds()) / num_frames;
}
+// Times media::ConvertRGB32ToYUV() on a |source_width| x |source_height|
+// RGB32 frame and returns the average cost in milliseconds per frame over
+// |num_frames| conversions.
+double BenchmarkRGBToYUV() {
+  int rgb_stride = source_width * 4;
+  // Value-initialize the input so the converter never reads indeterminate
+  // bytes.
+  scoped_array<uint8> rgb_frame(new uint8[rgb_stride * source_height]());
+
+  int y_stride = source_width;
+  // U and V are subsampled 2x2. Round both dimensions up so that an odd
+  // width or height still gets a full chroma row/column instead of an
+  // under-sized buffer.
+  int uv_stride = (source_width + 1) / 2;
+  int uv_rows = (source_height + 1) / 2;
+  scoped_array<uint8> y_plane(new uint8[y_stride * source_height]);
+  scoped_array<uint8> u_plane(new uint8[uv_stride * uv_rows]);
+  scoped_array<uint8> v_plane(new uint8[uv_stride * uv_rows]);
+
+  TimeTicks start = TimeTicks::HighResNow();
+
+  for (int i = 0; i < num_frames; ++i) {
+    media::ConvertRGB32ToYUV(rgb_frame.get(),
+                             y_plane.get(),
+                             u_plane.get(),
+                             v_plane.get(),
+                             source_width,
+                             source_height,
+                             rgb_stride,
+                             y_stride,
+                             uv_stride);
+  }
+
+  TimeTicks end = TimeTicks::HighResNow();
+  return static_cast<double>((end - start).InMilliseconds()) / num_frames;
+}
+
double BenchmarkFilter(media::ScaleFilter filter) {
std::vector<scoped_refptr<VideoFrame> > source_frames;
std::vector<scoped_refptr<VideoFrame> > dest_frames;
@@ -198,6 +227,8 @@ int main(int argc, const char** argv) {
std::cout << "Skia: " << BenchmarkSkia()
<< "ms/frame" << std::endl;
+ std::cout << "RGB To YUV: " << BenchmarkRGBToYUV()
+ << "ms/frame" << std::endl;
std::cout << "No filtering: " << BenchmarkFilter(media::FILTER_NONE)
<< "ms/frame" << std::endl;
std::cout << "Bilinear Vertical: "