summaryrefslogtreecommitdiffstats
path: root/media
diff options
context:
space:
mode:
authorfbarchard@chromium.org <fbarchard@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-04-22 01:00:19 +0000
committerfbarchard@chromium.org <fbarchard@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-04-22 01:00:19 +0000
commit8436c68ac1388c9e9df6fe823e927c4e3fffd643 (patch)
treef14dc86c407707d751983b2fec02b93963165f98 /media
parent9334377d11086b7445abe08b8950b183bb98b396 (diff)
downloadchromium_src-8436c68ac1388c9e9df6fe823e927c4e3fffd643.zip
chromium_src-8436c68ac1388c9e9df6fe823e927c4e3fffd643.tar.gz
chromium_src-8436c68ac1388c9e9df6fe823e927c4e3fffd643.tar.bz2
Revert 45265 - Speed up vertical filtering using v = a+(ba)*x formula
BUG=42064 TEST=unittests should still pass Review URL: http://codereview.chromium.org/1733004 TBR=jamesr@chromium.org Review URL: http://codereview.chromium.org/1718007 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@45270 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'media')
-rw-r--r--media/base/yuv_convert.cc77
-rw-r--r--media/base/yuv_convert.h6
-rw-r--r--media/tools/scaler_bench/scaler_bench.cc6
3 files changed, 39 insertions, 50 deletions
diff --git a/media/base/yuv_convert.cc b/media/base/yuv_convert.cc
index 0f6170d..569f8b8 100644
--- a/media/base/yuv_convert.cc
+++ b/media/base/yuv_convert.cc
@@ -70,12 +70,14 @@ void ConvertYUVToRGB32(const uint8* y_buf,
#if USE_SSE2
// FilterRows combines two rows of the image using linear interpolation.
-// Blends 8 pixels at a time.
+// SSE2 version blends 8 pixels at a time.
static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
int source_width, int source_y_fraction) {
__m128i zero = _mm_setzero_si128();
__m128i y1_fraction = _mm_set1_epi16(
static_cast<uint16>(source_y_fraction >> 8));
+ __m128i y0_fraction = _mm_set1_epi16(
+ static_cast<uint16>(256 - (source_y_fraction >> 8)));
uint8* end = ybuf + source_width;
if (ybuf < end) {
@@ -84,69 +86,64 @@ static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
__m128i y1 = _mm_loadl_epi64(reinterpret_cast<__m128i const*>(y1_ptr));
y0 = _mm_unpacklo_epi8(y0, zero);
y1 = _mm_unpacklo_epi8(y1, zero);
- y1 = _mm_sub_epi16(y1, y0);
+ y0 = _mm_mullo_epi16(y0, y0_fraction);
y1 = _mm_mullo_epi16(y1, y1_fraction);
- y1 = _mm_srai_epi16(y1, 8);
- y1 = _mm_add_epi16(y1, y0);
- y1 = _mm_packus_epi16(y1, y1);
- _mm_storel_epi64(reinterpret_cast<__m128i *>(ybuf), y1);
+ y0 = _mm_add_epi16(y0, y1); // 8.8 fixed point result
+ y0 = _mm_srli_epi16(y0, 8);
+ y0 = _mm_packus_epi16(y0, y0);
+ _mm_storel_epi64(reinterpret_cast<__m128i *>(ybuf), y0);
y0_ptr += 8;
y1_ptr += 8;
ybuf += 8;
} while (ybuf < end);
}
}
+
#elif USE_MMX
+// MMX version blends 4 pixels at a time.
static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
int source_width, int source_y_fraction) {
__m64 zero = _mm_setzero_si64();
__m64 y1_fraction = _mm_set1_pi16(
- static_cast<int16>(source_y_fraction >> 8));
+ static_cast<int16>(source_y_fraction >> 8));
+ __m64 y0_fraction = _mm_set1_pi16(
+ static_cast<int16>(256 - (source_y_fraction >> 8)));
uint8* end = ybuf + source_width;
if (ybuf < end) {
do {
- __m64 y2 = *reinterpret_cast<const __m64 *>(y0_ptr);
- __m64 y3 = *reinterpret_cast<const __m64 *>(y1_ptr);
- __m64 y0 = _mm_unpacklo_pi8(y2, zero);
- __m64 y1 = _mm_unpacklo_pi8(y3, zero);
- y2 = _mm_unpackhi_pi8(y2, zero);
- y3 = _mm_unpackhi_pi8(y3, zero);
- y1 = _mm_sub_pi16(y1, y0);
- y3 = _mm_sub_pi16(y3, y2);
+ __m64 y0 = _mm_cvtsi32_si64(*reinterpret_cast<const int *>(y0_ptr));
+ __m64 y1 = _mm_cvtsi32_si64(*reinterpret_cast<const int *>(y1_ptr));
+ y0 = _mm_unpacklo_pi8(y0, zero);
+ y1 = _mm_unpacklo_pi8(y1, zero);
+ y0 = _mm_mullo_pi16(y0, y0_fraction);
y1 = _mm_mullo_pi16(y1, y1_fraction);
- y3 = _mm_mullo_pi16(y3, y1_fraction);
- y1 = _mm_srai_pi16(y1, 8);
- y3 = _mm_srai_pi16(y3, 8);
- y1 = _mm_add_pi16(y1, y0);
- y3 = _mm_add_pi16(y3, y2);
- y0 = _mm_packs_pu16(y1, y3);
- *reinterpret_cast<__m64 *>(ybuf) = y0;
- y0_ptr += 8;
- y1_ptr += 8;
- ybuf += 8;
+ y0 = _mm_add_pi16(y0, y1); // 8.8 fixed point result
+ y0 = _mm_srli_pi16(y0, 8);
+ y0 = _mm_packs_pu16(y0, y0);
+ *reinterpret_cast<int *>(ybuf) = _mm_cvtsi64_si32(y0);
+ y0_ptr += 4;
+ y1_ptr += 4;
+ ybuf += 4;
} while (ybuf < end);
}
}
#else // no MMX or SSE2
-
+// C version blends 4 pixels at a time.
static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
int source_width, int source_y_fraction) {
- int y1_fraction = (source_y_fraction >> 8);
+ int y1_fraction = source_y_fraction >> 8;
+ int y0_fraction = 256 - (source_y_fraction >> 8);
uint8* end = ybuf + source_width;
if (ybuf < end) {
do {
- ybuf[0] = y0_ptr[0] + (((y1_ptr[0] - y0_ptr[0]) * y1_fraction) >> 8);
- ybuf[1] = y0_ptr[1] + (((y1_ptr[1] - y0_ptr[1]) * y1_fraction) >> 8);
- ybuf[2] = y0_ptr[2] + (((y1_ptr[2] - y0_ptr[2]) * y1_fraction) >> 8);
- ybuf[3] = y0_ptr[3] + (((y1_ptr[3] - y0_ptr[3]) * y1_fraction) >> 8);
- ybuf[4] = y0_ptr[4] + (((y1_ptr[4] - y0_ptr[4]) * y1_fraction) >> 8);
- ybuf[5] = y0_ptr[5] + (((y1_ptr[5] - y0_ptr[5]) * y1_fraction) >> 8);
- ybuf[6] = y0_ptr[6] + (((y1_ptr[6] - y0_ptr[6]) * y1_fraction) >> 8);
- ybuf[7] = y0_ptr[7] + (((y1_ptr[7] - y0_ptr[7]) * y1_fraction) >> 8);
- y0_ptr += 8;
- y1_ptr += 8;
- ybuf += 8;
+ ybuf[0] = (y0_ptr[0] * (y0_fraction) + y1_ptr[0] * (y1_fraction)) >> 8;
+ ybuf[1] = (y0_ptr[1] * (y0_fraction) + y1_ptr[1] * (y1_fraction)) >> 8;
+ ybuf[2] = (y0_ptr[2] * (y0_fraction) + y1_ptr[2] * (y1_fraction)) >> 8;
+ ybuf[3] = (y0_ptr[3] * (y0_fraction) + y1_ptr[3] * (y1_fraction)) >> 8;
+ y0_ptr += 4;
+ y1_ptr += 4;
+ ybuf += 4;
} while (ybuf < end);
}
}
@@ -264,7 +261,7 @@ void ScaleYUVToRGB32(const uint8* y_buf,
const uint8* v_ptr = v0_ptr;
// Apply vertical filtering if necessary.
// TODO(fbarchard): Remove memcpy when not necessary.
- if (filter & media::FILTER_BILINEAR_V) {
+ if (filter == media::FILTER_BILINEAR) {
if (yscale_fixed != kFractionMax &&
source_y_fraction && ((source_y + 1) < source_height)) {
FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
@@ -292,7 +289,7 @@ void ScaleYUVToRGB32(const uint8* y_buf,
FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
dest_pixel, width);
} else {
- if (filter & FILTER_BILINEAR_H)
+ if (filter == FILTER_BILINEAR)
LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
dest_pixel, width, source_dx);
else {
diff --git a/media/base/yuv_convert.h b/media/base/yuv_convert.h
index 24a2c4e..d55337d 100644
--- a/media/base/yuv_convert.h
+++ b/media/base/yuv_convert.h
@@ -31,10 +31,8 @@ enum Rotate {
// Filter affects how scaling looks.
enum ScaleFilter {
- FILTER_NONE = 0, // No filter (point sampled).
- FILTER_BILINEAR_H = 1, // Bilinear horizontal filter.
- FILTER_BILINEAR_V = 2, // Bilinear vertical filter.
- FILTER_BILINEAR = 3, // Bilinear filter.
+ FILTER_NONE, // No filter (point sampled).
+ FILTER_BILINEAR, // Bilinear filter.
};
// Convert a frame of YUV to 32 bit ARGB.
diff --git a/media/tools/scaler_bench/scaler_bench.cc b/media/tools/scaler_bench/scaler_bench.cc
index 60417ad..a50efa0 100644
--- a/media/tools/scaler_bench/scaler_bench.cc
+++ b/media/tools/scaler_bench/scaler_bench.cc
@@ -200,12 +200,6 @@ int main(int argc, const char** argv) {
<< "ms/frame" << std::endl;
std::cout << "No filtering: " << BenchmarkFilter(media::FILTER_NONE)
<< "ms/frame" << std::endl;
- std::cout << "Bilinear Vertical: "
- << BenchmarkFilter(media::FILTER_BILINEAR_V)
- << "ms/frame" << std::endl;
- std::cout << "Bilinear Horizontal: "
- << BenchmarkFilter(media::FILTER_BILINEAR_H)
- << "ms/frame" << std::endl;
std::cout << "Bilinear: " << BenchmarkFilter(media::FILTER_BILINEAR)
<< "ms/frame" << std::endl;