diff options
author | dhollowa@chromium.org <dhollowa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-02-11 18:00:57 +0000 |
---|---|---|
committer | dhollowa@chromium.org <dhollowa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-02-11 18:00:57 +0000 |
commit | c59d845257c5245a476ffd4284c506ef58fe008d (patch) | |
tree | 7d6db0a3b735ec3fb944ad902506d638aa1cf1dd | |
parent | eff69e3f36db0ed1e17281506cf315e51e07c2c3 (diff) | |
download | chromium_src-c59d845257c5245a476ffd4284c506ef58fe008d.zip chromium_src-c59d845257c5245a476ffd4284c506ef58fe008d.tar.gz chromium_src-c59d845257c5245a476ffd4284c506ef58fe008d.tar.bz2 |
Revert 74571 - Use SSE2 block differ for chromoting
We have the SSE2 block differ lying around in the tree, but it is never used. This
change will allow us to use it.
A number of Windows bots have gone red in media_unittests on the waterfall:
http://build.chromium.org/p/chromium/builders/XP%20Tests%20%281%29
http://build.chromium.org/p/chromium/builders/XP%20Tests%20%281%29/builds/501/steps/media_unittests/logs/stdio
This seems to be related to r74571, so I am reverting it to see whether the bots recover.
BUG=None
TEST=Chromoting to a host machine and the diff will work correctly
Review URL: http://codereview.chromium.org/6469022
TBR=hclam@chromium.org
Review URL: http://codereview.chromium.org/6502002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@74630 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | media/base/yuv_convert.cc | 3 | ||||
-rw-r--r-- | remoting/host/differ.cc | 32 | ||||
-rw-r--r-- | remoting/host/differ.h | 6 | ||||
-rw-r--r-- | remoting/host/differ_block.cc | 92 | ||||
-rw-r--r-- | remoting/host/differ_block.h | 1 | ||||
-rw-r--r-- | remoting/host/differ_block_internal.h | 25 | ||||
-rw-r--r-- | remoting/host/differ_block_sse2.cc | 111 | ||||
-rw-r--r-- | remoting/host/differ_unittest.cc | 10 | ||||
-rw-r--r-- | remoting/remoting.gyp | 43 |
9 files changed, 112 insertions, 211 deletions
diff --git a/media/base/yuv_convert.cc b/media/base/yuv_convert.cc index 2e4af1d..f6e2857 100644 --- a/media/base/yuv_convert.cc +++ b/media/base/yuv_convert.cc @@ -17,7 +17,6 @@ #include "media/base/yuv_convert.h" -#include "build/build_config.h" #include "media/base/cpu_features.h" #include "media/base/yuv_convert_internal.h" #include "media/base/yuv_row.h" @@ -357,7 +356,7 @@ void ConvertRGB32ToYUV(const uint8* rgbframe, static void (*convert_proc)(const uint8*, uint8*, uint8*, uint8*, int, int, int, int, int) = NULL; if (!convert_proc) { -#if defined(ARCH_CPU_ARM_FAMILY) +#ifdef __arm__ // For ARM processors, always use C version. // TODO(hclam): Implement a NEON version. convert_proc = &ConvertRGB32ToYUV_C; diff --git a/remoting/host/differ.cc b/remoting/host/differ.cc index c473870..c7d28b0 100644 --- a/remoting/host/differ.cc +++ b/remoting/host/differ.cc @@ -5,7 +5,6 @@ #include "remoting/host/differ.h" #include "base/logging.h" -#include "remoting/host/differ_block.h" namespace remoting { @@ -73,7 +72,7 @@ void Differ::MarkDirtyBlocks(const void* prev_buffer, const void* curr_buffer) { uint8* diff_info = diff_info_row_start; for (int x = 0; x < x_full_blocks; x++) { - DiffInfo diff = BlockDifference(prev_block, curr_block, bytes_per_row_); + DiffInfo diff = DiffBlock(prev_block, curr_block, bytes_per_row_); if (diff != 0) { // Mark this block as being modified so that it gets incorporated into // a dirty rect. @@ -99,6 +98,35 @@ void Differ::MarkDirtyBlocks(const void* prev_buffer, const void* curr_buffer) { } } +DiffInfo Differ::DiffBlock(const uint8* prev_buffer, const uint8* curr_buffer, + int stride) { + const uint8* prev_row_start = prev_buffer; + const uint8* curr_row_start = curr_buffer; + + // Number of uint64s in each row of the block. + // This must be an integral number. 
+ int int64s_per_row = (kBlockSize * bytes_per_pixel_) / sizeof(uint64); + DCHECK(((kBlockSize * bytes_per_pixel_) % sizeof(uint64)) == 0); + + for (int y = 0; y < kBlockSize; y++) { + const uint64* prev = reinterpret_cast<const uint64*>(prev_row_start); + const uint64* curr = reinterpret_cast<const uint64*>(curr_row_start); + + // Check each row in uint64-sized chunks. + // Note that this check may straddle multiple pixels. This is OK because + // we're interested in identifying whether or not there was change - we + // don't care what the actual change is. + for (int x = 0; x < int64s_per_row; x++) { + if (*prev++ != *curr++) { + return 1; + } + } + prev_row_start += stride; + curr_row_start += stride; + } + return 0; +} + DiffInfo Differ::DiffPartialBlock(const uint8* prev_buffer, const uint8* curr_buffer, int stride, int width, int height) { diff --git a/remoting/host/differ.h b/remoting/host/differ.h index 8386e7a..d227333 100644 --- a/remoting/host/differ.h +++ b/remoting/host/differ.h @@ -36,6 +36,12 @@ class Differ { void MarkDirtyBlocks(const void* prev_buffer, const void* curr_buffer); // Diff a small block of image and return non-zero if there is a diff. + // Currently, this just returns 0 or 1, but this may change in the future + // to return the number of pixels changed. + DiffInfo DiffBlock(const uint8* prev_buffer, const uint8* curr_buffer, + int stride); + + // Diff a small block of image and return non-zero if there is a diff. // This checks only the part of the block specified by the width and // height parameters. 
// This is much slower than DiffBlock() since it cannot assume that the diff --git a/remoting/host/differ_block.cc b/remoting/host/differ_block.cc index 88a4f8b6..f7b785d 100644 --- a/remoting/host/differ_block.cc +++ b/remoting/host/differ_block.cc @@ -4,13 +4,76 @@ #include "remoting/host/differ_block.h" -#include "build/build_config.h" -#include "media/base/cpu_features.h" -#include "remoting/host/differ_block_internal.h" +#include <stdlib.h> + +#if !defined(USE_SSE2) +#if defined(__SSE2__) || defined(ARCH_CPU_X86_64) || defined(_MSC_VER) +#define USE_SSE2 1 +#else +#define USE_SSE2 0 +#endif +#endif + +#if USE_SSE2 +#include <emmintrin.h> +#endif namespace remoting { -int BlockDifference_C(const uint8* image1, const uint8* image2, int stride) { +#if USE_SSE2 +int BlockDifference(const uint8* image1, const uint8* image2, int stride) { + __m128i acc = _mm_setzero_si128(); + __m128i v0; + __m128i v1; + __m128i sad; + for (int y = 0; y < kBlockHeight; ++y) { + const __m128i* i1 = reinterpret_cast<const __m128i*>(image1); + const __m128i* i2 = reinterpret_cast<const __m128i*>(image2); + v0 = _mm_loadu_si128(i1); + v1 = _mm_loadu_si128(i2); + sad = _mm_sad_epu8(v0, v1); + acc = _mm_adds_epu16(acc, sad); + v0 = _mm_loadu_si128(i1 + 1); + v1 = _mm_loadu_si128(i2 + 1); + sad = _mm_sad_epu8(v0, v1); + acc = _mm_adds_epu16(acc, sad); + v0 = _mm_loadu_si128(i1 + 2); + v1 = _mm_loadu_si128(i2 + 2); + sad = _mm_sad_epu8(v0, v1); + acc = _mm_adds_epu16(acc, sad); + v0 = _mm_loadu_si128(i1 + 3); + v1 = _mm_loadu_si128(i2 + 3); + sad = _mm_sad_epu8(v0, v1); + acc = _mm_adds_epu16(acc, sad); + v0 = _mm_loadu_si128(i1 + 4); + v1 = _mm_loadu_si128(i2 + 4); + sad = _mm_sad_epu8(v0, v1); + acc = _mm_adds_epu16(acc, sad); + v0 = _mm_loadu_si128(i1 + 5); + v1 = _mm_loadu_si128(i2 + 5); + sad = _mm_sad_epu8(v0, v1); + acc = _mm_adds_epu16(acc, sad); + v0 = _mm_loadu_si128(i1 + 6); + v1 = _mm_loadu_si128(i2 + 6); + sad = _mm_sad_epu8(v0, v1); + acc = _mm_adds_epu16(acc, sad); + v0 = 
_mm_loadu_si128(i1 + 7); + v1 = _mm_loadu_si128(i2 + 7); + sad = _mm_sad_epu8(v0, v1); + acc = _mm_adds_epu16(acc, sad); + sad = _mm_shuffle_epi32(acc, 0xEE); // [acc3, acc2, acc3, acc2] + sad = _mm_adds_epu16(sad, acc); + int diff = _mm_cvtsi128_si32(sad); + if (diff) { + return 1; + } + image1 += stride; + image2 += stride; + } + return 0; +} +#else +int BlockDifference(const uint8* image1, const uint8* image2, int stride) { // Number of uint64s in each row of the block. // This must be an integral number. int int64s_per_row = (kBlockWidth * kBytesPerPixel) / sizeof(uint64); @@ -33,27 +96,6 @@ int BlockDifference_C(const uint8* image1, const uint8* image2, int stride) { } return 0; } - -int BlockDifference(const uint8* image1, const uint8* image2, int stride) { - static int (*diff_proc)(const uint8*, const uint8*, int) = NULL; - - if (!diff_proc) { -#if defined(ARCH_CPU_ARM_FAMILY) - // For ARM processors, always use C version. - // TODO(hclam): Implement a NEON version. - diff_proc = &BlockDifference_C; -#else - // For x86 processors, check if SSE2 is supported. - if (media::hasSSE2() && kBlockWidth == 32) - diff_proc = &BlockDifference_SSE2_W32; - else if (media::hasSSE2() && kBlockWidth == 16) - diff_proc = &BlockDifference_SSE2_W16; - else - diff_proc = &BlockDifference_C; #endif - } - - return diff_proc(image1, image2, stride); -} } // namespace remoting diff --git a/remoting/host/differ_block.h b/remoting/host/differ_block.h index 9c03814..8ca538f 100644 --- a/remoting/host/differ_block.h +++ b/remoting/host/differ_block.h @@ -17,6 +17,7 @@ static const int kBytesPerPixel = 4; // Low level functions to compare 2 blocks of pixels. // zero means the blocks are identical. // one means the blocks are different. 
+ int BlockDifference(const uint8* image1, const uint8* image2, int stride); } // namespace remoting diff --git a/remoting/host/differ_block_internal.h b/remoting/host/differ_block_internal.h deleted file mode 100644 index d0ddce0..0000000 --- a/remoting/host/differ_block_internal.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// This header file is used only differ_block.h. It defines the SSE2 rountines -// for finding block difference. - -#ifndef REMOTING_HOST_DIFFER_BLOCK_INTERNAL_H_ -#define REMOTING_HOST_DIFFER_BLOCK_INTERNAL_H_ - -#include "base/basictypes.h" - -namespace remoting { - -// Find block difference of dimension 16x16. -extern int BlockDifference_SSE2_W16(const uint8* image1, const uint8* image2, - int stride); - -// Find block difference of dimension 32x32. -extern int BlockDifference_SSE2_W32(const uint8* image1, const uint8* image2, - int stride); - -} // namespace remoting - -#endif // REMOTING_HOST_DIFFER_BLOCK_INTERNAL_H_ diff --git a/remoting/host/differ_block_sse2.cc b/remoting/host/differ_block_sse2.cc deleted file mode 100644 index c0cc3b6..0000000 --- a/remoting/host/differ_block_sse2.cc +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#if defined(_MSC_VER) -#include <intrin.h> -#else -#include <mmintrin.h> -#include <emmintrin.h> -#endif - -#include "remoting/host/differ_block.h" -#include "remoting/host/differ_block_internal.h" - -namespace remoting { - -extern int BlockDifference_SSE2_W16(const uint8* image1, const uint8* image2, - int stride) { - __m128i acc = _mm_setzero_si128(); - __m128i v0; - __m128i v1; - __m128i sad; - for (int y = 0; y < kBlockHeight; ++y) { - const __m128i* i1 = reinterpret_cast<const __m128i*>(image1); - const __m128i* i2 = reinterpret_cast<const __m128i*>(image2); - v0 = _mm_loadu_si128(i1); - v1 = _mm_loadu_si128(i2); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 1); - v1 = _mm_loadu_si128(i2 + 1); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 2); - v1 = _mm_loadu_si128(i2 + 2); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 3); - v1 = _mm_loadu_si128(i2 + 3); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - - // This essential means sad = acc >> 64. We only care about the lower 16 - // bits. 
- sad = _mm_shuffle_epi32(acc, 0xEE); - sad = _mm_adds_epu16(sad, acc); - int diff = _mm_cvtsi128_si32(sad); - if (diff) - return 1; - image1 += stride; - image2 += stride; - } - return 0; -} - -extern int BlockDifference_SSE2_W32(const uint8* image1, const uint8* image2, - int stride) { - __m128i acc = _mm_setzero_si128(); - __m128i v0; - __m128i v1; - __m128i sad; - for (int y = 0; y < kBlockHeight; ++y) { - const __m128i* i1 = reinterpret_cast<const __m128i*>(image1); - const __m128i* i2 = reinterpret_cast<const __m128i*>(image2); - v0 = _mm_loadu_si128(i1); - v1 = _mm_loadu_si128(i2); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 1); - v1 = _mm_loadu_si128(i2 + 1); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 2); - v1 = _mm_loadu_si128(i2 + 2); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 3); - v1 = _mm_loadu_si128(i2 + 3); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 4); - v1 = _mm_loadu_si128(i2 + 4); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 5); - v1 = _mm_loadu_si128(i2 + 5); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 6); - v1 = _mm_loadu_si128(i2 + 6); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 7); - v1 = _mm_loadu_si128(i2 + 7); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - - // This essential means sad = acc >> 64. We only care about the lower 16 - // bits. 
- sad = _mm_shuffle_epi32(acc, 0xEE); - sad = _mm_adds_epu16(sad, acc); - int diff = _mm_cvtsi128_si32(sad); - if (diff) - return 1; - image1 += stride; - image2 += stride; - } - return 0; -} - -} // namespace remoting diff --git a/remoting/host/differ_unittest.cc b/remoting/host/differ_unittest.cc index 1728a73..f6156f3 100644 --- a/remoting/host/differ_unittest.cc +++ b/remoting/host/differ_unittest.cc @@ -4,14 +4,14 @@ #include "base/scoped_ptr.h" #include "remoting/host/differ.h" -#include "remoting/host/differ_block.h" #include "testing/gmock/include/gmock/gmock.h" namespace remoting { -// 96x96 screen gives a 4x4 grid of blocks. +// 96x96 screen gives a 3x3 grid of blocks. const int kScreenWidth= 96; const int kScreenHeight = 96; +const int kBytesPerPixel = 3; const int kBytesPerRow = (kBytesPerPixel * kScreenWidth); class DifferTest : public testing::Test { @@ -50,9 +50,9 @@ class DifferTest : public testing::Test { // Offset from upper-left of buffer to upper-left of requested block. int block_offset = ((block_y * stride_) + (block_x * bytes_per_pixel_)) * kBlockSize; - return BlockDifference(prev_.get() + block_offset, - curr_.get() + block_offset, - stride_); + return differ_->DiffBlock(prev_.get() + block_offset, + curr_.get() + block_offset, + stride_); } // Write the pixel |value| into the specified block in the |buffer|. 
diff --git a/remoting/remoting.gyp b/remoting/remoting.gyp index 9b5239f..f39ef6b 100644 --- a/remoting/remoting.gyp +++ b/remoting/remoting.gyp @@ -183,7 +183,6 @@ 'chromoting_base', 'chromoting_jingle_glue', 'chromoting_protocol', - 'differ_block', ], 'sources': [ 'host/access_verifier.cc', @@ -198,6 +197,8 @@ 'host/chromoting_host_context.h', 'host/differ.h', 'host/differ.cc', + 'host/differ_block.h', + 'host/differ_block.cc', 'host/screen_recorder.cc', 'host/screen_recorder.h', 'host/heartbeat_sender.cc', @@ -420,46 +421,6 @@ }, # end of target 'chromoting_protocol' { - 'target_name': 'differ_block', - 'type': '<(library)', - 'include_dirs': [ - '..', - ], - 'dependencies': [ - '../media/media.gyp:cpu_features', - ], - 'conditions': [ - [ 'target_arch == "ia32" or target_arch == "x64"', { - 'dependencies': [ - 'differ_block_sse2', - ], - }], - ], - 'sources': [ - 'host/differ_block.cc', - 'host/differ_block.h', - ], - }, # end of target differ_block - - { - 'target_name': 'differ_block_sse2', - 'type': '<(library)', - 'include_dirs': [ - '..', - ], - 'conditions': [ - [ 'OS == "linux" or OS == "freebsd" or OS == "openbsd"', { - 'cflags': [ - '-msse2', - ], - }], - ], - 'sources': [ - 'host/differ_block_sse2.cc', - ], - }, # end of target differ_block_sse2 - - { 'target_name': 'chromotocol_test_client', 'type': 'executable', 'dependencies': [ |