diff options
author | finnur@chromium.org <finnur@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-02-11 11:58:29 +0000 |
---|---|---|
committer | finnur@chromium.org <finnur@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-02-11 11:58:29 +0000 |
commit | f92d0bbbd1e7eb2bd77e1ebb7edc33a516bc29f6 (patch) | |
tree | a55b89c0f6de8a76d75b3cea20bd99d2bb4da3ff /remoting | |
parent | 5e5376de8ce113df5653351f21a3efb7ba2db007 (diff) | |
download | chromium_src-f92d0bbbd1e7eb2bd77e1ebb7edc33a516bc29f6.zip chromium_src-f92d0bbbd1e7eb2bd77e1ebb7edc33a516bc29f6.tar.gz chromium_src-f92d0bbbd1e7eb2bd77e1ebb7edc33a516bc29f6.tar.bz2 |
Revert 74583 - Revert 74571 - Use SSE2 block differ for chromoting
(Quick test to see if it is the cause of crashes in media_unittests)
(Test showed this CL is not to blame)
The SSE2 block differ has been sitting in the tree without ever being used.
This change makes it possible to use it.
BUG=None
TEST=Chromoting to a host machine and the diff will work correctly
Review URL: http://codereview.chromium.org/6469022
TBR=hclam@chromium.org
Review URL: http://codereview.chromium.org/6488023
TBR=finnur@chromium.org
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@74588 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'remoting')
-rw-r--r-- | remoting/host/differ.cc | 32 | ||||
-rw-r--r-- | remoting/host/differ.h | 6 | ||||
-rw-r--r-- | remoting/host/differ_block.cc | 92 | ||||
-rw-r--r-- | remoting/host/differ_block.h | 1 | ||||
-rw-r--r-- | remoting/host/differ_block_internal.h | 25 | ||||
-rw-r--r-- | remoting/host/differ_block_sse2.cc | 111 | ||||
-rw-r--r-- | remoting/host/differ_unittest.cc | 10 | ||||
-rw-r--r-- | remoting/remoting.gyp | 43 |
8 files changed, 209 insertions, 111 deletions
diff --git a/remoting/host/differ.cc b/remoting/host/differ.cc index c7d28b0..c473870 100644 --- a/remoting/host/differ.cc +++ b/remoting/host/differ.cc @@ -5,6 +5,7 @@ #include "remoting/host/differ.h" #include "base/logging.h" +#include "remoting/host/differ_block.h" namespace remoting { @@ -72,7 +73,7 @@ void Differ::MarkDirtyBlocks(const void* prev_buffer, const void* curr_buffer) { uint8* diff_info = diff_info_row_start; for (int x = 0; x < x_full_blocks; x++) { - DiffInfo diff = DiffBlock(prev_block, curr_block, bytes_per_row_); + DiffInfo diff = BlockDifference(prev_block, curr_block, bytes_per_row_); if (diff != 0) { // Mark this block as being modified so that it gets incorporated into // a dirty rect. @@ -98,35 +99,6 @@ void Differ::MarkDirtyBlocks(const void* prev_buffer, const void* curr_buffer) { } } -DiffInfo Differ::DiffBlock(const uint8* prev_buffer, const uint8* curr_buffer, - int stride) { - const uint8* prev_row_start = prev_buffer; - const uint8* curr_row_start = curr_buffer; - - // Number of uint64s in each row of the block. - // This must be an integral number. - int int64s_per_row = (kBlockSize * bytes_per_pixel_) / sizeof(uint64); - DCHECK(((kBlockSize * bytes_per_pixel_) % sizeof(uint64)) == 0); - - for (int y = 0; y < kBlockSize; y++) { - const uint64* prev = reinterpret_cast<const uint64*>(prev_row_start); - const uint64* curr = reinterpret_cast<const uint64*>(curr_row_start); - - // Check each row in uint64-sized chunks. - // Note that this check may straddle multiple pixels. This is OK because - // we're interested in identifying whether or not there was change - we - // don't care what the actual change is. 
- for (int x = 0; x < int64s_per_row; x++) { - if (*prev++ != *curr++) { - return 1; - } - } - prev_row_start += stride; - curr_row_start += stride; - } - return 0; -} - DiffInfo Differ::DiffPartialBlock(const uint8* prev_buffer, const uint8* curr_buffer, int stride, int width, int height) { diff --git a/remoting/host/differ.h b/remoting/host/differ.h index d227333..8386e7a 100644 --- a/remoting/host/differ.h +++ b/remoting/host/differ.h @@ -36,12 +36,6 @@ class Differ { void MarkDirtyBlocks(const void* prev_buffer, const void* curr_buffer); // Diff a small block of image and return non-zero if there is a diff. - // Currently, this just returns 0 or 1, but this may change in the future - // to return the number of pixels changed. - DiffInfo DiffBlock(const uint8* prev_buffer, const uint8* curr_buffer, - int stride); - - // Diff a small block of image and return non-zero if there is a diff. // This checks only the part of the block specified by the width and // height parameters. // This is much slower than DiffBlock() since it cannot assume that the diff --git a/remoting/host/differ_block.cc b/remoting/host/differ_block.cc index f7b785d..88a4f8b6 100644 --- a/remoting/host/differ_block.cc +++ b/remoting/host/differ_block.cc @@ -4,76 +4,13 @@ #include "remoting/host/differ_block.h" -#include <stdlib.h> - -#if !defined(USE_SSE2) -#if defined(__SSE2__) || defined(ARCH_CPU_X86_64) || defined(_MSC_VER) -#define USE_SSE2 1 -#else -#define USE_SSE2 0 -#endif -#endif - -#if USE_SSE2 -#include <emmintrin.h> -#endif +#include "build/build_config.h" +#include "media/base/cpu_features.h" +#include "remoting/host/differ_block_internal.h" namespace remoting { -#if USE_SSE2 -int BlockDifference(const uint8* image1, const uint8* image2, int stride) { - __m128i acc = _mm_setzero_si128(); - __m128i v0; - __m128i v1; - __m128i sad; - for (int y = 0; y < kBlockHeight; ++y) { - const __m128i* i1 = reinterpret_cast<const __m128i*>(image1); - const __m128i* i2 = reinterpret_cast<const 
__m128i*>(image2); - v0 = _mm_loadu_si128(i1); - v1 = _mm_loadu_si128(i2); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 1); - v1 = _mm_loadu_si128(i2 + 1); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 2); - v1 = _mm_loadu_si128(i2 + 2); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 3); - v1 = _mm_loadu_si128(i2 + 3); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 4); - v1 = _mm_loadu_si128(i2 + 4); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 5); - v1 = _mm_loadu_si128(i2 + 5); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 6); - v1 = _mm_loadu_si128(i2 + 6); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 7); - v1 = _mm_loadu_si128(i2 + 7); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - sad = _mm_shuffle_epi32(acc, 0xEE); // [acc3, acc2, acc3, acc2] - sad = _mm_adds_epu16(sad, acc); - int diff = _mm_cvtsi128_si32(sad); - if (diff) { - return 1; - } - image1 += stride; - image2 += stride; - } - return 0; -} -#else -int BlockDifference(const uint8* image1, const uint8* image2, int stride) { +int BlockDifference_C(const uint8* image1, const uint8* image2, int stride) { // Number of uint64s in each row of the block. // This must be an integral number. int int64s_per_row = (kBlockWidth * kBytesPerPixel) / sizeof(uint64); @@ -96,6 +33,27 @@ int BlockDifference(const uint8* image1, const uint8* image2, int stride) { } return 0; } + +int BlockDifference(const uint8* image1, const uint8* image2, int stride) { + static int (*diff_proc)(const uint8*, const uint8*, int) = NULL; + + if (!diff_proc) { +#if defined(ARCH_CPU_ARM_FAMILY) + // For ARM processors, always use C version. + // TODO(hclam): Implement a NEON version. 
+ diff_proc = &BlockDifference_C; +#else + // For x86 processors, check if SSE2 is supported. + if (media::hasSSE2() && kBlockWidth == 32) + diff_proc = &BlockDifference_SSE2_W32; + else if (media::hasSSE2() && kBlockWidth == 16) + diff_proc = &BlockDifference_SSE2_W16; + else + diff_proc = &BlockDifference_C; #endif + } + + return diff_proc(image1, image2, stride); +} } // namespace remoting diff --git a/remoting/host/differ_block.h b/remoting/host/differ_block.h index 8ca538f..9c03814 100644 --- a/remoting/host/differ_block.h +++ b/remoting/host/differ_block.h @@ -17,7 +17,6 @@ static const int kBytesPerPixel = 4; // Low level functions to compare 2 blocks of pixels. // zero means the blocks are identical. // one means the blocks are different. - int BlockDifference(const uint8* image1, const uint8* image2, int stride); } // namespace remoting diff --git a/remoting/host/differ_block_internal.h b/remoting/host/differ_block_internal.h new file mode 100644 index 0000000..d0ddce0 --- /dev/null +++ b/remoting/host/differ_block_internal.h @@ -0,0 +1,25 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// This header file is used only by differ_block.cc and differ_block_sse2.cc. +// It declares the SSE2 routines for finding block difference. + +#ifndef REMOTING_HOST_DIFFER_BLOCK_INTERNAL_H_ +#define REMOTING_HOST_DIFFER_BLOCK_INTERNAL_H_ + +#include "base/basictypes.h" + +namespace remoting { + +// Find block difference of dimension 16x16. +extern int BlockDifference_SSE2_W16(const uint8* image1, const uint8* image2, + int stride); + +// Find block difference of dimension 32x32. 
+extern int BlockDifference_SSE2_W32(const uint8* image1, const uint8* image2, + int stride); + +} // namespace remoting + +#endif // REMOTING_HOST_DIFFER_BLOCK_INTERNAL_H_ diff --git a/remoting/host/differ_block_sse2.cc b/remoting/host/differ_block_sse2.cc new file mode 100644 index 0000000..c0cc3b6 --- /dev/null +++ b/remoting/host/differ_block_sse2.cc @@ -0,0 +1,111 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#if defined(_MSC_VER) +#include <intrin.h> +#else +#include <mmintrin.h> +#include <emmintrin.h> +#endif + +#include "remoting/host/differ_block.h" +#include "remoting/host/differ_block_internal.h" + +namespace remoting { + +extern int BlockDifference_SSE2_W16(const uint8* image1, const uint8* image2, + int stride) { + __m128i acc = _mm_setzero_si128(); + __m128i v0; + __m128i v1; + __m128i sad; + for (int y = 0; y < kBlockHeight; ++y) { + const __m128i* i1 = reinterpret_cast<const __m128i*>(image1); + const __m128i* i2 = reinterpret_cast<const __m128i*>(image2); + v0 = _mm_loadu_si128(i1); + v1 = _mm_loadu_si128(i2); + sad = _mm_sad_epu8(v0, v1); + acc = _mm_adds_epu16(acc, sad); + v0 = _mm_loadu_si128(i1 + 1); + v1 = _mm_loadu_si128(i2 + 1); + sad = _mm_sad_epu8(v0, v1); + acc = _mm_adds_epu16(acc, sad); + v0 = _mm_loadu_si128(i1 + 2); + v1 = _mm_loadu_si128(i2 + 2); + sad = _mm_sad_epu8(v0, v1); + acc = _mm_adds_epu16(acc, sad); + v0 = _mm_loadu_si128(i1 + 3); + v1 = _mm_loadu_si128(i2 + 3); + sad = _mm_sad_epu8(v0, v1); + acc = _mm_adds_epu16(acc, sad); + + // This essentially means sad = acc >> 64. We only care about the lower 16 + // bits. 
+ sad = _mm_shuffle_epi32(acc, 0xEE); + sad = _mm_adds_epu16(sad, acc); + int diff = _mm_cvtsi128_si32(sad); + if (diff) + return 1; + image1 += stride; + image2 += stride; + } + return 0; +} + +extern int BlockDifference_SSE2_W32(const uint8* image1, const uint8* image2, + int stride) { + __m128i acc = _mm_setzero_si128(); + __m128i v0; + __m128i v1; + __m128i sad; + for (int y = 0; y < kBlockHeight; ++y) { + const __m128i* i1 = reinterpret_cast<const __m128i*>(image1); + const __m128i* i2 = reinterpret_cast<const __m128i*>(image2); + v0 = _mm_loadu_si128(i1); + v1 = _mm_loadu_si128(i2); + sad = _mm_sad_epu8(v0, v1); + acc = _mm_adds_epu16(acc, sad); + v0 = _mm_loadu_si128(i1 + 1); + v1 = _mm_loadu_si128(i2 + 1); + sad = _mm_sad_epu8(v0, v1); + acc = _mm_adds_epu16(acc, sad); + v0 = _mm_loadu_si128(i1 + 2); + v1 = _mm_loadu_si128(i2 + 2); + sad = _mm_sad_epu8(v0, v1); + acc = _mm_adds_epu16(acc, sad); + v0 = _mm_loadu_si128(i1 + 3); + v1 = _mm_loadu_si128(i2 + 3); + sad = _mm_sad_epu8(v0, v1); + acc = _mm_adds_epu16(acc, sad); + v0 = _mm_loadu_si128(i1 + 4); + v1 = _mm_loadu_si128(i2 + 4); + sad = _mm_sad_epu8(v0, v1); + acc = _mm_adds_epu16(acc, sad); + v0 = _mm_loadu_si128(i1 + 5); + v1 = _mm_loadu_si128(i2 + 5); + sad = _mm_sad_epu8(v0, v1); + acc = _mm_adds_epu16(acc, sad); + v0 = _mm_loadu_si128(i1 + 6); + v1 = _mm_loadu_si128(i2 + 6); + sad = _mm_sad_epu8(v0, v1); + acc = _mm_adds_epu16(acc, sad); + v0 = _mm_loadu_si128(i1 + 7); + v1 = _mm_loadu_si128(i2 + 7); + sad = _mm_sad_epu8(v0, v1); + acc = _mm_adds_epu16(acc, sad); + + // This essentially means sad = acc >> 64. We only care about the lower 16 + // bits. 
+ sad = _mm_shuffle_epi32(acc, 0xEE); + sad = _mm_adds_epu16(sad, acc); + int diff = _mm_cvtsi128_si32(sad); + if (diff) + return 1; + image1 += stride; + image2 += stride; + } + return 0; +} + +} // namespace remoting diff --git a/remoting/host/differ_unittest.cc b/remoting/host/differ_unittest.cc index f6156f3..1728a73 100644 --- a/remoting/host/differ_unittest.cc +++ b/remoting/host/differ_unittest.cc @@ -4,14 +4,14 @@ #include "base/scoped_ptr.h" #include "remoting/host/differ.h" +#include "remoting/host/differ_block.h" #include "testing/gmock/include/gmock/gmock.h" namespace remoting { -// 96x96 screen gives a 3x3 grid of blocks. +// 96x96 screen gives a 4x4 grid of blocks. const int kScreenWidth= 96; const int kScreenHeight = 96; -const int kBytesPerPixel = 3; const int kBytesPerRow = (kBytesPerPixel * kScreenWidth); class DifferTest : public testing::Test { @@ -50,9 +50,9 @@ class DifferTest : public testing::Test { // Offset from upper-left of buffer to upper-left of requested block. int block_offset = ((block_y * stride_) + (block_x * bytes_per_pixel_)) * kBlockSize; - return differ_->DiffBlock(prev_.get() + block_offset, - curr_.get() + block_offset, - stride_); + return BlockDifference(prev_.get() + block_offset, + curr_.get() + block_offset, + stride_); } // Write the pixel |value| into the specified block in the |buffer|. 
diff --git a/remoting/remoting.gyp b/remoting/remoting.gyp index f39ef6b..9b5239f 100644 --- a/remoting/remoting.gyp +++ b/remoting/remoting.gyp @@ -183,6 +183,7 @@ 'chromoting_base', 'chromoting_jingle_glue', 'chromoting_protocol', + 'differ_block', ], 'sources': [ 'host/access_verifier.cc', @@ -197,8 +198,6 @@ 'host/chromoting_host_context.h', 'host/differ.h', 'host/differ.cc', - 'host/differ_block.h', - 'host/differ_block.cc', 'host/screen_recorder.cc', 'host/screen_recorder.h', 'host/heartbeat_sender.cc', @@ -421,6 +420,46 @@ }, # end of target 'chromoting_protocol' { + 'target_name': 'differ_block', + 'type': '<(library)', + 'include_dirs': [ + '..', + ], + 'dependencies': [ + '../media/media.gyp:cpu_features', + ], + 'conditions': [ + [ 'target_arch == "ia32" or target_arch == "x64"', { + 'dependencies': [ + 'differ_block_sse2', + ], + }], + ], + 'sources': [ + 'host/differ_block.cc', + 'host/differ_block.h', + ], + }, # end of target differ_block + + { + 'target_name': 'differ_block_sse2', + 'type': '<(library)', + 'include_dirs': [ + '..', + ], + 'conditions': [ + [ 'OS == "linux" or OS == "freebsd" or OS == "openbsd"', { + 'cflags': [ + '-msse2', + ], + }], + ], + 'sources': [ + 'host/differ_block_sse2.cc', + ], + }, # end of target differ_block_sse2 + + { 'target_name': 'chromotocol_test_client', 'type': 'executable', 'dependencies': [ |