summaryrefslogtreecommitdiffstats
path: root/remoting
diff options
context:
space:
mode:
authorfinnur@chromium.org <finnur@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-02-11 11:58:29 +0000
committerfinnur@chromium.org <finnur@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-02-11 11:58:29 +0000
commitf92d0bbbd1e7eb2bd77e1ebb7edc33a516bc29f6 (patch)
treea55b89c0f6de8a76d75b3cea20bd99d2bb4da3ff /remoting
parent5e5376de8ce113df5653351f21a3efb7ba2db007 (diff)
downloadchromium_src-f92d0bbbd1e7eb2bd77e1ebb7edc33a516bc29f6.zip
chromium_src-f92d0bbbd1e7eb2bd77e1ebb7edc33a516bc29f6.tar.gz
chromium_src-f92d0bbbd1e7eb2bd77e1ebb7edc33a516bc29f6.tar.bz2
Revert 74583 - Revert 74571 - Use SSE2 block differ for chromoting
(Quick test to see if it is the cause of crashes in media_unittests) (Test showed this CL is not to blame) We have the SSE2 lying around in the tree just never being used. This will allow us to use it. BUG=None TEST=Chromoting to a host machine and the diff will work correctly Review URL: http://codereview.chromium.org/6469022 TBR=hclam@chromium.org Review URL: http://codereview.chromium.org/6488023 TBR=finnur@chromium.org git-svn-id: svn://svn.chromium.org/chrome/trunk/src@74588 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'remoting')
-rw-r--r--remoting/host/differ.cc32
-rw-r--r--remoting/host/differ.h6
-rw-r--r--remoting/host/differ_block.cc92
-rw-r--r--remoting/host/differ_block.h1
-rw-r--r--remoting/host/differ_block_internal.h25
-rw-r--r--remoting/host/differ_block_sse2.cc111
-rw-r--r--remoting/host/differ_unittest.cc10
-rw-r--r--remoting/remoting.gyp43
8 files changed, 209 insertions, 111 deletions
diff --git a/remoting/host/differ.cc b/remoting/host/differ.cc
index c7d28b0..c473870 100644
--- a/remoting/host/differ.cc
+++ b/remoting/host/differ.cc
@@ -5,6 +5,7 @@
#include "remoting/host/differ.h"
#include "base/logging.h"
+#include "remoting/host/differ_block.h"
namespace remoting {
@@ -72,7 +73,7 @@ void Differ::MarkDirtyBlocks(const void* prev_buffer, const void* curr_buffer) {
uint8* diff_info = diff_info_row_start;
for (int x = 0; x < x_full_blocks; x++) {
- DiffInfo diff = DiffBlock(prev_block, curr_block, bytes_per_row_);
+ DiffInfo diff = BlockDifference(prev_block, curr_block, bytes_per_row_);
if (diff != 0) {
// Mark this block as being modified so that it gets incorporated into
// a dirty rect.
@@ -98,35 +99,6 @@ void Differ::MarkDirtyBlocks(const void* prev_buffer, const void* curr_buffer) {
}
}
-DiffInfo Differ::DiffBlock(const uint8* prev_buffer, const uint8* curr_buffer,
- int stride) {
- const uint8* prev_row_start = prev_buffer;
- const uint8* curr_row_start = curr_buffer;
-
- // Number of uint64s in each row of the block.
- // This must be an integral number.
- int int64s_per_row = (kBlockSize * bytes_per_pixel_) / sizeof(uint64);
- DCHECK(((kBlockSize * bytes_per_pixel_) % sizeof(uint64)) == 0);
-
- for (int y = 0; y < kBlockSize; y++) {
- const uint64* prev = reinterpret_cast<const uint64*>(prev_row_start);
- const uint64* curr = reinterpret_cast<const uint64*>(curr_row_start);
-
- // Check each row in uint64-sized chunks.
- // Note that this check may straddle multiple pixels. This is OK because
- // we're interested in identifying whether or not there was change - we
- // don't care what the actual change is.
- for (int x = 0; x < int64s_per_row; x++) {
- if (*prev++ != *curr++) {
- return 1;
- }
- }
- prev_row_start += stride;
- curr_row_start += stride;
- }
- return 0;
-}
-
DiffInfo Differ::DiffPartialBlock(const uint8* prev_buffer,
const uint8* curr_buffer,
int stride, int width, int height) {
diff --git a/remoting/host/differ.h b/remoting/host/differ.h
index d227333..8386e7a 100644
--- a/remoting/host/differ.h
+++ b/remoting/host/differ.h
@@ -36,12 +36,6 @@ class Differ {
void MarkDirtyBlocks(const void* prev_buffer, const void* curr_buffer);
// Diff a small block of image and return non-zero if there is a diff.
- // Currently, this just returns 0 or 1, but this may change in the future
- // to return the number of pixels changed.
- DiffInfo DiffBlock(const uint8* prev_buffer, const uint8* curr_buffer,
- int stride);
-
- // Diff a small block of image and return non-zero if there is a diff.
// This checks only the part of the block specified by the width and
// height parameters.
// This is much slower than DiffBlock() since it cannot assume that the
diff --git a/remoting/host/differ_block.cc b/remoting/host/differ_block.cc
index f7b785d..88a4f8b6 100644
--- a/remoting/host/differ_block.cc
+++ b/remoting/host/differ_block.cc
@@ -4,76 +4,13 @@
#include "remoting/host/differ_block.h"
-#include <stdlib.h>
-
-#if !defined(USE_SSE2)
-#if defined(__SSE2__) || defined(ARCH_CPU_X86_64) || defined(_MSC_VER)
-#define USE_SSE2 1
-#else
-#define USE_SSE2 0
-#endif
-#endif
-
-#if USE_SSE2
-#include <emmintrin.h>
-#endif
+#include "build/build_config.h"
+#include "media/base/cpu_features.h"
+#include "remoting/host/differ_block_internal.h"
namespace remoting {
-#if USE_SSE2
-int BlockDifference(const uint8* image1, const uint8* image2, int stride) {
- __m128i acc = _mm_setzero_si128();
- __m128i v0;
- __m128i v1;
- __m128i sad;
- for (int y = 0; y < kBlockHeight; ++y) {
- const __m128i* i1 = reinterpret_cast<const __m128i*>(image1);
- const __m128i* i2 = reinterpret_cast<const __m128i*>(image2);
- v0 = _mm_loadu_si128(i1);
- v1 = _mm_loadu_si128(i2);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 1);
- v1 = _mm_loadu_si128(i2 + 1);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 2);
- v1 = _mm_loadu_si128(i2 + 2);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 3);
- v1 = _mm_loadu_si128(i2 + 3);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 4);
- v1 = _mm_loadu_si128(i2 + 4);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 5);
- v1 = _mm_loadu_si128(i2 + 5);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 6);
- v1 = _mm_loadu_si128(i2 + 6);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 7);
- v1 = _mm_loadu_si128(i2 + 7);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- sad = _mm_shuffle_epi32(acc, 0xEE); // [acc3, acc2, acc3, acc2]
- sad = _mm_adds_epu16(sad, acc);
- int diff = _mm_cvtsi128_si32(sad);
- if (diff) {
- return 1;
- }
- image1 += stride;
- image2 += stride;
- }
- return 0;
-}
-#else
-int BlockDifference(const uint8* image1, const uint8* image2, int stride) {
+int BlockDifference_C(const uint8* image1, const uint8* image2, int stride) {
// Number of uint64s in each row of the block.
// This must be an integral number.
int int64s_per_row = (kBlockWidth * kBytesPerPixel) / sizeof(uint64);
@@ -96,6 +33,27 @@ int BlockDifference(const uint8* image1, const uint8* image2, int stride) {
}
return 0;
}
+
+int BlockDifference(const uint8* image1, const uint8* image2, int stride) {
+ static int (*diff_proc)(const uint8*, const uint8*, int) = NULL;
+
+ if (!diff_proc) {
+#if defined(ARCH_CPU_ARM_FAMILY)
+ // For ARM processors, always use C version.
+ // TODO(hclam): Implement a NEON version.
+ diff_proc = &BlockDifference_C;
+#else
+ // For x86 processors, check if SSE2 is supported.
+ if (media::hasSSE2() && kBlockWidth == 32)
+ diff_proc = &BlockDifference_SSE2_W32;
+ else if (media::hasSSE2() && kBlockWidth == 16)
+ diff_proc = &BlockDifference_SSE2_W16;
+ else
+ diff_proc = &BlockDifference_C;
#endif
+ }
+
+ return diff_proc(image1, image2, stride);
+}
} // namespace remoting
diff --git a/remoting/host/differ_block.h b/remoting/host/differ_block.h
index 8ca538f..9c03814 100644
--- a/remoting/host/differ_block.h
+++ b/remoting/host/differ_block.h
@@ -17,7 +17,6 @@ static const int kBytesPerPixel = 4;
// Low level functions to compare 2 blocks of pixels.
// zero means the blocks are identical.
// one means the blocks are different.
-
int BlockDifference(const uint8* image1, const uint8* image2, int stride);
} // namespace remoting
diff --git a/remoting/host/differ_block_internal.h b/remoting/host/differ_block_internal.h
new file mode 100644
index 0000000..d0ddce0
--- /dev/null
+++ b/remoting/host/differ_block_internal.h
@@ -0,0 +1,25 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// This header file is internal to the differ_block implementation. It
+// declares the SSE2 routines for finding block difference.
+
+#ifndef REMOTING_HOST_DIFFER_BLOCK_INTERNAL_H_
+#define REMOTING_HOST_DIFFER_BLOCK_INTERNAL_H_
+
+#include "base/basictypes.h"
+
+namespace remoting {
+
+// Find block difference of dimension 16x16.
+extern int BlockDifference_SSE2_W16(const uint8* image1, const uint8* image2,
+ int stride);
+
+// Find block difference of dimension 32x32.
+extern int BlockDifference_SSE2_W32(const uint8* image1, const uint8* image2,
+ int stride);
+
+} // namespace remoting
+
+#endif // REMOTING_HOST_DIFFER_BLOCK_INTERNAL_H_
diff --git a/remoting/host/differ_block_sse2.cc b/remoting/host/differ_block_sse2.cc
new file mode 100644
index 0000000..c0cc3b6
--- /dev/null
+++ b/remoting/host/differ_block_sse2.cc
@@ -0,0 +1,111 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#else
+#include <mmintrin.h>
+#include <emmintrin.h>
+#endif
+
+#include "remoting/host/differ_block.h"
+#include "remoting/host/differ_block_internal.h"
+
+namespace remoting {
+
+extern int BlockDifference_SSE2_W16(const uint8* image1, const uint8* image2,
+ int stride) {
+ __m128i acc = _mm_setzero_si128();
+ __m128i v0;
+ __m128i v1;
+ __m128i sad;
+ for (int y = 0; y < kBlockHeight; ++y) {
+ const __m128i* i1 = reinterpret_cast<const __m128i*>(image1);
+ const __m128i* i2 = reinterpret_cast<const __m128i*>(image2);
+ v0 = _mm_loadu_si128(i1);
+ v1 = _mm_loadu_si128(i2);
+ sad = _mm_sad_epu8(v0, v1);
+ acc = _mm_adds_epu16(acc, sad);
+ v0 = _mm_loadu_si128(i1 + 1);
+ v1 = _mm_loadu_si128(i2 + 1);
+ sad = _mm_sad_epu8(v0, v1);
+ acc = _mm_adds_epu16(acc, sad);
+ v0 = _mm_loadu_si128(i1 + 2);
+ v1 = _mm_loadu_si128(i2 + 2);
+ sad = _mm_sad_epu8(v0, v1);
+ acc = _mm_adds_epu16(acc, sad);
+ v0 = _mm_loadu_si128(i1 + 3);
+ v1 = _mm_loadu_si128(i2 + 3);
+ sad = _mm_sad_epu8(v0, v1);
+ acc = _mm_adds_epu16(acc, sad);
+
+ // This essentially means sad = acc >> 64. We only care about the lower 16
+ // bits.
+ sad = _mm_shuffle_epi32(acc, 0xEE);
+ sad = _mm_adds_epu16(sad, acc);
+ int diff = _mm_cvtsi128_si32(sad);
+ if (diff)
+ return 1;
+ image1 += stride;
+ image2 += stride;
+ }
+ return 0;
+}
+
+extern int BlockDifference_SSE2_W32(const uint8* image1, const uint8* image2,
+ int stride) {
+ __m128i acc = _mm_setzero_si128();
+ __m128i v0;
+ __m128i v1;
+ __m128i sad;
+ for (int y = 0; y < kBlockHeight; ++y) {
+ const __m128i* i1 = reinterpret_cast<const __m128i*>(image1);
+ const __m128i* i2 = reinterpret_cast<const __m128i*>(image2);
+ v0 = _mm_loadu_si128(i1);
+ v1 = _mm_loadu_si128(i2);
+ sad = _mm_sad_epu8(v0, v1);
+ acc = _mm_adds_epu16(acc, sad);
+ v0 = _mm_loadu_si128(i1 + 1);
+ v1 = _mm_loadu_si128(i2 + 1);
+ sad = _mm_sad_epu8(v0, v1);
+ acc = _mm_adds_epu16(acc, sad);
+ v0 = _mm_loadu_si128(i1 + 2);
+ v1 = _mm_loadu_si128(i2 + 2);
+ sad = _mm_sad_epu8(v0, v1);
+ acc = _mm_adds_epu16(acc, sad);
+ v0 = _mm_loadu_si128(i1 + 3);
+ v1 = _mm_loadu_si128(i2 + 3);
+ sad = _mm_sad_epu8(v0, v1);
+ acc = _mm_adds_epu16(acc, sad);
+ v0 = _mm_loadu_si128(i1 + 4);
+ v1 = _mm_loadu_si128(i2 + 4);
+ sad = _mm_sad_epu8(v0, v1);
+ acc = _mm_adds_epu16(acc, sad);
+ v0 = _mm_loadu_si128(i1 + 5);
+ v1 = _mm_loadu_si128(i2 + 5);
+ sad = _mm_sad_epu8(v0, v1);
+ acc = _mm_adds_epu16(acc, sad);
+ v0 = _mm_loadu_si128(i1 + 6);
+ v1 = _mm_loadu_si128(i2 + 6);
+ sad = _mm_sad_epu8(v0, v1);
+ acc = _mm_adds_epu16(acc, sad);
+ v0 = _mm_loadu_si128(i1 + 7);
+ v1 = _mm_loadu_si128(i2 + 7);
+ sad = _mm_sad_epu8(v0, v1);
+ acc = _mm_adds_epu16(acc, sad);
+
+ // This essentially means sad = acc >> 64. We only care about the lower 16
+ // bits.
+ sad = _mm_shuffle_epi32(acc, 0xEE);
+ sad = _mm_adds_epu16(sad, acc);
+ int diff = _mm_cvtsi128_si32(sad);
+ if (diff)
+ return 1;
+ image1 += stride;
+ image2 += stride;
+ }
+ return 0;
+}
+
+} // namespace remoting
diff --git a/remoting/host/differ_unittest.cc b/remoting/host/differ_unittest.cc
index f6156f3..1728a73 100644
--- a/remoting/host/differ_unittest.cc
+++ b/remoting/host/differ_unittest.cc
@@ -4,14 +4,14 @@
#include "base/scoped_ptr.h"
#include "remoting/host/differ.h"
+#include "remoting/host/differ_block.h"
#include "testing/gmock/include/gmock/gmock.h"
namespace remoting {
-// 96x96 screen gives a 3x3 grid of blocks.
+// 96x96 screen gives a 4x4 grid of blocks.
const int kScreenWidth= 96;
const int kScreenHeight = 96;
-const int kBytesPerPixel = 3;
const int kBytesPerRow = (kBytesPerPixel * kScreenWidth);
class DifferTest : public testing::Test {
@@ -50,9 +50,9 @@ class DifferTest : public testing::Test {
// Offset from upper-left of buffer to upper-left of requested block.
int block_offset = ((block_y * stride_) + (block_x * bytes_per_pixel_))
* kBlockSize;
- return differ_->DiffBlock(prev_.get() + block_offset,
- curr_.get() + block_offset,
- stride_);
+ return BlockDifference(prev_.get() + block_offset,
+ curr_.get() + block_offset,
+ stride_);
}
// Write the pixel |value| into the specified block in the |buffer|.
diff --git a/remoting/remoting.gyp b/remoting/remoting.gyp
index f39ef6b..9b5239f 100644
--- a/remoting/remoting.gyp
+++ b/remoting/remoting.gyp
@@ -183,6 +183,7 @@
'chromoting_base',
'chromoting_jingle_glue',
'chromoting_protocol',
+ 'differ_block',
],
'sources': [
'host/access_verifier.cc',
@@ -197,8 +198,6 @@
'host/chromoting_host_context.h',
'host/differ.h',
'host/differ.cc',
- 'host/differ_block.h',
- 'host/differ_block.cc',
'host/screen_recorder.cc',
'host/screen_recorder.h',
'host/heartbeat_sender.cc',
@@ -421,6 +420,46 @@
}, # end of target 'chromoting_protocol'
{
+ 'target_name': 'differ_block',
+ 'type': '<(library)',
+ 'include_dirs': [
+ '..',
+ ],
+ 'dependencies': [
+ '../media/media.gyp:cpu_features',
+ ],
+ 'conditions': [
+ [ 'target_arch == "ia32" or target_arch == "x64"', {
+ 'dependencies': [
+ 'differ_block_sse2',
+ ],
+ }],
+ ],
+ 'sources': [
+ 'host/differ_block.cc',
+ 'host/differ_block.h',
+ ],
+ }, # end of target differ_block
+
+ {
+ 'target_name': 'differ_block_sse2',
+ 'type': '<(library)',
+ 'include_dirs': [
+ '..',
+ ],
+ 'conditions': [
+ [ 'OS == "linux" or OS == "freebsd" or OS == "openbsd"', {
+ 'cflags': [
+ '-msse2',
+ ],
+ }],
+ ],
+ 'sources': [
+ 'host/differ_block_sse2.cc',
+ ],
+ }, # end of target differ_block_sse2
+
+ {
'target_name': 'chromotocol_test_client',
'type': 'executable',
'dependencies': [