summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author dhollowa@chromium.org <dhollowa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-02-11 18:00:57 +0000
committer dhollowa@chromium.org <dhollowa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-02-11 18:00:57 +0000
commit c59d845257c5245a476ffd4284c506ef58fe008d (patch)
tree 7d6db0a3b735ec3fb944ad902506d638aa1cf1dd
parent eff69e3f36db0ed1e17281506cf315e51e07c2c3 (diff)
download chromium_src-c59d845257c5245a476ffd4284c506ef58fe008d.zip
chromium_src-c59d845257c5245a476ffd4284c506ef58fe008d.tar.gz
chromium_src-c59d845257c5245a476ffd4284c506ef58fe008d.tar.bz2
Revert 74571 - Use SSE2 block differ for chromoting
We have the SSE2 code lying around in the tree, just never being used. This will allow us to use it.

A number of Windows bots have gone red in media_tests on the waterfall:
http://build.chromium.org/p/chromium/builders/XP%20Tests%20%281%29
http://build.chromium.org/p/chromium/builders/XP%20Tests%20%281%29/builds/501/steps/media_unittests/logs/stdio
This seems to be related to r74571, so I am reverting it to see.

BUG=None
TEST=Chromoting to a host machine and the diff will work correctly
Review URL: http://codereview.chromium.org/6469022

TBR=hclam@chromium.org
Review URL: http://codereview.chromium.org/6502002

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@74630 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- media/base/yuv_convert.cc 3
-rw-r--r-- remoting/host/differ.cc 32
-rw-r--r-- remoting/host/differ.h 6
-rw-r--r-- remoting/host/differ_block.cc 92
-rw-r--r-- remoting/host/differ_block.h 1
-rw-r--r-- remoting/host/differ_block_internal.h 25
-rw-r--r-- remoting/host/differ_block_sse2.cc 111
-rw-r--r-- remoting/host/differ_unittest.cc 10
-rw-r--r-- remoting/remoting.gyp 43
9 files changed, 112 insertions, 211 deletions
diff --git a/media/base/yuv_convert.cc b/media/base/yuv_convert.cc
index 2e4af1d..f6e2857 100644
--- a/media/base/yuv_convert.cc
+++ b/media/base/yuv_convert.cc
@@ -17,7 +17,6 @@
#include "media/base/yuv_convert.h"
-#include "build/build_config.h"
#include "media/base/cpu_features.h"
#include "media/base/yuv_convert_internal.h"
#include "media/base/yuv_row.h"
@@ -357,7 +356,7 @@ void ConvertRGB32ToYUV(const uint8* rgbframe,
static void (*convert_proc)(const uint8*, uint8*, uint8*, uint8*,
int, int, int, int, int) = NULL;
if (!convert_proc) {
-#if defined(ARCH_CPU_ARM_FAMILY)
+#ifdef __arm__
// For ARM processors, always use C version.
// TODO(hclam): Implement a NEON version.
convert_proc = &ConvertRGB32ToYUV_C;
diff --git a/remoting/host/differ.cc b/remoting/host/differ.cc
index c473870..c7d28b0 100644
--- a/remoting/host/differ.cc
+++ b/remoting/host/differ.cc
@@ -5,7 +5,6 @@
#include "remoting/host/differ.h"
#include "base/logging.h"
-#include "remoting/host/differ_block.h"
namespace remoting {
@@ -73,7 +72,7 @@ void Differ::MarkDirtyBlocks(const void* prev_buffer, const void* curr_buffer) {
uint8* diff_info = diff_info_row_start;
for (int x = 0; x < x_full_blocks; x++) {
- DiffInfo diff = BlockDifference(prev_block, curr_block, bytes_per_row_);
+ DiffInfo diff = DiffBlock(prev_block, curr_block, bytes_per_row_);
if (diff != 0) {
// Mark this block as being modified so that it gets incorporated into
// a dirty rect.
@@ -99,6 +98,35 @@ void Differ::MarkDirtyBlocks(const void* prev_buffer, const void* curr_buffer) {
}
}
+DiffInfo Differ::DiffBlock(const uint8* prev_buffer, const uint8* curr_buffer,
+ int stride) {
+ const uint8* prev_row_start = prev_buffer;
+ const uint8* curr_row_start = curr_buffer;
+
+ // Number of uint64s in each row of the block.
+ // This must be an integral number.
+ int int64s_per_row = (kBlockSize * bytes_per_pixel_) / sizeof(uint64);
+ DCHECK(((kBlockSize * bytes_per_pixel_) % sizeof(uint64)) == 0);
+
+ for (int y = 0; y < kBlockSize; y++) {
+ const uint64* prev = reinterpret_cast<const uint64*>(prev_row_start);
+ const uint64* curr = reinterpret_cast<const uint64*>(curr_row_start);
+
+ // Check each row in uint64-sized chunks.
+ // Note that this check may straddle multiple pixels. This is OK because
+ // we're interested in identifying whether or not there was change - we
+ // don't care what the actual change is.
+ for (int x = 0; x < int64s_per_row; x++) {
+ if (*prev++ != *curr++) {
+ return 1;
+ }
+ }
+ prev_row_start += stride;
+ curr_row_start += stride;
+ }
+ return 0;
+}
+
DiffInfo Differ::DiffPartialBlock(const uint8* prev_buffer,
const uint8* curr_buffer,
int stride, int width, int height) {
diff --git a/remoting/host/differ.h b/remoting/host/differ.h
index 8386e7a..d227333 100644
--- a/remoting/host/differ.h
+++ b/remoting/host/differ.h
@@ -36,6 +36,12 @@ class Differ {
void MarkDirtyBlocks(const void* prev_buffer, const void* curr_buffer);
// Diff a small block of image and return non-zero if there is a diff.
+ // Currently, this just returns 0 or 1, but this may change in the future
+ // to return the number of pixels changed.
+ DiffInfo DiffBlock(const uint8* prev_buffer, const uint8* curr_buffer,
+ int stride);
+
+ // Diff a small block of image and return non-zero if there is a diff.
// This checks only the part of the block specified by the width and
// height parameters.
// This is much slower than DiffBlock() since it cannot assume that the
diff --git a/remoting/host/differ_block.cc b/remoting/host/differ_block.cc
index 88a4f8b6..f7b785d 100644
--- a/remoting/host/differ_block.cc
+++ b/remoting/host/differ_block.cc
@@ -4,13 +4,76 @@
#include "remoting/host/differ_block.h"
-#include "build/build_config.h"
-#include "media/base/cpu_features.h"
-#include "remoting/host/differ_block_internal.h"
+#include <stdlib.h>
+
+#if !defined(USE_SSE2)
+#if defined(__SSE2__) || defined(ARCH_CPU_X86_64) || defined(_MSC_VER)
+#define USE_SSE2 1
+#else
+#define USE_SSE2 0
+#endif
+#endif
+
+#if USE_SSE2
+#include <emmintrin.h>
+#endif
namespace remoting {
-int BlockDifference_C(const uint8* image1, const uint8* image2, int stride) {
+#if USE_SSE2
+int BlockDifference(const uint8* image1, const uint8* image2, int stride) {
+ __m128i acc = _mm_setzero_si128();
+ __m128i v0;
+ __m128i v1;
+ __m128i sad;
+ for (int y = 0; y < kBlockHeight; ++y) {
+ const __m128i* i1 = reinterpret_cast<const __m128i*>(image1);
+ const __m128i* i2 = reinterpret_cast<const __m128i*>(image2);
+ v0 = _mm_loadu_si128(i1);
+ v1 = _mm_loadu_si128(i2);
+ sad = _mm_sad_epu8(v0, v1);
+ acc = _mm_adds_epu16(acc, sad);
+ v0 = _mm_loadu_si128(i1 + 1);
+ v1 = _mm_loadu_si128(i2 + 1);
+ sad = _mm_sad_epu8(v0, v1);
+ acc = _mm_adds_epu16(acc, sad);
+ v0 = _mm_loadu_si128(i1 + 2);
+ v1 = _mm_loadu_si128(i2 + 2);
+ sad = _mm_sad_epu8(v0, v1);
+ acc = _mm_adds_epu16(acc, sad);
+ v0 = _mm_loadu_si128(i1 + 3);
+ v1 = _mm_loadu_si128(i2 + 3);
+ sad = _mm_sad_epu8(v0, v1);
+ acc = _mm_adds_epu16(acc, sad);
+ v0 = _mm_loadu_si128(i1 + 4);
+ v1 = _mm_loadu_si128(i2 + 4);
+ sad = _mm_sad_epu8(v0, v1);
+ acc = _mm_adds_epu16(acc, sad);
+ v0 = _mm_loadu_si128(i1 + 5);
+ v1 = _mm_loadu_si128(i2 + 5);
+ sad = _mm_sad_epu8(v0, v1);
+ acc = _mm_adds_epu16(acc, sad);
+ v0 = _mm_loadu_si128(i1 + 6);
+ v1 = _mm_loadu_si128(i2 + 6);
+ sad = _mm_sad_epu8(v0, v1);
+ acc = _mm_adds_epu16(acc, sad);
+ v0 = _mm_loadu_si128(i1 + 7);
+ v1 = _mm_loadu_si128(i2 + 7);
+ sad = _mm_sad_epu8(v0, v1);
+ acc = _mm_adds_epu16(acc, sad);
+ sad = _mm_shuffle_epi32(acc, 0xEE); // [acc3, acc2, acc3, acc2]
+ sad = _mm_adds_epu16(sad, acc);
+ int diff = _mm_cvtsi128_si32(sad);
+ if (diff) {
+ return 1;
+ }
+ image1 += stride;
+ image2 += stride;
+ }
+ return 0;
+}
+#else
+int BlockDifference(const uint8* image1, const uint8* image2, int stride) {
// Number of uint64s in each row of the block.
// This must be an integral number.
int int64s_per_row = (kBlockWidth * kBytesPerPixel) / sizeof(uint64);
@@ -33,27 +96,6 @@ int BlockDifference_C(const uint8* image1, const uint8* image2, int stride) {
}
return 0;
}
-
-int BlockDifference(const uint8* image1, const uint8* image2, int stride) {
- static int (*diff_proc)(const uint8*, const uint8*, int) = NULL;
-
- if (!diff_proc) {
-#if defined(ARCH_CPU_ARM_FAMILY)
- // For ARM processors, always use C version.
- // TODO(hclam): Implement a NEON version.
- diff_proc = &BlockDifference_C;
-#else
- // For x86 processors, check if SSE2 is supported.
- if (media::hasSSE2() && kBlockWidth == 32)
- diff_proc = &BlockDifference_SSE2_W32;
- else if (media::hasSSE2() && kBlockWidth == 16)
- diff_proc = &BlockDifference_SSE2_W16;
- else
- diff_proc = &BlockDifference_C;
#endif
- }
-
- return diff_proc(image1, image2, stride);
-}
} // namespace remoting
diff --git a/remoting/host/differ_block.h b/remoting/host/differ_block.h
index 9c03814..8ca538f 100644
--- a/remoting/host/differ_block.h
+++ b/remoting/host/differ_block.h
@@ -17,6 +17,7 @@ static const int kBytesPerPixel = 4;
// Low level functions to compare 2 blocks of pixels.
// zero means the blocks are identical.
// one means the blocks are different.
+
int BlockDifference(const uint8* image1, const uint8* image2, int stride);
} // namespace remoting
diff --git a/remoting/host/differ_block_internal.h b/remoting/host/differ_block_internal.h
deleted file mode 100644
index d0ddce0..0000000
--- a/remoting/host/differ_block_internal.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// This header file is used only differ_block.h. It defines the SSE2 rountines
-// for finding block difference.
-
-#ifndef REMOTING_HOST_DIFFER_BLOCK_INTERNAL_H_
-#define REMOTING_HOST_DIFFER_BLOCK_INTERNAL_H_
-
-#include "base/basictypes.h"
-
-namespace remoting {
-
-// Find block difference of dimension 16x16.
-extern int BlockDifference_SSE2_W16(const uint8* image1, const uint8* image2,
- int stride);
-
-// Find block difference of dimension 32x32.
-extern int BlockDifference_SSE2_W32(const uint8* image1, const uint8* image2,
- int stride);
-
-} // namespace remoting
-
-#endif // REMOTING_HOST_DIFFER_BLOCK_INTERNAL_H_
diff --git a/remoting/host/differ_block_sse2.cc b/remoting/host/differ_block_sse2.cc
deleted file mode 100644
index c0cc3b6..0000000
--- a/remoting/host/differ_block_sse2.cc
+++ /dev/null
@@ -1,111 +0,0 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#if defined(_MSC_VER)
-#include <intrin.h>
-#else
-#include <mmintrin.h>
-#include <emmintrin.h>
-#endif
-
-#include "remoting/host/differ_block.h"
-#include "remoting/host/differ_block_internal.h"
-
-namespace remoting {
-
-extern int BlockDifference_SSE2_W16(const uint8* image1, const uint8* image2,
- int stride) {
- __m128i acc = _mm_setzero_si128();
- __m128i v0;
- __m128i v1;
- __m128i sad;
- for (int y = 0; y < kBlockHeight; ++y) {
- const __m128i* i1 = reinterpret_cast<const __m128i*>(image1);
- const __m128i* i2 = reinterpret_cast<const __m128i*>(image2);
- v0 = _mm_loadu_si128(i1);
- v1 = _mm_loadu_si128(i2);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 1);
- v1 = _mm_loadu_si128(i2 + 1);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 2);
- v1 = _mm_loadu_si128(i2 + 2);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 3);
- v1 = _mm_loadu_si128(i2 + 3);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
-
- // This essential means sad = acc >> 64. We only care about the lower 16
- // bits.
- sad = _mm_shuffle_epi32(acc, 0xEE);
- sad = _mm_adds_epu16(sad, acc);
- int diff = _mm_cvtsi128_si32(sad);
- if (diff)
- return 1;
- image1 += stride;
- image2 += stride;
- }
- return 0;
-}
-
-extern int BlockDifference_SSE2_W32(const uint8* image1, const uint8* image2,
- int stride) {
- __m128i acc = _mm_setzero_si128();
- __m128i v0;
- __m128i v1;
- __m128i sad;
- for (int y = 0; y < kBlockHeight; ++y) {
- const __m128i* i1 = reinterpret_cast<const __m128i*>(image1);
- const __m128i* i2 = reinterpret_cast<const __m128i*>(image2);
- v0 = _mm_loadu_si128(i1);
- v1 = _mm_loadu_si128(i2);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 1);
- v1 = _mm_loadu_si128(i2 + 1);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 2);
- v1 = _mm_loadu_si128(i2 + 2);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 3);
- v1 = _mm_loadu_si128(i2 + 3);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 4);
- v1 = _mm_loadu_si128(i2 + 4);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 5);
- v1 = _mm_loadu_si128(i2 + 5);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 6);
- v1 = _mm_loadu_si128(i2 + 6);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 7);
- v1 = _mm_loadu_si128(i2 + 7);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
-
- // This essential means sad = acc >> 64. We only care about the lower 16
- // bits.
- sad = _mm_shuffle_epi32(acc, 0xEE);
- sad = _mm_adds_epu16(sad, acc);
- int diff = _mm_cvtsi128_si32(sad);
- if (diff)
- return 1;
- image1 += stride;
- image2 += stride;
- }
- return 0;
-}
-
-} // namespace remoting
diff --git a/remoting/host/differ_unittest.cc b/remoting/host/differ_unittest.cc
index 1728a73..f6156f3 100644
--- a/remoting/host/differ_unittest.cc
+++ b/remoting/host/differ_unittest.cc
@@ -4,14 +4,14 @@
#include "base/scoped_ptr.h"
#include "remoting/host/differ.h"
-#include "remoting/host/differ_block.h"
#include "testing/gmock/include/gmock/gmock.h"
namespace remoting {
-// 96x96 screen gives a 4x4 grid of blocks.
+// 96x96 screen gives a 3x3 grid of blocks.
const int kScreenWidth= 96;
const int kScreenHeight = 96;
+const int kBytesPerPixel = 3;
const int kBytesPerRow = (kBytesPerPixel * kScreenWidth);
class DifferTest : public testing::Test {
@@ -50,9 +50,9 @@ class DifferTest : public testing::Test {
// Offset from upper-left of buffer to upper-left of requested block.
int block_offset = ((block_y * stride_) + (block_x * bytes_per_pixel_))
* kBlockSize;
- return BlockDifference(prev_.get() + block_offset,
- curr_.get() + block_offset,
- stride_);
+ return differ_->DiffBlock(prev_.get() + block_offset,
+ curr_.get() + block_offset,
+ stride_);
}
// Write the pixel |value| into the specified block in the |buffer|.
diff --git a/remoting/remoting.gyp b/remoting/remoting.gyp
index 9b5239f..f39ef6b 100644
--- a/remoting/remoting.gyp
+++ b/remoting/remoting.gyp
@@ -183,7 +183,6 @@
'chromoting_base',
'chromoting_jingle_glue',
'chromoting_protocol',
- 'differ_block',
],
'sources': [
'host/access_verifier.cc',
@@ -198,6 +197,8 @@
'host/chromoting_host_context.h',
'host/differ.h',
'host/differ.cc',
+ 'host/differ_block.h',
+ 'host/differ_block.cc',
'host/screen_recorder.cc',
'host/screen_recorder.h',
'host/heartbeat_sender.cc',
@@ -420,46 +421,6 @@
}, # end of target 'chromoting_protocol'
{
- 'target_name': 'differ_block',
- 'type': '<(library)',
- 'include_dirs': [
- '..',
- ],
- 'dependencies': [
- '../media/media.gyp:cpu_features',
- ],
- 'conditions': [
- [ 'target_arch == "ia32" or target_arch == "x64"', {
- 'dependencies': [
- 'differ_block_sse2',
- ],
- }],
- ],
- 'sources': [
- 'host/differ_block.cc',
- 'host/differ_block.h',
- ],
- }, # end of target differ_block
-
- {
- 'target_name': 'differ_block_sse2',
- 'type': '<(library)',
- 'include_dirs': [
- '..',
- ],
- 'conditions': [
- [ 'OS == "linux" or OS == "freebsd" or OS == "openbsd"', {
- 'cflags': [
- '-msse2',
- ],
- }],
- ],
- 'sources': [
- 'host/differ_block_sse2.cc',
- ],
- }, # end of target differ_block_sse2
-
- {
'target_name': 'chromotocol_test_client',
'type': 'executable',
'dependencies': [