summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorfbarchard@chromium.org <fbarchard@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-06-11 00:24:39 +0000
committerfbarchard@chromium.org <fbarchard@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-06-11 00:24:39 +0000
commit9f565bf8e7bddfec86e454a8375cb0a8465afad3 (patch)
tree3fe38130431d68961ed5ec62f4953a05d5b9273f
parent758529f145952f6a1956f183e709cbb6829caf94 (diff)
downloadchromium_src-9f565bf8e7bddfec86e454a8375cb0a8465afad3.zip
chromium_src-9f565bf8e7bddfec86e454a8375cb0a8465afad3.tar.gz
chromium_src-9f565bf8e7bddfec86e454a8375cb0a8465afad3.tar.bz2
psadbw based differencing function
BUG=none TEST=should still build and pass unittest, but more efficiently Review URL: http://codereview.chromium.org/2749007 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@49480 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r--remoting/host/differ_block.cc61
1 files changed, 58 insertions, 3 deletions
diff --git a/remoting/host/differ_block.cc b/remoting/host/differ_block.cc
index c42c171..4dfe58c 100644
--- a/remoting/host/differ_block.cc
+++ b/remoting/host/differ_block.cc
@@ -6,13 +6,67 @@
#include <stdlib.h>
+#if !defined(USE_SSE2)
+#if defined(__SSE2__) || defined(ARCH_CPU_X86_64) || defined(_MSC_VER)
+#define USE_SSE2 1
+#else
+#define USE_SSE2 0
+#endif
+#endif
+
+#if USE_SSE2
+#include <emmintrin.h>
+#endif
+
namespace remoting {
// TODO(fbarchard): Use common header for block size.
-int kBlockWidth = 32;
-int kBlockHeight = 32;
-int kBytesPerPixel = 3;
+const int kBlockWidth = 32;
+const int kBlockHeight = 32;
+const int kBytesPerPixel = 3;
+#if USE_SSE2
+// Returns the sum of absolute differences between corresponding bytes of two
+// image blocks. Each block is kBlockHeight rows of
+// kBlockWidth * kBytesPerPixel bytes, and consecutive rows start |stride|
+// bytes apart in both images. A return value of 0 means the blocks match.
+//
+// Rows are processed 16 bytes at a time using unaligned loads, so neither
+// pointer needs any particular alignment. If the row size were not a multiple
+// of 16 bytes the trailing bytes would be ignored (with the current 32 x 3
+// constants a row is exactly 96 bytes, i.e. 6 vectors).
+int BlockDifference(const uint8* image1, const uint8* image2, int stride) {
+  const int kVectorsPerRow =
+      kBlockWidth * kBytesPerPixel / static_cast<int>(sizeof(__m128i));
+  __m128i acc = _mm_setzero_si128();
+  for (int y = 0; y < kBlockHeight; ++y) {
+    const __m128i* row1 = reinterpret_cast<const __m128i*>(image1);
+    const __m128i* row2 = reinterpret_cast<const __m128i*>(image2);
+    for (int i = 0; i < kVectorsPerRow; ++i) {
+      const __m128i v1 = _mm_loadu_si128(row1 + i);
+      const __m128i v2 = _mm_loadu_si128(row2 + i);
+      // psadbw yields one partial sum (at most 8 * 255) in the low bits of
+      // each 64-bit half, with the remaining bits zero. Accumulate with plain
+      // 32-bit adds: a saturating 16-bit add would cap the result at 65535,
+      // while a whole block can differ by as much as 32 * 96 * 255 = 783360.
+      acc = _mm_add_epi32(acc, _mm_sad_epu8(v1, v2));
+    }
+    image1 += stride;
+    image2 += stride;
+  }
+  // Fold the upper 64-bit half onto the lower one: 0xEE selects dwords
+  // [3, 2, 3, 2], so dword 0 of the sum holds the total of both halves.
+  const __m128i upper = _mm_shuffle_epi32(acc, 0xEE);
+  return _mm_cvtsi128_si32(_mm_add_epi32(acc, upper));
+}
+#else
int BlockDifference(const uint8* image1, const uint8* image2, int stride) {
int diff = 0;
for (int y = 0; y < kBlockHeight; ++y) {
@@ -24,5 +78,6 @@ int BlockDifference(const uint8* image1, const uint8* image2, int stride) {
}
return diff;
}
+#endif
} // namespace remoting