diff options
author | fbarchard@chromium.org <fbarchard@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-06-11 00:24:39 +0000 |
---|---|---|
committer | fbarchard@chromium.org <fbarchard@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-06-11 00:24:39 +0000 |
commit | 9f565bf8e7bddfec86e454a8375cb0a8465afad3 (patch) | |
tree | 3fe38130431d68961ed5ec62f4953a05d5b9273f | |
parent | 758529f145952f6a1956f183e709cbb6829caf94 (diff) | |
download | chromium_src-9f565bf8e7bddfec86e454a8375cb0a8465afad3.zip chromium_src-9f565bf8e7bddfec86e454a8375cb0a8465afad3.tar.gz chromium_src-9f565bf8e7bddfec86e454a8375cb0a8465afad3.tar.bz2 |
psadbw based differencing function
BUG=none
TEST=should still build and pass unittest, but more efficiently
Review URL: http://codereview.chromium.org/2749007
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@49480 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | remoting/host/differ_block.cc | 61 |
1 files changed, 58 insertions, 3 deletions
```diff
diff --git a/remoting/host/differ_block.cc b/remoting/host/differ_block.cc
index c42c171..4dfe58c 100644
--- a/remoting/host/differ_block.cc
+++ b/remoting/host/differ_block.cc
@@ -6,13 +6,67 @@
 
 #include <stdlib.h>
 
+#if !defined(USE_SSE2)
+#if defined(__SSE2__) || defined(ARCH_CPU_X86_64) || defined(_MSC_VER)
+#define USE_SSE2 1
+#else
+#define USE_SSE2 0
+#endif
+#endif
+
+#if USE_SSE2
+#include <emmintrin.h>
+#endif
+
 namespace remoting {
 
 // TODO(fbarchard): Use common header for block size.
-int kBlockWidth = 32;
-int kBlockHeight = 32;
-int kBytesPerPixel = 3;
+const int kBlockWidth = 32;
+const int kBlockHeight = 32;
+const int kBytesPerPixel = 3;
 
+#if USE_SSE2
+int BlockDifference(const uint8* image1, const uint8* image2, int stride) {
+  __m128i acc = _mm_setzero_si128();
+  __m128i v0;
+  __m128i v1;
+  __m128i sad;
+  for (int y = 0; y < kBlockHeight; ++y) {
+    const __m128i* i1 = reinterpret_cast<const __m128i*>(image1);
+    const __m128i* i2 = reinterpret_cast<const __m128i*>(image2);
+    v0 = _mm_loadu_si128(i1);
+    v1 = _mm_loadu_si128(i2);
+    sad = _mm_sad_epu8(v0, v1);
+    acc = _mm_adds_epu16(acc, sad);
+    v0 = _mm_loadu_si128(i1 + 1);
+    v1 = _mm_loadu_si128(i2 + 1);
+    sad = _mm_sad_epu8(v0, v1);
+    acc = _mm_adds_epu16(acc, sad);
+    v0 = _mm_loadu_si128(i1 + 2);
+    v1 = _mm_loadu_si128(i2 + 2);
+    sad = _mm_sad_epu8(v0, v1);
+    acc = _mm_adds_epu16(acc, sad);
+    v0 = _mm_loadu_si128(i1 + 3);
+    v1 = _mm_loadu_si128(i2 + 3);
+    sad = _mm_sad_epu8(v0, v1);
+    acc = _mm_adds_epu16(acc, sad);
+    v0 = _mm_loadu_si128(i1 + 4);
+    v1 = _mm_loadu_si128(i2 + 4);
+    sad = _mm_sad_epu8(v0, v1);
+    acc = _mm_adds_epu16(acc, sad);
+    v0 = _mm_loadu_si128(i1 + 5);
+    v1 = _mm_loadu_si128(i2 + 5);
+    sad = _mm_sad_epu8(v0, v1);
+    acc = _mm_adds_epu16(acc, sad);
+    image1 += stride;
+    image2 += stride;
+  }
+  sad = _mm_shuffle_epi32(acc, 0xEE); // [acc3, acc2, acc3, acc2]
+  sad = _mm_adds_epu16(sad, acc);
+  int diff = _mm_cvtsi128_si32(sad);
+  return diff;
+}
+#else
 int BlockDifference(const uint8* image1, const uint8* image2, int stride) {
   int diff = 0;
   for (int y = 0; y < kBlockHeight; ++y) {
@@ -24,5 +78,6 @@ int BlockDifference(const uint8* image1, const uint8* image2, int stride) {
   }
   return diff;
 }
+#endif
 
 } // namespace remoting
```