diff options
Diffstat (limited to 'remoting/host/differ_block.cc')
-rw-r--r-- | remoting/host/differ_block.cc | 92 |
1 files changed, 25 insertions, 67 deletions
diff --git a/remoting/host/differ_block.cc b/remoting/host/differ_block.cc index f7b785d..88a4f8b6 100644 --- a/remoting/host/differ_block.cc +++ b/remoting/host/differ_block.cc @@ -4,76 +4,13 @@ #include "remoting/host/differ_block.h" -#include <stdlib.h> - -#if !defined(USE_SSE2) -#if defined(__SSE2__) || defined(ARCH_CPU_X86_64) || defined(_MSC_VER) -#define USE_SSE2 1 -#else -#define USE_SSE2 0 -#endif -#endif - -#if USE_SSE2 -#include <emmintrin.h> -#endif +#include "build/build_config.h" +#include "media/base/cpu_features.h" +#include "remoting/host/differ_block_internal.h" namespace remoting { -#if USE_SSE2 -int BlockDifference(const uint8* image1, const uint8* image2, int stride) { - __m128i acc = _mm_setzero_si128(); - __m128i v0; - __m128i v1; - __m128i sad; - for (int y = 0; y < kBlockHeight; ++y) { - const __m128i* i1 = reinterpret_cast<const __m128i*>(image1); - const __m128i* i2 = reinterpret_cast<const __m128i*>(image2); - v0 = _mm_loadu_si128(i1); - v1 = _mm_loadu_si128(i2); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 1); - v1 = _mm_loadu_si128(i2 + 1); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 2); - v1 = _mm_loadu_si128(i2 + 2); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 3); - v1 = _mm_loadu_si128(i2 + 3); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 4); - v1 = _mm_loadu_si128(i2 + 4); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 5); - v1 = _mm_loadu_si128(i2 + 5); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 6); - v1 = _mm_loadu_si128(i2 + 6); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - v0 = _mm_loadu_si128(i1 + 7); - v1 = _mm_loadu_si128(i2 + 7); - sad = _mm_sad_epu8(v0, v1); - acc = _mm_adds_epu16(acc, sad); - sad = _mm_shuffle_epi32(acc, 0xEE); // [acc3, acc2, acc3, acc2] - sad = _mm_adds_epu16(sad, acc); - int diff = _mm_cvtsi128_si32(sad); - if (diff) { - return 1; - } - image1 += stride; - image2 += stride; - } - return 0; -} -#else -int BlockDifference(const uint8* image1, const uint8* image2, int stride) { +int BlockDifference_C(const uint8* image1, const uint8* image2, int stride) { // Number of uint64s in each row of the block. // This must be an integral number. int int64s_per_row = (kBlockWidth * kBytesPerPixel) / sizeof(uint64); @@ -96,6 +33,27 @@ int BlockDifference(const uint8* image1, const uint8* image2, int stride) { } return 0; } + +int BlockDifference(const uint8* image1, const uint8* image2, int stride) { + static int (*diff_proc)(const uint8*, const uint8*, int) = NULL; + + if (!diff_proc) { +#if defined(ARCH_CPU_ARM_FAMILY) + // For ARM processors, always use C version. + // TODO(hclam): Implement a NEON version. + diff_proc = &BlockDifference_C; +#else + // For x86 processors, check if SSE2 is supported. + if (media::hasSSE2() && kBlockWidth == 32) + diff_proc = &BlockDifference_SSE2_W32; + else if (media::hasSSE2() && kBlockWidth == 16) + diff_proc = &BlockDifference_SSE2_W16; + else + diff_proc = &BlockDifference_C; #endif + } + + return diff_proc(image1, image2, stride); +} } // namespace remoting |