summary refs log tree commit diff stats
path: root/remoting/host/differ_block.cc
diff options
context:
space:
mode:
Diffstat (limited to 'remoting/host/differ_block.cc')
-rw-r--r-- remoting/host/differ_block.cc 92
1 files changed, 25 insertions, 67 deletions
diff --git a/remoting/host/differ_block.cc b/remoting/host/differ_block.cc
index f7b785d..88a4f8b6 100644
--- a/remoting/host/differ_block.cc
+++ b/remoting/host/differ_block.cc
@@ -4,76 +4,13 @@
#include "remoting/host/differ_block.h"
-#include <stdlib.h>
-
-#if !defined(USE_SSE2)
-#if defined(__SSE2__) || defined(ARCH_CPU_X86_64) || defined(_MSC_VER)
-#define USE_SSE2 1
-#else
-#define USE_SSE2 0
-#endif
-#endif
-
-#if USE_SSE2
-#include <emmintrin.h>
-#endif
+#include "build/build_config.h"
+#include "media/base/cpu_features.h"
+#include "remoting/host/differ_block_internal.h"
namespace remoting {
-#if USE_SSE2
-int BlockDifference(const uint8* image1, const uint8* image2, int stride) {
- __m128i acc = _mm_setzero_si128();
- __m128i v0;
- __m128i v1;
- __m128i sad;
- for (int y = 0; y < kBlockHeight; ++y) {
- const __m128i* i1 = reinterpret_cast<const __m128i*>(image1);
- const __m128i* i2 = reinterpret_cast<const __m128i*>(image2);
- v0 = _mm_loadu_si128(i1);
- v1 = _mm_loadu_si128(i2);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 1);
- v1 = _mm_loadu_si128(i2 + 1);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 2);
- v1 = _mm_loadu_si128(i2 + 2);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 3);
- v1 = _mm_loadu_si128(i2 + 3);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 4);
- v1 = _mm_loadu_si128(i2 + 4);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 5);
- v1 = _mm_loadu_si128(i2 + 5);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 6);
- v1 = _mm_loadu_si128(i2 + 6);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- v0 = _mm_loadu_si128(i1 + 7);
- v1 = _mm_loadu_si128(i2 + 7);
- sad = _mm_sad_epu8(v0, v1);
- acc = _mm_adds_epu16(acc, sad);
- sad = _mm_shuffle_epi32(acc, 0xEE); // [acc3, acc2, acc3, acc2]
- sad = _mm_adds_epu16(sad, acc);
- int diff = _mm_cvtsi128_si32(sad);
- if (diff) {
- return 1;
- }
- image1 += stride;
- image2 += stride;
- }
- return 0;
-}
-#else
-int BlockDifference(const uint8* image1, const uint8* image2, int stride) {
+int BlockDifference_C(const uint8* image1, const uint8* image2, int stride) {
// Number of uint64s in each row of the block.
// This must be an integral number.
int int64s_per_row = (kBlockWidth * kBytesPerPixel) / sizeof(uint64);
@@ -96,6 +33,27 @@ int BlockDifference(const uint8* image1, const uint8* image2, int stride) {
}
return 0;
}
+
+int BlockDifference(const uint8* image1, const uint8* image2, int stride) {  // Dispatches to a per-CPU implementation.
+ static int (*diff_proc)(const uint8*, const uint8*, int) = NULL;  // Set once, on the first call.
+ // NOTE(review): this lazy init has no visible locking; confirm callers' threading.
+ if (!diff_proc) {
+#if defined(ARCH_CPU_ARM_FAMILY)
+ // For ARM processors, always use the C version.
+ // TODO(hclam): Implement a NEON version.
+ diff_proc = &BlockDifference_C;
+#else
+ // Non-ARM (x86) builds: use SSE2 if media::hasSSE2() and kBlockWidth is 32 or 16; otherwise C.
+ if (media::hasSSE2() && kBlockWidth == 32)
+ diff_proc = &BlockDifference_SSE2_W32;
+ else if (media::hasSSE2() && kBlockWidth == 16)
+ diff_proc = &BlockDifference_SSE2_W16;
+ else
+ diff_proc = &BlockDifference_C;
#endif
+ }
+ // Forward the arguments unchanged to the selected implementation and return its result.
+ return diff_proc(image1, image2, stride);
+}
} // namespace remoting