summaryrefslogtreecommitdiffstats
path: root/remoting/host/differ_block.cc
blob: f7b785d7c09831a0f16ede3b590f8569e5572b99 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "remoting/host/differ_block.h"

#include <stdlib.h>
#include <string.h>

// Select the BlockDifference() implementation at compile time.
// A build may predefine USE_SSE2 to force the choice; otherwise the SSE2 path
// is enabled when the compiler advertises __SSE2__, when ARCH_CPU_X86_64 is
// defined, or when compiling with MSVC, and disabled in every other case.
// NOTE(review): _MSC_VER alone is treated as implying SSE2 support - confirm
// this still holds for every MSVC target this file is built for.
#if !defined(USE_SSE2)
#if defined(__SSE2__) || defined(ARCH_CPU_X86_64) || defined(_MSC_VER)
#define USE_SSE2 1
#else
#define USE_SSE2 0
#endif
#endif

#if USE_SSE2
#include <emmintrin.h>
#endif

namespace remoting {

#if USE_SSE2
int BlockDifference(const uint8* image1, const uint8* image2, int stride) {
  __m128i acc = _mm_setzero_si128();
  __m128i v0;
  __m128i v1;
  __m128i sad;
  for (int y = 0; y < kBlockHeight; ++y) {
    const __m128i* i1 = reinterpret_cast<const __m128i*>(image1);
    const __m128i* i2 = reinterpret_cast<const __m128i*>(image2);
    v0 = _mm_loadu_si128(i1);
    v1 = _mm_loadu_si128(i2);
    sad = _mm_sad_epu8(v0, v1);
    acc = _mm_adds_epu16(acc, sad);
    v0 = _mm_loadu_si128(i1 + 1);
    v1 = _mm_loadu_si128(i2 + 1);
    sad = _mm_sad_epu8(v0, v1);
    acc = _mm_adds_epu16(acc, sad);
    v0 = _mm_loadu_si128(i1 + 2);
    v1 = _mm_loadu_si128(i2 + 2);
    sad = _mm_sad_epu8(v0, v1);
    acc = _mm_adds_epu16(acc, sad);
    v0 = _mm_loadu_si128(i1 + 3);
    v1 = _mm_loadu_si128(i2 + 3);
    sad = _mm_sad_epu8(v0, v1);
    acc = _mm_adds_epu16(acc, sad);
    v0 = _mm_loadu_si128(i1 + 4);
    v1 = _mm_loadu_si128(i2 + 4);
    sad = _mm_sad_epu8(v0, v1);
    acc = _mm_adds_epu16(acc, sad);
    v0 = _mm_loadu_si128(i1 + 5);
    v1 = _mm_loadu_si128(i2 + 5);
    sad = _mm_sad_epu8(v0, v1);
    acc = _mm_adds_epu16(acc, sad);
    v0 = _mm_loadu_si128(i1 + 6);
    v1 = _mm_loadu_si128(i2 + 6);
    sad = _mm_sad_epu8(v0, v1);
    acc = _mm_adds_epu16(acc, sad);
    v0 = _mm_loadu_si128(i1 + 7);
    v1 = _mm_loadu_si128(i2 + 7);
    sad = _mm_sad_epu8(v0, v1);
    acc = _mm_adds_epu16(acc, sad);
    sad = _mm_shuffle_epi32(acc, 0xEE);  // [acc3, acc2, acc3, acc2]
    sad = _mm_adds_epu16(sad, acc);
    int diff = _mm_cvtsi128_si32(sad);
    if (diff) {
      return 1;
    }
    image1 += stride;
    image2 += stride;
  }
  return 0;
}
#else
// Portable implementation of the block comparison, used when SSE2 is not
// available.
//
// |image1| and |image2| point at the first byte of the block's top row in
// each image and |stride| is the distance, in bytes, between the starts of
// consecutive rows. For each of the kBlockHeight rows,
// kBlockWidth * kBytesPerPixel bytes are compared.
//
// Returns 1 as soon as a row containing a differing byte is found, and 0 if
// every compared byte is equal.
int BlockDifference(const uint8* image1, const uint8* image2, int stride) {
  // Number of bytes in each row of the block.
  const size_t bytes_per_row = kBlockWidth * kBytesPerPixel;

  for (int y = 0; y < kBlockHeight; ++y) {
    // Compare the row with memcmp() rather than by reading it through a
    // uint64 pointer: the row pointers are not guaranteed to be 8-byte
    // aligned (e.g. when |stride| is not a multiple of 8), and dereferencing
    // a misaligned or type-punned uint64 pointer is undefined behavior that
    // can fault on strict-alignment CPUs. We only care whether anything
    // changed, not what the change is, so a byte-wise equality test is
    // sufficient.
    if (memcmp(image1, image2, bytes_per_row) != 0) {
      return 1;
    }
    image1 += stride;
    image2 += stride;
  }
  return 0;
}
#endif

}  // namespace remoting