// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "remoting/host/differ_block.h" #include #if !defined(USE_SSE2) #if defined(__SSE2__) || defined(ARCH_CPU_X86_64) || defined(_MSC_VER) #define USE_SSE2 1 #else #define USE_SSE2 0 #endif #endif #if USE_SSE2 #include #endif namespace remoting { #if USE_SSE2 int BlockDifference(const uint8* image1, const uint8* image2, int stride) { __m128i acc = _mm_setzero_si128(); __m128i v0; __m128i v1; __m128i sad; for (int y = 0; y < kBlockHeight; ++y) { const __m128i* i1 = reinterpret_cast(image1); const __m128i* i2 = reinterpret_cast(image2); v0 = _mm_loadu_si128(i1); v1 = _mm_loadu_si128(i2); sad = _mm_sad_epu8(v0, v1); acc = _mm_adds_epu16(acc, sad); v0 = _mm_loadu_si128(i1 + 1); v1 = _mm_loadu_si128(i2 + 1); sad = _mm_sad_epu8(v0, v1); acc = _mm_adds_epu16(acc, sad); v0 = _mm_loadu_si128(i1 + 2); v1 = _mm_loadu_si128(i2 + 2); sad = _mm_sad_epu8(v0, v1); acc = _mm_adds_epu16(acc, sad); v0 = _mm_loadu_si128(i1 + 3); v1 = _mm_loadu_si128(i2 + 3); sad = _mm_sad_epu8(v0, v1); acc = _mm_adds_epu16(acc, sad); v0 = _mm_loadu_si128(i1 + 4); v1 = _mm_loadu_si128(i2 + 4); sad = _mm_sad_epu8(v0, v1); acc = _mm_adds_epu16(acc, sad); v0 = _mm_loadu_si128(i1 + 5); v1 = _mm_loadu_si128(i2 + 5); sad = _mm_sad_epu8(v0, v1); acc = _mm_adds_epu16(acc, sad); v0 = _mm_loadu_si128(i1 + 6); v1 = _mm_loadu_si128(i2 + 6); sad = _mm_sad_epu8(v0, v1); acc = _mm_adds_epu16(acc, sad); v0 = _mm_loadu_si128(i1 + 7); v1 = _mm_loadu_si128(i2 + 7); sad = _mm_sad_epu8(v0, v1); acc = _mm_adds_epu16(acc, sad); sad = _mm_shuffle_epi32(acc, 0xEE); // [acc3, acc2, acc3, acc2] sad = _mm_adds_epu16(sad, acc); int diff = _mm_cvtsi128_si32(sad); if (diff) { return 1; } image1 += stride; image2 += stride; } return 0; } #else int BlockDifference(const uint8* image1, const uint8* image2, int stride) { // Number of uint64s in each row of the block. // This must be an integral number. int int64s_per_row = (kBlockWidth * kBytesPerPixel) / sizeof(uint64); for (int y = 0; y < kBlockHeight; y++) { const uint64* prev = reinterpret_cast(image1); const uint64* curr = reinterpret_cast(image2); // Check each row in uint64-sized chunks. // Note that this check may straddle multiple pixels. This is OK because // we're interested in identifying whether or not there was change - we // don't care what the actual change is. for (int x = 0; x < int64s_per_row; x++) { if (*prev++ != *curr++) { return 1; } } image1 += stride; image2 += stride; } return 0; } #endif } // namespace remoting