aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEvan McClain <aeroevan@gmail.com>2013-05-26 09:22:54 -0400
committerEvan McClain <aeroevan@gmail.com>2013-05-26 09:22:54 -0400
commitc3a04a0580f84bf26b099607eee8b6ee40bc1143 (patch)
treeabbb9175edc5baf60fd117560522d2c97a03aa61
parent705eb622415dc3db0d1740ff29aa0fbf4e8aafa5 (diff)
downloadexternal_skia-c3a04a0580f84bf26b099607eee8b6ee40bc1143.zip
external_skia-c3a04a0580f84bf26b099607eee8b6ee40bc1143.tar.gz
external_skia-c3a04a0580f84bf26b099607eee8b6ee40bc1143.tar.bz2
Revert "Neon optimized implementation of S16_opaque_D32_nofilter_DX"
This reverts commit a9a4c66d163e245628ddde71dca40e4c29a44f47.
-rw-r--r--src/opts/SkBitmapProcState_opts_arm.cpp204
1 files changed, 0 insertions, 204 deletions
diff --git a/src/opts/SkBitmapProcState_opts_arm.cpp b/src/opts/SkBitmapProcState_opts_arm.cpp
index f7b89a9..20d62e1 100644
--- a/src/opts/SkBitmapProcState_opts_arm.cpp
+++ b/src/opts/SkBitmapProcState_opts_arm.cpp
@@ -11,11 +11,6 @@
#include "SkColorPriv.h"
#include "SkUtils.h"
-#if defined(__ARM_HAVE_NEON)
-#include <arm_neon.h>
-#endif
-
-
#if __ARM_ARCH__ >= 6 && !defined(SK_CPU_BENDIAN)
void SI8_D16_nofilter_DX_arm(
const SkBitmapProcState& s,
@@ -189,201 +184,11 @@ void SI8_opaque_D32_nofilter_DX_arm(const SkBitmapProcState& s,
}
#endif //__ARM_ARCH__ >= 6 && !defined(SK_CPU_BENDIAN)
-
-#if defined(__ARM_HAVE_NEON) && defined(SK_CPU_LENDIAN)
-void S16_opaque_D32_nofilter_DX_neon_asm(const SkBitmapProcState& s,
- const uint32_t* __restrict__ xy,
- int count, uint32_t* __restrict__ colors) {
-
- const uint16_t* __restrict__ srcAddr = (const uint16_t*)s.fBitmap->getPixels();
-
- uint16_t* index;
- uint16_t src;
- int i;
-
- srcAddr = (const uint16_t*)((const char*)srcAddr + xy[0] * s.fBitmap->rowBytes());
-
- const uint16_t* __restrict__ xx = (const uint16_t*)(++xy);
-
- if (1 == s.fBitmap->width()) {
-
- src = srcAddr[0];
- uint32_t dstValue = SkPixel16ToPixel32(src);
- sk_memset32(colors, dstValue, count);
- } else if ((xx[count-1] - xx[0]) == (count-1)) {
- // No scaling
- const uint16_t* src_data = (const uint16_t*)(srcAddr + xx[0]);
- asm volatile (
- "subs %[count], %[count], #8 \n\t" // count -= 8, set flag
- "blt 2f \n\t" // if count < 0, branch to label 2
- "vmov.u16 q8, #0xFF00 \n\t" // Load alpha value into q8 for later use.
- "1: \n\t" // 8 loop
- // Handle 8 pixels in one loop.
-
- "vld1.u16 {q0}, [%[src_data]]! \n\t" // load eight src 565 pixels
-
- "vshl.u16 q2, q0, #5 \n\t" // put green in the 6 high bits of q2
- "vshl.u16 q3, q0, #11 \n\t" // put blue in the 5 high bits of q3
- "vmov.u16 q1, q8 \n\t" // copy alpha from q8
- "vsri.u16 q1, q3, #8 \n\t" // put blue below alpha in q1
- "vsri.u16 q1, q3, #13 \n\t" // put 3 MSB blue below blue in q1
- "vsri.u16 q2, q2, #6 \n\t" // put 2 MSB green below green in q2
- "vsri.u16 q2, q0, #8 \n\t" // put red below green in q2
- "vsri.u16 q2, q0, #13 \n\t" // put 3 MSB red below red in q2
- "vzip.16 q2, q1 \n\t" // interleave q1 and q2
- "vst1.16 {d4, d5}, [%[colors]]! \n\t" // store q1 to dst
- "subs %[count], %[count], #8 \n\t" // count -= 8, set flag
- "vst1.16 {d2, d3}, [%[colors]]! \n\t" // store q1 to dst
-
- "bge 1b \n\t" // loop if count >= 0
- "2: \n\t" // exit of 8 loop
-
- "adds %[count], %[count], #4 \n\t" // add 4 to count to see if a 4 loop is needed.
- "blt 3f \n\t" // if count < 0, branch to label 3
-
- // Handle 4 pixels at once
-
- "vld1.u16 {d0}, [%[src_data]]! \n\t" // load eight src 565 pixels
-
- "vshl.u16 d2, d0, #5 \n\t" // put green in the 6 high bits of d2
- "vshl.u16 d1, d0, #11 \n\t" // put blue in the 5 high bits of d1
- "vmov.u16 d3, d16 \n\t" // copy alpha from d16
- "vsri.u16 d3, d1, #8 \n\t" // put blue below alpha in d3
- "vsri.u16 d3, d1, #13 \n\t" // put 3 MSB blue below blue in d3
- "vsri.u16 d2, d2, #6 \n\t" // put 2 MSB green below green in d2
- "vsri.u16 d2, d0, #8 \n\t" // put red below green in d2
- "vsri.u16 d2, d0, #13 \n\t" // put 3 MSB red below red in d2
- "vzip.16 d2, d3 \n\t" // interleave d2 and d3
- "vst1.16 {d2, d3}, [%[colors]]! \n\t" // store d2 and d3 to dst
-
- "3: \n\t" // end
- : [src_data] "+r" (src_data), [colors] "+r" (colors), [count] "+r" (count)
- :
- : "cc", "memory","d0","d1","d2","d3","d4","d5","d6","d7","d16","d17"
- );
-
- for (i = (count & 3); i > 0; --i) {
- *colors++ = SkPixel16ToPixel32(*src_data++);
- }
-
- } else {
- // Scaling case
- uint16_t data[8];
-
- asm volatile (
- "subs %[count], %[count], #8 \n\t" // count -= 8, set flag
- "blt 2f \n\t" // if count < 0, branch to label 2
- "vmov.u16 q8, #0xFF00 \n\t" // Load alpha value into q8 for later use.
- "1: \n\t" // 8 loop
- // Handle 8 pixels in one loop.
- "ldmia %[xx]!, {r4, r5, r6, r7} \n\t" // load ptrs to pixels 0-7
-
- "mov r4, r4, lsl #1 \n\t" // <<1 because of 16 bit pointer
- "mov r5, r5, lsl #1 \n\t" // <<1 because of 16 bit pointer
- "mov r6, r6, lsl #1 \n\t" // <<1 because of 16 bit pointer
- "mov r7, r7, lsl #1 \n\t" // <<1 because of 16 bit pointer
-
- "uxth r8, r4 \n\t" // extract ptr 0
- "mov r4, r4, lsr #16 \n\t" // extract ptr 1
- "ldrh r8, [%[srcAddr], r8] \n\t" // load pixel 0 from image
- "ldrh r4, [%[srcAddr], r4] \n\t" // load pixel 1 from image
- "pkhbt r4, r8, r4, lsl #16 \n\t" // combine pixel 0 and 1 in one register
-
- "uxth r8, r5 \n\t" // extract ptr 2
- "mov r5, r5, lsr #16 \n\t" // extract ptr 3
- "ldrh r8, [%[srcAddr], r8] \n\t" // load pixel 2 from image
- "ldrh r5, [%[srcAddr], r5] \n\t" // load pixel 3 from image
- "pkhbt r5, r8, r5, lsl #16 \n\t" // combine pixel 2 and 3 in one register
-
- "uxth r8, r6 \n\t" // extract ptr 4
- "mov r6, r6, lsr #16 \n\t" // extract ptr 5
- "ldrh r8, [%[srcAddr], r8] \n\t" // load pixel 4 from image
- "ldrh r6, [%[srcAddr], r6] \n\t" // load pixel 5 from image
- "pkhbt r6, r8, r6, lsl #16 \n\t" // combine pixel 4 and 5 in one register
-
- "uxth r8, r7 \n\t" // extract ptr 6
- "mov r7, r7, lsr #16 \n\t" // extract ptr 7
- "ldrh r8, [%[srcAddr], r8] \n\t" // load pixel 6 from image
- "ldrh r7, [%[srcAddr], r7] \n\t" // load pixel 7 from image
- "pkhbt r7, r8, r7, lsl #16 \n\t" // combine pixel 6 and 7 in one register
-
- "stmia %[data], {r4, r5, r6, r7} \n\t" // store 8 src pixels
-
- "vld1.u16 {q0}, [%[data]] \n\t" // load eight src 565 pixels
-
- "vshl.u16 q2, q0, #5 \n\t" // put green in the 6 high bits of q2
- "vshl.u16 q3, q0, #11 \n\t" // put blue in the 5 high bits of q3
- "vmov.u16 q1, q8 \n\t" // copy alpha from q8
- "vsri.u16 q1, q3, #8 \n\t" // put blue below alpha in q1
- "vsri.u16 q1, q3, #13 \n\t" // put 3 MSB blue below blue in q1
- "vsri.u16 q2, q2, #6 \n\t" // put 2 MSB green below green in q2
- "vsri.u16 q2, q0, #8 \n\t" // put red below green in q2
- "vsri.u16 q2, q0, #13 \n\t" // put 3 MSB red below red in q2
- "vzip.16 q2, q1 \n\t" // interleave q1 and q2
- "vst1.16 {d4, d5}, [%[colors]]! \n\t" // store q1 to dst
- "subs %[count], %[count], #8 \n\t" // count -= 8, set flag
- "vst1.16 {d2, d3}, [%[colors]]! \n\t" // store q2 to dst
-
- "bge 1b \n\t" // loop if count >= 0
- "2: \n\t" // exit of 8 loop
-
- "adds %[count], %[count], #4 \n\t" // add 4 to count to see if a 4 loop is needed.
- "blt 3f \n\t" // if count < 0, branch to label 3
-
- // Handle 4 pixels at once
- "ldmia %[xx]!, {r4, r5} \n\t" // load ptrs to pixels 0-3
-
- "mov r4, r4, lsl #1 \n\t" // <<1 because of 16 bit pointer
- "mov r5, r5, lsl #1 \n\t" // <<1 because of 16 bit pointer
-
- "uxth r8, r4 \n\t" // extract ptr 0
- "mov r4, r4, lsr #16 \n\t" // extract ptr 1
- "ldrh r8, [%[srcAddr], r8] \n\t" // load pixel 0 from image
- "ldrh r4, [%[srcAddr], r4] \n\t" // load pixel 1 from image
- "pkhbt r4, r8, r4, lsl #16 \n\t" // combine pixel 0 and 1 in one register
-
- "uxth r8, r5 \n\t" // extract ptr 2
- "mov r5, r5, lsr #16 \n\t" // extract ptr 3
- "ldrh r8, [%[srcAddr], r8] \n\t" // load pixel 2 from image
- "ldrh r5, [%[srcAddr], r5] \n\t" // load pixel 3 from image
- "pkhbt r5, r8, r5, lsl #16 \n\t" // combine pixel 2 and 3 in one register
-
- "stmia %[data], {r4, r5} \n\t" // store 4 src pixels
-
- "vld1.u16 {d0}, [%[data]] \n\t" // load eight src 565 pixels
-
- "vshl.u16 d2, d0, #5 \n\t" // put green in the 6 high bits of d2
- "vshl.u16 d1, d0, #11 \n\t" // put blue in the 5 high bits of d1
- "vmov.u16 d3, d16 \n\t" // copy alpha from d16
- "vsri.u16 d3, d1, #8 \n\t" // put blue below alpha in d3
- "vsri.u16 d3, d1, #13 \n\t" // put 3 MSB blue below blue in d3
- "vsri.u16 d2, d2, #6 \n\t" // put 2 MSB green below green in d2
- "vsri.u16 d2, d0, #8 \n\t" // put red below green in d2
- "vsri.u16 d2, d0, #13 \n\t" // put 3 MSB red below red in d2
- "vzip.16 d2, d3 \n\t" // interleave d2 and d3
- "vst1.16 {d2, d3}, [%[colors]]! \n\t" // store d2 and d3 to dst
-
- "3: \n\t" // End
- : [xx] "+r" (xx), [colors] "+r" (colors), [count] "+r" (count)
- : [data] "r" (data), [srcAddr] "r" (srcAddr)
- : "cc", "memory","r4","r5","r6","r7","r8","d0","d1","d2","d3","d4","d5","d6","d7","d16","d17"
- );
-
- for (i = (count & 3); i > 0; --i) {
- src = srcAddr[*xx++]; *colors++ = SkPixel16ToPixel32(src);
- }
- }
-}
-#endif
-
-
///////////////////////////////////////////////////////////////////////////////
/* If we replace a sampleproc, then we null-out the associated shaderproc,
otherwise the shader won't even look at the matrix/sampler
*/
-
-
void SkBitmapProcState::platformProcs() {
bool doFilter = fDoFilter;
bool isOpaque = 256 == fAlphaScale;
@@ -409,15 +214,6 @@ void SkBitmapProcState::platformProcs() {
}
#endif
break;
- case SkBitmap::kRGB_565_Config:
-#if defined(__ARM_HAVE_NEON) && defined(SK_CPU_LENDIAN)
- if (justDx && !doFilter) {
- if (isOpaque) {
- fSampleProc32 = S16_opaque_D32_nofilter_DX_neon_asm;
- }
- }
-#endif
- break;
default:
break;
}