1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
|
/*
* Copyright 2009 The Android Open Source Project
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "SkColorPriv.h"
/*
Filter_32_opaque
There is no hard-and-fast rule that the filtering must produce
exact results for the color components, but if the 4 incoming colors are
all opaque, then the output color must also be opaque. Subsequent parts of
the drawing pipeline may rely on this (e.g. when choosing which blitrow proc to use).
*/
#if defined(__ARM_HAVE_NEON) && !defined(SK_CPU_BENDIAN)
/*
 * NEON inline-assembly bilinear filter for four 32-bit colors (opaque variant).
 *
 * Each of the four bytes of the colors is blended independently in a 16-bit
 * lane as
 *     a00*(16-y)*(16-x) + a01*(16-y)*x + a10*y*(16-x) + a11*y*x
 * and the sum is narrowed back to a byte with a right shift by 8. One 32-bit
 * result is stored through dst.
 *
 * x, y      weights; only the low 16 bits of x and the low 8 bits of y are
 *           duplicated into the vector lanes. The arithmetic forms 16-x and
 *           16-y, so values in 0..15 are presumably expected (not checked
 *           in this function).
 * a00..a11  the four input colors; a00/a01 are weighted by (16-y) and
 *           a10/a11 by y, a00/a10 by (16-x) and a01/a11 by x.
 * dst       destination of the single 32-bit store.
 *
 * NOTE(review): "r4" and "d2" appear in the clobber list although no
 * instruction in this asm block references them -- confirm whether they are
 * still needed.
 */
static inline void Filter_32_opaque_neon(unsigned x, unsigned y,
SkPMColor a00, SkPMColor a01,
SkPMColor a10, SkPMColor a11,
SkPMColor *dst) {
asm volatile(
// Vertical pass: widen the bytes to 16 bits while weighting by y.
"vdup.8 d0, %[y] \n\t" // duplicate y into every byte lane of d0
"vmov.u8 d16, #16 \n\t" // set up constant 16 in every byte lane of d16
"vsub.u8 d1, d16, d0 \n\t" // d1 = 16-y
"vdup.32 d4, %[a00] \n\t" // duplicate a00 into both halves of d4
"vdup.32 d5, %[a10] \n\t" // duplicate a10 into both halves of d5
"vmov.32 d4[1], %[a01] \n\t" // set top of d4 to a01 -> d4 = [a01|a00]
"vmov.32 d5[1], %[a11] \n\t" // set top of d5 to a11 -> d5 = [a11|a10]
"vmull.u8 q3, d4, d1 \n\t" // q3 (d7:d6) = [a01|a00] * (16-y)
"vmull.u8 q0, d5, d0 \n\t" // q0 (d1:d0) = [a11|a10] * y
// Horizontal pass: weight the four partial products by x and accumulate in d4.
"vdup.16 d5, %[x] \n\t" // duplicate x into every 16-bit lane of d5
"vmov.u16 d16, #16 \n\t" // set up constant 16 in every 16-bit lane of d16
"vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x
"vmul.i16 d4, d7, d5 \n\t" // d4 = a01*(16-y) * x
"vmla.i16 d4, d1, d5 \n\t" // d4 += a11*y * x
"vmla.i16 d4, d6, d3 \n\t" // d4 += a00*(16-y) * (16-x)
"vmla.i16 d4, d0, d3 \n\t" // d4 += a10*y * (16-x)
"vshrn.i16 d0, q2, #8 \n\t" // narrow q2 (d5:d4) to bytes, shifting right by 8
"vst1.32 {d0[0]}, [%[dst]] \n\t" // store the low 32 bits of d0 (the narrowed d4) to dst
:
: [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst)
: "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"
);
}
/*
 * NEON inline-assembly bilinear filter for four 32-bit colors, followed by a
 * per-channel multiply with `scale`.
 *
 * Each of the four bytes of the colors is blended independently in a 16-bit
 * lane as
 *     a00*(16-y)*(16-x) + a01*(16-y)*x + a10*y*(16-x) + a11*y*x
 * The sum is shifted right by 8, multiplied by scale, and narrowed back to a
 * byte with another right shift by 8. One 32-bit result is stored through dst.
 *
 * x, y      weights; only the low 16 bits of x and the low 8 bits of y are
 *           duplicated into the vector lanes. The arithmetic forms 16-x and
 *           16-y, so values in 0..15 are presumably expected (not checked
 *           in this function).
 * a00..a11  the four input colors; a00/a01 are weighted by (16-y) and
 *           a10/a11 by y, a00/a10 by (16-x) and a01/a11 by x.
 * dst       destination of the single 32-bit store.
 * scale     multiplier applied to every channel before the final shift by 8;
 *           presumably in 0..256 so the 16-bit product cannot overflow --
 *           TODO confirm against callers.
 *
 * NOTE(review): "r4" and "d2" appear in the clobber list although no
 * instruction in this asm block references them -- confirm whether they are
 * still needed.
 */
static inline void Filter_32_alpha_neon(unsigned x, unsigned y,
SkPMColor a00, SkPMColor a01,
SkPMColor a10, SkPMColor a11,
SkPMColor *dst, uint16_t scale) {
asm volatile(
// Vertical pass: widen the bytes to 16 bits while weighting by y.
"vdup.8 d0, %[y] \n\t" // duplicate y into every byte lane of d0
"vmov.u8 d16, #16 \n\t" // set up constant 16 in every byte lane of d16
"vsub.u8 d1, d16, d0 \n\t" // d1 = 16-y
"vdup.32 d4, %[a00] \n\t" // duplicate a00 into both halves of d4
"vdup.32 d5, %[a10] \n\t" // duplicate a10 into both halves of d5
"vmov.32 d4[1], %[a01] \n\t" // set top of d4 to a01 -> d4 = [a01|a00]
"vmov.32 d5[1], %[a11] \n\t" // set top of d5 to a11 -> d5 = [a11|a10]
"vmull.u8 q3, d4, d1 \n\t" // q3 (d7:d6) = [a01|a00] * (16-y)
"vmull.u8 q0, d5, d0 \n\t" // q0 (d1:d0) = [a11|a10] * y
// Horizontal pass: weight the four partial products by x and accumulate in d4.
"vdup.16 d5, %[x] \n\t" // duplicate x into every 16-bit lane of d5
"vmov.u16 d16, #16 \n\t" // set up constant 16 in every 16-bit lane of d16
"vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x
"vmul.i16 d4, d7, d5 \n\t" // d4 = a01*(16-y) * x
"vmla.i16 d4, d1, d5 \n\t" // d4 += a11*y * x
"vmla.i16 d4, d6, d3 \n\t" // d4 += a00*(16-y) * (16-x)
"vmla.i16 d4, d0, d3 \n\t" // d4 += a10*y * (16-x)
// Scale pass: bring each lane back to 8 bits of value, multiply by scale, narrow.
"vdup.16 d3, %[scale] \n\t" // duplicate scale into every 16-bit lane of d3
"vshr.u16 d4, d4, #8 \n\t" // shift down filtered result by 8
"vmul.i16 d4, d4, d3 \n\t" // multiply result by scale
"vshrn.i16 d0, q2, #8 \n\t" // narrow q2 (d5:d4) to bytes, shifting right by 8
"vst1.32 {d0[0]}, [%[dst]] \n\t" // store the low 32 bits of d0 (the narrowed d4) to dst
:
: [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst), [scale] "r" (scale)
: "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"
);
}
// In this preprocessor branch the generic Filter_32_* names resolve to the
// NEON inline-assembly implementations.
#define Filter_32_opaque Filter_32_opaque_neon
#define Filter_32_alpha Filter_32_alpha_neon
#else
/*
 * Portable bilinear filter for four 32-bit colors (opaque variant).
 *
 * x and y are 4-bit weights (asserted to be <= 0xF). The four taps are
 * weighted as
 *     a00: (16-x)*(16-y)     a01: x*(16-y)
 *     a10: (16-x)*y          a11: x*y
 * The weights always add up to 256, so a channel that is 0xFF in all four
 * inputs is 0xFF in the output.
 *
 * Two channels are processed per multiply: bytes 0 and 2 of each color are
 * isolated with gMask_00FF00FF, bytes 1 and 3 by shifting right 8 first.
 * Each weighted sum is at most 255*256, so it stays inside its 16-bit field.
 *
 * The filtered color is written to *dstColor.
 */
static inline void Filter_32_opaque_portable(unsigned x, unsigned y,
                                             SkPMColor a00, SkPMColor a01,
                                             SkPMColor a10, SkPMColor a11,
                                             SkPMColor* dstColor) {
    SkASSERT((unsigned)x <= 0xF);
    SkASSERT((unsigned)y <= 0xF);

    static const uint32_t mask = gMask_00FF00FF; //0xFF00FF;

    // Per-tap weights, each in 0..256.
    const int xy  = x * y;
    const int w00 = 256 - 16*y - 16*x + xy;
    const int w01 = 16*x - xy;
    const int w10 = 16*y - xy;
    const int w11 = xy;

    // Bytes 0 and 2 of every tap, accumulated in the low halves of each
    // 16-bit field pair.
    const uint32_t evenBytes = (a00 & mask) * w00
                             + (a01 & mask) * w01
                             + (a10 & mask) * w10
                             + (a11 & mask) * w11;

    // Bytes 1 and 3 of every tap, shifted down so they share the same layout.
    const uint32_t oddBytes = ((a00 >> 8) & mask) * w00
                            + ((a01 >> 8) & mask) * w01
                            + ((a10 >> 8) & mask) * w10
                            + ((a11 >> 8) & mask) * w11;

    // Dividing by 256 moves the even bytes back into place; the odd bytes
    // already sit 8 bits up, so masking alone repositions them.
    const uint32_t evenOut = (evenBytes >> 8) & mask;
    const uint32_t oddOut  = oddBytes & ~mask;

    *dstColor = evenOut | oddOut;
}
/*
 * Portable bilinear filter for four 32-bit colors, followed by a multiply
 * of every channel with alphaScale / 256.
 *
 * x and y are 4-bit weights (asserted to be <= 0xF); alphaScale is asserted
 * to be <= 256. The four taps are weighted as
 *     a00: (16-x)*(16-y)     a01: x*(16-y)
 *     a10: (16-x)*y          a11: x*y
 * and the weights always add up to 256.
 *
 * Two channels are processed per multiply: bytes 0 and 2 of each color are
 * isolated with gMask_00FF00FF, bytes 1 and 3 by shifting right 8 first.
 * Both the weighted sums and the later products with alphaScale are at most
 * 255*256, so they stay inside their 16-bit fields.
 *
 * The filtered, scaled color is written to *dstColor.
 */
static inline void Filter_32_alpha_portable(unsigned x, unsigned y,
                                            SkPMColor a00, SkPMColor a01,
                                            SkPMColor a10, SkPMColor a11,
                                            SkPMColor* dstColor,
                                            unsigned alphaScale) {
    SkASSERT((unsigned)x <= 0xF);
    SkASSERT((unsigned)y <= 0xF);
    SkASSERT(alphaScale <= 256);

    static const uint32_t mask = gMask_00FF00FF; //0xFF00FF;

    // Per-tap weights, each in 0..256.
    const int xy  = x * y;
    const int w00 = 256 - 16*y - 16*x + xy;
    const int w01 = 16*x - xy;
    const int w10 = 16*y - xy;
    const int w11 = xy;

    // Step 1: bilinear blend, two channels per accumulator.
    uint32_t evenBytes = (a00 & mask) * w00
                       + (a01 & mask) * w01
                       + (a10 & mask) * w10
                       + (a11 & mask) * w11;
    uint32_t oddBytes  = ((a00 >> 8) & mask) * w00
                       + ((a01 >> 8) & mask) * w01
                       + ((a10 >> 8) & mask) * w10
                       + ((a11 >> 8) & mask) * w11;

    // Step 2: reduce each field to 8 bits, then apply the alpha scale.
    evenBytes = ((evenBytes >> 8) & mask) * alphaScale;
    oddBytes  = ((oddBytes  >> 8) & mask) * alphaScale;

    // Step 3: divide the even bytes by 256 to put them back in place; the
    // odd-byte products already sit 8 bits up and only need masking.
    *dstColor = ((evenBytes >> 8) & mask) | (oddBytes & ~mask);
}
// In this preprocessor branch the generic Filter_32_* names resolve to the
// portable C implementations.
#define Filter_32_opaque Filter_32_opaque_portable
#define Filter_32_alpha Filter_32_alpha_portable
#endif
|