aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenrik Smiding <henrik.smiding@stericsson.com>2013-03-20 17:00:59 +0100
committerEvan McClain <aeroevan@gmail.com>2013-05-26 09:23:32 -0400
commit0f1f6b46ef441d15bcc77825bb6316ff4dc4338e (patch)
tree13b076fa3fa3d18f8c4e919fc1c238c6f7e0caeb
parent5dd8eff4587a42ce15fdc3ba30c879405e6d1401 (diff)
downloadexternal_skia-0f1f6b46ef441d15bcc77825bb6316ff4dc4338e.zip
external_skia-0f1f6b46ef441d15bcc77825bb6316ff4dc4338e.tar.gz
external_skia-0f1f6b46ef441d15bcc77825bb6316ff4dc4338e.tar.bz2
Add shader for Clamp_S32_Opaque_D32_nofilter_DX.
Adds an optimized shader for S32_Opaque_D32_nofilter_DX when running with clamped tile mode. Improves performance of system and various applications. Change-Id: I8dd7be24870315b2b8273bf72d9c22eb0aeb97fb Signed-off-by: Henrik Smiding <henrik.smiding@stericsson.com> Signed-off-by: Patrik Ryd <patrik.ryd@stericsson.com>
-rw-r--r--src/core/SkBitmapProcState.h2
-rw-r--r--src/opts/SkBitmapProcState_opts_arm.cpp140
2 files changed, 142 insertions, 0 deletions
diff --git a/src/core/SkBitmapProcState.h b/src/core/SkBitmapProcState.h
index 1b42b61..90dbf35 100644
--- a/src/core/SkBitmapProcState.h
+++ b/src/core/SkBitmapProcState.h
@@ -146,5 +146,7 @@ void ClampX_ClampY_nofilter_affine(const SkBitmapProcState& s,
uint32_t xy[], int count, int x, int y);
void SI8_opaque_D32_filter_DX(const SkBitmapProcState& s, const uint32_t xy[],
int count, SkPMColor colors[]);
+void S32_opaque_D32_nofilter_DX(const SkBitmapProcState& s, const uint32_t xy[],
+ int count, SkPMColor colors[]);
#endif
diff --git a/src/opts/SkBitmapProcState_opts_arm.cpp b/src/opts/SkBitmapProcState_opts_arm.cpp
index ce75fea..3a5694f 100644
--- a/src/opts/SkBitmapProcState_opts_arm.cpp
+++ b/src/opts/SkBitmapProcState_opts_arm.cpp
@@ -776,6 +776,144 @@ void Repeat_SI8_opaque_D32_filter_DX_shaderproc(const SkBitmapProcState& s, int
s.fBitmap->getColorTable()->unlockColors(false);
}
+
+
+void Clamp_S32_opaque_D32_nofilter_DX_shaderproc(const SkBitmapProcState& s, int x, int y,
+ SkPMColor* SK_RESTRICT colors, int count) {
+ SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
+ SkMatrix::kScale_Mask)) == 0);
+ SkASSERT(s.fInvKy == 0);
+ SkASSERT(count > 0 && colors != NULL);
+ SkASSERT(!s.fDoFilter);
+ SkDEBUGCODE(SkASSERT(s.fBitmap->config() == SkBitmap::kARGB_8888_Config); SkASSERT(s.fAlphaScale == 256);)
+
+ const register unsigned maxX = s.fBitmap->width() - 1;
+ const SkFixed dx = s.fInvSx;
+ register SkFixed fx;
+ const SkPMColor* SK_RESTRICT row;
+ int num;
+
+ {
+ SkPoint pt;
+ s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
+ SkIntToScalar(y) + SK_ScalarHalf, &pt);
+ SkFixed fy = SkScalarToFixed(pt.fY);
+ const unsigned maxY = s.fBitmap->height() - 1;
+ int yy = SkClampMax(SkFixedFloorToInt(fy), maxY);
+ const char* SK_RESTRICT srcAddr = (const char*)s.fBitmap->getPixels();
+ unsigned rb = s.fBitmap->rowBytes();
+ row = (const SkPMColor*)(srcAddr + yy * rb);
+ // now initialize fx
+ fx = SkScalarToFixed(pt.fX);
+ }
+
+ // Single pixel left in source?
+ if (maxX == 0) {
+ sk_memset32(colors, row[0], count);
+ return;
+ }
+
+ // Special case if unscaled.
+ if (dx == SK_Fixed1) {
+ fx = SkFixedFloorToInt(fx);
+
+ // Any clamped pixels at the beginning?
+ if (fx < 0) {
+ num = SkMin32(-fx, count);
+ sk_memset32(colors, row[0], num);
+ count -= num;
+ fx += num;
+ colors += num;
+ }
+
+ // Copy pixels
+ num = SkMin32(SkMax32(maxX + 1 - fx, 0), count);
+ memcpy(colors, row + fx, num * sizeof(SkPMColor));
+ count -= num;
+
+ if (count > 0) {
+ colors += num;
+ sk_memset32(colors, row[maxX], count);
+ }
+
+ return;
+ }
+
+ // Can we run unclamped case?
+ if (((dx >= 0) && (((fx + (count - 1) * dx) >> 16) <= (const signed)maxX) && (fx >= 0)) ||
+ ((dx < 0) && ((fx >> 16) <= (const signed)maxX) && (((fx + (count - 1) * dx) >> 16) >= 0))) {
+
+ asm volatile (
+ // Setup constants
+ "pld [%[row]] \n\t" // Pre-load source
+ "subs %[count], #4 \n\t" // Decrease loop counter
+ "bmi 2f \n\t" //
+ "pld [%[row], #32] \n\t" // Pre-load source
+ "1: \n\t" // Loop start
+ // Load pixels
+ "lsr r1, %[fx], #16 \n\t" // Calculate SkFixedFloorToInt(fx)
+ "add %[fx], %[fx], %[dx] \n\t" // Increment to next position
+ "add r1, %[row], r1, lsl #2 \n\t" // Calculate row[SkFixedFloorToInt(fx)];
+ "vldr.32 s0, [r1] \n\t" // Load pixel #1
+
+ "lsr r2, %[fx], #16 \n\t" // Calculate SkFixedFloorToInt(fx)
+ "add %[fx], %[fx], %[dx] \n\t" // Increment to next position
+ "add r2, %[row], r2, lsl #2 \n\t" // Calculate row[SkFixedFloorToInt(fx)];
+ "vldr.32 s1, [r2] \n\t" // Load pixel #2
+
+ "lsr r1, %[fx], #16 \n\t" // Calculate SkFixedFloorToInt(fx)
+ "add %[fx], %[fx], %[dx] \n\t" // Increment to next position
+ "add r1, %[row], r1, lsl #2 \n\t" // Calculate row[SkFixedFloorToInt(fx)];
+ "vldr.32 s2, [r1] \n\t" // Load pixel #3
+
+ "lsr r2, %[fx], #16 \n\t" // Calculate SkFixedFloorToInt(fx)
+ "add r2, %[row], r2, lsl #2 \n\t" // Calculate row[SkFixedFloorToInt(fx)];
+ "vldr.32 s3, [r2] \n\t" // Load pixel #4
+ // Write pixels
+ "subs %[count], #4 \n\t" // Decrease loop counter
+ "pld [r2, #52] \n\t" // Pre-load source
+ "add %[fx], %[fx], %[dx] \n\t" // Increment to next position
+ "vstm %[colors]!, {s0-s3} \n\t" // Write result to memory
+
+ "bpl 1b \n\t"
+ "2: \n\t" // Loop start
+ "add %[count], #4 \n\t" // Restore loop counter
+ : [fx] "+r" (fx), [colors] "+r" (colors), [count] "+r" (count)
+ : [row] "r" (row), [dx] "r" (dx)
+ : "cc", "memory", "r1", "r2", "s0", "s1", "s2", "s3"
+ );
+
+ for (num = (count & 3); num > 0; --num) {
+ *colors++ = row[SkFixedFloorToInt(fx)];
+ fx += dx;
+ }
+
+ return;
+ }
+
+ // Fallback case
+ for (num = (count >> 2); num > 0; --num) {
+ SkPMColor p0, p1, p2, p3;
+
+ p0 = row[SkClampMax(SkFixedFloorToInt(fx), maxX)];
+ fx += dx;
+ p1 = row[SkClampMax(SkFixedFloorToInt(fx), maxX)];
+ fx += dx;
+ p2 = row[SkClampMax(SkFixedFloorToInt(fx), maxX)];
+ fx += dx;
+ p3 = row[SkClampMax(SkFixedFloorToInt(fx), maxX)];
+ *colors++ = p0;
+ *colors++ = p1;
+ *colors++ = p2;
+ *colors++ = p3;
+ fx += dx;
+ }
+
+ for (num = (count & 3); num > 0; --num) {
+ *colors++ = row[SkClampMax(SkFixedFloorToInt(fx), maxX)];
+ fx += dx;
+ }
+}
#endif
@@ -821,6 +959,8 @@ void SkBitmapProcState::platformProcs() {
#if defined(__ARM_HAVE_NEON) && defined(SK_CPU_LENDIAN)
if (S32_opaque_D32_filter_DX == fSampleProc32 && clamp_clamp) {
fShaderProc32 = Clamp_S32_opaque_D32_filter_DX_shaderproc;
+ } else if (S32_opaque_D32_nofilter_DX == fSampleProc32 && clamp_clamp) {
+ fShaderProc32 = Clamp_S32_opaque_D32_nofilter_DX_shaderproc;
}
#endif
break;