summary | refs | log | tree | commit | diff | stats
path: root/third_party/qcms
diff options
context:
space:
mode:
author: radu.velea <radu.velea@intel.com> 2015-10-06 08:05:15 -0700
committer: Commit bot <commit-bot@chromium.org> 2015-10-06 15:06:56 +0000
commit: 0206f157d93f8c47ce959c76e029324ec9f35ff2 (patch)
tree: b451d85987d89ee1dc62c59b5f393cf3d72bfeb9 /third_party/qcms
parent: d878cd0f66a72eb4096101c17cee20a458718283 (diff)
download: chromium_src-0206f157d93f8c47ce959c76e029324ec9f35ff2.zip
chromium_src-0206f157d93f8c47ce959c76e029324ec9f35ff2.tar.gz
chromium_src-0206f157d93f8c47ce959c76e029324ec9f35ff2.tar.bz2
Use the caching transform prelude for non-SSE code in transform.c
Following on from https://codereview.chromium.org/1299203002, use the cache for the qcms_transform_data_tetra_clut_rgba s/w path as well.

Note: this s/w code path is not currently used in Chrome (the SSE code path is always selected).

Before this CL:

Windows 64 bit:
./out/Release_x64/qcms_test.exe -i 40 -w 2880 -h 1800
Test qcms clut transforms for 40 iterations
Test image size 2880 x 1800 pixels
15.533663 (avg 0.388342) seconds qcms_transform_data_tetra_clut_rgba
4.307897 (avg 0.107697) seconds qcms_transform_data_tetra_clut_rgba_sse2
3.605858 speedup after 40 iterations

Linux 64 bit:
./out/Release/qcms_test -i 40 -w 2880 -h 1800
Test qcms clut transforms for 40 iterations
Test image size 2880 x 1800 pixels
7.678742 (avg 0.191969) seconds qcms_transform_data_tetra_clut_rgba
3.138837 (avg 0.078471) seconds qcms_transform_data_tetra_clut_rgba_sse2
2.446366 speedup after 40 iterations

With this CL:

Windows 64 bit:
./out/Release_x64/qcms_test.exe -i 40 -w 2880 -h 1800
Test qcms clut transforms for 40 iterations
Test image size 2880 x 1800 pixels
9.351532 (avg 0.233788) seconds qcms_transform_data_tetra_clut_rgba
4.337411 (avg 0.108435) seconds qcms_transform_data_tetra_clut_rgba_sse2
2.156017 speedup after 40 iterations

Linux 64 bit:
./out/Release/qcms_test -i 40 -w 2880 -h 1800
Test qcms clut transforms for 40 iterations
Test image size 2880 x 1800 pixels
5.444788 (avg 0.136120) seconds qcms_transform_data_tetra_clut_rgba
3.143324 (avg 0.078583) seconds qcms_transform_data_tetra_clut_rgba_sse2
1.732175 speedup after 40 iterations

The Windows software path goes from 388ms to 233ms, with no change in the SSE path, as expected (it was sped up by the previous code review). Also, compared to where we were before doing tetra SSE, the s/w path has been sped up by ~2x, and the SSE path by 4.25x. The absolute frame speed of the SSE path is now faster than SkColorCubeFilter on win-x64 (122ms).
CQ_INCLUDE_TRYBOTS=tryserver.blink:linux_blink_rel,mac_blink_rel,win_blink_rel
BUG=506607
Review URL: https://codereview.chromium.org/1391603002
Cr-Commit-Position: refs/heads/master@{#352603}
Diffstat (limited to 'third_party/qcms')
-rw-r--r-- third_party/qcms/README.chromium 2
-rw-r--r-- third_party/qcms/src/transform.c 42
2 files changed, 26 insertions, 18 deletions
diff --git a/third_party/qcms/README.chromium b/third_party/qcms/README.chromium
index b1845fd..d05e0c1 100644
--- a/third_party/qcms/README.chromium
+++ b/third_party/qcms/README.chromium
@@ -105,6 +105,8 @@ The following changes have been made since qcms was imported:
- https://code.google.com/p/chromium/issues/detail?id=487284
- Caching transform prelude to save computation in V4 profiles
- https://code.google.com/p/chromium/issues/detail?id=506607
+ - Use the caching transform prelude for non-SSE code in transform.c
+ - https://code.google.com/p/chromium/issues/detail?id=506607
For the Chromium changes, since the import, in a patch format run:
git diff b8456f38 src
diff --git a/third_party/qcms/src/transform.c b/third_party/qcms/src/transform.c
index cbdfa59..1e707b7 100644
--- a/third_party/qcms/src/transform.c
+++ b/third_party/qcms/src/transform.c
@@ -531,6 +531,9 @@ void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned cha
float c0_b, c1_b, c2_b, c3_b;
float clut_r, clut_g, clut_b;
+ if (!(transform->transform_flags & TRANSFORM_FLAG_CLUT_CACHE))
+ qcms_transform_build_clut_cache(transform);
+
for (i = 0; i < length; i++) {
unsigned char in_r = *src++;
unsigned char in_g = *src++;
@@ -539,17 +542,17 @@ void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned cha
float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
- int x = floor(linear_r * (transform->grid_size-1));
- int y = floor(linear_g * (transform->grid_size-1));
- int z = floor(linear_b * (transform->grid_size-1));
+ int x = transform->floor_cache[in_r];
+ int y = transform->floor_cache[in_g];
+ int z = transform->floor_cache[in_b];
- int x_n = ceil(linear_r * (transform->grid_size-1));
- int y_n = ceil(linear_g * (transform->grid_size-1));
- int z_n = ceil(linear_b * (transform->grid_size-1));
+ int x_n = transform->ceil_cache[in_r];
+ int y_n = transform->ceil_cache[in_g];
+ int z_n = transform->ceil_cache[in_b];
- float rx = linear_r * (transform->grid_size-1) - x;
- float ry = linear_g * (transform->grid_size-1) - y;
- float rz = linear_b * (transform->grid_size-1) - z;
+ float rx = transform->r_cache[in_r];
+ float ry = transform->r_cache[in_g];
+ float rz = transform->r_cache[in_b];
c0_r = CLU(r_table, x, y, z);
c0_g = CLU(g_table, x, y, z);
@@ -655,6 +658,9 @@ static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned c
float c0_b, c1_b, c2_b, c3_b;
float clut_r, clut_g, clut_b;
+ if (!(transform->transform_flags & TRANSFORM_FLAG_CLUT_CACHE))
+ qcms_transform_build_clut_cache(transform);
+
for (i = 0; i < length; i++) {
unsigned char in_r = *src++;
unsigned char in_g = *src++;
@@ -662,17 +668,17 @@ static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned c
float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
- int x = floor(linear_r * (transform->grid_size-1));
- int y = floor(linear_g * (transform->grid_size-1));
- int z = floor(linear_b * (transform->grid_size-1));
+ int x = transform->floor_cache[in_r];
+ int y = transform->floor_cache[in_g];
+ int z = transform->floor_cache[in_b];
- int x_n = ceil(linear_r * (transform->grid_size-1));
- int y_n = ceil(linear_g * (transform->grid_size-1));
- int z_n = ceil(linear_b * (transform->grid_size-1));
+ int x_n = transform->ceil_cache[in_r];
+ int y_n = transform->ceil_cache[in_g];
+ int z_n = transform->ceil_cache[in_b];
- float rx = linear_r * (transform->grid_size-1) - x;
- float ry = linear_g * (transform->grid_size-1) - y;
- float rz = linear_b * (transform->grid_size-1) - z;
+ float rx = transform->r_cache[in_r];
+ float ry = transform->r_cache[in_g];
+ float rz = transform->r_cache[in_b];
c0_r = CLU(r_table, x, y, z);
c0_g = CLU(g_table, x, y, z);