diff options
author | radu.velea <radu.velea@intel.com> | 2015-10-06 08:05:15 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-10-06 15:06:56 +0000 |
commit | 0206f157d93f8c47ce959c76e029324ec9f35ff2 (patch) | |
tree | b451d85987d89ee1dc62c59b5f393cf3d72bfeb9 /third_party/qcms | |
parent | d878cd0f66a72eb4096101c17cee20a458718283 (diff) | |
download | chromium_src-0206f157d93f8c47ce959c76e029324ec9f35ff2.zip chromium_src-0206f157d93f8c47ce959c76e029324ec9f35ff2.tar.gz chromium_src-0206f157d93f8c47ce959c76e029324ec9f35ff2.tar.bz2 |
Use the caching transform prelude for non-SSE code in transform.c
Follow-on from https://codereview.chromium.org/1299203002: also use
the cache for the qcms_transform_data_tetra_clut_rgba s/w path.
Note: this s/w code path is not currently used in chrome
(the SSE code path is always selected).
Before this CL:
Windows 64 bit:
./out/Release_x64/qcms_test.exe -i 40 -w 2880 -h 1800
Test qcms clut transforms for 40 iterations
Test image size 2880 x 1800 pixels
15.533663 (avg 0.388342) seconds qcms_transform_data_tetra_clut_rgba
4.307897 (avg 0.107697) seconds qcms_transform_data_tetra_clut_rgba_sse2
3.605858 speedup after 40 iterations
Linux 64 bit:
./out/Release/qcms_test -i 40 -w 2880 -h 1800
Test qcms clut transforms for 40 iterations
Test image size 2880 x 1800 pixels
7.678742 (avg 0.191969) seconds qcms_transform_data_tetra_clut_rgba
3.138837 (avg 0.078471) seconds qcms_transform_data_tetra_clut_rgba_sse2
2.446366 speedup after 40 iterations
With this CL:
Windows 64 bit:
./out/Release_x64/qcms_test.exe -i 40 -w 2880 -h 1800
Test qcms clut transforms for 40 iterations
Test image size 2880 x 1800 pixels
9.351532 (avg 0.233788) seconds qcms_transform_data_tetra_clut_rgba
4.337411 (avg 0.108435) seconds qcms_transform_data_tetra_clut_rgba_sse2
2.156017 speedup after 40 iterations
Linux 64 bit:
./out/Release/qcms_test -i 40 -w 2880 -h 1800
Test qcms clut transforms for 40 iterations
Test image size 2880 x 1800 pixels
5.444788 (avg 0.136120) seconds qcms_transform_data_tetra_clut_rgba
3.143324 (avg 0.078583) seconds qcms_transform_data_tetra_clut_rgba_sse2
1.732175 speedup after 40 iterations
Win software path 388ms -> 233ms, no change in the SSE path as
expected (it was sped up by the previous code review).
Also, compared to where we were before doing tetra SSE, the s/w
path has been sped up by ~2x, and the SSE path by 4.25x. Absolute
frame speed of the SSE path is now faster than SkColorCubeFilter on
win-x64 (122ms).
CQ_INCLUDE_TRYBOTS=tryserver.blink:linux_blink_rel,mac_blink_rel,win_blink_rel
BUG=506607
Review URL: https://codereview.chromium.org/1391603002
Cr-Commit-Position: refs/heads/master@{#352603}
Diffstat (limited to 'third_party/qcms')
-rw-r--r-- | third_party/qcms/README.chromium | 2 | ||||
-rw-r--r-- | third_party/qcms/src/transform.c | 42 |
2 files changed, 26 insertions, 18 deletions
diff --git a/third_party/qcms/README.chromium b/third_party/qcms/README.chromium index b1845fd..d05e0c1 100644 --- a/third_party/qcms/README.chromium +++ b/third_party/qcms/README.chromium @@ -105,6 +105,8 @@ The following changes have been made since qcms was imported: - https://code.google.com/p/chromium/issues/detail?id=487284 - Caching transform prelude to save computation in V4 profiles - https://code.google.com/p/chromium/issues/detail?id=506607 + - Use the caching transform prelude for non-SSE code in transform.c + - https://code.google.com/p/chromium/issues/detail?id=506607 For the Chromium changes, since the import, in a patch format run: git diff b8456f38 src diff --git a/third_party/qcms/src/transform.c b/third_party/qcms/src/transform.c index cbdfa59..1e707b7 100644 --- a/third_party/qcms/src/transform.c +++ b/third_party/qcms/src/transform.c @@ -531,6 +531,9 @@ void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned cha float c0_b, c1_b, c2_b, c3_b; float clut_r, clut_g, clut_b; + if (!(transform->transform_flags & TRANSFORM_FLAG_CLUT_CACHE)) + qcms_transform_build_clut_cache(transform); + for (i = 0; i < length; i++) { unsigned char in_r = *src++; unsigned char in_g = *src++; @@ -539,17 +542,17 @@ void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned cha float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f; - int x = floor(linear_r * (transform->grid_size-1)); - int y = floor(linear_g * (transform->grid_size-1)); - int z = floor(linear_b * (transform->grid_size-1)); + int x = transform->floor_cache[in_r]; + int y = transform->floor_cache[in_g]; + int z = transform->floor_cache[in_b]; - int x_n = ceil(linear_r * (transform->grid_size-1)); - int y_n = ceil(linear_g * (transform->grid_size-1)); - int z_n = ceil(linear_b * (transform->grid_size-1)); + int x_n = transform->ceil_cache[in_r]; + int y_n = transform->ceil_cache[in_g]; + int z_n = transform->ceil_cache[in_b]; - float rx = 
linear_r * (transform->grid_size-1) - x; - float ry = linear_g * (transform->grid_size-1) - y; - float rz = linear_b * (transform->grid_size-1) - z; + float rx = transform->r_cache[in_r]; + float ry = transform->r_cache[in_g]; + float rz = transform->r_cache[in_b]; c0_r = CLU(r_table, x, y, z); c0_g = CLU(g_table, x, y, z); @@ -655,6 +658,9 @@ static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned c float c0_b, c1_b, c2_b, c3_b; float clut_r, clut_g, clut_b; + if (!(transform->transform_flags & TRANSFORM_FLAG_CLUT_CACHE)) + qcms_transform_build_clut_cache(transform); + for (i = 0; i < length; i++) { unsigned char in_r = *src++; unsigned char in_g = *src++; @@ -662,17 +668,17 @@ static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned c float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f; - int x = floor(linear_r * (transform->grid_size-1)); - int y = floor(linear_g * (transform->grid_size-1)); - int z = floor(linear_b * (transform->grid_size-1)); + int x = transform->floor_cache[in_r]; + int y = transform->floor_cache[in_g]; + int z = transform->floor_cache[in_b]; - int x_n = ceil(linear_r * (transform->grid_size-1)); - int y_n = ceil(linear_g * (transform->grid_size-1)); - int z_n = ceil(linear_b * (transform->grid_size-1)); + int x_n = transform->ceil_cache[in_r]; + int y_n = transform->ceil_cache[in_g]; + int z_n = transform->ceil_cache[in_b]; - float rx = linear_r * (transform->grid_size-1) - x; - float ry = linear_g * (transform->grid_size-1) - y; - float rz = linear_b * (transform->grid_size-1) - z; + float rx = transform->r_cache[in_r]; + float ry = transform->r_cache[in_g]; + float rz = transform->r_cache[in_b]; c0_r = CLU(r_table, x, y, z); c0_g = CLU(g_table, x, y, z); |