1 files changed, 249 insertions, 40 deletions
diff --git a/third_party/libwebp/enc/picture.c b/third_party/libwebp/enc/picture.c
index 5aaa385..011690d 100644
--- a/third_party/libwebp/enc/picture.c
+++ b/third_party/libwebp/enc/picture.c
@@ -16,14 +16,15 @@
 #include <math.h>
 
 #include "./vp8enci.h"
+#include "../utils/alpha_processing.h"
+#include "../utils/random.h"
 #include "../utils/rescaler.h"
 #include "../utils/utils.h"
 #include "../dsp/dsp.h"
 #include "../dsp/yuv.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+// Comment out to disable gamma-compression during RGB->U/V averaging
+#define USE_GAMMA_COMPRESSION
 
 #define HALVE(x) (((x) + 1) >> 1)
 #define IS_YUV_CSP(csp, YUV_CSP) (((csp) & WEBP_CSP_UV_MASK) == (YUV_CSP))
@@ -34,6 +35,10 @@ static const union {
 } test_endian = { 0xff000000u };
 #define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff)
 
+static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) {
+  return 0xff000000u | (uint32_t)((r << 16) | (g << 8) | b);  // opaque
+}
+
 //------------------------------------------------------------------------------
 // WebPPicture
 //------------------------------------------------------------------------------
@@ -118,6 +123,7 @@ int WebPPictureAlloc(WebPPicture* picture) {
         picture->v0 = mem;
         mem += uv0_size;
       }
+      (void)mem;  // makes the static analyzer happy
     } else {
       void* memory;
       const uint64_t argb_size = (uint64_t)width * height;
@@ -395,6 +401,28 @@ static void RescalePlane(const uint8_t* src,
   }
 }
 
+// Runs WebPMultARGBRow() over every row of pic->argb ('inverse' is forwarded).
+static void AlphaMultiplyARGB(WebPPicture* const pic, int inverse) {
+  uint32_t* row = pic->argb;
+  int rows_left = pic->height;
+  for (; rows_left > 0; --rows_left, row += pic->argb_stride) {
+    WebPMultARGBRow(row, pic->width, inverse);
+  }
+}
+
+// Runs WebPMultRow() on each luma/alpha row pair; no-op without alpha plane.
+static void AlphaMultiplyY(WebPPicture* const pic, int inverse) {
+  const uint8_t* alpha_row = pic->a;
+  uint8_t* luma_row = pic->y;
+  int row;
+  if (alpha_row == NULL) return;
+  for (row = 0; row < pic->height; ++row) {
+    WebPMultRow(luma_row, alpha_row, pic->width, inverse);
+    luma_row += pic->y_stride;
+    alpha_row += pic->a_stride;
+  }
+}
+
 int WebPPictureRescale(WebPPicture* pic, int width, int height) {
   WebPPicture tmp;
   int prev_width, prev_height;
@@ -425,9 +453,19 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) {
       WebPPictureFree(&tmp);
       return 0;
     }
+    // If present, we need to rescale alpha first (for AlphaMultiplyY).
+    if (pic->a != NULL) {
+      RescalePlane(pic->a, prev_width, prev_height, pic->a_stride,
+                   tmp.a, width, height, tmp.a_stride, work, 1);
+    }
 
+    // We take transparency into account on the luma plane only. That's not
+    // totally exact blending, but still is a good approximation.
+    AlphaMultiplyY(pic, 0);
     RescalePlane(pic->y, prev_width, prev_height, pic->y_stride,
                  tmp.y, width, height, tmp.y_stride, work, 1);
+    AlphaMultiplyY(&tmp, 1);
+
     RescalePlane(pic->u,
                  HALVE(prev_width), HALVE(prev_height), pic->uv_stride,
                  tmp.u,
@@ -437,10 +475,6 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) {
                  tmp.v,
                  HALVE(width), HALVE(height), tmp.uv_stride, work, 1);
 
-    if (tmp.a != NULL) {
-      RescalePlane(pic->a, prev_width, prev_height, pic->a_stride,
-                   tmp.a, width, height, tmp.a_stride, work, 1);
-    }
 #ifdef WEBP_EXPERIMENTAL_FEATURES
     if (tmp.u0 != NULL) {
       const int s = IS_YUV_CSP(tmp.colorspace, WEBP_YUV422) ? 2 : 1;
@@ -458,12 +492,16 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) {
       WebPPictureFree(&tmp);
       return 0;
     }
-
+    // In order to correctly interpolate colors, we need to apply the alpha
+    // weighting first (black-matting), scale the RGB values, and remove
+    // the premultiplication afterward (while preserving the alpha channel).
+    AlphaMultiplyARGB(pic, 0);
     RescalePlane((const uint8_t*)pic->argb, prev_width, prev_height,
                  pic->argb_stride * 4,
                  (uint8_t*)tmp.argb, width, height,
                  tmp.argb_stride * 4,
                  work, 4);
+    AlphaMultiplyARGB(&tmp, 1);
   }
   WebPPictureFree(pic);
   free(work);
@@ -552,20 +590,101 @@ int WebPPictureHasTransparency(const WebPPicture* picture) {
 //------------------------------------------------------------------------------
 // RGB -> YUV conversion
 
-// TODO: we can do better than simply 2x2 averaging on U/V samples.
-#define SUM4(ptr) ((ptr)[0] + (ptr)[step] + \
-                   (ptr)[rgb_stride] + (ptr)[rgb_stride + step])
-#define SUM2H(ptr) (2 * (ptr)[0] + 2 * (ptr)[step])
-#define SUM2V(ptr) (2 * (ptr)[0] + 2 * (ptr)[rgb_stride])
-#define SUM1(ptr)  (4 * (ptr)[0])
+static int RGBToY(int r, int g, int b, VP8Random* const rg) {
+  return VP8RGBToY(r, g, b, VP8RandomBits(rg, YUV_FIX));  // per-pixel input
+}
+
+static int RGBToU(int r, int g, int b, VP8Random* const rg) {
+  return VP8RGBToU(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));  // 4-sample sums
+}
+
+static int RGBToV(int r, int g, int b, VP8Random* const rg) {
+  return VP8RGBToV(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));  // 4-sample sums
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(USE_GAMMA_COMPRESSION)
+
+// gamma-compensates loss of resolution during chroma subsampling
+#define kGamma 0.80
+#define kGammaFix 12     // fixed-point precision for linear values
+#define kGammaScale ((1 << kGammaFix) - 1)
+#define kGammaTabFix 7   // fixed-point fractional bits precision
+#define kGammaTabScale (1 << kGammaTabFix)
+#define kGammaTabRounder (kGammaTabScale >> 1)
+#define kGammaTabSize (1 << (kGammaFix - kGammaTabFix))
+
+static int kLinearToGammaTab[kGammaTabSize + 1];
+static uint16_t kGammaToLinearTab[256];
+static int kGammaTablesOk = 0;
+
+// Fills both gamma lookup tables on the first call; later calls return early.
+static void InitGammaTables(void) {
+  const double scale = 1. / kGammaScale;
+  int i;
+  if (kGammaTablesOk) return;   // NOTE(review): flag is unsynchronized
+  for (i = 0; i <= 255; ++i) {   // 8-bit gamma sample -> linear value
+    kGammaToLinearTab[i] =
+        (uint16_t)(pow(i / 255., kGamma) * kGammaScale + .5);
+  }
+  for (i = 0; i <= kGammaTabSize; ++i) {   // linear -> 8-bit gamma sample
+    const double x = scale * (i << kGammaTabFix);
+    kLinearToGammaTab[i] = (int)(pow(x, 1. / kGamma) * 255. + .5);
+  }
+  kGammaTablesOk = 1;
+}
+
+static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) {
+  return kGammaToLinearTab[v];   // table is filled by InitGammaTables()
+}
+
+// Maps the linear sum 'base_value << shift' back to gamma space, returning a
+// YUV_FIX+2 fixed-point precision U/V input suitable for RGBToU/V calls.
+static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
+  const int v = base_value << shift;              // final uplifted value
+  const int tab_pos = v >> (kGammaTabFix + 2);    // integer part
+  const int x = v & ((kGammaTabScale << 2) - 1);  // fractional part
+  const int v0 = kLinearToGammaTab[tab_pos];      // table entry at tab_pos
+  const int v1 = kLinearToGammaTab[tab_pos + 1];  // following table entry
+  const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x);   // interpolate
+  return (y + kGammaTabRounder) >> kGammaTabFix;             // descale
+}
+
+#else
+
+static void InitGammaTables(void) {}
+static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { return v; }
+static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
+  // Gamma off: plain sums; 'shift' scales them up to a four-sample total.
+  return (int)(base_value << shift);
+}
+
+#endif    // USE_GAMMA_COMPRESSION
+
+//------------------------------------------------------------------------------
+
+#define SUM4(ptr) LinearToGamma(                         \
+    GammaToLinear((ptr)[0]) +                            \
+    GammaToLinear((ptr)[step]) +                         \
+    GammaToLinear((ptr)[rgb_stride]) +                   \
+    GammaToLinear((ptr)[rgb_stride + step]), 0)  // 2x2 sum: no uplift needed
+
+#define SUM2H(ptr) \
+    LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[step]), 1)
+#define SUM2V(ptr) \
+    LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1)
+#define SUM1(ptr)  \
+    LinearToGamma(GammaToLinear((ptr)[0]), 2)
+
 #define RGB_TO_UV(x, y, SUM) {                           \
   const int src = (2 * (step * (x) + (y) * rgb_stride)); \
   const int dst = (x) + (y) * picture->uv_stride;        \
   const int r = SUM(r_ptr + src);                        \
   const int g = SUM(g_ptr + src);                        \
   const int b = SUM(b_ptr + src);                        \
-  picture->u[dst] = VP8RGBToU(r, g, b);                  \
-  picture->v[dst] = VP8RGBToV(r, g, b);                  \
+  picture->u[dst] = RGBToU(r, g, b, &rg);                \
+  picture->v[dst] = RGBToV(r, g, b, &rg);                \
 }
 
 #define RGB_TO_UV0(x_in, x_out, y, SUM) {                \
@@ -574,8 +693,8 @@ int WebPPictureHasTransparency(const WebPPicture* picture) {
   const int r = SUM(r_ptr + src);                        \
   const int g = SUM(g_ptr + src);                        \
   const int b = SUM(b_ptr + src);                        \
-  picture->u0[dst] = VP8RGBToU(r, g, b);                 \
-  picture->v0[dst] = VP8RGBToV(r, g, b);                 \
+  picture->u0[dst] = RGBToU(r, g, b, &rg);               \
+  picture->v0[dst] = RGBToV(r, g, b, &rg);               \
 }
 
 static void MakeGray(WebPPicture* const picture) {
@@ -594,12 +713,14 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
                               const uint8_t* const a_ptr,
                               int step,         // bytes per pixel
                               int rgb_stride,   // bytes per scanline
+                              float dithering,
                               WebPPicture* const picture) {
   const WebPEncCSP uv_csp = picture->colorspace & WEBP_CSP_UV_MASK;
   int x, y;
   const int width = picture->width;
   const int height = picture->height;
   const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride);
+  VP8Random rg;
 
   picture->colorspace = uv_csp;
   picture->use_argb = 0;
@@ -608,12 +729,15 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
   }
   if (!WebPPictureAlloc(picture)) return 0;
 
+  VP8InitRandom(&rg, dithering);
+  InitGammaTables();
+
   // Import luma plane
   for (y = 0; y < height; ++y) {
     for (x = 0; x < width; ++x) {
       const int offset = step * x + y * rgb_stride;
       picture->y[x + y * picture->y_stride] =
-          VP8RGBToY(r_ptr[offset], g_ptr[offset], b_ptr[offset]);
+          RGBToY(r_ptr[offset], g_ptr[offset], b_ptr[offset], &rg);
     }
   }
 
@@ -661,6 +785,7 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
 
   if (has_alpha) {
     assert(step >= 4);
+    assert(picture->a != NULL);
     for (y = 0; y < height; ++y) {
       for (x = 0; x < width; ++x) {
         picture->a[x + y * picture->a_stride] =
@@ -683,7 +808,7 @@ static int Import(WebPPicture* const picture,
 
   if (!picture->use_argb) {
     return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride,
-                              picture);
+                              0.f /* no dithering */, picture);
   }
   if (import_alpha) {
     picture->colorspace |= WEBP_CSP_ALPHA_BIT;
@@ -698,10 +823,7 @@ static int Import(WebPPicture* const picture,
       for (x = 0; x < width; ++x) {
         const int offset = step * x + y * rgb_stride;
         const uint32_t argb =
-            0xff000000u |
-            (r_ptr[offset] << 16) |
-            (g_ptr[offset] <<  8) |
-            (b_ptr[offset]);
+            MakeARGB32(r_ptr[offset], g_ptr[offset], b_ptr[offset]);
         picture->argb[x + y * picture->argb_stride] = argb;
       }
     }
@@ -762,8 +884,7 @@ int WebPPictureImportBGRX(WebPPicture* picture,
 
 int WebPPictureYUVAToARGB(WebPPicture* picture) {
   if (picture == NULL) return 0;
-  if (picture->memory_ == NULL || picture->y == NULL ||
-      picture->u == NULL || picture->v == NULL) {
+  if (picture->y == NULL || picture->u == NULL || picture->v == NULL) {
     return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
   }
   if ((picture->colorspace & WEBP_CSP_ALPHA_BIT) && picture->a == NULL) {
@@ -786,7 +907,7 @@ int WebPPictureYUVAToARGB(WebPPicture* picture) {
     WebPUpsampleLinePairFunc upsample = WebPGetLinePairConverter(ALPHA_IS_LAST);
 
     // First row, with replicated top samples.
-    upsample(NULL, cur_y, cur_u, cur_v, cur_u, cur_v, NULL, dst, width);
+    upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width);
     cur_y += picture->y_stride;
     dst += argb_stride;
     // Center rows.
@@ -819,7 +940,8 @@ int WebPPictureYUVAToARGB(WebPPicture* picture) {
   return 1;
 }
 
-int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) {
+int WebPPictureARGBToYUVADithered(WebPPicture* picture, WebPEncCSP colorspace,
+                                  float dithering) {
   if (picture == NULL) return 0;
   if (picture->argb == NULL) {
     return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
@@ -835,7 +957,8 @@ int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) {
     PictureResetARGB(&tmp);  // reset ARGB buffer so that it's not free()'d.
     tmp.use_argb = 0;
     tmp.colorspace = colorspace & WEBP_CSP_UV_MASK;
-    if (!ImportYUVAFromRGBA(r, g, b, a, 4, 4 * picture->argb_stride, &tmp)) {
+    if (!ImportYUVAFromRGBA(r, g, b, a, 4, 4 * picture->argb_stride, dithering,
+                            &tmp)) {
       return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
     }
     // Copy back the YUV specs into 'picture'.
@@ -847,6 +970,10 @@ int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) {
   return 1;
 }
 
+int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) {
+  return WebPPictureARGBToYUVADithered(picture, colorspace, 0.f);  // undithered
+}
+
 //------------------------------------------------------------------------------
 // Helper: clean up fully transparent area to help compressibility.
 
@@ -913,6 +1040,91 @@ void WebPCleanupTransparentArea(WebPPicture* pic) {
 #undef SIZE2
 
 //------------------------------------------------------------------------------
+// Blend color and remove transparency info
+// BLEND*: V0/V1 mix weighted by ALPHA; the bias keeps alpha extremes exact.
+#define BLEND(V0, V1, ALPHA) \
+    ((((V0) * (255 - (ALPHA)) + (V1) * (ALPHA)) * 0x101 + 256) >> 16)
+#define BLEND_10BIT(V0, V1, ALPHA) \
+    ((((V0) * (1020 - (ALPHA)) + (V1) * (ALPHA)) * 0x101 + 1024) >> 18)
+
+void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
+  const int red = (background_rgb >> 16) & 0xff;   // background is 0xRRGGBB
+  const int green = (background_rgb >> 8) & 0xff;
+  const int blue = (background_rgb >> 0) & 0xff;
+  VP8Random rg;
+  int x, y;
+  if (pic == NULL) return;
+  VP8InitRandom(&rg, 0.f);   // dithering off
+  if (!pic->use_argb) {
+    const int uv_width = (pic->width >> 1);  // omit last pixel during u/v loop
+    const int Y0 = RGBToY(red, green, blue, &rg);
+    // VP8RGBToU/V expects the u/v values summed over four pixels
+    const int U0 = RGBToU(4 * red, 4 * green, 4 * blue, &rg);
+    const int V0 = RGBToV(4 * red, 4 * green, 4 * blue, &rg);
+    const int has_alpha = pic->colorspace & WEBP_CSP_ALPHA_BIT;
+    if (!has_alpha || pic->a == NULL) return;    // nothing to do
+    for (y = 0; y < pic->height; ++y) {
+      // Luma blending
+      uint8_t* const y_ptr = pic->y + y * pic->y_stride;
+      uint8_t* const a_ptr = pic->a + y * pic->a_stride;
+      for (x = 0; x < pic->width; ++x) {
+        const int alpha = a_ptr[x];
+        if (alpha < 0xff) {   // fully opaque luma is left as is
+          y_ptr[x] = BLEND(Y0, y_ptr[x], a_ptr[x]);
+        }
+      }
+      // Chroma blending every even line
+      if ((y & 1) == 0) {
+        uint8_t* const u = pic->u + (y >> 1) * pic->uv_stride;
+        uint8_t* const v = pic->v + (y >> 1) * pic->uv_stride;
+        uint8_t* const a_ptr2 =   // next alpha row (same row on the last line)
+            (y + 1 == pic->height) ? a_ptr : a_ptr + pic->a_stride;
+        for (x = 0; x < uv_width; ++x) {
+          // Average four alpha values into a single blending weight.
+          // TODO(skal): might lead to visible contouring. Can we do better?
+          const int alpha =
+              a_ptr[2 * x + 0] + a_ptr[2 * x + 1] +
+              a_ptr2[2 * x + 0] + a_ptr2[2 * x + 1];
+          u[x] = BLEND_10BIT(U0, u[x], alpha);
+          v[x] = BLEND_10BIT(V0, v[x], alpha);
+        }
+        if (pic->width & 1) {   // rightmost pixel (x == uv_width here)
+          const int alpha = 2 * (a_ptr[2 * x + 0] + a_ptr2[2 * x + 0]);
+          u[x] = BLEND_10BIT(U0, u[x], alpha);
+          v[x] = BLEND_10BIT(V0, v[x], alpha);
+        }
+      }
+      memset(a_ptr, 0xff, pic->width);   // row is done: make it opaque
+    }
+  } else {
+    uint32_t* argb = pic->argb;
+    const uint32_t background = MakeARGB32(red, green, blue);
+    for (y = 0; y < pic->height; ++y) {
+      for (x = 0; x < pic->width; ++x) {
+        const int alpha = (argb[x] >> 24) & 0xff;
+        if (alpha != 0xff) {   // opaque pixels are kept unchanged
+          if (alpha > 0) {
+            int r = (argb[x] >> 16) & 0xff;
+            int g = (argb[x] >>  8) & 0xff;
+            int b = (argb[x] >>  0) & 0xff;
+            r = BLEND(red, r, alpha);
+            g = BLEND(green, g, alpha);
+            b = BLEND(blue, b, alpha);
+            argb[x] = MakeARGB32(r, g, b);
+          } else {
+            argb[x] = background;   // fully transparent: plain background
+          }
+        }
+      }
+      argb += pic->argb_stride;
+    }
+  }
+}
+
+#undef BLEND
+#undef BLEND_10BIT
+
+//------------------------------------------------------------------------------
 // local-min distortion
 //
 // For every pixel in the *reference* picture, we search for the local best
@@ -1088,10 +1300,10 @@ size_t NAME(const uint8_t* in, int w, int h, int bps, float q,          \
   return Encode(in, w, h, bps, IMPORTER, q, 0, out);                    \
 }
 
-ENCODE_FUNC(WebPEncodeRGB, WebPPictureImportRGB);
-ENCODE_FUNC(WebPEncodeBGR, WebPPictureImportBGR);
-ENCODE_FUNC(WebPEncodeRGBA, WebPPictureImportRGBA);
-ENCODE_FUNC(WebPEncodeBGRA, WebPPictureImportBGRA);
+ENCODE_FUNC(WebPEncodeRGB, WebPPictureImportRGB)
+ENCODE_FUNC(WebPEncodeBGR, WebPPictureImportBGR)
+ENCODE_FUNC(WebPEncodeRGBA, WebPPictureImportRGBA)
+ENCODE_FUNC(WebPEncodeBGRA, WebPPictureImportBGRA)
 
 #undef ENCODE_FUNC
 
@@ -1101,15 +1313,12 @@ size_t NAME(const uint8_t* in, int w, int h, int bps, uint8_t** out) {       \
   return Encode(in, w, h, bps, IMPORTER, LOSSLESS_DEFAULT_QUALITY, 1, out);  \
 }
 
-LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGB, WebPPictureImportRGB);
-LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGR, WebPPictureImportBGR);
-LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGBA, WebPPictureImportRGBA);
-LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGRA, WebPPictureImportBGRA);
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGB, WebPPictureImportRGB)
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGR, WebPPictureImportBGR)
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGBA, WebPPictureImportRGBA)
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGRA, WebPPictureImportBGRA)
 
 #undef LOSSLESS_ENCODE_FUNC
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif