diff options
author | jzern@chromium.org <jzern@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-01-09 22:25:42 +0000 |
---|---|---|
committer | jzern@chromium.org <jzern@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-01-09 22:25:42 +0000 |
commit | b7afef7463d25d0a6c366760ee507e62e4c052bd (patch) | |
tree | 85c318db49dfdb548cbb16a2e32f0c8d41d4a0cb /third_party/libwebp/dec | |
parent | dd32d8a6f739d059b2965a9b7ba8a6eabe96ecc3 (diff) | |
download | chromium_src-b7afef7463d25d0a6c366760ee507e62e4c052bd.zip chromium_src-b7afef7463d25d0a6c366760ee507e62e4c052bd.tar.gz chromium_src-b7afef7463d25d0a6c366760ee507e62e4c052bd.tar.bz2 |
libwebp: update snapshot to v0.1.3
adds sse2 optimizations for the encoder & decoder.
BUG=108376
TEST=webkit layout tests
Review URL: http://codereview.chromium.org/8529002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@116933 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'third_party/libwebp/dec')
-rw-r--r-- | third_party/libwebp/dec/alpha.c | 69 | ||||
-rw-r--r-- | third_party/libwebp/dec/bits.c | 79 | ||||
-rw-r--r-- | third_party/libwebp/dec/bits.h | 107 | ||||
-rw-r--r-- | third_party/libwebp/dec/buffer.c | 198 | ||||
-rw-r--r-- | third_party/libwebp/dec/dsp.c | 696 | ||||
-rw-r--r-- | third_party/libwebp/dec/frame.c | 383 | ||||
-rw-r--r-- | third_party/libwebp/dec/idec.c | 333 | ||||
-rw-r--r-- | third_party/libwebp/dec/io.c | 668 | ||||
-rw-r--r-- | third_party/libwebp/dec/layer.c | 34 | ||||
-rw-r--r-- | third_party/libwebp/dec/quant.c | 4 | ||||
-rw-r--r-- | third_party/libwebp/dec/tree.c | 4 | ||||
-rw-r--r-- | third_party/libwebp/dec/vp8.c | 236 | ||||
-rw-r--r-- | third_party/libwebp/dec/vp8i.h | 152 | ||||
-rw-r--r-- | third_party/libwebp/dec/webp.c | 916 | ||||
-rw-r--r-- | third_party/libwebp/dec/webpi.h | 180 | ||||
-rw-r--r-- | third_party/libwebp/dec/yuv.c | 46 | ||||
-rw-r--r-- | third_party/libwebp/dec/yuv.h | 66 |
17 files changed, 2328 insertions, 1843 deletions
diff --git a/third_party/libwebp/dec/alpha.c b/third_party/libwebp/dec/alpha.c new file mode 100644 index 0000000..3052ced --- /dev/null +++ b/third_party/libwebp/dec/alpha.c @@ -0,0 +1,69 @@ +// Copyright 2011 Google Inc. +// +// This code is licensed under the same terms as WebM: +// Software License Agreement: http://www.webmproject.org/license/software/ +// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// ----------------------------------------------------------------------------- +// +// Alpha-plane decompression. +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <stdlib.h> +#include "vp8i.h" + +#ifdef WEBP_EXPERIMENTAL_FEATURES + +#include "zlib.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +//------------------------------------------------------------------------------ + +const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec, + int row, int num_rows) { + uint8_t* output = dec->alpha_plane_; + const int stride = dec->pic_hdr_.width_; + if (row < 0 || row + num_rows > dec->pic_hdr_.height_) { + return NULL; // sanity check + } + if (row == 0) { + // TODO(skal): for now, we just decompress everything during the first call. + // Later, we'll decode progressively, but we need to store the + // z_stream state. 
+ const uint8_t* data = dec->alpha_data_; + size_t data_size = dec->alpha_data_size_; + const size_t output_size = stride * dec->pic_hdr_.height_; + int ret = Z_OK; + z_stream strm; + + memset(&strm, 0, sizeof(strm)); + if (inflateInit(&strm) != Z_OK) { + return 0; + } + strm.avail_in = data_size; + strm.next_in = (unsigned char*)data; + do { + strm.avail_out = output_size; + strm.next_out = output; + ret = inflate(&strm, Z_NO_FLUSH); + if (ret == Z_NEED_DICT || ret == Z_DATA_ERROR || ret == Z_MEM_ERROR) { + break; + } + } while (strm.avail_out == 0); + + inflateEnd(&strm); + if (ret != Z_STREAM_END) { + return NULL; // error + } + } + return output + row * stride; +} + +#if defined(__cplusplus) || defined(c_plusplus) +} // extern "C" +#endif + +#endif // WEBP_EXPERIMENTAL_FEATURES diff --git a/third_party/libwebp/dec/bits.c b/third_party/libwebp/dec/bits.c deleted file mode 100644 index da3b777..0000000 --- a/third_party/libwebp/dec/bits.c +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2010 Google Inc. 
-// -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ -// ----------------------------------------------------------------------------- -// -// Boolean decoder -// -// Author: Skal (pascal.massimino@gmail.com) - -#include "bits.h" - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -//----------------------------------------------------------------------------- -// VP8BitReader - -void VP8InitBitReader(VP8BitReader* const br, - const uint8_t* const start, const uint8_t* const end) { - assert(br); - assert(start); - assert(start <= end); - br->range_ = 255 - 1; - br->buf_ = start; - br->buf_end_ = end; - br->value_ = 0; - br->missing_ = 8; - br->eof_ = 0; -} - -const uint8_t kVP8Log2Range[128] = { - 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0 -}; - -// range = ((range + 1) << kVP8Log2Range[range]) - 1 -const uint8_t kVP8NewRange[128] = { - 127, 127, 191, 127, 159, 191, 223, 127, 143, 159, 175, 191, 207, 223, 239, - 127, 135, 143, 151, 159, 167, 175, 183, 191, 199, 207, 215, 223, 231, 239, - 247, 127, 131, 135, 139, 143, 147, 151, 155, 159, 163, 167, 171, 175, 179, - 183, 187, 191, 195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239, - 243, 247, 251, 127, 129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149, - 151, 153, 155, 157, 159, 161, 163, 165, 167, 169, 171, 173, 175, 177, 179, - 181, 183, 185, 187, 189, 191, 193, 195, 197, 199, 201, 203, 205, 207, 209, - 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 237, 
239, - 241, 243, 245, 247, 249, 251, 253, 127 -}; - -//----------------------------------------------------------------------------- -// Higher-level calls - -uint32_t VP8GetValue(VP8BitReader* const br, int bits) { - uint32_t v = 0; - while (bits-- > 0) { - v |= VP8GetBit(br, 0x80) << bits; - } - return v; -} - -int32_t VP8GetSignedValue(VP8BitReader* const br, int bits) { - const int value = VP8GetValue(br, bits); - return VP8Get(br) ? -value : value; -} - -//----------------------------------------------------------------------------- - -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/third_party/libwebp/dec/bits.h b/third_party/libwebp/dec/bits.h deleted file mode 100644 index 82e4c3a..0000000 --- a/third_party/libwebp/dec/bits.h +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright 2010 Google Inc. -// -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ -// ----------------------------------------------------------------------------- -// -// Boolean decoder -// -// Author: Skal (pascal.massimino@gmail.com) - -#ifndef WEBP_DEC_BITS_H_ -#define WEBP_DEC_BITS_H_ - -#include <assert.h> -#include "webp/decode_vp8.h" - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -//----------------------------------------------------------------------------- -// Bitreader and code-tree reader - -typedef struct { - const uint8_t* buf_; // next byte to be read - const uint8_t* buf_end_; // end of read buffer - int eof_; // true if input is exhausted - - // boolean decoder - uint32_t range_; // current range minus 1. In [127, 254] interval. - uint32_t value_; // current value - int missing_; // number of missing bits in value_ (8bit) -} VP8BitReader; - -// Initialize the bit reader and the boolean decoder. 
-void VP8InitBitReader(VP8BitReader* const br, - const uint8_t* const start, const uint8_t* const end); - -// return the next value made of 'num_bits' bits -uint32_t VP8GetValue(VP8BitReader* const br, int num_bits); -static inline uint32_t VP8Get(VP8BitReader* const br) { - return VP8GetValue(br, 1); -} - -// return the next value with sign-extension. -int32_t VP8GetSignedValue(VP8BitReader* const br, int num_bits); - -// Read a bit with proba 'prob'. Speed-critical function! -extern const uint8_t kVP8Log2Range[128]; -extern const uint8_t kVP8NewRange[128]; -static inline uint32_t VP8GetByte(VP8BitReader* const br) { - assert(br); - if (br->buf_ < br->buf_end_) { - assert(br->buf_); - return *br->buf_++; - } - br->eof_ = 1; - return 0xff; -} - -static inline uint32_t VP8BitUpdate(VP8BitReader* const br, uint32_t split) { - uint32_t bit; - // Make sure we have a least 8 bits in 'value_' - if (br->missing_ > 0) { - br->value_ |= VP8GetByte(br) << br->missing_; - br->missing_ -= 8; - } - bit = ((br->value_ >> 8) > split); - if (bit) { - br->range_ -= split + 1; - br->value_ -= (split + 1) << 8; - } else { - br->range_ = split; - } - return bit; -} - -static inline void VP8Shift(VP8BitReader* const br) { - // range_ is in [0..127] interval here. - const int shift = kVP8Log2Range[br->range_]; - br->range_ = kVP8NewRange[br->range_]; - br->value_ <<= shift; - br->missing_ += shift; -} - -static inline uint32_t VP8GetBit(VP8BitReader* const br, int prob) { - const uint32_t split = (br->range_ * prob) >> 8; - const uint32_t bit = VP8BitUpdate(br, split); - if (br->range_ < 0x7f) { - VP8Shift(br); - } - return bit; -} - -static inline int VP8GetSigned(VP8BitReader* const br, int v) { - const uint32_t split = br->range_ >> 1; - const uint32_t bit = VP8BitUpdate(br, split); - VP8Shift(br); - return bit ? 
-v : v; -} - -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif - -#endif // WEBP_DEC_BITS_H_ diff --git a/third_party/libwebp/dec/buffer.c b/third_party/libwebp/dec/buffer.c new file mode 100644 index 0000000..5de5e6f --- /dev/null +++ b/third_party/libwebp/dec/buffer.c @@ -0,0 +1,198 @@ +// Copyright 2011 Google Inc. +// +// This code is licensed under the same terms as WebM: +// Software License Agreement: http://www.webmproject.org/license/software/ +// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// ----------------------------------------------------------------------------- +// +// Everything about WebPDecBuffer +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <stdlib.h> +#include "vp8i.h" +#include "webpi.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +//------------------------------------------------------------------------------ +// WebPDecBuffer + +// Number of bytes per pixel for the different color-spaces. 
+static const int kModeBpp[MODE_LAST] = { 3, 4, 3, 4, 4, 2, 2, 1, 1 }; + +static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) { + int ok = 1; + WEBP_CSP_MODE mode = buffer->colorspace; + const int width = buffer->width; + const int height = buffer->height; + if (mode >= MODE_YUV) { // YUV checks + const WebPYUVABuffer* const buf = &buffer->u.YUVA; + const int size = buf->y_stride * height; + const int u_size = buf->u_stride * ((height + 1) / 2); + const int v_size = buf->v_stride * ((height + 1) / 2); + const int a_size = buf->a_stride * height; + ok &= (size <= buf->y_size); + ok &= (u_size <= buf->u_size); + ok &= (v_size <= buf->v_size); + ok &= (a_size <= buf->a_size); + ok &= (buf->y_stride >= width); + ok &= (buf->u_stride >= (width + 1) / 2); + ok &= (buf->v_stride >= (width + 1) / 2); + if (buf->a) { + ok &= (buf->a_stride >= width); + } + } else { // RGB checks + const WebPRGBABuffer* const buf = &buffer->u.RGBA; + ok &= (buf->stride * height <= buf->size); + ok &= (buf->stride >= width * kModeBpp[mode]); + } + return ok ? VP8_STATUS_OK : VP8_STATUS_INVALID_PARAM; +} + +static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) { + const int w = buffer->width; + const int h = buffer->height; + + if (w <= 0 || h <= 0) { + return VP8_STATUS_INVALID_PARAM; + } + + if (!buffer->is_external_memory && buffer->private_memory == NULL) { + uint8_t* output; + WEBP_CSP_MODE mode = buffer->colorspace; + int stride; + int uv_stride = 0, a_stride = 0; + int uv_size = 0; + uint64_t size, a_size = 0, total_size; + // We need memory and it hasn't been allocated yet. + // => initialize output buffer, now that dimensions are known. 
+ stride = w * kModeBpp[mode]; + size = (uint64_t)stride * h; + + if (mode >= MODE_YUV) { + uv_stride = (w + 1) / 2; + uv_size = (uint64_t)uv_stride * ((h + 1) / 2); + if (mode == MODE_YUVA) { + a_stride = w; + a_size = (uint64_t)a_stride * h; + } + } + total_size = size + 2 * uv_size + a_size; + + // Security/sanity checks + if (((size_t)total_size != total_size) || (total_size >= (1ULL << 40))) { + return VP8_STATUS_INVALID_PARAM; + } + + buffer->private_memory = output = (uint8_t*)malloc((size_t)total_size); + if (output == NULL) { + return VP8_STATUS_OUT_OF_MEMORY; + } + + if (mode >= MODE_YUV) { // YUVA initialization + WebPYUVABuffer* const buf = &buffer->u.YUVA; + buf->y = output; + buf->y_stride = stride; + buf->y_size = size; + buf->u = output + size; + buf->u_stride = uv_stride; + buf->u_size = uv_size; + buf->v = output + size + uv_size; + buf->v_stride = uv_stride; + buf->v_size = uv_size; + if (mode == MODE_YUVA) { + buf->a = output + size + 2 * uv_size; + } + buf->a_size = a_size; + buf->a_stride = a_stride; + } else { // RGBA initialization + WebPRGBABuffer* const buf = &buffer->u.RGBA; + buf->rgba = output; + buf->stride = stride; + buf->size = size; + } + } + return CheckDecBuffer(buffer); +} + +VP8StatusCode WebPAllocateDecBuffer(int w, int h, + const WebPDecoderOptions* const options, + WebPDecBuffer* const out) { + if (out == NULL || w <= 0 || h <= 0) { + return VP8_STATUS_INVALID_PARAM; + } + if (options != NULL) { // First, apply options if there is any. + if (options->use_cropping) { + const int cw = options->crop_width; + const int ch = options->crop_height; + const int x = options->crop_left & ~1; + const int y = options->crop_top & ~1; + if (x < 0 || y < 0 || cw <= 0 || ch <= 0 || x + cw > w || y + ch > h) { + return VP8_STATUS_INVALID_PARAM; // out of frame boundary. 
+ } + w = cw; + h = ch; + } + if (options->use_scaling) { + if (options->scaled_width <= 0 || options->scaled_height <= 0) { + return VP8_STATUS_INVALID_PARAM; + } + w = options->scaled_width; + h = options->scaled_height; + } + } + out->width = w; + out->height = h; + + // Then, allocate buffer for real + return AllocateBuffer(out); +} + +//------------------------------------------------------------------------------ +// constructors / destructors + +int WebPInitDecBufferInternal(WebPDecBuffer* const buffer, int version) { + if (version != WEBP_DECODER_ABI_VERSION) return 0; // version mismatch + if (!buffer) return 0; + memset(buffer, 0, sizeof(*buffer)); + return 1; +} + +void WebPFreeDecBuffer(WebPDecBuffer* const buffer) { + if (buffer) { + if (!buffer->is_external_memory) + free(buffer->private_memory); + buffer->private_memory = NULL; + } +} + +void WebPCopyDecBuffer(const WebPDecBuffer* const src, + WebPDecBuffer* const dst) { + if (src && dst) { + *dst = *src; + if (src->private_memory) { + dst->is_external_memory = 1; // dst buffer doesn't own the memory. + dst->private_memory = NULL; + } + } +} + +// Copy and transfer ownership from src to dst (beware of parameter order!) +void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst) { + if (src && dst) { + *dst = *src; + if (src->private_memory) { + src->is_external_memory = 1; // src relinquishes ownership + src->private_memory = NULL; + } + } +} + +//------------------------------------------------------------------------------ + +#if defined(__cplusplus) || defined(c_plusplus) +} // extern "C" +#endif diff --git a/third_party/libwebp/dec/dsp.c b/third_party/libwebp/dec/dsp.c deleted file mode 100644 index efde49d..0000000 --- a/third_party/libwebp/dec/dsp.c +++ /dev/null @@ -1,696 +0,0 @@ -// Copyright 2010 Google Inc. 
-// -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ -// ----------------------------------------------------------------------------- -// -// speed-critical functions. -// -// Author: Skal (pascal.massimino@gmail.com) - -#include "vp8i.h" - -#if defined(__SSE2__) -#include <emmintrin.h> -#endif - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -//----------------------------------------------------------------------------- -// run-time tables (~4k) - -static uint8_t abs0[255 + 255 + 1]; // abs(i) -static uint8_t abs1[255 + 255 + 1]; // abs(i)>>1 -static int8_t sclip1[1020 + 1020 + 1]; // clips [-1020, 1020] to [-128, 127] -static int8_t sclip2[112 + 112 + 1]; // clips [-112, 112] to [-16, 15] -static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255] - -// We declare this variable 'volatile' to prevent instruction reordering -// and make sure it's set to true _last_ (so as to be thread-safe) -static volatile int tables_ok = 0; - -void VP8DspInitTables(void) { - if (!tables_ok) { - int i; - for (i = -255; i <= 255; ++i) { - abs0[255 + i] = (i < 0) ? -i : i; - abs1[255 + i] = abs0[255 + i] >> 1; - } - for (i = -1020; i <= 1020; ++i) { - sclip1[1020 + i] = (i < -128) ? -128 : (i > 127) ? 127 : i; - } - for (i = -112; i <= 112; ++i) { - sclip2[112 + i] = (i < -16) ? -16 : (i > 15) ? 15 : i; - } - for (i = -255; i <= 255 + 255; ++i) { - clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i; - } - tables_ok = 1; - } -} - -static inline uint8_t clip_8b(int v) { - return (!(v & ~0xff)) ? v : (v < 0) ? 
0 : 255; -} - -//----------------------------------------------------------------------------- -// Transforms (Paragraph 14.4) - -#define STORE(x, y, v) \ - dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3)) - -static const int kC1 = 20091 + (1 << 16); -static const int kC2 = 35468; -#define MUL(a, b) (((a) * (b)) >> 16) - -static void Transform(const int16_t* in, uint8_t* dst) { - int C[4 * 4], *tmp; - int i; - tmp = C; - for (i = 0; i < 4; ++i) { // vertical pass - const int a = in[0] + in[8]; // [-4096, 4094] - const int b = in[0] - in[8]; // [-4095, 4095] - const int c = MUL(in[4], kC2) - MUL(in[12], kC1); // [-3783, 3783] - const int d = MUL(in[4], kC1) + MUL(in[12], kC2); // [-3785, 3781] - tmp[0] = a + d; // [-7881, 7875] - tmp[1] = b + c; // [-7878, 7878] - tmp[2] = b - c; // [-7878, 7878] - tmp[3] = a - d; // [-7877, 7879] - tmp += 4; - in++; - } - // Each pass is expanding the dynamic range by ~3.85 (upper bound). - // The exact value is (2. + (kC1 + kC2) / 65536). - // After the second pass, maximum interval is [-3794, 3794], assuming - // an input in [-2048, 2047] interval. We then need to add a dst value - // in the [0, 255] range. - // In the worst case scenario, the input to clip_8b() can be as large as - // [-60713, 60968]. 
- tmp = C; - for (i = 0; i < 4; ++i) { // horizontal pass - const int dc = tmp[0] + 4; - const int a = dc + tmp[8]; - const int b = dc - tmp[8]; - const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1); - const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2); - STORE(0, 0, a + d); - STORE(1, 0, b + c); - STORE(2, 0, b - c); - STORE(3, 0, a - d); - tmp++; - dst += BPS; - } -} -#undef MUL - -static void TransformUV(const int16_t* in, uint8_t* dst) { - Transform(in + 0 * 16, dst); - Transform(in + 1 * 16, dst + 4); - Transform(in + 2 * 16, dst + 4 * BPS); - Transform(in + 3 * 16, dst + 4 * BPS + 4); -} - -static void TransformDC(const int16_t *in, uint8_t* dst) { - const int DC = in[0] + 4; - int i, j; - for (j = 0; j < 4; ++j) { - for (i = 0; i < 4; ++i) { - STORE(i, j, DC); - } - } -} - -static void TransformDCUV(const int16_t* in, uint8_t* dst) { - if (in[0 * 16]) TransformDC(in + 0 * 16, dst); - if (in[1 * 16]) TransformDC(in + 1 * 16, dst + 4); - if (in[2 * 16]) TransformDC(in + 2 * 16, dst + 4 * BPS); - if (in[3 * 16]) TransformDC(in + 3 * 16, dst + 4 * BPS + 4); -} - -#undef STORE - -// default C implementations: -VP8Idct VP8Transform = Transform; -VP8Idct VP8TransformUV = TransformUV; -VP8Idct VP8TransformDC = TransformDC; -VP8Idct VP8TransformDCUV = TransformDCUV; - -//----------------------------------------------------------------------------- -// Paragraph 14.3 - -static void TransformWHT(const int16_t* in, int16_t* out) { - int tmp[16]; - int i; - for (i = 0; i < 4; ++i) { - const int a0 = in[0 + i] + in[12 + i]; - const int a1 = in[4 + i] + in[ 8 + i]; - const int a2 = in[4 + i] - in[ 8 + i]; - const int a3 = in[0 + i] - in[12 + i]; - tmp[0 + i] = a0 + a1; - tmp[8 + i] = a0 - a1; - tmp[4 + i] = a3 + a2; - tmp[12 + i] = a3 - a2; - } - for (i = 0; i < 4; ++i) { - const int dc = tmp[0 + i * 4] + 3; // w/ rounder - const int a0 = dc + tmp[3 + i * 4]; - const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4]; - const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4]; - const int a3 = 
dc - tmp[3 + i * 4]; - out[ 0] = (a0 + a1) >> 3; - out[16] = (a3 + a2) >> 3; - out[32] = (a0 - a1) >> 3; - out[48] = (a3 - a2) >> 3; - out += 64; - } -} - -void (*VP8TransformWHT)(const int16_t* in, int16_t* out) = TransformWHT; - -//----------------------------------------------------------------------------- -// Intra predictions - -#define OUT(x, y) dst[(x) + (y) * BPS] - -static inline void TrueMotion(uint8_t *dst, int size) { - const uint8_t* top = dst - BPS; - const uint8_t* const clip0 = clip1 + 255 - top[-1]; - int y; - for (y = 0; y < size; ++y) { - const uint8_t* const clip = clip0 + dst[-1]; - int x; - for (x = 0; x < size; ++x) { - dst[x] = clip[top[x]]; - } - dst += BPS; - } -} -static void TM4(uint8_t *dst) { TrueMotion(dst, 4); } -static void TM8uv(uint8_t *dst) { TrueMotion(dst, 8); } -static void TM16(uint8_t *dst) { TrueMotion(dst, 16); } - -//----------------------------------------------------------------------------- -// 16x16 - -static void VE16(uint8_t *dst) { // vertical - int j; - for (j = 0; j < 16; ++j) { - memcpy(dst + j * BPS, dst - BPS, 16); - } -} - -static void HE16(uint8_t *dst) { // horizontal - int j; - for (j = 16; j > 0; --j) { - memset(dst, dst[-1], 16); - dst += BPS; - } -} - -static inline void Put16(int v, uint8_t* dst) { - int j; - for (j = 0; j < 16; ++j) { - memset(dst + j * BPS, v, 16); - } -} - -static void DC16(uint8_t *dst) { // DC - int DC = 16; - int j; - for (j = 0; j < 16; ++j) { - DC += dst[-1 + j * BPS] + dst[j - BPS]; - } - Put16(DC >> 5, dst); -} - -static void DC16NoTop(uint8_t *dst) { // DC with top samples not available - int DC = 8; - int j; - for (j = 0; j < 16; ++j) { - DC += dst[-1 + j * BPS]; - } - Put16(DC >> 4, dst); -} - -static void DC16NoLeft(uint8_t *dst) { // DC with left samples not available - int DC = 8; - int i; - for (i = 0; i < 16; ++i) { - DC += dst[i - BPS]; - } - Put16(DC >> 4, dst); -} - -static void DC16NoTopLeft(uint8_t *dst) { // DC with no top and left samples - Put16(0x80, dst); 
-} - -//----------------------------------------------------------------------------- -// 4x4 - -#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) -#define AVG2(a, b) (((a) + (b) + 1) >> 1) - -static void VE4(uint8_t *dst) { // vertical - const uint8_t* top = dst - BPS; - const uint8_t vals[4] = { - AVG3(top[-1], top[0], top[1]), - AVG3(top[ 0], top[1], top[2]), - AVG3(top[ 1], top[2], top[3]), - AVG3(top[ 2], top[3], top[4]) - }; - int i; - for (i = 0; i < 4; ++i) { - memcpy(dst + i * BPS, vals, sizeof(vals)); - } -} - -static void HE4(uint8_t *dst) { // horizontal - const int A = dst[-1 - BPS]; - const int B = dst[-1]; - const int C = dst[-1 + BPS]; - const int D = dst[-1 + 2 * BPS]; - const int E = dst[-1 + 3 * BPS]; - *(uint32_t*)(dst + 0 * BPS) = 0x01010101U * AVG3(A, B, C); - *(uint32_t*)(dst + 1 * BPS) = 0x01010101U * AVG3(B, C, D); - *(uint32_t*)(dst + 2 * BPS) = 0x01010101U * AVG3(C, D, E); - *(uint32_t*)(dst + 3 * BPS) = 0x01010101U * AVG3(D, E, E); -} - -static void DC4(uint8_t *dst) { // DC - uint32_t dc = 4; - int i; - for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS]; - dc >>= 3; - for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4); -} - -static void RD4(uint8_t *dst) { // Down-right - const int I = dst[-1 + 0 * BPS]; - const int J = dst[-1 + 1 * BPS]; - const int K = dst[-1 + 2 * BPS]; - const int L = dst[-1 + 3 * BPS]; - const int X = dst[-1 - BPS]; - const int A = dst[0 - BPS]; - const int B = dst[1 - BPS]; - const int C = dst[2 - BPS]; - const int D = dst[3 - BPS]; - OUT(0, 3) = AVG3(J, K, L); - OUT(0, 2) = OUT(1, 3) = AVG3(I, J, K); - OUT(0, 1) = OUT(1, 2) = OUT(2, 3) = AVG3(X, I, J); - OUT(0, 0) = OUT(1, 1) = OUT(2, 2) = OUT(3, 3) = AVG3(A, X, I); - OUT(1, 0) = OUT(2, 1) = OUT(3, 2) = AVG3(B, A, X); - OUT(2, 0) = OUT(3, 1) = AVG3(C, B, A); - OUT(3, 0) = AVG3(D, C, B); -} - -static void LD4(uint8_t *dst) { // Down-Left - const int A = dst[0 - BPS]; - const int B = dst[1 - BPS]; - const int C = dst[2 - BPS]; - const int D = 
dst[3 - BPS]; - const int E = dst[4 - BPS]; - const int F = dst[5 - BPS]; - const int G = dst[6 - BPS]; - const int H = dst[7 - BPS]; - OUT(0, 0) = AVG3(A, B, C); - OUT(1, 0) = OUT(0, 1) = AVG3(B, C, D); - OUT(2, 0) = OUT(1, 1) = OUT(0, 2) = AVG3(C, D, E); - OUT(3, 0) = OUT(2, 1) = OUT(1, 2) = OUT(0, 3) = AVG3(D, E, F); - OUT(3, 1) = OUT(2, 2) = OUT(1, 3) = AVG3(E, F, G); - OUT(3, 2) = OUT(2, 3) = AVG3(F, G, H); - OUT(3, 3) = AVG3(G, H, H); -} - -static void VR4(uint8_t *dst) { // Vertical-Right - const int I = dst[-1 + 0 * BPS]; - const int J = dst[-1 + 1 * BPS]; - const int K = dst[-1 + 2 * BPS]; - const int X = dst[-1 - BPS]; - const int A = dst[0 - BPS]; - const int B = dst[1 - BPS]; - const int C = dst[2 - BPS]; - const int D = dst[3 - BPS]; - OUT(0, 0) = OUT(1, 2) = AVG2(X, A); - OUT(1, 0) = OUT(2, 2) = AVG2(A, B); - OUT(2, 0) = OUT(3, 2) = AVG2(B, C); - OUT(3, 0) = AVG2(C, D); - - OUT(0, 3) = AVG3(K, J, I); - OUT(0, 2) = AVG3(J, I, X); - OUT(0, 1) = OUT(1, 3) = AVG3(I, X, A); - OUT(1, 1) = OUT(2, 3) = AVG3(X, A, B); - OUT(2, 1) = OUT(3, 3) = AVG3(A, B, C); - OUT(3, 1) = AVG3(B, C, D); -} - -static void VL4(uint8_t *dst) { // Vertical-Left - const int A = dst[0 - BPS]; - const int B = dst[1 - BPS]; - const int C = dst[2 - BPS]; - const int D = dst[3 - BPS]; - const int E = dst[4 - BPS]; - const int F = dst[5 - BPS]; - const int G = dst[6 - BPS]; - const int H = dst[7 - BPS]; - OUT(0, 0) = AVG2(A, B); - OUT(1, 0) = OUT(0, 2) = AVG2(B, C); - OUT(2, 0) = OUT(1, 2) = AVG2(C, D); - OUT(3, 0) = OUT(2, 2) = AVG2(D, E); - - OUT(0, 1) = AVG3(A, B, C); - OUT(1, 1) = OUT(0, 3) = AVG3(B, C, D); - OUT(2, 1) = OUT(1, 3) = AVG3(C, D, E); - OUT(3, 1) = OUT(2, 3) = AVG3(D, E, F); - OUT(3, 2) = AVG3(E, F, G); - OUT(3, 3) = AVG3(F, G, H); -} - -static void HU4(uint8_t *dst) { // Horizontal-Up - const int I = dst[-1 + 0 * BPS]; - const int J = dst[-1 + 1 * BPS]; - const int K = dst[-1 + 2 * BPS]; - const int L = dst[-1 + 3 * BPS]; - OUT(0, 0) = AVG2(I, J); - OUT(2, 0) = OUT(0, 
1) = AVG2(J, K); - OUT(2, 1) = OUT(0, 2) = AVG2(K, L); - OUT(1, 0) = AVG3(I, J, K); - OUT(3, 0) = OUT(1, 1) = AVG3(J, K, L); - OUT(3, 1) = OUT(1, 2) = AVG3(K, L, L); - OUT(3, 2) = OUT(2, 2) = - OUT(0, 3) = OUT(1, 3) = OUT(2, 3) = OUT(3, 3) = L; -} - -static void HD4(uint8_t *dst) { // Horizontal-Down - const int I = dst[-1 + 0 * BPS]; - const int J = dst[-1 + 1 * BPS]; - const int K = dst[-1 + 2 * BPS]; - const int L = dst[-1 + 3 * BPS]; - const int X = dst[-1 - BPS]; - const int A = dst[0 - BPS]; - const int B = dst[1 - BPS]; - const int C = dst[2 - BPS]; - - OUT(0, 0) = OUT(2, 1) = AVG2(I, X); - OUT(0, 1) = OUT(2, 2) = AVG2(J, I); - OUT(0, 2) = OUT(2, 3) = AVG2(K, J); - OUT(0, 3) = AVG2(L, K); - - OUT(3, 0) = AVG3(A, B, C); - OUT(2, 0) = AVG3(X, A, B); - OUT(1, 0) = OUT(3, 1) = AVG3(I, X, A); - OUT(1, 1) = OUT(3, 2) = AVG3(J, I, X); - OUT(1, 2) = OUT(3, 3) = AVG3(K, J, I); - OUT(1, 3) = AVG3(L, K, J); -} - -#undef AVG3 -#undef AVG2 - -//----------------------------------------------------------------------------- -// Chroma - -static void VE8uv(uint8_t *dst) { // vertical - int j; - for (j = 0; j < 8; ++j) { - memcpy(dst + j * BPS, dst - BPS, 8); - } -} - -static void HE8uv(uint8_t *dst) { // horizontal - int j; - for (j = 0; j < 8; ++j) { - memset(dst, dst[-1], 8); - dst += BPS; - } -} - -// helper for chroma-DC predictions -static inline void Put8x8uv(uint64_t v, uint8_t* dst) { - int j; - for (j = 0; j < 8; ++j) { - *(uint64_t*)(dst + j * BPS) = v; - } -} - -static void DC8uv(uint8_t *dst) { // DC - int dc0 = 8; - int i; - for (i = 0; i < 8; ++i) { - dc0 += dst[i - BPS] + dst[-1 + i * BPS]; - } - Put8x8uv((uint64_t)((dc0 >> 4) * 0x0101010101010101ULL), dst); -} - -static void DC8uvNoLeft(uint8_t *dst) { // DC with no left samples - int dc0 = 4; - int i; - for (i = 0; i < 8; ++i) { - dc0 += dst[i - BPS]; - } - Put8x8uv((uint64_t)((dc0 >> 3) * 0x0101010101010101ULL), dst); -} - -static void DC8uvNoTop(uint8_t *dst) { // DC with no top samples - int dc0 = 4; - 
int i; - for (i = 0; i < 8; ++i) { - dc0 += dst[-1 + i * BPS]; - } - Put8x8uv((uint64_t)((dc0 >> 3) * 0x0101010101010101ULL), dst); -} - -static void DC8uvNoTopLeft(uint8_t *dst) { // DC with nothing - Put8x8uv(0x8080808080808080ULL, dst); -} - -//----------------------------------------------------------------------------- -// default C implementations - -VP8PredFunc VP8PredLuma4[NUM_BMODES] = { - DC4, TM4, VE4, HE4, RD4, VR4, LD4, VL4, HD4, HU4 -}; - -VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES] = { - DC16, TM16, VE16, HE16, - DC16NoTop, DC16NoLeft, DC16NoTopLeft -}; - -VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES] = { - DC8uv, TM8uv, VE8uv, HE8uv, - DC8uvNoTop, DC8uvNoLeft, DC8uvNoTopLeft -}; - -//----------------------------------------------------------------------------- -// Edge filtering functions - -// 4 pixels in, 2 pixels out -static inline void do_filter2(uint8_t* p, int step) { - const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; - const int a = 3 * (q0 - p0) + sclip1[1020 + p1 - q1]; - const int a1 = sclip2[112 + ((a + 4) >> 3)]; - const int a2 = sclip2[112 + ((a + 3) >> 3)]; - p[-step] = clip1[255 + p0 + a2]; - p[ 0] = clip1[255 + q0 - a1]; -} - -// 4 pixels in, 4 pixels out -static inline void do_filter4(uint8_t* p, int step) { - const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; - const int a = 3 * (q0 - p0); - const int a1 = sclip2[112 + ((a + 4) >> 3)]; - const int a2 = sclip2[112 + ((a + 3) >> 3)]; - const int a3 = (a1 + 1) >> 1; - p[-2*step] = clip1[255 + p1 + a3]; - p[- step] = clip1[255 + p0 + a2]; - p[ 0] = clip1[255 + q0 - a1]; - p[ step] = clip1[255 + q1 - a3]; -} - -// 6 pixels in, 6 pixels out -static inline void do_filter6(uint8_t* p, int step) { - const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step]; - const int q0 = p[0], q1 = p[step], q2 = p[2*step]; - const int a = sclip1[1020 + 3 * (q0 - p0) + sclip1[1020 + p1 - q1]]; - const int a1 = (27 * a + 63) >> 7; // eq. 
to ((3 * a + 7) * 9) >> 7 - const int a2 = (18 * a + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7 - const int a3 = (9 * a + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7 - p[-3*step] = clip1[255 + p2 + a3]; - p[-2*step] = clip1[255 + p1 + a2]; - p[- step] = clip1[255 + p0 + a1]; - p[ 0] = clip1[255 + q0 - a1]; - p[ step] = clip1[255 + q1 - a2]; - p[ 2*step] = clip1[255 + q2 - a3]; -} - -static inline int hev(const uint8_t* p, int step, int thresh) { - const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; - return (abs0[255 + p1 - p0] > thresh) || (abs0[255 + q1 - q0] > thresh); -} - -static inline int needs_filter(const uint8_t* p, int step, int thresh) { - const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; - return (2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) <= thresh; -} - -static inline int needs_filter2(const uint8_t* p, int step, int t, int it) { - const int p3 = p[-4*step], p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step]; - const int q0 = p[0], q1 = p[step], q2 = p[2*step], q3 = p[3*step]; - if ((2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) > t) - return 0; - return abs0[255 + p3 - p2] <= it && abs0[255 + p2 - p1] <= it && - abs0[255 + p1 - p0] <= it && abs0[255 + q3 - q2] <= it && - abs0[255 + q2 - q1] <= it && abs0[255 + q1 - q0] <= it; -} - -//----------------------------------------------------------------------------- -// Simple In-loop filtering (Paragraph 15.2) - -static void SimpleVFilter16(uint8_t* p, int stride, int thresh) { - int i; - for (i = 0; i < 16; ++i) { - if (needs_filter(p + i, stride, thresh)) { - do_filter2(p + i, stride); - } - } -} - -static void SimpleHFilter16(uint8_t* p, int stride, int thresh) { - int i; - for (i = 0; i < 16; ++i) { - if (needs_filter(p + i * stride, 1, thresh)) { - do_filter2(p + i * stride, 1); - } - } -} - -static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) { - int k; - for (k = 3; k > 0; --k) { - p += 4 * stride; - SimpleVFilter16(p, stride, thresh); - } -} - -static 
void SimpleHFilter16i(uint8_t* p, int stride, int thresh) { - int k; - for (k = 3; k > 0; --k) { - p += 4; - SimpleHFilter16(p, stride, thresh); - } -} - -//----------------------------------------------------------------------------- -// Complex In-loop filtering (Paragraph 15.3) - -static inline void FilterLoop26(uint8_t* p, int hstride, int vstride, int size, - int thresh, int ithresh, int hev_thresh) { - while (size-- > 0) { - if (needs_filter2(p, hstride, thresh, ithresh)) { - if (hev(p, hstride, hev_thresh)) { - do_filter2(p, hstride); - } else { - do_filter6(p, hstride); - } - } - p += vstride; - } -} - -static inline void FilterLoop24(uint8_t* p, int hstride, int vstride, int size, - int thresh, int ithresh, int hev_thresh) { - while (size-- > 0) { - if (needs_filter2(p, hstride, thresh, ithresh)) { - if (hev(p, hstride, hev_thresh)) { - do_filter2(p, hstride); - } else { - do_filter4(p, hstride); - } - } - p += vstride; - } -} - -// on macroblock edges -static void VFilter16(uint8_t* p, int stride, - int thresh, int ithresh, int hev_thresh) { - FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh); -} - -static void HFilter16(uint8_t* p, int stride, - int thresh, int ithresh, int hev_thresh) { - FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh); -} - -// on three inner edges -static void VFilter16i(uint8_t* p, int stride, - int thresh, int ithresh, int hev_thresh) { - int k; - for (k = 3; k > 0; --k) { - p += 4 * stride; - FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh); - } -} - -static void HFilter16i(uint8_t* p, int stride, - int thresh, int ithresh, int hev_thresh) { - int k; - for (k = 3; k > 0; --k) { - p += 4; - FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh); - } -} - -// 8-pixels wide variant, for chroma filtering -static void VFilter8(uint8_t* u, uint8_t* v, int stride, - int thresh, int ithresh, int hev_thresh) { - FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh); - FilterLoop26(v, stride, 1, 8, 
thresh, ithresh, hev_thresh); -} - -static void HFilter8(uint8_t* u, uint8_t* v, int stride, - int thresh, int ithresh, int hev_thresh) { - FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh); - FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh); -} - -static void VFilter8i(uint8_t* u, uint8_t* v, int stride, - int thresh, int ithresh, int hev_thresh) { - FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh); - FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh); -} - -static void HFilter8i(uint8_t* u, uint8_t* v, int stride, - int thresh, int ithresh, int hev_thresh) { - FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh); - FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh); -} - -//----------------------------------------------------------------------------- - -void (*VP8VFilter16)(uint8_t*, int, int, int, int) = VFilter16; -void (*VP8HFilter16)(uint8_t*, int, int, int, int) = HFilter16; -void (*VP8VFilter8)(uint8_t*, uint8_t*, int, int, int, int) = VFilter8; -void (*VP8HFilter8)(uint8_t*, uint8_t*, int, int, int, int) = HFilter8; -void (*VP8VFilter16i)(uint8_t*, int, int, int, int) = VFilter16i; -void (*VP8HFilter16i)(uint8_t*, int, int, int, int) = HFilter16i; -void (*VP8VFilter8i)(uint8_t*, uint8_t*, int, int, int, int) = VFilter8i; -void (*VP8HFilter8i)(uint8_t*, uint8_t*, int, int, int, int) = HFilter8i; - -void (*VP8SimpleVFilter16)(uint8_t*, int, int) = SimpleVFilter16; -void (*VP8SimpleHFilter16)(uint8_t*, int, int) = SimpleHFilter16; -void (*VP8SimpleVFilter16i)(uint8_t*, int, int) = SimpleVFilter16i; -void (*VP8SimpleHFilter16i)(uint8_t*, int, int) = SimpleHFilter16i; - -//----------------------------------------------------------------------------- - -void VP8DspInit(void) { - // later we'll plug some SSE2 variant here -} - -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/third_party/libwebp/dec/frame.c b/third_party/libwebp/dec/frame.c 
index 44c6357..887e565 100644 --- a/third_party/libwebp/dec/frame.c +++ b/third_party/libwebp/dec/frame.c @@ -10,7 +10,7 @@ // Author: Skal (pascal.massimino@gmail.com) #include <stdlib.h> -#include "vp8i.h" +#include "./vp8i.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { @@ -18,25 +18,84 @@ extern "C" { #define ALIGN_MASK (32 - 1) -//----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ +// For multi-threaded decoding we need to use 3 rows of 16 pixels as delay line. +// +// Reason is: the deblocking filter cannot deblock the bottom horizontal edges +// immediately, and needs to wait for first few rows of the next macroblock to +// be decoded. Hence, deblocking is lagging behind by 4 or 8 pixels (depending +// on strength). +// With two threads, the vertical positions of the rows being decoded are: +// Decode: [ 0..15][16..31][32..47][48..63][64..79][... +// Deblock: [ 0..11][12..27][28..43][44..59][... +// If we use two threads and two caches of 16 pixels, the sequence would be: +// Decode: [ 0..15][16..31][ 0..15!!][16..31][ 0..15][... +// Deblock: [ 0..11][12..27!!][-4..11][12..27][... +// The problem occurs during row [12..15!!] that both the decoding and +// deblocking threads are writing simultaneously. +// With 3 cache lines, one get a safe write pattern: +// Decode: [ 0..15][16..31][32..47][ 0..15][16..31][32..47][0.. +// Deblock: [ 0..11][12..27][28..43][-4..11][12..27][28... +// Note that multi-threaded output _without_ deblocking can make use of two +// cache lines of 16 pixels only, since there's no lagging behind. The decoding +// and output process have non-concurrent writing: +// Decode: [ 0..15][16..31][ 0..15][16..31][... +// io->put: [ 0..15][16..31][ 0..15][... 
+ +#define MT_CACHE_LINES 3 +#define ST_CACHE_LINES 1 // 1 cache row only for single-threaded case + +// Initialize multi/single-thread worker +static int InitThreadContext(VP8Decoder* const dec) { + dec->cache_id_ = 0; + if (dec->use_threads_) { + WebPWorker* const worker = &dec->worker_; + if (!WebPWorkerReset(worker)) { + return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY, + "thread initialization failed."); + } + worker->data1 = dec; + worker->data2 = (void*)&dec->thread_ctx_.io_; + worker->hook = (WebPWorkerHook)VP8FinishRow; + dec->num_caches_ = + (dec->filter_type_ > 0) ? MT_CACHE_LINES : MT_CACHE_LINES - 1; + } else { + dec->num_caches_ = ST_CACHE_LINES; + } + return 1; +} + +//------------------------------------------------------------------------------ // Memory setup -// how many extra luma lines are needed for caching, given a filtering level -static const uint8_t kFilterExtraRows[3] = { 0, 4, 8 }; +// kFilterExtraRows[] = How many extra lines are needed on the MB boundary +// for caching, given a filtering level. +// Simple filter: up to 2 luma samples are read and 1 is written. +// Complex filter: up to 4 luma samples are read and 3 are written. Same for +// U/V, so it's 8 samples total (because of the 2x upsampling). +static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 }; -int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) { +static int AllocateMemory(VP8Decoder* const dec) { + const int num_caches = dec->num_caches_; const int mb_w = dec->mb_w_; const int intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t); const int top_size = (16 + 8 + 8) * mb_w; - const int info_size = (mb_w + 1) * sizeof(VP8MB); + const int mb_info_size = (mb_w + 1) * sizeof(VP8MB); + const int f_info_size = + (dec->filter_type_ > 0) ? + mb_w * (dec->use_threads_ ? 
2 : 1) * sizeof(VP8FInfo) + : 0; const int yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_); const int coeffs_size = 384 * sizeof(*dec->coeffs_); - const int cache_height = (16 + kFilterExtraRows[dec->filter_type_]) * 3 / 2; + const int cache_height = (16 * num_caches + + kFilterExtraRows[dec->filter_type_]) * 3 / 2; const int cache_size = top_size * cache_height; + const int alpha_size = + dec->alpha_data_ ? (dec->pic_hdr_.width_ * dec->pic_hdr_.height_) : 0; const int needed = intra_pred_mode_size - + top_size + info_size + + top_size + mb_info_size + f_info_size + yuv_size + coeffs_size - + cache_size + ALIGN_MASK; + + cache_size + alpha_size + ALIGN_MASK; uint8_t* mem; if (needed > dec->mem_size_) { @@ -62,7 +121,18 @@ int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) { mem += 8 * mb_w; dec->mb_info_ = ((VP8MB*)mem) + 1; - mem += info_size; + mem += mb_info_size; + + dec->f_info_ = f_info_size ? (VP8FInfo*)mem : NULL; + mem += f_info_size; + dec->thread_ctx_.id_ = 0; + dec->thread_ctx_.f_info_ = dec->f_info_; + if (dec->use_threads_) { + // secondary cache line. The deblocking process need to make use of the + // filtering strength from previous macroblock row, while the new ones + // are being decoded in parallel. We'll just swap the pointers. 
+ dec->thread_ctx_.f_info_ += mb_w; + } mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK); assert((yuv_size & ALIGN_MASK) == 0); @@ -79,36 +149,48 @@ int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) { const int extra_y = extra_rows * dec->cache_y_stride_; const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_; dec->cache_y_ = ((uint8_t*)mem) + extra_y; - dec->cache_u_ = dec->cache_y_ + 16 * dec->cache_y_stride_ + extra_uv; - dec->cache_v_ = dec->cache_u_ + 8 * dec->cache_uv_stride_ + extra_uv; + dec->cache_u_ = dec->cache_y_ + + 16 * num_caches * dec->cache_y_stride_ + extra_uv; + dec->cache_v_ = dec->cache_u_ + + 8 * num_caches * dec->cache_uv_stride_ + extra_uv; + dec->cache_id_ = 0; } mem += cache_size; + // alpha plane + dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL; + mem += alpha_size; + // note: left-info is initialized once for all. - memset(dec->mb_info_ - 1, 0, (mb_w + 1) * sizeof(*dec->mb_info_)); + memset(dec->mb_info_ - 1, 0, mb_info_size); // initialize top memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size); + return 1; +} + +static void InitIo(VP8Decoder* const dec, VP8Io* io) { // prepare 'io' - io->width = dec->pic_hdr_.width_; - io->height = dec->pic_hdr_.height_; io->mb_y = 0; io->y = dec->cache_y_; io->u = dec->cache_u_; io->v = dec->cache_v_; io->y_stride = dec->cache_y_stride_; io->uv_stride = dec->cache_uv_stride_; - io->fancy_upscaling = 0; // default - - // Init critical function pointers and look-up tables. - VP8DspInitTables(); - VP8DspInit(); + io->fancy_upsampling = 0; // default + io->a = NULL; +} +int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) { + if (!InitThreadContext(dec)) return 0; // call first. Sets dec->num_caches_. + if (!AllocateMemory(dec)) return 0; + InitIo(dec, io); + VP8DspInit(); // Init critical function pointers and look-up tables. 
return 1; } -//----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // Filtering static inline int hev_thresh_from_level(int level, int keyframe) { @@ -119,12 +201,13 @@ static inline int hev_thresh_from_level(int level, int keyframe) { } } -static void DoFilter(VP8Decoder* const dec, int mb_x, int mb_y) { - VP8MB* const mb = dec->mb_info_ + mb_x; - uint8_t* const y_dst = dec->cache_y_ + mb_x * 16; +static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) { + const VP8ThreadContext* const ctx = &dec->thread_ctx_; const int y_bps = dec->cache_y_stride_; - const int level = mb->f_level_; - const int ilevel = mb->f_ilevel_; + VP8FInfo* const f_info = ctx->f_info_ + mb_x; + uint8_t* const y_dst = dec->cache_y_ + ctx->id_ * 16 * y_bps + mb_x * 16; + const int level = f_info->f_level_; + const int ilevel = f_info->f_ilevel_; const int limit = 2 * level + ilevel; if (level == 0) { return; @@ -133,26 +216,26 @@ static void DoFilter(VP8Decoder* const dec, int mb_x, int mb_y) { if (mb_x > 0) { VP8SimpleHFilter16(y_dst, y_bps, limit + 4); } - if (mb->f_inner_) { + if (f_info->f_inner_) { VP8SimpleHFilter16i(y_dst, y_bps, limit); } if (mb_y > 0) { VP8SimpleVFilter16(y_dst, y_bps, limit + 4); } - if (mb->f_inner_) { + if (f_info->f_inner_) { VP8SimpleVFilter16i(y_dst, y_bps, limit); } } else { // complex - uint8_t* const u_dst = dec->cache_u_ + mb_x * 8; - uint8_t* const v_dst = dec->cache_v_ + mb_x * 8; const int uv_bps = dec->cache_uv_stride_; + uint8_t* const u_dst = dec->cache_u_ + ctx->id_ * 8 * uv_bps + mb_x * 8; + uint8_t* const v_dst = dec->cache_v_ + ctx->id_ * 8 * uv_bps + mb_x * 8; const int hev_thresh = hev_thresh_from_level(level, dec->frm_hdr_.key_frame_); if (mb_x > 0) { VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh); VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh); } - if (mb->f_inner_) { + if (f_info->f_inner_) { 
VP8HFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh); VP8HFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh); } @@ -160,16 +243,29 @@ static void DoFilter(VP8Decoder* const dec, int mb_x, int mb_y) { VP8VFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh); VP8VFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh); } - if (mb->f_inner_) { + if (f_info->f_inner_) { VP8VFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh); VP8VFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh); } } } +// Filter the decoded macroblock row (if needed) +static void FilterRow(const VP8Decoder* const dec) { + int mb_x; + const int mb_y = dec->thread_ctx_.mb_y_; + assert(dec->thread_ctx_.filter_row_); + for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) { + DoFilter(dec, mb_x, mb_y); + } +} + +//------------------------------------------------------------------------------ + void VP8StoreBlock(VP8Decoder* const dec) { if (dec->filter_type_ > 0) { - VP8MB* const info = dec->mb_info_ + dec->mb_x_; + VP8FInfo* const info = dec->f_info_ + dec->mb_x_; + const int skip = dec->mb_info_[dec->mb_x_].skip_; int level = dec->filter_levels_[dec->segment_]; if (dec->filter_hdr_.use_lf_delta_) { // TODO(skal): only CURRENT is handled for now. @@ -193,14 +289,16 @@ void VP8StoreBlock(VP8Decoder* const dec) { } info->f_ilevel_ = (level < 1) ? 
1 : level; - info->f_inner_ = (!info->skip_ || dec->is_i4x4_); + info->f_inner_ = (!skip || dec->is_i4x4_); } { // Transfer samples to row cache int y; - uint8_t* const ydst = dec->cache_y_ + dec->mb_x_ * 16; - uint8_t* const udst = dec->cache_u_ + dec->mb_x_ * 8; - uint8_t* const vdst = dec->cache_v_ + dec->mb_x_ * 8; + const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_; + const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_; + uint8_t* const ydst = dec->cache_y_ + dec->mb_x_ * 16 + y_offset; + uint8_t* const udst = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset; + uint8_t* const vdst = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset; for (y = 0; y < 16; ++y) { memcpy(ydst + y * dec->cache_y_stride_, dec->yuv_b_ + Y_OFF + y * BPS, 16); @@ -214,56 +312,205 @@ void VP8StoreBlock(VP8Decoder* const dec) { } } +//------------------------------------------------------------------------------ +// This function is called after a row of macroblocks is finished decoding. +// It also takes into account the following restrictions: +// * In case of in-loop filtering, we must hold off sending some of the bottom +// pixels as they are yet unfiltered. They will be when the next macroblock +// row is decoded. Meanwhile, we must preserve them by rotating them in the +// cache area. This doesn't hold for the very bottom row of the uncropped +// picture of course. +// * we must clip the remaining pixels against the cropping area. The VP8Io +// struct must have the following fields set correctly before calling put(): + +#define MACROBLOCK_VPOS(mb_y) ((mb_y) * 16) // vertical position of a MB + +// Finalize and transmit a complete row. Return false in case of user-abort. 
int VP8FinishRow(VP8Decoder* const dec, VP8Io* io) { + int ok = 1; + const VP8ThreadContext* const ctx = &dec->thread_ctx_; const int extra_y_rows = kFilterExtraRows[dec->filter_type_]; const int ysize = extra_y_rows * dec->cache_y_stride_; const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_; - const int first_row = (dec->mb_y_ == 0); - const int last_row = (dec->mb_y_ >= dec->mb_h_ - 1); - uint8_t* const ydst = dec->cache_y_ - ysize; - uint8_t* const udst = dec->cache_u_ - uvsize; - uint8_t* const vdst = dec->cache_v_ - uvsize; - if (dec->filter_type_ > 0) { - int mb_x; - for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) { - DoFilter(dec, mb_x, dec->mb_y_); - } + const int y_offset = ctx->id_ * 16 * dec->cache_y_stride_; + const int uv_offset = ctx->id_ * 8 * dec->cache_uv_stride_; + uint8_t* const ydst = dec->cache_y_ - ysize + y_offset; + uint8_t* const udst = dec->cache_u_ - uvsize + uv_offset; + uint8_t* const vdst = dec->cache_v_ - uvsize + uv_offset; + const int first_row = (ctx->mb_y_ == 0); + const int last_row = (ctx->mb_y_ >= dec->br_mb_y_ - 1); + int y_start = MACROBLOCK_VPOS(ctx->mb_y_); + int y_end = MACROBLOCK_VPOS(ctx->mb_y_ + 1); + + if (ctx->filter_row_) { + FilterRow(dec); } + if (io->put) { - int y_start = dec->mb_y_ * 16; - int y_end = y_start + 16; if (!first_row) { y_start -= extra_y_rows; io->y = ydst; io->u = udst; io->v = vdst; } else { - io->y = dec->cache_y_; - io->u = dec->cache_u_; - io->v = dec->cache_v_; + io->y = dec->cache_y_ + y_offset; + io->u = dec->cache_u_ + uv_offset; + io->v = dec->cache_v_ + uv_offset; } + if (!last_row) { y_end -= extra_y_rows; } - if (y_end > io->height) { - y_end = io->height; + if (y_end > io->crop_bottom) { + y_end = io->crop_bottom; // make sure we don't overflow on last row. 
+ } + io->a = NULL; +#ifdef WEBP_EXPERIMENTAL_FEATURES + if (dec->alpha_data_) { + io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start); + if (io->a == NULL) { + return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, + "Could not decode alpha data."); + } } - io->mb_y = y_start; - io->mb_h = y_end - y_start; - if (!io->put(io)) { - return 0; +#endif + if (y_start < io->crop_top) { + const int delta_y = io->crop_top - y_start; + y_start = io->crop_top; + assert(!(delta_y & 1)); + io->y += dec->cache_y_stride_ * delta_y; + io->u += dec->cache_uv_stride_ * (delta_y >> 1); + io->v += dec->cache_uv_stride_ * (delta_y >> 1); + if (io->a) { + io->a += io->width * delta_y; + } + } + if (y_start < y_end) { + io->y += io->crop_left; + io->u += io->crop_left >> 1; + io->v += io->crop_left >> 1; + if (io->a) { + io->a += io->crop_left; + } + io->mb_y = y_start - io->crop_top; + io->mb_w = io->crop_right - io->crop_left; + io->mb_h = y_end - y_start; + ok = io->put(io); } } - // rotate top samples - if (!last_row) { - memcpy(ydst, ydst + 16 * dec->cache_y_stride_, ysize); - memcpy(udst, udst + 8 * dec->cache_uv_stride_, uvsize); - memcpy(vdst, vdst + 8 * dec->cache_uv_stride_, uvsize); + // rotate top samples if needed + if (ctx->id_ + 1 == dec->num_caches_) { + if (!last_row) { + memcpy(dec->cache_y_ - ysize, ydst + 16 * dec->cache_y_stride_, ysize); + memcpy(dec->cache_u_ - uvsize, udst + 8 * dec->cache_uv_stride_, uvsize); + memcpy(dec->cache_v_ - uvsize, vdst + 8 * dec->cache_uv_stride_, uvsize); + } } - return 1; + + return ok; +} + +#undef MACROBLOCK_VPOS + +//------------------------------------------------------------------------------ + +int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) { + int ok = 1; + VP8ThreadContext* const ctx = &dec->thread_ctx_; + if (!dec->use_threads_) { + // ctx->id_ and ctx->f_info_ are already set + ctx->mb_y_ = dec->mb_y_; + ctx->filter_row_ = dec->filter_row_; + ok = VP8FinishRow(dec, io); + } else { + WebPWorker* const 
worker = &dec->worker_; + // Finish previous job *before* updating context + ok &= WebPWorkerSync(worker); + assert(worker->status_ == OK); + if (ok) { // spawn a new deblocking/output job + ctx->io_ = *io; + ctx->id_ = dec->cache_id_; + ctx->mb_y_ = dec->mb_y_; + ctx->filter_row_ = dec->filter_row_; + if (ctx->filter_row_) { // just swap filter info + VP8FInfo* const tmp = ctx->f_info_; + ctx->f_info_ = dec->f_info_; + dec->f_info_ = tmp; + } + WebPWorkerLaunch(worker); + if (++dec->cache_id_ == dec->num_caches_) { + dec->cache_id_ = 0; + } + } + } + return ok; +} + +//------------------------------------------------------------------------------ +// Finish setting up the decoding parameter once user's setup() is called. + +VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) { + // Call setup() first. This may trigger additional decoding features on 'io'. + // Note: Afterward, we must call teardown() not matter what. + if (io->setup && !io->setup(io)) { + VP8SetError(dec, VP8_STATUS_USER_ABORT, "Frame setup failed"); + return dec->status_; + } + + // Disable filtering per user request + if (io->bypass_filtering) { + dec->filter_type_ = 0; + } + // TODO(skal): filter type / strength / sharpness forcing + + // Define the area where we can skip in-loop filtering, in case of cropping. + // + // 'Simple' filter reads two luma samples outside of the macroblock and + // and filters one. It doesn't filter the chroma samples. Hence, we can + // avoid doing the in-loop filtering before crop_top/crop_left position. + // For the 'Complex' filter, 3 samples are read and up to 3 are filtered. + // Means: there's a dependency chain that goes all the way up to the + // top-left corner of the picture (MB #0). We must filter all the previous + // macroblocks. + // TODO(skal): add an 'approximate_decoding' option, that won't produce + // a 1:1 bit-exactness for complex filtering? 
+ { + const int extra_pixels = kFilterExtraRows[dec->filter_type_]; + if (dec->filter_type_ == 2) { + // For complex filter, we need to preserve the dependency chain. + dec->tl_mb_x_ = 0; + dec->tl_mb_y_ = 0; + } else { + // For simple filter, we can filter only the cropped region. + dec->tl_mb_y_ = io->crop_top >> 4; + dec->tl_mb_x_ = io->crop_left >> 4; + } + // We need some 'extra' pixels on the right/bottom. + dec->br_mb_y_ = (io->crop_bottom + 15 + extra_pixels) >> 4; + dec->br_mb_x_ = (io->crop_right + 15 + extra_pixels) >> 4; + if (dec->br_mb_x_ > dec->mb_w_) { + dec->br_mb_x_ = dec->mb_w_; + } + if (dec->br_mb_y_ > dec->mb_h_) { + dec->br_mb_y_ = dec->mb_h_; + } + } + return VP8_STATUS_OK; +} + +int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) { + int ok = 1; + if (dec->use_threads_) { + ok = WebPWorkerSync(&dec->worker_); + } + + if (io->teardown) { + io->teardown(io); + } + return ok; } -//----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // Main reconstruction function. 
static const int kScan[16] = { @@ -358,7 +605,7 @@ void VP8ReconstructBlock(VP8Decoder* const dec) { uint8_t* const dst = y_dst + kScan[n]; VP8PredLuma4[dec->imodes_[n]](dst); if (dec->non_zero_ac_ & (1 << n)) { - VP8Transform(coeffs + n * 16, dst); + VP8Transform(coeffs + n * 16, dst, 0); } else if (dec->non_zero_ & (1 << n)) { // only DC is present VP8TransformDC(coeffs + n * 16, dst); } @@ -370,7 +617,7 @@ void VP8ReconstructBlock(VP8Decoder* const dec) { for (n = 0; n < 16; n++) { uint8_t* const dst = y_dst + kScan[n]; if (dec->non_zero_ac_ & (1 << n)) { - VP8Transform(coeffs + n * 16, dst); + VP8Transform(coeffs + n * 16, dst, 0); } else if (dec->non_zero_ & (1 << n)) { // only DC is present VP8TransformDC(coeffs + n * 16, dst); } @@ -410,7 +657,7 @@ void VP8ReconstructBlock(VP8Decoder* const dec) { } } -//----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ #if defined(__cplusplus) || defined(c_plusplus) } // extern "C" diff --git a/third_party/libwebp/dec/idec.c b/third_party/libwebp/dec/idec.c index d49ceb0..048d3c5 100644 --- a/third_party/libwebp/dec/idec.c +++ b/third_party/libwebp/dec/idec.c @@ -15,15 +15,11 @@ #include "webpi.h" #include "vp8i.h" -#include "yuv.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif -#define RIFF_HEADER_SIZE 20 -#define VP8_HEADER_SIZE 10 -#define WEBP_HEADER_SIZE (RIFF_HEADER_SIZE + VP8_HEADER_SIZE) #define CHUNK_SIZE 4096 #define MAX_MB_SIZE 4096 @@ -32,14 +28,20 @@ extern "C" { // Decoding states. State normally flows like HEADER->PARTS0->DATA->DONE. // If there is any error the decoder goes into state ERROR. -typedef enum { STATE_HEADER = 0, STATE_PARTS0 = 1, - STATE_DATA = 2, STATE_DONE = 3, - STATE_ERROR = 4 +typedef enum { + STATE_PRE_VP8, // All data before that of the first VP8 chunk. + STATE_VP8_FRAME_HEADER, // For VP8 Frame header (within VP8 chunk). 
+ STATE_VP8_PARTS0, + STATE_VP8_DATA, + STATE_DONE, + STATE_ERROR } DecState; // Operating state for the MemBuffer -typedef enum { MEM_MODE_NONE = 0, - MEM_MODE_APPEND, MEM_MODE_MAP +typedef enum { + MEM_MODE_NONE = 0, + MEM_MODE_APPEND, + MEM_MODE_MAP } MemBufferMode; // storage for partition #0 and partial data (in a rolling fashion) @@ -56,12 +58,13 @@ typedef struct { struct WebPIDecoder { DecState state_; // current decoding state - int w_, h_; // width and height WebPDecParams params_; // Params to store output info VP8Decoder* dec_; VP8Io io_; - MemBuffer mem_; // memory buffer + MemBuffer mem_; // input memory buffer. + WebPDecBuffer output_; // output buffer (when no external one is supplied) + uint32_t vp8_size_; // VP8 size extracted from VP8 Header. }; // MB context to restore in case VP8DecodeMB() fails @@ -229,43 +232,63 @@ static void RestoreContext(const MBContext* context, VP8Decoder* const dec, //------------------------------------------------------------------------------ -static VP8StatusCode IDecError(WebPIDecoder* idec, VP8StatusCode error) { +static VP8StatusCode IDecError(WebPIDecoder* const idec, VP8StatusCode error) { + if (idec->state_ == STATE_VP8_DATA) { + VP8Io* const io = &idec->io_; + if (io->teardown) { + io->teardown(io); + } + } idec->state_ = STATE_ERROR; return error; } -// Header -static VP8StatusCode DecodeHeader(WebPIDecoder* const idec) { - int width, height; - uint32_t curr_size, riff_header_size, bits; - WebPDecParams* params = &idec->params_; +static void ChangeState(WebPIDecoder* const idec, DecState new_state, + uint32_t consumed_bytes) { + idec->state_ = new_state; + idec->mem_.start_ += consumed_bytes; + assert(idec->mem_.start_ <= idec->mem_.end_); +} + +// Headers +static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) { const uint8_t* data = idec->mem_.buf_ + idec->mem_.start_; + uint32_t curr_size = MemDataSize(&idec->mem_); + uint32_t vp8_size; + uint32_t bytes_skipped; + VP8StatusCode status; - if 
(MemDataSize(&idec->mem_) < WEBP_HEADER_SIZE) { - return VP8_STATUS_SUSPENDED; + status = WebPParseHeaders(&data, &curr_size, &vp8_size, &bytes_skipped); + if (status == VP8_STATUS_NOT_ENOUGH_DATA) { + return VP8_STATUS_SUSPENDED; // We haven't found a VP8 chunk yet. + } else if (status == VP8_STATUS_OK) { + idec->vp8_size_ = vp8_size; + ChangeState(idec, STATE_VP8_FRAME_HEADER, bytes_skipped); + return VP8_STATUS_OK; // We have skipped all pre-VP8 chunks. + } else { + return IDecError(idec, status); } +} - if (!WebPInitDecParams(data, idec->mem_.end_, &width, &height, params)) { - return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR); - } +static VP8StatusCode DecodeVP8FrameHeader(WebPIDecoder* const idec) { + const uint8_t* data = idec->mem_.buf_ + idec->mem_.start_; + const uint32_t curr_size = MemDataSize(&idec->mem_); + uint32_t bits; - // Validate and Skip over RIFF header - curr_size = MemDataSize(&idec->mem_); - if (!WebPCheckRIFFHeader(&data, &curr_size)) { + if (curr_size < VP8_FRAME_HEADER_SIZE) { + // Not enough data bytes to extract VP8 Frame Header. 
+ return VP8_STATUS_SUSPENDED; + } + if (!VP8GetInfo(data, curr_size, idec->vp8_size_, NULL, NULL, NULL)) { return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR); } - riff_header_size = idec->mem_.end_ - curr_size; - bits = data[0] | (data[1] << 8) | (data[2] << 16); - idec->mem_.part0_size_ = (bits >> 5) + VP8_HEADER_SIZE; - idec->mem_.start_ += riff_header_size; - assert(idec->mem_.start_ <= idec->mem_.end_); + bits = data[0] | (data[1] << 8) | (data[2] << 16); + idec->mem_.part0_size_ = (bits >> 5) + VP8_FRAME_HEADER_SIZE; - idec->w_ = width; - idec->h_ = height; - idec->io_.data_size -= riff_header_size; + idec->io_.data_size = curr_size; idec->io_.data = data; - idec->state_ = STATE_PARTS0; + idec->state_ = STATE_VP8_PARTS0; return VP8_STATUS_OK; } @@ -298,14 +321,13 @@ static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) { VP8Decoder* const dec = idec->dec_; VP8Io* const io = &idec->io_; const WebPDecParams* const params = &idec->params_; - const WEBP_CSP_MODE mode = params->mode; + WebPDecBuffer* const output = params->output; // Wait till we have enough data for the whole partition #0 if (MemDataSize(&idec->mem_) < idec->mem_.part0_size_) { return VP8_STATUS_SUSPENDED; } - io->opaque = &idec->params_; if (!VP8GetHeaders(dec, io)) { const VP8StatusCode status = dec->status_; if (status == VP8_STATUS_SUSPENDED || @@ -316,36 +338,35 @@ static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) { return IDecError(idec, status); } - if (!WebPCheckDecParams(io, params)) { - return IDecError(idec, VP8_STATUS_INVALID_PARAM); + // Allocate/Verify output buffer now + dec->status_ = WebPAllocateDecBuffer(io->width, io->height, params->options, + output); + if (dec->status_ != VP8_STATUS_OK) { + return IDecError(idec, dec->status_); } - if (mode != MODE_YUV) { - VP8YUVInit(); - } - - // allocate memory and prepare everything. 
- if (!VP8InitFrame(dec, io)) { + if (!CopyParts0Data(idec)) { return IDecError(idec, VP8_STATUS_OUT_OF_MEMORY); } - if (io->setup && !io->setup(io)) { - return IDecError(idec, VP8_STATUS_USER_ABORT); - } - // disable filtering per user request (_after_ setup() is called) - if (io->bypass_filtering) dec->filter_type_ = 0; - - if (!CopyParts0Data(idec)) { - return IDecError(idec, VP8_STATUS_OUT_OF_MEMORY); + // Finish setting up the decoding parameters. Will call io->setup(). + if (VP8EnterCritical(dec, io) != VP8_STATUS_OK) { + return IDecError(idec, dec->status_); } - idec->state_ = STATE_DATA; + // Note: past this point, teardown() must always be called + // in case of error. + idec->state_ = STATE_VP8_DATA; + // Allocate memory and prepare everything. + if (!VP8InitFrame(dec, io)) { + return IDecError(idec, dec->status_); + } return VP8_STATUS_OK; } // Remaining partitions static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) { - VP8BitReader* br; + VP8BitReader* br; VP8Decoder* const dec = idec->dec_; VP8Io* const io = &idec->io_; @@ -355,12 +376,8 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) { for (; dec->mb_y_ < dec->mb_h_; ++dec->mb_y_) { VP8BitReader* token_br = &dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)]; if (dec->mb_x_ == 0) { - VP8MB* const left = dec->mb_info_ - 1; - left->nz_ = 0; - left->dc_nz_ = 0; - memset(dec->intra_l_, B_DC_PRED, sizeof(dec->intra_l_)); + VP8InitScanline(dec); } - for (; dec->mb_x_ < dec->mb_w_; dec->mb_x_++) { MBContext context; SaveContext(dec, token_br, &context); @@ -383,14 +400,14 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) { assert(idec->mem_.start_ <= idec->mem_.end_); } } - if (!VP8FinishRow(dec, io)) { + if (!VP8ProcessRow(dec, io)) { return IDecError(idec, VP8_STATUS_USER_ABORT); } dec->mb_x_ = 0; } - - if (io->teardown) { - io->teardown(io); + // Synchronize the thread and check for errors. 
+ if (!VP8ExitCritical(dec, io)) { + return IDecError(idec, VP8_STATUS_USER_ABORT); } dec->ready_ = 0; idec->state_ = STATE_DONE; @@ -403,14 +420,17 @@ static VP8StatusCode IDecode(WebPIDecoder* idec) { VP8StatusCode status = VP8_STATUS_SUSPENDED; assert(idec->dec_); - if (idec->state_ == STATE_HEADER) { - status = DecodeHeader(idec); + if (idec->state_ == STATE_PRE_VP8) { + status = DecodeWebPHeaders(idec); + } + if (idec->state_ == STATE_VP8_FRAME_HEADER) { + status = DecodeVP8FrameHeader(idec); } - if (idec->state_ == STATE_PARTS0) { + if (idec->state_ == STATE_VP8_PARTS0) { status = DecodePartition0(idec); } - if (idec->state_ == STATE_DATA) { - return DecodeRemaining(idec); + if (idec->state_ == STATE_VP8_DATA) { + status = DecodeRemaining(idec); } return status; } @@ -418,9 +438,11 @@ static VP8StatusCode IDecode(WebPIDecoder* idec) { //------------------------------------------------------------------------------ // Public functions -WebPIDecoder* WebPINew(WEBP_CSP_MODE mode) { +WebPIDecoder* WebPINewDecoder(WebPDecBuffer* const output_buffer) { WebPIDecoder* idec = (WebPIDecoder*)calloc(1, sizeof(WebPIDecoder)); - if (!idec) return NULL; + if (idec == NULL) { + return NULL; + } idec->dec_ = VP8New(); if (idec->dec_ == NULL) { @@ -428,53 +450,97 @@ WebPIDecoder* WebPINew(WEBP_CSP_MODE mode) { return NULL; } - idec->state_ = STATE_HEADER; - idec->params_.mode = mode; + idec->state_ = STATE_PRE_VP8; InitMemBuffer(&idec->mem_); + WebPInitDecBuffer(&idec->output_); VP8InitIo(&idec->io_); - WebPInitCustomIo(&idec->io_); + + WebPResetDecParams(&idec->params_); + idec->params_.output = output_buffer ? output_buffer : &idec->output_; + WebPInitCustomIo(&idec->params_, &idec->io_); // Plug the I/O functions. 
+ +#ifdef WEBP_USE_THREAD + idec->dec_->use_threads_ = idec->params_.options && + (idec->params_.options->use_threads > 0); +#else + idec->dec_->use_threads_ = 0; +#endif + idec->vp8_size_ = 0; + + return idec; +} + +WebPIDecoder* WebPIDecode(const uint8_t* data, uint32_t data_size, + WebPDecoderConfig* const config) { + WebPIDecoder* idec; + + // Parse the bitstream's features, if requested: + if (data != NULL && data_size > 0 && config != NULL) { + if (WebPGetFeatures(data, data_size, &config->input) != VP8_STATUS_OK) { + return NULL; + } + } + // Create an instance of the incremental decoder + idec = WebPINewDecoder(config ? &config->output : NULL); + if (!idec) { + return NULL; + } + // Finish initialization + if (config != NULL) { + idec->params_.options = &config->options; + } return idec; } void WebPIDelete(WebPIDecoder* const idec) { if (!idec) return; VP8Delete(idec->dec_); - WebPClearDecParams(&idec->params_); ClearMemBuffer(&idec->mem_); + WebPFreeDecBuffer(&idec->output_); free(idec); } //------------------------------------------------------------------------------ +// Wrapper toward WebPINewDecoder + +WebPIDecoder* WebPINew(WEBP_CSP_MODE mode) { + WebPIDecoder* const idec = WebPINewDecoder(NULL); + if (!idec) return NULL; + idec->output_.colorspace = mode; + return idec; +} WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer, int output_buffer_size, int output_stride) { WebPIDecoder* idec; - if (mode == MODE_YUV) return NULL; - idec = WebPINew(mode); - if (idec == NULL) return NULL; - idec->params_.output = output_buffer; - idec->params_.stride = output_stride; - idec->params_.output_size = output_buffer_size; - idec->params_.external_buffer = 1; + if (mode >= MODE_YUV) return NULL; + idec = WebPINewDecoder(NULL); + if (!idec) return NULL; + idec->output_.colorspace = mode; + idec->output_.is_external_memory = 1; + idec->output_.u.RGBA.rgba = output_buffer; + idec->output_.u.RGBA.stride = output_stride; + idec->output_.u.RGBA.size = 
output_buffer_size; return idec; } WebPIDecoder* WebPINewYUV(uint8_t* luma, int luma_size, int luma_stride, uint8_t* u, int u_size, int u_stride, uint8_t* v, int v_size, int v_stride) { - WebPIDecoder* idec = WebPINew(MODE_YUV); - if (idec == NULL) return NULL; - idec->params_.output = luma; - idec->params_.stride = luma_stride; - idec->params_.output_size = luma_size; - idec->params_.u = u; - idec->params_.u_stride = u_stride; - idec->params_.output_u_size = u_size; - idec->params_.v = v; - idec->params_.v_stride = v_stride; - idec->params_.output_v_size = v_size; - idec->params_.external_buffer = 1; + WebPIDecoder* const idec = WebPINewDecoder(NULL); + if (!idec) return NULL; + idec->output_.colorspace = MODE_YUV; + idec->output_.is_external_memory = 1; + idec->output_.u.YUVA.y = luma; + idec->output_.u.YUVA.y_stride = luma_stride; + idec->output_.u.YUVA.y_size = luma_size; + idec->output_.u.YUVA.u = u; + idec->output_.u.YUVA.u_stride = u_stride; + idec->output_.u.YUVA.u_size = u_size; + idec->output_.u.YUVA.v = v; + idec->output_.u.YUVA.v_stride = v_stride; + idec->output_.u.YUVA.v_size = v_size; return idec; } @@ -538,38 +604,81 @@ VP8StatusCode WebPIUpdate(WebPIDecoder* const idec, const uint8_t* data, //------------------------------------------------------------------------------ -uint8_t* WebPIDecGetRGB(const WebPIDecoder* const idec, int *last_y, +static const WebPDecBuffer* GetOutputBuffer(const WebPIDecoder* const idec) { + if (!idec || !idec->dec_ || idec->state_ <= STATE_VP8_PARTS0) { + return NULL; + } + return idec->params_.output; +} + +const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* const idec, + int* const left, int* const top, + int* const width, int* const height) { + const WebPDecBuffer* const src = GetOutputBuffer(idec); + if (left) *left = 0; + if (top) *top = 0; + // TODO(skal): later include handling of rotations. 
+ if (src) { + if (width) *width = src->width; + if (height) *height = idec->params_.last_y; + } else { + if (width) *width = 0; + if (height) *height = 0; + } + return src; +} + +uint8_t* WebPIDecGetRGB(const WebPIDecoder* const idec, int* last_y, int* width, int* height, int* stride) { - if (!idec || !idec->dec_ || idec->params_.mode != MODE_RGB || - idec->state_ <= STATE_PARTS0) { + const WebPDecBuffer* const src = GetOutputBuffer(idec); + if (!src) return NULL; + if (src->colorspace >= MODE_YUV) { return NULL; } if (last_y) *last_y = idec->params_.last_y; - if (width) *width = idec->w_; - if (height) *height = idec->h_; - if (stride) *stride = idec->params_.stride; + if (width) *width = src->width; + if (height) *height = src->height; + if (stride) *stride = src->u.RGBA.stride; - return idec->params_.output; + return src->u.RGBA.rgba; } -uint8_t* WebPIDecGetYUV(const WebPIDecoder* const idec, int *last_y, - uint8_t** u, uint8_t** v, int* width, int* height, - int *stride, int* uv_stride) { - if (!idec || !idec->dec_ || idec->params_.mode != MODE_YUV || - idec->state_ <= STATE_PARTS0) { +uint8_t* WebPIDecGetYUV(const WebPIDecoder* const idec, int* last_y, + uint8_t** u, uint8_t** v, + int* width, int* height, int *stride, int* uv_stride) { + const WebPDecBuffer* const src = GetOutputBuffer(idec); + if (!src) return NULL; + if (src->colorspace < MODE_YUV) { return NULL; } if (last_y) *last_y = idec->params_.last_y; - if (u) *u = idec->params_.u; - if (v) *v = idec->params_.v; - if (width) *width = idec->w_; - if (height) *height = idec->h_; - if (stride) *stride = idec->params_.stride; - if (uv_stride) *uv_stride = idec->params_.u_stride; + if (u) *u = src->u.YUVA.u; + if (v) *v = src->u.YUVA.v; + if (width) *width = src->width; + if (height) *height = src->height; + if (stride) *stride = src->u.YUVA.y_stride; + if (uv_stride) *uv_stride = src->u.YUVA.u_stride; + + return src->u.YUVA.y; +} - return idec->params_.output; +int WebPISetIOHooks(WebPIDecoder* const 
idec, + VP8IoPutHook put, + VP8IoSetupHook setup, + VP8IoTeardownHook teardown, + void* user_data) { + if (!idec || !idec->dec_ || idec->state_ > STATE_PRE_VP8) { + return 0; + } + + idec->io_.put = put; + idec->io_.setup = setup; + idec->io_.teardown = teardown; + idec->io_.opaque = user_data; + + return 1; } #if defined(__cplusplus) || defined(c_plusplus) diff --git a/third_party/libwebp/dec/io.c b/third_party/libwebp/dec/io.c new file mode 100644 index 0000000..405df3a --- /dev/null +++ b/third_party/libwebp/dec/io.c @@ -0,0 +1,668 @@ +// Copyright 2011 Google Inc. +// +// This code is licensed under the same terms as WebM: +// Software License Agreement: http://www.webmproject.org/license/software/ +// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// ----------------------------------------------------------------------------- +// +// functions for sample output. +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <assert.h> +#include <stdlib.h> +#include "../dec/vp8i.h" +#include "./webpi.h" +#include "../dsp/dsp.h" +#include "../dsp/yuv.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +//------------------------------------------------------------------------------ +// Main YUV<->RGB conversion functions + +static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) { + WebPDecBuffer* output = p->output; + const WebPYUVABuffer* const buf = &output->u.YUVA; + uint8_t* const y_dst = buf->y + io->mb_y * buf->y_stride; + uint8_t* const u_dst = buf->u + (io->mb_y >> 1) * buf->u_stride; + uint8_t* const v_dst = buf->v + (io->mb_y >> 1) * buf->v_stride; + const int mb_w = io->mb_w; + const int mb_h = io->mb_h; + const int uv_w = (mb_w + 1) / 2; + int j; + for (j = 0; j < mb_h; ++j) { + memcpy(y_dst + j * buf->y_stride, io->y + j * io->y_stride, mb_w); + } + for (j = 0; j < (mb_h + 1) / 2; ++j) { + memcpy(u_dst + j * buf->u_stride, io->u + j * io->uv_stride, uv_w); + memcpy(v_dst + j * 
buf->v_stride, io->v + j * io->uv_stride, uv_w); + } + return io->mb_h; +} + +// Point-sampling U/V sampler. +static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) { + WebPDecBuffer* output = p->output; + const WebPRGBABuffer* const buf = &output->u.RGBA; + uint8_t* dst = buf->rgba + io->mb_y * buf->stride; + const uint8_t* y_src = io->y; + const uint8_t* u_src = io->u; + const uint8_t* v_src = io->v; + const WebPSampleLinePairFunc sample = WebPSamplers[output->colorspace]; + const int mb_w = io->mb_w; + const int last = io->mb_h - 1; + int j; + for (j = 0; j < last; j += 2) { + sample(y_src, y_src + io->y_stride, u_src, v_src, + dst, dst + buf->stride, mb_w); + y_src += 2 * io->y_stride; + u_src += io->uv_stride; + v_src += io->uv_stride; + dst += 2 * buf->stride; + } + if (j == last) { // Just do the last line twice + sample(y_src, y_src, u_src, v_src, dst, dst, mb_w); + } + return io->mb_h; +} + +//------------------------------------------------------------------------------ +// YUV444 -> RGB conversion + +#if 0 // TODO(skal): this is for future rescaling. 
+static int EmitRGB(const VP8Io* const io, WebPDecParams* const p) { + WebPDecBuffer* output = p->output; + const WebPRGBABuffer* const buf = &output->u.RGBA; + uint8_t* dst = buf->rgba + io->mb_y * buf->stride; + const uint8_t* y_src = io->y; + const uint8_t* u_src = io->u; + const uint8_t* v_src = io->v; + const WebPYUV444Converter convert = WebPYUV444Converters[output->colorspace]; + const int mb_w = io->mb_w; + const int last = io->mb_h; + int j; + for (j = 0; j < last; ++j) { + convert(y_src, u_src, v_src, dst, mb_w); + y_src += io->y_stride; + u_src += io->uv_stride; + v_src += io->uv_stride; + dst += buf->stride; + } + return io->mb_h; +} +#endif + +//------------------------------------------------------------------------------ +// Fancy upsampling + +#ifdef FANCY_UPSAMPLING +static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) { + int num_lines_out = io->mb_h; // a priori guess + const WebPRGBABuffer* const buf = &p->output->u.RGBA; + uint8_t* dst = buf->rgba + io->mb_y * buf->stride; + const WebPUpsampleLinePairFunc upsample = + io->a ? WebPUpsamplersKeepAlpha[p->output->colorspace] + : WebPUpsamplers[p->output->colorspace]; + const uint8_t* cur_y = io->y; + const uint8_t* cur_u = io->u; + const uint8_t* cur_v = io->v; + const uint8_t* top_u = p->tmp_u; + const uint8_t* top_v = p->tmp_v; + int y = io->mb_y; + int y_end = io->mb_y + io->mb_h; + const int mb_w = io->mb_w; + const int uv_w = (mb_w + 1) / 2; + + if (y == 0) { + // First line is special cased. We mirror the u/v samples at boundary. + upsample(NULL, cur_y, cur_u, cur_v, cur_u, cur_v, NULL, dst, mb_w); + } else { + // We can finish the left-over line from previous call. + // Warning! Don't overwrite the alpha values (if any), as they + // are not lagging one line behind but are already written. + upsample(p->tmp_y, cur_y, top_u, top_v, cur_u, cur_v, + dst - buf->stride, dst, mb_w); + num_lines_out++; + } + // Loop over each output pairs of row. 
+ for (; y + 2 < y_end; y += 2) { + top_u = cur_u; + top_v = cur_v; + cur_u += io->uv_stride; + cur_v += io->uv_stride; + dst += 2 * buf->stride; + cur_y += 2 * io->y_stride; + upsample(cur_y - io->y_stride, cur_y, + top_u, top_v, cur_u, cur_v, + dst - buf->stride, dst, mb_w); + } + // move to last row + cur_y += io->y_stride; + if (io->crop_top + y_end < io->crop_bottom) { + // Save the unfinished samples for next call (as we're not done yet). + memcpy(p->tmp_y, cur_y, mb_w * sizeof(*p->tmp_y)); + memcpy(p->tmp_u, cur_u, uv_w * sizeof(*p->tmp_u)); + memcpy(p->tmp_v, cur_v, uv_w * sizeof(*p->tmp_v)); + // The fancy upsampler leaves a row unfinished behind + // (except for the very last row) + num_lines_out--; + } else { + // Process the very last row of even-sized picture + if (!(y_end & 1)) { + upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, + dst + buf->stride, NULL, mb_w); + } + } + return num_lines_out; +} + +#endif /* FANCY_UPSAMPLING */ + +//------------------------------------------------------------------------------ + +#ifdef WEBP_EXPERIMENTAL_FEATURES +static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p) { + const int mb_w = io->mb_w; + const int mb_h = io->mb_h; + int j; + const WebPYUVABuffer* const buf = &p->output->u.YUVA; + uint8_t* dst = buf->a + io->mb_y * buf->a_stride; + const uint8_t* alpha = io->a; + if (alpha) { + for (j = 0; j < mb_h; ++j) { + memcpy(dst, alpha, mb_w * sizeof(*dst)); + alpha += io->width; + dst += buf->a_stride; + } + } + return 0; +} + +static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p) { + const int mb_w = io->mb_w; + const int mb_h = io->mb_h; + int i, j; + const WebPRGBABuffer* const buf = &p->output->u.RGBA; + uint8_t* dst = buf->rgba + io->mb_y * buf->stride; + const uint8_t* alpha = io->a; + if (alpha) { + for (j = 0; j < mb_h; ++j) { + for (i = 0; i < mb_w; ++i) { + dst[4 * i + 3] = alpha[i]; + } + alpha += io->width; + dst += buf->stride; + } + } + return 0; +} + +#endif /* 
WEBP_EXPERIMENTAL_FEATURES */ + +//------------------------------------------------------------------------------ +// Simple picture rescaler + +// TODO(skal): start a common library for encoder and decoder, and factorize +// this code in. + +#define RFIX 30 +#define MULT(x,y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX) + +static void InitRescaler(WebPRescaler* const wrk, + int src_width, int src_height, + uint8_t* dst, + int dst_width, int dst_height, int dst_stride, + int x_add, int x_sub, int y_add, int y_sub, + int32_t* work) { + wrk->x_expand = (src_width < dst_width); + wrk->src_width = src_width; + wrk->src_height = src_height; + wrk->dst_width = dst_width; + wrk->dst_height = dst_height; + wrk->dst = dst; + wrk->dst_stride = dst_stride; + // for 'x_expand', we use bilinear interpolation + wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add - x_sub; + wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub; + wrk->y_accum = y_add; + wrk->y_add = y_add; + wrk->y_sub = y_sub; + wrk->fx_scale = (1 << RFIX) / x_sub; + wrk->fy_scale = (1 << RFIX) / y_sub; + wrk->fxy_scale = wrk->x_expand ? + ((int64_t)dst_height << RFIX) / (x_sub * src_height) : + ((int64_t)dst_height << RFIX) / (x_add * src_height); + wrk->irow = work; + wrk->frow = work + dst_width; +} + +static inline void ImportRow(const uint8_t* const src, + WebPRescaler* const wrk) { + int x_in = 0; + int x_out; + int accum = 0; + if (!wrk->x_expand) { + int sum = 0; + for (x_out = 0; x_out < wrk->dst_width; ++x_out) { + accum += wrk->x_add; + for (; accum > 0; accum -= wrk->x_sub) { + sum += src[x_in++]; + } + { // Emit next horizontal pixel. 
+ const int32_t base = src[x_in++]; + const int32_t frac = base * (-accum); + wrk->frow[x_out] = (sum + base) * wrk->x_sub - frac; + // fresh fractional start for next pixel + sum = MULT(frac, wrk->fx_scale); + } + } + } else { // simple bilinear interpolation + int left = src[0], right = src[0]; + for (x_out = 0; x_out < wrk->dst_width; ++x_out) { + if (accum < 0) { + left = right; + right = src[++x_in]; + accum += wrk->x_add; + } + wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum; + accum -= wrk->x_sub; + } + } + // Accumulate the new row's contribution + for (x_out = 0; x_out < wrk->dst_width; ++x_out) { + wrk->irow[x_out] += wrk->frow[x_out]; + } +} + +static void ExportRow(WebPRescaler* const wrk) { + int x_out; + const int yscale = wrk->fy_scale * (-wrk->y_accum); + assert(wrk->y_accum <= 0); + for (x_out = 0; x_out < wrk->dst_width; ++x_out) { + const int frac = MULT(wrk->frow[x_out], yscale); + const int v = (int)MULT(wrk->irow[x_out] - frac, wrk->fxy_scale); + wrk->dst[x_out] = (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; + wrk->irow[x_out] = frac; // new fractional start + } + wrk->y_accum += wrk->y_add; + wrk->dst += wrk->dst_stride; +} + +#undef MULT +#undef RFIX + +//------------------------------------------------------------------------------ +// YUV rescaling (no final RGB conversion needed) + +static int Rescale(const uint8_t* src, int src_stride, + int new_lines, WebPRescaler* const wrk) { + int num_lines_out = 0; + while (new_lines-- > 0) { // import new contribution of one source row. 
+ ImportRow(src, wrk); + src += src_stride; + wrk->y_accum -= wrk->y_sub; + while (wrk->y_accum <= 0) { // emit output row(s) + ExportRow(wrk); + num_lines_out++; + } + } + return num_lines_out; +} + +static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) { + const int mb_h = io->mb_h; + const int uv_mb_h = (mb_h + 1) >> 1; + const int num_lines_out = Rescale(io->y, io->y_stride, mb_h, &p->scaler_y); + Rescale(io->u, io->uv_stride, uv_mb_h, &p->scaler_u); + Rescale(io->v, io->uv_stride, uv_mb_h, &p->scaler_v); + return num_lines_out; +} + +static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p) { + if (io->a) { + Rescale(io->a, io->width, io->mb_h, &p->scaler_a); + } + return 0; +} + +static int IsAlphaMode(WEBP_CSP_MODE mode) { + return (mode == MODE_RGBA || mode == MODE_BGRA || mode == MODE_ARGB || + mode == MODE_RGBA_4444 || mode == MODE_YUVA); +} + +static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) { + const int has_alpha = IsAlphaMode(p->output->colorspace); + const WebPYUVABuffer* const buf = &p->output->u.YUVA; + const int out_width = io->scaled_width; + const int out_height = io->scaled_height; + const int uv_out_width = (out_width + 1) >> 1; + const int uv_out_height = (out_height + 1) >> 1; + const int uv_in_width = (io->mb_w + 1) >> 1; + const int uv_in_height = (io->mb_h + 1) >> 1; + const size_t work_size = 2 * out_width; // scratch memory for luma rescaler + const size_t uv_work_size = 2 * uv_out_width; // and for each u/v ones + size_t tmp_size; + int32_t* work; + + tmp_size = work_size + 2 * uv_work_size; + if (has_alpha) { + tmp_size += work_size; + } + p->memory = calloc(1, tmp_size * sizeof(*work)); + if (p->memory == NULL) { + return 0; // memory error + } + work = (int32_t*)p->memory; + InitRescaler(&p->scaler_y, io->mb_w, io->mb_h, + buf->y, out_width, out_height, buf->y_stride, + io->mb_w, out_width, io->mb_h, out_height, + work); + InitRescaler(&p->scaler_u, uv_in_width, 
uv_in_height, + buf->u, uv_out_width, uv_out_height, buf->u_stride, + uv_in_width, uv_out_width, + uv_in_height, uv_out_height, + work + work_size); + InitRescaler(&p->scaler_v, uv_in_width, uv_in_height, + buf->v, uv_out_width, uv_out_height, buf->v_stride, + uv_in_width, uv_out_width, + uv_in_height, uv_out_height, + work + work_size + uv_work_size); + p->emit = EmitRescaledYUV; + if (has_alpha) { + InitRescaler(&p->scaler_a, io->mb_w, io->mb_h, + buf->a, out_width, out_height, buf->a_stride, + io->mb_w, out_width, io->mb_h, out_height, + work + work_size + 2 * uv_work_size); + p->emit_alpha = EmitRescaledAlphaYUV; + } + return 1; +} + +//------------------------------------------------------------------------------ +// RGBA rescaling + +// import new contributions until one row is ready to be output, or all input +// is consumed. +static int Import(const uint8_t* src, int src_stride, + int new_lines, WebPRescaler* const wrk) { + int num_lines_in = 0; + while (num_lines_in < new_lines && wrk->y_accum > 0) { + ImportRow(src, wrk); + src += src_stride; + ++num_lines_in; + wrk->y_accum -= wrk->y_sub; + } + return num_lines_in; +} + +static int ExportRGB(WebPDecParams* const p, int y_pos) { + const WebPYUV444Converter convert = + WebPYUV444Converters[p->output->colorspace]; + const WebPRGBABuffer* const buf = &p->output->u.RGBA; + uint8_t* dst = buf->rgba + (p->last_y + y_pos) * buf->stride; + int num_lines_out = 0; + // For RGB rescaling, because of the YUV420, current scan position + // U/V can be +1/-1 line from the Y one. Hence the double test. 
+ while (p->scaler_y.y_accum <= 0 && p->scaler_u.y_accum <= 0) { + assert(p->last_y + y_pos + num_lines_out < p->output->height); + assert(p->scaler_u.y_accum == p->scaler_v.y_accum); + ExportRow(&p->scaler_y); + ExportRow(&p->scaler_u); + ExportRow(&p->scaler_v); + convert(p->scaler_y.dst, p->scaler_u.dst, p->scaler_v.dst, + dst, p->scaler_y.dst_width); + dst += buf->stride; + num_lines_out++; + } + return num_lines_out; +} + +static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) { + const int mb_h = io->mb_h; + const int uv_mb_h = (mb_h + 1) >> 1; + int j = 0, uv_j = 0; + int num_lines_out = 0; + while (j < mb_h) { + const int y_lines_in = Import(io->y + j * io->y_stride, io->y_stride, + mb_h - j, &p->scaler_y); + const int u_lines_in = Import(io->u + uv_j * io->uv_stride, io->uv_stride, + uv_mb_h - uv_j, &p->scaler_u); + const int v_lines_in = Import(io->v + uv_j * io->uv_stride, io->uv_stride, + uv_mb_h - uv_j, &p->scaler_v); + (void)v_lines_in; // remove a gcc warning + assert(u_lines_in == v_lines_in); + j += y_lines_in; + uv_j += u_lines_in; + num_lines_out += ExportRGB(p, num_lines_out); + } + return num_lines_out; +} + +static int ExportAlpha(WebPDecParams* const p, int y_pos) { + const WebPRGBABuffer* const buf = &p->output->u.RGBA; + uint8_t* dst = buf->rgba + (p->last_y + y_pos) * buf->stride; + int num_lines_out = 0; + while (p->scaler_a.y_accum <= 0) { + int i; + assert(p->last_y + y_pos + num_lines_out < p->output->height); + ExportRow(&p->scaler_a); + for (i = 0; i < p->scaler_a.dst_width; ++i) { + dst[4 * i + 3] = p->scaler_a.dst[i]; + } + dst += buf->stride; + num_lines_out++; + } + return num_lines_out; +} + +static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p) { + if (io->a) { + int j = 0, pos = 0; + while (j < io->mb_h) { + j += Import(io->a + j * io->width, io->width, io->mb_h - j, &p->scaler_a); + pos += ExportAlpha(p, pos); + } + } + return 0; +} + +static int InitRGBRescaler(const VP8Io* const 
io, WebPDecParams* const p) { + const int has_alpha = IsAlphaMode(p->output->colorspace); + const int out_width = io->scaled_width; + const int out_height = io->scaled_height; + const int uv_in_width = (io->mb_w + 1) >> 1; + const int uv_in_height = (io->mb_h + 1) >> 1; + const size_t work_size = 2 * out_width; // scratch memory for one rescaler + int32_t* work; // rescalers work area + uint8_t* tmp; // tmp storage for scaled YUV444 samples before RGB conversion + size_t tmp_size1, tmp_size2; + + tmp_size1 = 3 * work_size; + tmp_size2 = 3 * out_width; + if (has_alpha) { + tmp_size1 += work_size; + tmp_size2 += out_width; + } + p->memory = + calloc(1, tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp)); + if (p->memory == NULL) { + return 0; // memory error + } + work = (int32_t*)p->memory; + tmp = (uint8_t*)(work + tmp_size1); + InitRescaler(&p->scaler_y, io->mb_w, io->mb_h, + tmp + 0 * out_width, out_width, out_height, 0, + io->mb_w, out_width, io->mb_h, out_height, + work + 0 * work_size); + InitRescaler(&p->scaler_u, uv_in_width, uv_in_height, + tmp + 1 * out_width, out_width, out_height, 0, + io->mb_w, 2 * out_width, io->mb_h, 2 * out_height, + work + 1 * work_size); + InitRescaler(&p->scaler_v, uv_in_width, uv_in_height, + tmp + 2 * out_width, out_width, out_height, 0, + io->mb_w, 2 * out_width, io->mb_h, 2 * out_height, + work + 2 * work_size); + p->emit = EmitRescaledRGB; + + if (has_alpha) { + InitRescaler(&p->scaler_a, io->mb_w, io->mb_h, + tmp + 3 * out_width, out_width, out_height, 0, + io->mb_w, out_width, io->mb_h, out_height, + work + 3 * work_size); + p->emit_alpha = EmitRescaledAlphaRGB; + } + return 1; +} + +//------------------------------------------------------------------------------ +// Default custom functions + +// Setup crop_xxx fields, mb_w and mb_h +static int InitFromOptions(const WebPDecoderOptions* const options, + VP8Io* const io) { + const int W = io->width; + const int H = io->height; + int x = 0, y = 0, w = W, h = H; + + // 
Cropping + io->use_cropping = (options != NULL) && (options->use_cropping > 0); + if (io->use_cropping) { + w = options->crop_width; + h = options->crop_height; + // TODO(skal): take colorspace into account. Don't assume YUV420. + x = options->crop_left & ~1; + y = options->crop_top & ~1; + if (x < 0 || y < 0 || w <= 0 || h <= 0 || x + w > W || y + h > H) { + return 0; // out of frame boundary error + } + } + io->crop_left = x; + io->crop_top = y; + io->crop_right = x + w; + io->crop_bottom = y + h; + io->mb_w = w; + io->mb_h = h; + + // Scaling + io->use_scaling = (options != NULL) && (options->use_scaling > 0); + if (io->use_scaling) { + if (options->scaled_width <= 0 || options->scaled_height <= 0) { + return 0; + } + io->scaled_width = options->scaled_width; + io->scaled_height = options->scaled_height; + } + + // Filter + io->bypass_filtering = options && options->bypass_filtering; + + // Fancy upsampler +#ifdef FANCY_UPSAMPLING + io->fancy_upsampling = (options == NULL) || (!options->no_fancy_upsampling); +#endif + + if (io->use_scaling) { + // disable filter (only for large downscaling ratio). + io->bypass_filtering = (io->scaled_width < W * 3 / 4) && + (io->scaled_height < H * 3 / 4); + io->fancy_upsampling = 0; + } + return 1; +} + +static int CustomSetup(VP8Io* io) { + WebPDecParams* const p = (WebPDecParams*)io->opaque; + const int is_rgb = (p->output->colorspace < MODE_YUV); + + p->memory = NULL; + p->emit = NULL; + p->emit_alpha = NULL; + if (!InitFromOptions(p->options, io)) { + return 0; + } + + if (io->use_scaling) { + const int ok = is_rgb ? InitRGBRescaler(io, p) : InitYUVRescaler(io, p); + if (!ok) { + return 0; // memory error + } + } else { + if (is_rgb) { + p->emit = EmitSampledRGB; // default +#ifdef FANCY_UPSAMPLING + if (io->fancy_upsampling) { + const int uv_width = (io->mb_w + 1) >> 1; + p->memory = malloc(io->mb_w + 2 * uv_width); + if (p->memory == NULL) { + return 0; // memory error. 
+ } + p->tmp_y = (uint8_t*)p->memory; + p->tmp_u = p->tmp_y + io->mb_w; + p->tmp_v = p->tmp_u + uv_width; + p->emit = EmitFancyRGB; + WebPInitUpsamplers(); + } +#endif + } else { + p->emit = EmitYUV; + } +#ifdef WEBP_EXPERIMENTAL_FEATURES + if (IsAlphaMode(p->output->colorspace)) { + // We need transparency output + p->emit_alpha = is_rgb ? EmitAlphaRGB : EmitAlphaYUV; + } +#endif + } + + if (is_rgb) { + VP8YUVInit(); + } + return 1; +} + +//------------------------------------------------------------------------------ + +static int CustomPut(const VP8Io* io) { + WebPDecParams* p = (WebPDecParams*)io->opaque; + const int mb_w = io->mb_w; + const int mb_h = io->mb_h; + int num_lines_out; + assert(!(io->mb_y & 1)); + + if (mb_w <= 0 || mb_h <= 0) { + return 0; + } + num_lines_out = p->emit(io, p); + if (p->emit_alpha) { + p->emit_alpha(io, p); + } + p->last_y += num_lines_out; + return 1; +} + +//------------------------------------------------------------------------------ + +static void CustomTeardown(const VP8Io* io) { + WebPDecParams* const p = (WebPDecParams*)io->opaque; + free(p->memory); + p->memory = NULL; +} + +//------------------------------------------------------------------------------ +// Main entry point + +void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io) { + io->put = CustomPut; + io->setup = CustomSetup; + io->teardown = CustomTeardown; + io->opaque = params; +} + +//------------------------------------------------------------------------------ + +#if defined(__cplusplus) || defined(c_plusplus) +} // extern "C" +#endif diff --git a/third_party/libwebp/dec/layer.c b/third_party/libwebp/dec/layer.c new file mode 100644 index 0000000..f7d41e0 --- /dev/null +++ b/third_party/libwebp/dec/layer.c @@ -0,0 +1,34 @@ +// Copyright 2011 Google Inc. 
+// +// This code is licensed under the same terms as WebM: +// Software License Agreement: http://www.webmproject.org/license/software/ +// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// ----------------------------------------------------------------------------- +// +// Enhancement layer (for YUV444/422) +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <assert.h> +#include <stdlib.h> +#include "vp8i.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +//------------------------------------------------------------------------------ + +int VP8DecodeLayer(VP8Decoder* const dec) { + assert(dec); + assert(dec->layer_data_size_ > 0); + (void)dec; + + // TODO: handle enhancement layer here. + + return 1; +} + +#if defined(__cplusplus) || defined(c_plusplus) +} // extern "C" +#endif diff --git a/third_party/libwebp/dec/quant.c b/third_party/libwebp/dec/quant.c index 47edbf5..aee4fd3 100644 --- a/third_party/libwebp/dec/quant.c +++ b/third_party/libwebp/dec/quant.c @@ -58,7 +58,7 @@ static const uint16_t kAcTable[128] = { 249, 254, 259, 264, 269, 274, 279, 284 }; -//----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // Paragraph 9.6 void VP8ParseQuant(VP8Decoder* const dec) { @@ -104,7 +104,7 @@ void VP8ParseQuant(VP8Decoder* const dec) { } } -//----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ #if defined(__cplusplus) || defined(c_plusplus) } // extern "C" diff --git a/third_party/libwebp/dec/tree.c b/third_party/libwebp/dec/tree.c index ed6caad..7055216 100644 --- a/third_party/libwebp/dec/tree.c +++ b/third_party/libwebp/dec/tree.c @@ -65,7 +65,7 @@ static const int8_t kMVRef4[6] = { }; #endif -//----------------------------------------------------------------------------- 
+//------------------------------------------------------------------------------ // Default probabilities // Inter @@ -385,7 +385,7 @@ void VP8ParseIntraMode(VP8BitReader* const br, VP8Decoder* const dec) { : VP8GetBit(br, 183) ? TM_PRED : H_PRED; } -//----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // Paragraph 13 static const uint8_t diff --git a/third_party/libwebp/dec/vp8.c b/third_party/libwebp/dec/vp8.c index 43a0c35..9149284 100644 --- a/third_party/libwebp/dec/vp8.c +++ b/third_party/libwebp/dec/vp8.c @@ -11,18 +11,19 @@ #include <stdlib.h> #include "vp8i.h" +#include "webpi.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif -//----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ int WebPGetDecoderVersion(void) { return (DEC_MAJ_VERSION << 16) | (DEC_MIN_VERSION << 8) | DEC_REV_VERSION; } -//----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // VP8Decoder static void SetOk(VP8Decoder* const dec) { @@ -43,6 +44,7 @@ VP8Decoder* VP8New(void) { VP8Decoder* dec = (VP8Decoder*)calloc(1, sizeof(VP8Decoder)); if (dec) { SetOk(dec); + WebPWorkerInit(&dec->worker_); dec->ready_ = 0; } return dec; @@ -74,7 +76,56 @@ int VP8SetError(VP8Decoder* const dec, return 0; } -//----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ + +int VP8GetInfo(const uint8_t* data, uint32_t data_size, uint32_t chunk_size, + int* width, int* height, int* has_alpha) { + if (data_size < 10) { + return 0; // not enough data + } + // check signature + if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a) { + return 0; // Wrong signature. 
+ } else { + const uint32_t bits = data[0] | (data[1] << 8) | (data[2] << 16); + const int key_frame = !(bits & 1); + const int w = ((data[7] << 8) | data[6]) & 0x3fff; + const int h = ((data[9] << 8) | data[8]) & 0x3fff; + + if (has_alpha) { +#ifdef WEBP_EXPERIMENTAL_FEATURES + if (data_size < 11) return 0; + *has_alpha = !!(data[10] & 0x80); // the colorspace_ bit +#else + *has_alpha = 0; +#endif + } + if (!key_frame) { // Not a keyframe. + return 0; + } + + if (((bits >> 1) & 7) > 3) { + return 0; // unknown profile + } + if (!((bits >> 4) & 1)) { + return 0; // first frame is invisible! + } + if (((bits >> 5)) >= chunk_size) { // partition_length + return 0; // inconsistent size information. + } + + if (width) { + *width = w; + } + if (height) { + *height = h; + } + + return 1; + } +} + +//------------------------------------------------------------------------------ // Header parsing static void ResetSegmentHeader(VP8SegmentHeader* const hdr) { @@ -194,14 +245,12 @@ static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) { return !br->eof_; } -static inline uint32_t get_le32(const uint8_t* const data) { - return data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24); -} - // Topmost call int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) { - uint8_t* buf; + const uint8_t* buf; uint32_t buf_size; + uint32_t vp8_chunk_size; + uint32_t bytes_skipped; VP8FrameHeader* frm_hdr; VP8PictureHeader* pic_hdr; VP8BitReader* br; @@ -216,41 +265,19 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) { "null VP8Io passed to VP8GetHeaders()"); } - buf = (uint8_t *)io->data; + buf = io->data; buf_size = io->data_size; - if (buf == NULL || buf_size <= 4) { - return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, - "Not enough data to parse frame header"); + + // Process Pre-VP8 chunks. 
+ status = WebPParseHeaders(&buf, &buf_size, &vp8_chunk_size, &bytes_skipped); + if (status != VP8_STATUS_OK) { + return VP8SetError(dec, status, "Incorrect/incomplete header."); } - // Skip over valid RIFF headers - if (!memcmp(buf, "RIFF", 4)) { - uint32_t riff_size; - uint32_t chunk_size; - if (buf_size < 20 + 4) { - return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, - "RIFF: Truncated header."); - } - if (memcmp(buf + 8, "WEBP", 4)) { // wrong image file signature - return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, - "RIFF: WEBP signature not found."); - } - riff_size = get_le32(buf + 4); - if (riff_size < 12) { - return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, - "RIFF: Truncated header."); - } - if (memcmp(buf + 12, "VP8 ", 4)) { - return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, - "RIFF: Invalid compression format."); - } - chunk_size = get_le32(buf + 16); - if (chunk_size > riff_size - 12) { - return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, - "RIFF: Inconsistent size information."); - } - buf += 20; - buf_size -= 20; + // Process the VP8 frame header. 
+ if (buf_size < 4) { + return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, + "Truncated header."); } // Paragraph 9.1 @@ -291,8 +318,17 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) { dec->mb_w_ = (pic_hdr->width_ + 15) >> 4; dec->mb_h_ = (pic_hdr->height_ + 15) >> 4; + // Setup default output area (can be later modified during io->setup()) io->width = pic_hdr->width_; io->height = pic_hdr->height_; + io->use_scaling = 0; + io->use_cropping = 0; + io->crop_top = 0; + io->crop_left = 0; + io->crop_right = io->width; + io->crop_bottom = io->height; + io->mb_w = io->width; // sanity check + io->mb_h = io->height; // ditto VP8ResetProba(&dec->proba_); ResetSegmentHeader(&dec->segment_hdr_); @@ -305,6 +341,10 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) { return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, "bad partition length"); } + + dec->alpha_data_ = NULL; + dec->alpha_data_size_ = 0; + br = &dec->br_; VP8InitBitReader(br, buf, buf + frm_hdr->partition_length_); buf += frm_hdr->partition_length_; @@ -368,12 +408,42 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) { VP8ParseProba(br, dec); +#ifdef WEBP_EXPERIMENTAL_FEATURES + // Extensions + if (dec->pic_hdr_.colorspace_) { + const size_t kTrailerSize = 8; + const uint8_t kTrailerMarker = 0x01; + const uint8_t* ext_buf = buf - kTrailerSize; + size_t size; + + if (frm_hdr->partition_length_ < kTrailerSize || + ext_buf[kTrailerSize - 1] != kTrailerMarker) { + Error: + return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, + "RIFF: Inconsistent extra information."); + } + // Alpha + size = (ext_buf[4] << 0) | (ext_buf[5] << 8) | (ext_buf[6] << 16); + if (frm_hdr->partition_length_ < size + kTrailerSize) { + goto Error; + } + dec->alpha_data_ = (size > 0) ? 
ext_buf - size : NULL; + dec->alpha_data_size_ = size; + + // Layer + size = (ext_buf[0] << 0) | (ext_buf[1] << 8) | (ext_buf[2] << 16); + dec->layer_data_size_ = size; + dec->layer_data_ = NULL; // will be set later + dec->layer_colorspace_ = ext_buf[3]; + } +#endif + // sanitized state dec->ready_ = 1; return 1; } -//----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // Residual decoding (Paragraph 13.2 / 13.3) static const uint8_t kBands[16 + 1] = { @@ -386,7 +456,7 @@ static const uint8_t kCat4[] = { 176, 155, 140, 135, 0 }; static const uint8_t kCat5[] = { 180, 157, 141, 134, 130, 0 }; static const uint8_t kCat6[] = { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 }; -static const uint8_t * const kCat3456[] = { kCat3, kCat4, kCat5, kCat6 }; +static const uint8_t* const kCat3456[] = { kCat3, kCat4, kCat5, kCat6 }; static const uint8_t kZigzag[16] = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 }; @@ -422,7 +492,8 @@ static int GetCoeffs(VP8BitReader* const br, ProbaArray prob, if (!VP8GetBit(br, p[7])) { v = 5 + VP8GetBit(br, 159); } else { - v = 7 + 2 * VP8GetBit(br, 165) + VP8GetBit(br, 145); + v = 7 + 2 * VP8GetBit(br, 165); + v += VP8GetBit(br, 145); } } else { const uint8_t* tab; @@ -551,7 +622,7 @@ static void ParseResiduals(VP8Decoder* const dec, } #undef PACK -//----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // Main loop int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) { @@ -588,16 +659,21 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) { return (!token_br->eof_); } +void VP8InitScanline(VP8Decoder* const dec) { + VP8MB* const left = dec->mb_info_ - 1; + left->nz_ = 0; + left->dc_nz_ = 0; + memset(dec->intra_l_, B_DC_PRED, sizeof(dec->intra_l_)); + dec->filter_row_ = + 
(dec->filter_type_ > 0) && + (dec->mb_y_ >= dec->tl_mb_y_) && (dec->mb_y_ <= dec->br_mb_y_); +} + static int ParseFrame(VP8Decoder* const dec, VP8Io* io) { - for (dec->mb_y_ = 0; dec->mb_y_ < dec->mb_h_; ++dec->mb_y_) { - VP8MB* const left = dec->mb_info_ - 1; + for (dec->mb_y_ = 0; dec->mb_y_ < dec->br_mb_y_; ++dec->mb_y_) { VP8BitReader* const token_br = &dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)]; - - left->nz_ = 0; - left->dc_nz_ = 0; - memset(dec->intra_l_, B_DC_PRED, sizeof(dec->intra_l_)); - + VP8InitScanline(dec); for (dec->mb_x_ = 0; dec->mb_x_ < dec->mb_w_; dec->mb_x_++) { if (!VP8DecodeMB(dec, token_br)) { return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, @@ -608,11 +684,13 @@ static int ParseFrame(VP8Decoder* const dec, VP8Io* io) { // Store data and save block's filtering params VP8StoreBlock(dec); } - if (!VP8FinishRow(dec, io)) { - return VP8SetError(dec, VP8_STATUS_USER_ABORT, - "Output aborted."); + if (!VP8ProcessRow(dec, io)) { + return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted."); } } + if (dec->use_threads_ && !WebPWorkerSync(&dec->worker_)) { + return 0; + } // Finish #ifndef ONLY_KEYFRAME_CODE @@ -621,11 +699,20 @@ static int ParseFrame(VP8Decoder* const dec, VP8Io* io) { } #endif +#ifdef WEBP_EXPERIMENTAL_FEATURES + if (dec->layer_data_size_ > 0) { + if (!VP8DecodeLayer(dec)) { + return 0; + } + } +#endif + return 1; } // Main entry point int VP8Decode(VP8Decoder* const dec, VP8Io* const io) { + int ok = 0; if (dec == NULL) { return 0; } @@ -641,32 +728,22 @@ int VP8Decode(VP8Decoder* const dec, VP8Io* const io) { } assert(dec->ready_); - // will allocate memory and prepare everything. - if (!VP8InitFrame(dec, io)) { - VP8Clear(dec); - return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY, - "Allocation failed"); - } + // Finish setting up the decoding parameter. Will call io->setup(). + ok = (VP8EnterCritical(dec, io) == VP8_STATUS_OK); + if (ok) { // good to go. + // Will allocate memory and prepare everything. 
+ if (ok) ok = VP8InitFrame(dec, io); - if (io->setup && !io->setup(io)) { - VP8Clear(dec); - return VP8SetError(dec, VP8_STATUS_USER_ABORT, - "Frame setup failed"); - } + // Main decoding loop + if (ok) ok = ParseFrame(dec, io); - // Disable filtering per user request (_after_ setup() is called) - if (io->bypass_filtering) dec->filter_type_ = 0; + // Exit. + ok &= VP8ExitCritical(dec, io); + } - // Main decoding loop - { - const int ret = ParseFrame(dec, io); - if (io->teardown) { - io->teardown(io); - } - if (!ret) { - VP8Clear(dec); - return 0; - } + if (!ok) { + VP8Clear(dec); + return 0; } dec->ready_ = 0; @@ -677,6 +754,9 @@ void VP8Clear(VP8Decoder* const dec) { if (dec == NULL) { return; } + if (dec->use_threads_) { + WebPWorkerEnd(&dec->worker_); + } if (dec->mem_) { free(dec->mem_); } @@ -686,7 +766,7 @@ void VP8Clear(VP8Decoder* const dec) { dec->ready_ = 0; } -//----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ #if defined(__cplusplus) || defined(c_plusplus) } // extern "C" diff --git a/third_party/libwebp/dec/vp8i.h b/third_party/libwebp/dec/vp8i.h index b2ad9a3..2cbdef22 100644 --- a/third_party/libwebp/dec/vp8i.h +++ b/third_party/libwebp/dec/vp8i.h @@ -13,19 +13,21 @@ #define WEBP_DEC_VP8I_H_ #include <string.h> // for memcpy() -#include "bits.h" +#include "../utils/bit_reader.h" +#include "../utils/thread.h" +#include "../dsp/dsp.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif -//----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // Various defines and enums // version numbers #define DEC_MAJ_VERSION 0 #define DEC_MIN_VERSION 1 -#define DEC_REV_VERSION 2 +#define DEC_REV_VERSION 3 #define ONLY_KEYFRAME_CODE // to remove any code related to P-Frames @@ -95,7 +97,7 @@ enum { MB_FEATURE_TREE_PROBS = 3, #define 
U_OFF (Y_OFF + BPS * 16 + BPS) #define V_OFF (U_OFF + 16) -//----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // Headers typedef struct { @@ -144,19 +146,19 @@ typedef struct { int mode_lf_delta_[NUM_MODE_LF_DELTAS]; } VP8FilterHeader; -//----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // Informations about the macroblocks. -typedef struct { - // block type - uint8_t skip_:1; - // filter specs - uint8_t f_level_:6; // filter strength: 0..63 - uint8_t f_ilevel_:6; // inner limit: 1..63 - uint8_t f_inner_:1; // do inner filtering? - // cbp - uint8_t nz_; // non-zero AC/DC coeffs - uint8_t dc_nz_; // non-zero DC coeffs +typedef struct { // filter specs + unsigned int f_level_:6; // filter strength: 0..63 + unsigned int f_ilevel_:6; // inner limit: 1..63 + unsigned int f_inner_:1; // do inner filtering? 
+} VP8FInfo; + +typedef struct { // used for syntax-parsing + unsigned int nz_; // non-zero AC/DC coeffs + unsigned int dc_nz_:1; // non-zero DC coeffs + unsigned int skip_:1; // block type } VP8MB; // Dequantization matrices @@ -164,7 +166,16 @@ typedef struct { uint16_t y1_mat_[2], y2_mat_[2], uv_mat_[2]; // [DC / AC] } VP8QuantMatrix; -//----------------------------------------------------------------------------- +// Persistent information needed by the parallel processing +typedef struct { + int id_; // cache row to process (in [0..2]) + int mb_y_; // macroblock position of the row + int filter_row_; // true if row-filtering is needed + VP8FInfo* f_info_; // filter strengths + VP8Io io_; // copy of the VP8Io to pass to put() +} VP8ThreadContext; + +//------------------------------------------------------------------------------ // VP8Decoder: the main opaque structure handed over to user struct VP8Decoder { @@ -181,9 +192,20 @@ struct VP8Decoder { VP8FilterHeader filter_hdr_; VP8SegmentHeader segment_hdr_; + // Worker + WebPWorker worker_; + int use_threads_; // use multi-thread + int cache_id_; // current cache row + int num_caches_; // number of cached rows of 16 pixels (1, 2 or 3) + VP8ThreadContext thread_ctx_; // Thread context + // dimension, in macroblock units. int mb_w_, mb_h_; + // Macroblock to process/filter, depending on cropping and filter_type. + int tl_mb_x_, tl_mb_y_; // top-left MB that must be in-loop filtered + int br_mb_x_, br_mb_y_; // last bottom-right MB that must be decoded + // number of partitions. int num_parts_; // per-partition boolean decoders. @@ -212,10 +234,11 @@ struct VP8Decoder { // Boundary data cache and persistent buffers. 
uint8_t* intra_t_; // top intra modes values: 4 * mb_w_ uint8_t intra_l_[4]; // left intra modes values - uint8_t *y_t_; // top luma samples: 16 * mb_w_ - uint8_t *u_t_, *v_t_; // top u/v samples: 8 * mb_w_ each + uint8_t* y_t_; // top luma samples: 16 * mb_w_ + uint8_t* u_t_, *v_t_; // top u/v samples: 8 * mb_w_ each - VP8MB* mb_info_; // contextual macroblock infos (mb_w_ + 1) + VP8MB* mb_info_; // contextual macroblock info (mb_w_ + 1) + VP8FInfo* f_info_; // filter strength info uint8_t* yuv_b_; // main block for Y/U/V (size = YUV_SIZE) int16_t* coeffs_; // 384 coeffs = (16+8+8) * 4*4 @@ -244,17 +267,35 @@ struct VP8Decoder { uint32_t non_zero_ac_; // Filtering side-info - int filter_type_; // 0=off, 1=simple, 2=complex + int filter_type_; // 0=off, 1=simple, 2=complex + int filter_row_; // per-row flag uint8_t filter_levels_[NUM_MB_SEGMENTS]; // precalculated per-segment + + // extensions + const uint8_t* alpha_data_; // compressed alpha data (if present) + size_t alpha_data_size_; + uint8_t* alpha_plane_; // output + + int layer_colorspace_; + const uint8_t* layer_data_; // compressed layer data (if present) + size_t layer_data_size_; }; -//----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // internal functions. Not public. // in vp8.c int VP8SetError(VP8Decoder* const dec, VP8StatusCode error, const char * const msg); +// Validates the VP8 data-header and retrieve basic header information viz width +// and height. Returns 0 in case of formatting error. *width/*height/*has_alpha +// can be passed NULL. 
+int VP8GetInfo(const uint8_t* data, + uint32_t data_size, // data available so far + uint32_t chunk_size, // total data size expect in the chunk + int *width, int *height, int *has_alpha); + // in tree.c void VP8ResetProba(VP8Proba* const proba); void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec); @@ -267,59 +308,38 @@ void VP8ParseQuant(VP8Decoder* const dec); int VP8InitFrame(VP8Decoder* const dec, VP8Io* io); // Predict a block and add residual void VP8ReconstructBlock(VP8Decoder* const dec); +// Call io->setup() and finish setting up scan parameters. +// After this call returns, one must always call VP8ExitCritical() with the +// same parameters. Both functions should be used in pair. Returns VP8_STATUS_OK +// if ok, otherwise sets and returns the error status on *dec. +VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io); +// Must always be called in pair with VP8EnterCritical(). +// Returns false in case of error. +int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io); +// Filter the decoded macroblock row (if needed) +int VP8FinishRow(VP8Decoder* const dec, VP8Io* io); // multi threaded call +// Process the last decoded row (filtering + output) +int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io); // Store a block, along with filtering params void VP8StoreBlock(VP8Decoder* const dec); // Finalize and transmit a complete row. Return false in case of user-abort. -int VP8FinishRow(VP8Decoder* const dec, VP8Io* io); +int VP8FinishRow(VP8Decoder* const dec, VP8Io* const io); +// To be called at the start of a new scanline, to initialize predictors. +void VP8InitScanline(VP8Decoder* const dec); // Decode one macroblock. Returns false if there is not enough data. 
int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br); -// in dsp.c -typedef void (*VP8Idct)(const int16_t* coeffs, uint8_t* dst); -extern VP8Idct VP8Transform; -extern VP8Idct VP8TransformUV; -extern VP8Idct VP8TransformDC; -extern VP8Idct VP8TransformDCUV; -extern void (*VP8TransformWHT)(const int16_t* in, int16_t* out); - -// *dst is the destination block, with stride BPS. Boundary samples are -// assumed accessible when needed. -typedef void (*VP8PredFunc)(uint8_t *dst); -extern VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES]; -extern VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES]; -extern VP8PredFunc VP8PredLuma4[NUM_BMODES]; - -void VP8DspInit(void); // must be called before anything using the above -void VP8DspInitTables(void); // needs to be called no matter what. - -// simple filter (only for luma) -typedef void (*VP8SimpleFilterFunc)(uint8_t* p, int stride, int thresh); -extern VP8SimpleFilterFunc VP8SimpleVFilter16; -extern VP8SimpleFilterFunc VP8SimpleHFilter16; -extern VP8SimpleFilterFunc VP8SimpleVFilter16i; // filter 3 inner edges -extern VP8SimpleFilterFunc VP8SimpleHFilter16i; - -// regular filter (on both macroblock edges and inner edges) -typedef void (*VP8LumaFilterFunc)(uint8_t* luma, int stride, - int thresh, int ithresh, int hev_t); -typedef void (*VP8ChromaFilterFunc)(uint8_t* u, uint8_t* v, int stride, - int thresh, int ithresh, int hev_t); -// on outter edge -extern VP8LumaFilterFunc VP8VFilter16; -extern VP8LumaFilterFunc VP8HFilter16; -extern VP8ChromaFilterFunc VP8VFilter8; -extern VP8ChromaFilterFunc VP8HFilter8; - -// on inner edge -extern VP8LumaFilterFunc VP8VFilter16i; // filtering 3 inner edges altogether -extern VP8LumaFilterFunc VP8HFilter16i; -extern VP8ChromaFilterFunc VP8VFilter8i; // filtering u and v altogether -extern VP8ChromaFilterFunc VP8HFilter8i; - -//----------------------------------------------------------------------------- +// in alpha.c +const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec, + int 
row, int num_rows); + +// in layer.c +int VP8DecodeLayer(VP8Decoder* const dec); + +//------------------------------------------------------------------------------ #if defined(__cplusplus) || defined(c_plusplus) } // extern "C" #endif -#endif // WEBP_DEC_VP8I_H_ +#endif /* WEBP_DEC_VP8I_H_ */ diff --git a/third_party/libwebp/dec/webp.c b/third_party/libwebp/dec/webp.c index 3bf6f55..91ac75f 100644 --- a/third_party/libwebp/dec/webp.c +++ b/third_party/libwebp/dec/webp.c @@ -12,24 +12,29 @@ #include <stdlib.h> #include "vp8i.h" #include "webpi.h" -#include "yuv.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif -#define FANCY_UPSCALING // undefined to remove fancy upscaling support - -//----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // RIFF layout is: -// 0ffset tag +// Offset tag // 0...3 "RIFF" 4-byte tag // 4...7 size of image data (including metadata) starting at offset 8 // 8...11 "WEBP" our form-type signature +// The RIFF container (12 bytes) is followed by appropriate chunks: // 12..15 "VP8 ": 4-bytes tags, describing the raw video format used // 16..19 size of the raw VP8 image data, starting at offset 20 // 20.... the VP8 bytes -// There can be extra chunks after the "VP8 " chunk (ICMT, ICOP, ...) +// Or, +// 12..15 "VP8X": 4-bytes tags, describing the extended-VP8 chunk. +// 16..19 size of the VP8X chunk starting at offset 20. +// 20..23 VP8X flags bit-map corresponding to the chunk-types present. +// 24..27 Width of the Canvas Image. +// 28..31 Height of the Canvas Image. +// There can be extra chunks after the "VP8X" chunk (ICCP, TILE, FRM, VP8, +// META ...) // All 32-bits sizes are in little-endian order. 
// Note: chunk data must be padded to multiple of 2 in size @@ -37,472 +42,312 @@ static inline uint32_t get_le32(const uint8_t* const data) { return data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24); } -// If a RIFF container is detected, validate it and skip over it. -uint32_t WebPCheckRIFFHeader(const uint8_t** data_ptr, - uint32_t *data_size_ptr) { - uint32_t chunk_size = 0xffffffffu; - if (*data_size_ptr >= 10 + 20 && !memcmp(*data_ptr, "RIFF", 4)) { - if (memcmp(*data_ptr + 8, "WEBP", 4)) { - return 0; // wrong image file signature +VP8StatusCode WebPParseRIFF(const uint8_t** data, uint32_t* data_size, + uint32_t* riff_size) { + assert(data); + assert(data_size); + assert(riff_size); + + if (*data_size >= RIFF_HEADER_SIZE && + !memcmp(*data, "RIFF", TAG_SIZE)) { + if (memcmp(*data + 8, "WEBP", TAG_SIZE)) { + return VP8_STATUS_BITSTREAM_ERROR; // Wrong image file signature. } else { - const uint32_t riff_size = get_le32(*data_ptr + 4); - if (riff_size < 12) { - return 0; // we should have at least one chunk - } - if (memcmp(*data_ptr + 12, "VP8 ", 4)) { - return 0; // invalid compression format - } - chunk_size = get_le32(*data_ptr + 16); - if (chunk_size > riff_size - 12) { - return 0; // inconsistent size information. + *riff_size = get_le32(*data + TAG_SIZE); + // Check that we have at least one chunk (i.e "WEBP" + "VP8?nnnn"). + if (*riff_size < TAG_SIZE + CHUNK_HEADER_SIZE) { + return VP8_STATUS_BITSTREAM_ERROR; } // We have a RIFF container. Skip it. - *data_ptr += 20; - *data_size_ptr -= 20; - // Note: we don't report error for odd-sized chunks. 
+ *data += RIFF_HEADER_SIZE; + *data_size -= RIFF_HEADER_SIZE; } - return chunk_size; - } - return *data_size_ptr; -} - -//----------------------------------------------------------------------------- -// Fancy upscaling - -#ifdef FANCY_UPSCALING - -// Given samples laid out in a square as: -// [a b] -// [c d] -// we interpolate u/v as: -// ([9*a + 3*b + 3*c + d 3*a + 9*b + 3*c + d] + [8 8]) / 16 -// ([3*a + b + 9*c + 3*d a + 3*b + 3*c + 9*d] [8 8]) / 16 - -// We process u and v together stashed into 32bit (16bit each). -#define LOAD_UV(u,v) ((u) | ((v) << 16)) - -#define UPSCALE_FUNC(FUNC_NAME, FUNC, XSTEP) \ -static inline void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \ - const uint8_t* top_u, const uint8_t* top_v, \ - const uint8_t* cur_u, const uint8_t* cur_v, \ - uint8_t* top_dst, uint8_t* bottom_dst, int len) { \ - int x; \ - const int last_pixel_pair = (len - 1) >> 1; \ - uint32_t tl_uv = LOAD_UV(top_u[0], top_v[0]); /* top-left sample */ \ - uint32_t l_uv = LOAD_UV(cur_u[0], cur_v[0]); /* left-sample */ \ - if (top_y) { \ - const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2; \ - FUNC(top_y[0], uv0 & 0xff, (uv0 >> 16), top_dst); \ - } \ - if (bottom_y) { \ - const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2; \ - FUNC(bottom_y[0], uv0 & 0xff, (uv0 >> 16), bottom_dst); \ - } \ - for (x = 1; x <= last_pixel_pair; ++x) { \ - const uint32_t t_uv = LOAD_UV(top_u[x], top_v[x]); /* top sample */ \ - const uint32_t uv = LOAD_UV(cur_u[x], cur_v[x]); /* sample */ \ - /* precompute invariant values associated with first and second diagonals*/\ - const uint32_t avg = tl_uv + t_uv + l_uv + uv + 0x00080008u; \ - const uint32_t diag_12 = (avg + 2 * (t_uv + l_uv)) >> 3; \ - const uint32_t diag_03 = (avg + 2 * (tl_uv + uv)) >> 3; \ - if (top_y) { \ - const uint32_t uv0 = (diag_12 + tl_uv) >> 1; \ - const uint32_t uv1 = (diag_03 + t_uv) >> 1; \ - FUNC(top_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16), \ - top_dst + (2 * x - 1) * XSTEP); \ - 
FUNC(top_y[2 * x - 0], uv1 & 0xff, (uv1 >> 16), \ - top_dst + (2 * x - 0) * XSTEP); \ - } \ - if (bottom_y) { \ - const uint32_t uv0 = (diag_03 + l_uv) >> 1; \ - const uint32_t uv1 = (diag_12 + uv) >> 1; \ - FUNC(bottom_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16), \ - bottom_dst + (2 * x - 1) * XSTEP); \ - FUNC(bottom_y[2 * x + 0], uv1 & 0xff, (uv1 >> 16), \ - bottom_dst + (2 * x + 0) * XSTEP); \ - } \ - tl_uv = t_uv; \ - l_uv = uv; \ - } \ - if (!(len & 1)) { \ - if (top_y) { \ - const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2; \ - FUNC(top_y[len - 1], uv0 & 0xff, (uv0 >> 16), \ - top_dst + (len - 1) * XSTEP); \ - } \ - if (bottom_y) { \ - const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2; \ - FUNC(bottom_y[len - 1], uv0 & 0xff, (uv0 >> 16), \ - bottom_dst + (len - 1) * XSTEP); \ - } \ - } \ -} - -// All variants implemented. -UPSCALE_FUNC(UpscaleRgbLinePair, VP8YuvToRgb, 3) -UPSCALE_FUNC(UpscaleBgrLinePair, VP8YuvToBgr, 3) -UPSCALE_FUNC(UpscaleRgbaLinePair, VP8YuvToRgba, 4) -UPSCALE_FUNC(UpscaleBgraLinePair, VP8YuvToBgra, 4) - -// Main driver function. -static inline -void UpscaleLinePair(const uint8_t* top_y, const uint8_t* bottom_y, - const uint8_t* top_u, const uint8_t* top_v, - const uint8_t* cur_u, const uint8_t* cur_v, - uint8_t* top_dst, uint8_t* bottom_dst, int len, - WEBP_CSP_MODE mode) { - if (mode == MODE_RGB) { - UpscaleRgbLinePair(top_y, bottom_y, top_u, top_v, cur_u, cur_v, - top_dst, bottom_dst, len); - } else if (mode == MODE_BGR) { - UpscaleBgrLinePair(top_y, bottom_y, top_u, top_v, cur_u, cur_v, - top_dst, bottom_dst, len); - } else if (mode == MODE_RGBA) { - UpscaleRgbaLinePair(top_y, bottom_y, top_u, top_v, cur_u, cur_v, - top_dst, bottom_dst, len); } else { - assert(mode == MODE_BGRA); - UpscaleBgraLinePair(top_y, bottom_y, top_u, top_v, cur_u, cur_v, - top_dst, bottom_dst, len); + *riff_size = 0; // Did not get full RIFF Header. 
} + return VP8_STATUS_OK; } -#undef LOAD_UV -#undef UPSCALE_FUNC - -#endif // FANCY_UPSCALING +VP8StatusCode WebPParseVP8X(const uint8_t** data, uint32_t* data_size, + uint32_t* bytes_skipped, + int* width, int* height, uint32_t* flags) { + assert(data); + assert(data_size); + assert(bytes_skipped); -//----------------------------------------------------------------------------- -// Main conversion driver. + *bytes_skipped = 0; -static int CustomPut(const VP8Io* io) { - WebPDecParams *p = (WebPDecParams*)io->opaque; - const int w = io->width; - const int mb_h = io->mb_h; - const int uv_w = (w + 1) / 2; - assert(!(io->mb_y & 1)); - - if (w <= 0 || mb_h <= 0) { - return 0; + if (*data_size < CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE) { + return VP8_STATUS_NOT_ENOUGH_DATA; // Insufficient data. } - p->last_y = io->mb_y + io->mb_h; // a priori guess - if (p->mode == MODE_YUV) { - uint8_t* const y_dst = p->output + io->mb_y * p->stride; - uint8_t* const u_dst = p->u + (io->mb_y >> 1) * p->u_stride; - uint8_t* const v_dst = p->v + (io->mb_y >> 1) * p->v_stride; - int j; - for (j = 0; j < mb_h; ++j) { - memcpy(y_dst + j * p->stride, io->y + j * io->y_stride, w); + if (!memcmp(*data, "VP8X", TAG_SIZE)) { + const uint32_t chunk_size = get_le32(*data + TAG_SIZE); + if (chunk_size != VP8X_CHUNK_SIZE) { + return VP8_STATUS_BITSTREAM_ERROR; // Wrong chunk size. } - for (j = 0; j < (mb_h + 1) / 2; ++j) { - memcpy(u_dst + j * p->u_stride, io->u + j * io->uv_stride, uv_w); - memcpy(v_dst + j * p->v_stride, io->v + j * io->uv_stride, uv_w); + if (flags) { + *flags = get_le32(*data + 8); } - } else { - uint8_t* dst = p->output + io->mb_y * p->stride; - if (io->fancy_upscaling) { -#ifdef FANCY_UPSCALING - const uint8_t* cur_y = io->y; - const uint8_t* cur_u = io->u; - const uint8_t* cur_v = io->v; - const uint8_t* top_u = p->top_u; - const uint8_t* top_v = p->top_v; - int y = io->mb_y; - int y_end = io->mb_y + io->mb_h; - if (y == 0) { - // First line is special cased. 
We mirror the u/v samples at boundary. - UpscaleLinePair(NULL, cur_y, cur_u, cur_v, cur_u, cur_v, - NULL, dst, w, p->mode); - } else { - // We can finish the left-over line from previous call - UpscaleLinePair(p->top_y, cur_y, top_u, top_v, cur_u, cur_v, - dst - p->stride, dst, w, p->mode); - } - // Loop over each output pairs of row. - for (; y + 2 < y_end; y += 2) { - top_u = cur_u; - top_v = cur_v; - cur_u += io->uv_stride; - cur_v += io->uv_stride; - dst += 2 * p->stride; - cur_y += 2 * io->y_stride; - UpscaleLinePair(cur_y - io->y_stride, cur_y, - top_u, top_v, cur_u, cur_v, - dst - p->stride, dst, w, p->mode); - } - // move to last row - cur_y += io->y_stride; - if (y_end != io->height) { - // Save the unfinished samples for next call (as we're not done yet). - memcpy(p->top_y, cur_y, w * sizeof(*p->top_y)); - memcpy(p->top_u, cur_u, uv_w * sizeof(*p->top_u)); - memcpy(p->top_v, cur_v, uv_w * sizeof(*p->top_v)); - // The fancy upscaler leaves a row unfinished behind - // (except for the very last row) - p->last_y -= 1; - } else { - // Process the very last row of even-sized picture - if (!(y_end & 1)) { - UpscaleLinePair(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, - dst + p->stride, NULL, w, p->mode); - } - } -#else - assert(0); // shouldn't happen. -#endif - } else { - // Point-sampling U/V upscaler. 
- int j; - for (j = 0; j < mb_h; ++j) { - const uint8_t* y_src = io->y + j * io->y_stride; - int i; - for (i = 0; i < w; ++i) { - const int y = y_src[i]; - const int u = io->u[(j / 2) * io->uv_stride + (i / 2)]; - const int v = io->v[(j / 2) * io->uv_stride + (i / 2)]; - if (p->mode == MODE_RGB) { - VP8YuvToRgb(y, u, v, dst + i * 3); - } else if (p->mode == MODE_BGR) { - VP8YuvToBgr(y, u, v, dst + i * 3); - } else if (p->mode == MODE_RGBA) { - VP8YuvToRgba(y, u, v, dst + i * 4); - } else { - VP8YuvToBgra(y, u, v, dst + i * 4); - } - } - dst += p->stride; - } + if (width) { + *width = get_le32(*data + 12); } + if (height) { + *height = get_le32(*data + 16); + } + // We have consumed 20 bytes from VP8X. Skip them. + *bytes_skipped = CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE; + *data += *bytes_skipped; + *data_size -= *bytes_skipped; } - return 1; + return VP8_STATUS_OK; } -//----------------------------------------------------------------------------- - -static int CustomSetup(VP8Io* io) { -#ifdef FANCY_UPSCALING - WebPDecParams *p = (WebPDecParams*)io->opaque; - p->top_y = p->top_u = p->top_v = NULL; - if (p->mode != MODE_YUV) { - const int uv_width = (io->width + 1) >> 1; - p->top_y = (uint8_t*)malloc(io->width + 2 * uv_width); - if (p->top_y == NULL) { - return 0; // memory error. +VP8StatusCode WebPParseOptionalChunks(const uint8_t** data, uint32_t* data_size, + uint32_t riff_size, + uint32_t* bytes_skipped) { + const uint8_t* buf; + uint32_t buf_size; + + assert(data); + assert(data_size); + assert(bytes_skipped); + + buf = *data; + buf_size = *data_size; + *bytes_skipped = 0; + + while (1) { + uint32_t chunk_size; + uint32_t cur_skip_size; + const uint32_t bytes_skipped_header = TAG_SIZE + // "WEBP". + CHUNK_HEADER_SIZE + // "VP8Xnnnn". + VP8X_CHUNK_SIZE; // Data. + *data = buf; + *data_size = buf_size; + + if (buf_size < CHUNK_HEADER_SIZE) { // Insufficient data. 
+ return VP8_STATUS_NOT_ENOUGH_DATA; + } + + chunk_size = get_le32(buf + TAG_SIZE); + cur_skip_size = CHUNK_HEADER_SIZE + chunk_size; + + // Check that total bytes skipped along with current chunk size + // does not exceed riff_size. + if (riff_size > 0 && + (bytes_skipped_header + *bytes_skipped + cur_skip_size > riff_size)) { + return VP8_STATUS_BITSTREAM_ERROR; // Not a valid chunk size. + } + + if (buf_size < cur_skip_size) { // Insufficient data. + return VP8_STATUS_NOT_ENOUGH_DATA; + } + + if (!memcmp(buf, "VP8 ", TAG_SIZE)) { // A valid VP8 header. + return VP8_STATUS_OK; // Found. } - p->top_u = p->top_y + io->width; - p->top_v = p->top_u + uv_width; - io->fancy_upscaling = 1; // activate fancy upscaling + + // We have a full & valid chunk; skip it. + buf += cur_skip_size; + buf_size -= cur_skip_size; + *bytes_skipped += cur_skip_size; } -#endif - return 1; } -static void CustomTeardown(const VP8Io* io) { -#ifdef FANCY_UPSCALING - WebPDecParams *p = (WebPDecParams*)io->opaque; - if (p->top_y) { - free(p->top_y); - p->top_y = p->top_u = p->top_v = NULL; +VP8StatusCode WebPParseVP8Header(const uint8_t** data, uint32_t* data_size, + uint32_t riff_size, uint32_t* bytes_skipped, + uint32_t* vp8_chunk_size) { + assert(data); + assert(data_size); + assert(bytes_skipped); + assert(vp8_chunk_size); + + *bytes_skipped = 0; + *vp8_chunk_size = 0; + + if (*data_size < CHUNK_HEADER_SIZE) { + return VP8_STATUS_NOT_ENOUGH_DATA; // Insufficient data. } -#endif -} -void WebPInitCustomIo(VP8Io* const io) { - io->put = CustomPut; - io->setup = CustomSetup; - io->teardown = CustomTeardown; + if (!memcmp(*data, "VP8 ", TAG_SIZE)) { + *vp8_chunk_size = get_le32(*data + TAG_SIZE); + if (riff_size >= TAG_SIZE + CHUNK_HEADER_SIZE && // "WEBP" + "VP8 nnnn". + (*vp8_chunk_size > riff_size - (TAG_SIZE + CHUNK_HEADER_SIZE))) { + return VP8_STATUS_BITSTREAM_ERROR; // Inconsistent size information. + } + // We have consumed CHUNK_HEADER_SIZE bytes from VP8 Header. Skip them. 
+ *bytes_skipped = CHUNK_HEADER_SIZE; + *data += *bytes_skipped; + *data_size -= *bytes_skipped; + } + return VP8_STATUS_OK; } -//----------------------------------------------------------------------------- -// Init/Check/Free decoding parameters and buffer - -int WebPInitDecParams(const uint8_t* data, uint32_t data_size, int* width, - int* height, WebPDecParams* const params) { - int w, h; +VP8StatusCode WebPParseHeaders(const uint8_t** data, uint32_t* data_size, + uint32_t* vp8_size, uint32_t* bytes_skipped) { + const uint8_t* buf; + uint32_t buf_size; + uint32_t riff_size; + uint32_t vp8_size_tmp; + uint32_t optional_data_size; + uint32_t vp8x_skip_size; + uint32_t vp8_skip_size; + VP8StatusCode status; + + assert(data); + assert(data_size); + assert(vp8_size); + assert(bytes_skipped); + + buf = *data; + buf_size = *data_size; + + *vp8_size = 0; + *bytes_skipped = 0; + + if (buf == NULL || buf_size < RIFF_HEADER_SIZE) { + return VP8_STATUS_NOT_ENOUGH_DATA; + } - if (!WebPGetInfo(data, data_size, &w, &h)) { - return 0; + // Skip over RIFF header. + if (WebPParseRIFF(&buf, &buf_size, &riff_size) != VP8_STATUS_OK) { + return VP8_STATUS_BITSTREAM_ERROR; // Wrong RIFF Header. } - if (width) *width = w; - if (height) *height = h; - - if (!params->external_buffer) { - int stride; - int uv_stride = 0; - int size; - int uv_size = 0; - uint8_t* output; - WEBP_CSP_MODE mode = params->mode; - - // initialize output buffer, now that dimensions are known. - stride = (mode == MODE_RGB || mode == MODE_BGR) ? 3 * w - : (mode == MODE_RGBA || mode == MODE_BGRA) ? 4 * w - : w; - size = stride * h; - - if (mode == MODE_YUV) { - uv_stride = (w + 1) / 2; - uv_size = uv_stride * ((h + 1) / 2); - } - output = (uint8_t*)malloc(size + 2 * uv_size); - if (!output) { - return 0; + // Skip over VP8X header. + status = WebPParseVP8X(&buf, &buf_size, &vp8x_skip_size, NULL, NULL, NULL); + if (status != VP8_STATUS_OK) { + return status; // Wrong VP8X Chunk / Insufficient data. 
+ } + if (vp8x_skip_size > 0) { + // Skip over optional chunks. + status = WebPParseOptionalChunks(&buf, &buf_size, riff_size, + &optional_data_size); + if (status != VP8_STATUS_OK) { + return status; // Found an invalid chunk size / Insufficient data. } + } - params->output = output; - params->stride = stride; - params->output_size = size; - if (mode == MODE_YUV) { - params->u = output + size; - params->u_stride = uv_stride; - params->output_u_size = uv_size; - params->v = output + size + uv_size; - params->v_stride = uv_stride; - params->output_v_size = uv_size; - } + // Skip over VP8 chunk header. + status = WebPParseVP8Header(&buf, &buf_size, riff_size, &vp8_skip_size, + &vp8_size_tmp); + if (status != VP8_STATUS_OK) { + return status; // Invalid VP8 header / Insufficient data. + } + if (vp8_skip_size > 0) { + *vp8_size = vp8_size_tmp; } - return 1; -} -int WebPCheckDecParams(const VP8Io* io, const WebPDecParams* params) { - int ok = 1; - WEBP_CSP_MODE mode = params->mode; - ok &= (params->stride * io->height <= params->output_size); - if (mode == MODE_RGB || mode == MODE_BGR) { - ok &= (params->stride >= io->width * 3); - } else if (mode == MODE_RGBA || mode == MODE_BGRA) { - ok &= (params->stride >= io->width * 4); - } else { - // some extra checks for U/V - const int u_size = params->u_stride * ((io->height + 1) / 2); - const int v_size = params->v_stride * ((io->height + 1) / 2); - ok &= (params->stride >= io->width); - ok &= (params->u_stride >= (io->width + 1) / 2) && - (params->v_stride >= (io->width + 1) / 2); - ok &= (u_size <= params->output_u_size && - v_size <= params->output_v_size); - } - return ok; + *bytes_skipped = buf - *data; + assert(*bytes_skipped == *data_size - buf_size); + *data = buf; + *data_size = buf_size; + return VP8_STATUS_OK; } -void WebPClearDecParams(WebPDecParams* params) { - if (!params->external_buffer) { - free(params->output); +//------------------------------------------------------------------------------ +// 
WebPDecParams + +void WebPResetDecParams(WebPDecParams* const params) { + if (params) { + memset(params, 0, sizeof(*params)); } - memset(params, 0, sizeof(*params)); } -//----------------------------------------------------------------------------- -// "Into" variants +//------------------------------------------------------------------------------ +// "Into" decoding variants -static uint8_t* DecodeInto(WEBP_CSP_MODE mode, - const uint8_t* data, uint32_t data_size, - WebPDecParams* params) { +// Main flow +static VP8StatusCode DecodeInto(const uint8_t* data, uint32_t data_size, + WebPDecParams* const params) { VP8Decoder* dec = VP8New(); + VP8StatusCode status = VP8_STATUS_OK; VP8Io io; - int ok = 1; + assert(params); if (dec == NULL) { - return NULL; + return VP8_STATUS_INVALID_PARAM; } VP8InitIo(&io); io.data = data; io.data_size = data_size; + WebPInitCustomIo(params, &io); // Plug the I/O functions. - params->mode = mode; - io.opaque = params; - WebPInitCustomIo(&io); +#ifdef WEBP_USE_THREAD + dec->use_threads_ = params->options && (params->options->use_threads > 0); +#else + dec->use_threads_ = 0; +#endif + // Decode bitstream header, update io->width/io->height. if (!VP8GetHeaders(dec, &io)) { - VP8Delete(dec); - return NULL; - } - - // check output buffers - ok = WebPCheckDecParams(&io, params); - if (!ok) { - VP8Delete(dec); - return NULL; - } - - if (mode != MODE_YUV) { - VP8YUVInit(); + status = VP8_STATUS_BITSTREAM_ERROR; + } else { + // Allocate/check output buffers. + status = WebPAllocateDecBuffer(io.width, io.height, params->options, + params->output); + if (status == VP8_STATUS_OK) { + // Decode + if (!VP8Decode(dec, &io)) { + status = dec->status_; + } + } } - - ok = VP8Decode(dec, &io); VP8Delete(dec); - return ok ? 
params->output : NULL; + if (status != VP8_STATUS_OK) { + WebPFreeDecBuffer(params->output); + } + return status; } -uint8_t* WebPDecodeRGBInto(const uint8_t* data, uint32_t data_size, - uint8_t* output, int output_size, - int output_stride) { +// Helpers +static uint8_t* DecodeIntoRGBABuffer(WEBP_CSP_MODE colorspace, + const uint8_t* data, uint32_t data_size, + uint8_t* rgba, int stride, int size) { WebPDecParams params; - - if (output == NULL) { + WebPDecBuffer buf; + if (rgba == NULL) { + return NULL; + } + WebPInitDecBuffer(&buf); + WebPResetDecParams(&params); + params.output = &buf; + buf.colorspace = colorspace; + buf.u.RGBA.rgba = rgba; + buf.u.RGBA.stride = stride; + buf.u.RGBA.size = size; + buf.is_external_memory = 1; + if (DecodeInto(data, data_size, &params) != VP8_STATUS_OK) { return NULL; } + return rgba; +} - params.output = output; - params.stride = output_stride; - params.output_size = output_size; - params.output_u_size = 0; - params.output_v_size = 0; - return DecodeInto(MODE_RGB, data, data_size, &params); +uint8_t* WebPDecodeRGBInto(const uint8_t* data, uint32_t data_size, + uint8_t* output, int size, int stride) { + return DecodeIntoRGBABuffer(MODE_RGB, data, data_size, output, stride, size); } uint8_t* WebPDecodeRGBAInto(const uint8_t* data, uint32_t data_size, - uint8_t* output, int output_size, - int output_stride) { - WebPDecParams params; - - if (output == NULL) { - return NULL; - } + uint8_t* output, int size, int stride) { + return DecodeIntoRGBABuffer(MODE_RGBA, data, data_size, output, stride, size); +} - params.output = output; - params.stride = output_stride; - params.output_size = output_size; - params.output_u_size = 0; - params.output_v_size = 0; - return DecodeInto(MODE_RGBA, data, data_size, &params); +uint8_t* WebPDecodeARGBInto(const uint8_t* data, uint32_t data_size, + uint8_t* output, int size, int stride) { + return DecodeIntoRGBABuffer(MODE_ARGB, data, data_size, output, stride, size); } uint8_t* WebPDecodeBGRInto(const uint8_t* data, 
uint32_t data_size, - uint8_t* output, int output_size, - int output_stride) { - WebPDecParams params; - - if (output == NULL) { - return NULL; - } - - params.output = output; - params.stride = output_stride; - params.output_size = output_size; - params.output_u_size = 0; - params.output_v_size = 0; - return DecodeInto(MODE_BGR, data, data_size, &params); + uint8_t* output, int size, int stride) { + return DecodeIntoRGBABuffer(MODE_BGR, data, data_size, output, stride, size); } uint8_t* WebPDecodeBGRAInto(const uint8_t* data, uint32_t data_size, - uint8_t* output, int output_size, - int output_stride) { - WebPDecParams params; - - if (output == NULL) { - return NULL; - } - - params.output = output; - params.stride = output_stride; - params.output_size = output_size; - params.output_u_size = 0; - params.output_v_size = 0; - return DecodeInto(MODE_BGRA, data, data_size, &params); + uint8_t* output, int size, int stride) { + return DecodeIntoRGBABuffer(MODE_BGRA, data, data_size, output, stride, size); } uint8_t* WebPDecodeYUVInto(const uint8_t* data, uint32_t data_size, @@ -510,132 +355,241 @@ uint8_t* WebPDecodeYUVInto(const uint8_t* data, uint32_t data_size, uint8_t* u, int u_size, int u_stride, uint8_t* v, int v_size, int v_stride) { WebPDecParams params; - - if (luma == NULL) { + WebPDecBuffer output; + if (luma == NULL) return NULL; + WebPInitDecBuffer(&output); + WebPResetDecParams(&params); + params.output = &output; + output.colorspace = MODE_YUV; + output.u.YUVA.y = luma; + output.u.YUVA.y_stride = luma_stride; + output.u.YUVA.y_size = luma_size; + output.u.YUVA.u = u; + output.u.YUVA.u_stride = u_stride; + output.u.YUVA.u_size = u_size; + output.u.YUVA.v = v; + output.u.YUVA.v_stride = v_stride; + output.u.YUVA.v_size = v_size; + output.is_external_memory = 1; + if (DecodeInto(data, data_size, &params) != VP8_STATUS_OK) { return NULL; } - - params.output = luma; - params.stride = luma_stride; - params.output_size = luma_size; - params.u = u; - params.u_stride = u_stride; - 
params.output_u_size = u_size; - params.v = v; - params.v_stride = v_stride; - params.output_v_size = v_size; - return DecodeInto(MODE_YUV, data, data_size, &params); + return luma; } -//----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ static uint8_t* Decode(WEBP_CSP_MODE mode, const uint8_t* data, uint32_t data_size, int* width, int* height, - WebPDecParams* params_out) { - uint8_t* output; + WebPDecBuffer* keep_info) { WebPDecParams params; + WebPDecBuffer output; - memset(&params, 0, sizeof(params)); - params.mode = mode; - if (!WebPInitDecParams(data, data_size, width, height, &params)) { + WebPInitDecBuffer(&output); + WebPResetDecParams(&params); + params.output = &output; + output.colorspace = mode; + + // Retrieve (and report back) the required dimensions from bitstream. + if (!WebPGetInfo(data, data_size, &output.width, &output.height)) { return NULL; } + if (width) *width = output.width; + if (height) *height = output.height; - params.output_size = params.stride * (*height); - params.output_u_size = params.output_v_size = - params.u_stride * ((*height + 1) / 2); - output = DecodeInto(mode, data, data_size, &params); - if (!output) { - WebPClearDecParams(&params); + // Decode + if (DecodeInto(data, data_size, &params) != VP8_STATUS_OK) { + return NULL; } - if (params_out) { - *params_out = params; + if (keep_info) { // keep track of the side-info + WebPCopyDecBuffer(&output, keep_info); } - return output; + // return decoded samples (don't clear 'output'!) + return (mode >= MODE_YUV) ? 
output.u.YUVA.y : output.u.RGBA.rgba; } uint8_t* WebPDecodeRGB(const uint8_t* data, uint32_t data_size, - int *width, int *height) { + int* width, int* height) { return Decode(MODE_RGB, data, data_size, width, height, NULL); } uint8_t* WebPDecodeRGBA(const uint8_t* data, uint32_t data_size, - int *width, int *height) { + int* width, int* height) { return Decode(MODE_RGBA, data, data_size, width, height, NULL); } +uint8_t* WebPDecodeARGB(const uint8_t* data, uint32_t data_size, + int* width, int* height) { + return Decode(MODE_ARGB, data, data_size, width, height, NULL); +} + uint8_t* WebPDecodeBGR(const uint8_t* data, uint32_t data_size, - int *width, int *height) { + int* width, int* height) { return Decode(MODE_BGR, data, data_size, width, height, NULL); } uint8_t* WebPDecodeBGRA(const uint8_t* data, uint32_t data_size, - int *width, int *height) { + int* width, int* height) { return Decode(MODE_BGRA, data, data_size, width, height, NULL); } uint8_t* WebPDecodeYUV(const uint8_t* data, uint32_t data_size, - int *width, int *height, uint8_t** u, uint8_t** v, - int *stride, int* uv_stride) { - WebPDecParams params; + int* width, int* height, uint8_t** u, uint8_t** v, + int* stride, int* uv_stride) { + WebPDecBuffer output; // only to preserve the side-infos uint8_t* const out = Decode(MODE_YUV, data, data_size, - width, height, &params); + width, height, &output); if (out) { - *u = params.u; - *v = params.v; - *stride = params.stride; - *uv_stride = params.u_stride; - assert(params.u_stride == params.v_stride); + const WebPYUVABuffer* const buf = &output.u.YUVA; + *u = buf->u; + *v = buf->v; + *stride = buf->y_stride; + *uv_stride = buf->u_stride; + assert(buf->u_stride == buf->v_stride); } return out; } -//----------------------------------------------------------------------------- +static void DefaultFeatures(WebPBitstreamFeatures* const features) { + assert(features); + memset(features, 0, sizeof(*features)); + features->bitstream_version = 0; +} + +static 
VP8StatusCode GetFeatures(const uint8_t* data, uint32_t data_size, + WebPBitstreamFeatures* const features) { + uint32_t vp8_chunk_size = 0; + uint32_t riff_size = 0; + uint32_t flags = 0; + uint32_t vp8x_skip_size = 0; + uint32_t vp8_skip_size = 0; + VP8StatusCode status; + + if (features == NULL) { + return VP8_STATUS_INVALID_PARAM; + } + DefaultFeatures(features); + + if (data == NULL) { + return VP8_STATUS_INVALID_PARAM; + } + + // Skip over RIFF header. + status = WebPParseRIFF(&data, &data_size, &riff_size); + if (status != VP8_STATUS_OK) { + return status; // Wrong RIFF Header / Insufficient data. + } + + // Skip over VP8X. + status = WebPParseVP8X(&data, &data_size, &vp8x_skip_size, &features->width, + &features->height, &flags); + if (status != VP8_STATUS_OK) { + return status; // Wrong VP8X / insufficient data. + + } + if (vp8x_skip_size > 0) { + return VP8_STATUS_OK; // Return features from VP8X header. + } + + // Skip over VP8 header. + status = WebPParseVP8Header(&data, &data_size, riff_size, &vp8_skip_size, + &vp8_chunk_size); + if (status != VP8_STATUS_OK) { + return status; // Wrong VP8 Chunk-header / insufficient data. + } + if (vp8_skip_size == 0) { + vp8_chunk_size = data_size; // No VP8 chunk wrapper over raw VP8 data. + } + + // Validates raw VP8 data. + if (!VP8GetInfo(data, data_size, vp8_chunk_size, + &features->width, &features->height, &features->has_alpha)) { + return VP8_STATUS_BITSTREAM_ERROR; + } + + return VP8_STATUS_OK; // Return features from VP8 header. 
+} + +//------------------------------------------------------------------------------ // WebPGetInfo() int WebPGetInfo(const uint8_t* data, uint32_t data_size, - int *width, int *height) { - const uint32_t chunk_size = WebPCheckRIFFHeader(&data, &data_size); - if (!chunk_size) { - return 0; // unsupported RIFF header - } - // Validate raw video data - if (data_size < 10) { - return 0; // not enough data - } - // check signature - if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a) { - return 0; // Wrong signature. - } else { - const uint32_t bits = data[0] | (data[1] << 8) | (data[2] << 16); - const int key_frame = !(bits & 1); - const int w = ((data[7] << 8) | data[6]) & 0x3fff; - const int h = ((data[9] << 8) | data[8]) & 0x3fff; + int* width, int* height) { + WebPBitstreamFeatures features; - if (!key_frame) { // Not a keyframe. - return 0; - } + if (GetFeatures(data, data_size, &features) != VP8_STATUS_OK) { + return 0; + } - if (((bits >> 1) & 7) > 3) { - return 0; // unknown profile - } - if (!((bits >> 4) & 1)) { - return 0; // first frame is invisible! - } - if (((bits >> 5)) >= chunk_size) { // partition_length - return 0; // inconsistent size information. 
- } + if (width) { + *width = features.width; + } + if (height) { + *height = features.height; + } - if (width) { - *width = w; - } - if (height) { - *height = h; - } + return 1; +} - return 1; +//------------------------------------------------------------------------------ +// Advance decoding API + +int WebPInitDecoderConfigInternal(WebPDecoderConfig* const config, + int version) { + if (version != WEBP_DECODER_ABI_VERSION) { + return 0; // version mismatch } + if (config == NULL) { + return 0; + } + memset(config, 0, sizeof(*config)); + DefaultFeatures(&config->input); + WebPInitDecBuffer(&config->output); + return 1; +} + +VP8StatusCode WebPGetFeaturesInternal(const uint8_t* data, uint32_t data_size, + WebPBitstreamFeatures* const features, + int version) { + VP8StatusCode status; + if (version != WEBP_DECODER_ABI_VERSION) { + return VP8_STATUS_INVALID_PARAM; // version mismatch + } + if (features == NULL) { + return VP8_STATUS_INVALID_PARAM; + } + + status = GetFeatures(data, data_size, features); + if (status == VP8_STATUS_NOT_ENOUGH_DATA) { + return VP8_STATUS_BITSTREAM_ERROR; // Not enough data treated as error. + } + return status; +} + +VP8StatusCode WebPDecode(const uint8_t* data, uint32_t data_size, + WebPDecoderConfig* const config) { + WebPDecParams params; + VP8StatusCode status; + + if (!config) { + return VP8_STATUS_INVALID_PARAM; + } + + status = GetFeatures(data, data_size, &config->input); + if (status != VP8_STATUS_OK) { + if (status == VP8_STATUS_NOT_ENOUGH_DATA) { + return VP8_STATUS_BITSTREAM_ERROR; // Not enough data treated as error. 
+ } + return status; + } + + WebPResetDecParams(&params); + params.output = &config->output; + params.options = &config->options; + status = DecodeInto(data, data_size, &params); + + return status; } #if defined(__cplusplus) || defined(c_plusplus) diff --git a/third_party/libwebp/dec/webpi.h b/third_party/libwebp/dec/webpi.h index cf5bc0e..6c14460 100644 --- a/third_party/libwebp/dec/webpi.h +++ b/third_party/libwebp/dec/webpi.h @@ -9,55 +9,155 @@ // // Author: somnath@google.com (Somnath Banerjee) -#ifndef WEBP_DEC_WEBPI_H -#define WEBP_DEC_WEBPI_H +#ifndef WEBP_DEC_WEBPI_H_ +#define WEBP_DEC_WEBPI_H_ #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif -#include "webp/decode_vp8.h" +#include "../webp/decode_vp8.h" -// Decoding output parameters. +//------------------------------------------------------------------------------ +// WebPDecParams: Decoding output parameters. Transient internal object. + +typedef struct WebPDecParams WebPDecParams; +typedef int (*OutputFunc)(const VP8Io* const io, WebPDecParams* const p); + +// Structure use for on-the-fly rescaling typedef struct { - uint8_t* output; // rgb(a) or luma - uint8_t *u, *v; // chroma u/v - uint8_t *top_y, *top_u, *top_v; // cache for the fancy upscaler - int stride; // rgb(a) stride or luma stride - int u_stride; // chroma-u stride - int v_stride; // chroma-v stride - WEBP_CSP_MODE mode; // rgb(a) or yuv - int last_y; // coordinate of the line that was last output - int output_size; // size of 'output' buffer - int output_u_size; // size of 'u' buffer - int output_v_size; // size of 'v' buffer - int external_buffer; // If true, the output buffers are externally owned -} WebPDecParams; - -// If a RIFF container is detected, validate it and skip over it. 
Returns -// VP8 bit-stream size if RIFF header is valid else returns 0 -uint32_t WebPCheckRIFFHeader(const uint8_t** data_ptr, - uint32_t *data_size_ptr); - -// Initializes VP8Io with custom setup, io and teardown functions -void WebPInitCustomIo(VP8Io* const io); - -// Initializes params_out by allocating output buffer and setting the -// stride information. It also outputs width and height information of -// the WebP image. Returns 1 if succeeds. -int WebPInitDecParams(const uint8_t* data, uint32_t data_size, int* width, - int* height, WebPDecParams* const params_out); - -// Verifies various size configurations (e.g stride >= width, specified -// output size <= stride * height etc.). Returns 0 if checks fail. -int WebPCheckDecParams(const VP8Io* io, const WebPDecParams* params); - -// Deallocate memory allocated by WebPInitDecParams() and reset the -// WebPDecParams object. -void WebPClearDecParams(WebPDecParams* params); + int x_expand; // true if we're expanding in the x direction + int fy_scale, fx_scale; // fixed-point scaling factor + int64_t fxy_scale; // '' + // we need hpel-precise add/sub increments, for the downsampled U/V planes. + int y_accum; // vertical accumulator + int y_add, y_sub; // vertical increments (add ~= src, sub ~= dst) + int x_add, x_sub; // horizontal increments (add ~= src, sub ~= dst) + int src_width, src_height; // source dimensions + int dst_width, dst_height; // destination dimensions + uint8_t* dst; + int dst_stride; + int32_t* irow, *frow; // work buffer +} WebPRescaler; + +struct WebPDecParams { + WebPDecBuffer* output; // output buffer. + uint8_t* tmp_y, *tmp_u, *tmp_v; // cache for the fancy upsampler + // or used for tmp rescaling + + int last_y; // coordinate of the line that was last output + const WebPDecoderOptions* options; // if not NULL, use alt decoding features + // rescalers + WebPRescaler scaler_y, scaler_u, scaler_v, scaler_a; + void* memory; // overall scratch memory for the output work. 
+ OutputFunc emit; // output RGB or YUV samples + OutputFunc emit_alpha; // output alpha channel +}; + +// Should be called first, before any use of the WebPDecParams object. +void WebPResetDecParams(WebPDecParams* const params); + +//------------------------------------------------------------------------------ +// Header parsing helpers + +#define TAG_SIZE 4 +#define CHUNK_HEADER_SIZE 8 +#define RIFF_HEADER_SIZE 12 +#define FRAME_CHUNK_SIZE 20 +#define LOOP_CHUNK_SIZE 4 +#define TILE_CHUNK_SIZE 8 +#define VP8X_CHUNK_SIZE 12 +#define VP8_FRAME_HEADER_SIZE 10 // Size of the frame header within VP8 data. + +// Validates the RIFF container (if detected) and skips over it. +// If a RIFF container is detected, +// Returns VP8_STATUS_BITSTREAM_ERROR for invalid header, and +// VP8_STATUS_OK otherwise. +// In case there are not enough bytes (partial RIFF container), return 0 for +// riff_size. Else return the riff_size extracted from the header. +VP8StatusCode WebPParseRIFF(const uint8_t** data, uint32_t* data_size, + uint32_t* riff_size); + +// Validates the VP8X Header and skips over it. +// Returns VP8_STATUS_BITSTREAM_ERROR for invalid VP8X header, +// VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data, and +// VP8_STATUS_OK otherwise. +// If a VP8 chunk is found, bytes_skipped is set to the total number of bytes +// that are skipped; also Width, Height & Flags are set to the corresponding +// fields extracted from the VP8X chunk. +VP8StatusCode WebPParseVP8X(const uint8_t** data, uint32_t* data_size, + uint32_t* bytes_skipped, + int* width, int* height, uint32_t* flags); + +// Skips to the next VP8 chunk header in the data given the size of the RIFF +// chunk 'riff_size'. +// Returns VP8_STATUS_BITSTREAM_ERROR if any invalid chunk size is encountered, +// VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data, and +// VP8_STATUS_OK otherwise. +// If a VP8 chunk is found, bytes_skipped is set to the total number of bytes +// that are skipped. 
+VP8StatusCode WebPParseOptionalChunks(const uint8_t** data, uint32_t* data_size, + uint32_t riff_size, + uint32_t* bytes_skipped); + +// Validates the VP8 Header ("VP8 nnnn") and skips over it. +// Returns VP8_STATUS_BITSTREAM_ERROR for invalid (vp8_chunk_size greater than +// riff_size) VP8 header, +// VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data, and +// VP8_STATUS_OK otherwise. +// If a VP8 chunk is found, bytes_skipped is set to the total number of bytes +// that are skipped and vp8_chunk_size is set to the corresponding size +// extracted from the VP8 chunk header. +// For a partial VP8 chunk, vp8_chunk_size is set to 0. +VP8StatusCode WebPParseVP8Header(const uint8_t** data, uint32_t* data_size, + uint32_t riff_size, uint32_t* bytes_skipped, + uint32_t* vp8_chunk_size); + +// Skips over all valid chunks prior to the first VP8 frame header. +// Returns VP8_STATUS_OK on success, +// VP8_STATUS_BITSTREAM_ERROR if an invalid header/chunk is found, and +// VP8_STATUS_NOT_ENOUGH_DATA if case of insufficient data. +// Also, data, data_size, vp8_size & bytes_skipped are updated appropriately +// on success, where +// vp8_size is the size of VP8 chunk data (extracted from VP8 chunk header) and +// bytes_skipped is set to the total number of bytes that are skipped. +VP8StatusCode WebPParseHeaders(const uint8_t** data, uint32_t* data_size, + uint32_t* vp8_size, uint32_t* bytes_skipped); + +//------------------------------------------------------------------------------ +// Misc utils + +// Initializes VP8Io with custom setup, io and teardown functions. The default +// hooks will use the supplied 'params' as io->opaque handle. +void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io); + +//------------------------------------------------------------------------------ +// Internal functions regarding WebPDecBuffer memory (in buffer.c). +// Don't really need to be externally visible for now. 
+ +// Prepare 'buffer' with the requested initial dimensions width/height. +// If no external storage is supplied, initializes buffer by allocating output +// memory and setting up the stride information. Validate the parameters. Return +// an error code in case of problem (no memory, or invalid stride / size / +// dimension / etc.). If *options is not NULL, also verify that the options' +// parameters are valid and apply them to the width/height dimensions of the +// output buffer. This takes cropping / scaling / rotation into account. +VP8StatusCode WebPAllocateDecBuffer(int width, int height, + const WebPDecoderOptions* const options, + WebPDecBuffer* const buffer); + +// Copy 'src' into 'dst' buffer, making sure 'dst' is not marked as owner of the +// memory (still held by 'src'). +void WebPCopyDecBuffer(const WebPDecBuffer* const src, + WebPDecBuffer* const dst); + +// Copy and transfer ownership from src to dst (beware of parameter order!) +void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst); + +//------------------------------------------------------------------------------ #if defined(__cplusplus) || defined(c_plusplus) } // extern "C" #endif -#endif // WEBP_DEC_WEBPI_H +#endif /* WEBP_DEC_WEBPI_H_ */ diff --git a/third_party/libwebp/dec/yuv.c b/third_party/libwebp/dec/yuv.c deleted file mode 100644 index ac448ee..0000000 --- a/third_party/libwebp/dec/yuv.c +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2010 Google Inc. 
-// -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ -// ----------------------------------------------------------------------------- -// -// YUV->RGB conversion function -// -// Author: Skal (pascal.massimino@gmail.com) - -#include "yuv.h" - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -enum { YUV_HALF = 1 << (YUV_FIX - 1) }; - -int16_t VP8kVToR[256], VP8kUToB[256]; -int32_t VP8kVToG[256], VP8kUToG[256]; -uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN]; - -static int done = 0; - -void VP8YUVInit(void) { - int i; - if (done) { - return; - } - for (i = 0; i < 256; ++i) { - VP8kVToR[i] = (89858 * (i - 128) + YUV_HALF) >> YUV_FIX; - VP8kUToG[i] = -22014 * (i - 128) + YUV_HALF; - VP8kVToG[i] = -45773 * (i - 128); - VP8kUToB[i] = (113618 * (i - 128) + YUV_HALF) >> YUV_FIX; - } - for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) { - const int k = ((i - 16) * 76283 + YUV_HALF) >> YUV_FIX; - VP8kClip[i - YUV_RANGE_MIN] = (k < 0) ? 0 : (k > 255) ? 255 : k; - } - done = 1; -} - -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/third_party/libwebp/dec/yuv.h b/third_party/libwebp/dec/yuv.h deleted file mode 100644 index 50e63f9..0000000 --- a/third_party/libwebp/dec/yuv.h +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2010 Google Inc. 
-// -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ -// ----------------------------------------------------------------------------- -// -// inline YUV->RGB conversion function -// -// Author: Skal (pascal.massimino@gmail.com) - -#ifndef WEBP_DEC_YUV_H_ -#define WEBP_DEC_YUV_H_ - -#include "webp/decode_vp8.h" - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -enum { YUV_FIX = 16, // fixed-point precision - YUV_RANGE_MIN = -227, // min value of r/g/b output - YUV_RANGE_MAX = 256 + 226 // max value of r/g/b output -}; -extern int16_t VP8kVToR[256], VP8kUToB[256]; -extern int32_t VP8kVToG[256], VP8kUToG[256]; -extern uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN]; - -inline static void VP8YuvToRgb(uint8_t y, uint8_t u, uint8_t v, - uint8_t* const rgb) { - const int r_off = VP8kVToR[v]; - const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX; - const int b_off = VP8kUToB[u]; - rgb[0] = VP8kClip[y + r_off - YUV_RANGE_MIN]; - rgb[1] = VP8kClip[y + g_off - YUV_RANGE_MIN]; - rgb[2] = VP8kClip[y + b_off - YUV_RANGE_MIN]; -} - -inline static void VP8YuvToRgba(int y, int u, int v, uint8_t* const rgba) { - VP8YuvToRgb(y, u, v, rgba); - rgba[3] = 0xff; -} - -inline static void VP8YuvToBgr(uint8_t y, uint8_t u, uint8_t v, - uint8_t* const bgr) { - const int r_off = VP8kVToR[v]; - const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX; - const int b_off = VP8kUToB[u]; - bgr[0] = VP8kClip[y + b_off - YUV_RANGE_MIN]; - bgr[1] = VP8kClip[y + g_off - YUV_RANGE_MIN]; - bgr[2] = VP8kClip[y + r_off - YUV_RANGE_MIN]; -} - -inline static void VP8YuvToBgra(int y, int u, int v, uint8_t* const bgra) { - VP8YuvToBgr(y, u, v, bgra); - bgra[3] = 0xff; -} - -// Must be called before everything, to initialize the tables. 
-void VP8YUVInit(void); - -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif - -#endif // WEBP_DEC_YUV_H_ |