From f18d1a93390fedbb03f81db82322215ce027c1e0 Mon Sep 17 00:00:00 2001
From: Robert Shih <robertshih@google.com>
Date: Mon, 5 Jan 2015 17:35:54 -0800
Subject: libFLAC: merge master from Xiph

remote: https://git.xiph.org/flac.git
commit: 775eb93

Bug: 18872897
Bug: 18910747
Change-Id: I6e450e44c96b97c3323e428b9e6d420422f24a4e
(cherry picked from commit 31e4f3166a91a2ebb34f643787122a638d9f1471)
Tested-by: Wolfgang Wiedmeyer <wolfgit@wiedmeyer.de>
---
 config.h                                         |   2 +-
 include/FLAC/all.h                               |   7 +-
 include/FLAC/assert.h                            |   3 +-
 include/FLAC/callback.h                          |   3 +-
 include/FLAC/export.h                            |  24 +-
 include/FLAC/format.h                            |  19 +-
 include/FLAC/metadata.h                          |  13 +-
 include/FLAC/ordinals.h                          |  34 +-
 include/FLAC/stream_decoder.h                    |   3 +-
 include/FLAC/stream_encoder.h                    |  72 ++-
 include/share/alloc.h                            | 231 +++++++--
 include/share/compat.h                           | 201 ++++++++
 include/share/endswap.h                          |  78 +++
 include/share/private.h                          |  45 ++
 libFLAC/bitmath.c                                |  46 +-
 libFLAC/bitreader.c                              | 629 ++++++-----------------
 libFLAC/bitwriter.c                              | 142 ++---
 libFLAC/cpu.c                                    | 605 ++++++++++++----------
 libFLAC/crc.c                                    |   7 +-
 libFLAC/fixed.c                                  |  38 +-
 libFLAC/float.c                                  |  16 +-
 libFLAC/format.c                                 |  48 +-
 libFLAC/include/private/all.h                    |   3 +-
 libFLAC/include/private/bitmath.h                | 150 +++++-
 libFLAC/include/private/bitreader.h              |  14 +-
 libFLAC/include/private/bitwriter.h              |   3 +-
 libFLAC/include/private/cpu.h                    | 126 ++++-
 libFLAC/include/private/crc.h                    |   7 +-
 libFLAC/include/private/fixed.h                  |  20 +-
 libFLAC/include/private/float.h                  |   3 +-
 libFLAC/include/private/format.h                 |   3 +-
 libFLAC/include/private/lpc.h                    |  54 +-
 libFLAC/include/private/macros.h                 |  72 +++
 libFLAC/include/private/md5.h                    |   8 +-
 libFLAC/include/private/memory.h                 |  14 +-
 libFLAC/include/private/metadata.h               |   3 +-
 libFLAC/include/private/ogg_decoder_aspect.h     |   3 +-
 libFLAC/include/private/ogg_encoder_aspect.h     |   3 +-
 libFLAC/include/private/ogg_helper.h             |   3 +-
 libFLAC/include/private/ogg_mapping.h            |   5 +-
 libFLAC/include/private/stream_encoder.h         |  67 +++
 libFLAC/include/private/stream_encoder_framing.h |   3 +-
 libFLAC/include/private/window.h                 |   5 +-
 libFLAC/include/protected/all.h                  |   3 +-
 libFLAC/include/protected/stream_decoder.h       |   4 +-
 libFLAC/include/protected/stream_encoder.h       |  10 +-
 libFLAC/lpc.c                                    | 126 ++---
 libFLAC/md5.c                                    | 318 ++++++++----
 libFLAC/memory.c                                 |  69 ++-
 libFLAC/stream_decoder.c                         | 327 ++++++------
 libFLAC/stream_encoder.c                         | 484 +++++++++++------
 libFLAC/stream_encoder_framing.c                 |  16 +-
 libFLAC/window.c                                 |  83 ++-
 53 files changed, 2592 insertions(+), 1683 deletions(-)
 create mode 100644 include/share/compat.h
 create mode 100644 include/share/endswap.h
 create mode 100644 include/share/private.h
 mode change 100755 => 100644 libFLAC/bitwriter.c
 create mode 100644 libFLAC/include/private/macros.h
 create mode 100644 libFLAC/include/private/stream_encoder.h

diff --git a/config.h b/config.h
index 6827125..793995f 100644
--- a/config.h
+++ b/config.h
@@ -119,7 +119,7 @@
 #define STDC_HEADERS 1
 
 /* Version number of package */
-#define VERSION "1.2.1"
+#define VERSION "1.3.1"
 
 /* Define to 1 if your processor stores words with the most significant byte
    first (like Motorola and SPARC, unlike Intel and VAX). */
diff --git a/include/FLAC/all.h b/include/FLAC/all.h
index c542c0d..6f4d336 100644
--- a/include/FLAC/all.h
+++ b/include/FLAC/all.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -161,7 +162,7 @@
  * in FLAC 1.1.3 is a set of \c #defines in \c export.h of each
  * library's includes (e.g. \c include/FLAC/export.h).  The
  * \c #defines mirror the libraries'
- * <A HREF="http://www.gnu.org/software/libtool/manual.html#Libtool-versioning">libtool version numbers</A>,
+ * <A HREF="http://www.gnu.org/software/libtool/manual/libtool.html#Libtool-versioning">libtool version numbers</A>,
  * e.g. in libFLAC there are \c FLAC_API_VERSION_CURRENT,
  * \c FLAC_API_VERSION_REVISION, and \c FLAC_API_VERSION_AGE.
  * These can be used to support multiple versions of an API during the
@@ -175,7 +176,7 @@
  * #endif
  * \endcode
  *
- * The the source will work for multiple versions and the legacy code can
+ * The source will work for multiple versions and the legacy code can
  * easily be removed when the transition is complete.
  *
  * Another available symbol is FLAC_API_SUPPORTS_OGG_FLAC (defined in
diff --git a/include/FLAC/assert.h b/include/FLAC/assert.h
index 3fc03f3..dc9bcef 100644
--- a/include/FLAC/assert.h
+++ b/include/FLAC/assert.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2001-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
diff --git a/include/FLAC/callback.h b/include/FLAC/callback.h
index c954121..ce8787f 100644
--- a/include/FLAC/callback.h
+++ b/include/FLAC/callback.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2004-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
diff --git a/include/FLAC/export.h b/include/FLAC/export.h
index a525f29..9cc9e13 100644
--- a/include/FLAC/export.h
+++ b/include/FLAC/export.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -55,25 +56,30 @@
  * \{
  */
 
-#if defined(FLAC__NO_DLL) || !defined(_MSC_VER)
+#if defined(FLAC__NO_DLL)
 #define FLAC_API
 
+#elif defined(_MSC_VER)
+#ifdef FLAC_API_EXPORTS
+#define	FLAC_API __declspec(dllexport)
 #else
+#define FLAC_API __declspec(dllimport)
+#endif
+
+#elif defined(FLAC__USE_VISIBILITY_ATTR)
+#define FLAC_API __attribute__ ((visibility ("default")))
 
-#ifdef FLAC_API_EXPORTS
-#define	FLAC_API	_declspec(dllexport)
 #else
-#define FLAC_API	_declspec(dllimport)
+#define FLAC_API
 
 #endif
-#endif
 
 /** These #defines will mirror the libtool-based library version number, see
- * http://www.gnu.org/software/libtool/manual.html#Libtool-versioning
+ * http://www.gnu.org/software/libtool/manual/libtool.html#Libtool-versioning
  */
-#define FLAC_API_VERSION_CURRENT 10
+#define FLAC_API_VERSION_CURRENT 11
 #define FLAC_API_VERSION_REVISION 0 /**< see above */
-#define FLAC_API_VERSION_AGE 2 /**< see above */
+#define FLAC_API_VERSION_AGE 3 /**< see above */
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/include/FLAC/format.h b/include/FLAC/format.h
index 77e2d01..7424565 100644
--- a/include/FLAC/format.h
+++ b/include/FLAC/format.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -508,9 +509,11 @@ typedef enum {
 	FLAC__METADATA_TYPE_PICTURE = 6,
 	/**< <A HREF="../format.html#metadata_block_picture">PICTURE</A> block */
 
-	FLAC__METADATA_TYPE_UNDEFINED = 7
+	FLAC__METADATA_TYPE_UNDEFINED = 7,
 	/**< marker to denote beginning of undefined type range; this number will increase as new metadata types are added */
 
+        FLAC__MAX_METADATA_TYPE = FLAC__MAX_METADATA_TYPE_CODE,
+        /**< No type will ever be greater than this. There is not enough room in the protocol block. */
 } FLAC__MetadataType;
 
 /** Maps a FLAC__MetadataType to a C string.
@@ -879,6 +882,18 @@ extern FLAC_API const unsigned FLAC__STREAM_METADATA_LENGTH_LEN; /**< == 24 (bit
  */
 FLAC_API FLAC__bool FLAC__format_sample_rate_is_valid(unsigned sample_rate);
 
+/** Tests that a blocksize at the given sample rate is valid for the FLAC
+ *  subset.
+ *
+ * \param blocksize    The blocksize to test for compliance.
+ * \param sample_rate  The sample rate is needed, since the valid subset
+ *                     blocksize depends on the sample rate.
+ * \retval FLAC__bool
+ *    \c true if the given blocksize conforms to the specification for the
+ *    subset at the given sample rate, else \c false.
+ */
+FLAC_API FLAC__bool FLAC__format_blocksize_is_subset(unsigned blocksize, unsigned sample_rate);
+
 /** Tests that a sample rate is valid for the FLAC subset.  The subset rules
  *  for valid sample rates are slightly more complex since the rate has to
  *  be expressible completely in the frame header.
diff --git a/include/FLAC/metadata.h b/include/FLAC/metadata.h
index fff90b0..b8ad8b4 100644
--- a/include/FLAC/metadata.h
+++ b/include/FLAC/metadata.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2001-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -499,7 +500,7 @@ FLAC_API FLAC__MetadataType FLAC__metadata_simple_iterator_get_block_type(const
  * \retval unsigned
  *    The length of the metadata block at the current iterator position.
  *    The is same length as that in the
- *    <a href="http://flac.sourceforge.net/format.html#metadata_block_header">metadata block header</a>,
+ *    <a href="http://xiph.org/flac/format.html#metadata_block_header">metadata block header</a>,
  *    i.e. the length of the metadata body that follows the header.
  */
 FLAC_API unsigned FLAC__metadata_simple_iterator_get_block_length(const FLAC__Metadata_SimpleIterator *iterator);
@@ -666,7 +667,7 @@ FLAC_API FLAC__bool FLAC__metadata_simple_iterator_delete_block(FLAC__Metadata_S
  *
  * - Create a new chain using FLAC__metadata_chain_new().  A chain is a
  *   linked list of FLAC metadata blocks.
- * - Read all metadata into the the chain from a FLAC file using
+ * - Read all metadata into the chain from a FLAC file using
  *   FLAC__metadata_chain_read() or FLAC__metadata_chain_read_ogg() and
  *   check the status.
  * - Optionally, consolidate the padding using
@@ -763,7 +764,7 @@ typedef enum {
 	FLAC__METADATA_CHAIN_STATUS_READ_WRITE_MISMATCH,
 	/**< FLAC__metadata_chain_write() was called on a chain read by
 	 *   FLAC__metadata_chain_read_with_callbacks()/FLAC__metadata_chain_read_ogg_with_callbacks(),
-	 *   or 
+	 *   or
 	 *   FLAC__metadata_chain_write_with_callbacks()/FLAC__metadata_chain_write_with_callbacks_and_tempfile()
 	 *   was called on a chain read by
 	 *   FLAC__metadata_chain_read()/FLAC__metadata_chain_read_ogg().
@@ -1691,7 +1692,7 @@ FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_append_comment(FLAC__Str
  *  For convenience, a trailing NUL is added to the entry if it doesn't have
  *  one already.
  *
- *  Depending on the the value of \a all, either all or just the first comment
+ *  Depending on the value of \a all, either all or just the first comment
  *  whose field name(s) match the given entry's name will be replaced by the
  *  given entry.  If no comments match, \a entry will simply be appended.
  *
@@ -1986,7 +1987,7 @@ FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_resize_tracks(FLAC__StreamMet
  *    \code object->type == FLAC__METADATA_TYPE_CUESHEET \endcode
  *    \code track_num < object->data.cue_sheet.num_tracks \endcode
  *    \code (track->indices != NULL && track->num_indices > 0) ||
- * (track->indices == NULL && track->num_indices == 0)
+ * (track->indices == NULL && track->num_indices == 0) \endcode
  * \retval FLAC__bool
  *    \c false if \a copy is \c true and malloc() fails, else \c true.
  */
diff --git a/include/FLAC/ordinals.h b/include/FLAC/ordinals.h
index a7a5cd9..b1e1acf 100644
--- a/include/FLAC/ordinals.h
+++ b/include/FLAC/ordinals.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -32,40 +33,45 @@
 #ifndef FLAC__ORDINALS_H
 #define FLAC__ORDINALS_H
 
-#if !(defined(_MSC_VER) || defined(__BORLANDC__) || defined(__EMX__))
-#include <inttypes.h>
-#endif
+#if defined(_MSC_VER) && _MSC_VER < 1600
+
+/* Microsoft Visual Studio earlier than the 2010 version did not provide
+ * the 1999 ISO C Standard header file <stdint.h>.
+ */
 
-typedef signed char FLAC__int8;
-typedef unsigned char FLAC__uint8;
+typedef __int8 FLAC__int8;
+typedef unsigned __int8 FLAC__uint8;
 
-#if defined(_MSC_VER) || defined(__BORLANDC__)
 typedef __int16 FLAC__int16;
 typedef __int32 FLAC__int32;
 typedef __int64 FLAC__int64;
 typedef unsigned __int16 FLAC__uint16;
 typedef unsigned __int32 FLAC__uint32;
 typedef unsigned __int64 FLAC__uint64;
-#elif defined(__EMX__)
-typedef short FLAC__int16;
-typedef long FLAC__int32;
-typedef long long FLAC__int64;
-typedef unsigned short FLAC__uint16;
-typedef unsigned long FLAC__uint32;
-typedef unsigned long long FLAC__uint64;
+
 #else
+
+/* For MSVC 2010 and everything else which provides <stdint.h>. */
+
+#include <stdint.h>
+
+typedef int8_t FLAC__int8;
+typedef uint8_t FLAC__uint8;
+
 typedef int16_t FLAC__int16;
 typedef int32_t FLAC__int32;
 typedef int64_t FLAC__int64;
 typedef uint16_t FLAC__uint16;
 typedef uint32_t FLAC__uint32;
 typedef uint64_t FLAC__uint64;
+
 #endif
 
 typedef int FLAC__bool;
 
 typedef FLAC__uint8 FLAC__byte;
 
+
 #ifdef true
 #undef true
 #endif
diff --git a/include/FLAC/stream_decoder.h b/include/FLAC/stream_decoder.h
index 9ac1594..9bfdd1f 100644
--- a/include/FLAC/stream_decoder.h
+++ b/include/FLAC/stream_decoder.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
diff --git a/include/FLAC/stream_encoder.h b/include/FLAC/stream_encoder.h
index dbbbb23..efc213a 100644
--- a/include/FLAC/stream_encoder.h
+++ b/include/FLAC/stream_encoder.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -128,7 +129,7 @@ extern "C" {
  * Unlike the decoders, the stream encoder has many options that can
  * affect the speed and compression ratio.  When setting these parameters
  * you should have some basic knowledge of the format (see the
- * <A HREF="../documentation.html#format">user-level documentation</A>
+ * <A HREF="../documentation_format_overview.html">user-level documentation</A>
  * or the <A HREF="../format.html">formal description</A>).  The
  * FLAC__stream_encoder_set_*() functions themselves do not validate the
  * values as many are interdependent.  The FLAC__stream_encoder_init_*()
@@ -829,28 +830,28 @@ FLAC_API FLAC__bool FLAC__stream_encoder_set_sample_rate(FLAC__StreamEncoder *en
  * The actual values set for each level are:
  * <table>
  * <tr>
- *  <td><b>level</b><td>
- *  <td>do mid-side stereo<td>
- *  <td>loose mid-side stereo<td>
- *  <td>apodization<td>
- *  <td>max lpc order<td>
- *  <td>qlp coeff precision<td>
- *  <td>qlp coeff prec search<td>
- *  <td>escape coding<td>
- *  <td>exhaustive model search<td>
- *  <td>min residual partition order<td>
- *  <td>max residual partition order<td>
- *  <td>rice parameter search dist<td>
+ *  <td><b>level</b></td>
+ *  <td>do mid-side stereo</td>
+ *  <td>loose mid-side stereo</td>
+ *  <td>apodization</td>
+ *  <td>max lpc order</td>
+ *  <td>qlp coeff precision</td>
+ *  <td>qlp coeff prec search</td>
+ *  <td>escape coding</td>
+ *  <td>exhaustive model search</td>
+ *  <td>min residual partition order</td>
+ *  <td>max residual partition order</td>
+ *  <td>rice parameter search dist</td>
  * </tr>
- * <tr>  <td><b>0</b><td>  <td>false<td>  <td>false<td>  <td>tukey(0.5)<td>  <td>0<td>   <td>0<td>  <td>false<td>  <td>false<td>  <td>false<td>  <td>0<td>  <td>3<td>  <td>0<td>  </tr>
- * <tr>  <td><b>1</b><td>  <td>true<td>   <td>true<td>   <td>tukey(0.5)<td>  <td>0<td>   <td>0<td>  <td>false<td>  <td>false<td>  <td>false<td>  <td>0<td>  <td>3<td>  <td>0<td>  </tr>
- * <tr>  <td><b>2</b><td>  <td>true<td>   <td>false<td>  <td>tukey(0.5)<td>  <td>0<td>   <td>0<td>  <td>false<td>  <td>false<td>  <td>false<td>  <td>0<td>  <td>3<td>  <td>0<td>  </tr>
- * <tr>  <td><b>3</b><td>  <td>false<td>  <td>false<td>  <td>tukey(0.5)<td>  <td>6<td>   <td>0<td>  <td>false<td>  <td>false<td>  <td>false<td>  <td>0<td>  <td>4<td>  <td>0<td>  </tr>
- * <tr>  <td><b>4</b><td>  <td>true<td>   <td>true<td>   <td>tukey(0.5)<td>  <td>8<td>   <td>0<td>  <td>false<td>  <td>false<td>  <td>false<td>  <td>0<td>  <td>4<td>  <td>0<td>  </tr>
- * <tr>  <td><b>5</b><td>  <td>true<td>   <td>false<td>  <td>tukey(0.5)<td>  <td>8<td>   <td>0<td>  <td>false<td>  <td>false<td>  <td>false<td>  <td>0<td>  <td>5<td>  <td>0<td>  </tr>
- * <tr>  <td><b>6</b><td>  <td>true<td>   <td>false<td>  <td>tukey(0.5)<td>  <td>8<td>   <td>0<td>  <td>false<td>  <td>false<td>  <td>false<td>  <td>0<td>  <td>6<td>  <td>0<td>  </tr>
- * <tr>  <td><b>7</b><td>  <td>true<td>   <td>false<td>  <td>tukey(0.5)<td>  <td>8<td>   <td>0<td>  <td>false<td>  <td>false<td>  <td>true<td>   <td>0<td>  <td>6<td>  <td>0<td>  </tr>
- * <tr>  <td><b>8</b><td>  <td>true<td>   <td>false<td>  <td>tukey(0.5)<td>  <td>12<td>  <td>0<td>  <td>false<td>  <td>false<td>  <td>true<td>   <td>0<td>  <td>6<td>  <td>0<td>  </tr>
+ * <tr>  <td><b>0</b></td> <td>false</td> <td>false</td> <td>tukey(0.5)<td>                                     <td>0</td>  <td>0</td> <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>3</td> <td>0</td> </tr>
+ * <tr>  <td><b>1</b></td> <td>true</td>  <td>true</td>  <td>tukey(0.5)<td>                                     <td>0</td>  <td>0</td> <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>3</td> <td>0</td> </tr>
+ * <tr>  <td><b>2</b></td> <td>true</td>  <td>false</td> <td>tukey(0.5)<td>                                     <td>0</td>  <td>0</td> <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>3</td> <td>0</td> </tr>
+ * <tr>  <td><b>3</b></td> <td>false</td> <td>false</td> <td>tukey(0.5)<td>                                     <td>6</td>  <td>0</td> <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>4</td> <td>0</td> </tr>
+ * <tr>  <td><b>4</b></td> <td>true</td>  <td>true</td>  <td>tukey(0.5)<td>                                     <td>8</td>  <td>0</td> <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>4</td> <td>0</td> </tr>
+ * <tr>  <td><b>5</b></td> <td>true</td>  <td>false</td> <td>tukey(0.5)<td>                                     <td>8</td>  <td>0</td> <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>5</td> <td>0</td> </tr>
+ * <tr>  <td><b>6</b></td> <td>true</td>  <td>false</td> <td>tukey(0.5);partial_tukey(2)<td>                    <td>8</td>  <td>0</td> <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>6</td> <td>0</td> </tr>
+ * <tr>  <td><b>7</b></td> <td>true</td>  <td>false</td> <td>tukey(0.5);partial_tukey(2)<td>                    <td>12</td> <td>0</td> <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>6</td> <td>0</td> </tr>
+ * <tr>  <td><b>8</b></td> <td>true</td>  <td>false</td> <td>tukey(0.5);partial_tukey(2);punchout_tukey(3)</td> <td>12</td> <td>0</td> <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>6</td> <td>0</td> </tr>
  * </table>
  *
  * \default \c 5
@@ -919,7 +920,8 @@ FLAC_API FLAC__bool FLAC__stream_encoder_set_loose_mid_side_stereo(FLAC__StreamE
  * The available functions are \c bartlett, \c bartlett_hann,
  * \c blackman, \c blackman_harris_4term_92db, \c connes, \c flattop,
  * \c gauss(STDDEV), \c hamming, \c hann, \c kaiser_bessel, \c nuttall,
- * \c rectangle, \c triangle, \c tukey(P), \c welch.
+ * \c rectangle, \c triangle, \c tukey(P), \c partial_tukey(n[/ov[/P]]),
+ * \c punchout_tukey(n[/ov[/P]]), \c welch.
  *
  * For \c gauss(STDDEV), STDDEV specifies the standard deviation
  * (0<STDDEV<=0.5).
@@ -928,6 +930,24 @@ FLAC_API FLAC__bool FLAC__stream_encoder_set_loose_mid_side_stereo(FLAC__StreamE
  * tapered (0<=P<=1).  P=0 corresponds to \c rectangle and P=1
  * corresponds to \c hann.
  *
+ * Specifying \c partial_tukey or \c punchout_tukey works a little
+ * different. These do not specify a single apodization function, but
+ * a series of them with some overlap. partial_tukey specifies a series
+ * of small windows (all treated separately) while punchout_tukey
+ * specifies a series of windows that have a hole in them. In this way,
+ * the predictor is constructed with only a part of the block, which
+ * helps in case a block consists of dissimilar parts.
+ *
+ * The three parameters that can be specified for the functions are
+ * n, ov and P. n is the number of functions to add, ov is the overlap
+ * of the windows in case of partial_tukey and the overlap in the gaps
+ * in case of punchout_tukey. P is the fraction of the window that is
+ * tapered, like with a regular tukey window. The function can be
+ * specified with only a number, a number and an overlap, or a number
+ * an overlap and a P, for example, partial_tukey(3), partial_tukey(3/0.3)
+ * and partial_tukey(3/0.3/0.5) are all valid. ov should be smaller than 1
+ * and can be negative.
+ *
  * Example specifications are \c "blackman" or
  * \c "hann;triangle;tukey(0.5);tukey(0.25);tukey(0.125)"
  *
@@ -940,7 +960,9 @@ FLAC_API FLAC__bool FLAC__stream_encoder_set_loose_mid_side_stereo(FLAC__StreamE
  * results in the smallest compressed subframe.
  *
  * Note that each function specified causes the encoder to occupy a
- * floating point array in which to store the window.
+ * floating point array in which to store the window. Also note that the
+ * values of P, STDDEV and ov are locale-specific, so if the comma
+ * separator specified by the locale is a comma, a comma should be used.
  *
  * \default \c "tukey(0.5)"
  * \param  encoder        An encoder instance to set.
diff --git a/include/share/alloc.h b/include/share/alloc.h
index 2e92dd8..5e1f1e2 100644
--- a/include/share/alloc.h
+++ b/include/share/alloc.h
@@ -1,72 +1,209 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
+/* alloc - Convenience routines for safely allocating memory
+ * Copyright (C) 2007-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
  *
- *      http://www.apache.org/licenses/LICENSE-2.0
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FLAC__SHARE__ALLOC_H
+#define FLAC__SHARE__ALLOC_H
+
+#ifdef HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+/* WATCHOUT: for c++ you may have to #define __STDC_LIMIT_MACROS 1 real early
+ * before #including this file,  otherwise SIZE_MAX might not be defined
  */
 
-#ifndef __SHARE_ALLOC_H_
-#define __SHARE_ALLOC_H_
+#include <limits.h> /* for SIZE_MAX */
+#if HAVE_STDINT_H
+#include <stdint.h> /* for SIZE_MAX in case limits.h didn't get it */
+#endif
+#include <stdlib.h> /* for size_t, malloc(), etc */
+#include "share/compat.h"
 
-#include <limits.h>
+#ifndef SIZE_MAX
+# ifndef SIZE_T_MAX
+#  ifdef _MSC_VER
+#   ifdef _WIN64
+#    define SIZE_T_MAX 0xffffffffffffffffui64
+#   else
+#    define SIZE_T_MAX 0xffffffff
+#   endif
+#  else
+#   error
+#  endif
+# endif
+# define SIZE_MAX SIZE_T_MAX
+#endif
+
+/* avoid malloc()ing 0 bytes, see:
+ * https://www.securecoding.cert.org/confluence/display/seccode/MEM04-A.+Do+not+make+assumptions+about+the+result+of+allocating+0+bytes?focusedCommentId=5407003
+*/
+static inline void *safe_malloc_(size_t size)
+{
+	/* malloc(0) is undefined; FLAC src convention is to always allocate */
+	if(!size)
+		size++;
+	return malloc(size);
+}
+
+static inline void *safe_calloc_(size_t nmemb, size_t size)
+{
+	if(!nmemb || !size)
+		return malloc(1); /* malloc(0) is undefined; FLAC src convention is to always allocate */
+	return calloc(nmemb, size);
+}
+
+/*@@@@ there's probably a better way to prevent overflows when allocating untrusted sums but this works for now */
+
+static inline void *safe_malloc_add_2op_(size_t size1, size_t size2)
+{
+	size2 += size1;
+	if(size2 < size1)
+		return 0;
+	return safe_malloc_(size2);
+}
+
+static inline void *safe_malloc_add_3op_(size_t size1, size_t size2, size_t size3)
+{
+	size2 += size1;
+	if(size2 < size1)
+		return 0;
+	size3 += size2;
+	if(size3 < size2)
+		return 0;
+	return safe_malloc_(size3);
+}
+
+static inline void *safe_malloc_add_4op_(size_t size1, size_t size2, size_t size3, size_t size4)
+{
+	size2 += size1;
+	if(size2 < size1)
+		return 0;
+	size3 += size2;
+	if(size3 < size2)
+		return 0;
+	size4 += size3;
+	if(size4 < size3)
+		return 0;
+	return safe_malloc_(size4);
+}
+
+void *safe_malloc_mul_2op_(size_t size1, size_t size2) ;
+
+static inline void *safe_malloc_mul_3op_(size_t size1, size_t size2, size_t size3)
+{
+	if(!size1 || !size2 || !size3)
+		return malloc(1); /* malloc(0) is undefined; FLAC src convention is to always allocate */
+	if(size1 > SIZE_MAX / size2)
+		return 0;
+	size1 *= size2;
+	if(size1 > SIZE_MAX / size3)
+		return 0;
+	return malloc(size1*size3);
+}
+
+/* size1*size2 + size3 */
+static inline void *safe_malloc_mul2add_(size_t size1, size_t size2, size_t size3)
+{
+	if(!size1 || !size2)
+		return safe_malloc_(size3);
+	if(size1 > SIZE_MAX / size2)
+		return 0;
+	return safe_malloc_add_2op_(size1*size2, size3);
+}
 
-// malloc(n floor 1)
-static inline void *safe_malloc_(size_t n)
+/* size1 * (size2 + size3) */
+static inline void *safe_malloc_muladd2_(size_t size1, size_t size2, size_t size3)
 {
-    // dlmalloc is already safe
-    return malloc(n);
+	if(!size1 || (!size2 && !size3))
+		return malloc(1); /* malloc(0) is undefined; FLAC src convention is to always allocate */
+	size2 += size3;
+	if(size2 < size3)
+		return 0;
+	if(size1 > SIZE_MAX / size2)
+		return 0;
+	return malloc(size1*size2);
 }
 
-// malloc(n1 * n2) then memset to zero
-static inline void *safe_calloc_(size_t n1, size_t n2)
+static inline void *safe_realloc_add_2op_(void *ptr, size_t size1, size_t size2)
 {
-    // dlcalloc is already safe
-    return calloc(n1, n2);
+	size2 += size1;
+	if(size2 < size1)
+		return 0;
+	return realloc(ptr, size2);
 }
 
-// malloc(n1 + n2)
-static inline void *safe_malloc_add_2op_(size_t n1, size_t n2)
+static inline void *safe_realloc_add_3op_(void *ptr, size_t size1, size_t size2, size_t size3)
 {
-    unsigned long long n = n1 + n2;
-    size_t ns = n;
-    // check for overflow
-    return n == ns ? malloc(ns) : NULL;
+	size2 += size1;
+	if(size2 < size1)
+		return 0;
+	size3 += size2;
+	if(size3 < size2)
+		return 0;
+	return realloc(ptr, size3);
 }
 
-// malloc(n1 * n2)
-static inline void *safe_malloc_mul_2op_(size_t n1, size_t n2)
+static inline void *safe_realloc_add_4op_(void *ptr, size_t size1, size_t size2, size_t size3, size_t size4)
 {
-    unsigned long long n = n1 * n2;
-    size_t ns = n;
-    // check for overflow
-    return n == ns ? malloc(ns) : NULL;
+	size2 += size1;
+	if(size2 < size1)
+		return 0;
+	size3 += size2;
+	if(size3 < size2)
+		return 0;
+	size4 += size3;
+	if(size4 < size3)
+		return 0;
+	return realloc(ptr, size4);
 }
 
-// malloc(n1 * (n2 + n3))
-static inline void *safe_malloc_muladd2_(size_t n1, size_t n2, size_t n3)
+static inline void *safe_realloc_mul_2op_(void *ptr, size_t size1, size_t size2)
 {
-    unsigned long long n = n1 * (n2 + n3);
-    size_t ns = n;
-    // check for overflow
-    return n == ns ? malloc(ns) : NULL;
+	if(!size1 || !size2)
+		return realloc(ptr, 0); /* preserve POSIX realloc(ptr, 0) semantics */
+	if(size1 > SIZE_MAX / size2)
+		return 0;
+	return realloc(ptr, size1*size2);
 }
 
-// realloc(ptr, n1 * n2)
-static inline void *safe_realloc_mul_2op_(void *ptr, size_t n1, size_t n2)
+/* size1 * (size2 + size3) */
+static inline void *safe_realloc_muladd2_(void *ptr, size_t size1, size_t size2, size_t size3)
 {
-    unsigned long long n = n1 * n2;
-    size_t ns = n;
-    // check for overflow
-    return n == ns ? realloc(ptr, ns) : NULL;
+	if(!size1 || (!size2 && !size3))
+		return realloc(ptr, 0); /* preserve POSIX realloc(ptr, 0) semantics */
+	size2 += size3;
+	if(size2 < size3)
+		return 0;
+	return safe_realloc_mul_2op_(ptr, size1, size2);
 }
 
-#endif // __SHARE_ALLOC_H_
+#endif
diff --git a/include/share/compat.h b/include/share/compat.h
new file mode 100644
index 0000000..1ee5cc4
--- /dev/null
+++ b/include/share/compat.h
@@ -0,0 +1,201 @@
+/* libFLAC - Free Lossless Audio Codec library
+ * Copyright (C) 2012-2014  Xiph.org Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* This is the prefered location of all CPP hackery to make $random_compiler
+ * work like something approaching a C99 (or maybe more accurately GNU99)
+ * compiler.
+ *
+ * It is assumed that this header will be included after "config.h".
+ */
+
+#ifndef FLAC__SHARE__COMPAT_H
+#define FLAC__SHARE__COMPAT_H
+
+#if defined _WIN32 && !defined __CYGWIN__
+/* where MSVC puts unlink() */
+# include <io.h>
+#else
+# include <unistd.h>
+#endif
+
+#if defined _MSC_VER || defined __BORLANDC__ || defined __MINGW32__
+#include <sys/types.h> /* for off_t */
+#define FLAC__off_t __int64 /* use this instead of off_t to fix the 2 GB limit */
+#if !defined __MINGW32__
+#define fseeko _fseeki64
+#define ftello _ftelli64
+#else /* MinGW */
+#if !defined(HAVE_FSEEKO)
+#define fseeko fseeko64
+#define ftello ftello64
+#endif
+#endif
+#else
+#define FLAC__off_t off_t
+#endif
+
+#if HAVE_INTTYPES_H
+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+#endif
+
+#if defined(_MSC_VER)
+#define strtoll _strtoi64
+#define strtoull _strtoui64
+#endif
+
+#if defined(_MSC_VER)
+#define inline __inline
+#endif
+
+#if defined __INTEL_COMPILER || (defined _MSC_VER && defined _WIN64)
+/* MSVS generates VERY slow 32-bit code with __restrict */
+#define flac_restrict __restrict
+#elif defined __GNUC__
+#define flac_restrict __restrict__
+#else
+#define flac_restrict
+#endif
+
+#define FLAC__U64L(x) x##ULL
+
+#if defined _MSC_VER || defined __BORLANDC__ || defined __MINGW32__
+#define FLAC__STRCASECMP stricmp
+#define FLAC__STRNCASECMP strnicmp
+#else
+#define FLAC__STRCASECMP strcasecmp
+#define FLAC__STRNCASECMP strncasecmp
+#endif
+
+#if defined _MSC_VER || defined __MINGW32__ || defined __CYGWIN__ || defined __EMX__
+#include <io.h> /* for _setmode(), chmod() */
+#include <fcntl.h> /* for _O_BINARY */
+#else
+#include <unistd.h> /* for chown(), unlink() */
+#endif
+
+#if defined _MSC_VER || defined __BORLANDC__ || defined __MINGW32__
+#if defined __BORLANDC__
+#include <utime.h> /* for utime() */
+#else
+#include <sys/utime.h> /* for utime() */
+#endif
+#else
+#include <sys/types.h> /* some flavors of BSD (like OS X) require this to get time_t */
+#include <utime.h> /* for utime() */
+#endif
+
+#if defined _MSC_VER
+#  if _MSC_VER >= 1600
+/* Visual Studio 2010 has decent C99 support */
+#    include <stdint.h>
+#    define PRIu64 "llu"
+#    define PRId64 "lld"
+#    define PRIx64 "llx"
+#  else
+#    include <limits.h>
+#    ifndef UINT32_MAX
+#      define UINT32_MAX _UI32_MAX
+#    endif
+     typedef unsigned __int64 uint64_t;
+     typedef unsigned __int32 uint32_t;
+     typedef unsigned __int16 uint16_t;
+     typedef unsigned __int8 uint8_t;
+     typedef __int64 int64_t;
+     typedef __int32 int32_t;
+     typedef __int16 int16_t;
+     typedef __int8  int8_t;
+#    define PRIu64 "I64u"
+#    define PRId64 "I64d"
+#    define PRIx64 "I64x"
+#  endif
+#endif /* defined _MSC_VER */
+
+#ifdef _WIN32
+/* All char* strings are in UTF-8 format. Added to support Unicode files on Windows */
+#include "share/win_utf8_io.h"
+
+#define flac_printf printf_utf8
+#define flac_fprintf fprintf_utf8
+#define flac_vfprintf vfprintf_utf8
+#define flac_fopen fopen_utf8
+#define flac_chmod chmod_utf8
+#define flac_utime utime_utf8
+#define flac_unlink unlink_utf8
+#define flac_rename rename_utf8
+#define flac_stat _stat64_utf8
+
+#else
+
+#define flac_printf printf
+#define flac_fprintf fprintf
+#define flac_vfprintf vfprintf
+#define flac_fopen fopen
+#define flac_chmod chmod
+#define flac_utime utime
+#define flac_unlink unlink
+#define flac_rename rename
+#define flac_stat stat
+
+#endif
+
+#ifdef _WIN32
+#define flac_stat_s __stat64 /* stat struct */
+#define flac_fstat _fstat64
+#else
+#define flac_stat_s stat /* stat struct */
+#define flac_fstat fstat
+#endif
+
+#ifndef M_LN2
+#define M_LN2 0.69314718055994530942
+#endif
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+/* FLAC needs to compile and work correctly on systems with a normal ISO C99
+ * snprintf as well as Microsoft Visual Studio which has an non-standards
+ * conformant snprint_s function.
+ *
+ * This function wraps the MS version to behave more like the ISO version.
+ */
+#include <stdarg.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+int flac_snprintf(char *str, size_t size, const char *fmt, ...);
+int flac_vsnprintf(char *str, size_t size, const char *fmt, va_list va);
+#ifdef __cplusplus
+};
+#endif
+
+#endif /* FLAC__SHARE__COMPAT_H */
diff --git a/include/share/endswap.h b/include/share/endswap.h
new file mode 100644
index 0000000..4fde4c1
--- /dev/null
+++ b/include/share/endswap.h
@@ -0,0 +1,78 @@
+/* libFLAC - Free Lossless Audio Codec library
+ * Copyright (C) 2012-2014  Xiph.org Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* It is assumed that this header will be included after "config.h". */
+
+#if HAVE_BSWAP32			/* GCC and Clang */
+
+/* GCC prior to 4.8 didn't provide bswap16 on x86_64 */
+#if ! HAVE_BSWAP16
+static inline unsigned short __builtin_bswap16(unsigned short a)
+{
+	return (a<<8)|(a>>8);
+}
+#endif
+
+#define	ENDSWAP_16(x)		(__builtin_bswap16 (x))
+#define	ENDSWAP_32(x)		(__builtin_bswap32 (x))
+
+#elif defined _MSC_VER		/* Windows. Apparently in <stdlib.h>. */
+
+#define	ENDSWAP_16(x)		(_byteswap_ushort (x))
+#define	ENDSWAP_32(x)		(_byteswap_ulong (x))
+
+#elif defined HAVE_BYTESWAP_H		/* Linux */
+
+#include <byteswap.h>
+
+#define	ENDSWAP_16(x)		(bswap_16 (x))
+#define	ENDSWAP_32(x)		(bswap_32 (x))
+
+#else
+
+#define	ENDSWAP_16(x)		((((x) >> 8) & 0xFF) | (((x) & 0xFF) << 8))
+#define	ENDSWAP_32(x)		((((x) >> 24) & 0xFF) | (((x) >> 8) & 0xFF00) | (((x) & 0xFF00) << 8) | (((x) & 0xFF) << 24))
+
+#endif
+
+
+/* Host to little-endian byte swapping. */
+#if CPU_IS_BIG_ENDIAN
+
+#define H2LE_16(x)		ENDSWAP_16 (x)
+#define H2LE_32(x)		ENDSWAP_32 (x)
+
+#else
+
+#define H2LE_16(x)		(x)
+#define H2LE_32(x)		(x)
+
+#endif
diff --git a/include/share/private.h b/include/share/private.h
new file mode 100644
index 0000000..3a500d3
--- /dev/null
+++ b/include/share/private.h
@@ -0,0 +1,45 @@
+/* libFLAC - Free Lossless Audio Codec library
+ * Copyright (C) 2013-2014  Xiph.org Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FLAC__SHARE__PRIVATE_H
+#define FLAC__SHARE__PRIVATE_H
+
+/*
+ * Unpublished debug routines from libFLAC> This should not be used from any
+ * client code other than code shipped with the FLAC sources.
+ */
+FLAC_API FLAC__bool FLAC__stream_encoder_disable_constant_subframes(FLAC__StreamEncoder *encoder, FLAC__bool value);
+FLAC_API FLAC__bool FLAC__stream_encoder_disable_fixed_subframes(FLAC__StreamEncoder *encoder, FLAC__bool value);
+FLAC_API FLAC__bool FLAC__stream_encoder_disable_verbatim_subframes(FLAC__StreamEncoder *encoder, FLAC__bool value);
+FLAC_API FLAC__bool FLAC__stream_encoder_set_do_md5(FLAC__StreamEncoder *encoder, FLAC__bool value);
+FLAC_API FLAC__bool FLAC__stream_encoder_get_do_md5(const FLAC__StreamEncoder *encoder);
+
+#endif /* FLAC__SHARE__PRIVATE_H */
diff --git a/libFLAC/bitmath.c b/libFLAC/bitmath.c
index 27e25f0..5b58ca9 100644
--- a/libFLAC/bitmath.c
+++ b/libFLAC/bitmath.c
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2001-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,52 +30,11 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#if HAVE_CONFIG_H
+#ifdef HAVE_CONFIG_H
 #  include <config.h>
 #endif
 
 #include "private/bitmath.h"
-#include "FLAC/assert.h"
-
-/* An example of what FLAC__bitmath_ilog2() computes:
- *
- * ilog2( 0) = assertion failure
- * ilog2( 1) = 0
- * ilog2( 2) = 1
- * ilog2( 3) = 1
- * ilog2( 4) = 2
- * ilog2( 5) = 2
- * ilog2( 6) = 2
- * ilog2( 7) = 2
- * ilog2( 8) = 3
- * ilog2( 9) = 3
- * ilog2(10) = 3
- * ilog2(11) = 3
- * ilog2(12) = 3
- * ilog2(13) = 3
- * ilog2(14) = 3
- * ilog2(15) = 3
- * ilog2(16) = 4
- * ilog2(17) = 4
- * ilog2(18) = 4
- */
-unsigned FLAC__bitmath_ilog2(FLAC__uint32 v)
-{
-	unsigned l = 0;
-	FLAC__ASSERT(v > 0);
-	while(v >>= 1)
-		l++;
-	return l;
-}
-
-unsigned FLAC__bitmath_ilog2_wide(FLAC__uint64 v)
-{
-	unsigned l = 0;
-	FLAC__ASSERT(v > 0);
-	while(v >>= 1)
-		l++;
-	return l;
-}
 
 /* An example of what FLAC__bitmath_silog2() computes:
  *
diff --git a/libFLAC/bitreader.c b/libFLAC/bitreader.c
index 7d63e52..f61229b 100644
--- a/libFLAC/bitreader.c
+++ b/libFLAC/bitreader.c
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,53 +30,33 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#if HAVE_CONFIG_H
+#ifdef HAVE_CONFIG_H
 #  include <config.h>
 #endif
 
-#include <stdlib.h> /* for malloc() */
-#include <string.h> /* for memcpy(), memset() */
-#ifdef _MSC_VER
-#include <winsock.h> /* for ntohl() */
-#elif defined FLAC__SYS_DARWIN
-#include <machine/endian.h> /* for ntohl() */
-#elif defined __MINGW32__
-#include <winsock.h> /* for ntohl() */
-#else
-#include <netinet/in.h> /* for ntohl() */
-#endif
+#include <stdlib.h>
+#include <string.h>
 #include "private/bitmath.h"
 #include "private/bitreader.h"
 #include "private/crc.h"
+#include "private/macros.h"
 #include "FLAC/assert.h"
+#include "share/compat.h"
+#include "share/endswap.h"
 
 /* Things should be fastest when this matches the machine word size */
-/* WATCHOUT: if you change this you must also change the following #defines down to COUNT_ZERO_MSBS below to match */
-/* WATCHOUT: there are a few places where the code will not work unless brword is >= 32 bits wide */
+/* WATCHOUT: if you change this you must also change the following #defines down to FLAC__clz_uint32 below to match */
+/* WATCHOUT: there are a few places where the code will not work unless uint32_t is >= 32 bits wide */
 /*           also, some sections currently only have fast versions for 4 or 8 bytes per word */
-typedef FLAC__uint32 brword;
-#define FLAC__BYTES_PER_WORD 4
-#define FLAC__BITS_PER_WORD 32
+#define FLAC__BYTES_PER_WORD 4		/* sizeof uint32_t */
+#define FLAC__BITS_PER_WORD (8 * FLAC__BYTES_PER_WORD)
 #define FLAC__WORD_ALL_ONES ((FLAC__uint32)0xffffffff)
-/* SWAP_BE_WORD_TO_HOST swaps bytes in a brword (which is always big-endian) if necessary to match host byte order */
+/* SWAP_BE_WORD_TO_HOST swaps bytes in a uint32_t (which is always big-endian) if necessary to match host byte order */
 #if WORDS_BIGENDIAN
 #define SWAP_BE_WORD_TO_HOST(x) (x)
 #else
-#ifdef _MSC_VER
-#define SWAP_BE_WORD_TO_HOST(x) local_swap32_(x)
-#else
-#define SWAP_BE_WORD_TO_HOST(x) ntohl(x)
-#endif
+#define SWAP_BE_WORD_TO_HOST(x) ENDSWAP_32(x)
 #endif
-/* counts the # of zero MSBs in a word */
-#define COUNT_ZERO_MSBS(word) ( \
-	(word) <= 0xffff ? \
-		( (word) <= 0xff? byte_to_unary_table[word] + 24 : byte_to_unary_table[(word) >> 8] + 16 ) : \
-		( (word) <= 0xffffff? byte_to_unary_table[word >> 16] + 8 : byte_to_unary_table[(word) >> 24] ) \
-)
-/* this alternate might be slightly faster on some systems/compilers: */
-#define COUNT_ZERO_MSBS2(word) ( (word) <= 0xff ? byte_to_unary_table[word] + 24 : ((word) <= 0xffff ? byte_to_unary_table[(word) >> 8] + 16 : ((word) <= 0xffffff ? byte_to_unary_table[(word) >> 16] + 8 : byte_to_unary_table[(word) >> 24])) )
-
 
 /*
  * This should be at least twice as large as the largest number of words
@@ -93,50 +74,10 @@ typedef FLAC__uint32 brword;
  */
 static const unsigned FLAC__BITREADER_DEFAULT_CAPACITY = 65536u / FLAC__BITS_PER_WORD; /* in words */
 
-static const unsigned char byte_to_unary_table[] = {
-	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
-	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-#ifdef min
-#undef min
-#endif
-#define min(x,y) ((x)<(y)?(x):(y))
-#ifdef max
-#undef max
-#endif
-#define max(x,y) ((x)>(y)?(x):(y))
-
-/* adjust for compilers that can't understand using LLU suffix for uint64_t literals */
-#ifdef _MSC_VER
-#define FLAC__U64L(x) x
-#else
-#define FLAC__U64L(x) x##LLU
-#endif
-
-#ifndef FLaC__INLINE
-#define FLaC__INLINE
-#endif
-
-/* WATCHOUT: assembly routines rely on the order in which these fields are declared */
 struct FLAC__BitReader {
 	/* any partially-consumed word at the head will stay right-justified as bits are consumed from the left */
 	/* any incomplete word at the tail will be left-justified, and bytes from the read callback are added on the right */
-	brword *buffer;
+	uint32_t *buffer;
 	unsigned capacity; /* in words */
 	unsigned words; /* # of completed words in buffer */
 	unsigned bytes; /* # of bytes in incomplete word at buffer[words] */
@@ -146,36 +87,9 @@ struct FLAC__BitReader {
 	unsigned crc16_align; /* the number of bits in the current consumed word that should not be CRC'd */
 	FLAC__BitReaderReadCallback read_callback;
 	void *client_data;
-	FLAC__CPUInfo cpu_info;
 };
 
-#ifdef _MSC_VER
-/* OPT: an MSVC built-in would be better */
-static _inline FLAC__uint32 local_swap32_(FLAC__uint32 x)
-{
-	x = ((x<<8)&0xFF00FF00) | ((x>>8)&0x00FF00FF);
-	return (x>>16) | (x<<16);
-}
-static void local_swap32_block_(FLAC__uint32 *start, FLAC__uint32 len)
-{
-	__asm {
-		mov edx, start
-		mov ecx, len
-		test ecx, ecx
-loop1:
-		jz done1
-		mov eax, [edx]
-		bswap eax
-		mov [edx], eax
-		add edx, 4
-		dec ecx
-		jmp short loop1
-done1:
-	}
-}
-#endif
-
-static FLaC__INLINE void crc16_update_word_(FLAC__BitReader *br, brword word)
+static inline void crc16_update_word_(FLAC__BitReader *br, uint32_t word)
 {
 	register unsigned crc = br->read_crc16;
 #if FLAC__BYTES_PER_WORD == 4
@@ -204,8 +118,7 @@ static FLaC__INLINE void crc16_update_word_(FLAC__BitReader *br, brword word)
 	br->crc16_align = 0;
 }
 
-/* would be static except it needs to be called by asm routines */
-FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br)
+static FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br)
 {
 	unsigned start, end;
 	size_t bytes;
@@ -229,7 +142,7 @@ FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br)
 		return false; /* no space left, buffer is too small; see note for FLAC__BITREADER_DEFAULT_CAPACITY  */
 	target = ((FLAC__byte*)(br->buffer+br->words)) + br->bytes;
 
-	/* before reading, if the existing reader looks like this (say brword is 32 bits wide)
+	/* before reading, if the existing reader looks like this (say uint32_t is 32 bits wide)
 	 *   bitstream :  11 22 33 44 55            br->words=1 br->bytes=1 (partial tail word is left-justified)
 	 *   buffer[BE]:  11 22 33 44 55 ?? ?? ??   (shown layed out as bytes sequentially in memory)
 	 *   buffer[LE]:  44 33 22 11 ?? ?? ?? 55   (?? being don't-care)
@@ -263,13 +176,6 @@ FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br)
 #if WORDS_BIGENDIAN
 #else
 	end = (br->words*FLAC__BYTES_PER_WORD + br->bytes + bytes + (FLAC__BYTES_PER_WORD-1)) / FLAC__BYTES_PER_WORD;
-# if defined(_MSC_VER) && (FLAC__BYTES_PER_WORD == 4)
-	if(br->cpu_info.type == FLAC__CPUINFO_TYPE_IA32 && br->cpu_info.data.ia32.bswap) {
-		start = br->words;
-		local_swap32_block_(br->buffer + start, end - start);
-	}
-	else
-# endif
 	for(start = br->words; start < end; start++)
 		br->buffer[start] = SWAP_BE_WORD_TO_HOST(br->buffer[start]);
 #endif
@@ -295,7 +201,7 @@ FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br)
 
 FLAC__BitReader *FLAC__bitreader_new(void)
 {
-	FLAC__BitReader *br = (FLAC__BitReader*)calloc(1, sizeof(FLAC__BitReader));
+	FLAC__BitReader *br = calloc(1, sizeof(FLAC__BitReader));
 
 	/* calloc() implies:
 		memset(br, 0, sizeof(FLAC__BitReader));
@@ -323,19 +229,18 @@ void FLAC__bitreader_delete(FLAC__BitReader *br)
  *
  ***********************************************************************/
 
-FLAC__bool FLAC__bitreader_init(FLAC__BitReader *br, FLAC__CPUInfo cpu, FLAC__BitReaderReadCallback rcb, void *cd)
+FLAC__bool FLAC__bitreader_init(FLAC__BitReader *br, FLAC__BitReaderReadCallback rcb, void *cd)
 {
 	FLAC__ASSERT(0 != br);
 
 	br->words = br->bytes = 0;
 	br->consumed_words = br->consumed_bits = 0;
 	br->capacity = FLAC__BITREADER_DEFAULT_CAPACITY;
-	br->buffer = (brword*)malloc(sizeof(brword) * br->capacity);
+	br->buffer = malloc(sizeof(uint32_t) * br->capacity);
 	if(br->buffer == 0)
 		return false;
 	br->read_callback = rcb;
 	br->client_data = cd;
-	br->cpu_info = cpu;
 
 	return true;
 }
@@ -410,29 +315,29 @@ FLAC__uint16 FLAC__bitreader_get_read_crc16(FLAC__BitReader *br)
 
 	/* CRC any tail bytes in a partially-consumed word */
 	if(br->consumed_bits) {
-		const brword tail = br->buffer[br->consumed_words];
+		const uint32_t tail = br->buffer[br->consumed_words];
 		for( ; br->crc16_align < br->consumed_bits; br->crc16_align += 8)
 			br->read_crc16 = FLAC__CRC16_UPDATE((unsigned)((tail >> (FLAC__BITS_PER_WORD-8-br->crc16_align)) & 0xff), br->read_crc16);
 	}
 	return br->read_crc16;
 }
 
-FLaC__INLINE FLAC__bool FLAC__bitreader_is_consumed_byte_aligned(const FLAC__BitReader *br)
+inline FLAC__bool FLAC__bitreader_is_consumed_byte_aligned(const FLAC__BitReader *br)
 {
 	return ((br->consumed_bits & 7) == 0);
 }
 
-FLaC__INLINE unsigned FLAC__bitreader_bits_left_for_byte_alignment(const FLAC__BitReader *br)
+inline unsigned FLAC__bitreader_bits_left_for_byte_alignment(const FLAC__BitReader *br)
 {
 	return 8 - (br->consumed_bits & 7);
 }
 
-FLaC__INLINE unsigned FLAC__bitreader_get_input_bits_unconsumed(const FLAC__BitReader *br)
+inline unsigned FLAC__bitreader_get_input_bits_unconsumed(const FLAC__BitReader *br)
 {
 	return (br->words-br->consumed_words)*FLAC__BITS_PER_WORD + br->bytes*8 - br->consumed_bits;
 }
 
-FLaC__INLINE FLAC__bool FLAC__bitreader_read_raw_uint32(FLAC__BitReader *br, FLAC__uint32 *val, unsigned bits)
+FLAC__bool FLAC__bitreader_read_raw_uint32(FLAC__BitReader *br, FLAC__uint32 *val, unsigned bits)
 {
 	FLAC__ASSERT(0 != br);
 	FLAC__ASSERT(0 != br->buffer);
@@ -458,7 +363,7 @@ FLaC__INLINE FLAC__bool FLAC__bitreader_read_raw_uint32(FLAC__BitReader *br, FLA
 		if(br->consumed_bits) {
 			/* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
 			const unsigned n = FLAC__BITS_PER_WORD - br->consumed_bits;
-			const brword word = br->buffer[br->consumed_words];
+			const uint32_t word = br->buffer[br->consumed_words];
 			if(bits < n) {
 				*val = (word & (FLAC__WORD_ALL_ONES >> br->consumed_bits)) >> (n-bits);
 				br->consumed_bits += bits;
@@ -477,7 +382,7 @@ FLaC__INLINE FLAC__bool FLAC__bitreader_read_raw_uint32(FLAC__BitReader *br, FLA
 			return true;
 		}
 		else {
-			const brword word = br->buffer[br->consumed_words];
+			const uint32_t word = br->buffer[br->consumed_words];
 			if(bits < FLAC__BITS_PER_WORD) {
 				*val = word >> (FLAC__BITS_PER_WORD-bits);
 				br->consumed_bits = bits;
@@ -543,7 +448,7 @@ FLAC__bool FLAC__bitreader_read_raw_uint64(FLAC__BitReader *br, FLAC__uint64 *va
 	return true;
 }
 
-FLaC__INLINE FLAC__bool FLAC__bitreader_read_uint32_little_endian(FLAC__BitReader *br, FLAC__uint32 *val)
+inline FLAC__bool FLAC__bitreader_read_uint32_little_endian(FLAC__BitReader *br, FLAC__uint32 *val)
 {
 	FLAC__uint32 x8, x32 = 0;
 
@@ -583,7 +488,7 @@ FLAC__bool FLAC__bitreader_skip_bits_no_crc(FLAC__BitReader *br, unsigned bits)
 		FLAC__uint32 x;
 
 		if(n != 0) {
-			m = min(8-n, bits);
+			m = flac_min(8-n, bits);
 			if(!FLAC__bitreader_read_raw_uint32(br, &x, m))
 				return false;
 			bits -= m;
@@ -658,7 +563,7 @@ FLAC__bool FLAC__bitreader_read_byte_block_aligned_no_crc(FLAC__BitReader *br, F
 	/* step 2: read whole words in chunks */
 	while(nvals >= FLAC__BYTES_PER_WORD) {
 		if(br->consumed_words < br->words) {
-			const brword word = br->buffer[br->consumed_words++];
+			const uint32_t word = br->buffer[br->consumed_words++];
 #if FLAC__BYTES_PER_WORD == 4
 			val[0] = (FLAC__byte)(word >> 24);
 			val[1] = (FLAC__byte)(word >> 16);
@@ -694,7 +599,7 @@ FLAC__bool FLAC__bitreader_read_byte_block_aligned_no_crc(FLAC__BitReader *br, F
 	return true;
 }
 
-FLaC__INLINE FLAC__bool FLAC__bitreader_read_unary_unsigned(FLAC__BitReader *br, unsigned *val)
+FLAC__bool FLAC__bitreader_read_unary_unsigned(FLAC__BitReader *br, unsigned *val)
 #if 0 /* slow but readable version */
 {
 	unsigned bit;
@@ -723,9 +628,9 @@ FLaC__INLINE FLAC__bool FLAC__bitreader_read_unary_unsigned(FLAC__BitReader *br,
 	*val = 0;
 	while(1) {
 		while(br->consumed_words < br->words) { /* if we've not consumed up to a partial tail word... */
-			brword b = br->buffer[br->consumed_words] << br->consumed_bits;
+			uint32_t b = br->buffer[br->consumed_words] << br->consumed_bits;
 			if(b) {
-				i = COUNT_ZERO_MSBS(b);
+				i = FLAC__clz_uint32(b);
 				*val += i;
 				i++;
 				br->consumed_bits += i;
@@ -751,11 +656,11 @@ FLaC__INLINE FLAC__bool FLAC__bitreader_read_unary_unsigned(FLAC__BitReader *br,
 		 * us data a byte at a time (unlikely), br->consumed_bits may not
 		 * be zero.
 		 */
-		if(br->bytes) {
+		if(br->bytes*8 > br->consumed_bits) {
 			const unsigned end = br->bytes * 8;
-			brword b = (br->buffer[br->consumed_words] & (FLAC__WORD_ALL_ONES << (FLAC__BITS_PER_WORD-end))) << br->consumed_bits;
+			uint32_t b = (br->buffer[br->consumed_words] & (FLAC__WORD_ALL_ONES << (FLAC__BITS_PER_WORD-end))) << br->consumed_bits;
 			if(b) {
-				i = COUNT_ZERO_MSBS(b);
+				i = FLAC__clz_uint32(b);
 				*val += i;
 				i++;
 				br->consumed_bits += i;
@@ -764,7 +669,7 @@ FLaC__INLINE FLAC__bool FLAC__bitreader_read_unary_unsigned(FLAC__BitReader *br,
 			}
 			else {
 				*val += end - br->consumed_bits;
-				br->consumed_bits += end;
+				br->consumed_bits = end;
 				FLAC__ASSERT(br->consumed_bits < FLAC__BITS_PER_WORD);
 				/* didn't find stop bit yet, have to keep going... */
 			}
@@ -803,379 +708,144 @@ FLAC__bool FLAC__bitreader_read_rice_signed(FLAC__BitReader *br, int *val, unsig
 }
 
 /* this is by far the most heavily used reader call.  it ain't pretty but it's fast */
-/* a lot of the logic is copied, then adapted, from FLAC__bitreader_read_unary_unsigned() and FLAC__bitreader_read_raw_uint32() */
 FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter)
-/* OPT: possibly faster version for use with MSVC */
-#ifdef _MSC_VER
 {
-	unsigned i;
-	unsigned uval = 0;
-	unsigned bits; /* the # of binary LSBs left to read to finish a rice codeword */
-
 	/* try and get br->consumed_words and br->consumed_bits into register;
 	 * must remember to flush them back to *br before calling other
-	 * bitwriter functions that use them, and before returning */
-	register unsigned cwords;
-	register unsigned cbits;
+	 * bitreader functions that use them, and before returning */
+	unsigned cwords, words, lsbs, msbs, x, y;
+	unsigned ucbits; /* keep track of the number of unconsumed bits in word */
+	uint32_t b;
+	int *val, *end;
 
 	FLAC__ASSERT(0 != br);
 	FLAC__ASSERT(0 != br->buffer);
 	/* WATCHOUT: code does not work with <32bit words; we can make things much faster with this assertion */
 	FLAC__ASSERT(FLAC__BITS_PER_WORD >= 32);
 	FLAC__ASSERT(parameter < 32);
-	/* the above two asserts also guarantee that the binary part never straddles more that 2 words, so we don't have to loop to read it */
-
-	if(nvals == 0)
-		return true;
-
-	cbits = br->consumed_bits;
-	cwords = br->consumed_words;
+	/* the above two asserts also guarantee that the binary part never straddles more than 2 words, so we don't have to loop to read it */
 
-	while(1) {
+	val = vals;
+	end = vals + nvals;
 
-		/* read unary part */
-		while(1) {
-			while(cwords < br->words) { /* if we've not consumed up to a partial tail word... */
-				brword b = br->buffer[cwords] << cbits;
-				if(b) {
-#if 0 /* slower, probably due to bad register allocation... */ && defined FLAC__CPU_IA32 && !defined FLAC__NO_ASM && FLAC__BITS_PER_WORD == 32
-					__asm {
-						bsr eax, b
-						not eax
-						and eax, 31
-						mov i, eax
-					}
-#else
-					i = COUNT_ZERO_MSBS(b);
-#endif
-					uval += i;
-					bits = parameter;
-					i++;
-					cbits += i;
-					if(cbits == FLAC__BITS_PER_WORD) {
-						crc16_update_word_(br, br->buffer[cwords]);
-						cwords++;
-						cbits = 0;
-					}
-					goto break1;
-				}
-				else {
-					uval += FLAC__BITS_PER_WORD - cbits;
-					crc16_update_word_(br, br->buffer[cwords]);
-					cwords++;
-					cbits = 0;
-					/* didn't find stop bit yet, have to keep going... */
-				}
-			}
-			/* at this point we've eaten up all the whole words; have to try
-			 * reading through any tail bytes before calling the read callback.
-			 * this is a repeat of the above logic adjusted for the fact we
-			 * don't have a whole word.  note though if the client is feeding
-			 * us data a byte at a time (unlikely), br->consumed_bits may not
-			 * be zero.
-			 */
-			if(br->bytes) {
-				const unsigned end = br->bytes * 8;
-				brword b = (br->buffer[cwords] & (FLAC__WORD_ALL_ONES << (FLAC__BITS_PER_WORD-end))) << cbits;
-				if(b) {
-					i = COUNT_ZERO_MSBS(b);
-					uval += i;
-					bits = parameter;
-					i++;
-					cbits += i;
-					FLAC__ASSERT(cbits < FLAC__BITS_PER_WORD);
-					goto break1;
-				}
-				else {
-					uval += end - cbits;
-					cbits += end;
-					FLAC__ASSERT(cbits < FLAC__BITS_PER_WORD);
-					/* didn't find stop bit yet, have to keep going... */
-				}
-			}
-			/* flush registers and read; bitreader_read_from_client_() does
-			 * not touch br->consumed_bits at all but we still need to set
-			 * it in case it fails and we have to return false.
-			 */
-			br->consumed_bits = cbits;
-			br->consumed_words = cwords;
-			if(!bitreader_read_from_client_(br))
+	if(parameter == 0) {
+		while(val < end) {
+			/* read the unary MSBs and end bit */
+			if(!FLAC__bitreader_read_unary_unsigned(br, &msbs))
 				return false;
-			cwords = br->consumed_words;
-		}
-break1:
-		/* read binary part */
-		FLAC__ASSERT(cwords <= br->words);
-
-		if(bits) {
-			while((br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits < bits) {
-				/* flush registers and read; bitreader_read_from_client_() does
-				 * not touch br->consumed_bits at all but we still need to set
-				 * it in case it fails and we have to return false.
-				 */
-				br->consumed_bits = cbits;
-				br->consumed_words = cwords;
-				if(!bitreader_read_from_client_(br))
-					return false;
-				cwords = br->consumed_words;
-			}
-			if(cwords < br->words) { /* if we've not consumed up to a partial tail word... */
-				if(cbits) {
-					/* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
-					const unsigned n = FLAC__BITS_PER_WORD - cbits;
-					const brword word = br->buffer[cwords];
-					if(bits < n) {
-						uval <<= bits;
-						uval |= (word & (FLAC__WORD_ALL_ONES >> cbits)) >> (n-bits);
-						cbits += bits;
-						goto break2;
-					}
-					uval <<= n;
-					uval |= word & (FLAC__WORD_ALL_ONES >> cbits);
-					bits -= n;
-					crc16_update_word_(br, word);
-					cwords++;
-					cbits = 0;
-					if(bits) { /* if there are still bits left to read, there have to be less than 32 so they will all be in the next word */
-						uval <<= bits;
-						uval |= (br->buffer[cwords] >> (FLAC__BITS_PER_WORD-bits));
-						cbits = bits;
-					}
-					goto break2;
-				}
-				else {
-					FLAC__ASSERT(bits < FLAC__BITS_PER_WORD);
-					uval <<= bits;
-					uval |= br->buffer[cwords] >> (FLAC__BITS_PER_WORD-bits);
-					cbits = bits;
-					goto break2;
-				}
-			}
-			else {
-				/* in this case we're starting our read at a partial tail word;
-				 * the reader has guaranteed that we have at least 'bits' bits
-				 * available to read, which makes this case simpler.
-				 */
-				uval <<= bits;
-				if(cbits) {
-					/* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
-					FLAC__ASSERT(cbits + bits <= br->bytes*8);
-					uval |= (br->buffer[cwords] & (FLAC__WORD_ALL_ONES >> cbits)) >> (FLAC__BITS_PER_WORD-cbits-bits);
-					cbits += bits;
-					goto break2;
-				}
-				else {
-					uval |= br->buffer[cwords] >> (FLAC__BITS_PER_WORD-bits);
-					cbits += bits;
-					goto break2;
-				}
-			}
-		}
-break2:
-		/* compose the value */
-		*vals = (int)(uval >> 1 ^ -(int)(uval & 1));
 
-		/* are we done? */
-		--nvals;
-		if(nvals == 0) {
-			br->consumed_bits = cbits;
-			br->consumed_words = cwords;
-			return true;
+			*val++ = (int)(msbs >> 1) ^ -(int)(msbs & 1);
 		}
 
-		uval = 0;
-		++vals;
-
+		return true;
 	}
-}
-#else
-{
-	unsigned i;
-	unsigned uval = 0;
 
-	/* try and get br->consumed_words and br->consumed_bits into register;
-	 * must remember to flush them back to *br before calling other
-	 * bitwriter functions that use them, and before returning */
-	register unsigned cwords;
-	register unsigned cbits;
-	unsigned ucbits; /* keep track of the number of unconsumed bits in the buffer */
+	FLAC__ASSERT(parameter > 0);
 
-	FLAC__ASSERT(0 != br);
-	FLAC__ASSERT(0 != br->buffer);
-	/* WATCHOUT: code does not work with <32bit words; we can make things much faster with this assertion */
-	FLAC__ASSERT(FLAC__BITS_PER_WORD >= 32);
-	FLAC__ASSERT(parameter < 32);
-	/* the above two asserts also guarantee that the binary part never straddles more than 2 words, so we don't have to loop to read it */
+	cwords = br->consumed_words;
+	words = br->words;
 
-	if(nvals == 0)
-		return true;
+	/* if we've not consumed up to a partial tail word... */
+	if(cwords >= words) {
+		x = 0;
+		goto process_tail;
+	}
 
-	cbits = br->consumed_bits;
-	cwords = br->consumed_words;
-	ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
+	ucbits = FLAC__BITS_PER_WORD - br->consumed_bits;
+	b = br->buffer[cwords] << br->consumed_bits;  /* keep unconsumed bits aligned to left */
 
-	while(1) {
+	while(val < end) {
+		/* read the unary MSBs and end bit */
+		x = y = FLAC__clz2_uint32(b);
+		if(x == FLAC__BITS_PER_WORD) {
+			x = ucbits;
+			do {
+				/* didn't find stop bit yet, have to keep going... */
+				crc16_update_word_(br, br->buffer[cwords++]);
+				if (cwords >= words)
+					goto incomplete_msbs;
+				b = br->buffer[cwords];
+				y = FLAC__clz2_uint32(b);
+				x += y;
+			} while(y == FLAC__BITS_PER_WORD);
+		}
+		b <<= y;
+		b <<= 1; /* account for stop bit */
+		ucbits = (ucbits - x - 1) % FLAC__BITS_PER_WORD;
+		msbs = x;
+
+		/* read the binary LSBs */
+		x = b >> (FLAC__BITS_PER_WORD - parameter);
+		if(parameter <= ucbits) {
+			ucbits -= parameter;
+			b <<= parameter;
+		} else {
+			/* there are still bits left to read, they will all be in the next word */
+			crc16_update_word_(br, br->buffer[cwords++]);
+			if (cwords >= words)
+				goto incomplete_lsbs;
+			b = br->buffer[cwords];
+			ucbits += FLAC__BITS_PER_WORD - parameter;
+			x |= b >> ucbits;
+			b <<= FLAC__BITS_PER_WORD - ucbits;
+		}
+		lsbs = x;
 
-		/* read unary part */
-		while(1) {
-			while(cwords < br->words) { /* if we've not consumed up to a partial tail word... */
-				brword b = br->buffer[cwords] << cbits;
-				if(b) {
-#if 0 /* is not discernably faster... */ && defined FLAC__CPU_IA32 && !defined FLAC__NO_ASM && FLAC__BITS_PER_WORD == 32 && defined __GNUC__
-					asm volatile (
-						"bsrl %1, %0;"
-						"notl %0;"
-						"andl $31, %0;"
-						: "=r"(i)
-						: "r"(b)
-					);
-#else
-					i = COUNT_ZERO_MSBS(b);
-#endif
-					uval += i;
-					cbits += i;
-					cbits++; /* skip over stop bit */
-					if(cbits >= FLAC__BITS_PER_WORD) { /* faster way of testing if(cbits == FLAC__BITS_PER_WORD) */
-						crc16_update_word_(br, br->buffer[cwords]);
-						cwords++;
-						cbits = 0;
-					}
-					goto break1;
-				}
-				else {
-					uval += FLAC__BITS_PER_WORD - cbits;
-					crc16_update_word_(br, br->buffer[cwords]);
-					cwords++;
-					cbits = 0;
-					/* didn't find stop bit yet, have to keep going... */
-				}
-			}
-			/* at this point we've eaten up all the whole words; have to try
-			 * reading through any tail bytes before calling the read callback.
-			 * this is a repeat of the above logic adjusted for the fact we
-			 * don't have a whole word.  note though if the client is feeding
-			 * us data a byte at a time (unlikely), br->consumed_bits may not
-			 * be zero.
-			 */
-			if(br->bytes) {
-				const unsigned end = br->bytes * 8;
-				brword b = (br->buffer[cwords] & ~(FLAC__WORD_ALL_ONES >> end)) << cbits;
-				if(b) {
-					i = COUNT_ZERO_MSBS(b);
-					uval += i;
-					cbits += i;
-					cbits++; /* skip over stop bit */
-					FLAC__ASSERT(cbits < FLAC__BITS_PER_WORD);
-					goto break1;
-				}
-				else {
-					uval += end - cbits;
-					cbits += end;
-					FLAC__ASSERT(cbits < FLAC__BITS_PER_WORD);
-					/* didn't find stop bit yet, have to keep going... */
-				}
+		/* compose the value */
+		x = (msbs << parameter) | lsbs;
+		*val++ = (int)(x >> 1) ^ -(int)(x & 1);
+
+		continue;
+
+		/* at this point we've eaten up all the whole words */
+process_tail:
+		do {
+			if(0) {
+incomplete_msbs:
+				br->consumed_bits = 0;
+				br->consumed_words = cwords;
 			}
-			/* flush registers and read; bitreader_read_from_client_() does
-			 * not touch br->consumed_bits at all but we still need to set
-			 * it in case it fails and we have to return false.
-			 */
-			br->consumed_bits = cbits;
-			br->consumed_words = cwords;
-			if(!bitreader_read_from_client_(br))
+
+			/* read the unary MSBs and end bit */
+			if(!FLAC__bitreader_read_unary_unsigned(br, &msbs))
 				return false;
-			cwords = br->consumed_words;
-			ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits + uval;
-			/* + uval to offset our count by the # of unary bits already
-			 * consumed before the read, because we will add these back
-			 * in all at once at break1
-			 */
-		}
-break1:
-		ucbits -= uval;
-		ucbits--; /* account for stop bit */
-
-		/* read binary part */
-		FLAC__ASSERT(cwords <= br->words);
-
-		if(parameter) {
-			while(ucbits < parameter) {
-				/* flush registers and read; bitreader_read_from_client_() does
-				 * not touch br->consumed_bits at all but we still need to set
-				 * it in case it fails and we have to return false.
-				 */
-				br->consumed_bits = cbits;
+			msbs += x;
+			x = ucbits = 0;
+
+			if(0) {
+incomplete_lsbs:
+				br->consumed_bits = 0;
 				br->consumed_words = cwords;
-				if(!bitreader_read_from_client_(br))
-					return false;
-				cwords = br->consumed_words;
-				ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
-			}
-			if(cwords < br->words) { /* if we've not consumed up to a partial tail word... */
-				if(cbits) {
-					/* this also works when consumed_bits==0, it's just slower than necessary for that case */
-					const unsigned n = FLAC__BITS_PER_WORD - cbits;
-					const brword word = br->buffer[cwords];
-					if(parameter < n) {
-						uval <<= parameter;
-						uval |= (word & (FLAC__WORD_ALL_ONES >> cbits)) >> (n-parameter);
-						cbits += parameter;
-					}
-					else {
-						uval <<= n;
-						uval |= word & (FLAC__WORD_ALL_ONES >> cbits);
-						crc16_update_word_(br, word);
-						cwords++;
-						cbits = parameter - n;
-						if(cbits) { /* parameter > n, i.e. if there are still bits left to read, there have to be less than 32 so they will all be in the next word */
-							uval <<= cbits;
-							uval |= (br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits));
-						}
-					}
-				}
-				else {
-					cbits = parameter;
-					uval <<= parameter;
-					uval |= br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits);
-				}
-			}
-			else {
-				/* in this case we're starting our read at a partial tail word;
-				 * the reader has guaranteed that we have at least 'parameter'
-				 * bits available to read, which makes this case simpler.
-				 */
-				uval <<= parameter;
-				if(cbits) {
-					/* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
-					FLAC__ASSERT(cbits + parameter <= br->bytes*8);
-					uval |= (br->buffer[cwords] & (FLAC__WORD_ALL_ONES >> cbits)) >> (FLAC__BITS_PER_WORD-cbits-parameter);
-					cbits += parameter;
-				}
-				else {
-					cbits = parameter;
-					uval |= br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits);
-				}
 			}
-		}
 
-		ucbits -= parameter;
+			/* read the binary LSBs */
+			if(!FLAC__bitreader_read_raw_uint32(br, &lsbs, parameter - ucbits))
+				return false;
+			lsbs = x | lsbs;
 
-		/* compose the value */
-		*vals = (int)(uval >> 1 ^ -(int)(uval & 1));
+			/* compose the value */
+			x = (msbs << parameter) | lsbs;
+			*val++ = (int)(x >> 1) ^ -(int)(x & 1);
+			x = 0;
 
-		/* are we done? */
-		--nvals;
-		if(nvals == 0) {
-			br->consumed_bits = cbits;
-			br->consumed_words = cwords;
-			return true;
-		}
-
-		uval = 0;
-		++vals;
+			cwords = br->consumed_words;
+			words = br->words;
+			ucbits = FLAC__BITS_PER_WORD - br->consumed_bits;
+			b = br->buffer[cwords] << br->consumed_bits;
+		} while(cwords >= words && val < end);
+	}
 
+	if(ucbits == 0 && cwords < words) {
+		/* don't leave the head word with no unconsumed bits */
+		crc16_update_word_(br, br->buffer[cwords++]);
+		ucbits = FLAC__BITS_PER_WORD;
 	}
+
+	br->consumed_bits = FLAC__BITS_PER_WORD - ucbits;
+	br->consumed_words = cwords;
+
+	return true;
 }
-#endif
 
 #if 0 /* UNUSED */
 FLAC__bool FLAC__bitreader_read_golomb_signed(FLAC__BitReader *br, int *val, unsigned parameter)
@@ -1374,3 +1044,16 @@ FLAC__bool FLAC__bitreader_read_utf8_uint64(FLAC__BitReader *br, FLAC__uint64 *v
 	*val = v;
 	return true;
 }
+
+/* These functions are declared inline in this file but are also callable as
+ * externs from elsewhere.
+ * According to the C99 spec, section 6.7.4, simply providing a function
+ * prototype in a header file without 'inline' and making the function inline
+ * in this file should be sufficient.
+ * Unfortunately, the Microsoft VS compiler doesn't pick them up externally. To
+ * fix that we add extern declarations here.
+ */
+extern FLAC__bool FLAC__bitreader_is_consumed_byte_aligned(const FLAC__BitReader *br);
+extern unsigned FLAC__bitreader_bits_left_for_byte_alignment(const FLAC__BitReader *br);
+extern unsigned FLAC__bitreader_get_input_bits_unconsumed(const FLAC__BitReader *br);
+extern FLAC__bool FLAC__bitreader_read_uint32_little_endian(FLAC__BitReader *br, FLAC__uint32 *val);
diff --git a/libFLAC/bitwriter.c b/libFLAC/bitwriter.c
old mode 100755
new mode 100644
index e3e7ad1..76be1bc
--- a/libFLAC/bitwriter.c
+++ b/libFLAC/bitwriter.c
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,45 +30,31 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#if HAVE_CONFIG_H
+#ifdef HAVE_CONFIG_H
 #  include <config.h>
 #endif
 
-#include <stdlib.h> /* for malloc() */
-#include <string.h> /* for memcpy(), memset() */
-#ifdef _MSC_VER
-#include <winsock.h> /* for ntohl() */
-#elif defined FLAC__SYS_DARWIN
-#include <machine/endian.h> /* for ntohl() */
-#elif defined __MINGW32__
-#include <winsock.h> /* for ntohl() */
-#else
-#include <netinet/in.h> /* for ntohl() */
-#endif
-#if 0 /* UNUSED */
-#include "private/bitmath.h"
-#endif
+#include <stdlib.h>
+#include <string.h>
 #include "private/bitwriter.h"
 #include "private/crc.h"
+#include "private/macros.h"
 #include "FLAC/assert.h"
 #include "share/alloc.h"
+#include "share/compat.h"
+#include "share/endswap.h"
 
 /* Things should be fastest when this matches the machine word size */
 /* WATCHOUT: if you change this you must also change the following #defines down to SWAP_BE_WORD_TO_HOST below to match */
-/* WATCHOUT: there are a few places where the code will not work unless bwword is >= 32 bits wide */
-typedef FLAC__uint32 bwword;
+/* WATCHOUT: there are a few places where the code will not work unless uint32_t is >= 32 bits wide */
 #define FLAC__BYTES_PER_WORD 4
 #define FLAC__BITS_PER_WORD 32
 #define FLAC__WORD_ALL_ONES ((FLAC__uint32)0xffffffff)
-/* SWAP_BE_WORD_TO_HOST swaps bytes in a bwword (which is always big-endian) if necessary to match host byte order */
+/* SWAP_BE_WORD_TO_HOST swaps bytes in a uint32_t (which is always big-endian) if necessary to match host byte order */
 #if WORDS_BIGENDIAN
 #define SWAP_BE_WORD_TO_HOST(x) (x)
 #else
-#ifdef _MSC_VER
-#define SWAP_BE_WORD_TO_HOST(x) local_swap32_(x)
-#else
-#define SWAP_BE_WORD_TO_HOST(x) ntohl(x)
-#endif
+#define SWAP_BE_WORD_TO_HOST(x) ENDSWAP_32(x)
 #endif
 
 /*
@@ -76,51 +63,29 @@ typedef FLAC__uint32 bwword;
  * a frame or metadata block, then write that out and clear the buffer for the
  * next one.
  */
-static const unsigned FLAC__BITWRITER_DEFAULT_CAPACITY = 32768u / sizeof(bwword); /* size in words */
+static const unsigned FLAC__BITWRITER_DEFAULT_CAPACITY = 32768u / sizeof(uint32_t); /* size in words */
 /* When growing, increment 4K at a time */
-static const unsigned FLAC__BITWRITER_DEFAULT_INCREMENT = 4096u / sizeof(bwword); /* size in words */
+static const unsigned FLAC__BITWRITER_DEFAULT_INCREMENT = 4096u / sizeof(uint32_t); /* size in words */
 
 #define FLAC__WORDS_TO_BITS(words) ((words) * FLAC__BITS_PER_WORD)
 #define FLAC__TOTAL_BITS(bw) (FLAC__WORDS_TO_BITS((bw)->words) + (bw)->bits)
 
-#ifdef min
-#undef min
-#endif
-#define min(x,y) ((x)<(y)?(x):(y))
-
-/* adjust for compilers that can't understand using LLU suffix for uint64_t literals */
-#ifdef _MSC_VER
-#define FLAC__U64L(x) x
-#else
-#define FLAC__U64L(x) x##LLU
-#endif
-
-#ifndef FLaC__INLINE
-#define FLaC__INLINE
-#endif
-
 struct FLAC__BitWriter {
-	bwword *buffer;
-	bwword accum; /* accumulator; bits are right-justified; when full, accum is appended to buffer */
+	uint32_t *buffer;
+	uint32_t accum; /* accumulator; bits are right-justified; when full, accum is appended to buffer */
 	unsigned capacity; /* capacity of buffer in words */
 	unsigned words; /* # of complete words in buffer */
 	unsigned bits; /* # of used bits in accum */
 };
 
-#ifdef _MSC_VER
-/* OPT: an MSVC built-in would be better */
-static _inline FLAC__uint32 local_swap32_(FLAC__uint32 x)
-{
-	x = ((x<<8)&0xFF00FF00) | ((x>>8)&0x00FF00FF);
-	return (x>>16) | (x<<16);
-}
-#endif
-
 /* * WATCHOUT: The current implementation only grows the buffer. */
-static FLAC__bool bitwriter_grow_(FLAC__BitWriter *bw, unsigned bits_to_add)
+#ifndef __SUNPRO_C
+static
+#endif
+FLAC__bool bitwriter_grow_(FLAC__BitWriter *bw, unsigned bits_to_add)
 {
 	unsigned new_capacity;
-	bwword *new_buffer;
+	uint32_t *new_buffer;
 
 	FLAC__ASSERT(0 != bw);
 	FLAC__ASSERT(0 != bw->buffer);
@@ -142,7 +107,7 @@ static FLAC__bool bitwriter_grow_(FLAC__BitWriter *bw, unsigned bits_to_add)
 	FLAC__ASSERT(new_capacity > bw->capacity);
 	FLAC__ASSERT(new_capacity >= bw->words + ((bw->bits + bits_to_add + FLAC__BITS_PER_WORD - 1) / FLAC__BITS_PER_WORD));
 
-	new_buffer = (bwword*)safe_realloc_mul_2op_(bw->buffer, sizeof(bwword), /*times*/new_capacity);
+	new_buffer = safe_realloc_mul_2op_(bw->buffer, sizeof(uint32_t), /*times*/new_capacity);
 	if(new_buffer == 0)
 		return false;
 	bw->buffer = new_buffer;
@@ -159,7 +124,7 @@ static FLAC__bool bitwriter_grow_(FLAC__BitWriter *bw, unsigned bits_to_add)
 
 FLAC__BitWriter *FLAC__bitwriter_new(void)
 {
-	FLAC__BitWriter *bw = (FLAC__BitWriter*)calloc(1, sizeof(FLAC__BitWriter));
+	FLAC__BitWriter *bw = calloc(1, sizeof(FLAC__BitWriter));
 	/* note that calloc() sets all members to 0 for us */
 	return bw;
 }
@@ -184,7 +149,7 @@ FLAC__bool FLAC__bitwriter_init(FLAC__BitWriter *bw)
 
 	bw->words = bw->bits = 0;
 	bw->capacity = FLAC__BITWRITER_DEFAULT_CAPACITY;
-	bw->buffer = (bwword*)malloc(sizeof(bwword) * bw->capacity);
+	bw->buffer = malloc(sizeof(uint32_t) * bw->capacity);
 	if(bw->buffer == 0)
 		return false;
 
@@ -299,7 +264,7 @@ void FLAC__bitwriter_release_buffer(FLAC__BitWriter *bw)
 	(void)bw;
 }
 
-FLaC__INLINE FLAC__bool FLAC__bitwriter_write_zeroes(FLAC__BitWriter *bw, unsigned bits)
+inline FLAC__bool FLAC__bitwriter_write_zeroes(FLAC__BitWriter *bw, unsigned bits)
 {
 	unsigned n;
 
@@ -313,7 +278,7 @@ FLaC__INLINE FLAC__bool FLAC__bitwriter_write_zeroes(FLAC__BitWriter *bw, unsign
 		return false;
 	/* first part gets to word alignment */
 	if(bw->bits) {
-		n = min(FLAC__BITS_PER_WORD - bw->bits, bits);
+		n = flac_min(FLAC__BITS_PER_WORD - bw->bits, bits);
 		bw->accum <<= n;
 		bits -= n;
 		bw->bits += n;
@@ -337,7 +302,7 @@ FLaC__INLINE FLAC__bool FLAC__bitwriter_write_zeroes(FLAC__BitWriter *bw, unsign
 	return true;
 }
 
-FLaC__INLINE FLAC__bool FLAC__bitwriter_write_raw_uint32(FLAC__BitWriter *bw, FLAC__uint32 val, unsigned bits)
+inline FLAC__bool FLAC__bitwriter_write_raw_uint32(FLAC__BitWriter *bw, FLAC__uint32 val, unsigned bits)
 {
 	register unsigned left;
 
@@ -376,7 +341,7 @@ FLaC__INLINE FLAC__bool FLAC__bitwriter_write_raw_uint32(FLAC__BitWriter *bw, FL
 	return true;
 }
 
-FLaC__INLINE FLAC__bool FLAC__bitwriter_write_raw_int32(FLAC__BitWriter *bw, FLAC__int32 val, unsigned bits)
+inline FLAC__bool FLAC__bitwriter_write_raw_int32(FLAC__BitWriter *bw, FLAC__int32 val, unsigned bits)
 {
 	/* zero-out unused bits */
 	if(bits < 32)
@@ -385,7 +350,7 @@ FLaC__INLINE FLAC__bool FLAC__bitwriter_write_raw_int32(FLAC__BitWriter *bw, FLA
 	return FLAC__bitwriter_write_raw_uint32(bw, (FLAC__uint32)val, bits);
 }
 
-FLaC__INLINE FLAC__bool FLAC__bitwriter_write_raw_uint64(FLAC__BitWriter *bw, FLAC__uint64 val, unsigned bits)
+inline FLAC__bool FLAC__bitwriter_write_raw_uint64(FLAC__BitWriter *bw, FLAC__uint64 val, unsigned bits)
 {
 	/* this could be a little faster but it's not used for much */
 	if(bits > 32) {
@@ -397,7 +362,7 @@ FLaC__INLINE FLAC__bool FLAC__bitwriter_write_raw_uint64(FLAC__BitWriter *bw, FL
 		return FLAC__bitwriter_write_raw_uint32(bw, (FLAC__uint32)val, bits);
 }
 
-FLaC__INLINE FLAC__bool FLAC__bitwriter_write_raw_uint32_little_endian(FLAC__BitWriter *bw, FLAC__uint32 val)
+inline FLAC__bool FLAC__bitwriter_write_raw_uint32_little_endian(FLAC__BitWriter *bw, FLAC__uint32 val)
 {
 	/* this doesn't need to be that fast as currently it is only used for vorbis comments */
 
@@ -413,7 +378,7 @@ FLaC__INLINE FLAC__bool FLAC__bitwriter_write_raw_uint32_little_endian(FLAC__Bit
 	return true;
 }
 
-FLaC__INLINE FLAC__bool FLAC__bitwriter_write_byte_block(FLAC__BitWriter *bw, const FLAC__byte vals[], unsigned nvals)
+inline FLAC__bool FLAC__bitwriter_write_byte_block(FLAC__BitWriter *bw, const FLAC__byte vals[], unsigned nvals)
 {
 	unsigned i;
 
@@ -549,7 +514,7 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL
 
 	FLAC__ASSERT(0 != bw);
 	FLAC__ASSERT(0 != bw->buffer);
-	FLAC__ASSERT(parameter < 8*sizeof(bwword)-1);
+	FLAC__ASSERT(parameter < 8*sizeof(uint32_t)-1);
 	/* WATCHOUT: code does not work with <32bit words; we can make things much faster with this assertion */
 	FLAC__ASSERT(FLAC__BITS_PER_WORD >= 32);
 
@@ -559,30 +524,8 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL
 
 		msbits = uval >> parameter;
 
-#if 0 /* OPT: can remove this special case if it doesn't make up for the extra compare (doesn't make a statistically significant difference with msvc or gcc/x86) */
-		if(bw->bits && bw->bits + msbits + lsbits <= FLAC__BITS_PER_WORD) { /* i.e. if the whole thing fits in the current bwword */
-			/* ^^^ if bw->bits is 0 then we may have filled the buffer and have no free bwword to work in */
-			bw->bits = bw->bits + msbits + lsbits;
-			uval |= mask1; /* set stop bit */
-			uval &= mask2; /* mask off unused top bits */
-			/* NOT: bw->accum <<= msbits + lsbits because msbits+lsbits could be 32, then the shift would be a NOP */
-			bw->accum <<= msbits;
-			bw->accum <<= lsbits;
-			bw->accum |= uval;
-			if(bw->bits == FLAC__BITS_PER_WORD) {
-				bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
-				bw->bits = 0;
-				/* burying the capacity check down here means we have to grow the buffer a little if there are more vals to do */
-				if(bw->capacity <= bw->words && nvals > 1 && !bitwriter_grow_(bw, 1)) {
-					FLAC__ASSERT(bw->capacity == bw->words);
-					return false;
-				}
-			}
-		}
-		else {
-#elif 1 /*@@@@@@ OPT: try this version with MSVC6 to see if better, not much difference for gcc-4 */
-		if(bw->bits && bw->bits + msbits + lsbits < FLAC__BITS_PER_WORD) { /* i.e. if the whole thing fits in the current bwword */
-			/* ^^^ if bw->bits is 0 then we may have filled the buffer and have no free bwword to work in */
+		if(bw->bits && bw->bits + msbits + lsbits < FLAC__BITS_PER_WORD) { /* i.e. if the whole thing fits in the current uint32_t */
+			/* ^^^ if bw->bits is 0 then we may have filled the buffer and have no free uint32_t to work in */
 			bw->bits = bw->bits + msbits + lsbits;
 			uval |= mask1; /* set stop bit */
 			uval &= mask2; /* mask off unused top bits */
@@ -590,10 +533,9 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL
 			bw->accum |= uval;
 		}
 		else {
-#endif
 			/* slightly pessimistic size check but faster than "<= bw->words + (bw->bits+msbits+lsbits+FLAC__BITS_PER_WORD-1)/FLAC__BITS_PER_WORD" */
 			/* OPT: pessimism may cause flurry of false calls to grow_ which eat up all savings before it */
-			if(bw->capacity <= bw->words + bw->bits + msbits + 1/*lsbits always fit in 1 bwword*/ && !bitwriter_grow_(bw, msbits+lsbits))
+			if(bw->capacity <= bw->words + bw->bits + msbits + 1/*lsbits always fit in 1 uint32_t*/ && !bitwriter_grow_(bw, msbits+lsbits))
 				return false;
 
 			if(msbits) {
@@ -645,9 +587,7 @@ break1:
 				bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
 				bw->accum = uval;
 			}
-#if 1
 		}
-#endif
 		vals++;
 		nvals--;
 	}
@@ -887,3 +827,17 @@ FLAC__bool FLAC__bitwriter_zero_pad_to_byte_boundary(FLAC__BitWriter *bw)
 	else
 		return true;
 }
+
+/* These functions are declared inline in this file but are also callable as
+ * externs from elsewhere.
+ * According to the C99 spec, section 6.7.4, simply providing a function
+ * prototype in a header file without 'inline' and making the function inline
+ * in this file should be sufficient.
+ * Unfortunately, the Microsoft VS compiler doesn't pick them up externally. To
+ * fix that we add extern declarations here.
+ */
+extern FLAC__bool FLAC__bitwriter_write_zeroes(FLAC__BitWriter *bw, unsigned bits);
+extern FLAC__bool FLAC__bitwriter_write_raw_int32(FLAC__BitWriter *bw, FLAC__int32 val, unsigned bits);
+extern FLAC__bool FLAC__bitwriter_write_raw_uint64(FLAC__BitWriter *bw, FLAC__uint64 val, unsigned bits);
+extern FLAC__bool FLAC__bitwriter_write_raw_uint32_little_endian(FLAC__BitWriter *bw, FLAC__uint32 val);
+extern FLAC__bool FLAC__bitwriter_write_byte_block(FLAC__BitWriter *bw, const FLAC__byte vals[], unsigned nvals);
diff --git a/libFLAC/cpu.c b/libFLAC/cpu.c
index 60b73bf..40c7950 100644
--- a/libFLAC/cpu.c
+++ b/libFLAC/cpu.c
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2001-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,47 +30,46 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#if HAVE_CONFIG_H
+#ifdef HAVE_CONFIG_H
 #  include <config.h>
 #endif
 
 #include "private/cpu.h"
 #include <stdlib.h>
-#include <stdio.h>
+#include <memory.h>
+#ifdef DEBUG
+# include <stdio.h>
+#endif
 
 #if defined FLAC__CPU_IA32
 # include <signal.h>
-#elif defined FLAC__CPU_PPC
-# if !defined FLAC__NO_ASM
-#  if defined FLAC__SYS_DARWIN
-#   include <sys/sysctl.h>
-#   include <mach/mach.h>
-#   include <mach/mach_host.h>
-#   include <mach/host_info.h>
-#   include <mach/machine.h>
-#   ifndef CPU_SUBTYPE_POWERPC_970
-#    define CPU_SUBTYPE_POWERPC_970 ((cpu_subtype_t) 100)
-#   endif
-#  else /* FLAC__SYS_DARWIN */
-
-#   include <signal.h>
-#   include <setjmp.h>
-
-static sigjmp_buf jmpbuf;
-static volatile sig_atomic_t canjump = 0;
-
-static void sigill_handler (int sig)
+
+static void disable_sse(FLAC__CPUInfo *info)
 {
-	if (!canjump) {
-		signal (sig, SIG_DFL);
-		raise (sig);
-	}
-	canjump = 0;
-	siglongjmp (jmpbuf, 1);
+	info->ia32.sse   = false;
+	info->ia32.sse2  = false;
+	info->ia32.sse3  = false;
+	info->ia32.ssse3 = false;
+	info->ia32.sse41 = false;
+	info->ia32.sse42 = false;
+}
+
+static void disable_avx(FLAC__CPUInfo *info)
+{
+	info->ia32.avx     = false;
+	info->ia32.avx2    = false;
+	info->ia32.fma     = false;
 }
-#  endif /* FLAC__SYS_DARWIN */
-# endif /* FLAC__NO_ASM */
-#endif /* FLAC__CPU_PPC */
+
+#elif defined FLAC__CPU_X86_64
+
+static void disable_avx(FLAC__CPUInfo *info)
+{
+	info->x86.avx     = false;
+	info->x86.avx2    = false;
+	info->x86.fma     = false;
+}
+#endif
 
 #if defined (__NetBSD__) || defined(__OpenBSD__)
 #include <sys/param.h>
@@ -86,25 +86,34 @@ static void sigill_handler (int sig)
 /* how to get sysctlbyname()? */
 #endif
 
+#ifdef FLAC__CPU_IA32
 /* these are flags in EDX of CPUID AX=00000001 */
 static const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV = 0x00008000;
 static const unsigned FLAC__CPUINFO_IA32_CPUID_MMX = 0x00800000;
 static const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR = 0x01000000;
 static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE = 0x02000000;
 static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE2 = 0x04000000;
+#endif
+
 /* these are flags in ECX of CPUID AX=00000001 */
 static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE3 = 0x00000001;
 static const unsigned FLAC__CPUINFO_IA32_CPUID_SSSE3 = 0x00000200;
-/* these are flags in EDX of CPUID AX=80000001 */
-static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW = 0x80000000;
-static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW = 0x40000000;
-static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX = 0x00400000;
+static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE41 = 0x00080000;
+static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE42 = 0x00100000;
 
+#if defined FLAC__AVX_SUPPORTED
+/* these are flags in ECX of CPUID AX=00000001 */
+static const unsigned FLAC__CPUINFO_IA32_CPUID_OSXSAVE = 0x08000000;
+static const unsigned FLAC__CPUINFO_IA32_CPUID_AVX = 0x10000000;
+static const unsigned FLAC__CPUINFO_IA32_CPUID_FMA = 0x00001000;
+/* these are flags in EBX of CPUID AX=00000007 */
+static const unsigned FLAC__CPUINFO_IA32_CPUID_AVX2 = 0x00000020;
+#endif
 
 /*
  * Extra stuff needed for detection of OS support for SSE on IA-32
  */
-#if defined(FLAC__CPU_IA32) && !defined FLAC__NO_ASM && defined FLAC__HAS_NASM && !defined FLAC__NO_SSE_OS && !defined FLAC__SSE_OS
+#if defined(FLAC__CPU_IA32) && !defined FLAC__NO_ASM && (defined FLAC__HAS_NASM || defined FLAC__HAS_X86INTRIN) && !defined FLAC__NO_SSE_OS && !defined FLAC__SSE_OS
 # if defined(__linux__)
 /*
  * If the OS doesn't support SSE, we will get here with a SIGILL.  We
@@ -119,35 +128,14 @@ static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX = 0x00400000;
  *   6 bytes extra in case our estimate is wrong
  * 12 bytes puts us in the NOP "landing zone"
  */
-#  undef USE_OBSOLETE_SIGCONTEXT_FLAVOR /* #define this to use the older signal handler method */
-#  ifdef USE_OBSOLETE_SIGCONTEXT_FLAVOR
-	static void sigill_handler_sse_os(int signal, struct sigcontext sc)
-	{
-		(void)signal;
-		sc.eip += 3 + 3 + 6;
-	}
-#  else
 #   include <sys/ucontext.h>
 	static void sigill_handler_sse_os(int signal, siginfo_t *si, void *uc)
 	{
 		(void)signal, (void)si;
 		((ucontext_t*)uc)->uc_mcontext.gregs[14/*REG_EIP*/] += 3 + 3 + 6;
 	}
-#  endif
 # elif defined(_MSC_VER)
 #  include <windows.h>
-#  undef USE_TRY_CATCH_FLAVOR /* #define this to use the try/catch method for catching illegal opcode exception */
-#  ifdef USE_TRY_CATCH_FLAVOR
-#  else
-	LONG CALLBACK sigill_handler_sse_os(EXCEPTION_POINTERS *ep)
-	{
-		if(ep->ExceptionRecord->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION) {
-			ep->ContextRecord->Eip += 3 + 3 + 6;
-			return EXCEPTION_CONTINUE_EXECUTION;
-		}
-		return EXCEPTION_CONTINUE_SEARCH;
-	}
-#  endif
 # endif
 #endif
 
@@ -158,261 +146,346 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
  * IA32-specific
  */
 #ifdef FLAC__CPU_IA32
+	FLAC__bool ia32_fxsr = false;
+	FLAC__bool ia32_osxsave = false;
+	(void) ia32_fxsr; (void) ia32_osxsave; /* to avoid warnings about unused variables */
+	memset(info, 0, sizeof(*info));
 	info->type = FLAC__CPUINFO_TYPE_IA32;
-#if !defined FLAC__NO_ASM && defined FLAC__HAS_NASM
+#if !defined FLAC__NO_ASM && (defined FLAC__HAS_NASM || defined FLAC__HAS_X86INTRIN)
 	info->use_asm = true; /* we assume a minimum of 80386 with FLAC__CPU_IA32 */
-	info->data.ia32.cpuid = FLAC__cpu_have_cpuid_asm_ia32()? true : false;
-	info->data.ia32.bswap = info->data.ia32.cpuid; /* CPUID => BSWAP since it came after */
-	info->data.ia32.cmov = false;
-	info->data.ia32.mmx = false;
-	info->data.ia32.fxsr = false;
-	info->data.ia32.sse = false;
-	info->data.ia32.sse2 = false;
-	info->data.ia32.sse3 = false;
-	info->data.ia32.ssse3 = false;
-	info->data.ia32._3dnow = false;
-	info->data.ia32.ext3dnow = false;
-	info->data.ia32.extmmx = false;
-	if(info->data.ia32.cpuid) {
-		/* http://www.sandpile.org/ia32/cpuid.htm */
-		FLAC__uint32 flags_edx, flags_ecx;
-		FLAC__cpu_info_asm_ia32(&flags_edx, &flags_ecx);
-		info->data.ia32.cmov  = (flags_edx & FLAC__CPUINFO_IA32_CPUID_CMOV )? true : false;
-		info->data.ia32.mmx   = (flags_edx & FLAC__CPUINFO_IA32_CPUID_MMX  )? true : false;
-		info->data.ia32.fxsr  = (flags_edx & FLAC__CPUINFO_IA32_CPUID_FXSR )? true : false;
-		info->data.ia32.sse   = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE  )? true : false;
-		info->data.ia32.sse2  = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE2 )? true : false;
-		info->data.ia32.sse3  = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false;
-		info->data.ia32.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false;
-
-#ifdef FLAC__USE_3DNOW
-		flags_edx = FLAC__cpu_info_extended_amd_asm_ia32();
-		info->data.ia32._3dnow   = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW   )? true : false;
-		info->data.ia32.ext3dnow = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW)? true : false;
-		info->data.ia32.extmmx   = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX  )? true : false;
+#ifdef FLAC__HAS_X86INTRIN
+	if(!FLAC__cpu_have_cpuid_x86())
+		return;
 #else
-		info->data.ia32._3dnow = info->data.ia32.ext3dnow = info->data.ia32.extmmx = false;
+	if(!FLAC__cpu_have_cpuid_asm_ia32())
+		return;
 #endif
+	{
+		/* http://www.sandpile.org/x86/cpuid.htm */
+#ifdef FLAC__HAS_X86INTRIN
+		FLAC__uint32 flags_eax, flags_ebx, flags_ecx, flags_edx;
+		FLAC__cpu_info_x86(0, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx);
+		info->ia32.intel = (flags_ebx == 0x756E6547 && flags_edx == 0x49656E69 && flags_ecx == 0x6C65746E)? true : false; /* GenuineIntel */
+		FLAC__cpu_info_x86(1, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx);
+#else
+		FLAC__uint32 flags_ecx, flags_edx;
+		FLAC__cpu_info_asm_ia32(&flags_edx, &flags_ecx);
+#endif
+		info->ia32.cmov  = (flags_edx & FLAC__CPUINFO_IA32_CPUID_CMOV )? true : false;
+		info->ia32.mmx   = (flags_edx & FLAC__CPUINFO_IA32_CPUID_MMX  )? true : false;
+		      ia32_fxsr  = (flags_edx & FLAC__CPUINFO_IA32_CPUID_FXSR )? true : false;
+		info->ia32.sse   = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE  )? true : false;
+		info->ia32.sse2  = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE2 )? true : false;
+		info->ia32.sse3  = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false;
+		info->ia32.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false;
+		info->ia32.sse41 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE41)? true : false;
+		info->ia32.sse42 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE42)? true : false;
+#if defined FLAC__HAS_X86INTRIN && defined FLAC__AVX_SUPPORTED
+		    ia32_osxsave = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_OSXSAVE)? true : false;
+		info->ia32.avx   = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_AVX    )? true : false;
+		info->ia32.fma   = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_FMA    )? true : false;
+		FLAC__cpu_info_x86(7, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx);
+		info->ia32.avx2  = (flags_ebx & FLAC__CPUINFO_IA32_CPUID_AVX2   )? true : false;
+#endif
+	}
 
 #ifdef DEBUG
-		fprintf(stderr, "CPU info (IA-32):\n");
-		fprintf(stderr, "  CPUID ...... %c\n", info->data.ia32.cpuid   ? 'Y' : 'n');
-		fprintf(stderr, "  BSWAP ...... %c\n", info->data.ia32.bswap   ? 'Y' : 'n');
-		fprintf(stderr, "  CMOV ....... %c\n", info->data.ia32.cmov    ? 'Y' : 'n');
-		fprintf(stderr, "  MMX ........ %c\n", info->data.ia32.mmx     ? 'Y' : 'n');
-		fprintf(stderr, "  FXSR ....... %c\n", info->data.ia32.fxsr    ? 'Y' : 'n');
-		fprintf(stderr, "  SSE ........ %c\n", info->data.ia32.sse     ? 'Y' : 'n');
-		fprintf(stderr, "  SSE2 ....... %c\n", info->data.ia32.sse2    ? 'Y' : 'n');
-		fprintf(stderr, "  SSE3 ....... %c\n", info->data.ia32.sse3    ? 'Y' : 'n');
-		fprintf(stderr, "  SSSE3 ...... %c\n", info->data.ia32.ssse3   ? 'Y' : 'n');
-		fprintf(stderr, "  3DNow! ..... %c\n", info->data.ia32._3dnow  ? 'Y' : 'n');
-		fprintf(stderr, "  3DNow!-ext . %c\n", info->data.ia32.ext3dnow? 'Y' : 'n');
-		fprintf(stderr, "  3DNow!-MMX . %c\n", info->data.ia32.extmmx  ? 'Y' : 'n');
+	fprintf(stderr, "CPU info (IA-32):\n");
+	fprintf(stderr, "  CMOV ....... %c\n", info->ia32.cmov    ? 'Y' : 'n');
+	fprintf(stderr, "  MMX ........ %c\n", info->ia32.mmx     ? 'Y' : 'n');
+	fprintf(stderr, "  SSE ........ %c\n", info->ia32.sse     ? 'Y' : 'n');
+	fprintf(stderr, "  SSE2 ....... %c\n", info->ia32.sse2    ? 'Y' : 'n');
+	fprintf(stderr, "  SSE3 ....... %c\n", info->ia32.sse3    ? 'Y' : 'n');
+	fprintf(stderr, "  SSSE3 ...... %c\n", info->ia32.ssse3   ? 'Y' : 'n');
+	fprintf(stderr, "  SSE41 ...... %c\n", info->ia32.sse41   ? 'Y' : 'n');
+	fprintf(stderr, "  SSE42 ...... %c\n", info->ia32.sse42   ? 'Y' : 'n');
+# if defined FLAC__HAS_X86INTRIN && defined FLAC__AVX_SUPPORTED
+	fprintf(stderr, "  AVX ........ %c\n", info->ia32.avx     ? 'Y' : 'n');
+	fprintf(stderr, "  FMA ........ %c\n", info->ia32.fma     ? 'Y' : 'n');
+	fprintf(stderr, "  AVX2 ....... %c\n", info->ia32.avx2    ? 'Y' : 'n');
+# endif
 #endif
 
-		/*
-		 * now have to check for OS support of SSE/SSE2
-		 */
-		if(info->data.ia32.fxsr || info->data.ia32.sse || info->data.ia32.sse2) {
+	/*
+	 * now have to check for OS support of SSE instructions
+	 */
+	if(info->ia32.sse) {
 #if defined FLAC__NO_SSE_OS
-			/* assume user knows better than us; turn it off */
-			info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+		/* assume user knows better than us; turn it off */
+		disable_sse(info);
 #elif defined FLAC__SSE_OS
-			/* assume user knows better than us; leave as detected above */
+		/* assume user knows better than us; leave as detected above */
 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) || defined(__APPLE__)
-			int sse = 0;
-			size_t len;
-			/* at least one of these must work: */
-			len = sizeof(sse); sse = sse || (sysctlbyname("hw.instruction_sse", &sse, &len, NULL, 0) == 0 && sse);
-			len = sizeof(sse); sse = sse || (sysctlbyname("hw.optional.sse"   , &sse, &len, NULL, 0) == 0 && sse); /* __APPLE__ ? */
-			if(!sse)
-				info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+		int sse = 0;
+		size_t len;
+		/* at least one of these must work: */
+		len = sizeof(sse); sse = sse || (sysctlbyname("hw.instruction_sse", &sse, &len, NULL, 0) == 0 && sse);
+		len = sizeof(sse); sse = sse || (sysctlbyname("hw.optional.sse"   , &sse, &len, NULL, 0) == 0 && sse); /* __APPLE__ ? */
+		if(!sse)
+			disable_sse(info);
 #elif defined(__NetBSD__) || defined (__OpenBSD__)
 # if __NetBSD_Version__ >= 105250000 || (defined __OpenBSD__)
-			int val = 0, mib[2] = { CTL_MACHDEP, CPU_SSE };
-			size_t len = sizeof(val);
-			if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val)
-				info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
-			else { /* double-check SSE2 */
-				mib[1] = CPU_SSE2;
-				len = sizeof(val);
-				if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val)
-					info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+		int val = 0, mib[2] = { CTL_MACHDEP, CPU_SSE };
+		size_t len = sizeof(val);
+		if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val)
+			disable_sse(info);
+		else { /* double-check SSE2 */
+			mib[1] = CPU_SSE2;
+			len = sizeof(val);
+			if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val) {
+				disable_sse(info);
+				info->ia32.sse = true;
 			}
+		}
 # else
-			info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+		disable_sse(info);
 # endif
 #elif defined(__linux__)
-			int sse = 0;
-			struct sigaction sigill_save;
-#ifdef USE_OBSOLETE_SIGCONTEXT_FLAVOR
-			if(0 == sigaction(SIGILL, NULL, &sigill_save) && signal(SIGILL, (void (*)(int))sigill_handler_sse_os) != SIG_ERR)
-#else
-			struct sigaction sigill_sse;
-			sigill_sse.sa_sigaction = sigill_handler_sse_os;
-			__sigemptyset(&sigill_sse.sa_mask);
-			sigill_sse.sa_flags = SA_SIGINFO | SA_RESETHAND; /* SA_RESETHAND just in case our SIGILL return jump breaks, so we don't get stuck in a loop */
-			if(0 == sigaction(SIGILL, &sigill_sse, &sigill_save))
-#endif
-			{
-				/* http://www.ibiblio.org/gferg/ldp/GCC-Inline-Assembly-HOWTO.html */
-				/* see sigill_handler_sse_os() for an explanation of the following: */
-				asm volatile (
-					"xorl %0,%0\n\t"          /* for some reason, still need to do this to clear 'sse' var */
-					"xorps %%xmm0,%%xmm0\n\t" /* will cause SIGILL if unsupported by OS */
-					"incl %0\n\t"             /* SIGILL handler will jump over this */
-					/* landing zone */
-					"nop\n\t" /* SIGILL jump lands here if "inc" is 9 bytes */
-					"nop\n\t"
-					"nop\n\t"
-					"nop\n\t"
-					"nop\n\t"
-					"nop\n\t"
-					"nop\n\t" /* SIGILL jump lands here if "inc" is 3 bytes (expected) */
-					"nop\n\t"
-					"nop"     /* SIGILL jump lands here if "inc" is 1 byte */
-					: "=r"(sse)
-					: "r"(sse)
-				);
-
-				sigaction(SIGILL, &sigill_save, NULL);
-			}
+		int sse = 0;
+		struct sigaction sigill_save;
+		struct sigaction sigill_sse;
+		sigill_sse.sa_sigaction = sigill_handler_sse_os;
+		__sigemptyset(&sigill_sse.sa_mask);
+		sigill_sse.sa_flags = SA_SIGINFO | SA_RESETHAND; /* SA_RESETHAND just in case our SIGILL return jump breaks, so we don't get stuck in a loop */
+		if(0 == sigaction(SIGILL, &sigill_sse, &sigill_save))
+		{
+			/* http://www.ibiblio.org/gferg/ldp/GCC-Inline-Assembly-HOWTO.html */
+			/* see sigill_handler_sse_os() for an explanation of the following: */
+			asm volatile (
+				"xorps %%xmm0,%%xmm0\n\t" /* will cause SIGILL if unsupported by OS */
+				"incl %0\n\t"             /* SIGILL handler will jump over this */
+				/* landing zone */
+				"nop\n\t" /* SIGILL jump lands here if "inc" is 9 bytes */
+				"nop\n\t"
+				"nop\n\t"
+				"nop\n\t"
+				"nop\n\t"
+				"nop\n\t"
+				"nop\n\t" /* SIGILL jump lands here if "inc" is 3 bytes (expected) */
+				"nop\n\t"
+				"nop"     /* SIGILL jump lands here if "inc" is 1 byte */
+				: "=r"(sse)
+				: "0"(sse)
+			);
 
-			if(!sse)
-				info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+			sigaction(SIGILL, &sigill_save, NULL);
+		}
+
+		if(!sse)
+			disable_sse(info);
 #elif defined(_MSC_VER)
-# ifdef USE_TRY_CATCH_FLAVOR
-			_try {
-				__asm {
-#  if _MSC_VER <= 1200
-					/* VC6 assembler doesn't know SSE, have to emit bytecode instead */
-					_emit 0x0F
-					_emit 0x57
-					_emit 0xC0
-#  else
-					xorps xmm0,xmm0
-#  endif
-				}
-			}
-			_except(EXCEPTION_EXECUTE_HANDLER) {
-				if (_exception_code() == STATUS_ILLEGAL_INSTRUCTION)
-					info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
-			}
-# else
-			int sse = 0;
-			LPTOP_LEVEL_EXCEPTION_FILTER save = SetUnhandledExceptionFilter(sigill_handler_sse_os);
-			/* see GCC version above for explanation */
-			/*  http://msdn2.microsoft.com/en-us/library/4ks26t93.aspx */
-			/*  http://www.codeproject.com/cpp/gccasm.asp */
-			/*  http://www.hick.org/~mmiller/msvc_inline_asm.html */
+		__try {
 			__asm {
-#  if _MSC_VER <= 1200
-				/* VC6 assembler doesn't know SSE, have to emit bytecode instead */
-				_emit 0x0F
-				_emit 0x57
-				_emit 0xC0
-#  else
 				xorps xmm0,xmm0
-#  endif
-				inc sse
-				nop
-				nop
-				nop
-				nop
-				nop
-				nop
-				nop
-				nop
-				nop
 			}
-			SetUnhandledExceptionFilter(save);
-			if(!sse)
-				info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
-# endif
+		}
+		__except(EXCEPTION_EXECUTE_HANDLER) {
+			if (_exception_code() == STATUS_ILLEGAL_INSTRUCTION)
+				disable_sse(info);
+		}
+#elif defined(__GNUC__) /* MinGW goes here */
+		int sse = 0;
+		/* Based on the idea described in Agner Fog's manual "Optimizing subroutines in assembly language" */
+		/* In theory, not guaranteed to detect lack of OS SSE support on some future Intel CPUs, but in practice works (see the aforementioned manual) */
+		if (ia32_fxsr) {
+			struct {
+				FLAC__uint32 buff[128];
+			} __attribute__((aligned(16))) fxsr;
+			FLAC__uint32 old_val, new_val;
+
+			asm volatile ("fxsave %0"  : "=m" (fxsr) : "m" (fxsr));
+			old_val = fxsr.buff[50];
+			fxsr.buff[50] ^= 0x0013c0de;                             /* change value in the buffer */
+			asm volatile ("fxrstor %0" : "=m" (fxsr) : "m" (fxsr));  /* try to change SSE register */
+			fxsr.buff[50] = old_val;                                 /* restore old value in the buffer */
+			asm volatile ("fxsave %0 " : "=m" (fxsr) : "m" (fxsr));  /* old value will be overwritten if SSE register was changed */
+			new_val = fxsr.buff[50];                                 /* == old_val if FXRSTOR didn't change SSE register and (old_val ^ 0x0013c0de) otherwise */
+			fxsr.buff[50] = old_val;                                 /* again restore old value in the buffer */
+			asm volatile ("fxrstor %0" : "=m" (fxsr) : "m" (fxsr));  /* restore old values of registers */
+
+			if ((old_val^new_val) == 0x0013c0de)
+				sse = 1;
+		}
+		if(!sse)
+			disable_sse(info);
 #else
-			/* no way to test, disable to be safe */
-			info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+		/* no way to test, disable to be safe */
+		disable_sse(info);
 #endif
 #ifdef DEBUG
-		fprintf(stderr, "  SSE OS sup . %c\n", info->data.ia32.sse     ? 'Y' : 'n');
+		fprintf(stderr, "  SSE OS sup . %c\n", info->ia32.sse ? 'Y' : 'n');
 #endif
+	}
+	else /* info->ia32.sse == false */
+		disable_sse(info);
 
-		}
+	/*
+	 * now have to check for OS support of AVX instructions
+	 */
+	if(info->ia32.avx && ia32_osxsave) {
+		FLAC__uint32 ecr = FLAC__cpu_xgetbv_x86();
+		if ((ecr & 0x6) != 0x6)
+			disable_avx(info);
+#ifdef DEBUG
+		fprintf(stderr, "  AVX OS sup . %c\n", info->ia32.avx ? 'Y' : 'n');
+#endif
 	}
+	else /* no OS AVX support*/
+		disable_avx(info);
 #else
 	info->use_asm = false;
 #endif
 
 /*
- * PPC-specific
+ * x86-64-specific
  */
-#elif defined FLAC__CPU_PPC
-	info->type = FLAC__CPUINFO_TYPE_PPC;
-# if !defined FLAC__NO_ASM
+#elif defined FLAC__CPU_X86_64
+	FLAC__bool x86_osxsave = false;
+	(void) x86_osxsave; /* to avoid warnings about unused variables */
+	memset(info, 0, sizeof(*info));
+	info->type = FLAC__CPUINFO_TYPE_X86_64;
+#if !defined FLAC__NO_ASM && defined FLAC__HAS_X86INTRIN
 	info->use_asm = true;
-#  ifdef FLAC__USE_ALTIVEC
-#   if defined FLAC__SYS_DARWIN
 	{
-		int val = 0, mib[2] = { CTL_HW, HW_VECTORUNIT };
-		size_t len = sizeof(val);
-		info->data.ppc.altivec = !(sysctl(mib, 2, &val, &len, NULL, 0) || !val);
+		/* http://www.sandpile.org/x86/cpuid.htm */
+		FLAC__uint32 flags_eax, flags_ebx, flags_ecx, flags_edx;
+		FLAC__cpu_info_x86(0, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx);
+		info->x86.intel = (flags_ebx == 0x756E6547 && flags_edx == 0x49656E69 && flags_ecx == 0x6C65746E)? true : false; /* GenuineIntel */
+		FLAC__cpu_info_x86(1, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx);
+		info->x86.sse3  = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false;
+		info->x86.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false;
+		info->x86.sse41 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE41)? true : false;
+		info->x86.sse42 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE42)? true : false;
+#if defined FLAC__AVX_SUPPORTED
+		    x86_osxsave = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_OSXSAVE)? true : false;
+		info->x86.avx   = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_AVX    )? true : false;
+		info->x86.fma   = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_FMA    )? true : false;
+		FLAC__cpu_info_x86(7, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx);
+		info->x86.avx2  = (flags_ebx & FLAC__CPUINFO_IA32_CPUID_AVX2   )? true : false;
+#endif
 	}
-	{
-		host_basic_info_data_t hostInfo;
-		mach_msg_type_number_t infoCount;
-
-		infoCount = HOST_BASIC_INFO_COUNT;
-		host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo, &infoCount);
+#ifdef DEBUG
+	fprintf(stderr, "CPU info (x86-64):\n");
+	fprintf(stderr, "  SSE3 ....... %c\n", info->x86.sse3  ? 'Y' : 'n');
+	fprintf(stderr, "  SSSE3 ...... %c\n", info->x86.ssse3 ? 'Y' : 'n');
+	fprintf(stderr, "  SSE41 ...... %c\n", info->x86.sse41 ? 'Y' : 'n');
+	fprintf(stderr, "  SSE42 ...... %c\n", info->x86.sse42 ? 'Y' : 'n');
+# if defined FLAC__AVX_SUPPORTED
+	fprintf(stderr, "  AVX ........ %c\n", info->x86.avx   ? 'Y' : 'n');
+	fprintf(stderr, "  FMA ........ %c\n", info->x86.fma   ? 'Y' : 'n');
+	fprintf(stderr, "  AVX2 ....... %c\n", info->x86.avx2  ? 'Y' : 'n');
+# endif
+#endif
 
-		info->data.ppc.ppc64 = (hostInfo.cpu_type == CPU_TYPE_POWERPC) && (hostInfo.cpu_subtype == CPU_SUBTYPE_POWERPC_970);
+	/*
+	 * now have to check for OS support of AVX instructions
+	 */
+	if(info->x86.avx && x86_osxsave) {
+		FLAC__uint32 ecr = FLAC__cpu_xgetbv_x86();
+		if ((ecr & 0x6) != 0x6)
+			disable_avx(info);
+#ifdef DEBUG
+		fprintf(stderr, "  AVX OS sup . %c\n", info->x86.avx ? 'Y' : 'n');
+#endif
 	}
-#   else /* FLAC__USE_ALTIVEC && !FLAC__SYS_DARWIN */
-	{
-		/* no Darwin, do it the brute-force way */
-		/* @@@@@@ this is not thread-safe; replace with SSE OS method above or remove */
-		info->data.ppc.altivec = 0;
-		info->data.ppc.ppc64 = 0;
-
-		signal (SIGILL, sigill_handler);
-		canjump = 0;
-		if (!sigsetjmp (jmpbuf, 1)) {
-			canjump = 1;
+	else /* no OS AVX support*/
+		disable_avx(info);
+#else
+	info->use_asm = false;
+#endif
 
-			asm volatile (
-				"mtspr 256, %0\n\t"
-				"vand %%v0, %%v0, %%v0"
-				:
-				: "r" (-1)
-			);
+/*
+ * unknown CPU
+ */
+#else
+	info->type = FLAC__CPUINFO_TYPE_UNKNOWN;
+	info->use_asm = false;
+#endif
+}
 
-			info->data.ppc.altivec = 1;
-		}
-		canjump = 0;
-		if (!sigsetjmp (jmpbuf, 1)) {
-			int x = 0;
-			canjump = 1;
+#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
 
-			/* PPC64 hardware implements the cntlzd instruction */
-			asm volatile ("cntlzd %0, %1" : "=r" (x) : "r" (x) );
+#if defined _MSC_VER
+#include <intrin.h> /* for __cpuid() and _xgetbv() */
+#elif defined __GNUC__ && defined HAVE_CPUID_H
+#include <cpuid.h> /* for __get_cpuid() and __get_cpuid_max() */
+#endif
 
-			info->data.ppc.ppc64 = 1;
-		}
-		signal (SIGILL, SIG_DFL); /*@@@@@@ should save and restore old signal */
+FLAC__uint32 FLAC__cpu_have_cpuid_x86(void)
+{
+#ifdef FLAC__CPU_X86_64
+	return 1;
+#else
+# if defined _MSC_VER || defined __INTEL_COMPILER /* Do they support CPUs w/o CPUID support (or OSes that work on those CPUs)? */
+	FLAC__uint32 flags1, flags2;
+	__asm {
+		pushfd
+		pushfd
+		pop		eax
+		mov		flags1, eax
+		xor		eax, 0x200000
+		push	eax
+		popfd
+		pushfd
+		pop		eax
+		mov		flags2, eax
+		popfd
 	}
-#   endif
-#  else /* !FLAC__USE_ALTIVEC */
-	info->data.ppc.altivec = 0;
-	info->data.ppc.ppc64 = 0;
-#  endif
+	if (((flags1^flags2) & 0x200000) != 0)
+		return 1;
+	else
+		return 0;
+# elif defined __GNUC__ && defined HAVE_CPUID_H
+	if (__get_cpuid_max(0, 0) != 0)
+		return 1;
+	else
+		return 0;
 # else
-	info->use_asm = false;
+	return 0;
 # endif
+#endif
+}
 
-/*
- * unknown CPI
- */
+void FLAC__cpu_info_x86(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint32 *ecx, FLAC__uint32 *edx)
+{
+#if defined _MSC_VER || defined __INTEL_COMPILER
+	int cpuinfo[4];
+	int ext = level & 0x80000000;
+	__cpuid(cpuinfo, ext);
+	if((unsigned)cpuinfo[0] < level) {
+		*eax = *ebx = *ecx = *edx = 0;
+		return;
+	}
+#if defined FLAC__AVX_SUPPORTED
+	__cpuidex(cpuinfo, level, 0); /* for AVX2 detection */
 #else
-	info->type = FLAC__CPUINFO_TYPE_UNKNOWN;
-	info->use_asm = false;
+	__cpuid(cpuinfo, level); /* some old compilers don't support __cpuidex */
+#endif
+	*eax = cpuinfo[0]; *ebx = cpuinfo[1]; *ecx = cpuinfo[2]; *edx = cpuinfo[3];
+#elif defined __GNUC__ && defined HAVE_CPUID_H
+	FLAC__uint32 ext = level & 0x80000000;
+	__cpuid(ext, *eax, *ebx, *ecx, *edx);
+	if (*eax < level) {
+		*eax = *ebx = *ecx = *edx = 0;
+		return;
+	}
+	__cpuid_count(level, 0, *eax, *ebx, *ecx, *edx);
+#else
+	*eax = *ebx = *ecx = *edx = 0;
 #endif
 }
+
+FLAC__uint32 FLAC__cpu_xgetbv_x86(void)
+{
+#if (defined _MSC_VER || defined __INTEL_COMPILER) && defined FLAC__AVX_SUPPORTED
+	return (FLAC__uint32)_xgetbv(0);
+#elif defined __GNUC__
+	FLAC__uint32 lo, hi;
+	asm volatile (".byte 0x0f, 0x01, 0xd0" : "=a"(lo), "=d"(hi) : "c" (0));
+	return lo;
+#else
+	return 0;
+#endif
+}
+
+#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */
diff --git a/libFLAC/crc.c b/libFLAC/crc.c
index 463ab65..de2cb18 100644
--- a/libFLAC/crc.c
+++ b/libFLAC/crc.c
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,7 +30,7 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#if HAVE_CONFIG_H
+#ifdef HAVE_CONFIG_H
 #  include <config.h>
 #endif
 
@@ -74,7 +75,7 @@ FLAC__byte const FLAC__crc8_table[256] = {
 
 /* CRC-16, poly = x^16 + x^15 + x^2 + x^0, init = 0 */
 
-unsigned FLAC__crc16_table[256] = {
+unsigned const FLAC__crc16_table[256] = {
 	0x0000,  0x8005,  0x800f,  0x000a,  0x801b,  0x001e,  0x0014,  0x8011,
 	0x8033,  0x0036,  0x003c,  0x8039,  0x0028,  0x802d,  0x8027,  0x0022,
 	0x8063,  0x0066,  0x006c,  0x8069,  0x0078,  0x807d,  0x8077,  0x0072,
diff --git a/libFLAC/fixed.c b/libFLAC/fixed.c
index 1a3aac0..74b31b3 100644
--- a/libFLAC/fixed.c
+++ b/libFLAC/fixed.c
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,26 +30,18 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#if HAVE_CONFIG_H
+#ifdef HAVE_CONFIG_H
 #  include <config.h>
 #endif
 
 #include <math.h>
 #include <string.h>
+#include "share/compat.h"
 #include "private/bitmath.h"
 #include "private/fixed.h"
+#include "private/macros.h"
 #include "FLAC/assert.h"
 
-#ifndef M_LN2
-/* math.h in VC++ doesn't seem to have this (how Microsoft is that?) */
-#define M_LN2 0.69314718055994530942
-#endif
-
-#ifdef min
-#undef min
-#endif
-#define min(x,y) ((x) < (y)? (x) : (y))
-
 #ifdef local_abs
 #undef local_abs
 #endif
@@ -242,11 +235,11 @@ unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], unsigned d
 		error -= last_error_3; total_error_4 += local_abs(error); last_error_3 = save;
 	}
 
-	if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4))
+	if(total_error_0 < flac_min(flac_min(flac_min(total_error_1, total_error_2), total_error_3), total_error_4))
 		order = 0;
-	else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4))
+	else if(total_error_1 < flac_min(flac_min(total_error_2, total_error_3), total_error_4))
 		order = 1;
-	else if(total_error_2 < min(total_error_3, total_error_4))
+	else if(total_error_2 < flac_min(total_error_3, total_error_4))
 		order = 2;
 	else if(total_error_3 < total_error_4)
 		order = 3;
@@ -304,11 +297,11 @@ unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsig
 		error -= last_error_3; total_error_4 += local_abs(error); last_error_3 = save;
 	}
 
-	if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4))
+	if(total_error_0 < flac_min(flac_min(flac_min(total_error_1, total_error_2), total_error_3), total_error_4))
 		order = 0;
-	else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4))
+	else if(total_error_1 < flac_min(flac_min(total_error_2, total_error_3), total_error_4))
 		order = 1;
-	else if(total_error_2 < min(total_error_3, total_error_4))
+	else if(total_error_2 < flac_min(total_error_3, total_error_4))
 		order = 2;
 	else if(total_error_3 < total_error_4)
 		order = 3;
@@ -324,20 +317,11 @@ unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsig
 	FLAC__ASSERT(data_len > 0 || total_error_3 == 0);
 	FLAC__ASSERT(data_len > 0 || total_error_4 == 0);
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-#if defined _MSC_VER || defined __MINGW32__
-	/* with MSVC you have to spoon feed it the casting */
-	residual_bits_per_sample[0] = (FLAC__float)((total_error_0 > 0) ? log(M_LN2 * (FLAC__double)(FLAC__int64)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[1] = (FLAC__float)((total_error_1 > 0) ? log(M_LN2 * (FLAC__double)(FLAC__int64)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[2] = (FLAC__float)((total_error_2 > 0) ? log(M_LN2 * (FLAC__double)(FLAC__int64)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[3] = (FLAC__float)((total_error_3 > 0) ? log(M_LN2 * (FLAC__double)(FLAC__int64)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[4] = (FLAC__float)((total_error_4 > 0) ? log(M_LN2 * (FLAC__double)(FLAC__int64)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0);
-#else
 	residual_bits_per_sample[0] = (FLAC__float)((total_error_0 > 0) ? log(M_LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0);
 	residual_bits_per_sample[1] = (FLAC__float)((total_error_1 > 0) ? log(M_LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0);
 	residual_bits_per_sample[2] = (FLAC__float)((total_error_2 > 0) ? log(M_LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0);
 	residual_bits_per_sample[3] = (FLAC__float)((total_error_3 > 0) ? log(M_LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0);
 	residual_bits_per_sample[4] = (FLAC__float)((total_error_4 > 0) ? log(M_LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0);
-#endif
 #else
 	residual_bits_per_sample[0] = (total_error_0 > 0) ? local__compute_rbps_wide_integerized(total_error_0, data_len) : 0;
 	residual_bits_per_sample[1] = (total_error_1 > 0) ? local__compute_rbps_wide_integerized(total_error_1, data_len) : 0;
diff --git a/libFLAC/float.c b/libFLAC/float.c
index 8e19280..068069f 100644
--- a/libFLAC/float.c
+++ b/libFLAC/float.c
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2004-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,23 +30,16 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#if HAVE_CONFIG_H
+#ifdef HAVE_CONFIG_H
 #  include <config.h>
 #endif
 
 #include "FLAC/assert.h"
-
+#include "share/compat.h"
 #include "private/float.h"
 
 #ifdef FLAC__INTEGER_ONLY_LIBRARY
 
-/* adjust for compilers that can't understand using LLU suffix for uint64_t literals */
-#ifdef _MSC_VER
-#define FLAC__U64L(x) x
-#else
-#define FLAC__U64L(x) x##LLU
-#endif
-
 const FLAC__fixedpoint FLAC__FP_ZERO = 0;
 const FLAC__fixedpoint FLAC__FP_ONE_HALF = 0x00008000;
 const FLAC__fixedpoint FLAC__FP_ONE = 0x00010000;
@@ -282,7 +276,7 @@ FLAC__uint32 FLAC__fixedpoint_log2(FLAC__uint32 x, unsigned fracbits, unsigned p
 
 	if(x < ONE)
 		return 0;
-	
+
 	if(precision > LOG2_LOOKUP_PRECISION)
 		precision = LOG2_LOOKUP_PRECISION;
 
diff --git a/libFLAC/format.c b/libFLAC/format.c
index 749461d..4d0d832 100644
--- a/libFLAC/format.c
+++ b/libFLAC/format.c
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,7 +30,7 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#if HAVE_CONFIG_H
+#ifdef HAVE_CONFIG_H
 #  include <config.h>
 #endif
 
@@ -38,33 +39,14 @@
 #include <string.h> /* for memset() */
 #include "FLAC/assert.h"
 #include "FLAC/format.h"
+#include "share/compat.h"
 #include "private/format.h"
-
-#ifndef FLaC__INLINE
-#define FLaC__INLINE
-#endif
-
-#ifdef min
-#undef min
-#endif
-#define min(a,b) ((a)<(b)?(a):(b))
-
-/* adjust for compilers that can't understand using LLU suffix for uint64_t literals */
-#ifdef _MSC_VER
-#define FLAC__U64L(x) x
-#else
-#define FLAC__U64L(x) x##LLU
-#endif
+#include "private/macros.h"
 
 /* VERSION should come from configure */
 FLAC_API const char *FLAC__VERSION_STRING = VERSION;
 
-#if defined _MSC_VER || defined __BORLANDC__ || defined __MINW32__
-/* yet one more hack because of MSVC6: */
-FLAC_API const char *FLAC__VENDOR_STRING = "reference libFLAC 1.2.1 20070917";
-#else
-FLAC_API const char *FLAC__VENDOR_STRING = "reference libFLAC " VERSION " 20070917";
-#endif
+FLAC_API const char *FLAC__VENDOR_STRING = "reference libFLAC " VERSION " 20141125";
 
 FLAC_API const FLAC__byte FLAC__STREAM_SYNC_STRING[4] = { 'f','L','a','C' };
 FLAC_API const unsigned FLAC__STREAM_SYNC = 0x664C6143;
@@ -223,6 +205,16 @@ FLAC_API FLAC__bool FLAC__format_sample_rate_is_valid(unsigned sample_rate)
 		return true;
 }
 
+FLAC_API FLAC__bool FLAC__format_blocksize_is_subset(unsigned blocksize, unsigned sample_rate)
+{
+	if(blocksize > 16384)
+		return false;
+	else if(sample_rate <= 48000 && blocksize > 4608)
+		return false;
+	else
+		return true;
+}
+
 FLAC_API FLAC__bool FLAC__format_sample_rate_is_subset(unsigned sample_rate)
 {
 	if(
@@ -313,7 +305,7 @@ FLAC_API unsigned FLAC__format_seektable_sort(FLAC__StreamMetadata_SeekTable *se
  * and a more clear explanation at the end of this section:
  *   http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  */
-static FLaC__INLINE unsigned utf8len_(const FLAC__byte *utf8)
+static unsigned utf8len_(const FLAC__byte *utf8)
 {
 	FLAC__ASSERT(0 != utf8);
 	if ((utf8[0] & 0x80) == 0) {
@@ -536,7 +528,7 @@ unsigned FLAC__format_get_max_rice_partition_order_from_blocksize(unsigned block
 		max_rice_partition_order++;
 		blocksize >>= 1;
 	}
-	return min(FLAC__MAX_RICE_PARTITION_ORDER, max_rice_partition_order);
+	return flac_min(FLAC__MAX_RICE_PARTITION_ORDER, max_rice_partition_order);
 }
 
 unsigned FLAC__format_get_max_rice_partition_order_from_blocksize_limited_max_and_predictor_order(unsigned limit, unsigned blocksize, unsigned predictor_order)
@@ -581,9 +573,9 @@ FLAC__bool FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_s
 	FLAC__ASSERT(object->capacity_by_order > 0 || (0 == object->parameters && 0 == object->raw_bits));
 
 	if(object->capacity_by_order < max_partition_order) {
-		if(0 == (object->parameters = (unsigned*)realloc(object->parameters, sizeof(unsigned)*(1 << max_partition_order))))
+		if(0 == (object->parameters = realloc(object->parameters, sizeof(unsigned)*(1 << max_partition_order))))
 			return false;
-		if(0 == (object->raw_bits = (unsigned*)realloc(object->raw_bits, sizeof(unsigned)*(1 << max_partition_order))))
+		if(0 == (object->raw_bits = realloc(object->raw_bits, sizeof(unsigned)*(1 << max_partition_order))))
 			return false;
 		memset(object->raw_bits, 0, sizeof(unsigned)*(1 << max_partition_order));
 		object->capacity_by_order = max_partition_order;
diff --git a/libFLAC/include/private/all.h b/libFLAC/include/private/all.h
index 304c471..a4463d2 100644
--- a/libFLAC/include/private/all.h
+++ b/libFLAC/include/private/all.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
diff --git a/libFLAC/include/private/bitmath.h b/libFLAC/include/private/bitmath.h
index 87fa0fa..22d894f 100644
--- a/libFLAC/include/private/bitmath.h
+++ b/libFLAC/include/private/bitmath.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2001-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -33,9 +34,152 @@
 #define FLAC__PRIVATE__BITMATH_H
 
 #include "FLAC/ordinals.h"
+#include "FLAC/assert.h"
+
+/* for CHAR_BIT */
+#include <limits.h>
+#include "share/compat.h"
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1400)
+#include <intrin.h> /* for _BitScanReverse* */
+#endif
+
+/* Will never be emitted for MSVC, GCC, Intel compilers */
+static inline unsigned int FLAC__clz_soft_uint32(unsigned int word)
+{
+    static const unsigned char byte_to_unary_table[] = {
+    8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    };
+
+    return (word) > 0xffffff ? byte_to_unary_table[(word) >> 24] :
+    (word) > 0xffff ? byte_to_unary_table[(word) >> 16] + 8 :
+    (word) > 0xff ? byte_to_unary_table[(word) >> 8] + 16 :
+    byte_to_unary_table[(word)] + 24;
+}
+
+static inline unsigned int FLAC__clz_uint32(FLAC__uint32 v)
+{
+/* Never used with input 0 */
+    FLAC__ASSERT(v > 0);
+#if defined(__INTEL_COMPILER)
+    return _bit_scan_reverse(v) ^ 31U;
+#elif defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+/* This will translate either to (bsr ^ 31U), clz , ctlz, cntlz, lzcnt depending on
+ * -march= setting or to a software routine in exotic machines. */
+    return __builtin_clz(v);
+#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
+    {
+        unsigned long idx;
+        _BitScanReverse(&idx, v);
+        return idx ^ 31U;
+    }
+#else
+    return FLAC__clz_soft_uint32(v);
+#endif
+}
+
+/* This one works with input 0 */
+static inline unsigned int FLAC__clz2_uint32(FLAC__uint32 v)
+{
+    if (!v)
+        return 32;
+    return FLAC__clz_uint32(v);
+}
+
+/* An example of what FLAC__bitmath_ilog2() computes:
+ *
+ * ilog2( 0) = assertion failure
+ * ilog2( 1) = 0
+ * ilog2( 2) = 1
+ * ilog2( 3) = 1
+ * ilog2( 4) = 2
+ * ilog2( 5) = 2
+ * ilog2( 6) = 2
+ * ilog2( 7) = 2
+ * ilog2( 8) = 3
+ * ilog2( 9) = 3
+ * ilog2(10) = 3
+ * ilog2(11) = 3
+ * ilog2(12) = 3
+ * ilog2(13) = 3
+ * ilog2(14) = 3
+ * ilog2(15) = 3
+ * ilog2(16) = 4
+ * ilog2(17) = 4
+ * ilog2(18) = 4
+ */
+
+static inline unsigned FLAC__bitmath_ilog2(FLAC__uint32 v)
+{
+    FLAC__ASSERT(v > 0);
+#if defined(__INTEL_COMPILER)
+    return _bit_scan_reverse(v);
+#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
+    {
+        unsigned long idx;
+        _BitScanReverse(&idx, v);
+        return idx;
+    }
+#else
+    return sizeof(FLAC__uint32) * CHAR_BIT  - 1 - FLAC__clz_uint32(v);
+#endif
+}
+
+
+#ifdef FLAC__INTEGER_ONLY_LIBRARY /* Unused otherwise */
+
+static inline unsigned FLAC__bitmath_ilog2_wide(FLAC__uint64 v)
+{
+    FLAC__ASSERT(v > 0);
+#if defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+    return sizeof(FLAC__uint64) * CHAR_BIT - 1 - __builtin_clzll(v);
+/* Sorry, only supported in x64/Itanium.. and both have fast FPU which makes integer-only encoder pointless */
+#elif (defined(_MSC_VER) && (_MSC_VER >= 1400)) && (defined(_M_IA64) || defined(_M_X64))
+    {
+        unsigned long idx;
+        _BitScanReverse64(&idx, v);
+        return idx;
+    }
+#else
+/*  Brain-damaged compilers will use the fastest possible way that is,
+    de Bruijn sequences (http://supertech.csail.mit.edu/papers/debruijn.pdf)
+    (C) Timothy B. Terriberry (tterribe@xiph.org) 2001-2009 CC0 (Public domain).
+*/
+    {
+        static const unsigned char DEBRUIJN_IDX64[64]={
+            0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40,
+            5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57,
+            63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56,
+            62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58
+        };
+        v|= v>>1;
+        v|= v>>2;
+        v|= v>>4;
+        v|= v>>8;
+        v|= v>>16;
+        v|= v>>32;
+        v= (v>>1)+1;
+        return DEBRUIJN_IDX64[v*0x218A392CD3D5DBF>>58&0x3F];
+    }
+#endif
+}
+#endif
 
-unsigned FLAC__bitmath_ilog2(FLAC__uint32 v);
-unsigned FLAC__bitmath_ilog2_wide(FLAC__uint64 v);
 unsigned FLAC__bitmath_silog2(int v);
 unsigned FLAC__bitmath_silog2_wide(FLAC__int64 v);
 
diff --git a/libFLAC/include/private/bitreader.h b/libFLAC/include/private/bitreader.h
index fd0f6aa..4dea8d0 100644
--- a/libFLAC/include/private/bitreader.h
+++ b/libFLAC/include/private/bitreader.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -49,7 +50,7 @@ typedef FLAC__bool (*FLAC__BitReaderReadCallback)(FLAC__byte buffer[], size_t *b
  */
 FLAC__BitReader *FLAC__bitreader_new(void);
 void FLAC__bitreader_delete(FLAC__BitReader *br);
-FLAC__bool FLAC__bitreader_init(FLAC__BitReader *br, FLAC__CPUInfo cpu, FLAC__BitReaderReadCallback rcb, void *cd);
+FLAC__bool FLAC__bitreader_init(FLAC__BitReader *br, FLAC__BitReaderReadCallback rcb, void *cd);
 void FLAC__bitreader_free(FLAC__BitReader *br); /* does not 'free(br)' */
 FLAC__bool FLAC__bitreader_clear(FLAC__BitReader *br);
 void FLAC__bitreader_dump(const FLAC__BitReader *br, FILE *out);
@@ -81,19 +82,10 @@ FLAC__bool FLAC__bitreader_read_byte_block_aligned_no_crc(FLAC__BitReader *br, F
 FLAC__bool FLAC__bitreader_read_unary_unsigned(FLAC__BitReader *br, unsigned *val);
 FLAC__bool FLAC__bitreader_read_rice_signed(FLAC__BitReader *br, int *val, unsigned parameter);
 FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter);
-#ifndef FLAC__NO_ASM
-#  ifdef FLAC__CPU_IA32
-#    ifdef FLAC__HAS_NASM
-FLAC__bool FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter);
-#    endif
-#  endif
-#endif
 #if 0 /* UNUSED */
 FLAC__bool FLAC__bitreader_read_golomb_signed(FLAC__BitReader *br, int *val, unsigned parameter);
 FLAC__bool FLAC__bitreader_read_golomb_unsigned(FLAC__BitReader *br, unsigned *val, unsigned parameter);
 #endif
 FLAC__bool FLAC__bitreader_read_utf8_uint32(FLAC__BitReader *br, FLAC__uint32 *val, FLAC__byte *raw, unsigned *rawlen);
 FLAC__bool FLAC__bitreader_read_utf8_uint64(FLAC__BitReader *br, FLAC__uint64 *val, FLAC__byte *raw, unsigned *rawlen);
-
-FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br);
 #endif
diff --git a/libFLAC/include/private/bitwriter.h b/libFLAC/include/private/bitwriter.h
index aa5c4f7..3b1362d 100644
--- a/libFLAC/include/private/bitwriter.h
+++ b/libFLAC/include/private/bitwriter.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
diff --git a/libFLAC/include/private/cpu.h b/libFLAC/include/private/cpu.h
index 651bb22..380f4f0 100644
--- a/libFLAC/include/private/cpu.h
+++ b/libFLAC/include/private/cpu.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2001-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -38,51 +39,134 @@
 #include <config.h>
 #endif
 
+#if defined FLAC__HAS_X86INTRIN
+/* SSE intrinsics support by ICC/MSVC/GCC */
+#if defined __INTEL_COMPILER
+  #define FLAC__SSE_TARGET(x)
+  #define FLAC__SSE_SUPPORTED 1
+  #define FLAC__SSE2_SUPPORTED 1
+  #if (__INTEL_COMPILER >= 1000) /* Intel C++ Compiler 10.0 */
+    #define FLAC__SSSE3_SUPPORTED 1
+    #define FLAC__SSE4_1_SUPPORTED 1
+  #endif
+  #if (__INTEL_COMPILER >= 1110) /* Intel C++ Compiler 11.1 */
+    #define FLAC__AVX_SUPPORTED 1
+  #endif
+  #if (__INTEL_COMPILER >= 1300) /* Intel C++ Compiler 13.0 */
+    #define FLAC__AVX2_SUPPORTED 1
+    #define FLAC__FMA_SUPPORTED 1
+  #endif
+#elif defined _MSC_VER
+  #define FLAC__SSE_TARGET(x)
+  #define FLAC__SSE_SUPPORTED 1
+  #define FLAC__SSE2_SUPPORTED 1
+  #if (_MSC_VER >= 1500) /* MS Visual Studio 2008 */
+    #define FLAC__SSSE3_SUPPORTED 1
+    #define FLAC__SSE4_1_SUPPORTED 1
+  #endif
+  #if (_MSC_FULL_VER >= 160040219) /* MS Visual Studio 2010 SP1 */
+    #define FLAC__AVX_SUPPORTED 1
+  #endif
+  #if (_MSC_VER >= 1700) /* MS Visual Studio 2012 */
+    #define FLAC__AVX2_SUPPORTED 1
+    #define FLAC__FMA_SUPPORTED 1
+  #endif
+#elif defined __GNUC__
+  #if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9)) /* since GCC 4.9 -msse.. compiler options aren't necessary */
+    #define FLAC__SSE_TARGET(x) __attribute__ ((__target__ (x)))
+    #define FLAC__SSE_SUPPORTED 1
+    #define FLAC__SSE2_SUPPORTED 1
+    #define FLAC__SSSE3_SUPPORTED 1
+    #define FLAC__SSE4_1_SUPPORTED 1
+    #define FLAC__AVX_SUPPORTED 1
+    #define FLAC__AVX2_SUPPORTED 1
+    #define FLAC__FMA_SUPPORTED 1
+  #else /* for GCC older than 4.9 */
+    #define FLAC__SSE_TARGET(x)
+    #ifdef __SSE__
+      #define FLAC__SSE_SUPPORTED 1
+    #endif
+    #ifdef __SSE2__
+      #define FLAC__SSE2_SUPPORTED 1
+    #endif
+    #ifdef __SSSE3__
+      #define FLAC__SSSE3_SUPPORTED 1
+    #endif
+    #ifdef __SSE4_1__
+      #define FLAC__SSE4_1_SUPPORTED 1
+    #endif
+    #ifdef __AVX__
+      #define FLAC__AVX_SUPPORTED 1
+    #endif
+    #ifdef __AVX2__
+      #define FLAC__AVX2_SUPPORTED 1
+    #endif
+    #ifdef __FMA__
+      #define FLAC__FMA_SUPPORTED 1
+    #endif
+  #endif /* GCC version */
+#endif /* compiler version */
+#endif /* intrinsics support */
+
 typedef enum {
 	FLAC__CPUINFO_TYPE_IA32,
-	FLAC__CPUINFO_TYPE_PPC,
+	FLAC__CPUINFO_TYPE_X86_64,
 	FLAC__CPUINFO_TYPE_UNKNOWN
 } FLAC__CPUInfo_Type;
 
+#if defined FLAC__CPU_IA32
 typedef struct {
-	FLAC__bool cpuid;
-	FLAC__bool bswap;
+	FLAC__bool intel;
+
 	FLAC__bool cmov;
 	FLAC__bool mmx;
-	FLAC__bool fxsr;
 	FLAC__bool sse;
 	FLAC__bool sse2;
+
 	FLAC__bool sse3;
 	FLAC__bool ssse3;
-	FLAC__bool _3dnow;
-	FLAC__bool ext3dnow;
-	FLAC__bool extmmx;
+	FLAC__bool sse41;
+	FLAC__bool sse42;
+	FLAC__bool avx;
+	FLAC__bool avx2;
+	FLAC__bool fma;
 } FLAC__CPUInfo_IA32;
-
+#elif defined FLAC__CPU_X86_64
 typedef struct {
-	FLAC__bool altivec;
-	FLAC__bool ppc64;
-} FLAC__CPUInfo_PPC;
+	FLAC__bool intel;
+
+	FLAC__bool sse3;
+	FLAC__bool ssse3;
+	FLAC__bool sse41;
+	FLAC__bool sse42;
+	FLAC__bool avx;
+	FLAC__bool avx2;
+	FLAC__bool fma;
+} FLAC__CPUInfo_x86;
+#endif
 
 typedef struct {
 	FLAC__bool use_asm;
 	FLAC__CPUInfo_Type type;
-	union {
-		FLAC__CPUInfo_IA32 ia32;
-		FLAC__CPUInfo_PPC ppc;
-	} data;
+#if defined FLAC__CPU_IA32
+	FLAC__CPUInfo_IA32 ia32;
+#elif defined FLAC__CPU_X86_64
+	FLAC__CPUInfo_x86 x86;
+#endif
 } FLAC__CPUInfo;
 
 void FLAC__cpu_info(FLAC__CPUInfo *info);
 
 #ifndef FLAC__NO_ASM
-#ifdef FLAC__CPU_IA32
-#ifdef FLAC__HAS_NASM
+# if defined FLAC__CPU_IA32 && defined FLAC__HAS_NASM
 FLAC__uint32 FLAC__cpu_have_cpuid_asm_ia32(void);
 void         FLAC__cpu_info_asm_ia32(FLAC__uint32 *flags_edx, FLAC__uint32 *flags_ecx);
-FLAC__uint32 FLAC__cpu_info_extended_amd_asm_ia32(void);
-#endif
-#endif
+# endif
+# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
+FLAC__uint32 FLAC__cpu_have_cpuid_x86(void);
+void         FLAC__cpu_info_x86(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint32 *ecx, FLAC__uint32 *edx);
+FLAC__uint32 FLAC__cpu_xgetbv_x86(void);
+# endif
 #endif
 
 #endif
diff --git a/libFLAC/include/private/crc.h b/libFLAC/include/private/crc.h
index 0b67fb4..29c512c 100644
--- a/libFLAC/include/private/crc.h
+++ b/libFLAC/include/private/crc.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -48,9 +49,9 @@ FLAC__uint8 FLAC__crc8(const FLAC__byte *data, unsigned len);
 ** polynomial = x^16 + x^15 + x^2 + x^0
 ** init = 0
 */
-extern unsigned FLAC__crc16_table[256];
+extern unsigned const FLAC__crc16_table[256];
 
-#define FLAC__CRC16_UPDATE(data, crc) (((((crc)<<8) & 0xffff) ^ FLAC__crc16_table[((crc)>>8) ^ (data)]))
+#define FLAC__CRC16_UPDATE(data, crc) ((((crc)<<8) & 0xffff) ^ FLAC__crc16_table[((crc)>>8) ^ (data)])
 /* this alternate may be faster on some systems/compilers */
 #if 0
 #define FLAC__CRC16_UPDATE(data, crc) ((((crc)<<8) ^ FLAC__crc16_table[((crc)>>8) ^ (data)]) & 0xffff)
diff --git a/libFLAC/include/private/fixed.h b/libFLAC/include/private/fixed.h
index 6656b79..dcc4715 100644
--- a/libFLAC/include/private/fixed.h
+++ b/libFLAC/include/private/fixed.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -36,6 +37,7 @@
 #include <config.h>
 #endif
 
+#include "private/cpu.h"
 #include "private/float.h"
 #include "FLAC/format.h"
 
@@ -53,14 +55,22 @@
  */
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
+unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
 # ifndef FLAC__NO_ASM
-#  ifdef FLAC__CPU_IA32
-#   ifdef FLAC__HAS_NASM
-unsigned FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
+#  if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
+#   ifdef FLAC__SSE2_SUPPORTED
+unsigned FLAC__fixed_compute_best_predictor_intrin_sse2(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
+unsigned FLAC__fixed_compute_best_predictor_wide_intrin_sse2(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
+#   endif
+#   ifdef FLAC__SSSE3_SUPPORTED
+unsigned FLAC__fixed_compute_best_predictor_intrin_ssse3(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
+unsigned FLAC__fixed_compute_best_predictor_wide_intrin_ssse3(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
 #   endif
 #  endif
+#  if defined FLAC__CPU_IA32 && defined FLAC__HAS_NASM
+unsigned FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
+#  endif
 # endif
-unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
 #else
 unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
 unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
diff --git a/libFLAC/include/private/float.h b/libFLAC/include/private/float.h
index 73313f6..ab26432 100644
--- a/libFLAC/include/private/float.h
+++ b/libFLAC/include/private/float.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2004-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
diff --git a/libFLAC/include/private/format.h b/libFLAC/include/private/format.h
index 7f5cc93..2fd3460 100644
--- a/libFLAC/include/private/format.h
+++ b/libFLAC/include/private/format.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
diff --git a/libFLAC/include/private/lpc.h b/libFLAC/include/private/lpc.h
index 2cb139b..c4ed085 100644
--- a/libFLAC/include/private/lpc.h
+++ b/libFLAC/include/private/lpc.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -36,6 +37,7 @@
 #include <config.h>
 #endif
 
+#include "private/cpu.h"
 #include "private/float.h"
 #include "FLAC/format.h"
 
@@ -71,10 +73,22 @@ void FLAC__lpc_compute_autocorrelation(const FLAC__real data[], unsigned data_le
 #  ifdef FLAC__CPU_IA32
 #    ifdef FLAC__HAS_NASM
 void FLAC__lpc_compute_autocorrelation_asm_ia32(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
-void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
-void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
-void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
-void FLAC__lpc_compute_autocorrelation_asm_ia32_3dnow(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
+void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
+void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
+void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
+void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_16_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
+#    endif
+#  endif
+#  if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
+#    ifdef FLAC__SSE_SUPPORTED
+void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4_new(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8_new(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12_new(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16_new(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
 #    endif
 #  endif
 #endif
@@ -144,6 +158,22 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 *da
 #    ifdef FLAC__HAS_NASM
 void FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
 void FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
+void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
+#    endif
+#  endif
+#  if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
+#    ifdef FLAC__SSE2_SUPPORTED
+void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
+void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
+#    endif
+#    ifdef FLAC__SSE4_1_SUPPORTED
+void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse41(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
+void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
+#    endif
+#    ifdef FLAC__AVX2_SUPPORTED
+void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
+void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
+void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
 #    endif
 #  endif
 #endif
@@ -172,11 +202,17 @@ void FLAC__lpc_restore_signal_wide(const FLAC__int32 residual[], unsigned data_l
 #    ifdef FLAC__HAS_NASM
 void FLAC__lpc_restore_signal_asm_ia32(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
 void FLAC__lpc_restore_signal_asm_ia32_mmx(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
+void FLAC__lpc_restore_signal_wide_asm_ia32(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
 #    endif /* FLAC__HAS_NASM */
-#  elif defined FLAC__CPU_PPC
-void FLAC__lpc_restore_signal_asm_ppc_altivec_16(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
-void FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
-#  endif/* FLAC__CPU_IA32 || FLAC__CPU_PPC */
+#  endif /* FLAC__CPU_IA32 */
+#  if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
+#    ifdef FLAC__SSE2_SUPPORTED
+void FLAC__lpc_restore_signal_16_intrin_sse2(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
+#    endif
+#    ifdef FLAC__SSE4_1_SUPPORTED
+void FLAC__lpc_restore_signal_wide_intrin_sse41(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
+#    endif
+#  endif
 #endif /* FLAC__NO_ASM */
 
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
diff --git a/libFLAC/include/private/macros.h b/libFLAC/include/private/macros.h
new file mode 100644
index 0000000..b72b69b
--- /dev/null
+++ b/libFLAC/include/private/macros.h
@@ -0,0 +1,72 @@
+/* libFLAC - Free Lossless Audio Codec library
+ * Copyright (C) 2012-2014  Xiph.org Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FLAC__PRIVATE__MACROS_H
+#define FLAC__PRIVATE__MACROS_H
+
+#if defined(__GNUC__) && (__GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+
+#define flac_max(a,b) \
+    ({ __typeof__ (a) _a = (a); \
+    __typeof__ (b) _b = (b); \
+    _a > _b ? _a : _b; })
+
+#define MIN_PASTE(A,B) A##B
+#define MIN_IMPL(A,B,L) ({ \
+    __typeof__(A) MIN_PASTE(__a,L) = (A); \
+    __typeof__(B) MIN_PASTE(__b,L) = (B); \
+    MIN_PASTE(__a,L) < MIN_PASTE(__b,L) ? MIN_PASTE(__a,L) : MIN_PASTE(__b,L); \
+    })
+
+#define flac_min(A,B) MIN_IMPL(A,B,__COUNTER__)
+
+/* Whatever other unix that has sys/param.h */
+#elif defined(HAVE_SYS_PARAM_H)
+#include <sys/param.h>
+#define flac_max(a,b) MAX(a,b)
+#define flac_min(a,b) MIN(a,b)
+
+/* Windows VS has them in stdlib.h.. XXX:Untested */
+#elif defined(_MSC_VER)
+#include <stdlib.h>
+#define flac_max(a,b) __max(a,b)
+#define flac_min(a,b) __min(a,b)
+#endif
+
+#ifndef MIN
+#define MIN(x,y)	((x) <= (y) ? (x) : (y))
+#endif
+
+#ifndef MAX
+#define MAX(x,y)	((x) >= (y) ? (x) : (y))
+#endif
+
+#endif
diff --git a/libFLAC/include/private/md5.h b/libFLAC/include/private/md5.h
index e5f675a..c665ab3 100644
--- a/libFLAC/include/private/md5.h
+++ b/libFLAC/include/private/md5.h
@@ -28,11 +28,17 @@
 
 #include "FLAC/ordinals.h"
 
+typedef union {
+	FLAC__byte *p8;
+	FLAC__int16 *p16;
+	FLAC__int32 *p32;
+} FLAC__multibyte;
+
 typedef struct {
 	FLAC__uint32 in[16];
 	FLAC__uint32 buf[4];
 	FLAC__uint32 bytes[2];
-	FLAC__byte *internal_buf;
+	FLAC__multibyte internal_buf;
 	size_t capacity;
 } FLAC__MD5Context;
 
diff --git a/libFLAC/include/private/memory.h b/libFLAC/include/private/memory.h
index 7852c81..c9f2712 100644
--- a/libFLAC/include/private/memory.h
+++ b/libFLAC/include/private/memory.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2001-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -45,12 +46,13 @@
  * Use free() on this address to deallocate.
  */
 void *FLAC__memory_alloc_aligned(size_t bytes, void **aligned_address);
-FLAC__bool FLAC__memory_alloc_aligned_int32_array(unsigned elements, FLAC__int32 **unaligned_pointer, FLAC__int32 **aligned_pointer);
-FLAC__bool FLAC__memory_alloc_aligned_uint32_array(unsigned elements, FLAC__uint32 **unaligned_pointer, FLAC__uint32 **aligned_pointer);
-FLAC__bool FLAC__memory_alloc_aligned_uint64_array(unsigned elements, FLAC__uint64 **unaligned_pointer, FLAC__uint64 **aligned_pointer);
-FLAC__bool FLAC__memory_alloc_aligned_unsigned_array(unsigned elements, unsigned **unaligned_pointer, unsigned **aligned_pointer);
+FLAC__bool FLAC__memory_alloc_aligned_int32_array(size_t elements, FLAC__int32 **unaligned_pointer, FLAC__int32 **aligned_pointer);
+FLAC__bool FLAC__memory_alloc_aligned_uint32_array(size_t elements, FLAC__uint32 **unaligned_pointer, FLAC__uint32 **aligned_pointer);
+FLAC__bool FLAC__memory_alloc_aligned_uint64_array(size_t elements, FLAC__uint64 **unaligned_pointer, FLAC__uint64 **aligned_pointer);
+FLAC__bool FLAC__memory_alloc_aligned_unsigned_array(size_t elements, unsigned **unaligned_pointer, unsigned **aligned_pointer);
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-FLAC__bool FLAC__memory_alloc_aligned_real_array(unsigned elements, FLAC__real **unaligned_pointer, FLAC__real **aligned_pointer);
+FLAC__bool FLAC__memory_alloc_aligned_real_array(size_t elements, FLAC__real **unaligned_pointer, FLAC__real **aligned_pointer);
 #endif
+void *safe_malloc_mul_2op_p(size_t size1, size_t size2);
 
 #endif
diff --git a/libFLAC/include/private/metadata.h b/libFLAC/include/private/metadata.h
index b5268c9..092764c 100644
--- a/libFLAC/include/private/metadata.h
+++ b/libFLAC/include/private/metadata.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2002-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
diff --git a/libFLAC/include/private/ogg_decoder_aspect.h b/libFLAC/include/private/ogg_decoder_aspect.h
index df2b6b5..439bf8e 100644
--- a/libFLAC/include/private/ogg_decoder_aspect.h
+++ b/libFLAC/include/private/ogg_decoder_aspect.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec
- * Copyright (C) 2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2002-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
diff --git a/libFLAC/include/private/ogg_encoder_aspect.h b/libFLAC/include/private/ogg_encoder_aspect.h
index 290da07..709597d 100644
--- a/libFLAC/include/private/ogg_encoder_aspect.h
+++ b/libFLAC/include/private/ogg_encoder_aspect.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec
- * Copyright (C) 2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2002-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
diff --git a/libFLAC/include/private/ogg_helper.h b/libFLAC/include/private/ogg_helper.h
index 389be18..87b96ef 100644
--- a/libFLAC/include/private/ogg_helper.h
+++ b/libFLAC/include/private/ogg_helper.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec
- * Copyright (C) 2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2004-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
diff --git a/libFLAC/include/private/ogg_mapping.h b/libFLAC/include/private/ogg_mapping.h
index 07dd8b2..d35a46c 100644
--- a/libFLAC/include/private/ogg_mapping.h
+++ b/libFLAC/include/private/ogg_mapping.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec
- * Copyright (C) 2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2004-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -34,7 +35,7 @@
 
 #include "FLAC/ordinals.h"
 
-/** The length of the 'FLAC' magic in bytes. */
+/** The length of the packet type field in bytes. */
 #define FLAC__OGG_MAPPING_PACKET_TYPE_LENGTH (1u)
 
 extern const unsigned FLAC__OGG_MAPPING_PACKET_TYPE_LEN; /* = 8 bits */
diff --git a/libFLAC/include/private/stream_encoder.h b/libFLAC/include/private/stream_encoder.h
new file mode 100644
index 0000000..96d3135
--- /dev/null
+++ b/libFLAC/include/private/stream_encoder.h
@@ -0,0 +1,67 @@
+/* libFLAC - Free Lossless Audio Codec library
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FLAC__PRIVATE__STREAM_ENCODER_H
+#define FLAC__PRIVATE__STREAM_ENCODER_H
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+/*
+ * This is used to avoid overflow with unusual signals in 32-bit
+ * accumulator in the *precompute_partition_info_sums_* functions.
+ */
+#define FLAC__MAX_EXTRA_RESIDUAL_BPS 4
+
+#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
+#include "private/cpu.h"
+#include "FLAC/format.h"
+
+#ifdef FLAC__SSE2_SUPPORTED
+extern void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
+			unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps);
+#endif
+
+#ifdef FLAC__SSSE3_SUPPORTED
+extern void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
+			unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps);
+#endif
+
+#ifdef FLAC__AVX2_SUPPORTED
+extern void FLAC__precompute_partition_info_sums_intrin_avx2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
+			unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps);
+#endif
+
+#endif
+
+#endif
diff --git a/libFLAC/include/private/stream_encoder_framing.h b/libFLAC/include/private/stream_encoder_framing.h
index 4865c16..2b7387a 100644
--- a/libFLAC/include/private/stream_encoder_framing.h
+++ b/libFLAC/include/private/stream_encoder_framing.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
diff --git a/libFLAC/include/private/window.h b/libFLAC/include/private/window.h
index 01e0fc4..52c9262 100644
--- a/libFLAC/include/private/window.h
+++ b/libFLAC/include/private/window.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2006,2007  Josh Coalson
+ * Copyright (C) 2006-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -64,6 +65,8 @@ void FLAC__window_nuttall(FLAC__real *window, const FLAC__int32 L);
 void FLAC__window_rectangle(FLAC__real *window, const FLAC__int32 L);
 void FLAC__window_triangle(FLAC__real *window, const FLAC__int32 L);
 void FLAC__window_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__real p);
+void FLAC__window_partial_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__real p, const FLAC__real start, const FLAC__real end);
+void FLAC__window_punchout_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__real p, const FLAC__real start, const FLAC__real end);
 void FLAC__window_welch(FLAC__real *window, const FLAC__int32 L);
 
 #endif /* !defined FLAC__INTEGER_ONLY_LIBRARY */
diff --git a/libFLAC/include/protected/all.h b/libFLAC/include/protected/all.h
index 2921092..90912af 100644
--- a/libFLAC/include/protected/all.h
+++ b/libFLAC/include/protected/all.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2001-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
diff --git a/libFLAC/include/protected/stream_decoder.h b/libFLAC/include/protected/stream_decoder.h
index 9108ca7..7be95af 100644
--- a/libFLAC/include/protected/stream_decoder.h
+++ b/libFLAC/include/protected/stream_decoder.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -39,6 +40,7 @@
 
 typedef struct FLAC__StreamDecoderProtected {
 	FLAC__StreamDecoderState state;
+	FLAC__StreamDecoderInitStatus initstate;
 	unsigned channels;
 	FLAC__ChannelAssignment channel_assignment;
 	unsigned bits_per_sample;
diff --git a/libFLAC/include/protected/stream_encoder.h b/libFLAC/include/protected/stream_encoder.h
index 4101ee5..6917f5d 100644
--- a/libFLAC/include/protected/stream_encoder.h
+++ b/libFLAC/include/protected/stream_encoder.h
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2001-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -58,6 +59,8 @@ typedef enum {
 	FLAC__APODIZATION_RECTANGLE,
 	FLAC__APODIZATION_TRIANGLE,
 	FLAC__APODIZATION_TUKEY,
+	FLAC__APODIZATION_PARTIAL_TUKEY,
+	FLAC__APODIZATION_PUNCHOUT_TUKEY,
 	FLAC__APODIZATION_WELCH
 } FLAC__ApodizationFunction;
 
@@ -70,6 +73,11 @@ typedef struct {
 		struct {
 			FLAC__real p;
 		} tukey;
+		struct {
+			FLAC__real p;
+			FLAC__real start;
+			FLAC__real end;
+		} multiple_tukey;
 	} parameters;
 } FLAC__ApodizationSpecification;
 
diff --git a/libFLAC/lpc.c b/libFLAC/lpc.c
index 0356867..f7c643a 100644
--- a/libFLAC/lpc.c
+++ b/libFLAC/lpc.c
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,29 +30,39 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#if HAVE_CONFIG_H
+#ifdef HAVE_CONFIG_H
 #  include <config.h>
 #endif
 
 #include <math.h>
+
 #include "FLAC/assert.h"
 #include "FLAC/format.h"
+#include "share/compat.h"
 #include "private/bitmath.h"
 #include "private/lpc.h"
+#include "private/macros.h"
 #if defined DEBUG || defined FLAC__OVERFLOW_DETECT || defined FLAC__OVERFLOW_DETECT_VERBOSE
 #include <stdio.h>
 #endif
 
-#ifndef FLAC__INTEGER_ONLY_LIBRARY
-
-#ifndef M_LN2
-/* math.h in VC++ doesn't seem to have this (how Microsoft is that?) */
-#define M_LN2 0.69314718055994530942
-#endif
-
 /* OPT: #undef'ing this may improve the speed on some architectures */
 #define FLAC__LPC_UNROLLED_FILTER_LOOPS
 
+#ifndef FLAC__INTEGER_ONLY_LIBRARY
+
+#if !defined(HAVE_LROUND)
+#if defined(_MSC_VER)
+#include <float.h>
+#define copysign _copysign
+#elif defined(__GNUC__)
+#define copysign __builtin_copysign
+#endif
+static inline long int lround(double x) {
+    return (long)(x + copysign (0.5, x));
+}
+/* If this fails, we are in the presence of a mid 90's compiler, move along... */
+#endif
 
 void FLAC__lpc_window_data(const FLAC__int32 in[], const FLAC__real window[], FLAC__real out[], unsigned data_len)
 {
@@ -112,7 +123,7 @@ void FLAC__lpc_compute_autocorrelation(const FLAC__real data[], unsigned data_le
 void FLAC__lpc_compute_lp_coefficients(const FLAC__real autoc[], unsigned *max_order, FLAC__real lp_coeff[][FLAC__MAX_LPC_ORDER], FLAC__double error[])
 {
 	unsigned i, j;
-	FLAC__double r, err, ref[FLAC__MAX_LPC_ORDER], lpc[FLAC__MAX_LPC_ORDER];
+	FLAC__double r, err, lpc[FLAC__MAX_LPC_ORDER];
 
 	FLAC__ASSERT(0 != max_order);
 	FLAC__ASSERT(0 < *max_order);
@@ -126,7 +137,7 @@ void FLAC__lpc_compute_lp_coefficients(const FLAC__real autoc[], unsigned *max_o
 		r = -autoc[i+1];
 		for(j = 0; j < i; j++)
 			r -= lpc[j] * autoc[i-j];
-		ref[i] = (r/=err);
+		r /= err;
 
 		/* Update LPC coefficients and total error. */
 		lpc[i]=r;
@@ -145,7 +156,7 @@ void FLAC__lpc_compute_lp_coefficients(const FLAC__real autoc[], unsigned *max_o
 			lp_coeff[i][j] = (FLAC__real)(-lpc[j]); /* negate FIR filter coeff to get predictor coeff */
 		error[i] = err;
 
-		/* see SF bug #1601812 http://sourceforge.net/tracker/index.php?func=detail&aid=1601812&group_id=13478&atid=113478 */
+		/* see SF bug https://sourceforge.net/p/flac/bugs/234/ */
 		if(err == 0.0) {
 			*max_order = i+1;
 			return;
@@ -200,14 +211,8 @@ int FLAC__lpc_quantize_coefficients(const FLAC__real lp_coeff[], unsigned order,
 		FLAC__int32 q;
 		for(i = 0; i < order; i++) {
 			error += lp_coeff[i] * (1 << *shift);
-#if 1 /* unfortunately lround() is C99 */
-			if(error >= 0.0)
-				q = (FLAC__int32)(error + 0.5);
-			else
-				q = (FLAC__int32)(error - 0.5);
-#else
 			q = lround(error);
-#endif
+
 #ifdef FLAC__OVERFLOW_DETECT
 			if(q > qmax+1) /* we expect q==qmax+1 occasionally due to rounding */
 				fprintf(stderr,"FLAC__lpc_quantize_coefficients: quantizer overflow: q>qmax %d>%d shift=%d cmax=%f precision=%u lpc[%u]=%f\n",q,qmax,*shift,cmax,precision+1,i,lp_coeff[i]);
@@ -235,14 +240,7 @@ int FLAC__lpc_quantize_coefficients(const FLAC__real lp_coeff[], unsigned order,
 #endif
 		for(i = 0; i < order; i++) {
 			error += lp_coeff[i] / (1 << nshift);
-#if 1 /* unfortunately lround() is C99 */
-			if(error >= 0.0)
-				q = (FLAC__int32)(error + 0.5);
-			else
-				q = (FLAC__int32)(error - 0.5);
-#else
 			q = lround(error);
-#endif
 #ifdef FLAC__OVERFLOW_DETECT
 			if(q > qmax+1) /* we expect q==qmax+1 occasionally due to rounding */
 				fprintf(stderr,"FLAC__lpc_quantize_coefficients: quantizer overflow: q>qmax %d>%d shift=%d cmax=%f precision=%u lpc[%u]=%f\n",q,qmax,*shift,cmax,precision+1,i,lp_coeff[i]);
@@ -262,7 +260,12 @@ int FLAC__lpc_quantize_coefficients(const FLAC__real lp_coeff[], unsigned order,
 	return 0;
 }
 
-void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
+#if defined(_MSC_VER)
+// silence MSVC warnings about __restrict modifier
+#pragma warning ( disable : 4028 )
+#endif
+
+void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 * flac_restrict data, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict residual)
 #if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
 {
 	FLAC__int64 sumo;
@@ -285,13 +288,7 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 *data, u
 		for(j = 0; j < order; j++) {
 			sum += qlp_coeff[j] * (*(--history));
 			sumo += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*history);
-#if defined _MSC_VER
-			if(sumo > 2147483647I64 || sumo < -2147483648I64)
-				fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients: OVERFLOW, i=%u, j=%u, c=%d, d=%d, sumo=%I64d\n",i,j,qlp_coeff[j],*history,sumo);
-#else
-			if(sumo > 2147483647ll || sumo < -2147483648ll)
-				fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients: OVERFLOW, i=%u, j=%u, c=%d, d=%d, sumo=%lld\n",i,j,qlp_coeff[j],*history,(long long)sumo);
-#endif
+				fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients: OVERFLOW, i=%u, j=%u, c=%d, d=%d, sumo=%" PRId64 "\n",i,j,qlp_coeff[j],*history,sumo);
 		}
 		*(residual++) = *(data++) - (sum >> lp_quantization);
 	}
@@ -528,7 +525,7 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 *data, u
 }
 #endif
 
-void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
+void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 * flac_restrict data, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict residual)
 #if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
 {
 	unsigned i, j;
@@ -549,19 +546,11 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 *da
 		for(j = 0; j < order; j++)
 			sum += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*(--history));
 		if(FLAC__bitmath_silog2_wide(sum >> lp_quantization) > 32) {
-#if defined _MSC_VER
-			fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: OVERFLOW, i=%u, sum=%I64d\n", i, sum >> lp_quantization);
-#else
-			fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: OVERFLOW, i=%u, sum=%lld\n", i, (long long)(sum >> lp_quantization));
-#endif
+			fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: OVERFLOW, i=%u, sum=%" PRId64 "\n", i, (sum >> lp_quantization));
 			break;
 		}
 		if(FLAC__bitmath_silog2_wide((FLAC__int64)(*data) - (sum >> lp_quantization)) > 32) {
-#if defined _MSC_VER
-			fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: OVERFLOW, i=%u, data=%d, sum=%I64d, residual=%I64d\n", i, *data, sum >> lp_quantization, (FLAC__int64)(*data) - (sum >> lp_quantization));
-#else
-			fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: OVERFLOW, i=%u, data=%d, sum=%lld, residual=%lld\n", i, *data, (long long)(sum >> lp_quantization), (long long)((FLAC__int64)(*data) - (sum >> lp_quantization)));
-#endif
+			fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: OVERFLOW, i=%u, data=%d, sum=%" PRId64 ", residual=%" PRId64 "\n", i, *data, (int64_t)(sum >> lp_quantization), ((FLAC__int64)(*data) - (sum >> lp_quantization)));
 			break;
 		}
 		*(residual++) = *(data++) - (FLAC__int32)(sum >> lp_quantization);
@@ -792,7 +781,7 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 *da
 
 #endif /* !defined FLAC__INTEGER_ONLY_LIBRARY */
 
-void FLAC__lpc_restore_signal(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
+void FLAC__lpc_restore_signal(const FLAC__int32 * flac_restrict residual, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict data)
 #if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
 {
 	FLAC__int64 sumo;
@@ -815,15 +804,8 @@ void FLAC__lpc_restore_signal(const FLAC__int32 residual[], unsigned data_len, c
 		for(j = 0; j < order; j++) {
 			sum += qlp_coeff[j] * (*(--history));
 			sumo += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*history);
-#ifdef FLAC__OVERFLOW_DETECT_VERBOSE
-#if defined _MSC_VER
-			if(sumo > 2147483647I64 || sumo < -2147483648I64)
-				fprintf(stderr,"FLAC__lpc_restore_signal: OVERFLOW, i=%u, j=%u, c=%d, d=%d, sumo=%I64d\n",i,j,qlp_coeff[j],*history,sumo);
-#else
 			if(sumo > 2147483647ll || sumo < -2147483648ll)
-				fprintf(stderr,"FLAC__lpc_restore_signal: OVERFLOW, i=%u, j=%u, c=%d, d=%d, sumo=%lld\n",i,j,qlp_coeff[j],*history,(long long)sumo);
-#endif
-#endif
+				fprintf(stderr,"FLAC__lpc_restore_signal: OVERFLOW, i=%u, j=%u, c=%d, d=%d, sumo=%" PRId64 "\n",i,j,qlp_coeff[j],*history,sumo);
 		}
 		*(data++) = *(r++) + (sum >> lp_quantization);
 	}
@@ -1060,7 +1042,7 @@ void FLAC__lpc_restore_signal(const FLAC__int32 residual[], unsigned data_len, c
 }
 #endif
 
-void FLAC__lpc_restore_signal_wide(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
+void FLAC__lpc_restore_signal_wide(const FLAC__int32 * flac_restrict residual, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict data)
 #if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
 {
 	unsigned i, j;
@@ -1081,23 +1063,11 @@ void FLAC__lpc_restore_signal_wide(const FLAC__int32 residual[], unsigned data_l
 		for(j = 0; j < order; j++)
 			sum += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*(--history));
 		if(FLAC__bitmath_silog2_wide(sum >> lp_quantization) > 32) {
-#ifdef FLAC__OVERFLOW_DETECT_VERBOSE
-#ifdef _MSC_VER
-			fprintf(stderr,"FLAC__lpc_restore_signal_wide: OVERFLOW, i=%u, sum=%I64d\n", i, sum >> lp_quantization);
-#else
-			fprintf(stderr,"FLAC__lpc_restore_signal_wide: OVERFLOW, i=%u, sum=%lld\n", i, (long long)(sum >> lp_quantization));
-#endif
-#endif
+			fprintf(stderr,"FLAC__lpc_restore_signal_wide: OVERFLOW, i=%u, sum=%" PRId64 "\n", i, (sum >> lp_quantization));
 			break;
 		}
 		if(FLAC__bitmath_silog2_wide((FLAC__int64)(*r) + (sum >> lp_quantization)) > 32) {
-#ifdef FLAC__OVERFLOW_DETECT_VERBOSE
-#ifdef _MSC_VER
-			fprintf(stderr,"FLAC__lpc_restore_signal_wide: OVERFLOW, i=%u, residual=%d, sum=%I64d, data=%I64d\n", i, *r, sum >> lp_quantization, (FLAC__int64)(*r) + (sum >> lp_quantization));
-#else
-			fprintf(stderr,"FLAC__lpc_restore_signal_wide: OVERFLOW, i=%u, residual=%d, sum=%lld, data=%lld\n", i, *r, (long long)(sum >> lp_quantization), (long long)((FLAC__int64)(*r) + (sum >> lp_quantization)));
-#endif
-#endif
+			fprintf(stderr,"FLAC__lpc_restore_signal_wide: OVERFLOW, i=%u, residual=%d, sum=%" PRId64 ", data=%" PRId64 "\n", i, *r, (sum >> lp_quantization), ((FLAC__int64)(*r) + (sum >> lp_quantization)));
 			break;
 		}
 		*(data++) = *(r++) + (FLAC__int32)(sum >> lp_quantization);
@@ -1326,6 +1296,10 @@ void FLAC__lpc_restore_signal_wide(const FLAC__int32 residual[], unsigned data_l
 }
 #endif
 
+#if defined(_MSC_VER)
+#pragma warning ( default : 4028 )
+#endif
+
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 
 FLAC__double FLAC__lpc_compute_expected_bits_per_residual_sample(FLAC__double lpc_error, unsigned total_samples)
@@ -1334,7 +1308,7 @@ FLAC__double FLAC__lpc_compute_expected_bits_per_residual_sample(FLAC__double lp
 
 	FLAC__ASSERT(total_samples > 0);
 
-	error_scale = 0.5 * M_LN2 * M_LN2 / (FLAC__double)total_samples;
+	error_scale = 0.5 / (FLAC__double)total_samples;
 
 	return FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(lpc_error, error_scale);
 }
@@ -1358,26 +1332,26 @@ FLAC__double FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scal
 
 unsigned FLAC__lpc_compute_best_order(const FLAC__double lpc_error[], unsigned max_order, unsigned total_samples, unsigned overhead_bits_per_order)
 {
-	unsigned order, index, best_index; /* 'index' the index into lpc_error; index==order-1 since lpc_error[0] is for order==1, lpc_error[1] is for order==2, etc */
+	unsigned order, indx, best_index; /* 'index' the index into lpc_error; index==order-1 since lpc_error[0] is for order==1, lpc_error[1] is for order==2, etc */
 	FLAC__double bits, best_bits, error_scale;
 
 	FLAC__ASSERT(max_order > 0);
 	FLAC__ASSERT(total_samples > 0);
 
-	error_scale = 0.5 * M_LN2 * M_LN2 / (FLAC__double)total_samples;
+	error_scale = 0.5 / (FLAC__double)total_samples;
 
 	best_index = 0;
 	best_bits = (unsigned)(-1);
 
-	for(index = 0, order = 1; index < max_order; index++, order++) {
-		bits = FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(lpc_error[index], error_scale) * (FLAC__double)(total_samples - order) + (FLAC__double)(order * overhead_bits_per_order);
+	for(indx = 0, order = 1; indx < max_order; indx++, order++) {
+		bits = FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(lpc_error[indx], error_scale) * (FLAC__double)(total_samples - order) + (FLAC__double)(order * overhead_bits_per_order);
 		if(bits < best_bits) {
-			best_index = index;
+			best_index = indx;
 			best_bits = bits;
 		}
 	}
 
-	return best_index+1; /* +1 since index of lpc_error[] is order-1 */
+	return best_index+1; /* +1 since indx of lpc_error[] is order-1 */
 }
 
 #endif /* !defined FLAC__INTEGER_ONLY_LIBRARY */
diff --git a/libFLAC/md5.c b/libFLAC/md5.c
index 37cef67..e5adc3e 100644
--- a/libFLAC/md5.c
+++ b/libFLAC/md5.c
@@ -1,4 +1,4 @@
-#if HAVE_CONFIG_H
+#ifdef HAVE_CONFIG_H
 #  include <config.h>
 #endif
 
@@ -7,10 +7,7 @@
 
 #include "private/md5.h"
 #include "share/alloc.h"
-
-#ifndef FLaC__INLINE
-#define FLaC__INLINE
-#endif
+#include "share/endswap.h"
 
 /*
  * This code implements the MD5 message-digest algorithm.
@@ -143,7 +140,7 @@ static void byteSwap(FLAC__uint32 *buf, unsigned words)
 {
 	register FLAC__uint32 x;
 	do {
-		x = *buf; 
+		x = *buf;
 		x = ((x << 8) & 0xff00ff00) | ((x >> 8) & 0x00ff00ff);
 		*buf++ = (x >> 16) | (x << 16);
 	} while (--words);
@@ -227,7 +224,7 @@ void FLAC__MD5Init(FLAC__MD5Context *ctx)
 	ctx->bytes[0] = 0;
 	ctx->bytes[1] = 0;
 
-	ctx->internal_buf = 0;
+	ctx->internal_buf.p8= 0;
 	ctx->capacity = 0;
 }
 
@@ -263,69 +260,135 @@ void FLAC__MD5Final(FLAC__byte digest[16], FLAC__MD5Context *ctx)
 
 	byteSwap(ctx->buf, 4);
 	memcpy(digest, ctx->buf, 16);
-	memset(ctx, 0, sizeof(ctx));	/* In case it's sensitive */
-	if(0 != ctx->internal_buf) {
-		free(ctx->internal_buf);
-		ctx->internal_buf = 0;
+	if (0 != ctx->internal_buf.p8) {
+		free(ctx->internal_buf.p8);
+		ctx->internal_buf.p8= 0;
 		ctx->capacity = 0;
 	}
+	memset(ctx, 0, sizeof(*ctx));	/* In case it's sensitive */
 }
 
 /*
  * Convert the incoming audio signal to a byte stream
  */
-static void format_input_(FLAC__byte *buf, const FLAC__int32 * const signal[], unsigned channels, unsigned samples, unsigned bytes_per_sample)
+static void format_input_(FLAC__multibyte *mbuf, const FLAC__int32 * const signal[], unsigned channels, unsigned samples, unsigned bytes_per_sample)
 {
+	FLAC__byte *buf_ = mbuf->p8;
+	FLAC__int16 *buf16 = mbuf->p16;
+	FLAC__int32 *buf32 = mbuf->p32;
+	FLAC__int32 a_word;
 	unsigned channel, sample;
-	register FLAC__int32 a_word;
-	register FLAC__byte *buf_ = buf;
 
-#if WORDS_BIGENDIAN
-#else
-	if(channels == 2 && bytes_per_sample == 2) {
-		FLAC__int16 *buf1_ = ((FLAC__int16*)buf_) + 1;
-		memcpy(buf_, signal[0], sizeof(FLAC__int32) * samples);
-		for(sample = 0; sample < samples; sample++, buf1_+=2)
-			*buf1_ = (FLAC__int16)signal[1][sample];
-	}
-	else if(channels == 1 && bytes_per_sample == 2) {
-		FLAC__int16 *buf1_ = (FLAC__int16*)buf_;
-		for(sample = 0; sample < samples; sample++)
-			*buf1_++ = (FLAC__int16)signal[0][sample];
-	}
-	else
-#endif
-	if(bytes_per_sample == 2) {
-		if(channels == 2) {
-			for(sample = 0; sample < samples; sample++) {
-				a_word = signal[0][sample];
-				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
-				*buf_++ = (FLAC__byte)a_word;
-				a_word = signal[1][sample];
-				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
-				*buf_++ = (FLAC__byte)a_word;
+	/* Storage in the output buffer, buf, is little endian. */
+
+#define BYTES_CHANNEL_SELECTOR(bytes, channels)   (bytes * 100 + channels)
+
+	/* First do the most commonly used combinations. */
+	switch (BYTES_CHANNEL_SELECTOR (bytes_per_sample, channels)) {
+		/* One byte per sample. */
+		case (BYTES_CHANNEL_SELECTOR (1, 1)):
+			for (sample = 0; sample < samples; sample++)
+				*buf_++ = signal[0][sample];
+			return;
+
+		case (BYTES_CHANNEL_SELECTOR (1, 2)):
+			for (sample = 0; sample < samples; sample++) {
+				*buf_++ = signal[0][sample];
+				*buf_++ = signal[1][sample];
 			}
-		}
-		else if(channels == 1) {
-			for(sample = 0; sample < samples; sample++) {
+			return;
+
+		case (BYTES_CHANNEL_SELECTOR (1, 4)):
+			for (sample = 0; sample < samples; sample++) {
+				*buf_++ = signal[0][sample];
+				*buf_++ = signal[1][sample];
+				*buf_++ = signal[2][sample];
+				*buf_++ = signal[3][sample];
+			}
+			return;
+
+		case (BYTES_CHANNEL_SELECTOR (1, 6)):
+			for (sample = 0; sample < samples; sample++) {
+				*buf_++ = signal[0][sample];
+				*buf_++ = signal[1][sample];
+				*buf_++ = signal[2][sample];
+				*buf_++ = signal[3][sample];
+				*buf_++ = signal[4][sample];
+				*buf_++ = signal[5][sample];
+			}
+			return;
+
+		case (BYTES_CHANNEL_SELECTOR (1, 8)):
+			for (sample = 0; sample < samples; sample++) {
+				*buf_++ = signal[0][sample];
+				*buf_++ = signal[1][sample];
+				*buf_++ = signal[2][sample];
+				*buf_++ = signal[3][sample];
+				*buf_++ = signal[4][sample];
+				*buf_++ = signal[5][sample];
+				*buf_++ = signal[6][sample];
+				*buf_++ = signal[7][sample];
+			}
+			return;
+
+		/* Two bytes per sample. */
+		case (BYTES_CHANNEL_SELECTOR (2, 1)):
+			for (sample = 0; sample < samples; sample++)
+				*buf16++ = H2LE_16(signal[0][sample]);
+			return;
+
+		case (BYTES_CHANNEL_SELECTOR (2, 2)):
+			for (sample = 0; sample < samples; sample++) {
+				*buf16++ = H2LE_16(signal[0][sample]);
+				*buf16++ = H2LE_16(signal[1][sample]);
+			}
+			return;
+
+		case (BYTES_CHANNEL_SELECTOR (2, 4)):
+			for (sample = 0; sample < samples; sample++) {
+				*buf16++ = H2LE_16(signal[0][sample]);
+				*buf16++ = H2LE_16(signal[1][sample]);
+				*buf16++ = H2LE_16(signal[2][sample]);
+				*buf16++ = H2LE_16(signal[3][sample]);
+			}
+			return;
+
+		case (BYTES_CHANNEL_SELECTOR (2, 6)):
+			for (sample = 0; sample < samples; sample++) {
+				*buf16++ = H2LE_16(signal[0][sample]);
+				*buf16++ = H2LE_16(signal[1][sample]);
+				*buf16++ = H2LE_16(signal[2][sample]);
+				*buf16++ = H2LE_16(signal[3][sample]);
+				*buf16++ = H2LE_16(signal[4][sample]);
+				*buf16++ = H2LE_16(signal[5][sample]);
+			}
+			return;
+
+		case (BYTES_CHANNEL_SELECTOR (2, 8)):
+			for (sample = 0; sample < samples; sample++) {
+				*buf16++ = H2LE_16(signal[0][sample]);
+				*buf16++ = H2LE_16(signal[1][sample]);
+				*buf16++ = H2LE_16(signal[2][sample]);
+				*buf16++ = H2LE_16(signal[3][sample]);
+				*buf16++ = H2LE_16(signal[4][sample]);
+				*buf16++ = H2LE_16(signal[5][sample]);
+				*buf16++ = H2LE_16(signal[6][sample]);
+				*buf16++ = H2LE_16(signal[7][sample]);
+			}
+			return;
+
+		/* Three bytes per sample. */
+		case (BYTES_CHANNEL_SELECTOR (3, 1)):
+			for (sample = 0; sample < samples; sample++) {
 				a_word = signal[0][sample];
 				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
+				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
 				*buf_++ = (FLAC__byte)a_word;
 			}
-		}
-		else {
-			for(sample = 0; sample < samples; sample++) {
-				for(channel = 0; channel < channels; channel++) {
-					a_word = signal[channel][sample];
-					*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
-					*buf_++ = (FLAC__byte)a_word;
-				}
-			}
-		}
-	}
-	else if(bytes_per_sample == 3) {
-		if(channels == 2) {
-			for(sample = 0; sample < samples; sample++) {
+			return;
+
+		case (BYTES_CHANNEL_SELECTOR (3, 2)):
+			for (sample = 0; sample < samples; sample++) {
 				a_word = signal[0][sample];
 				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
 				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
@@ -335,60 +398,90 @@ static void format_input_(FLAC__byte *buf, const FLAC__int32 * const signal[], u
 				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
 				*buf_++ = (FLAC__byte)a_word;
 			}
-		}
-		else if(channels == 1) {
-			for(sample = 0; sample < samples; sample++) {
-				a_word = signal[0][sample];
-				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
-				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
-				*buf_++ = (FLAC__byte)a_word;
+			return;
+
+		/* Four bytes per sample. */
+		case (BYTES_CHANNEL_SELECTOR (4, 1)):
+			for (sample = 0; sample < samples; sample++)
+				*buf32++ = H2LE_32(signal[0][sample]);
+			return;
+
+		case (BYTES_CHANNEL_SELECTOR (4, 2)):
+			for (sample = 0; sample < samples; sample++) {
+				*buf32++ = H2LE_32(signal[0][sample]);
+				*buf32++ = H2LE_32(signal[1][sample]);
 			}
-		}
-		else {
-			for(sample = 0; sample < samples; sample++) {
-				for(channel = 0; channel < channels; channel++) {
-					a_word = signal[channel][sample];
-					*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
-					*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
-					*buf_++ = (FLAC__byte)a_word;
-				}
+			return;
+
+		case (BYTES_CHANNEL_SELECTOR (4, 4)):
+			for (sample = 0; sample < samples; sample++) {
+				*buf32++ = H2LE_32(signal[0][sample]);
+				*buf32++ = H2LE_32(signal[1][sample]);
+				*buf32++ = H2LE_32(signal[2][sample]);
+				*buf32++ = H2LE_32(signal[3][sample]);
 			}
-		}
-	}
-	else if(bytes_per_sample == 1) {
-		if(channels == 2) {
-			for(sample = 0; sample < samples; sample++) {
-				a_word = signal[0][sample];
-				*buf_++ = (FLAC__byte)a_word;
-				a_word = signal[1][sample];
-				*buf_++ = (FLAC__byte)a_word;
+			return;
+
+		case (BYTES_CHANNEL_SELECTOR (4, 6)):
+			for (sample = 0; sample < samples; sample++) {
+				*buf32++ = H2LE_32(signal[0][sample]);
+				*buf32++ = H2LE_32(signal[1][sample]);
+				*buf32++ = H2LE_32(signal[2][sample]);
+				*buf32++ = H2LE_32(signal[3][sample]);
+				*buf32++ = H2LE_32(signal[4][sample]);
+				*buf32++ = H2LE_32(signal[5][sample]);
 			}
-		}
-		else if(channels == 1) {
-			for(sample = 0; sample < samples; sample++) {
-				a_word = signal[0][sample];
-				*buf_++ = (FLAC__byte)a_word;
+			return;
+
+		case (BYTES_CHANNEL_SELECTOR (4, 8)):
+			for (sample = 0; sample < samples; sample++) {
+				*buf32++ = H2LE_32(signal[0][sample]);
+				*buf32++ = H2LE_32(signal[1][sample]);
+				*buf32++ = H2LE_32(signal[2][sample]);
+				*buf32++ = H2LE_32(signal[3][sample]);
+				*buf32++ = H2LE_32(signal[4][sample]);
+				*buf32++ = H2LE_32(signal[5][sample]);
+				*buf32++ = H2LE_32(signal[6][sample]);
+				*buf32++ = H2LE_32(signal[7][sample]);
 			}
-		}
-		else {
-			for(sample = 0; sample < samples; sample++) {
-				for(channel = 0; channel < channels; channel++) {
+			return;
+
+		default:
+			break;
+	}
+
+	/* General version. */
+	switch (bytes_per_sample) {
+		case 1:
+			for (sample = 0; sample < samples; sample++)
+				for (channel = 0; channel < channels; channel++)
+					*buf_++ = signal[channel][sample];
+			return;
+
+		case 2:
+			for (sample = 0; sample < samples; sample++)
+				for (channel = 0; channel < channels; channel++)
+					*buf16++ = H2LE_16(signal[channel][sample]);
+			return;
+
+		case 3:
+			for (sample = 0; sample < samples; sample++)
+				for (channel = 0; channel < channels; channel++) {
 					a_word = signal[channel][sample];
+					*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
+					*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
 					*buf_++ = (FLAC__byte)a_word;
 				}
-			}
-		}
-	}
-	else { /* bytes_per_sample == 4, maybe optimize more later */
-		for(sample = 0; sample < samples; sample++) {
-			for(channel = 0; channel < channels; channel++) {
-				a_word = signal[channel][sample];
-				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
-				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
-				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
-				*buf_++ = (FLAC__byte)a_word;
-			}
-		}
+			return;
+
+		case 4:
+			for (sample = 0; sample < samples; sample++)
+				for (channel = 0; channel < channels; channel++)
+					*buf32++ = H2LE_32(signal[channel][sample]);
+			return;
+
+		default:
+			break;
 	}
 }
 
@@ -400,25 +493,26 @@ FLAC__bool FLAC__MD5Accumulate(FLAC__MD5Context *ctx, const FLAC__int32 * const
 	const size_t bytes_needed = (size_t)channels * (size_t)samples * (size_t)bytes_per_sample;
 
 	/* overflow check */
-	if((size_t)channels > SIZE_MAX / (size_t)bytes_per_sample)
+	if ((size_t)channels > SIZE_MAX / (size_t)bytes_per_sample)
 		return false;
-	if((size_t)channels * (size_t)bytes_per_sample > SIZE_MAX / (size_t)samples)
+	if ((size_t)channels * (size_t)bytes_per_sample > SIZE_MAX / (size_t)samples)
 		return false;
 
-	if(ctx->capacity < bytes_needed) {
-		FLAC__byte *tmp = (FLAC__byte*)realloc(ctx->internal_buf, bytes_needed);
-		if(0 == tmp) {
-			free(ctx->internal_buf);
-			if(0 == (ctx->internal_buf = (FLAC__byte*)safe_malloc_(bytes_needed)))
+	if (ctx->capacity < bytes_needed) {
+		FLAC__byte *tmp = realloc(ctx->internal_buf.p8, bytes_needed);
+		if (0 == tmp) {
+			free(ctx->internal_buf.p8);
+			if (0 == (ctx->internal_buf.p8= safe_malloc_(bytes_needed)))
 				return false;
 		}
-		ctx->internal_buf = tmp;
+		else
+			ctx->internal_buf.p8= tmp;
 		ctx->capacity = bytes_needed;
 	}
 
-	format_input_(ctx->internal_buf, signal, channels, samples, bytes_per_sample);
+	format_input_(&ctx->internal_buf, signal, channels, samples, bytes_per_sample);
 
-	FLAC__MD5Update(ctx, ctx->internal_buf, bytes_needed);
+	FLAC__MD5Update(ctx, ctx->internal_buf.p8, bytes_needed);
 
 	return true;
 }
diff --git a/libFLAC/memory.c b/libFLAC/memory.c
index 4d10097..d22df70 100644
--- a/libFLAC/memory.c
+++ b/libFLAC/memory.c
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2001-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,10 +30,14 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#if HAVE_CONFIG_H
+#ifdef HAVE_CONFIG_H
 #  include <config.h>
 #endif
 
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+
 #include "private/memory.h"
 #include "FLAC/assert.h"
 #include "share/alloc.h"
@@ -45,25 +50,8 @@ void *FLAC__memory_alloc_aligned(size_t bytes, void **aligned_address)
 
 #ifdef FLAC__ALIGN_MALLOC_DATA
 	/* align on 32-byte (256-bit) boundary */
-	x = safe_malloc_add_2op_(bytes, /*+*/31);
-#ifdef SIZEOF_VOIDP
-#if SIZEOF_VOIDP == 4
-		/* could do  *aligned_address = x + ((unsigned) (32 - (((unsigned)x) & 31))) & 31; */
-		*aligned_address = (void*)(((unsigned)x + 31) & -32);
-#elif SIZEOF_VOIDP == 8
-		*aligned_address = (void*)(((FLAC__uint64)x + 31) & (FLAC__uint64)(-((FLAC__int64)32)));
-#else
-# error  Unsupported sizeof(void*)
-#endif
-#else
-	/* there's got to be a better way to do this right for all archs */
-	if(sizeof(void*) == sizeof(unsigned))
-		*aligned_address = (void*)(((unsigned)x + 31) & -32);
-	else if(sizeof(void*) == sizeof(FLAC__uint64))
-		*aligned_address = (void*)(((FLAC__uint64)x + 31) & (FLAC__uint64)(-((FLAC__int64)32)));
-	else
-		return 0;
-#endif
+	x = safe_malloc_add_2op_(bytes, /*+*/31L);
+	*aligned_address = (void*)(((uintptr_t)x + 31L) & -32L);
 #else
 	x = safe_malloc_(bytes);
 	*aligned_address = x;
@@ -71,7 +59,7 @@ void *FLAC__memory_alloc_aligned(size_t bytes, void **aligned_address)
 	return x;
 }
 
-FLAC__bool FLAC__memory_alloc_aligned_int32_array(unsigned elements, FLAC__int32 **unaligned_pointer, FLAC__int32 **aligned_pointer)
+FLAC__bool FLAC__memory_alloc_aligned_int32_array(size_t elements, FLAC__int32 **unaligned_pointer, FLAC__int32 **aligned_pointer)
 {
 	FLAC__int32 *pu; /* unaligned pointer */
 	union { /* union needed to comply with C99 pointer aliasing rules */
@@ -84,10 +72,10 @@ FLAC__bool FLAC__memory_alloc_aligned_int32_array(unsigned elements, FLAC__int32
 	FLAC__ASSERT(0 != aligned_pointer);
 	FLAC__ASSERT(unaligned_pointer != aligned_pointer);
 
-	if((size_t)elements > SIZE_MAX / sizeof(*pu)) /* overflow check */
+	if(elements > SIZE_MAX / sizeof(*pu)) /* overflow check */
 		return false;
 
-	pu = (FLAC__int32*)FLAC__memory_alloc_aligned(sizeof(*pu) * (size_t)elements, &u.pv);
+	pu = FLAC__memory_alloc_aligned(sizeof(*pu) * elements, &u.pv);
 	if(0 == pu) {
 		return false;
 	}
@@ -100,7 +88,7 @@ FLAC__bool FLAC__memory_alloc_aligned_int32_array(unsigned elements, FLAC__int32
 	}
 }
 
-FLAC__bool FLAC__memory_alloc_aligned_uint32_array(unsigned elements, FLAC__uint32 **unaligned_pointer, FLAC__uint32 **aligned_pointer)
+FLAC__bool FLAC__memory_alloc_aligned_uint32_array(size_t elements, FLAC__uint32 **unaligned_pointer, FLAC__uint32 **aligned_pointer)
 {
 	FLAC__uint32 *pu; /* unaligned pointer */
 	union { /* union needed to comply with C99 pointer aliasing rules */
@@ -113,10 +101,10 @@ FLAC__bool FLAC__memory_alloc_aligned_uint32_array(unsigned elements, FLAC__uint
 	FLAC__ASSERT(0 != aligned_pointer);
 	FLAC__ASSERT(unaligned_pointer != aligned_pointer);
 
-	if((size_t)elements > SIZE_MAX / sizeof(*pu)) /* overflow check */
+	if(elements > SIZE_MAX / sizeof(*pu)) /* overflow check */
 		return false;
 
-	pu = (FLAC__uint32*)FLAC__memory_alloc_aligned(sizeof(*pu) * elements, &u.pv);
+	pu = FLAC__memory_alloc_aligned(sizeof(*pu) * elements, &u.pv);
 	if(0 == pu) {
 		return false;
 	}
@@ -129,7 +117,7 @@ FLAC__bool FLAC__memory_alloc_aligned_uint32_array(unsigned elements, FLAC__uint
 	}
 }
 
-FLAC__bool FLAC__memory_alloc_aligned_uint64_array(unsigned elements, FLAC__uint64 **unaligned_pointer, FLAC__uint64 **aligned_pointer)
+FLAC__bool FLAC__memory_alloc_aligned_uint64_array(size_t elements, FLAC__uint64 **unaligned_pointer, FLAC__uint64 **aligned_pointer)
 {
 	FLAC__uint64 *pu; /* unaligned pointer */
 	union { /* union needed to comply with C99 pointer aliasing rules */
@@ -142,10 +130,10 @@ FLAC__bool FLAC__memory_alloc_aligned_uint64_array(unsigned elements, FLAC__uint
 	FLAC__ASSERT(0 != aligned_pointer);
 	FLAC__ASSERT(unaligned_pointer != aligned_pointer);
 
-	if((size_t)elements > SIZE_MAX / sizeof(*pu)) /* overflow check */
+	if(elements > SIZE_MAX / sizeof(*pu)) /* overflow check */
 		return false;
 
-	pu = (FLAC__uint64*)FLAC__memory_alloc_aligned(sizeof(*pu) * elements, &u.pv);
+	pu = FLAC__memory_alloc_aligned(sizeof(*pu) * elements, &u.pv);
 	if(0 == pu) {
 		return false;
 	}
@@ -158,7 +146,7 @@ FLAC__bool FLAC__memory_alloc_aligned_uint64_array(unsigned elements, FLAC__uint
 	}
 }
 
-FLAC__bool FLAC__memory_alloc_aligned_unsigned_array(unsigned elements, unsigned **unaligned_pointer, unsigned **aligned_pointer)
+FLAC__bool FLAC__memory_alloc_aligned_unsigned_array(size_t elements, unsigned **unaligned_pointer, unsigned **aligned_pointer)
 {
 	unsigned *pu; /* unaligned pointer */
 	union { /* union needed to comply with C99 pointer aliasing rules */
@@ -171,10 +159,10 @@ FLAC__bool FLAC__memory_alloc_aligned_unsigned_array(unsigned elements, unsigned
 	FLAC__ASSERT(0 != aligned_pointer);
 	FLAC__ASSERT(unaligned_pointer != aligned_pointer);
 
-	if((size_t)elements > SIZE_MAX / sizeof(*pu)) /* overflow check */
+	if(elements > SIZE_MAX / sizeof(*pu)) /* overflow check */
 		return false;
 
-	pu = (unsigned*)FLAC__memory_alloc_aligned(sizeof(*pu) * elements, &u.pv);
+	pu = FLAC__memory_alloc_aligned(sizeof(*pu) * elements, &u.pv);
 	if(0 == pu) {
 		return false;
 	}
@@ -189,7 +177,7 @@ FLAC__bool FLAC__memory_alloc_aligned_unsigned_array(unsigned elements, unsigned
 
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 
-FLAC__bool FLAC__memory_alloc_aligned_real_array(unsigned elements, FLAC__real **unaligned_pointer, FLAC__real **aligned_pointer)
+FLAC__bool FLAC__memory_alloc_aligned_real_array(size_t elements, FLAC__real **unaligned_pointer, FLAC__real **aligned_pointer)
 {
 	FLAC__real *pu; /* unaligned pointer */
 	union { /* union needed to comply with C99 pointer aliasing rules */
@@ -202,10 +190,10 @@ FLAC__bool FLAC__memory_alloc_aligned_real_array(unsigned elements, FLAC__real *
 	FLAC__ASSERT(0 != aligned_pointer);
 	FLAC__ASSERT(unaligned_pointer != aligned_pointer);
 
-	if((size_t)elements > SIZE_MAX / sizeof(*pu)) /* overflow check */
+	if(elements > SIZE_MAX / sizeof(*pu)) /* overflow check */
 		return false;
 
-	pu = (FLAC__real*)FLAC__memory_alloc_aligned(sizeof(*pu) * elements, &u.pv);
+	pu = FLAC__memory_alloc_aligned(sizeof(*pu) * elements, &u.pv);
 	if(0 == pu) {
 		return false;
 	}
@@ -219,3 +207,12 @@ FLAC__bool FLAC__memory_alloc_aligned_real_array(unsigned elements, FLAC__real *
 }
 
 #endif
+
+void *safe_malloc_mul_2op_p(size_t size1, size_t size2)
+{
+	if(!size1 || !size2)
+		return malloc(1); /* malloc(0) is undefined; FLAC src convention is to always allocate */
+	if(size1 > SIZE_MAX / size2)
+		return 0;
+	return malloc(size1*size2);
+}
diff --git a/libFLAC/stream_decoder.c b/libFLAC/stream_decoder.c
index 916fde6..d13b23b 100644
--- a/libFLAC/stream_decoder.c
+++ b/libFLAC/stream_decoder.c
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,29 +30,16 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#if HAVE_CONFIG_H
+#ifdef HAVE_CONFIG_H
 #  include <config.h>
 #endif
 
-#if defined _MSC_VER || defined __MINGW32__
-#include <io.h> /* for _setmode() */
-#include <fcntl.h> /* for _O_BINARY */
-#endif
-#if defined __CYGWIN__ || defined __EMX__
-#include <io.h> /* for setmode(), O_BINARY */
-#include <fcntl.h> /* for _O_BINARY */
-#endif
 #include <stdio.h>
 #include <stdlib.h> /* for malloc() */
 #include <string.h> /* for memset/memcpy() */
 #include <sys/stat.h> /* for stat() */
 #include <sys/types.h> /* for off_t */
-#if defined _MSC_VER || defined __BORLANDC__ || defined __MINGW32__
-#if _MSC_VER <= 1600 || defined __BORLANDC__ /* @@@ [2G limit] */
-#define fseeko fseek
-#define ftello ftell
-#endif
-#endif
+#include "share/compat.h"
 #include "FLAC/assert.h"
 #include "share/alloc.h"
 #include "protected/stream_decoder.h"
@@ -64,18 +52,7 @@
 #include "private/lpc.h"
 #include "private/md5.h"
 #include "private/memory.h"
-
-#ifdef max
-#undef max
-#endif
-#define max(a,b) ((a)>(b)?(a):(b))
-
-/* adjust for compilers that can't understand using LLU suffix for uint64_t literals */
-#ifdef _MSC_VER
-#define FLAC__U64L(x) x
-#else
-#define FLAC__U64L(x) x##LLU
-#endif
+#include "private/macros.h"
 
 
 /* technically this should be in an "export.c" but this is convenient enough */
@@ -94,7 +71,7 @@ FLAC_API int FLAC_API_SUPPORTS_OGG_FLAC =
  *
  ***********************************************************************/
 
-static FLAC__byte ID3V2_TAG_[3] = { 'I', 'D', '3' };
+static const FLAC__byte ID3V2_TAG_[3] = { 'I', 'D', '3' };
 
 /***********************************************************************
  *
@@ -110,7 +87,7 @@ static FLAC__bool find_metadata_(FLAC__StreamDecoder *decoder);
 static FLAC__bool read_metadata_(FLAC__StreamDecoder *decoder);
 static FLAC__bool read_metadata_streaminfo_(FLAC__StreamDecoder *decoder, FLAC__bool is_last, unsigned length);
 static FLAC__bool read_metadata_seektable_(FLAC__StreamDecoder *decoder, FLAC__bool is_last, unsigned length);
-static FLAC__bool read_metadata_vorbiscomment_(FLAC__StreamDecoder *decoder, FLAC__StreamMetadata_VorbisComment *obj);
+static FLAC__bool read_metadata_vorbiscomment_(FLAC__StreamDecoder *decoder, FLAC__StreamMetadata_VorbisComment *obj, unsigned length);
 static FLAC__bool read_metadata_cuesheet_(FLAC__StreamDecoder *decoder, FLAC__StreamMetadata_CueSheet *obj);
 static FLAC__bool read_metadata_picture_(FLAC__StreamDecoder *decoder, FLAC__StreamMetadata_Picture *obj);
 static FLAC__bool skip_id3v2_tag_(FLAC__StreamDecoder *decoder);
@@ -165,9 +142,6 @@ typedef struct FLAC__StreamDecoderPrivate {
 	void (*local_lpc_restore_signal_64bit)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
 	/* for use when the signal is <= 16 bits-per-sample, or <= 15 bits-per-sample on a side channel (which requires 1 extra bit): */
 	void (*local_lpc_restore_signal_16bit)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
-	/* for use when the signal is <= 16 bits-per-sample, or <= 15 bits-per-sample on a side channel (which requires 1 extra bit), AND order <= 8: */
-	void (*local_lpc_restore_signal_16bit_order8)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
-	FLAC__bool (*local_bitreader_read_rice_signed_block)(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter);
 	void *client_data;
 	FILE *file; /* only used if FLAC__stream_decoder_init_file()/FLAC__stream_decoder_init_file() called, else NULL */
 	FLAC__BitReader *input;
@@ -193,9 +167,7 @@ typedef struct FLAC__StreamDecoderPrivate {
 	FLAC__bool do_md5_checking; /* initially gets protected_->md5_checking but is turned off after a seek or if the metadata has a zero MD5 */
 	FLAC__bool internal_reset_hack; /* used only during init() so we can call reset to set up the decoder without rewinding the input */
 	FLAC__bool is_seeking;
-#ifndef FLAC__NO_MD5
 	FLAC__MD5Context md5context;
-#endif
 	FLAC__byte computed_md5sum[16]; /* this is the sum we computed from the decoded data */
 	/* (the rest of these are only used for seeking) */
 	FLAC__Frame last_frame; /* holds the info of the last frame we seeked to */
@@ -283,18 +255,18 @@ FLAC_API FLAC__StreamDecoder *FLAC__stream_decoder_new(void)
 
 	FLAC__ASSERT(sizeof(int) >= 4); /* we want to die right away if this is not true */
 
-	decoder = (FLAC__StreamDecoder*)calloc(1, sizeof(FLAC__StreamDecoder));
+	decoder = calloc(1, sizeof(FLAC__StreamDecoder));
 	if(decoder == 0) {
 		return 0;
 	}
 
-	decoder->protected_ = (FLAC__StreamDecoderProtected*)calloc(1, sizeof(FLAC__StreamDecoderProtected));
+	decoder->protected_ = calloc(1, sizeof(FLAC__StreamDecoderProtected));
 	if(decoder->protected_ == 0) {
 		free(decoder);
 		return 0;
 	}
 
-	decoder->private_ = (FLAC__StreamDecoderPrivate*)calloc(1, sizeof(FLAC__StreamDecoderPrivate));
+	decoder->private_ = calloc(1, sizeof(FLAC__StreamDecoderPrivate));
 	if(decoder->private_ == 0) {
 		free(decoder->protected_);
 		free(decoder);
@@ -310,7 +282,7 @@ FLAC_API FLAC__StreamDecoder *FLAC__stream_decoder_new(void)
 	}
 
 	decoder->private_->metadata_filter_ids_capacity = 16;
-	if(0 == (decoder->private_->metadata_filter_ids = (FLAC__byte*)malloc((FLAC__STREAM_METADATA_APPLICATION_ID_LEN/8) * decoder->private_->metadata_filter_ids_capacity))) {
+	if(0 == (decoder->private_->metadata_filter_ids = malloc((FLAC__STREAM_METADATA_APPLICATION_ID_LEN/8) * decoder->private_->metadata_filter_ids_capacity))) {
 		FLAC__bitreader_delete(decoder->private_->input);
 		free(decoder->private_);
 		free(decoder->protected_);
@@ -343,7 +315,9 @@ FLAC_API void FLAC__stream_decoder_delete(FLAC__StreamDecoder *decoder)
 {
 	unsigned i;
 
-	FLAC__ASSERT(0 != decoder);
+	if (decoder == NULL)
+		return ;
+
 	FLAC__ASSERT(0 != decoder->protected_);
 	FLAC__ASSERT(0 != decoder->private_);
 	FLAC__ASSERT(0 != decoder->private_->input);
@@ -404,7 +378,7 @@ static FLAC__StreamDecoderInitStatus init_stream_internal_(
 #if FLAC__HAS_OGG
 	decoder->private_->is_ogg = is_ogg;
 	if(is_ogg && !FLAC__ogg_decoder_aspect_init(&decoder->protected_->ogg_decoder_aspect))
-		return decoder->protected_->state = FLAC__STREAM_DECODER_OGG_ERROR;
+		return decoder->protected_->initstate = FLAC__STREAM_DECODER_INIT_STATUS_ERROR_OPENING_FILE;
 #endif
 
 	/*
@@ -415,42 +389,44 @@ static FLAC__StreamDecoderInitStatus init_stream_internal_(
 	decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal;
 	decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide;
 	decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal;
-	decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal;
-	decoder->private_->local_bitreader_read_rice_signed_block = FLAC__bitreader_read_rice_signed_block;
 	/* now override with asm where appropriate */
 #ifndef FLAC__NO_ASM
 	if(decoder->private_->cpuinfo.use_asm) {
 #ifdef FLAC__CPU_IA32
 		FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_IA32);
 #ifdef FLAC__HAS_NASM
-#if 1 /*@@@@@@ OPT: not clearly faster, needs more testing */
-		if(decoder->private_->cpuinfo.data.ia32.bswap)
-			decoder->private_->local_bitreader_read_rice_signed_block = FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap;
-#endif
-		if(decoder->private_->cpuinfo.data.ia32.mmx) {
+		decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_asm_ia32; /* OPT_IA32: was really necessary for GCC < 4.9 */
+		if(decoder->private_->cpuinfo.ia32.mmx) {
 			decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_asm_ia32;
 			decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ia32_mmx;
-			decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_asm_ia32_mmx;
 		}
 		else {
 			decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_asm_ia32;
 			decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ia32;
-			decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_asm_ia32;
 		}
 #endif
-#elif defined FLAC__CPU_PPC
-		FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_PPC);
-		if(decoder->private_->cpuinfo.data.ppc.altivec) {
-			decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ppc_altivec_16;
-			decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8;
+#ifdef FLAC__HAS_X86INTRIN
+# if defined FLAC__SSE2_SUPPORTED && !defined FLAC__HAS_NASM /* OPT_SSE: not better than MMX asm */
+		if(decoder->private_->cpuinfo.ia32.sse2) {
+			decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_16_intrin_sse2;
+		}
+# endif
+# if defined FLAC__SSE4_1_SUPPORTED
+		if(decoder->private_->cpuinfo.ia32.sse41) {
+			decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_intrin_sse41;
 		}
+# endif
+#endif
+#elif defined FLAC__CPU_X86_64
+		FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_X86_64);
+		/* No useful SSE optimizations yet */
 #endif
 	}
 #endif
 
 	/* from here on, errors are fatal */
 
-	if(!FLAC__bitreader_init(decoder->private_->input, decoder->private_->cpuinfo, read_callback_, decoder)) {
+	if(!FLAC__bitreader_init(decoder->private_->input, read_callback_, decoder)) {
 		decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 		return FLAC__STREAM_DECODER_INIT_STATUS_MEMORY_ALLOCATION_ERROR;
 	}
@@ -551,10 +527,10 @@ static FLAC__StreamDecoderInitStatus init_FILE_internal_(
 	FLAC__ASSERT(0 != file);
 
 	if(decoder->protected_->state != FLAC__STREAM_DECODER_UNINITIALIZED)
-		return decoder->protected_->state = FLAC__STREAM_DECODER_INIT_STATUS_ALREADY_INITIALIZED;
+		return decoder->protected_->initstate = FLAC__STREAM_DECODER_INIT_STATUS_ALREADY_INITIALIZED;
 
 	if(0 == write_callback || 0 == error_callback)
-		return decoder->protected_->state = FLAC__STREAM_DECODER_INIT_STATUS_INVALID_CALLBACKS;
+		return decoder->protected_->initstate = FLAC__STREAM_DECODER_INIT_STATUS_INVALID_CALLBACKS;
 
 	/*
 	 * To make sure that our file does not go unclosed after an error, we
@@ -625,12 +601,12 @@ static FLAC__StreamDecoderInitStatus init_file_internal_(
 	 * in FLAC__stream_decoder_init_FILE() before the FILE* is assigned.
 	 */
 	if(decoder->protected_->state != FLAC__STREAM_DECODER_UNINITIALIZED)
-		return decoder->protected_->state = FLAC__STREAM_DECODER_INIT_STATUS_ALREADY_INITIALIZED;
+		return decoder->protected_->initstate = FLAC__STREAM_DECODER_INIT_STATUS_ALREADY_INITIALIZED;
 
 	if(0 == write_callback || 0 == error_callback)
-		return decoder->protected_->state = FLAC__STREAM_DECODER_INIT_STATUS_INVALID_CALLBACKS;
+		return decoder->protected_->initstate = FLAC__STREAM_DECODER_INIT_STATUS_INVALID_CALLBACKS;
 
-	file = filename? fopen(filename, "rb") : stdin;
+	file = filename? flac_fopen(filename, "rb") : stdin;
 
 	if(0 == file)
 		return FLAC__STREAM_DECODER_INIT_STATUS_ERROR_OPENING_FILE;
@@ -674,12 +650,10 @@ FLAC_API FLAC__bool FLAC__stream_decoder_finish(FLAC__StreamDecoder *decoder)
 	if(decoder->protected_->state == FLAC__STREAM_DECODER_UNINITIALIZED)
 		return true;
 
-#ifndef FLAC__NO_MD5
-	/* see the comment in FLAC__seekable_stream_decoder_reset() as to why we
+	/* see the comment in FLAC__stream_decoder_reset() as to why we
 	 * always call FLAC__MD5Final()
 	 */
 	FLAC__MD5Final(decoder->private_->computed_md5sum, &decoder->private_->md5context);
-#endif
 
 	if(decoder->private_->has_seek_table && 0 != decoder->private_->seek_table.data.seek_table.points) {
 		free(decoder->private_->seek_table.data.seek_table.points);
@@ -789,7 +763,7 @@ FLAC_API FLAC__bool FLAC__stream_decoder_set_metadata_respond_application(FLAC__
 	FLAC__ASSERT(0 != decoder->private_->metadata_filter_ids);
 
 	if(decoder->private_->metadata_filter_ids_count == decoder->private_->metadata_filter_ids_capacity) {
-		if(0 == (decoder->private_->metadata_filter_ids = (FLAC__byte*)safe_realloc_mul_2op_(decoder->private_->metadata_filter_ids, decoder->private_->metadata_filter_ids_capacity, /*times*/2))) {
+		if(0 == (decoder->private_->metadata_filter_ids = safe_realloc_mul_2op_(decoder->private_->metadata_filter_ids, decoder->private_->metadata_filter_ids_capacity, /*times*/2))) {
 			decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 			return false;
 		}
@@ -848,7 +822,7 @@ FLAC_API FLAC__bool FLAC__stream_decoder_set_metadata_ignore_application(FLAC__S
 	FLAC__ASSERT(0 != decoder->private_->metadata_filter_ids);
 
 	if(decoder->private_->metadata_filter_ids_count == decoder->private_->metadata_filter_ids_capacity) {
-		if(0 == (decoder->private_->metadata_filter_ids = (FLAC__byte*)safe_realloc_mul_2op_(decoder->private_->metadata_filter_ids, decoder->private_->metadata_filter_ids_capacity, /*times*/2))) {
+		if(0 == (decoder->private_->metadata_filter_ids = safe_realloc_mul_2op_(decoder->private_->metadata_filter_ids, decoder->private_->metadata_filter_ids_capacity, /*times*/2))) {
 			decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 			return false;
 		}
@@ -1025,7 +999,6 @@ FLAC_API FLAC__bool FLAC__stream_decoder_reset(FLAC__StreamDecoder *decoder)
 	 */
 	decoder->private_->fixed_block_size = decoder->private_->next_fixed_block_size = 0;
 
-#ifndef FLAC__NO_MD5
 	/* We initialize the FLAC__MD5Context even though we may never use it.  This
 	 * is because md5 checking may be turned on to start and then turned off if
 	 * a seek occurs.  So we init the context here and finalize it in
@@ -1033,7 +1006,6 @@ FLAC_API FLAC__bool FLAC__stream_decoder_reset(FLAC__StreamDecoder *decoder)
 	 * properly.
 	 */
 	FLAC__MD5Init(&decoder->private_->md5context);
-#endif
 
 	decoder->private_->first_frame_offset = 0;
 	decoder->private_->unparseable_frame_count = 0;
@@ -1294,7 +1266,7 @@ FILE *get_binary_stdin_(void)
 	 */
 #if defined _MSC_VER || defined __MINGW32__
 	_setmode(_fileno(stdin), _O_BINARY);
-#elif defined __CYGWIN__ 
+#elif defined __CYGWIN__
 	/* almost certainly not needed for any modern Cygwin, but let's be safe... */
 	setmode(_fileno(stdin), _O_BINARY);
 #elif defined __EMX__
@@ -1309,6 +1281,9 @@ FLAC__bool allocate_output_(FLAC__StreamDecoder *decoder, unsigned size, unsigne
 	unsigned i;
 	FLAC__int32 *tmp;
 
+	/* Make sure size is some sensible minimum value. Plumb through predictor_order maybe? */
+	size = size < FLAC__MAX_LPC_ORDER ? FLAC__MAX_LPC_ORDER : size ;
+
 	if(size <= decoder->private_->output_capacity && channels <= decoder->private_->output_channels)
 		return true;
 
@@ -1332,7 +1307,7 @@ FLAC__bool allocate_output_(FLAC__StreamDecoder *decoder, unsigned size, unsigne
 		 * (at negative indices) for alignment purposes; we use 4
 		 * to keep the data well-aligned.
 		 */
-		tmp = (FLAC__int32*)safe_malloc_muladd2_(sizeof(FLAC__int32), /*times (*/size, /*+*/4/*)*/);
+		tmp = safe_malloc_muladd2_(sizeof(FLAC__int32), /*times (*/size, /*+*/4/*)*/);
 		if(tmp == 0) {
 			decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 			return false;
@@ -1340,9 +1315,6 @@ FLAC__bool allocate_output_(FLAC__StreamDecoder *decoder, unsigned size, unsigne
 		memset(tmp, 0, sizeof(FLAC__int32)*4);
 		decoder->private_->output[i] = tmp + 4;
 
-		/* WATCHOUT:
-		 * minimum of quadword alignment for PPC vector optimizations is REQUIRED:
-		 */
 		if(!FLAC__memory_alloc_aligned_int32_array(size, &decoder->private_->residual_unaligned[i], &decoder->private_->residual[i])) {
 			decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 			return false;
@@ -1392,6 +1364,10 @@ FLAC__bool find_metadata_(FLAC__StreamDecoder *decoder)
 			id = 0;
 			continue;
 		}
+
+		if(id >= 3)
+			return false;
+
 		if(x == ID3V2_TAG_[id]) {
 			id++;
 			i = 0;
@@ -1413,7 +1389,7 @@ FLAC__bool find_metadata_(FLAC__StreamDecoder *decoder)
 				decoder->private_->lookahead = (FLAC__byte)x;
 				decoder->private_->cached = true;
 			}
-			else if(x >> 2 == 0x3e) { /* MAGIC NUMBER for the last 6 sync bits */
+			else if(x >> 1 == 0x7c) { /* MAGIC NUMBER for the last 6 sync bits and reserved 7th bit */
 				decoder->private_->header_warmup[1] = (FLAC__byte)x;
 				decoder->protected_->state = FLAC__STREAM_DECODER_READ_FRAME;
 				return true;
@@ -1470,6 +1446,7 @@ FLAC__bool read_metadata_(FLAC__StreamDecoder *decoder)
 		unsigned real_length = length;
 		FLAC__StreamMetadata block;
 
+		memset(&block, 0, sizeof(block));
 		block.is_last = is_last;
 		block.type = (FLAC__MetadataType)type;
 		block.length = length;
@@ -1494,36 +1471,37 @@ FLAC__bool read_metadata_(FLAC__StreamDecoder *decoder)
 				return false; /* read_callback_ sets the state for us */
 		}
 		else {
+			FLAC__bool ok = true;
 			switch(type) {
 				case FLAC__METADATA_TYPE_PADDING:
 					/* skip the padding bytes */
 					if(!FLAC__bitreader_skip_byte_block_aligned_no_crc(decoder->private_->input, real_length))
-						return false; /* read_callback_ sets the state for us */
+						ok = false; /* read_callback_ sets the state for us */
 					break;
 				case FLAC__METADATA_TYPE_APPLICATION:
 					/* remember, we read the ID already */
 					if(real_length > 0) {
-						if(0 == (block.data.application.data = (FLAC__byte*)malloc(real_length))) {
+						if(0 == (block.data.application.data = malloc(real_length))) {
 							decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
-							return false;
+							ok = false;
 						}
-						if(!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, block.data.application.data, real_length))
-							return false; /* read_callback_ sets the state for us */
+						else if(!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, block.data.application.data, real_length))
+							ok = false; /* read_callback_ sets the state for us */
 					}
 					else
 						block.data.application.data = 0;
 					break;
 				case FLAC__METADATA_TYPE_VORBIS_COMMENT:
-					if(!read_metadata_vorbiscomment_(decoder, &block.data.vorbis_comment))
-						return false;
+					if(!read_metadata_vorbiscomment_(decoder, &block.data.vorbis_comment, real_length))
+						ok = false;
 					break;
 				case FLAC__METADATA_TYPE_CUESHEET:
 					if(!read_metadata_cuesheet_(decoder, &block.data.cue_sheet))
-						return false;
+						ok = false;
 					break;
 				case FLAC__METADATA_TYPE_PICTURE:
 					if(!read_metadata_picture_(decoder, &block.data.picture))
-						return false;
+						ok = false;
 					break;
 				case FLAC__METADATA_TYPE_STREAMINFO:
 				case FLAC__METADATA_TYPE_SEEKTABLE:
@@ -1531,18 +1509,18 @@ FLAC__bool read_metadata_(FLAC__StreamDecoder *decoder)
 					break;
 				default:
 					if(real_length > 0) {
-						if(0 == (block.data.unknown.data = (FLAC__byte*)malloc(real_length))) {
+						if(0 == (block.data.unknown.data = malloc(real_length))) {
 							decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
-							return false;
+							ok = false;
 						}
-						if(!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, block.data.unknown.data, real_length))
-							return false; /* read_callback_ sets the state for us */
+						else if(!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, block.data.unknown.data, real_length))
+							ok = false; /* read_callback_ sets the state for us */
 					}
 					else
 						block.data.unknown.data = 0;
 					break;
 			}
-			if(!decoder->private_->is_seeking && decoder->private_->metadata_callback)
+			if(ok && !decoder->private_->is_seeking && decoder->private_->metadata_callback)
 				decoder->private_->metadata_callback(decoder, &block, decoder->private_->client_data);
 
 			/* now we have to free any malloc()ed data in the block */
@@ -1587,6 +1565,9 @@ FLAC__bool read_metadata_(FLAC__StreamDecoder *decoder)
 						free(block.data.unknown.data);
 					break;
 			}
+
+			if(!ok) /* anything that unsets "ok" should also make sure decoder->protected_->state is updated */
+				return false;
 		}
 	}
 
@@ -1685,7 +1666,7 @@ FLAC__bool read_metadata_seektable_(FLAC__StreamDecoder *decoder, FLAC__bool is_
 	decoder->private_->seek_table.data.seek_table.num_points = length / FLAC__STREAM_METADATA_SEEKPOINT_LENGTH;
 
 	/* use realloc since we may pass through here several times (e.g. after seeking) */
-	if(0 == (decoder->private_->seek_table.data.seek_table.points = (FLAC__StreamMetadata_SeekPoint*)safe_realloc_mul_2op_(decoder->private_->seek_table.data.seek_table.points, decoder->private_->seek_table.data.seek_table.num_points, /*times*/sizeof(FLAC__StreamMetadata_SeekPoint)))) {
+	if(0 == (decoder->private_->seek_table.data.seek_table.points = safe_realloc_mul_2op_(decoder->private_->seek_table.data.seek_table.points, decoder->private_->seek_table.data.seek_table.num_points, /*times*/sizeof(FLAC__StreamMetadata_SeekPoint)))) {
 		decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 		return false;
 	}
@@ -1713,58 +1694,98 @@ FLAC__bool read_metadata_seektable_(FLAC__StreamDecoder *decoder, FLAC__bool is_
 	return true;
 }
 
-FLAC__bool read_metadata_vorbiscomment_(FLAC__StreamDecoder *decoder, FLAC__StreamMetadata_VorbisComment *obj)
+FLAC__bool read_metadata_vorbiscomment_(FLAC__StreamDecoder *decoder, FLAC__StreamMetadata_VorbisComment *obj, unsigned length)
 {
 	FLAC__uint32 i;
 
 	FLAC__ASSERT(FLAC__bitreader_is_consumed_byte_aligned(decoder->private_->input));
 
 	/* read vendor string */
-	FLAC__ASSERT(FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN == 32);
-	if(!FLAC__bitreader_read_uint32_little_endian(decoder->private_->input, &obj->vendor_string.length))
-		return false; /* read_callback_ sets the state for us */
-	if(obj->vendor_string.length > 0) {
-		if(0 == (obj->vendor_string.entry = (FLAC__byte*)safe_malloc_add_2op_(obj->vendor_string.length, /*+*/1))) {
-			decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
-			return false;
-		}
-		if(!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, obj->vendor_string.entry, obj->vendor_string.length))
+	if (length >= 8) {
+		length -= 8; /* vendor string length + num comments entries alone take 8 bytes */
+		FLAC__ASSERT(FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN == 32);
+		if (!FLAC__bitreader_read_uint32_little_endian(decoder->private_->input, &obj->vendor_string.length))
 			return false; /* read_callback_ sets the state for us */
-		obj->vendor_string.entry[obj->vendor_string.length] = '\0';
-	}
-	else
-		obj->vendor_string.entry = 0;
+		if (obj->vendor_string.length > 0) {
+			if (length < obj->vendor_string.length) {
+				obj->vendor_string.length = 0;
+				obj->vendor_string.entry = 0;
+				goto skip;
+			}
+			else
+				length -= obj->vendor_string.length;
+			if (0 == (obj->vendor_string.entry = safe_malloc_add_2op_(obj->vendor_string.length, /*+*/1))) {
+				decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
+				return false;
+			}
+			if (!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, obj->vendor_string.entry, obj->vendor_string.length))
+				return false; /* read_callback_ sets the state for us */
+			obj->vendor_string.entry[obj->vendor_string.length] = '\0';
+		}
+		else
+			obj->vendor_string.entry = 0;
 
-	/* read num comments */
-	FLAC__ASSERT(FLAC__STREAM_METADATA_VORBIS_COMMENT_NUM_COMMENTS_LEN == 32);
-	if(!FLAC__bitreader_read_uint32_little_endian(decoder->private_->input, &obj->num_comments))
-		return false; /* read_callback_ sets the state for us */
+		/* read num comments */
+		FLAC__ASSERT(FLAC__STREAM_METADATA_VORBIS_COMMENT_NUM_COMMENTS_LEN == 32);
+		if (!FLAC__bitreader_read_uint32_little_endian(decoder->private_->input, &obj->num_comments))
+			return false; /* read_callback_ sets the state for us */
 
-	/* read comments */
-	if(obj->num_comments > 0) {
-		if(0 == (obj->comments = (FLAC__StreamMetadata_VorbisComment_Entry*)safe_malloc_mul_2op_(obj->num_comments, /*times*/sizeof(FLAC__StreamMetadata_VorbisComment_Entry)))) {
-			decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
+		/* read comments */
+		if (obj->num_comments > 100000) {
+			/* Possibly malicious file. */
+			obj->num_comments = 0;
 			return false;
 		}
-		for(i = 0; i < obj->num_comments; i++) {
-			FLAC__ASSERT(FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN == 32);
-			if(!FLAC__bitreader_read_uint32_little_endian(decoder->private_->input, &obj->comments[i].length))
-				return false; /* read_callback_ sets the state for us */
-			if(obj->comments[i].length > 0) {
-				if(0 == (obj->comments[i].entry = (FLAC__byte*)safe_malloc_add_2op_(obj->comments[i].length, /*+*/1))) {
-					decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
-					return false;
+		if (obj->num_comments > 0) {
+			if (0 == (obj->comments = safe_malloc_mul_2op_p(obj->num_comments, /*times*/sizeof(FLAC__StreamMetadata_VorbisComment_Entry)))) {
+				decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
+				return false;
+			}
+			for (i = 0; i < obj->num_comments; i++) {
+				/* Initialize here just to make sure. */
+				obj->comments[i].length = 0;
+				obj->comments[i].entry = 0;
+
+				FLAC__ASSERT(FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN == 32);
+				if (length < 4) {
+					obj->num_comments = i;
+					goto skip;
 				}
-				if(!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, obj->comments[i].entry, obj->comments[i].length))
+				else
+					length -= 4;
+				if (!FLAC__bitreader_read_uint32_little_endian(decoder->private_->input, &obj->comments[i].length))
 					return false; /* read_callback_ sets the state for us */
-				obj->comments[i].entry[obj->comments[i].length] = '\0';
+				if (obj->comments[i].length > 0) {
+					if (length < obj->comments[i].length) {
+						obj->num_comments = i;
+						goto skip;
+					}
+					else
+						length -= obj->comments[i].length;
+					if (0 == (obj->comments[i].entry = safe_malloc_add_2op_(obj->comments[i].length, /*+*/1))) {
+						decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
+						return false;
+					}
+					memset (obj->comments[i].entry, 0, obj->comments[i].length) ;
+					if (!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, obj->comments[i].entry, obj->comments[i].length)) {
+						obj->num_comments = i;
+						goto skip;
+					}
+					obj->comments[i].entry[obj->comments[i].length] = '\0';
+				}
+				else
+					obj->comments[i].entry = 0;
 			}
-			else
-				obj->comments[i].entry = 0;
 		}
+		else
+			obj->comments = 0;
 	}
-	else {
-		obj->comments = 0;
+
+  skip:
+	if (length > 0) {
+		/* This will only happen on files with invalid data in comments */
+		if(!FLAC__bitreader_skip_byte_block_aligned_no_crc(decoder->private_->input, length))
+			return false; /* read_callback_ sets the state for us */
 	}
 
 	return true;
@@ -1797,7 +1818,7 @@ FLAC__bool read_metadata_cuesheet_(FLAC__StreamDecoder *decoder, FLAC__StreamMet
 	obj->num_tracks = x;
 
 	if(obj->num_tracks > 0) {
-		if(0 == (obj->tracks = (FLAC__StreamMetadata_CueSheet_Track*)safe_calloc_(obj->num_tracks, sizeof(FLAC__StreamMetadata_CueSheet_Track)))) {
+		if(0 == (obj->tracks = safe_calloc_(obj->num_tracks, sizeof(FLAC__StreamMetadata_CueSheet_Track)))) {
 			decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 			return false;
 		}
@@ -1830,18 +1851,18 @@ FLAC__bool read_metadata_cuesheet_(FLAC__StreamDecoder *decoder, FLAC__StreamMet
 			track->num_indices = (FLAC__byte)x;
 
 			if(track->num_indices > 0) {
-				if(0 == (track->indices = (FLAC__StreamMetadata_CueSheet_Index*)safe_calloc_(track->num_indices, sizeof(FLAC__StreamMetadata_CueSheet_Index)))) {
+				if(0 == (track->indices = safe_calloc_(track->num_indices, sizeof(FLAC__StreamMetadata_CueSheet_Index)))) {
 					decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 					return false;
 				}
 				for(j = 0; j < track->num_indices; j++) {
-					FLAC__StreamMetadata_CueSheet_Index *index = &track->indices[j];
-					if(!FLAC__bitreader_read_raw_uint64(decoder->private_->input, &index->offset, FLAC__STREAM_METADATA_CUESHEET_INDEX_OFFSET_LEN))
+					FLAC__StreamMetadata_CueSheet_Index *indx = &track->indices[j];
+					if(!FLAC__bitreader_read_raw_uint64(decoder->private_->input, &indx->offset, FLAC__STREAM_METADATA_CUESHEET_INDEX_OFFSET_LEN))
 						return false; /* read_callback_ sets the state for us */
 
 					if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_CUESHEET_INDEX_NUMBER_LEN))
 						return false; /* read_callback_ sets the state for us */
-					index->number = (FLAC__byte)x;
+					indx->number = (FLAC__byte)x;
 
 					if(!FLAC__bitreader_skip_bits_no_crc(decoder->private_->input, FLAC__STREAM_METADATA_CUESHEET_INDEX_RESERVED_LEN))
 						return false; /* read_callback_ sets the state for us */
@@ -1867,7 +1888,7 @@ FLAC__bool read_metadata_picture_(FLAC__StreamDecoder *decoder, FLAC__StreamMeta
 	/* read MIME type */
 	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_PICTURE_MIME_TYPE_LENGTH_LEN))
 		return false; /* read_callback_ sets the state for us */
-	if(0 == (obj->mime_type = (char*)safe_malloc_add_2op_(x, /*+*/1))) {
+	if(0 == (obj->mime_type = safe_malloc_add_2op_(x, /*+*/1))) {
 		decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 		return false;
 	}
@@ -1880,7 +1901,7 @@ FLAC__bool read_metadata_picture_(FLAC__StreamDecoder *decoder, FLAC__StreamMeta
 	/* read description */
 	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_PICTURE_DESCRIPTION_LENGTH_LEN))
 		return false; /* read_callback_ sets the state for us */
-	if(0 == (obj->description = (FLAC__byte*)safe_malloc_add_2op_(x, /*+*/1))) {
+	if(0 == (obj->description = safe_malloc_add_2op_(x, /*+*/1))) {
 		decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 		return false;
 	}
@@ -1909,7 +1930,7 @@ FLAC__bool read_metadata_picture_(FLAC__StreamDecoder *decoder, FLAC__StreamMeta
 	/* read data */
 	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &(obj->data_length), FLAC__STREAM_METADATA_PICTURE_DATA_LENGTH_LEN))
 		return false; /* read_callback_ sets the state for us */
-	if(0 == (obj->data = (FLAC__byte*)safe_malloc_(obj->data_length))) {
+	if(0 == (obj->data = safe_malloc_(obj->data_length))) {
 		decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 		return false;
 	}
@@ -1983,7 +2004,7 @@ FLAC__bool frame_sync_(FLAC__StreamDecoder *decoder)
 				decoder->private_->lookahead = (FLAC__byte)x;
 				decoder->private_->cached = true;
 			}
-			else if(x >> 2 == 0x3e) { /* MAGIC NUMBER for the last 6 sync bits */
+			else if(x >> 1 == 0x7c) { /* MAGIC NUMBER for the last 6 sync bits and reserved 7th bit */
 				decoder->private_->header_warmup[1] = (FLAC__byte)x;
 				decoder->protected_->state = FLAC__STREAM_DECODER_READ_FRAME;
 				return true;
@@ -2679,12 +2700,8 @@ FLAC__bool read_subframe_lpc_(FLAC__StreamDecoder *decoder, unsigned channel, un
 		if( (FLAC__uint64)order * ((((FLAC__uint64)1)<<bps)-1) * ((1<<subframe->qlp_coeff_precision)-1) < (((FLAC__uint64)-1) << 32) )
 		*/
 		if(bps + subframe->qlp_coeff_precision + FLAC__bitmath_ilog2(order) <= 32)
-			if(bps <= 16 && subframe->qlp_coeff_precision <= 16) {
-				if(order <= 8)
-					decoder->private_->local_lpc_restore_signal_16bit_order8(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
-				else
-					decoder->private_->local_lpc_restore_signal_16bit(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
-			}
+			if(bps <= 16 && subframe->qlp_coeff_precision <= 16)
+				decoder->private_->local_lpc_restore_signal_16bit(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
 			else
 				decoder->private_->local_lpc_restore_signal(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
 		else
@@ -2743,7 +2760,7 @@ FLAC__bool read_residual_partitioned_rice_(FLAC__StreamDecoder *decoder, unsigne
 		}
 	}
 
-	if(!FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(partitioned_rice_contents, max(6, partition_order))) {
+	if(!FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(partitioned_rice_contents, flac_max(6u, partition_order))) {
 		decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 		return false;
 	}
@@ -2756,7 +2773,7 @@ FLAC__bool read_residual_partitioned_rice_(FLAC__StreamDecoder *decoder, unsigne
 		if(rice_parameter < pesc) {
 			partitioned_rice_contents->raw_bits[partition] = 0;
 			u = (partition_order == 0 || partition > 0)? partition_samples : partition_samples - predictor_order;
-			if(!decoder->private_->local_bitreader_read_rice_signed_block(decoder->private_->input, residual + sample, u, rice_parameter))
+			if(!FLAC__bitreader_read_rice_signed_block(decoder->private_->input, residual + sample, u, rice_parameter))
 				return false; /* read_callback_ sets the state for us */
 			sample += u;
 		}
@@ -2959,12 +2976,10 @@ FLAC__StreamDecoderWriteStatus write_audio_frame_to_client_(FLAC__StreamDecoder
 		 */
 		if(!decoder->private_->has_stream_info)
 			decoder->private_->do_md5_checking = false;
-#ifndef FLAC__NO_MD5
 		if(decoder->private_->do_md5_checking) {
 			if(!FLAC__MD5Accumulate(&decoder->private_->md5context, buffer, frame->header.channels, frame->header.blocksize, (frame->header.bits_per_sample+7) / 8))
 				return FLAC__STREAM_DECODER_WRITE_STATUS_ABORT;
 		}
-#endif
 		return decoder->private_->write_callback(decoder, frame, buffer, decoder->private_->client_data);
 	}
 }
@@ -3100,12 +3115,7 @@ FLAC__bool seek_to_absolute_sample_(FLAC__StreamDecoder *decoder, FLAC__uint64 s
 			return false;
 		}
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-#if defined _MSC_VER || defined __MINGW32__
-		/* with VC++ you have to spoon feed it the casting */
-		pos = (FLAC__int64)lower_bound + (FLAC__int64)((FLAC__double)(FLAC__int64)(target_sample - lower_bound_sample) / (FLAC__double)(FLAC__int64)(upper_bound_sample - lower_bound_sample) * (FLAC__double)(FLAC__int64)(upper_bound - lower_bound)) - approx_bytes_per_frame;
-#else
 		pos = (FLAC__int64)lower_bound + (FLAC__int64)((FLAC__double)(target_sample - lower_bound_sample) / (FLAC__double)(upper_bound_sample - lower_bound_sample) * (FLAC__double)(upper_bound - lower_bound)) - approx_bytes_per_frame;
-#endif
 #else
 		/* a little less accurate: */
 		if(upper_bound - lower_bound < 0xffffffff)
@@ -3157,11 +3167,11 @@ FLAC__bool seek_to_absolute_sample_(FLAC__StreamDecoder *decoder, FLAC__uint64 s
 			}
 			/* our last move backwards wasn't big enough, try again */
 			approx_bytes_per_frame = approx_bytes_per_frame? approx_bytes_per_frame * 2 : 16;
-			continue;	
+			continue;
 		}
 		/* allow one seek over upper bound, so we can get a correct upper_bound_sample for streams with unknown total_samples */
 		first_seek = false;
-		
+
 		/* make sure we are not seeking in corrupted stream */
 		if (this_frame_sample < lower_bound_sample) {
 			decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
@@ -3201,7 +3211,7 @@ FLAC__bool seek_to_absolute_sample_ogg_(FLAC__StreamDecoder *decoder, FLAC__uint
 	FLAC__bool did_a_seek;
 	unsigned iteration = 0;
 
-	/* In the first iterations, we will calculate the target byte position 
+	/* In the first iterations, we will calculate the target byte position
 	 * by the distance from the target sample to left_sample and
 	 * right_sample (let's call it "proportional search").  After that, we
 	 * will switch to binary search.
@@ -3229,12 +3239,7 @@ FLAC__bool seek_to_absolute_sample_ogg_(FLAC__StreamDecoder *decoder, FLAC__uint
 			}
 			else {
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-#if defined _MSC_VER || defined __MINGW32__
-				/* with MSVC you have to spoon feed it the casting */
-				pos = (FLAC__uint64)((FLAC__double)(FLAC__int64)(target_sample - left_sample) / (FLAC__double)(FLAC__int64)(right_sample - left_sample) * (FLAC__double)(FLAC__int64)(right_pos - left_pos));
-#else
 				pos = (FLAC__uint64)((FLAC__double)(target_sample - left_sample) / (FLAC__double)(right_sample - left_sample) * (FLAC__double)(right_pos - left_pos));
-#endif
 #else
 				/* a little less accurate: */
 				if ((target_sample-left_sample <= 0xffffffff) && (right_pos-left_pos <= 0xffffffff))
@@ -3351,7 +3356,7 @@ FLAC__StreamDecoderSeekStatus file_seek_callback_(const FLAC__StreamDecoder *dec
 
 	if(decoder->private_->file == stdin)
 		return FLAC__STREAM_DECODER_SEEK_STATUS_UNSUPPORTED;
-	else if(fseeko(decoder->private_->file, (off_t)absolute_byte_offset, SEEK_SET) < 0)
+	else if(fseeko(decoder->private_->file, (FLAC__off_t)absolute_byte_offset, SEEK_SET) < 0)
 		return FLAC__STREAM_DECODER_SEEK_STATUS_ERROR;
 	else
 		return FLAC__STREAM_DECODER_SEEK_STATUS_OK;
@@ -3359,7 +3364,7 @@ FLAC__StreamDecoderSeekStatus file_seek_callback_(const FLAC__StreamDecoder *dec
 
 FLAC__StreamDecoderTellStatus file_tell_callback_(const FLAC__StreamDecoder *decoder, FLAC__uint64 *absolute_byte_offset, void *client_data)
 {
-	off_t pos;
+	FLAC__off_t pos;
 	(void)client_data;
 
 	if(decoder->private_->file == stdin)
@@ -3374,12 +3379,12 @@ FLAC__StreamDecoderTellStatus file_tell_callback_(const FLAC__StreamDecoder *dec
 
 FLAC__StreamDecoderLengthStatus file_length_callback_(const FLAC__StreamDecoder *decoder, FLAC__uint64 *stream_length, void *client_data)
 {
-	struct stat filestats;
+	struct flac_stat_s filestats;
 	(void)client_data;
 
 	if(decoder->private_->file == stdin)
 		return FLAC__STREAM_DECODER_LENGTH_STATUS_UNSUPPORTED;
-	else if(fstat(fileno(decoder->private_->file), &filestats) != 0)
+	else if(flac_fstat(fileno(decoder->private_->file), &filestats) != 0)
 		return FLAC__STREAM_DECODER_LENGTH_STATUS_ERROR;
 	else {
 		*stream_length = (FLAC__uint64)filestats.st_size;
diff --git a/libFLAC/stream_encoder.c b/libFLAC/stream_encoder.c
index 6d9859c..3aa90fb 100644
--- a/libFLAC/stream_encoder.c
+++ b/libFLAC/stream_encoder.c
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,32 +30,18 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#if HAVE_CONFIG_H
+#ifdef HAVE_CONFIG_H
 #  include <config.h>
 #endif
 
-#if defined _MSC_VER || defined __MINGW32__
-#include <io.h> /* for _setmode() */
-#include <fcntl.h> /* for _O_BINARY */
-#endif
-#if defined __CYGWIN__ || defined __EMX__
-#include <io.h> /* for setmode(), O_BINARY */
-#include <fcntl.h> /* for _O_BINARY */
-#endif
 #include <limits.h>
 #include <stdio.h>
 #include <stdlib.h> /* for malloc() */
 #include <string.h> /* for memcpy() */
 #include <sys/types.h> /* for off_t */
-#if defined _MSC_VER || defined __BORLANDC__ || defined __MINGW32__
-#if _MSC_VER <= 1600 || defined __BORLANDC__ /* @@@ [2G limit] */
-#define fseeko fseek
-#define ftello ftell
-#endif
-#endif
+#include "share/compat.h"
 #include "FLAC/assert.h"
 #include "FLAC/stream_decoder.h"
-#include "share/alloc.h"
 #include "protected/stream_encoder.h"
 #include "private/bitwriter.h"
 #include "private/bitmath.h"
@@ -65,26 +52,17 @@
 #include "private/lpc.h"
 #include "private/md5.h"
 #include "private/memory.h"
+#include "private/macros.h"
 #if FLAC__HAS_OGG
 #include "private/ogg_helper.h"
 #include "private/ogg_mapping.h"
 #endif
+#include "private/stream_encoder.h"
 #include "private/stream_encoder_framing.h"
 #include "private/window.h"
+#include "share/alloc.h"
+#include "share/private.h"
 
-#ifndef FLaC__INLINE
-#define FLaC__INLINE
-#endif
-
-#ifdef min
-#undef min
-#endif
-#define min(x,y) ((x)<(y)?(x):(y))
-
-#ifdef max
-#undef max
-#endif
-#define max(x,y) ((x)>(y)?(x):(y))
 
 /* Exact Rice codeword length calculation is off by default.  The simple
  * (and fast) estimation (of how many bits a residual value will be
@@ -96,7 +74,7 @@
  * parameter estimation in this encoder is very good, almost always
  * yielding compression within 0.1% of the optimal parameters.
  */
-#undef ENABLE_RICE_PARAMETER_SEARCH 
+#undef ENABLE_RICE_PARAMETER_SEARCH
 
 
 typedef struct {
@@ -128,16 +106,18 @@ static struct CompressionLevels {
 	unsigned min_residual_partition_order;
 	unsigned max_residual_partition_order;
 	unsigned rice_parameter_search_dist;
+	const char *apodization;
 } compression_levels_[] = {
-	{ false, false,  0, 0, false, false, false, 0, 3, 0 },
-	{ true , true ,  0, 0, false, false, false, 0, 3, 0 },
-	{ true , false,  0, 0, false, false, false, 0, 3, 0 },
-	{ false, false,  6, 0, false, false, false, 0, 4, 0 },
-	{ true , true ,  8, 0, false, false, false, 0, 4, 0 },
-	{ true , false,  8, 0, false, false, false, 0, 5, 0 },
-	{ true , false,  8, 0, false, false, false, 0, 6, 0 },
-	{ true , false,  8, 0, false, false, true , 0, 6, 0 },
-	{ true , false, 12, 0, false, false, true , 0, 6, 0 }
+	{ false, false,  0, 0, false, false, false, 0, 3, 0, "tukey(5e-1)" },
+	{ true , true ,  0, 0, false, false, false, 0, 3, 0, "tukey(5e-1)" },
+	{ true , false,  0, 0, false, false, false, 0, 3, 0, "tukey(5e-1)" },
+	{ false, false,  6, 0, false, false, false, 0, 4, 0, "tukey(5e-1)" },
+	{ true , true ,  8, 0, false, false, false, 0, 4, 0, "tukey(5e-1)" },
+	{ true , false,  8, 0, false, false, false, 0, 5, 0, "tukey(5e-1)" },
+	{ true , false,  8, 0, false, false, false, 0, 6, 0, "tukey(5e-1);partial_tukey(2)" },
+	{ true , false, 12, 0, false, false, false, 0, 6, 0, "tukey(5e-1);partial_tukey(2)" },
+	{ true , false, 12, 0, false, false, false, 0, 6, 0, "tukey(5e-1);partial_tukey(2);punchout_tukey(3)" }
+	/* here we use locale-independent 5e-1 instead of 0.5 or 0,5 */
 };
 
 
@@ -232,7 +212,7 @@ static unsigned evaluate_lpc_subframe_(
 #endif
 
 static unsigned evaluate_verbatim_subframe_(
-	FLAC__StreamEncoder *encoder, 
+	FLAC__StreamEncoder *encoder,
 	const FLAC__int32 signal[],
 	unsigned blocksize,
 	unsigned subframe_bps,
@@ -367,10 +347,13 @@ typedef struct FLAC__StreamEncoderPrivate {
 	unsigned current_frame_number;
 	FLAC__MD5Context md5context;
 	FLAC__CPUInfo cpuinfo;
+	void (*local_precompute_partition_info_sums)(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[], unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps);
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 	unsigned (*local_fixed_compute_best_predictor)(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
+	unsigned (*local_fixed_compute_best_predictor_wide)(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
 #else
 	unsigned (*local_fixed_compute_best_predictor)(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
+	unsigned (*local_fixed_compute_best_predictor_wide)(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
 #endif
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 	void (*local_lpc_compute_autocorrelation)(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
@@ -477,7 +460,7 @@ FLAC_API const char * const FLAC__StreamEncoderInitStatusString[] = {
 	"FLAC__STREAM_ENCODER_INIT_STATUS_ALREADY_INITIALIZED"
 };
 
-FLAC_API const char * const FLAC__treamEncoderReadStatusString[] = {
+FLAC_API const char * const FLAC__StreamEncoderReadStatusString[] = {
 	"FLAC__STREAM_ENCODER_READ_STATUS_CONTINUE",
 	"FLAC__STREAM_ENCODER_READ_STATUS_END_OF_STREAM",
 	"FLAC__STREAM_ENCODER_READ_STATUS_ABORT",
@@ -526,18 +509,18 @@ FLAC_API FLAC__StreamEncoder *FLAC__stream_encoder_new(void)
 
 	FLAC__ASSERT(sizeof(int) >= 4); /* we want to die right away if this is not true */
 
-	encoder = (FLAC__StreamEncoder*)calloc(1, sizeof(FLAC__StreamEncoder));
+	encoder = calloc(1, sizeof(FLAC__StreamEncoder));
 	if(encoder == 0) {
 		return 0;
 	}
 
-	encoder->protected_ = (FLAC__StreamEncoderProtected*)calloc(1, sizeof(FLAC__StreamEncoderProtected));
+	encoder->protected_ = calloc(1, sizeof(FLAC__StreamEncoderProtected));
 	if(encoder->protected_ == 0) {
 		free(encoder);
 		return 0;
 	}
 
-	encoder->private_ = (FLAC__StreamEncoderPrivate*)calloc(1, sizeof(FLAC__StreamEncoderPrivate));
+	encoder->private_ = calloc(1, sizeof(FLAC__StreamEncoderPrivate));
 	if(encoder->private_ == 0) {
 		free(encoder->protected_);
 		free(encoder);
@@ -595,7 +578,9 @@ FLAC_API void FLAC__stream_encoder_delete(FLAC__StreamEncoder *encoder)
 {
 	unsigned i;
 
-	FLAC__ASSERT(0 != encoder);
+	if (encoder == NULL)
+		return ;
+
 	FLAC__ASSERT(0 != encoder->protected_);
 	FLAC__ASSERT(0 != encoder->private_);
 	FLAC__ASSERT(0 != encoder->private_->frame);
@@ -696,7 +681,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
 		if(encoder->protected_->bits_per_sample < 16) {
 			/* @@@ need some data about how to set this here w.r.t. blocksize and sample rate */
 			/* @@@ until then we'll make a guess */
-			encoder->protected_->qlp_coeff_precision = max(FLAC__MIN_QLP_COEFF_PRECISION, 2 + encoder->protected_->bits_per_sample / 2);
+			encoder->protected_->qlp_coeff_precision = flac_max(FLAC__MIN_QLP_COEFF_PRECISION, 2 + encoder->protected_->bits_per_sample / 2);
 		}
 		else if(encoder->protected_->bits_per_sample == 16) {
 			if(encoder->protected_->blocksize <= 192)
@@ -728,20 +713,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
 		return FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_QLP_COEFF_PRECISION;
 
 	if(encoder->protected_->streamable_subset) {
-		if(
-			encoder->protected_->blocksize != 192 &&
-			encoder->protected_->blocksize != 576 &&
-			encoder->protected_->blocksize != 1152 &&
-			encoder->protected_->blocksize != 2304 &&
-			encoder->protected_->blocksize != 4608 &&
-			encoder->protected_->blocksize != 256 &&
-			encoder->protected_->blocksize != 512 &&
-			encoder->protected_->blocksize != 1024 &&
-			encoder->protected_->blocksize != 2048 &&
-			encoder->protected_->blocksize != 4096 &&
-			encoder->protected_->blocksize != 8192 &&
-			encoder->protected_->blocksize != 16384
-		)
+		if(!FLAC__format_blocksize_is_subset(encoder->protected_->blocksize, encoder->protected_->sample_rate))
 			return FLAC__STREAM_ENCODER_INIT_STATUS_NOT_STREAMABLE;
 		if(!FLAC__format_sample_rate_is_subset(encoder->protected_->sample_rate))
 			return FLAC__STREAM_ENCODER_INIT_STATUS_NOT_STREAMABLE;
@@ -774,12 +746,12 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
 #if FLAC__HAS_OGG
 	/* reorder metadata if necessary to ensure that any VORBIS_COMMENT is the first, according to the mapping spec */
 	if(is_ogg && 0 != encoder->protected_->metadata && encoder->protected_->num_metadata_blocks > 1) {
-		unsigned i;
-		for(i = 1; i < encoder->protected_->num_metadata_blocks; i++) {
-			if(0 != encoder->protected_->metadata[i] && encoder->protected_->metadata[i]->type == FLAC__METADATA_TYPE_VORBIS_COMMENT) {
-				FLAC__StreamMetadata *vc = encoder->protected_->metadata[i];
-				for( ; i > 0; i--)
-					encoder->protected_->metadata[i] = encoder->protected_->metadata[i-1];
+		unsigned i1;
+		for(i1 = 1; i1 < encoder->protected_->num_metadata_blocks; i1++) {
+			if(0 != encoder->protected_->metadata[i1] && encoder->protected_->metadata[i1]->type == FLAC__METADATA_TYPE_VORBIS_COMMENT) {
+				FLAC__StreamMetadata *vc = encoder->protected_->metadata[i1];
+				for( ; i1 > 0; i1--)
+					encoder->protected_->metadata[i1] = encoder->protected_->metadata[i1-1];
 				encoder->protected_->metadata[0] = vc;
 				break;
 			}
@@ -788,10 +760,10 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
 #endif
 	/* keep track of any SEEKTABLE block */
 	if(0 != encoder->protected_->metadata && encoder->protected_->num_metadata_blocks > 0) {
-		unsigned i;
-		for(i = 0; i < encoder->protected_->num_metadata_blocks; i++) {
-			if(0 != encoder->protected_->metadata[i] && encoder->protected_->metadata[i]->type == FLAC__METADATA_TYPE_SEEKTABLE) {
-				encoder->private_->seek_table = &encoder->protected_->metadata[i]->data.seek_table;
+		unsigned i2;
+		for(i2 = 0; i2 < encoder->protected_->num_metadata_blocks; i2++) {
+			if(0 != encoder->protected_->metadata[i2] && encoder->protected_->metadata[i2]->type == FLAC__METADATA_TYPE_SEEKTABLE) {
+				encoder->private_->seek_table = &encoder->protected_->metadata[i2]->data.seek_table;
 				break; /* take only the first one */
 			}
 		}
@@ -833,7 +805,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
 				metadata_picture_has_type1 = true;
 				/* standard icon must be 32x32 pixel PNG */
 				if(
-					m->data.picture.type == FLAC__STREAM_METADATA_PICTURE_TYPE_FILE_ICON_STANDARD && 
+					m->data.picture.type == FLAC__STREAM_METADATA_PICTURE_TYPE_FILE_ICON_STANDARD &&
 					(
 						(strcmp(m->data.picture.mime_type, "image/png") && strcmp(m->data.picture.mime_type, "-->")) ||
 						m->data.picture.width != 32 ||
@@ -898,7 +870,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
 	encoder->private_->current_frame_number = 0;
 
 	encoder->private_->use_wide_by_block = (encoder->protected_->bits_per_sample + FLAC__bitmath_ilog2(encoder->protected_->blocksize)+1 > 30);
-	encoder->private_->use_wide_by_order = (encoder->protected_->bits_per_sample + FLAC__bitmath_ilog2(max(encoder->protected_->max_lpc_order, FLAC__MAX_FIXED_ORDER))+1 > 30); /*@@@ need to use this? */
+	encoder->private_->use_wide_by_order = (encoder->protected_->bits_per_sample + FLAC__bitmath_ilog2(flac_max(encoder->protected_->max_lpc_order, FLAC__MAX_FIXED_ORDER))+1 > 30); /*@@@ need to use this? */
 	encoder->private_->use_wide_by_partition = (false); /*@@@ need to set this */
 
 	/*
@@ -909,7 +881,9 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 	encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation;
 #endif
+	encoder->private_->local_precompute_partition_info_sums = precompute_partition_info_sums_;
 	encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor;
+	encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide;
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 	encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients;
 	encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide;
@@ -922,21 +896,23 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
 #  ifdef FLAC__CPU_IA32
 		FLAC__ASSERT(encoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_IA32);
 #   ifdef FLAC__HAS_NASM
-		if(encoder->private_->cpuinfo.data.ia32.sse) {
+		if(encoder->private_->cpuinfo.ia32.sse) {
 			if(encoder->protected_->max_lpc_order < 4)
-				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4;
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4_old;
 			else if(encoder->protected_->max_lpc_order < 8)
-				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8;
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8_old;
 			else if(encoder->protected_->max_lpc_order < 12)
-				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12;
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12_old;
+			else if(encoder->protected_->max_lpc_order < 16)
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_16_old;
 			else
 				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32;
 		}
-		else if(encoder->private_->cpuinfo.data.ia32._3dnow)
-			encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_3dnow;
 		else
 			encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32;
-		if(encoder->private_->cpuinfo.data.ia32.mmx) {
+
+		encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32; /* OPT_IA32: was really necessary for GCC < 4.9 */
+		if(encoder->private_->cpuinfo.ia32.mmx) {
 			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32;
 			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx;
 		}
@@ -944,16 +920,163 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
 			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32;
 			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32;
 		}
-		if(encoder->private_->cpuinfo.data.ia32.mmx && encoder->private_->cpuinfo.data.ia32.cmov)
+
+		if(encoder->private_->cpuinfo.ia32.mmx && encoder->private_->cpuinfo.ia32.cmov)
 			encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov;
 #   endif /* FLAC__HAS_NASM */
-#  endif /* FLAC__CPU_IA32 */
+#   ifdef FLAC__HAS_X86INTRIN
+#    if defined FLAC__SSE_SUPPORTED
+		if(encoder->private_->cpuinfo.ia32.sse) {
+			if(encoder->private_->cpuinfo.ia32.sse42 || !encoder->private_->cpuinfo.ia32.intel) { /* use new autocorrelation functions */
+				if(encoder->protected_->max_lpc_order < 4)
+					encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4_new;
+				else if(encoder->protected_->max_lpc_order < 8)
+					encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8_new;
+				else if(encoder->protected_->max_lpc_order < 12)
+					encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12_new;
+				else if(encoder->protected_->max_lpc_order < 16)
+					encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16_new;
+				else
+					encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation;
+			}
+			else { /* use old autocorrelation functions */
+				if(encoder->protected_->max_lpc_order < 4)
+					encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4_old;
+				else if(encoder->protected_->max_lpc_order < 8)
+					encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8_old;
+				else if(encoder->protected_->max_lpc_order < 12)
+					encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12_old;
+				else if(encoder->protected_->max_lpc_order < 16)
+					encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16_old;
+				else
+					encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation;
+			}
+		}
+#    endif
+
+#    ifdef FLAC__SSE2_SUPPORTED
+		if(encoder->private_->cpuinfo.ia32.sse2) {
+			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients       = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2;
+			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
+		}
+#    endif
+#    ifdef FLAC__SSE4_1_SUPPORTED
+		if(encoder->private_->cpuinfo.ia32.sse41) {
+			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients       = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse41;
+			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41;
+		}
+#    endif
+#    ifdef FLAC__AVX2_SUPPORTED
+		if(encoder->private_->cpuinfo.ia32.avx2) {
+			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2;
+			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients       = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2;
+			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2;
+		}
+#    endif
+
+#    ifdef FLAC__SSE2_SUPPORTED
+		if (encoder->private_->cpuinfo.ia32.sse2) {
+			encoder->private_->local_fixed_compute_best_predictor      = FLAC__fixed_compute_best_predictor_intrin_sse2;
+			encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_sse2;
+		}
+#    endif
+#    ifdef FLAC__SSSE3_SUPPORTED
+		if (encoder->private_->cpuinfo.ia32.ssse3) {
+			encoder->private_->local_fixed_compute_best_predictor      = FLAC__fixed_compute_best_predictor_intrin_ssse3;
+			encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_ssse3;
+		}
+#    endif
+#   endif /* FLAC__HAS_X86INTRIN */
+#  elif defined FLAC__CPU_X86_64
+		FLAC__ASSERT(encoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_X86_64);
+#   ifdef FLAC__HAS_X86INTRIN
+#    ifdef FLAC__SSE_SUPPORTED
+		if(encoder->private_->cpuinfo.x86.sse42 || !encoder->private_->cpuinfo.x86.intel) { /* use new autocorrelation functions */
+			if(encoder->protected_->max_lpc_order < 4)
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4_new;
+			else if(encoder->protected_->max_lpc_order < 8)
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8_new;
+			else if(encoder->protected_->max_lpc_order < 12)
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12_new;
+			else if(encoder->protected_->max_lpc_order < 16)
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16_new;
+		}
+		else {
+			if(encoder->protected_->max_lpc_order < 4)
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4_old;
+			else if(encoder->protected_->max_lpc_order < 8)
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8_old;
+			else if(encoder->protected_->max_lpc_order < 12)
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12_old;
+			else if(encoder->protected_->max_lpc_order < 16)
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16_old;
+		}
+#    endif
+
+#    ifdef FLAC__SSE2_SUPPORTED
+		encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
+#    endif
+#    ifdef FLAC__SSE4_1_SUPPORTED
+		if(encoder->private_->cpuinfo.x86.sse41) {
+			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse41;
+		}
+#    endif
+#    ifdef FLAC__AVX2_SUPPORTED
+		if(encoder->private_->cpuinfo.x86.avx2) {
+			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2;
+			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients       = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2;
+			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2;
+		}
+#    endif
+
+#    ifdef FLAC__SSE2_SUPPORTED
+		encoder->private_->local_fixed_compute_best_predictor      = FLAC__fixed_compute_best_predictor_intrin_sse2;
+		encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_sse2;
+#    endif
+#    ifdef FLAC__SSSE3_SUPPORTED
+		if (encoder->private_->cpuinfo.x86.ssse3) {
+			encoder->private_->local_fixed_compute_best_predictor      = FLAC__fixed_compute_best_predictor_intrin_ssse3;
+			encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_ssse3;
+		}
+#    endif
+#   endif /* FLAC__HAS_X86INTRIN */
+#  endif /* FLAC__CPU_... */
 	}
 # endif /* !FLAC__NO_ASM */
 #endif /* !FLAC__INTEGER_ONLY_LIBRARY */
+#if !defined FLAC__NO_ASM && defined FLAC__HAS_X86INTRIN
+	if(encoder->private_->cpuinfo.use_asm) {
+# if defined FLAC__CPU_IA32
+#  ifdef FLAC__SSE2_SUPPORTED
+		if(encoder->private_->cpuinfo.ia32.sse2)
+			encoder->private_->local_precompute_partition_info_sums = FLAC__precompute_partition_info_sums_intrin_sse2;
+#  endif
+#  ifdef FLAC__SSSE3_SUPPORTED
+		if(encoder->private_->cpuinfo.ia32.ssse3)
+			encoder->private_->local_precompute_partition_info_sums = FLAC__precompute_partition_info_sums_intrin_ssse3;
+#  endif
+#  ifdef FLAC__AVX2_SUPPORTED
+		if(encoder->private_->cpuinfo.ia32.avx2)
+			encoder->private_->local_precompute_partition_info_sums = FLAC__precompute_partition_info_sums_intrin_avx2;
+#  endif
+# elif defined FLAC__CPU_X86_64
+#  ifdef FLAC__SSE2_SUPPORTED
+		encoder->private_->local_precompute_partition_info_sums = FLAC__precompute_partition_info_sums_intrin_sse2;
+#  endif
+#  ifdef FLAC__SSSE3_SUPPORTED
+		if(encoder->private_->cpuinfo.x86.ssse3)
+			encoder->private_->local_precompute_partition_info_sums = FLAC__precompute_partition_info_sums_intrin_ssse3;
+#  endif
+#  ifdef FLAC__AVX2_SUPPORTED
+		if(encoder->private_->cpuinfo.x86.avx2)
+			encoder->private_->local_precompute_partition_info_sums = FLAC__precompute_partition_info_sums_intrin_avx2;
+#  endif
+# endif /* FLAC__CPU_... */
+	}
+#endif /* !FLAC__NO_ASM && FLAC__HAS_X86INTRIN */
 	/* finally override based on wide-ness if necessary */
 	if(encoder->private_->use_wide_by_block) {
-		encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_wide;
+		encoder->private_->local_fixed_compute_best_predictor = encoder->private_->local_fixed_compute_best_predictor_wide;
 	}
 
 	/* set state to OK; from here on, errors are fatal and we'll override the state then */
@@ -994,7 +1117,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
 		 */
 		encoder->private_->verify.input_fifo.size = encoder->protected_->blocksize+OVERREAD_;
 		for(i = 0; i < encoder->protected_->channels; i++) {
-			if(0 == (encoder->private_->verify.input_fifo.data[i] = (FLAC__int32*)safe_malloc_mul_2op_(sizeof(FLAC__int32), /*times*/encoder->private_->verify.input_fifo.size))) {
+			if(0 == (encoder->private_->verify.input_fifo.data[i] = safe_malloc_mul_2op_p(sizeof(FLAC__int32), /*times*/encoder->private_->verify.input_fifo.size))) {
 				encoder->protected_->state = FLAC__STREAM_ENCODER_MEMORY_ALLOCATION_ERROR;
 				return FLAC__STREAM_ENCODER_INIT_STATUS_ENCODER_ERROR;
 			}
@@ -1004,10 +1127,12 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
 		/*
 		 * Now set up a stream decoder for verification
 		 */
-		encoder->private_->verify.decoder = FLAC__stream_decoder_new();
 		if(0 == encoder->private_->verify.decoder) {
-			encoder->protected_->state = FLAC__STREAM_ENCODER_VERIFY_DECODER_ERROR;
-			return FLAC__STREAM_ENCODER_INIT_STATUS_ENCODER_ERROR;
+			encoder->private_->verify.decoder = FLAC__stream_decoder_new();
+			if(0 == encoder->private_->verify.decoder) {
+				encoder->protected_->state = FLAC__STREAM_ENCODER_VERIFY_DECODER_ERROR;
+				return FLAC__STREAM_ENCODER_INIT_STATUS_ENCODER_ERROR;
+			}
 		}
 
 		if(FLAC__stream_decoder_init_stream(encoder->private_->verify.decoder, verify_read_callback_, /*seek_callback=*/0, /*tell_callback=*/0, /*length_callback=*/0, /*eof_callback=*/0, verify_write_callback_, verify_metadata_callback_, verify_error_callback_, /*client_data=*/encoder) != FLAC__STREAM_DECODER_INIT_STATUS_OK) {
@@ -1180,7 +1305,7 @@ FLAC_API FLAC__StreamEncoderInitStatus FLAC__stream_encoder_init_ogg_stream(
 		/*is_ogg=*/true
 	);
 }
- 
+
 static FLAC__StreamEncoderInitStatus init_FILE_internal_(
 	FLAC__StreamEncoder *encoder,
 	FILE *file,
@@ -1211,6 +1336,13 @@ static FLAC__StreamEncoderInitStatus init_FILE_internal_(
 	if(file == stdout)
 		file = get_binary_stdout_(); /* just to be safe */
 
+#ifdef _WIN32
+	/*
+	 * Windows can suffer quite badly from disk fragmentation. This can be
+	 * reduced significantly by setting the output buffer size to be 10MB.
+	 */
+	setvbuf(file, NULL, _IOFBF, 10*1024*1024);
+#endif
 	encoder->private_->file = file;
 
 	encoder->private_->progress_callback = progress_callback;
@@ -1242,7 +1374,7 @@ static FLAC__StreamEncoderInitStatus init_FILE_internal_(
 
 	return init_status;
 }
- 
+
 FLAC_API FLAC__StreamEncoderInitStatus FLAC__stream_encoder_init_FILE(
 	FLAC__StreamEncoder *encoder,
 	FILE *file,
@@ -1252,7 +1384,7 @@ FLAC_API FLAC__StreamEncoderInitStatus FLAC__stream_encoder_init_FILE(
 {
 	return init_FILE_internal_(encoder, file, progress_callback, client_data, /*is_ogg=*/false);
 }
- 
+
 FLAC_API FLAC__StreamEncoderInitStatus FLAC__stream_encoder_init_ogg_FILE(
 	FLAC__StreamEncoder *encoder,
 	FILE *file,
@@ -1283,7 +1415,7 @@ static FLAC__StreamEncoderInitStatus init_file_internal_(
 	if(encoder->protected_->state != FLAC__STREAM_ENCODER_UNINITIALIZED)
 		return FLAC__STREAM_ENCODER_INIT_STATUS_ALREADY_INITIALIZED;
 
-	file = filename? fopen(filename, "w+b") : stdout;
+	file = filename? flac_fopen(filename, "w+b") : stdout;
 
 	if(file == 0) {
 		encoder->protected_->state = FLAC__STREAM_ENCODER_IO_ERROR;
@@ -1479,11 +1611,10 @@ FLAC_API FLAC__bool FLAC__stream_encoder_set_compression_level(FLAC__StreamEncod
 	ok &= FLAC__stream_encoder_set_do_mid_side_stereo          (encoder, compression_levels_[value].do_mid_side_stereo);
 	ok &= FLAC__stream_encoder_set_loose_mid_side_stereo       (encoder, compression_levels_[value].loose_mid_side_stereo);
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-#if 0
-	/* was: */
+#if 1
 	ok &= FLAC__stream_encoder_set_apodization                 (encoder, compression_levels_[value].apodization);
-	/* but it's too hard to specify the string in a locale-specific way */
 #else
+	/* equivalent to -A tukey(0.5) */
 	encoder->protected_->num_apodizations = 1;
 	encoder->protected_->apodizations[0].type = FLAC__APODIZATION_TUKEY;
 	encoder->protected_->apodizations[0].parameters.tukey.p = 0.5;
@@ -1587,6 +1718,48 @@ FLAC_API FLAC__bool FLAC__stream_encoder_set_apodization(FLAC__StreamEncoder *en
 				encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_TUKEY;
 			}
 		}
+		else if(n>15   && 0 == strncmp("partial_tukey("       , specification, 14)) {
+			FLAC__int32 tukey_parts = (FLAC__int32)strtod(specification+14, 0);
+			const char *si_1 = strchr(specification, '/');
+			FLAC__real overlap = si_1?flac_min((FLAC__real)strtod(si_1+1, 0),0.99f):0.1f;
+			FLAC__real overlap_units = 1.0f/(1.0f - overlap) - 1.0f;
+			const char *si_2 = strchr((si_1?(si_1+1):specification), '/');
+			FLAC__real tukey_p = si_2?(FLAC__real)strtod(si_2+1, 0):0.2f;
+
+			if (tukey_parts <= 1) {
+				encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.tukey.p = tukey_p;
+				encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_TUKEY;
+			}else if (encoder->protected_->num_apodizations + tukey_parts < 32){
+				FLAC__int32 m;
+				for(m = 0; m < tukey_parts; m++){
+					encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.p = tukey_p;
+					encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.start = m/(tukey_parts+overlap_units);
+					encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.end = (m+1+overlap_units)/(tukey_parts+overlap_units);
+					encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_PARTIAL_TUKEY;
+				}
+			}
+		}
+		else if(n>16   && 0 == strncmp("punchout_tukey("       , specification, 15)) {
+			FLAC__int32 tukey_parts = (FLAC__int32)strtod(specification+15, 0);
+			const char *si_1 = strchr(specification, '/');
+			FLAC__real overlap = si_1?flac_min((FLAC__real)strtod(si_1+1, 0),0.99f):0.2f;
+			FLAC__real overlap_units = 1.0f/(1.0f - overlap) - 1.0f;
+			const char *si_2 = strchr((si_1?(si_1+1):specification), '/');
+			FLAC__real tukey_p = si_2?(FLAC__real)strtod(si_2+1, 0):0.2f;
+
+			if (tukey_parts <= 1) {
+				encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.tukey.p = tukey_p;
+				encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_TUKEY;
+			}else if (encoder->protected_->num_apodizations + tukey_parts < 32){
+				FLAC__int32 m;
+				for(m = 0; m < tukey_parts; m++){
+					encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.p = tukey_p;
+					encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.start = m/(tukey_parts+overlap_units);
+					encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.end = (m+1+overlap_units)/(tukey_parts+overlap_units);
+					encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_PUNCHOUT_TUKEY;
+				}
+			}
+		}
 		else if(n==5  && 0 == strncmp("welch"        , specification, n))
 			encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_WELCH;
 		if (encoder->protected_->num_apodizations == 32)
@@ -1733,7 +1906,7 @@ FLAC_API FLAC__bool FLAC__stream_encoder_set_metadata(FLAC__StreamEncoder *encod
 	}
 	if(num_blocks) {
 		FLAC__StreamMetadata **m;
-		if(0 == (m = (FLAC__StreamMetadata**)safe_malloc_mul_2op_(sizeof(m[0]), /*times*/num_blocks)))
+		if(0 == (m = safe_malloc_mul_2op_p(sizeof(m[0]), /*times*/num_blocks)))
 			return false;
 		memcpy(m, metadata, sizeof(m[0]) * num_blocks);
 		encoder->protected_->metadata = m;
@@ -1987,7 +2160,7 @@ FLAC_API FLAC__bool FLAC__stream_encoder_process(FLAC__StreamEncoder *encoder, c
 	FLAC__ASSERT(encoder->protected_->state == FLAC__STREAM_ENCODER_OK);
 
 	do {
-		const unsigned n = min(blocksize+OVERREAD_-encoder->private_->current_sample_number, samples-j);
+		const unsigned n = flac_min(blocksize+OVERREAD_-encoder->private_->current_sample_number, samples-j);
 
 		if(encoder->protected_->verify)
 			append_to_verify_fifo_(&encoder->private_->verify.input_fifo, buffer, j, channels, n);
@@ -2050,7 +2223,7 @@ FLAC_API FLAC__bool FLAC__stream_encoder_process_interleaved(FLAC__StreamEncoder
 		 */
 		do {
 			if(encoder->protected_->verify)
-				append_to_verify_fifo_interleaved_(&encoder->private_->verify.input_fifo, buffer, j, channels, min(blocksize+OVERREAD_-encoder->private_->current_sample_number, samples-j));
+				append_to_verify_fifo_interleaved_(&encoder->private_->verify.input_fifo, buffer, j, channels, flac_min(blocksize+OVERREAD_-encoder->private_->current_sample_number, samples-j));
 
 			/* "i <= blocksize" to overread 1 sample; see comment in OVERREAD_ decl */
 			for(i = encoder->private_->current_sample_number; i <= blocksize && j < samples; i++, j++) {
@@ -2085,7 +2258,7 @@ FLAC_API FLAC__bool FLAC__stream_encoder_process_interleaved(FLAC__StreamEncoder
 		 */
 		do {
 			if(encoder->protected_->verify)
-				append_to_verify_fifo_interleaved_(&encoder->private_->verify.input_fifo, buffer, j, channels, min(blocksize+OVERREAD_-encoder->private_->current_sample_number, samples-j));
+				append_to_verify_fifo_interleaved_(&encoder->private_->verify.input_fifo, buffer, j, channels, flac_min(blocksize+OVERREAD_-encoder->private_->current_sample_number, samples-j));
 
 			/* "i <= blocksize" to overread 1 sample; see comment in OVERREAD_ decl */
 			for(i = encoder->private_->current_sample_number; i <= blocksize && j < samples; i++, j++) {
@@ -2168,6 +2341,8 @@ void set_defaults_(FLAC__StreamEncoder *encoder)
 #if FLAC__HAS_OGG
 	FLAC__ogg_encoder_aspect_set_defaults(&encoder->protected_->ogg_encoder_aspect);
 #endif
+
+	FLAC__stream_encoder_set_compression_level(encoder, 5);
 }
 
 void free_(FLAC__StreamEncoder *encoder)
@@ -2266,8 +2441,8 @@ FLAC__bool resize_buffers_(FLAC__StreamEncoder *encoder, unsigned new_blocksize)
 
 	ok = true;
 
-	/* WATCHOUT: FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx()
-	 * requires that the input arrays (in our case the integer signals)
+	/* WATCHOUT: FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx() and ..._intrin_sse2()
+	 * require that the input arrays (in our case the integer signals)
 	 * have a buffer of up to 3 zeroes in front (at negative indices) for
 	 * alignment purposes; we use 4 in front to keep the data well-aligned.
 	 */
@@ -2364,6 +2539,12 @@ FLAC__bool resize_buffers_(FLAC__StreamEncoder *encoder, unsigned new_blocksize)
 				case FLAC__APODIZATION_TUKEY:
 					FLAC__window_tukey(encoder->private_->window[i], new_blocksize, encoder->protected_->apodizations[i].parameters.tukey.p);
 					break;
+				case FLAC__APODIZATION_PARTIAL_TUKEY:
+					FLAC__window_partial_tukey(encoder->private_->window[i], new_blocksize, encoder->protected_->apodizations[i].parameters.multiple_tukey.p, encoder->protected_->apodizations[i].parameters.multiple_tukey.start, encoder->protected_->apodizations[i].parameters.multiple_tukey.end);
+					break;
+				case FLAC__APODIZATION_PUNCHOUT_TUKEY:
+					FLAC__window_punchout_tukey(encoder->private_->window[i], new_blocksize, encoder->protected_->apodizations[i].parameters.multiple_tukey.p, encoder->protected_->apodizations[i].parameters.multiple_tukey.start, encoder->protected_->apodizations[i].parameters.multiple_tukey.end);
+					break;
 				case FLAC__APODIZATION_WELCH:
 					FLAC__window_welch(encoder->private_->window[i], new_blocksize);
 					break;
@@ -2425,8 +2606,8 @@ FLAC__bool write_bitbuffer_(FLAC__StreamEncoder *encoder, unsigned samples, FLAC
 	FLAC__bitwriter_clear(encoder->private_->frame);
 
 	if(samples > 0) {
-		encoder->private_->streaminfo.data.stream_info.min_framesize = min(bytes, encoder->private_->streaminfo.data.stream_info.min_framesize);
-		encoder->private_->streaminfo.data.stream_info.max_framesize = max(bytes, encoder->private_->streaminfo.data.stream_info.max_framesize);
+		encoder->private_->streaminfo.data.stream_info.min_framesize = flac_min(bytes, encoder->private_->streaminfo.data.stream_info.min_framesize);
+		encoder->private_->streaminfo.data.stream_info.max_framesize = flac_max(bytes, encoder->private_->streaminfo.data.stream_info.max_framesize);
 	}
 
 	return true;
@@ -2437,6 +2618,10 @@ FLAC__StreamEncoderWriteStatus write_frame_(FLAC__StreamEncoder *encoder, const
 	FLAC__StreamEncoderWriteStatus status;
 	FLAC__uint64 output_position = 0;
 
+#if FLAC__HAS_OGG == 0
+	(void)is_last_block;
+#endif
+
 	/* FLAC__STREAM_ENCODER_TELL_STATUS_UNSUPPORTED just means we didn't get the offset; no error */
 	if(encoder->private_->tell_callback && encoder->private_->tell_callback(encoder, &output_position, encoder->private_->client_data) == FLAC__STREAM_ENCODER_TELL_STATUS_ERROR) {
 		encoder->protected_->state = FLAC__STREAM_ENCODER_CLIENT_ERROR;
@@ -2513,7 +2698,7 @@ FLAC__StreamEncoderWriteStatus write_frame_(FLAC__StreamEncoder *encoder, const
 		 * when the encoder goes back to write metadata, 'current_frame'
 		 * will drop back to 0.
 		 */
-		encoder->private_->frames_written = max(encoder->private_->frames_written, encoder->private_->current_frame_number+1);
+		encoder->private_->frames_written = flac_max(encoder->private_->frames_written, encoder->private_->current_frame_number+1);
 	}
 	else
 		encoder->protected_->state = FLAC__STREAM_ENCODER_CLIENT_ERROR;
@@ -2524,7 +2709,7 @@ FLAC__StreamEncoderWriteStatus write_frame_(FLAC__StreamEncoder *encoder, const
 /* Gets called when the encoding process has finished so that we can update the STREAMINFO and SEEKTABLE blocks.  */
 void update_metadata_(const FLAC__StreamEncoder *encoder)
 {
-	FLAC__byte b[max(6, FLAC__STREAM_METADATA_SEEKPOINT_LENGTH)];
+	FLAC__byte b[flac_max(6u, FLAC__STREAM_METADATA_SEEKPOINT_LENGTH)];
 	const FLAC__StreamMetadata *metadata = &encoder->private_->streaminfo;
 	const FLAC__uint64 samples = metadata->data.stream_info.total_samples;
 	const unsigned min_framesize = metadata->data.stream_info.min_framesize;
@@ -2689,7 +2874,7 @@ void update_ogg_metadata_(FLAC__StreamEncoder *encoder)
 		FLAC__OGG_MAPPING_NUM_HEADERS_LENGTH +
 		FLAC__STREAM_SYNC_LENGTH
 	;
-	FLAC__byte b[max(6, FLAC__STREAM_METADATA_SEEKPOINT_LENGTH)];
+	FLAC__byte b[flac_max(6u, FLAC__STREAM_METADATA_SEEKPOINT_LENGTH)];
 	const FLAC__StreamMetadata *metadata = &encoder->private_->streaminfo;
 	const FLAC__uint64 samples = metadata->data.stream_info.total_samples;
 	const unsigned min_framesize = metadata->data.stream_info.min_framesize;
@@ -2940,9 +3125,9 @@ FLAC__bool process_subframes_(FLAC__StreamEncoder *encoder, FLAC__bool is_fracti
 	}
 	else {
 		max_partition_order = FLAC__format_get_max_rice_partition_order_from_blocksize(encoder->protected_->blocksize);
-		max_partition_order = min(max_partition_order, encoder->protected_->max_residual_partition_order);
+		max_partition_order = flac_min(max_partition_order, encoder->protected_->max_residual_partition_order);
 	}
-	min_partition_order = min(min_partition_order, max_partition_order);
+	min_partition_order = flac_min(min_partition_order, max_partition_order);
 
 	/*
 	 * Setup the frame
@@ -3191,7 +3376,7 @@ FLAC__bool process_subframe_(
 #endif
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 	FLAC__double lpc_residual_bits_per_sample;
-	FLAC__real autoc[FLAC__MAX_LPC_ORDER+1]; /* WATCHOUT: the size is important even though encoder->protected_->max_lpc_order might be less; some asm routines need all the space */
+	FLAC__real autoc[FLAC__MAX_LPC_ORDER+1]; /* WATCHOUT: the size is important even though encoder->protected_->max_lpc_order might be less; some asm and x86 intrinsic routines need all the space */
 	FLAC__double lpc_error[FLAC__MAX_LPC_ORDER];
 	unsigned min_lpc_order, max_lpc_order, lpc_order;
 	unsigned min_qlp_coeff_precision, max_qlp_coeff_precision, qlp_coeff_precision;
@@ -3344,10 +3529,10 @@ FLAC__bool process_subframe_(
 								}
 								if(encoder->protected_->do_qlp_coeff_prec_search) {
 									min_qlp_coeff_precision = FLAC__MIN_QLP_COEFF_PRECISION;
-									/* try to ensure a 32-bit datapath throughout for 16bps(+1bps for side channel) or less */
-									if(subframe_bps <= 17) {
-										max_qlp_coeff_precision = min(32 - subframe_bps - lpc_order, FLAC__MAX_QLP_COEFF_PRECISION);
-										max_qlp_coeff_precision = max(max_qlp_coeff_precision, min_qlp_coeff_precision);
+									/* try to keep qlp coeff precision such that only 32-bit math is required for decode of <=16bps streams */
+									if(subframe_bps <= 16) {
+										max_qlp_coeff_precision = flac_min(32 - subframe_bps - FLAC__bitmath_ilog2(lpc_order), FLAC__MAX_QLP_COEFF_PRECISION);
+										max_qlp_coeff_precision = flac_max(max_qlp_coeff_precision, min_qlp_coeff_precision);
 									}
 									else
 										max_qlp_coeff_precision = FLAC__MAX_QLP_COEFF_PRECISION;
@@ -3582,7 +3767,7 @@ unsigned evaluate_lpc_subframe_(
 	FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents
 )
 {
-	FLAC__int32 qlp_coeff[FLAC__MAX_LPC_ORDER];
+	FLAC__int32 qlp_coeff[FLAC__MAX_LPC_ORDER]; /* WATCHOUT: the size is important; some x86 intrinsic routines need more than lpc order elements */
 	unsigned i, residual_bits, estimate;
 	int quantization, ret;
 	const unsigned residual_samples = blocksize - order;
@@ -3591,7 +3776,7 @@ unsigned evaluate_lpc_subframe_(
 	if(subframe_bps <= 16) {
 		FLAC__ASSERT(order > 0);
 		FLAC__ASSERT(order <= FLAC__MAX_LPC_ORDER);
-		qlp_coeff_precision = min(qlp_coeff_precision, 32 - subframe_bps - FLAC__bitmath_ilog2(order));
+		qlp_coeff_precision = flac_min(qlp_coeff_precision, 32 - subframe_bps - FLAC__bitmath_ilog2(order));
 	}
 
 	ret = FLAC__lpc_quantize_coefficients(lp_coeff, order, qlp_coeff_precision, qlp_coeff, &quantization);
@@ -3695,9 +3880,9 @@ unsigned find_best_partition_order_(
 	const unsigned blocksize = residual_samples + predictor_order;
 
 	max_partition_order = FLAC__format_get_max_rice_partition_order_from_blocksize_limited_max_and_predictor_order(max_partition_order, blocksize, predictor_order);
-	min_partition_order = min(min_partition_order, max_partition_order);
+	min_partition_order = flac_min(min_partition_order, max_partition_order);
 
-	precompute_partition_info_sums_(residual, abs_residual_partition_sums, residual_samples, predictor_order, min_partition_order, max_partition_order, bps);
+	private_->local_precompute_partition_info_sums(residual, abs_residual_partition_sums, residual_samples, predictor_order, min_partition_order, max_partition_order, bps);
 
 	if(do_escape_coding)
 		precompute_partition_info_escapes_(residual, raw_bits_per_partition, residual_samples, predictor_order, min_partition_order, max_partition_order);
@@ -3749,7 +3934,7 @@ unsigned find_best_partition_order_(
 		unsigned partition;
 
 		/* save best parameters and raw_bits */
-		FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(prc, max(6, best_partition_order));
+		FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(prc, flac_max(6u, best_partition_order));
 		memcpy(prc->parameters, private_->partitioned_rice_contents_extra[best_parameters_index].parameters, sizeof(unsigned)*(1<<(best_partition_order)));
 		if(do_escape_coding)
 			memcpy(prc->raw_bits, private_->partitioned_rice_contents_extra[best_parameters_index].raw_bits, sizeof(unsigned)*(1<<(best_partition_order)));
@@ -3769,17 +3954,6 @@ unsigned find_best_partition_order_(
 	return best_residual_bits;
 }
 
-#if defined(FLAC__CPU_IA32) && !defined FLAC__NO_ASM && defined FLAC__HAS_NASM
-extern void precompute_partition_info_sums_32bit_asm_ia32_(
-	const FLAC__int32 residual[],
-	FLAC__uint64 abs_residual_partition_sums[],
-	unsigned blocksize,
-	unsigned predictor_order,
-	unsigned min_partition_order,
-	unsigned max_partition_order
-);
-#endif
-
 void precompute_partition_info_sums_(
 	const FLAC__int32 residual[],
 	FLAC__uint64 abs_residual_partition_sums[],
@@ -3795,21 +3969,12 @@ void precompute_partition_info_sums_(
 
 	FLAC__ASSERT(default_partition_samples > predictor_order);
 
-#if defined(FLAC__CPU_IA32) && !defined FLAC__NO_ASM && defined FLAC__HAS_NASM
-	/* slightly pessimistic but still catches all common cases */
-	/* WATCHOUT: "+ bps" is an assumption that the average residual magnitude will not be more than "bps" bits */
-	if(FLAC__bitmath_ilog2(default_partition_samples) + bps < 32) {
-		precompute_partition_info_sums_32bit_asm_ia32_(residual, abs_residual_partition_sums, residual_samples + predictor_order, predictor_order, min_partition_order, max_partition_order);
-		return;
-	}
-#endif
-
 	/* first do max_partition_order */
 	{
 		unsigned partition, residual_sample, end = (unsigned)(-(int)predictor_order);
-		/* slightly pessimistic but still catches all common cases */
-		/* WATCHOUT: "+ bps" is an assumption that the average residual magnitude will not be more than "bps" bits */
-		if(FLAC__bitmath_ilog2(default_partition_samples) + bps < 32) {
+		/* WATCHOUT: "+ bps + FLAC__MAX_EXTRA_RESIDUAL_BPS" is the maximum
+		 * assumed size of the average residual magnitude */
+		if(FLAC__bitmath_ilog2(default_partition_samples) + bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < 32) {
 			FLAC__uint32 abs_residual_partition_sum;
 
 			for(partition = residual_sample = 0; partition < partitions; partition++) {
@@ -3901,7 +4066,7 @@ void precompute_partition_info_escapes_(
 		for(i = 0; i < partitions; i++) {
 			m = raw_bits_per_partition[from_partition];
 			from_partition++;
-			raw_bits_per_partition[to_partition] = max(m, raw_bits_per_partition[from_partition]);
+			raw_bits_per_partition[to_partition] = flac_max(m, raw_bits_per_partition[from_partition]);
 			from_partition++;
 			to_partition++;
 		}
@@ -3909,7 +4074,7 @@ void precompute_partition_info_escapes_(
 }
 
 #ifdef EXACT_RICE_BITS_CALCULATION
-static FLaC__INLINE unsigned count_rice_bits_in_partition_(
+static inline unsigned count_rice_bits_in_partition_(
 	const unsigned rice_parameter,
 	const unsigned partition_samples,
 	const FLAC__int32 *residual
@@ -3924,7 +4089,7 @@ static FLaC__INLINE unsigned count_rice_bits_in_partition_(
 	return partition_bits;
 }
 #else
-static FLaC__INLINE unsigned count_rice_bits_in_partition_(
+static inline unsigned count_rice_bits_in_partition_(
 	const unsigned rice_parameter,
 	const unsigned partition_samples,
 	const FLAC__uint64 abs_residual_partition_sum
@@ -3979,7 +4144,7 @@ FLAC__bool set_partitioned_rice_(
 	FLAC__ASSERT(suggested_rice_parameter < FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_ESCAPE_PARAMETER);
 	FLAC__ASSERT(rice_parameter_limit <= FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_ESCAPE_PARAMETER);
 
-	FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(partitioned_rice_contents, max(6, partition_order));
+	FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(partitioned_rice_contents, flac_max(6u, partition_order));
 	parameters = partitioned_rice_contents->parameters;
 	raw_bits = partitioned_rice_contents->raw_bits;
 
@@ -4053,8 +4218,35 @@ FLAC__bool set_partitioned_rice_(
 			 * in the partition, so the actual mean is
 			 * mean/partition_samples
 			 */
+#if 0 /* old simple code */
 			for(rice_parameter = 0, k = partition_samples; k < mean; rice_parameter++, k <<= 1)
 				;
+#else
+#if defined FLAC__CPU_X86_64 /* and other 64-bit arch, too */
+			if(mean <= 0x80000000/512) { /* 512: more or less optimal for both 16- and 24-bit input */
+#else
+			if(mean <= 0x80000000/8) { /* 32-bit arch: use 32-bit math if possible */
+#endif
+				FLAC__uint32 k2, mean2 = (FLAC__uint32) mean;
+				rice_parameter = 0; k2 = partition_samples;
+				while(k2*8 < mean2) { /* requires: mean <= (2^31)/8 */
+					rice_parameter += 4; k2 <<= 4; /* tuned for 16-bit input */
+				}
+				while(k2 < mean2) { /* requires: mean <= 2^31 */
+					rice_parameter++; k2 <<= 1;
+				}
+			}
+			else {
+				rice_parameter = 0; k = partition_samples;
+				if(mean <= FLAC__U64L(0x8000000000000000)/128) /* usually mean is _much_ smaller than this value */
+					while(k*128 < mean) { /* requires: mean <= (2^63)/128 */
+						rice_parameter += 8; k <<= 8; /* tuned for 24-bit input */
+					}
+				while(k < mean) { /* requires: mean <= 2^63 */
+					rice_parameter++; k <<= 1;
+				}
+			}
+#endif
 			if(rice_parameter >= rice_parameter_limit) {
 #ifdef DEBUG_VERBOSE
 				fprintf(stderr, "clipping rice_parameter (%u -> %u) @6\n", rice_parameter, rice_parameter_limit - 1);
@@ -4271,7 +4463,7 @@ FLAC__StreamEncoderSeekStatus file_seek_callback_(const FLAC__StreamEncoder *enc
 {
 	(void)client_data;
 
-	if(fseeko(encoder->private_->file, (off_t)absolute_byte_offset, SEEK_SET) < 0)
+	if(fseeko(encoder->private_->file, (FLAC__off_t)absolute_byte_offset, SEEK_SET) < 0)
 		return FLAC__STREAM_ENCODER_SEEK_STATUS_ERROR;
 	else
 		return FLAC__STREAM_ENCODER_SEEK_STATUS_OK;
@@ -4279,7 +4471,7 @@ FLAC__StreamEncoderSeekStatus file_seek_callback_(const FLAC__StreamEncoder *enc
 
 FLAC__StreamEncoderTellStatus file_tell_callback_(const FLAC__StreamEncoder *encoder, FLAC__uint64 *absolute_byte_offset, void *client_data)
 {
-	off_t offset;
+	FLAC__off_t offset;
 
 	(void)client_data;
 
diff --git a/libFLAC/stream_encoder_framing.c b/libFLAC/stream_encoder_framing.c
index 939955b..959eca0 100644
--- a/libFLAC/stream_encoder_framing.c
+++ b/libFLAC/stream_encoder_framing.c
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,7 +30,7 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#if HAVE_CONFIG_H
+#ifdef HAVE_CONFIG_H
 #  include <config.h>
 #endif
 
@@ -39,11 +40,6 @@
 #include "private/crc.h"
 #include "FLAC/assert.h"
 
-#ifdef max
-#undef max
-#endif
-#define max(x,y) ((x)>(y)?(x):(y))
-
 static FLAC__bool add_entropy_coding_method_(FLAC__BitWriter *bw, const FLAC__EntropyCodingMethod *method);
 static FLAC__bool add_residual_partitioned_rice_(FLAC__BitWriter *bw, const FLAC__int32 residual[], const unsigned residual_samples, const unsigned predictor_order, const unsigned rice_parameters[], const unsigned raw_bits[], const unsigned partition_order, const FLAC__bool is_extended);
 
@@ -166,11 +162,11 @@ FLAC__bool FLAC__add_metadata_block(const FLAC__StreamMetadata *metadata, FLAC__
 				if(!FLAC__bitwriter_write_raw_uint32(bw, track->num_indices, FLAC__STREAM_METADATA_CUESHEET_TRACK_NUM_INDICES_LEN))
 					return false;
 				for(j = 0; j < track->num_indices; j++) {
-					const FLAC__StreamMetadata_CueSheet_Index *index = track->indices + j;
+					const FLAC__StreamMetadata_CueSheet_Index *indx = track->indices + j;
 
-					if(!FLAC__bitwriter_write_raw_uint64(bw, index->offset, FLAC__STREAM_METADATA_CUESHEET_INDEX_OFFSET_LEN))
+					if(!FLAC__bitwriter_write_raw_uint64(bw, indx->offset, FLAC__STREAM_METADATA_CUESHEET_INDEX_OFFSET_LEN))
 						return false;
-					if(!FLAC__bitwriter_write_raw_uint32(bw, index->number, FLAC__STREAM_METADATA_CUESHEET_INDEX_NUMBER_LEN))
+					if(!FLAC__bitwriter_write_raw_uint32(bw, indx->number, FLAC__STREAM_METADATA_CUESHEET_INDEX_NUMBER_LEN))
 						return false;
 					if(!FLAC__bitwriter_write_zeroes(bw, FLAC__STREAM_METADATA_CUESHEET_INDEX_RESERVED_LEN))
 						return false;
diff --git a/libFLAC/window.c b/libFLAC/window.c
index cd689c5..4387ef7 100644
--- a/libFLAC/window.c
+++ b/libFLAC/window.c
@@ -1,5 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2006,2007  Josh Coalson
+ * Copyright (C) 2006-2009  Josh Coalson
+ * Copyright (C) 2011-2014  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,22 +30,18 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#if HAVE_CONFIG_H
+#ifdef HAVE_CONFIG_H
 #  include <config.h>
 #endif
 
 #include <math.h>
+#include "share/compat.h"
 #include "FLAC/assert.h"
 #include "FLAC/format.h"
 #include "private/window.h"
 
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 
-#ifndef M_PI
-/* math.h in VC++ doesn't seem to have this (how Microsoft is that?) */
-#define M_PI 3.14159265358979323846
-#endif
-
 
 void FLAC__window_bartlett(FLAC__real *window, const FLAC__int32 L)
 {
@@ -61,7 +58,7 @@ void FLAC__window_bartlett(FLAC__real *window, const FLAC__int32 L)
 		for (n = 0; n <= L/2-1; n++)
 			window[n] = 2.0f * n / (float)N;
 		for (; n <= N; n++)
-			window[n] = 2.0f - 2.0f * (N-n) / (float)N;
+			window[n] = 2.0f - 2.0f * n / (float)N;
 	}
 }
 
@@ -71,7 +68,7 @@ void FLAC__window_bartlett_hann(FLAC__real *window, const FLAC__int32 L)
 	FLAC__int32 n;
 
 	for (n = 0; n < L; n++)
-		window[n] = (FLAC__real)(0.62f - 0.48f * fabs((float)n/(float)N+0.5f) + 0.38f * cos(2.0f * M_PI * ((float)n/(float)N+0.5f)));
+		window[n] = (FLAC__real)(0.62f - 0.48f * fabs((float)n/(float)N-0.5f) - 0.38f * cos(2.0f * M_PI * ((float)n/(float)N)));
 }
 
 void FLAC__window_blackman(FLAC__real *window, const FLAC__int32 L)
@@ -176,16 +173,16 @@ void FLAC__window_triangle(FLAC__real *window, const FLAC__int32 L)
 	FLAC__int32 n;
 
 	if (L & 1) {
-		for (n = 1; n <= L+1/2; n++)
+		for (n = 1; n <= (L+1)/2; n++)
 			window[n-1] = 2.0f * n / ((float)L + 1.0f);
 		for (; n <= L; n++)
-			window[n-1] = - (float)(2 * (L - n + 1)) / ((float)L + 1.0f);
+			window[n-1] = (float)(2 * (L - n + 1)) / ((float)L + 1.0f);
 	}
 	else {
 		for (n = 1; n <= L/2; n++)
-			window[n-1] = 2.0f * n / (float)L;
+			window[n-1] = 2.0f * n / ((float)L + 1.0f);
 		for (; n <= L; n++)
-			window[n-1] = ((float)(2 * (L - n)) + 1.0f) / (float)L;
+			window[n-1] = (float)(2 * (L - n + 1)) / ((float)L + 1.0f);
 	}
 }
 
@@ -210,6 +207,66 @@ void FLAC__window_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__rea
 	}
 }
 
+void FLAC__window_partial_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__real p, const FLAC__real start, const FLAC__real end)
+{
+	const FLAC__int32 start_n = (FLAC__int32)(start * L);
+	const FLAC__int32 end_n = (FLAC__int32)(end * L);
+	const FLAC__int32 N = end_n - start_n;
+	FLAC__int32 Np, n, i;
+
+	if (p <= 0.0f)
+		FLAC__window_partial_tukey(window, L, 0.05f, start, end);
+	else if (p >= 1.0f)
+		FLAC__window_partial_tukey(window, L, 0.95f, start, end);
+	else {
+
+		Np = (FLAC__int32)(p / 2.0f * N);
+
+		for (n = 0; n < start_n && n < L; n++)
+			window[n] = 0.0f;
+		for (i = 1; n < (start_n+Np) && n < L; n++, i++)
+			window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Np));
+		for (; n < (end_n-Np) && n < L; n++)
+			window[n] = 1.0f;
+		for (i = Np; n < end_n && n < L; n++, i--)
+			window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Np));
+		for (; n < L; n++)
+			window[n] = 0.0f;
+	}
+}
+
+void FLAC__window_punchout_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__real p, const FLAC__real start, const FLAC__real end)
+{
+	const FLAC__int32 start_n = (FLAC__int32)(start * L);
+	const FLAC__int32 end_n = (FLAC__int32)(end * L);
+	FLAC__int32 Ns, Ne, n, i;
+
+	if (p <= 0.0f)
+		FLAC__window_punchout_tukey(window, L, 0.05f, start, end);
+	else if (p >= 1.0f)
+		FLAC__window_punchout_tukey(window, L, 0.95f, start, end);
+	else {
+
+		Ns = (FLAC__int32)(p / 2.0f * start_n);
+		Ne = (FLAC__int32)(p / 2.0f * (L - end_n));
+
+		for (n = 0, i = 1; n < Ns && n < L; n++, i++)
+			window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Ns));
+		for (; n < start_n-Ns && n < L; n++)
+			window[n] = 1.0f;
+		for (i = Ns; n < start_n && n < L; n++, i--)
+			window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Ns));
+		for (; n < end_n && n < L; n++)
+			window[n] = 0.0f;
+		for (i = 1; n < end_n+Ne && n < L; n++, i++)
+			window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Ne));
+		for (; n < L - (Ne) && n < L; n++)
+			window[n] = 1.0f;
+		for (i = Ne; n < L; n++, i--)
+			window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Ne));
+	}
+}
+
 void FLAC__window_welch(FLAC__real *window, const FLAC__int32 L)
 {
 	const FLAC__int32 N = L - 1;
-- 
cgit v1.1