1 files changed, 485 insertions, 0 deletions
diff --git a/media/webm/webm_parser.cc b/media/webm/webm_parser.cc
new file mode 100644
index 0000000..42d826c1
--- /dev/null
+++ b/media/webm/webm_parser.cc
@@ -0,0 +1,485 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "media/webm/webm_parser.h"
+
+// This file contains code to parse WebM file elements. It was created
+// from information in the Matroska spec.
+// http://www.matroska.org/technical/specs/index.html
+
+#include <iomanip>
+
+#include "base/logging.h"
+#include "media/webm/webm_constants.h"
+
+namespace media {
+
+// Maximum nesting depth of WebM elements. Some WebM elements are lists of
+// other elements; this limits the number of levels of recursion allowed.
+static const int kMaxLevelDepth = 6;
+
+enum ElementType {  // How ParseElements() handles an element's payload.
+  LIST,  // Container of sub-elements; parsed by ParseElementList().
+  UINT,  // Big-endian integer of 1 to 8 bytes; reported via OnUInt().
+  FLOAT,  // 4 or 8 byte floating point value; reported via OnFloat().
+  BINARY,  // Raw bytes; reported via OnBinary().
+  STRING,  // Bytes wrapped in a std::string; reported via OnString().
+  SBLOCK,  // SimpleBlock; parsed by ParseSimpleBlock().
+  SKIP,  // Valid element whose payload is ignored.
+};
+
+struct ElementIdInfo {  // Describes one element ID that is valid in a list.
+  int level_;  // Nesting level at which ParseElements() accepts this ID.
+  ElementType type_;  // Selects how the element's payload is handled.
+  int id_;  // The element ID (a kWebMId* constant).
+};
+
+struct ListElementInfo {  // Maps a list element ID to its child-ID table.
+  int id_;  // ID of the list element.
+  const ElementIdInfo* id_info_;  // Table of IDs valid inside the list.
+  int id_info_size_;  // Size of |id_info_| in bytes (a sizeof value).
+};
+
+// The following are tables indicating what IDs are valid sub-elements
+// of particular elements. Elements that don't appear in the relevant
+// table are currently logged and skipped by ParseElements() (see the TODO
+// there). Some elements are marked as SKIP because they are valid, but we
+// don't care about them right now.
+static const ElementIdInfo kClusterIds[] = {  // Children of a Cluster.
+  {2, UINT, kWebMIdTimecode},
+  {2, SBLOCK, kWebMIdSimpleBlock},
+  {2, LIST, kWebMIdBlockGroup},  // No kListElementInfo entry: parse fails.
+};
+
+static const ElementIdInfo kInfoIds[] = {  // Children of Info.
+  {2, SKIP, kWebMIdSegmentUID},
+  {2, UINT, kWebMIdTimecodeScale},
+  {2, FLOAT, kWebMIdDuration},
+  {2, SKIP, kWebMIdDateUTC},
+  {2, SKIP, kWebMIdTitle},
+  {2, SKIP, kWebMIdMuxingApp},
+  {2, SKIP, kWebMIdWritingApp},
+};
+
+static const ElementIdInfo kTracksIds[] = {  // Children of Tracks.
+  {2, LIST, kWebMIdTrackEntry},
+};
+
+static const ElementIdInfo kTrackEntryIds[] = {  // Children of a TrackEntry.
+  {3, UINT, kWebMIdTrackNumber},
+  {3, SKIP, kWebMIdTrackUID},
+  {3, UINT, kWebMIdTrackType},
+  {3, SKIP, kWebMIdFlagEnabled},
+  {3, SKIP, kWebMIdFlagDefault},
+  {3, SKIP, kWebMIdFlagForced},
+  {3, UINT, kWebMIdFlagLacing},
+  {3, UINT, kWebMIdDefaultDuration},
+  {3, SKIP, kWebMIdName},
+  {3, SKIP, kWebMIdLanguage},
+  {3, STRING, kWebMIdCodecID},
+  {3, BINARY, kWebMIdCodecPrivate},
+  {3, SKIP, kWebMIdCodecName},
+  {3, LIST, kWebMIdVideo},
+  {3, LIST, kWebMIdAudio},
+};
+
+static const ElementIdInfo kVideoIds[] = {  // Children of Video.
+  {4, SKIP, kWebMIdFlagInterlaced},
+  {4, SKIP, kWebMIdStereoMode},
+  {4, UINT, kWebMIdPixelWidth},
+  {4, UINT, kWebMIdPixelHeight},
+  {4, SKIP, kWebMIdPixelCropBottom},
+  {4, SKIP, kWebMIdPixelCropTop},
+  {4, SKIP, kWebMIdPixelCropLeft},
+  {4, SKIP, kWebMIdPixelCropRight},
+  {4, SKIP, kWebMIdDisplayWidth},
+  {4, SKIP, kWebMIdDisplayHeight},
+  {4, SKIP, kWebMIdDisplayUnit},
+  {4, SKIP, kWebMIdAspectRatioType},
+};
+
+static const ElementIdInfo kAudioIds[] = {  // Children of Audio.
+  {4, SKIP, kWebMIdSamplingFrequency},
+  {4, SKIP, kWebMIdOutputSamplingFrequency},
+  {4, UINT, kWebMIdChannels},
+  {4, SKIP, kWebMIdBitDepth},
+};
+
+static const ElementIdInfo kClustersOnly[] = {  // Accepts only Clusters.
+  {1, LIST, kWebMIdCluster},  // NOTE(review): table looks unreferenced here.
+};
+
+static const ListElementInfo kListElementInfo[] = {  // Used by FindListInfo().
+  { kWebMIdCluster,    kClusterIds,    sizeof(kClusterIds) },
+  { kWebMIdInfo,       kInfoIds,       sizeof(kInfoIds) },
+  { kWebMIdTracks,     kTracksIds,     sizeof(kTracksIds) },
+  { kWebMIdTrackEntry, kTrackEntryIds, sizeof(kTrackEntryIds) },
+  { kWebMIdVideo,      kVideoIds,      sizeof(kVideoIds) },
+  { kWebMIdAudio,      kAudioIds,      sizeof(kAudioIds) },
+};
+
+// Number of entries in kListElementInfo.
+const int kListElementInfoCount =
+    sizeof(kListElementInfo) / sizeof(kListElementInfo[0]);
+
+WebMParserClient::~WebMParserClient() {}  // Empty out-of-line destructor.
+
+// Decodes one variable-length header field (an element ID or an element
+// size). The position of the first set bit in the leading byte gives the
+// total number of bytes the field occupies.
+// |buf|  - The buffer to parse.
+// |size| - The number of bytes available in |buf|.
+// |max_bytes| - The maximum field length in bytes: 4 for ID fields, 8 for
+//               element size fields. A leading byte that encodes a longer
+//               field is a parse error.
+// |mask_first_byte| - True for element size fields, whose length marker
+//                     bit is cleared from the value; false for ID fields,
+//                     which keep it.
+// |num| - Receives the decoded value. May be written even on failure.
+//
+// Returns: The number of bytes parsed on success. -1 on error.
+static int ParseWebMElementHeaderField(const uint8* buf, int size,
+                                       int max_bytes, bool mask_first_byte,
+                                       int64* num) {
+  DCHECK(buf);
+  DCHECK(num);
+
+  if (size <= 0)
+    return -1;
+
+  // Scan the leading byte from its top bit down for the length marker.
+  const uint8 first = buf[0];
+  int field_len = 0;
+  for (int bit = 0, marker = 0x80; bit < max_bytes; ++bit, marker >>= 1) {
+    if ((first & marker) != 0) {
+      *num = mask_first_byte ? (first & ~marker) : first;
+      field_len = bit + 1;
+      break;
+    }
+  }
+
+  // Fail when no marker was found or the field runs past the buffer.
+  if (field_len == 0 || field_len > size)
+    return -1;
+
+  // Fold the remaining big-endian bytes into the value.
+  for (int pos = 1; pos < field_len; ++pos)
+    *num = (*num << 8) | buf[pos];
+
+  return field_len;
+}
+
+// Reads an element header: an ID field followed by an element size field.
+//
+// Returns: The number of bytes parsed on success, 0 if |size| is 0, and
+// -1 on error.
+// |*id| and |*element_size| are only meaningful when the result is > 0.
+static int ParseWebMElementHeader(const uint8* buf, int size,
+                                  int* id, int64* element_size) {
+  DCHECK(buf);
+  DCHECK_GE(size, 0);
+  DCHECK(id);
+  DCHECK(element_size);
+
+  if (size == 0)
+    return 0;
+
+  // The ID keeps its length marker bits and is at most 4 bytes long.
+  int64 field = 0;
+  const int id_len = ParseWebMElementHeaderField(buf, size, 4, false, &field);
+  if (id_len <= 0)
+    return id_len;
+
+  *id = static_cast<int>(field);
+
+  // The size has its length marker masked off and is at most 8 bytes long.
+  const int size_len = ParseWebMElementHeaderField(buf + id_len,
+                                                   size - id_len,
+                                                   8, true, &field);
+  if (size_len <= 0)
+    return size_len;
+
+  *element_size = field;
+  return id_len + size_len;
+}
+
+// Finds the ElementIdInfo for |id| in the |id_info| table, or NULL if the
+// ID is not listed. |id_info_size| is the table size in bytes.
+static const ElementIdInfo* FindIdInfo(int id,
+                                       const ElementIdInfo* id_info,
+                                       int id_info_size) {
+  const int entries = id_info_size / sizeof(*id_info);
+  int index = 0;
+  while (index < entries && id_info[index].id_ != id)
+    ++index;
+
+  return (index < entries) ? &id_info[index] : NULL;
+}
+
+// Returns the kListElementInfo entry for list element |id|, or NULL.
+static const ListElementInfo* FindListInfo(int id) {
+  int index = 0;
+  while (index < kListElementInfoCount && kListElementInfo[index].id_ != id)
+    ++index;
+
+  const bool found = index < kListElementInfoCount;
+  return found ? &kListElementInfo[index] : NULL;
+}
+
+// Parses a SimpleBlock payload and hands the frame to |client|. Laced
+// blocks and track numbers above 127 are rejected.
+// Returns: |size| on success. -1 on error.
+static int ParseSimpleBlock(const uint8* buf, int size,
+                            WebMParserClient* client) {
+  // Need the 1-byte track number, 2-byte timecode and 1-byte flags.
+  if (size < 4)
+    return -1;
+
+  // Return an error if the trackNum > 127. We just aren't
+  // going to support large track numbers right now.
+  if ((buf[0] & 0x80) != 0x80) {
+    VLOG(1) << "TrackNumber over 127 not supported";
+    return -1;
+  }
+
+  int track_num = buf[0] & 0x7f;
+  int timecode = buf[1] << 8 | buf[2];
+  int flags = buf[3] & 0xff;
+  int lacing = (flags >> 1) & 0x3;
+  if (lacing != 0) {
+    VLOG(1) << "Lacing " << lacing << " not supported yet.";
+    return -1;
+  }
+
+  // Sign extend negative timecode offsets. ~0xffff sets the upper bits
+  // without left-shifting a negative value, which is undefined behavior.
+  if (timecode & 0x8000)
+    timecode |= ~0xffff;
+
+  // The frame is everything after the 4 header bytes.
+  if (!client->OnSimpleBlock(track_num, timecode, flags, buf + 4, size - 4))
+    return -1;
+  return size;
+}
+
+static int ParseElements(const ElementIdInfo* id_info,
+                         int id_info_size,
+                         const uint8* buf, int size, int level,
+                         WebMParserClient* client);  // Forward declaration.
+
+// Parses the body of list element |id|: looks up the table of valid
+// sub-elements, then brackets their parsing with OnListStart()/OnListEnd().
+// |buf|/|size| cover the list payload; children are parsed at |level| + 1.
+//
+// Returns: The number of bytes parsed on success. -1 on error.
+static int ParseElementList(const uint8* buf, int size,
+                            int id, int level,
+                            WebMParserClient* client) {
+  const ListElementInfo* const children = FindListInfo(id);
+  if (children == NULL) {
+    VLOG(1) << "Failed to find list info for ID " << std::hex << id;
+    return -1;
+  }
+
+  if (!client->OnListStart(id))
+    return -1;
+
+  const int parsed = ParseElements(children->id_info_,
+                                   children->id_info_size_,
+                                   buf, size, level + 1, client);
+
+  // OnListEnd() is only reached when every sub-element parsed cleanly.
+  if (parsed < 0 || !client->OnListEnd(id))
+    return -1;
+
+  DCHECK_EQ(parsed, size);
+  return parsed;
+}
+
+// Decodes a big-endian integer of 1 to 8 bytes and passes it to OnUInt().
+// Returns: |size| on success. -1 on error.
+static int ParseUInt(const uint8* buf, int size, int id,
+                     WebMParserClient* client) {
+  if (size < 1 || size > 8)
+    return -1;
+
+  // Accumulate from the most significant byte down.
+  int64 value = 0;
+  const uint8* const end = buf + size;
+  for (const uint8* byte = buf; byte != end; ++byte)
+    value = (value << 8) | *byte;
+
+  return client->OnUInt(id, value) ? size : -1;
+}
+
+// Decodes a 4 or 8 byte big-endian floating point value and passes it to
+// OnFloat() as a double.
+// Returns: |size| on success. -1 on error.
+static int ParseFloat(const uint8* buf, int size, int id,
+                      WebMParserClient* client) {
+  const bool is_single = (size == 4);
+  const bool is_double = (size == 8);
+  if (!is_single && !is_double)
+    return -1;
+
+  // Assemble the big-endian bytes into a native endian integer.
+  int64 bits = 0;
+  for (int pos = 0; pos < size; ++pos)
+    bits = (bits << 8) | buf[pos];
+
+  // Reinterpret the integer bit pattern as a floating point number by
+  // writing one union member and reading the other.
+  double value = -1;
+  if (is_single) {
+    union {
+      int32 src;
+      float dst;
+    } pun;
+    pun.src = static_cast<int32>(bits);
+    value = pun.dst;
+  } else {
+    union {
+      int64 src;
+      double dst;
+    } pun;
+    pun.src = bits;
+    value = pun.dst;
+  }
+
+  if (!client->OnFloat(id, value))
+    return -1;
+  return size;
+}
+
+// Parses consecutive elements from |buf| until all |size| bytes are used.
+// |id_info| / |id_info_size| - Table of IDs valid here & its size in bytes.
+// |level| - Nesting level that every recognized element must be listed at.
+// |client| - Receives the callbacks for the parsed elements.
+//
+// Returns: The number of bytes parsed on success. -1 on error.
+static int ParseElements(const ElementIdInfo* id_info,
+                         int id_info_size,
+                         const uint8* buf, int size, int level,
+                         WebMParserClient* client) {
+  DCHECK_GE(id_info_size, 0);
+  DCHECK_GE(size, 0);
+  DCHECK_GE(level, 0);
+
+  if (level > kMaxLevelDepth)
+    return -1;
+
+  int used = 0;
+  while (used < size) {
+    int id;
+    int64 element_size;
+    const int header_len = ParseWebMElementHeader(buf + used, size - used,
+                                                  &id, &element_size);
+    if (header_len < 0)
+      return header_len;
+
+    if (header_len == 0)
+      break;
+
+    used += header_len;
+
+    // Check to see if the element is larger than the remaining data.
+    if (element_size > size - used)
+      return -1;
+
+    // The check above guarantees the payload size fits in an int.
+    const uint8* const payload = buf + used;
+    const int payload_size = static_cast<int>(element_size);
+
+    const ElementIdInfo* const info = FindIdInfo(id, id_info, id_info_size);
+    if (info == NULL) {
+      VLOG(1) << "No info for ID " << std::hex << id;
+
+      // TODO(acolwell): Change this to return -1 after the API has solidified.
+      // We don't want to allow elements we don't recognize.
+      used += payload_size;
+      continue;
+    }
+
+    if (info->level_ != level) {
+      VLOG(1) << "ID " << std::hex << id << std::dec << " at level "
+              << level << " instead of " << info->level_;
+      return -1;
+    }
+
+    // Dispatch on the payload type; |ok| stays false on any failure.
+    bool ok = false;
+    switch (info->type_) {
+      case SBLOCK:
+        ok = ParseSimpleBlock(payload, payload_size, client) > 0;
+        break;
+      case LIST:
+        ok = ParseElementList(payload, payload_size, id, level, client) >= 0;
+        break;
+      case UINT:
+        ok = ParseUInt(payload, payload_size, id, client) > 0;
+        break;
+      case FLOAT:
+        ok = ParseFloat(payload, payload_size, id, client) > 0;
+        break;
+      case BINARY:
+        ok = client->OnBinary(id, payload, payload_size);
+        break;
+      case STRING: {
+        const char* const chars = reinterpret_cast<const char*>(payload);
+        ok = client->OnString(id, std::string(chars, payload_size));
+        break;
+      }
+      case SKIP:
+        // Valid element that we don't care about: nothing to report.
+        ok = true;
+        break;
+      default:
+        VLOG(1) << "Unhandled id type " << info->type_;
+        break;
+    }
+
+    if (!ok)
+      return -1;
+
+    used += payload_size;
+  }
+
+  return used;
+}
+
+// Parses a single list element that matches |id|. This method fails if the
+// buffer points to an element that does not match |id|.
+// |buf|/|size| - Data starting at the element's header.
+// |level| - Nesting level of the list element; its children are parsed at
+//           |level| + 1.
+//
+// Returns: The number of bytes parsed, header included, on success. -1 on
+// error, including when |size| is 0.
+int WebMParseListElement(const uint8* buf, int size, int id,
+                         int level, WebMParserClient* client) {
+  if (size == 0)
+    return -1;
+
+  int element_id = 0;
+  int64 element_size = 0;
+  const int header_len = ParseWebMElementHeader(buf, size,
+                                                &element_id, &element_size);
+  if (header_len <= 0)
+    return header_len;
+
+  // The element must be the requested list and fit in the remaining data.
+  const int remaining = size - header_len;
+  if (element_id != id || element_size > remaining)
+    return -1;
+
+  const int list_len = ParseElementList(buf + header_len, element_size,
+                                        element_id, level, client);
+  if (list_len < 0)
+    return -1;
+
+  // Header bytes plus list payload bytes.
+  return header_len + list_len;
+}
+
+}  // namespace media