diff options
author | dalecurtis@chromium.org <dalecurtis@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-01-23 03:40:36 +0000 |
---|---|---|
committer | dalecurtis@chromium.org <dalecurtis@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-01-23 03:40:36 +0000 |
commit | 278df3052aa195ee1a865e470877ece21edce2ec (patch) | |
tree | 933bacb2ccb71968e78e88e4a26eb483b322b420 /media/formats | |
parent | ce631ee54387703370a8924fcba68a2f6b7c79ac (diff) | |
download | chromium_src-278df3052aa195ee1a865e470877ece21edce2ec.zip chromium_src-278df3052aa195ee1a865e470877ece21edce2ec.tar.gz chromium_src-278df3052aa195ee1a865e470877ece21edce2ec.tar.bz2 |
Move MSE parsers under "formats" root directory.
Changes made programmatically:
find -name \*.cc -o -name \*.h | xargs sed -r -i 's,media/(mp2t|mp3|mp4|webm)/,media/formats/\1/,g'
find -name \*.cc -o -name \*.h | xargs sed -r -i 's,MEDIA_(MP2T|MP3|MP4|WEBM)_,MEDIA_FORMATS_\1_,g'
find -name \*.gyp | xargs sed -r -i "s,'(mp2t|mp3|mp4|webm)/,'formats/\1/,g"
BUG=none
TEST=compiles
Review URL: https://codereview.chromium.org/136053003
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@246512 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'media/formats')
88 files changed, 15786 insertions, 0 deletions
diff --git a/media/formats/mp2t/es_parser.h b/media/formats/mp2t/es_parser.h new file mode 100644 index 0000000..5297d32 --- /dev/null +++ b/media/formats/mp2t/es_parser.h @@ -0,0 +1,42 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_MP2T_ES_PARSER_H_ +#define MEDIA_FORMATS_MP2T_ES_PARSER_H_ + +#include "base/basictypes.h" +#include "base/callback.h" +#include "base/memory/ref_counted.h" +#include "base/time/time.h" + +namespace media { + +class StreamParserBuffer; + +namespace mp2t { + +class EsParser { + public: + typedef base::Callback<void(scoped_refptr<StreamParserBuffer>)> EmitBufferCB; + + EsParser() {} + virtual ~EsParser() {} + + // ES parsing. + // Should use kNoTimestamp when a timestamp is not valid. + virtual bool Parse(const uint8* buf, int size, + base::TimeDelta pts, + base::TimeDelta dts) = 0; + + // Flush any pending buffer. + virtual void Flush() = 0; + + // Reset the state of the ES parser. + virtual void Reset() = 0; +}; + +} // namespace mp2t +} // namespace media + +#endif diff --git a/media/formats/mp2t/es_parser_adts.cc b/media/formats/mp2t/es_parser_adts.cc new file mode 100644 index 0000000..d8bc407 --- /dev/null +++ b/media/formats/mp2t/es_parser_adts.cc @@ -0,0 +1,306 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "media/formats/mp2t/es_parser_adts.h" + +#include <list> + +#include "base/basictypes.h" +#include "base/logging.h" +#include "base/strings/string_number_conversions.h" +#include "media/base/audio_timestamp_helper.h" +#include "media/base/bit_reader.h" +#include "media/base/buffers.h" +#include "media/base/channel_layout.h" +#include "media/base/stream_parser_buffer.h" +#include "media/formats/mp2t/mp2t_common.h" + +// Adts header is at least 7 bytes (can be 9 bytes). +static const int kAdtsHeaderMinSize = 7; + +static const int adts_frequency_table[16] = { + 96000, + 88200, + 64000, + 48000, + 44100, + 32000, + 24000, + 22050, + 16000, + 12000, + 11025, + 8000, + 7350, + 0, + 0, + 0, +}; +static const int kMaxSupportedFrequencyIndex = 12; + +static media::ChannelLayout adts_channel_layout[8] = { + media::CHANNEL_LAYOUT_NONE, + media::CHANNEL_LAYOUT_MONO, + media::CHANNEL_LAYOUT_STEREO, + media::CHANNEL_LAYOUT_SURROUND, + media::CHANNEL_LAYOUT_4_0, + media::CHANNEL_LAYOUT_5_0_BACK, + media::CHANNEL_LAYOUT_5_1_BACK, + media::CHANNEL_LAYOUT_7_1, +}; + +// Number of samples per frame. +static const int kNumberSamplesPerAACFrame = 1024; + +static int ExtractAdtsFrameSize(const uint8* adts_header) { + return ((static_cast<int>(adts_header[5]) >> 5) | + (static_cast<int>(adts_header[4]) << 3) | + ((static_cast<int>(adts_header[3]) & 0x3) << 11)); +} + +static int ExtractAdtsFrequencyIndex(const uint8* adts_header) { + return ((adts_header[2] >> 2) & 0xf); +} + +static int ExtractAdtsChannelConfig(const uint8* adts_header) { + return (((adts_header[3] >> 6) & 0x3) | + ((adts_header[2] & 0x1) << 2)); +} + +// Return true if buf corresponds to an ADTS syncword. +// |buf| size must be at least 2. +static bool isAdtsSyncWord(const uint8* buf) { + return (buf[0] == 0xff) && ((buf[1] & 0xf6) == 0xf0); +} + +// Look for an ADTS syncword. 
+// |new_pos| returns +// - either the byte position of the ADTS frame (if found) +// - or the byte position of 1st byte that was not processed (if not found). +// In every case, the returned value in |new_pos| is such that new_pos >= pos +// |frame_sz| returns the size of the ADTS frame (if found). +// Return whether a syncword was found. +static bool LookForSyncWord(const uint8* raw_es, int raw_es_size, + int pos, + int* new_pos, int* frame_sz) { + DCHECK_GE(pos, 0); + DCHECK_LE(pos, raw_es_size); + + int max_offset = raw_es_size - kAdtsHeaderMinSize; + if (pos >= max_offset) { + // Do not change the position if: + // - max_offset < 0: not enough bytes to get a full header + // Since pos >= 0, this is a subcase of the next condition. + // - pos >= max_offset: might be the case after reading one full frame, + // |pos| is then incremented by the frame size and might then point + // to the end of the buffer. + *new_pos = pos; + return false; + } + + for (int offset = pos; offset < max_offset; offset++) { + const uint8* cur_buf = &raw_es[offset]; + + if (!isAdtsSyncWord(cur_buf)) + // The first 12 bits must be 1. + // The layer field (2 bits) must be set to 0. + continue; + + int frame_size = ExtractAdtsFrameSize(cur_buf); + if (frame_size < kAdtsHeaderMinSize) { + // Too short to be an ADTS frame. + continue; + } + + // Check whether there is another frame + // |size| apart from the current one. 
+ int remaining_size = raw_es_size - offset; + if (remaining_size >= frame_size + 2 && + !isAdtsSyncWord(&cur_buf[frame_size])) { + continue; + } + + *new_pos = offset; + *frame_sz = frame_size; + return true; + } + + *new_pos = max_offset; + return false; +} + +namespace media { +namespace mp2t { + +EsParserAdts::EsParserAdts( + const NewAudioConfigCB& new_audio_config_cb, + const EmitBufferCB& emit_buffer_cb, + bool sbr_in_mimetype) + : new_audio_config_cb_(new_audio_config_cb), + emit_buffer_cb_(emit_buffer_cb), + sbr_in_mimetype_(sbr_in_mimetype) { +} + +EsParserAdts::~EsParserAdts() { +} + +bool EsParserAdts::Parse(const uint8* buf, int size, + base::TimeDelta pts, + base::TimeDelta dts) { + int raw_es_size; + const uint8* raw_es; + + // The incoming PTS applies to the access unit that comes just after + // the beginning of |buf|. + if (pts != kNoTimestamp()) { + es_byte_queue_.Peek(&raw_es, &raw_es_size); + pts_list_.push_back(EsPts(raw_es_size, pts)); + } + + // Copy the input data to the ES buffer. + es_byte_queue_.Push(buf, size); + es_byte_queue_.Peek(&raw_es, &raw_es_size); + + // Look for every ADTS frame in the ES buffer starting at offset = 0 + int es_position = 0; + int frame_size; + while (LookForSyncWord(raw_es, raw_es_size, es_position, + &es_position, &frame_size)) { + DVLOG(LOG_LEVEL_ES) + << "ADTS syncword @ pos=" << es_position + << " frame_size=" << frame_size; + DVLOG(LOG_LEVEL_ES) + << "ADTS header: " + << base::HexEncode(&raw_es[es_position], kAdtsHeaderMinSize); + + // Do not process the frame if this one is a partial frame. + int remaining_size = raw_es_size - es_position; + if (frame_size > remaining_size) + break; + + // Update the audio configuration if needed. + DCHECK_GE(frame_size, kAdtsHeaderMinSize); + if (!UpdateAudioConfiguration(&raw_es[es_position])) + return false; + + // Get the PTS & the duration of this access unit. 
+ while (!pts_list_.empty() && + pts_list_.front().first <= es_position) { + audio_timestamp_helper_->SetBaseTimestamp(pts_list_.front().second); + pts_list_.pop_front(); + } + + base::TimeDelta current_pts = audio_timestamp_helper_->GetTimestamp(); + base::TimeDelta frame_duration = + audio_timestamp_helper_->GetFrameDuration(kNumberSamplesPerAACFrame); + + // Emit an audio frame. + bool is_key_frame = true; + scoped_refptr<StreamParserBuffer> stream_parser_buffer = + StreamParserBuffer::CopyFrom( + &raw_es[es_position], + frame_size, + is_key_frame); + stream_parser_buffer->SetDecodeTimestamp(current_pts); + stream_parser_buffer->set_timestamp(current_pts); + stream_parser_buffer->set_duration(frame_duration); + emit_buffer_cb_.Run(stream_parser_buffer); + + // Update the PTS of the next frame. + audio_timestamp_helper_->AddFrames(kNumberSamplesPerAACFrame); + + // Skip the current frame. + es_position += frame_size; + } + + // Discard all the bytes that have been processed. + DiscardEs(es_position); + + return true; +} + +void EsParserAdts::Flush() { +} + +void EsParserAdts::Reset() { + es_byte_queue_.Reset(); + pts_list_.clear(); + last_audio_decoder_config_ = AudioDecoderConfig(); +} + +bool EsParserAdts::UpdateAudioConfiguration(const uint8* adts_header) { + int frequency_index = ExtractAdtsFrequencyIndex(adts_header); + if (frequency_index > kMaxSupportedFrequencyIndex) { + // Frequency index 13 & 14 are reserved + // while 15 means that the frequency is explicitly written + // (not supported). + return false; + } + + int channel_configuration = ExtractAdtsChannelConfig(adts_header); + if (channel_configuration == 0) { + // TODO(damienv): Add support for inband channel configuration. + return false; + } + + // TODO(damienv): support HE-AAC frequency doubling (SBR) + // based on the incoming ADTS profile. 
+ int samples_per_second = adts_frequency_table[frequency_index]; + int adts_profile = (adts_header[2] >> 6) & 0x3; + + // The following code is written according to ISO 14496 Part 3 Table 1.11 and + // Table 1.22. (Table 1.11 refers to the capping to 48000, Table 1.22 refers + // to SBR doubling the AAC sample rate.) + // TODO(damienv) : Extend sample rate cap to 96kHz for Level 5 content. + int extended_samples_per_second = sbr_in_mimetype_ + ? std::min(2 * samples_per_second, 48000) + : samples_per_second; + + AudioDecoderConfig audio_decoder_config( + kCodecAAC, + kSampleFormatS16, + adts_channel_layout[channel_configuration], + extended_samples_per_second, + NULL, 0, + false); + + if (!audio_decoder_config.Matches(last_audio_decoder_config_)) { + DVLOG(1) << "Sampling frequency: " << samples_per_second; + DVLOG(1) << "Extended sampling frequency: " << extended_samples_per_second; + DVLOG(1) << "Channel config: " << channel_configuration; + DVLOG(1) << "Adts profile: " << adts_profile; + // Reset the timestamp helper to use a new time scale. + if (audio_timestamp_helper_) { + base::TimeDelta base_timestamp = audio_timestamp_helper_->GetTimestamp(); + audio_timestamp_helper_.reset( + new AudioTimestampHelper(samples_per_second)); + audio_timestamp_helper_->SetBaseTimestamp(base_timestamp); + } else { + audio_timestamp_helper_.reset( + new AudioTimestampHelper(samples_per_second)); + } + // Audio config notification. + last_audio_decoder_config_ = audio_decoder_config; + new_audio_config_cb_.Run(audio_decoder_config); + } + + return true; +} + +void EsParserAdts::DiscardEs(int nbytes) { + DCHECK_GE(nbytes, 0); + if (nbytes <= 0) + return; + + // Adjust the ES position of each PTS. + for (EsPtsList::iterator it = pts_list_.begin(); it != pts_list_.end(); ++it) + it->first -= nbytes; + + // Discard |nbytes| of ES. 
+ es_byte_queue_.Pop(nbytes); +} + +} // namespace mp2t +} // namespace media + diff --git a/media/formats/mp2t/es_parser_adts.h b/media/formats/mp2t/es_parser_adts.h new file mode 100644 index 0000000..e55eaf7 --- /dev/null +++ b/media/formats/mp2t/es_parser_adts.h @@ -0,0 +1,86 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_MP2T_ES_PARSER_ADTS_H_ +#define MEDIA_FORMATS_MP2T_ES_PARSER_ADTS_H_ + +#include <list> +#include <utility> + +#include "base/callback.h" +#include "base/compiler_specific.h" +#include "base/memory/scoped_ptr.h" +#include "base/time/time.h" +#include "media/base/audio_decoder_config.h" +#include "media/base/byte_queue.h" +#include "media/formats/mp2t/es_parser.h" + +namespace media { +class AudioTimestampHelper; +class BitReader; +class StreamParserBuffer; +} + +namespace media { +namespace mp2t { + +class EsParserAdts : public EsParser { + public: + typedef base::Callback<void(const AudioDecoderConfig&)> NewAudioConfigCB; + + EsParserAdts(const NewAudioConfigCB& new_audio_config_cb, + const EmitBufferCB& emit_buffer_cb, + bool sbr_in_mimetype); + virtual ~EsParserAdts(); + + // EsParser implementation. + virtual bool Parse(const uint8* buf, int size, + base::TimeDelta pts, + base::TimeDelta dts) OVERRIDE; + virtual void Flush() OVERRIDE; + virtual void Reset() OVERRIDE; + + private: + // Used to link a PTS with a byte position in the ES stream. + typedef std::pair<int, base::TimeDelta> EsPts; + typedef std::list<EsPts> EsPtsList; + + // Signal any audio configuration change (if any). + // Return false if the current audio config is not + // a supported ADTS audio config. + bool UpdateAudioConfiguration(const uint8* adts_header); + + // Discard some bytes from the ES stream. + void DiscardEs(int nbytes); + + // Callbacks: + // - to signal a new audio configuration, + // - to send ES buffers. 
+ NewAudioConfigCB new_audio_config_cb_; + EmitBufferCB emit_buffer_cb_; + + // True when AAC SBR extension is signalled in the mimetype + // (mp4a.40.5 in the codecs parameter). + bool sbr_in_mimetype_; + + // Bytes of the ES stream that have not been emitted yet. + ByteQueue es_byte_queue_; + + // List of PTS associated with a position in the ES stream. + EsPtsList pts_list_; + + // Interpolated PTS for frames that don't have one. + scoped_ptr<AudioTimestampHelper> audio_timestamp_helper_; + + // Last audio config. + AudioDecoderConfig last_audio_decoder_config_; + + DISALLOW_COPY_AND_ASSIGN(EsParserAdts); +}; + +} // namespace mp2t +} // namespace media + +#endif + diff --git a/media/formats/mp2t/es_parser_h264.cc b/media/formats/mp2t/es_parser_h264.cc new file mode 100644 index 0000000..3a834a7 --- /dev/null +++ b/media/formats/mp2t/es_parser_h264.cc @@ -0,0 +1,535 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/mp2t/es_parser_h264.h" + +#include "base/basictypes.h" +#include "base/logging.h" +#include "media/base/bit_reader.h" +#include "media/base/buffers.h" +#include "media/base/stream_parser_buffer.h" +#include "media/base/video_frame.h" +#include "media/formats/mp2t/mp2t_common.h" +#include "ui/gfx/rect.h" +#include "ui/gfx/size.h" + +static const int kExtendedSar = 255; + +// ISO 14496 part 10 +// VUI parameters: Table E-1 "Meaning of sample aspect ratio indicator" +static const int kSarTableSize = 17; +static const int kTableSarWidth[kSarTableSize] = { + 0, 1, 12, 10, 16, 40, 24, 20, 32, 80, 18, 15, 64, 160, 4, 3, 2 +}; +static const int kTableSarHeight[kSarTableSize] = { + 0, 1, 11, 11, 11, 33, 11, 11, 11, 33, 11, 11, 33, 99, 3, 2, 1 +}; + +// Remove the start code emulation prevention ( 0x000003 ) +// and return the size of the converted buffer. 
+// Note: Size of |buf_rbsp| should be at least |size| to accomodate +// the worst case. +static int ConvertToRbsp(const uint8* buf, int size, uint8* buf_rbsp) { + int rbsp_size = 0; + int zero_count = 0; + for (int k = 0; k < size; k++) { + if (buf[k] == 0x3 && zero_count >= 2) { + zero_count = 0; + continue; + } + if (buf[k] == 0) + zero_count++; + else + zero_count = 0; + buf_rbsp[rbsp_size++] = buf[k]; + } + return rbsp_size; +} + +namespace media { +namespace mp2t { + +// ISO 14496 - Part 10: Table 7-1 "NAL unit type codes" +enum NalUnitType { + kNalUnitTypeNonIdrSlice = 1, + kNalUnitTypeIdrSlice = 5, + kNalUnitTypeSPS = 7, + kNalUnitTypePPS = 8, + kNalUnitTypeAUD = 9, +}; + +class BitReaderH264 : public BitReader { + public: + BitReaderH264(const uint8* data, off_t size) + : BitReader(data, size) { } + + // Read an unsigned exp-golomb value. + // Return true if successful. + bool ReadBitsExpGolomb(uint32* exp_golomb_value); +}; + +bool BitReaderH264::ReadBitsExpGolomb(uint32* exp_golomb_value) { + // Get the number of leading zeros. + int zero_count = 0; + while (true) { + int one_bit; + RCHECK(ReadBits(1, &one_bit)); + if (one_bit != 0) + break; + zero_count++; + } + + // If zero_count is greater than 31, the calculated value will overflow. + if (zero_count > 31) { + SkipBits(zero_count); + return false; + } + + // Read the actual value. 
+ uint32 base = (1 << zero_count) - 1; + uint32 offset; + RCHECK(ReadBits(zero_count, &offset)); + *exp_golomb_value = base + offset; + + return true; +} + +EsParserH264::EsParserH264( + const NewVideoConfigCB& new_video_config_cb, + const EmitBufferCB& emit_buffer_cb) + : new_video_config_cb_(new_video_config_cb), + emit_buffer_cb_(emit_buffer_cb), + es_pos_(0), + current_nal_pos_(-1), + current_access_unit_pos_(-1), + is_key_frame_(false) { +} + +EsParserH264::~EsParserH264() { +} + +bool EsParserH264::Parse(const uint8* buf, int size, + base::TimeDelta pts, + base::TimeDelta dts) { + // Note: Parse is invoked each time a PES packet has been reassembled. + // Unfortunately, a PES packet does not necessarily map + // to an h264 access unit, although the HLS recommendation is to use one PES + // for each access unit (but this is just a recommendation and some streams + // do not comply with this recommendation). + + // Link position |raw_es_size| in the ES stream with a timing descriptor. + // HLS recommendation: "In AVC video, you should have both a DTS and a + // PTS in each PES header". + if (dts == kNoTimestamp() && pts == kNoTimestamp()) { + DVLOG(1) << "A timestamp must be provided for each reassembled PES"; + return false; + } + TimingDesc timing_desc; + timing_desc.pts = pts; + timing_desc.dts = (dts != kNoTimestamp()) ? dts : pts; + + int raw_es_size; + const uint8* raw_es; + es_byte_queue_.Peek(&raw_es, &raw_es_size); + timing_desc_list_.push_back( + std::pair<int, TimingDesc>(raw_es_size, timing_desc)); + + // Add the incoming bytes to the ES queue. + es_byte_queue_.Push(buf, size); + + // Add NALs from the incoming buffer. + if (!ParseInternal()) + return false; + + // Discard emitted frames + // or every byte that was parsed so far if there is no current frame. + int skip_count = + (current_access_unit_pos_ >= 0) ? 
current_access_unit_pos_ : es_pos_; + DiscardEs(skip_count); + + return true; +} + +void EsParserH264::Flush() { + if (current_access_unit_pos_ < 0) + return; + + // Force emitting the last access unit. + int next_aud_pos; + const uint8* raw_es; + es_byte_queue_.Peek(&raw_es, &next_aud_pos); + EmitFrameIfNeeded(next_aud_pos); + current_nal_pos_ = -1; + StartFrame(-1); + + // Discard the emitted frame. + DiscardEs(next_aud_pos); +} + +void EsParserH264::Reset() { + DVLOG(1) << "EsParserH264::Reset"; + es_byte_queue_.Reset(); + timing_desc_list_.clear(); + es_pos_ = 0; + current_nal_pos_ = -1; + StartFrame(-1); + last_video_decoder_config_ = VideoDecoderConfig(); +} + +bool EsParserH264::ParseInternal() { + int raw_es_size; + const uint8* raw_es; + es_byte_queue_.Peek(&raw_es, &raw_es_size); + + DCHECK_GE(es_pos_, 0); + DCHECK_LT(es_pos_, raw_es_size); + + // Resume h264 es parsing where it was left. + for ( ; es_pos_ < raw_es_size - 4; es_pos_++) { + // Make sure the syncword is either 00 00 00 01 or 00 00 01 + if (raw_es[es_pos_ + 0] != 0 || raw_es[es_pos_ + 1] != 0) + continue; + int syncword_length = 0; + if (raw_es[es_pos_ + 2] == 0 && raw_es[es_pos_ + 3] == 1) + syncword_length = 4; + else if (raw_es[es_pos_ + 2] == 1) + syncword_length = 3; + else + continue; + + // Parse the current NAL (and the new NAL then becomes the current one). + if (current_nal_pos_ >= 0) { + int nal_size = es_pos_ - current_nal_pos_; + DCHECK_GT(nal_size, 0); + RCHECK(NalParser(&raw_es[current_nal_pos_], nal_size)); + } + current_nal_pos_ = es_pos_ + syncword_length; + + // Retrieve the NAL type. + int nal_header = raw_es[current_nal_pos_]; + int forbidden_zero_bit = (nal_header >> 7) & 0x1; + RCHECK(forbidden_zero_bit == 0); + NalUnitType nal_unit_type = static_cast<NalUnitType>(nal_header & 0x1f); + DVLOG(LOG_LEVEL_ES) << "nal: offset=" << es_pos_ + << " type=" << nal_unit_type; + + // Emit a frame if needed. 
+ if (nal_unit_type == kNalUnitTypeAUD) + RCHECK(EmitFrameIfNeeded(es_pos_)); + + // Skip the syncword. + es_pos_ += syncword_length; + } + + return true; +} + +bool EsParserH264::EmitFrameIfNeeded(int next_aud_pos) { + // There is no current frame: start a new frame. + if (current_access_unit_pos_ < 0) { + StartFrame(next_aud_pos); + return true; + } + + // Get the access unit timing info. + TimingDesc current_timing_desc = {kNoTimestamp(), kNoTimestamp()}; + while (!timing_desc_list_.empty() && + timing_desc_list_.front().first <= current_access_unit_pos_) { + current_timing_desc = timing_desc_list_.front().second; + timing_desc_list_.pop_front(); + } + + if (current_timing_desc.pts == kNoTimestamp()) + return false; + + // Emit a frame. + int raw_es_size; + const uint8* raw_es; + es_byte_queue_.Peek(&raw_es, &raw_es_size); + int access_unit_size = next_aud_pos - current_access_unit_pos_; + scoped_refptr<StreamParserBuffer> stream_parser_buffer = + StreamParserBuffer::CopyFrom( + &raw_es[current_access_unit_pos_], + access_unit_size, + is_key_frame_); + stream_parser_buffer->SetDecodeTimestamp(current_timing_desc.dts); + stream_parser_buffer->set_timestamp(current_timing_desc.pts); + emit_buffer_cb_.Run(stream_parser_buffer); + + // Set the current frame position to the next AUD position. + StartFrame(next_aud_pos); + return true; +} + +void EsParserH264::StartFrame(int aud_pos) { + // Two cases: + // - if aud_pos < 0, clear the current frame and set |is_key_frame| to a + // default value (false). + // - if aud_pos >= 0, start a new frame and set |is_key_frame| to true + // |is_key_frame_| will be updated while parsing the NALs of that frame. + // If any NAL is a non IDR NAL, it will be set to false. 
+ current_access_unit_pos_ = aud_pos; + is_key_frame_ = (aud_pos >= 0); +} + +void EsParserH264::DiscardEs(int nbytes) { + DCHECK_GE(nbytes, 0); + if (nbytes == 0) + return; + + // Update the position of + // - the parser, + // - the current NAL, + // - the current access unit. + es_pos_ -= nbytes; + if (es_pos_ < 0) + es_pos_ = 0; + + if (current_nal_pos_ >= 0) { + DCHECK_GE(current_nal_pos_, nbytes); + current_nal_pos_ -= nbytes; + } + if (current_access_unit_pos_ >= 0) { + DCHECK_GE(current_access_unit_pos_, nbytes); + current_access_unit_pos_ -= nbytes; + } + + // Update the timing information accordingly. + std::list<std::pair<int, TimingDesc> >::iterator timing_it + = timing_desc_list_.begin(); + for (; timing_it != timing_desc_list_.end(); ++timing_it) + timing_it->first -= nbytes; + + // Discard |nbytes| of ES. + es_byte_queue_.Pop(nbytes); +} + +bool EsParserH264::NalParser(const uint8* buf, int size) { + // Get the NAL header. + if (size < 1) { + DVLOG(1) << "NalParser: incomplete NAL"; + return false; + } + int nal_header = buf[0]; + buf += 1; + size -= 1; + + int forbidden_zero_bit = (nal_header >> 7) & 0x1; + if (forbidden_zero_bit != 0) + return false; + int nal_ref_idc = (nal_header >> 5) & 0x3; + int nal_unit_type = nal_header & 0x1f; + + // Process the NAL content. + switch (nal_unit_type) { + case kNalUnitTypeSPS: + DVLOG(LOG_LEVEL_ES) << "NAL: SPS"; + // |nal_ref_idc| should not be 0 for a SPS. 
+ if (nal_ref_idc == 0) + return false; + return ProcessSPS(buf, size); + case kNalUnitTypeIdrSlice: + DVLOG(LOG_LEVEL_ES) << "NAL: IDR slice"; + return true; + case kNalUnitTypeNonIdrSlice: + DVLOG(LOG_LEVEL_ES) << "NAL: Non IDR slice"; + is_key_frame_ = false; + return true; + case kNalUnitTypePPS: + DVLOG(LOG_LEVEL_ES) << "NAL: PPS"; + return true; + case kNalUnitTypeAUD: + DVLOG(LOG_LEVEL_ES) << "NAL: AUD"; + return true; + default: + DVLOG(LOG_LEVEL_ES) << "NAL: " << nal_unit_type; + return true; + } + + NOTREACHED(); + return false; +} + +bool EsParserH264::ProcessSPS(const uint8* buf, int size) { + if (size <= 0) + return false; + + // Removes start code emulation prevention. + // TODO(damienv): refactoring in media/base + // so as to have a unique H264 bit reader in Chrome. + scoped_ptr<uint8[]> buf_rbsp(new uint8[size]); + int rbsp_size = ConvertToRbsp(buf, size, buf_rbsp.get()); + + BitReaderH264 bit_reader(buf_rbsp.get(), rbsp_size); + + int profile_idc; + int constraint_setX_flag; + int level_idc; + uint32 seq_parameter_set_id; + uint32 log2_max_frame_num_minus4; + uint32 pic_order_cnt_type; + RCHECK(bit_reader.ReadBits(8, &profile_idc)); + RCHECK(bit_reader.ReadBits(8, &constraint_setX_flag)); + RCHECK(bit_reader.ReadBits(8, &level_idc)); + RCHECK(bit_reader.ReadBitsExpGolomb(&seq_parameter_set_id)); + + if (profile_idc == 100 || profile_idc == 110 || + profile_idc == 122 || profile_idc == 244 || + profile_idc == 44 || profile_idc == 83 || + profile_idc == 86 || profile_idc == 118 || + profile_idc == 128) { + uint32 chroma_format_idc; + RCHECK(bit_reader.ReadBitsExpGolomb(&chroma_format_idc)); + if (chroma_format_idc == 3) { + int separate_colour_plane_flag; + RCHECK(bit_reader.ReadBits(1, &separate_colour_plane_flag)); + } + uint32 bit_depth_luma_minus8; + uint32 bit_depth_chroma_minus8; + int qpprime_y_zero_transform_bypass_flag; + int seq_scaling_matrix_present_flag; + RCHECK(bit_reader.ReadBitsExpGolomb(&bit_depth_luma_minus8)); + 
RCHECK(bit_reader.ReadBitsExpGolomb(&bit_depth_chroma_minus8)); + RCHECK(bit_reader.ReadBits(1, &qpprime_y_zero_transform_bypass_flag)); + RCHECK(bit_reader.ReadBits(1, &seq_scaling_matrix_present_flag)); + if (seq_scaling_matrix_present_flag) { + int skip_count = (chroma_format_idc != 3) ? 8 : 12; + RCHECK(bit_reader.SkipBits(skip_count)); + } + } + + RCHECK(bit_reader.ReadBitsExpGolomb(&log2_max_frame_num_minus4)); + RCHECK(bit_reader.ReadBitsExpGolomb(&pic_order_cnt_type)); + + // |pic_order_cnt_type| shall be in the range of 0 to 2. + RCHECK(pic_order_cnt_type <= 2); + if (pic_order_cnt_type == 0) { + uint32 log2_max_pic_order_cnt_lsb_minus4; + RCHECK(bit_reader.ReadBitsExpGolomb(&log2_max_pic_order_cnt_lsb_minus4)); + } else if (pic_order_cnt_type == 1) { + // Note: |offset_for_non_ref_pic| and |offset_for_top_to_bottom_field| + // corresponds to their codenum not to their actual value. + int delta_pic_order_always_zero_flag; + uint32 offset_for_non_ref_pic; + uint32 offset_for_top_to_bottom_field; + uint32 num_ref_frames_in_pic_order_cnt_cycle; + RCHECK(bit_reader.ReadBits(1, &delta_pic_order_always_zero_flag)); + RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_non_ref_pic)); + RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_top_to_bottom_field)); + RCHECK( + bit_reader.ReadBitsExpGolomb(&num_ref_frames_in_pic_order_cnt_cycle)); + for (uint32 i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++) { + uint32 offset_for_ref_frame_codenum; + RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_ref_frame_codenum)); + } + } + + uint32 num_ref_frames; + int gaps_in_frame_num_value_allowed_flag; + uint32 pic_width_in_mbs_minus1; + uint32 pic_height_in_map_units_minus1; + RCHECK(bit_reader.ReadBitsExpGolomb(&num_ref_frames)); + RCHECK(bit_reader.ReadBits(1, &gaps_in_frame_num_value_allowed_flag)); + RCHECK(bit_reader.ReadBitsExpGolomb(&pic_width_in_mbs_minus1)); + RCHECK(bit_reader.ReadBitsExpGolomb(&pic_height_in_map_units_minus1)); + + int frame_mbs_only_flag; + 
RCHECK(bit_reader.ReadBits(1, &frame_mbs_only_flag)); + if (!frame_mbs_only_flag) { + int mb_adaptive_frame_field_flag; + RCHECK(bit_reader.ReadBits(1, &mb_adaptive_frame_field_flag)); + } + + int direct_8x8_inference_flag; + RCHECK(bit_reader.ReadBits(1, &direct_8x8_inference_flag)); + + int frame_cropping_flag; + uint32 frame_crop_left_offset = 0; + uint32 frame_crop_right_offset = 0; + uint32 frame_crop_top_offset = 0; + uint32 frame_crop_bottom_offset = 0; + RCHECK(bit_reader.ReadBits(1, &frame_cropping_flag)); + if (frame_cropping_flag) { + RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_left_offset)); + RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_right_offset)); + RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_top_offset)); + RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_bottom_offset)); + } + + int vui_parameters_present_flag; + RCHECK(bit_reader.ReadBits(1, &vui_parameters_present_flag)); + int sar_width = 1; + int sar_height = 1; + if (vui_parameters_present_flag) { + // Read only the aspect ratio information from the VUI section. + // TODO(damienv): check whether other VUI info are useful. + int aspect_ratio_info_present_flag; + RCHECK(bit_reader.ReadBits(1, &aspect_ratio_info_present_flag)); + if (aspect_ratio_info_present_flag) { + int aspect_ratio_idc; + RCHECK(bit_reader.ReadBits(8, &aspect_ratio_idc)); + if (aspect_ratio_idc == kExtendedSar) { + RCHECK(bit_reader.ReadBits(16, &sar_width)); + RCHECK(bit_reader.ReadBits(16, &sar_height)); + } else if (aspect_ratio_idc < kSarTableSize) { + sar_width = kTableSarWidth[aspect_ratio_idc]; + sar_height = kTableSarHeight[aspect_ratio_idc]; + } + } + } + + if (sar_width == 0 || sar_height == 0) { + DVLOG(1) << "Unspecified SAR not supported"; + return false; + } + + // TODO(damienv): a MAP unit can be either 16 or 32 pixels. + // although it's 16 pixels for progressive non MBAFF frames. 
+ gfx::Size coded_size((pic_width_in_mbs_minus1 + 1) * 16, + (pic_height_in_map_units_minus1 + 1) * 16); + gfx::Rect visible_rect( + frame_crop_left_offset, + frame_crop_top_offset, + (coded_size.width() - frame_crop_right_offset) - frame_crop_left_offset, + (coded_size.height() - frame_crop_bottom_offset) - frame_crop_top_offset); + if (visible_rect.width() <= 0 || visible_rect.height() <= 0) + return false; + gfx::Size natural_size((visible_rect.width() * sar_width) / sar_height, + visible_rect.height()); + if (natural_size.width() == 0) + return false; + + // TODO(damienv): + // Assuming the SPS is used right away by the PPS + // and the slice headers is a strong assumption. + // In theory, we should process the SPS and PPS + // and only when one of the slice header is switching + // the PPS id, the video decoder config should be changed. + VideoDecoderConfig video_decoder_config( + kCodecH264, + VIDEO_CODEC_PROFILE_UNKNOWN, // TODO(damienv) + VideoFrame::YV12, + coded_size, + visible_rect, + natural_size, + NULL, 0, + false); + + if (!video_decoder_config.Matches(last_video_decoder_config_)) { + DVLOG(1) << "Profile IDC: " << profile_idc; + DVLOG(1) << "Level IDC: " << level_idc; + DVLOG(1) << "Pic width: " << (pic_width_in_mbs_minus1 + 1) * 16; + DVLOG(1) << "Pic height: " << (pic_height_in_map_units_minus1 + 1) * 16; + DVLOG(1) << "log2_max_frame_num_minus4: " << log2_max_frame_num_minus4; + DVLOG(1) << "SAR: width=" << sar_width << " height=" << sar_height; + last_video_decoder_config_ = video_decoder_config; + new_video_config_cb_.Run(video_decoder_config); + } + + return true; +} + +} // namespace mp2t +} // namespace media + diff --git a/media/formats/mp2t/es_parser_h264.h b/media/formats/mp2t/es_parser_h264.h new file mode 100644 index 0000000..b3da98c --- /dev/null +++ b/media/formats/mp2t/es_parser_h264.h @@ -0,0 +1,98 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. 
// ES parser for an H264 elementary stream; implements the EsParser interface.
//
// Remark:
// In this h264 parser, frame splitting is based on AUD nals.
// Mpeg2 TS spec: "2.14 Carriage of Rec. ITU-T H.264 | ISO/IEC 14496-10 video"
// "Each AVC access unit shall contain an access unit delimiter NAL Unit;"
//
class EsParserH264 : public EsParser {
 public:
  // Callback type used to report a video decoder configuration.
  // NOTE(review): presumably run only when the config derived from an SPS
  // differs from the last one reported - confirm in es_parser_h264.cc.
  typedef base::Callback<void(const VideoDecoderConfig&)> NewVideoConfigCB;

  // |new_video_config_cb| receives video configurations and |emit_buffer_cb|
  // receives the frames (see the callback members below).
  EsParserH264(const NewVideoConfigCB& new_video_config_cb,
               const EmitBufferCB& emit_buffer_cb);
  virtual ~EsParserH264();

  // EsParser implementation.
  virtual bool Parse(const uint8* buf, int size,
                     base::TimeDelta pts,
                     base::TimeDelta dts) OVERRIDE;
  virtual void Flush() OVERRIDE;
  virtual void Reset() OVERRIDE;

 private:
  // Decode and presentation timestamps kept together.
  struct TimingDesc {
    base::TimeDelta dts;
    base::TimeDelta pts;
  };

  // H264 parser.
  // It resumes parsing from byte position |es_pos_|.
  bool ParseInternal();

  // Emit a frame if a frame has been started earlier.
  // Returns true if successful, false if no PTS is available for the frame.
  bool EmitFrameIfNeeded(int next_aud_pos);

  // Start a new frame.
  // Note: if aud_pos < 0, clear the current frame.
  void StartFrame(int aud_pos);

  // Discard |nbytes| of ES from the ES byte queue.
  void DiscardEs(int nbytes);

  // Parse a NAL / SPS.
  // Returns true if successful (compliant bitstream).
  bool NalParser(const uint8* buf, int size);
  bool ProcessSPS(const uint8* buf, int size);

  // Callbacks to pass the stream configuration and the frames.
  NewVideoConfigCB new_video_config_cb_;
  EmitBufferCB emit_buffer_cb_;

  // Bytes of the ES stream that have not been emitted yet.
  ByteQueue es_byte_queue_;
  // NOTE(review): presumably each entry pairs a byte position in the ES with
  // the timestamps that apply from that position - confirm against the .cc.
  std::list<std::pair<int, TimingDesc> > timing_desc_list_;

  // H264 parser state.
  // Note: |current_access_unit_pos_| is pointing to an annexB syncword
  // while |current_nal_pos_| is pointing to the NAL unit
  // (i.e. does not include the annexB syncword).
  int es_pos_;
  int current_nal_pos_;
  int current_access_unit_pos_;
  bool is_key_frame_;

  // Last video decoder config.
  VideoDecoderConfig last_video_decoder_config_;
};
+ +#include "media/formats/mp2t/mp2t_stream_parser.h" + +#include "base/bind.h" +#include "base/memory/scoped_ptr.h" +#include "base/stl_util.h" +#include "media/base/audio_decoder_config.h" +#include "media/base/buffers.h" +#include "media/base/stream_parser_buffer.h" +#include "media/base/text_track_config.h" +#include "media/base/video_decoder_config.h" +#include "media/formats/mp2t/es_parser.h" +#include "media/formats/mp2t/es_parser_adts.h" +#include "media/formats/mp2t/es_parser_h264.h" +#include "media/formats/mp2t/mp2t_common.h" +#include "media/formats/mp2t/ts_packet.h" +#include "media/formats/mp2t/ts_section.h" +#include "media/formats/mp2t/ts_section_pat.h" +#include "media/formats/mp2t/ts_section_pes.h" +#include "media/formats/mp2t/ts_section_pmt.h" + +namespace media { +namespace mp2t { + +enum StreamType { + // ISO-13818.1 / ITU H.222 Table 2.34 "Stream type assignments" + kStreamTypeMpeg1Audio = 0x3, + kStreamTypeAAC = 0xf, + kStreamTypeAVC = 0x1b, +}; + +class PidState { + public: + enum PidType { + kPidPat, + kPidPmt, + kPidAudioPes, + kPidVideoPes, + }; + + PidState(int pid, PidType pid_tyoe, + scoped_ptr<TsSection> section_parser); + + // Extract the content of the TS packet and parse it. + // Return true if successful. + bool PushTsPacket(const TsPacket& ts_packet); + + // Flush the PID state (possibly emitting some pending frames) + // and reset its state. + void Flush(); + + // Enable/disable the PID. + // Disabling a PID will reset its state and ignore any further incoming TS + // packets. 
+ void Enable(); + void Disable(); + bool IsEnabled() const; + + PidType pid_type() const { return pid_type_; } + + private: + void ResetState(); + + int pid_; + PidType pid_type_; + scoped_ptr<TsSection> section_parser_; + + bool enable_; + + int continuity_counter_; +}; + +PidState::PidState(int pid, PidType pid_type, + scoped_ptr<TsSection> section_parser) + : pid_(pid), + pid_type_(pid_type), + section_parser_(section_parser.Pass()), + enable_(false), + continuity_counter_(-1) { + DCHECK(section_parser_); +} + +bool PidState::PushTsPacket(const TsPacket& ts_packet) { + DCHECK_EQ(ts_packet.pid(), pid_); + + // The current PID is not part of the PID filter, + // just discard the incoming TS packet. + if (!enable_) + return true; + + int expected_continuity_counter = (continuity_counter_ + 1) % 16; + if (continuity_counter_ >= 0 && + ts_packet.continuity_counter() != expected_continuity_counter) { + DVLOG(1) << "TS discontinuity detected for pid: " << pid_; + return false; + } + + bool status = section_parser_->Parse( + ts_packet.payload_unit_start_indicator(), + ts_packet.payload(), + ts_packet.payload_size()); + + // At the minimum, when parsing failed, auto reset the section parser. + // Components that use the StreamParser can take further action if needed. 
+ if (!status) { + DVLOG(1) << "Parsing failed for pid = " << pid_; + ResetState(); + } + + return status; +} + +void PidState::Flush() { + section_parser_->Flush(); + ResetState(); +} + +void PidState::Enable() { + enable_ = true; +} + +void PidState::Disable() { + if (!enable_) + return; + + ResetState(); + enable_ = false; +} + +bool PidState::IsEnabled() const { + return enable_; +} + +void PidState::ResetState() { + section_parser_->Reset(); + continuity_counter_ = -1; +} + +Mp2tStreamParser::BufferQueueWithConfig::BufferQueueWithConfig( + bool is_cfg_sent, + const AudioDecoderConfig& audio_cfg, + const VideoDecoderConfig& video_cfg) + : is_config_sent(is_cfg_sent), + audio_config(audio_cfg), + video_config(video_cfg) { +} + +Mp2tStreamParser::BufferQueueWithConfig::~BufferQueueWithConfig() { +} + +Mp2tStreamParser::Mp2tStreamParser(bool sbr_in_mimetype) + : sbr_in_mimetype_(sbr_in_mimetype), + selected_audio_pid_(-1), + selected_video_pid_(-1), + is_initialized_(false), + segment_started_(false), + first_video_frame_in_segment_(true) { +} + +Mp2tStreamParser::~Mp2tStreamParser() { + STLDeleteValues(&pids_); +} + +void Mp2tStreamParser::Init( + const InitCB& init_cb, + const NewConfigCB& config_cb, + const NewBuffersCB& new_buffers_cb, + const NewTextBuffersCB& /* text_cb */ , + const NeedKeyCB& need_key_cb, + const NewMediaSegmentCB& new_segment_cb, + const base::Closure& end_of_segment_cb, + const LogCB& log_cb) { + DCHECK(!is_initialized_); + DCHECK(init_cb_.is_null()); + DCHECK(!init_cb.is_null()); + DCHECK(!config_cb.is_null()); + DCHECK(!new_buffers_cb.is_null()); + DCHECK(!need_key_cb.is_null()); + DCHECK(!end_of_segment_cb.is_null()); + + init_cb_ = init_cb; + config_cb_ = config_cb; + new_buffers_cb_ = new_buffers_cb; + need_key_cb_ = need_key_cb; + new_segment_cb_ = new_segment_cb; + end_of_segment_cb_ = end_of_segment_cb; + log_cb_ = log_cb; +} + +void Mp2tStreamParser::Flush() { + DVLOG(1) << "Mp2tStreamParser::Flush"; + + // Flush the buffers and 
reset the pids. + for (std::map<int, PidState*>::iterator it = pids_.begin(); + it != pids_.end(); ++it) { + DVLOG(1) << "Flushing PID: " << it->first; + PidState* pid_state = it->second; + pid_state->Flush(); + delete pid_state; + } + pids_.clear(); + EmitRemainingBuffers(); + buffer_queue_chain_.clear(); + + // End of the segment. + // Note: does not need to invoke |end_of_segment_cb_| since flushing the + // stream parser already involves the end of the current segment. + segment_started_ = false; + first_video_frame_in_segment_ = true; + + // Remove any bytes left in the TS buffer. + // (i.e. any partial TS packet => less than 188 bytes). + ts_byte_queue_.Reset(); + + // Reset the selected PIDs. + selected_audio_pid_ = -1; + selected_video_pid_ = -1; +} + +bool Mp2tStreamParser::Parse(const uint8* buf, int size) { + DVLOG(1) << "Mp2tStreamParser::Parse size=" << size; + + // Add the data to the parser state. + ts_byte_queue_.Push(buf, size); + + while (true) { + const uint8* ts_buffer; + int ts_buffer_size; + ts_byte_queue_.Peek(&ts_buffer, &ts_buffer_size); + if (ts_buffer_size < TsPacket::kPacketSize) + break; + + // Synchronization. + int skipped_bytes = TsPacket::Sync(ts_buffer, ts_buffer_size); + if (skipped_bytes > 0) { + DVLOG(1) << "Packet not aligned on a TS syncword:" + << " skipped_bytes=" << skipped_bytes; + ts_byte_queue_.Pop(skipped_bytes); + continue; + } + + // Parse the TS header, skipping 1 byte if the header is invalid. + scoped_ptr<TsPacket> ts_packet(TsPacket::Parse(ts_buffer, ts_buffer_size)); + if (!ts_packet) { + DVLOG(1) << "Error: invalid TS packet"; + ts_byte_queue_.Pop(1); + continue; + } + DVLOG(LOG_LEVEL_TS) + << "Processing PID=" << ts_packet->pid() + << " start_unit=" << ts_packet->payload_unit_start_indicator(); + + // Parse the section. + std::map<int, PidState*>::iterator it = pids_.find(ts_packet->pid()); + if (it == pids_.end() && + ts_packet->pid() == TsSection::kPidPat) { + // Create the PAT state here if needed. 
+ scoped_ptr<TsSection> pat_section_parser( + new TsSectionPat( + base::Bind(&Mp2tStreamParser::RegisterPmt, + base::Unretained(this)))); + scoped_ptr<PidState> pat_pid_state( + new PidState(ts_packet->pid(), PidState::kPidPat, + pat_section_parser.Pass())); + pat_pid_state->Enable(); + it = pids_.insert( + std::pair<int, PidState*>(ts_packet->pid(), + pat_pid_state.release())).first; + } + + if (it != pids_.end()) { + if (!it->second->PushTsPacket(*ts_packet)) + return false; + } else { + DVLOG(LOG_LEVEL_TS) << "Ignoring TS packet for pid: " << ts_packet->pid(); + } + + // Go to the next packet. + ts_byte_queue_.Pop(TsPacket::kPacketSize); + } + + RCHECK(FinishInitializationIfNeeded()); + + // Emit the A/V buffers that kept accumulating during TS parsing. + return EmitRemainingBuffers(); +} + +void Mp2tStreamParser::RegisterPmt(int program_number, int pmt_pid) { + DVLOG(1) << "RegisterPmt:" + << " program_number=" << program_number + << " pmt_pid=" << pmt_pid; + + // Only one TS program is allowed. Ignore the incoming program map table, + // if there is already one registered. + for (std::map<int, PidState*>::iterator it = pids_.begin(); + it != pids_.end(); ++it) { + PidState* pid_state = it->second; + if (pid_state->pid_type() == PidState::kPidPmt) { + DVLOG_IF(1, pmt_pid != it->first) << "More than one program is defined"; + return; + } + } + + // Create the PMT state here if needed. + DVLOG(1) << "Create a new PMT parser"; + scoped_ptr<TsSection> pmt_section_parser( + new TsSectionPmt( + base::Bind(&Mp2tStreamParser::RegisterPes, + base::Unretained(this), pmt_pid))); + scoped_ptr<PidState> pmt_pid_state( + new PidState(pmt_pid, PidState::kPidPmt, pmt_section_parser.Pass())); + pmt_pid_state->Enable(); + pids_.insert(std::pair<int, PidState*>(pmt_pid, pmt_pid_state.release())); +} + +void Mp2tStreamParser::RegisterPes(int pmt_pid, + int pes_pid, + int stream_type) { + // TODO(damienv): check there is no mismatch if the entry already exists. 
+ DVLOG(1) << "RegisterPes:" + << " pes_pid=" << pes_pid + << " stream_type=" << std::hex << stream_type << std::dec; + std::map<int, PidState*>::iterator it = pids_.find(pes_pid); + if (it != pids_.end()) + return; + + // Create a stream parser corresponding to the stream type. + bool is_audio = false; + scoped_ptr<EsParser> es_parser; + if (stream_type == kStreamTypeAVC) { + es_parser.reset( + new EsParserH264( + base::Bind(&Mp2tStreamParser::OnVideoConfigChanged, + base::Unretained(this), + pes_pid), + base::Bind(&Mp2tStreamParser::OnEmitVideoBuffer, + base::Unretained(this), + pes_pid))); + } else if (stream_type == kStreamTypeAAC) { + es_parser.reset( + new EsParserAdts( + base::Bind(&Mp2tStreamParser::OnAudioConfigChanged, + base::Unretained(this), + pes_pid), + base::Bind(&Mp2tStreamParser::OnEmitAudioBuffer, + base::Unretained(this), + pes_pid), + sbr_in_mimetype_)); + is_audio = true; + } else { + return; + } + + // Create the PES state here. + DVLOG(1) << "Create a new PES state"; + scoped_ptr<TsSection> pes_section_parser( + new TsSectionPes(es_parser.Pass())); + PidState::PidType pid_type = + is_audio ? PidState::kPidAudioPes : PidState::kPidVideoPes; + scoped_ptr<PidState> pes_pid_state( + new PidState(pes_pid, pid_type, pes_section_parser.Pass())); + pids_.insert(std::pair<int, PidState*>(pes_pid, pes_pid_state.release())); + + // A new PES pid has been added, the PID filter might change. + UpdatePidFilter(); +} + +void Mp2tStreamParser::UpdatePidFilter() { + // Applies the HLS rule to select the default audio/video PIDs: + // select the audio/video streams with the lowest PID. + // TODO(damienv): this can be changed when the StreamParser interface + // supports multiple audio/video streams. 
+ PidMap::iterator lowest_audio_pid = pids_.end(); + PidMap::iterator lowest_video_pid = pids_.end(); + for (PidMap::iterator it = pids_.begin(); it != pids_.end(); ++it) { + int pid = it->first; + PidState* pid_state = it->second; + if (pid_state->pid_type() == PidState::kPidAudioPes && + (lowest_audio_pid == pids_.end() || pid < lowest_audio_pid->first)) + lowest_audio_pid = it; + if (pid_state->pid_type() == PidState::kPidVideoPes && + (lowest_video_pid == pids_.end() || pid < lowest_video_pid->first)) + lowest_video_pid = it; + } + + // Enable both the lowest audio and video PIDs. + if (lowest_audio_pid != pids_.end()) { + DVLOG(1) << "Enable audio pid: " << lowest_audio_pid->first; + lowest_audio_pid->second->Enable(); + selected_audio_pid_ = lowest_audio_pid->first; + } + if (lowest_video_pid != pids_.end()) { + DVLOG(1) << "Enable video pid: " << lowest_video_pid->first; + lowest_video_pid->second->Enable(); + selected_video_pid_ = lowest_video_pid->first; + } + + // Disable all the other audio and video PIDs. + for (PidMap::iterator it = pids_.begin(); it != pids_.end(); ++it) { + PidState* pid_state = it->second; + if (it != lowest_audio_pid && it != lowest_video_pid && + (pid_state->pid_type() == PidState::kPidAudioPes || + pid_state->pid_type() == PidState::kPidVideoPes)) + pid_state->Disable(); + } +} + +void Mp2tStreamParser::OnVideoConfigChanged( + int pes_pid, + const VideoDecoderConfig& video_decoder_config) { + DVLOG(1) << "OnVideoConfigChanged for pid=" << pes_pid; + DCHECK_EQ(pes_pid, selected_video_pid_); + DCHECK(video_decoder_config.IsValidConfig()); + + // Create a new entry in |buffer_queue_chain_| with the updated configs. + BufferQueueWithConfig buffer_queue_with_config( + false, + buffer_queue_chain_.empty() + ? AudioDecoderConfig() : buffer_queue_chain_.back().audio_config, + video_decoder_config); + buffer_queue_chain_.push_back(buffer_queue_with_config); + + // Replace any non valid config with the 1st valid entry. 
+ // This might happen if there was no available config before. + for (std::list<BufferQueueWithConfig>::iterator it = + buffer_queue_chain_.begin(); it != buffer_queue_chain_.end(); ++it) { + if (it->video_config.IsValidConfig()) + break; + it->video_config = video_decoder_config; + } +} + +void Mp2tStreamParser::OnAudioConfigChanged( + int pes_pid, + const AudioDecoderConfig& audio_decoder_config) { + DVLOG(1) << "OnAudioConfigChanged for pid=" << pes_pid; + DCHECK_EQ(pes_pid, selected_audio_pid_); + DCHECK(audio_decoder_config.IsValidConfig()); + + // Create a new entry in |buffer_queue_chain_| with the updated configs. + BufferQueueWithConfig buffer_queue_with_config( + false, + audio_decoder_config, + buffer_queue_chain_.empty() + ? VideoDecoderConfig() : buffer_queue_chain_.back().video_config); + buffer_queue_chain_.push_back(buffer_queue_with_config); + + // Replace any non valid config with the 1st valid entry. + // This might happen if there was no available config before. + for (std::list<BufferQueueWithConfig>::iterator it = + buffer_queue_chain_.begin(); it != buffer_queue_chain_.end(); ++it) { + if (it->audio_config.IsValidConfig()) + break; + it->audio_config = audio_decoder_config; + } +} + +bool Mp2tStreamParser::FinishInitializationIfNeeded() { + // Nothing to be done if already initialized. + if (is_initialized_) + return true; + + // Wait for more data to come to finish initialization. + if (buffer_queue_chain_.empty()) + return true; + + // Wait for more data to come if one of the config is not available. + BufferQueueWithConfig& queue_with_config = buffer_queue_chain_.front(); + if (selected_audio_pid_ > 0 && + !queue_with_config.audio_config.IsValidConfig()) + return true; + if (selected_video_pid_ > 0 && + !queue_with_config.video_config.IsValidConfig()) + return true; + + // Pass the config before invoking the initialization callback. 
+ RCHECK(config_cb_.Run(queue_with_config.audio_config, + queue_with_config.video_config, + TextTrackConfigMap())); + queue_with_config.is_config_sent = true; + + // For Mpeg2 TS, the duration is not known. + DVLOG(1) << "Mpeg2TS stream parser initialization done"; + init_cb_.Run(true, kInfiniteDuration()); + is_initialized_ = true; + + return true; +} + +void Mp2tStreamParser::OnEmitAudioBuffer( + int pes_pid, + scoped_refptr<StreamParserBuffer> stream_parser_buffer) { + DCHECK_EQ(pes_pid, selected_audio_pid_); + + DVLOG(LOG_LEVEL_ES) + << "OnEmitAudioBuffer: " + << " size=" + << stream_parser_buffer->data_size() + << " dts=" + << stream_parser_buffer->GetDecodeTimestamp().InMilliseconds() + << " pts=" + << stream_parser_buffer->timestamp().InMilliseconds(); + stream_parser_buffer->set_timestamp( + stream_parser_buffer->timestamp() - time_offset_); + stream_parser_buffer->SetDecodeTimestamp( + stream_parser_buffer->GetDecodeTimestamp() - time_offset_); + + // Ignore the incoming buffer if it is not associated with any config. + if (buffer_queue_chain_.empty()) { + DVLOG(1) << "Ignoring audio buffer with no corresponding audio config"; + return; + } + + buffer_queue_chain_.back().audio_queue.push_back(stream_parser_buffer); +} + +void Mp2tStreamParser::OnEmitVideoBuffer( + int pes_pid, + scoped_refptr<StreamParserBuffer> stream_parser_buffer) { + DCHECK_EQ(pes_pid, selected_video_pid_); + + DVLOG(LOG_LEVEL_ES) + << "OnEmitVideoBuffer" + << " size=" + << stream_parser_buffer->data_size() + << " dts=" + << stream_parser_buffer->GetDecodeTimestamp().InMilliseconds() + << " pts=" + << stream_parser_buffer->timestamp().InMilliseconds() + << " IsKeyframe=" + << stream_parser_buffer->IsKeyframe(); + stream_parser_buffer->set_timestamp( + stream_parser_buffer->timestamp() - time_offset_); + stream_parser_buffer->SetDecodeTimestamp( + stream_parser_buffer->GetDecodeTimestamp() - time_offset_); + + // Ignore the incoming buffer if it is not associated with any config. 
+ if (buffer_queue_chain_.empty()) { + DVLOG(1) << "Ignoring video buffer with no corresponding video config:" + << " keyframe=" << stream_parser_buffer->IsKeyframe() + << " dts=" + << stream_parser_buffer->GetDecodeTimestamp().InMilliseconds(); + return; + } + + // A segment cannot start with a non key frame. + // Ignore the frame if that's the case. + if (first_video_frame_in_segment_ && !stream_parser_buffer->IsKeyframe()) { + DVLOG(1) << "Ignoring non-key frame:" + << " dts=" + << stream_parser_buffer->GetDecodeTimestamp().InMilliseconds(); + return; + } + + first_video_frame_in_segment_ = false; + buffer_queue_chain_.back().video_queue.push_back(stream_parser_buffer); +} + +bool Mp2tStreamParser::EmitRemainingBuffers() { + DVLOG(LOG_LEVEL_ES) << "Mp2tStreamParser::EmitRemainingBuffers"; + + // No buffer should be sent until fully initialized. + if (!is_initialized_) + return true; + + if (buffer_queue_chain_.empty()) + return true; + + // Keep track of the last audio and video config sent. + AudioDecoderConfig last_audio_config = + buffer_queue_chain_.back().audio_config; + VideoDecoderConfig last_video_config = + buffer_queue_chain_.back().video_config; + + // Buffer emission. + while (!buffer_queue_chain_.empty()) { + // Start a segment if needed. + if (!segment_started_) { + DVLOG(1) << "Starting a new segment"; + segment_started_ = true; + new_segment_cb_.Run(); + } + + // Update the audio and video config if needed. + BufferQueueWithConfig& queue_with_config = buffer_queue_chain_.front(); + if (!queue_with_config.is_config_sent) { + if (!config_cb_.Run(queue_with_config.audio_config, + queue_with_config.video_config, + TextTrackConfigMap())) + return false; + queue_with_config.is_config_sent = true; + } + + // Add buffers. 
// StreamParser implementation for Mpeg2 TS streams.
// |sbr_in_mimetype| is forwarded to the ADTS (AAC) ES parser.
class MEDIA_EXPORT Mp2tStreamParser : public StreamParser {
 public:
  explicit Mp2tStreamParser(bool sbr_in_mimetype);
  virtual ~Mp2tStreamParser();

  // StreamParser implementation.
  virtual void Init(const InitCB& init_cb,
                    const NewConfigCB& config_cb,
                    const NewBuffersCB& new_buffers_cb,
                    const NewTextBuffersCB& text_cb,
                    const NeedKeyCB& need_key_cb,
                    const NewMediaSegmentCB& new_segment_cb,
                    const base::Closure& end_of_segment_cb,
                    const LogCB& log_cb) OVERRIDE;
  virtual void Flush() OVERRIDE;
  virtual bool Parse(const uint8* buf, int size) OVERRIDE;

 private:
  // Maps a PID to its (owned) parsing state.
  typedef std::map<int, PidState*> PidMap;

  // Audio and video buffers that share the same pair of decoder configs.
  // |is_config_sent| tells whether that pair of configs has already been
  // passed to |config_cb_|.
  struct BufferQueueWithConfig {
    BufferQueueWithConfig(bool is_cfg_sent,
                          const AudioDecoderConfig& audio_cfg,
                          const VideoDecoderConfig& video_cfg);
    ~BufferQueueWithConfig();

    bool is_config_sent;
    AudioDecoderConfig audio_config;
    StreamParser::BufferQueue audio_queue;
    VideoDecoderConfig video_config;
    StreamParser::BufferQueue video_queue;
  };

  // Callback invoked to register a Program Map Table.
  // Note: Does nothing if the PID is already registered.
  void RegisterPmt(int program_number, int pmt_pid);

  // Callback invoked to register a PES pid.
  // Possible values for |stream_type| are defined in:
  // ISO-13818.1 / ITU H.222 Table 2.34 "Stream type assignments".
  // |pes_pid| is part of the Program Map Table referred to by |pmt_pid|.
  void RegisterPes(int pmt_pid, int pes_pid, int stream_type);

  // Since the StreamParser interface allows only one audio & video streams,
  // an automatic PID filtering should be applied to select the audio & video
  // streams.
  void UpdatePidFilter();

  // Callback invoked each time the audio/video decoder configuration is
  // changed.
  void OnVideoConfigChanged(int pes_pid,
                            const VideoDecoderConfig& video_decoder_config);
  void OnAudioConfigChanged(int pes_pid,
                            const AudioDecoderConfig& audio_decoder_config);

  // Invoke the initialization callback if needed.
  bool FinishInitializationIfNeeded();

  // Callback invoked by the ES stream parser
  // to emit a new audio/video access unit.
  void OnEmitAudioBuffer(
      int pes_pid,
      scoped_refptr<StreamParserBuffer> stream_parser_buffer);
  void OnEmitVideoBuffer(
      int pes_pid,
      scoped_refptr<StreamParserBuffer> stream_parser_buffer);
  // Passes the queued configs and buffers to |config_cb_| / |new_buffers_cb_|.
  // Returns false if one of these callbacks fails.
  bool EmitRemainingBuffers();

  // List of callbacks.
  InitCB init_cb_;
  NewConfigCB config_cb_;
  NewBuffersCB new_buffers_cb_;
  NeedKeyCB need_key_cb_;
  NewMediaSegmentCB new_segment_cb_;
  base::Closure end_of_segment_cb_;
  LogCB log_cb_;

  // True when AAC SBR extension is signalled in the mimetype
  // (mp4a.40.5 in the codecs parameter).
  bool sbr_in_mimetype_;

  // Bytes of the TS stream.
  ByteQueue ts_byte_queue_;

  // List of PIDs and their state.
  PidMap pids_;

  // Selected audio and video PIDs.
  // -1 when no PID of that kind has been selected.
  int selected_audio_pid_;
  int selected_video_pid_;

  // Pending audio & video buffers.
  std::list<BufferQueueWithConfig> buffer_queue_chain_;

  // Whether |init_cb_| has been invoked.
  bool is_initialized_;

  // Indicate whether a segment was started.
  bool segment_started_;
  // True until a video key frame has been queued for the current segment;
  // non key frames received before that are dropped.
  bool first_video_frame_in_segment_;
  // Offset subtracted from the timestamps of every emitted buffer.
  // NOTE(review): no assignment to this member is visible in
  // mp2t_stream_parser.cc - presumably it keeps its default value; confirm.
  base::TimeDelta time_offset_;

  DISALLOW_COPY_AND_ASSIGN(Mp2tStreamParser);
};
+ +#include <algorithm> +#include <string> + +#include "base/bind.h" +#include "base/bind_helpers.h" +#include "base/logging.h" +#include "base/memory/ref_counted.h" +#include "base/time/time.h" +#include "media/base/audio_decoder_config.h" +#include "media/base/decoder_buffer.h" +#include "media/base/stream_parser_buffer.h" +#include "media/base/test_data_util.h" +#include "media/base/text_track_config.h" +#include "media/base/video_decoder_config.h" +#include "media/formats/mp2t/mp2t_stream_parser.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace media { +namespace mp2t { + +class Mp2tStreamParserTest : public testing::Test { + public: + Mp2tStreamParserTest() + : audio_frame_count_(0), + video_frame_count_(0), + video_min_dts_(kNoTimestamp()), + video_max_dts_(kNoTimestamp()) { + bool has_sbr = false; + parser_.reset(new Mp2tStreamParser(has_sbr)); + } + + protected: + scoped_ptr<Mp2tStreamParser> parser_; + int audio_frame_count_; + int video_frame_count_; + base::TimeDelta video_min_dts_; + base::TimeDelta video_max_dts_; + + bool AppendData(const uint8* data, size_t length) { + return parser_->Parse(data, length); + } + + bool AppendDataInPieces(const uint8* data, size_t length, size_t piece_size) { + const uint8* start = data; + const uint8* end = data + length; + while (start < end) { + size_t append_size = std::min(piece_size, + static_cast<size_t>(end - start)); + if (!AppendData(start, append_size)) + return false; + start += append_size; + } + return true; + } + + void OnInit(bool init_ok, base::TimeDelta duration) { + DVLOG(1) << "OnInit: ok=" << init_ok + << ", dur=" << duration.InMilliseconds(); + } + + bool OnNewConfig(const AudioDecoderConfig& ac, + const VideoDecoderConfig& vc, + const StreamParser::TextTrackConfigMap& tc) { + DVLOG(1) << "OnNewConfig: audio=" << ac.IsValidConfig() + << ", video=" << vc.IsValidConfig(); + return true; + } + + + void DumpBuffers(const std::string& label, + const StreamParser::BufferQueue& buffers) { + 
DVLOG(2) << "DumpBuffers: " << label << " size " << buffers.size(); + for (StreamParser::BufferQueue::const_iterator buf = buffers.begin(); + buf != buffers.end(); buf++) { + DVLOG(3) << " n=" << buf - buffers.begin() + << ", size=" << (*buf)->data_size() + << ", dur=" << (*buf)->duration().InMilliseconds(); + } + } + + bool OnNewBuffers(const StreamParser::BufferQueue& audio_buffers, + const StreamParser::BufferQueue& video_buffers) { + DumpBuffers("audio_buffers", audio_buffers); + DumpBuffers("video_buffers", video_buffers); + audio_frame_count_ += audio_buffers.size(); + video_frame_count_ += video_buffers.size(); + + if (video_min_dts_ == kNoTimestamp() && !video_buffers.empty()) + video_min_dts_ = video_buffers.front()->GetDecodeTimestamp(); + if (!video_buffers.empty()) { + video_max_dts_ = video_buffers.back()->GetDecodeTimestamp(); + // Verify monotonicity. + StreamParser::BufferQueue::const_iterator it1 = video_buffers.begin(); + StreamParser::BufferQueue::const_iterator it2 = ++it1; + for ( ; it2 != video_buffers.end(); ++it1, ++it2) { + if ((*it2)->GetDecodeTimestamp() < (*it1)->GetDecodeTimestamp()) + return false; + } + } + + return true; + } + + void OnKeyNeeded(const std::string& type, + const std::vector<uint8>& init_data) { + DVLOG(1) << "OnKeyNeeded: " << init_data.size(); + } + + void OnNewSegment() { + DVLOG(1) << "OnNewSegment"; + } + + void OnEndOfSegment() { + DVLOG(1) << "OnEndOfSegment()"; + } + + void InitializeParser() { + parser_->Init( + base::Bind(&Mp2tStreamParserTest::OnInit, + base::Unretained(this)), + base::Bind(&Mp2tStreamParserTest::OnNewConfig, + base::Unretained(this)), + base::Bind(&Mp2tStreamParserTest::OnNewBuffers, + base::Unretained(this)), + StreamParser::NewTextBuffersCB(), + base::Bind(&Mp2tStreamParserTest::OnKeyNeeded, + base::Unretained(this)), + base::Bind(&Mp2tStreamParserTest::OnNewSegment, + base::Unretained(this)), + base::Bind(&Mp2tStreamParserTest::OnEndOfSegment, + base::Unretained(this)), + LogCB()); + } + 
+ bool ParseMpeg2TsFile(const std::string& filename, int append_bytes) { + InitializeParser(); + + scoped_refptr<DecoderBuffer> buffer = ReadTestDataFile(filename); + EXPECT_TRUE(AppendDataInPieces(buffer->data(), + buffer->data_size(), + append_bytes)); + return true; + } +}; + +TEST_F(Mp2tStreamParserTest, UnalignedAppend17) { + // Test small, non-segment-aligned appends. + ParseMpeg2TsFile("bear-1280x720.ts", 17); + EXPECT_EQ(video_frame_count_, 81); + parser_->Flush(); + EXPECT_EQ(video_frame_count_, 82); +} + +TEST_F(Mp2tStreamParserTest, UnalignedAppend512) { + // Test small, non-segment-aligned appends. + ParseMpeg2TsFile("bear-1280x720.ts", 512); + EXPECT_EQ(video_frame_count_, 81); + parser_->Flush(); + EXPECT_EQ(video_frame_count_, 82); +} + +TEST_F(Mp2tStreamParserTest, TimestampWrapAround) { + // "bear-1280x720_ptswraparound.ts" has been transcoded + // from bear-1280x720.mp4 by applying a time offset of 95442s + // (close to 2^33 / 90000) which results in timestamps wrap around + // in the Mpeg2 TS stream. + ParseMpeg2TsFile("bear-1280x720_ptswraparound.ts", 512); + EXPECT_EQ(video_frame_count_, 81); + EXPECT_GE(video_min_dts_, base::TimeDelta::FromSeconds(95443 - 10)); + EXPECT_LE(video_max_dts_, base::TimeDelta::FromSeconds(95443 + 10)); +} + +} // namespace mp2t +} // namespace media diff --git a/media/formats/mp2t/ts_packet.cc b/media/formats/mp2t/ts_packet.cc new file mode 100644 index 0000000..8463c11 --- /dev/null +++ b/media/formats/mp2t/ts_packet.cc @@ -0,0 +1,215 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
// Parses the 4-byte TS header located at |buf| and, when the header says
// one is present, the adaptation field that follows it.
// A BitReader over |kPacketSize| bytes is created, so |buf| must hold at
// least that many bytes.
// On success |payload_| / |payload_size_| describe what remains of the
// packet once the header and the adaptation field have been skipped.
// Returns false when adaptation_field_length is inconsistent with
// adaptation_field_control or when ParseAdaptationField() fails.
// NOTE(review): RCHECK is defined elsewhere; presumably it returns false
// from this function when its condition does not hold -- confirm.
bool TsPacket::ParseHeader(const uint8* buf) {
  BitReader bit_reader(buf, kPacketSize);
  // Start from "the whole packet is payload" and shrink as fields are read.
  payload_ = buf;
  payload_size_ = kPacketSize;

  // Read the TS header: 4 bytes.
  int syncword;
  int transport_error_indicator;
  int payload_unit_start_indicator;
  int transport_priority;
  int transport_scrambling_control;
  int adaptation_field_control;
  RCHECK(bit_reader.ReadBits(8, &syncword));
  RCHECK(bit_reader.ReadBits(1, &transport_error_indicator));
  RCHECK(bit_reader.ReadBits(1, &payload_unit_start_indicator));
  RCHECK(bit_reader.ReadBits(1, &transport_priority));
  RCHECK(bit_reader.ReadBits(13, &pid_));
  RCHECK(bit_reader.ReadBits(2, &transport_scrambling_control));
  RCHECK(bit_reader.ReadBits(2, &adaptation_field_control));
  RCHECK(bit_reader.ReadBits(4, &continuity_counter_));
  payload_unit_start_indicator_ = (payload_unit_start_indicator != 0);
  payload_ += 4;
  payload_size_ -= 4;

  // Default values when no adaptation field.
  discontinuity_indicator_ = false;
  random_access_indicator_ = false;

  // Done since no adaptation field.
  // (Bit 0x2 of adaptation_field_control flags an adaptation field.)
  if ((adaptation_field_control & 0x2) == 0)
    return true;

  // Read the adaptation field if needed.
  int adaptation_field_length;
  RCHECK(bit_reader.ReadBits(8, &adaptation_field_length));
  DVLOG(LOG_LEVEL_TS) << "adaptation_field_length=" << adaptation_field_length;
  // Account for the adaptation_field_length byte itself.
  payload_ += 1;
  payload_size_ -= 1;
  // Bit 0x1 clear: the adaptation field must fill the rest of the packet
  // (188 - 4 header bytes - 1 length byte = 183).
  if ((adaptation_field_control & 0x1) == 0 &&
      adaptation_field_length != 183) {
    DVLOG(1) << "adaptation_field_length=" << adaptation_field_length;
    return false;
  }
  // Bit 0x1 set: the adaptation field must leave at least one byte
  // (at most 182 of the 183 remaining bytes).
  if ((adaptation_field_control & 0x1) == 1 &&
      adaptation_field_length > 182) {
    DVLOG(1) << "adaptation_field_length=" << adaptation_field_length;
    // This is not allowed by the spec.
    // However, some badly encoded streams are using
    // adaptation_field_length = 183
    return false;
  }

  // adaptation_field_length = '0' is used to insert a single stuffing byte
  // in the adaptation field of a transport stream packet.
  if (adaptation_field_length == 0)
    return true;

  bool status = ParseAdaptationField(&bit_reader, adaptation_field_length);
  // The payload offset is advanced whether or not the adaptation field parsed
  // correctly; |status| tells the caller if the packet is usable.
  payload_ += adaptation_field_length;
  payload_size_ -= adaptation_field_length;
  return status;
}
// A parsed Mpeg2 TS packet: a few fields of the 4-byte header, two flags of
// the adaptation field and the location of the payload.
// Instances are only created through Parse().
class TsPacket {
 public:
  // Size in bytes of one TS packet.
  static const int kPacketSize = 188;

  // Return the number of bytes to discard
  // to be synchronized on a TS syncword.
  // The value equals |size| when no position of |buf| qualifies.
  static int Sync(const uint8* buf, int size);

  // Parse a TS packet.
  // Return a TsPacket only when parsing was successful.
  // Return NULL otherwise.
  // The returned object is heap allocated and released to the caller, who
  // becomes responsible for deleting it.
  static TsPacket* Parse(const uint8* buf, int size);

  ~TsPacket();

  // TS header accessors.
  bool payload_unit_start_indicator() const {
    return payload_unit_start_indicator_;
  }
  int pid() const { return pid_; }
  int continuity_counter() const { return continuity_counter_; }
  bool discontinuity_indicator() const { return discontinuity_indicator_; }
  bool random_access_indicator() const { return random_access_indicator_; }

  // Return the start and the size of the payload.
  // payload() points inside the buffer that was given to Parse(): it is not
  // a copy, so that buffer has to outlive any use of the pointer.
  const uint8* payload() const { return payload_; }
  int payload_size() const { return payload_size_; }

 private:
  // Leaves every member uninitialized; ParseHeader() assigns them.
  TsPacket();

  // Parse an Mpeg2 TS header.
  // The buffer size should be at least |kPacketSize|
  bool ParseHeader(const uint8* buf);
  // Parse the adaptation field, whose length byte has already been read
  // from |bit_reader|.
  bool ParseAdaptationField(BitReader* bit_reader,
                            int adaptation_field_length);

  // Start and size in bytes of the payload.
  const uint8* payload_;
  int payload_size_;

  // TS header.
  bool payload_unit_start_indicator_;
  int pid_;
  int continuity_counter_;

  // Params from the adaptation field.
  bool discontinuity_indicator_;
  bool random_access_indicator_;

  DISALLOW_COPY_AND_ASSIGN(TsPacket);
};
+ // Return true if parsing is successful. + virtual bool Parse(bool payload_unit_start_indicator, + const uint8* buf, int size) = 0; + + // Process bytes that have not been processed yet (pending buffers in the + // pipe). Flush might thus results in frame emission, as an example. + virtual void Flush() = 0; + + // Reset the state of the parser to its initial state. + virtual void Reset() = 0; +}; + +} // namespace mp2t +} // namespace media + +#endif diff --git a/media/formats/mp2t/ts_section_pat.cc b/media/formats/mp2t/ts_section_pat.cc new file mode 100644 index 0000000..2fcc24b --- /dev/null +++ b/media/formats/mp2t/ts_section_pat.cc @@ -0,0 +1,122 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/mp2t/ts_section_pat.h" + +#include <vector> + +#include "base/logging.h" +#include "media/base/bit_reader.h" +#include "media/formats/mp2t/mp2t_common.h" + +namespace media { +namespace mp2t { + +TsSectionPat::TsSectionPat(const RegisterPmtCb& register_pmt_cb) + : register_pmt_cb_(register_pmt_cb), + version_number_(-1) { +} + +TsSectionPat::~TsSectionPat() { +} + +bool TsSectionPat::ParsePsiSection(BitReader* bit_reader) { + // Read the fixed section length. 
+ int table_id; + int section_syntax_indicator; + int dummy_zero; + int reserved; + int section_length; + int transport_stream_id; + int version_number; + int current_next_indicator; + int section_number; + int last_section_number; + RCHECK(bit_reader->ReadBits(8, &table_id)); + RCHECK(bit_reader->ReadBits(1, §ion_syntax_indicator)); + RCHECK(bit_reader->ReadBits(1, &dummy_zero)); + RCHECK(bit_reader->ReadBits(2, &reserved)); + RCHECK(bit_reader->ReadBits(12, §ion_length)); + RCHECK(section_length >= 5); + RCHECK(section_length <= 1021); + RCHECK(bit_reader->ReadBits(16, &transport_stream_id)); + RCHECK(bit_reader->ReadBits(2, &reserved)); + RCHECK(bit_reader->ReadBits(5, &version_number)); + RCHECK(bit_reader->ReadBits(1, ¤t_next_indicator)); + RCHECK(bit_reader->ReadBits(8, §ion_number)); + RCHECK(bit_reader->ReadBits(8, &last_section_number)); + section_length -= 5; + + // Perform a few verifications: + // - Table ID should be 0 for a PAT. + // - section_syntax_indicator should be one. + // - section length should not exceed 1021 + RCHECK(table_id == 0x0); + RCHECK(section_syntax_indicator); + RCHECK(!dummy_zero); + + // Both the program table and the CRC have a size multiple of 4. + // Note for pmt_pid_count: minus 4 to account for the CRC. + RCHECK((section_length % 4) == 0); + int pmt_pid_count = (section_length - 4) / 4; + + // Read the variable length section: program table & crc. + std::vector<int> program_number_array(pmt_pid_count); + std::vector<int> pmt_pid_array(pmt_pid_count); + for (int k = 0; k < pmt_pid_count; k++) { + int reserved; + RCHECK(bit_reader->ReadBits(16, &program_number_array[k])); + RCHECK(bit_reader->ReadBits(3, &reserved)); + RCHECK(bit_reader->ReadBits(13, &pmt_pid_array[k])); + } + int crc32; + RCHECK(bit_reader->ReadBits(32, &crc32)); + + // Just ignore the PAT if not applicable yet. 
+ if (!current_next_indicator) { + DVLOG(1) << "Not supported: received a PAT not applicable yet"; + return true; + } + + // Ignore the program table if it hasn't changed. + if (version_number == version_number_) + return true; + + // Both the MSE and the HLS spec specifies that TS streams should convey + // exactly one program. + if (pmt_pid_count > 1) { + DVLOG(1) << "Multiple programs detected in the Mpeg2 TS stream"; + return false; + } + + // Can now register the PMT. +#if !defined(NDEBUG) + int expected_version_number = version_number; + if (version_number_ >= 0) + expected_version_number = (version_number_ + 1) % 32; + DVLOG_IF(1, version_number != expected_version_number) + << "Unexpected version number: " + << version_number << " vs " << version_number_; +#endif + for (int k = 0; k < pmt_pid_count; k++) { + if (program_number_array[k] != 0) { + // Program numbers different from 0 correspond to PMT. + register_pmt_cb_.Run(program_number_array[k], pmt_pid_array[k]); + // Even if there are multiple programs, only one can be supported now. + // HLS: "Transport Stream segments MUST contain a single MPEG-2 Program." + break; + } + } + version_number_ = version_number; + + return true; +} + +void TsSectionPat::ResetPsiSection() { + version_number_ = -1; +} + +} // namespace mp2t +} // namespace media + diff --git a/media/formats/mp2t/ts_section_pat.h b/media/formats/mp2t/ts_section_pat.h new file mode 100644 index 0000000..f8079ad --- /dev/null +++ b/media/formats/mp2t/ts_section_pat.h @@ -0,0 +1,40 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef MEDIA_FORMATS_MP2T_TS_SECTION_PAT_H_ +#define MEDIA_FORMATS_MP2T_TS_SECTION_PAT_H_ + +#include "base/callback.h" +#include "base/compiler_specific.h" +#include "media/formats/mp2t/ts_section_psi.h" + +namespace media { +namespace mp2t { + +class TsSectionPat : public TsSectionPsi { + public: + // RegisterPmtCb::Run(int program_number, int pmt_pid); + typedef base::Callback<void(int, int)> RegisterPmtCb; + + explicit TsSectionPat(const RegisterPmtCb& register_pmt_cb); + virtual ~TsSectionPat(); + + // TsSectionPsi implementation. + virtual bool ParsePsiSection(BitReader* bit_reader) OVERRIDE; + virtual void ResetPsiSection() OVERRIDE; + + private: + RegisterPmtCb register_pmt_cb_; + + // Parameters from the PAT. + int version_number_; + + DISALLOW_COPY_AND_ASSIGN(TsSectionPat); +}; + +} // namespace mp2t +} // namespace media + +#endif + diff --git a/media/formats/mp2t/ts_section_pes.cc b/media/formats/mp2t/ts_section_pes.cc new file mode 100644 index 0000000..de69a32 --- /dev/null +++ b/media/formats/mp2t/ts_section_pes.cc @@ -0,0 +1,312 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/mp2t/ts_section_pes.h" + +#include "base/logging.h" +#include "base/strings/string_number_conversions.h" +#include "media/base/bit_reader.h" +#include "media/base/buffers.h" +#include "media/formats/mp2t/es_parser.h" +#include "media/formats/mp2t/mp2t_common.h" + +static const int kPesStartCode = 0x000001; + +// Given that |time| is coded using 33 bits, +// UnrollTimestamp returns the corresponding unrolled timestamp. +// The unrolled timestamp is defined by: +// |time| + k * (2 ^ 33) +// where k is estimated so that the unrolled timestamp +// is as close as possible to |previous_unrolled_time|. +static int64 UnrollTimestamp(int64 previous_unrolled_time, int64 time) { + // Mpeg2 TS timestamps have an accuracy of 33 bits. 
+ const int nbits = 33; + + // |timestamp| has a precision of |nbits| + // so make sure the highest bits are set to 0. + DCHECK_EQ((time >> nbits), 0); + + // Consider 3 possibilities to estimate the missing high bits of |time|. + int64 previous_unrolled_time_high = + (previous_unrolled_time >> nbits); + int64 time0 = ((previous_unrolled_time_high - 1) << nbits) | time; + int64 time1 = ((previous_unrolled_time_high + 0) << nbits) | time; + int64 time2 = ((previous_unrolled_time_high + 1) << nbits) | time; + + // Select the min absolute difference with the current time + // so as to ensure time continuity. + int64 diff0 = time0 - previous_unrolled_time; + int64 diff1 = time1 - previous_unrolled_time; + int64 diff2 = time2 - previous_unrolled_time; + if (diff0 < 0) + diff0 = -diff0; + if (diff1 < 0) + diff1 = -diff1; + if (diff2 < 0) + diff2 = -diff2; + + int64 unrolled_time; + int64 min_diff; + if (diff1 < diff0) { + unrolled_time = time1; + min_diff = diff1; + } else { + unrolled_time = time0; + min_diff = diff0; + } + if (diff2 < min_diff) + unrolled_time = time2; + + return unrolled_time; +} + +static bool IsTimestampSectionValid(int64 timestamp_section) { + // |pts_section| has 40 bits: + // - starting with either '0010' or '0011' or '0001' + // - and ending with a marker bit. + // See ITU H.222 standard - PES section. + + // Verify that all the marker bits are set to one. 
+ return ((timestamp_section & 0x1) != 0) && + ((timestamp_section & 0x10000) != 0) && + ((timestamp_section & 0x100000000) != 0); +} + +static int64 ConvertTimestampSectionToTimestamp(int64 timestamp_section) { + return (((timestamp_section >> 33) & 0x7) << 30) | + (((timestamp_section >> 17) & 0x7fff) << 15) | + (((timestamp_section >> 1) & 0x7fff) << 0); +} + +namespace media { +namespace mp2t { + +TsSectionPes::TsSectionPes(scoped_ptr<EsParser> es_parser) + : es_parser_(es_parser.release()), + wait_for_pusi_(true), + previous_pts_valid_(false), + previous_pts_(0), + previous_dts_valid_(false), + previous_dts_(0) { + DCHECK(es_parser_); +} + +TsSectionPes::~TsSectionPes() { +} + +bool TsSectionPes::Parse(bool payload_unit_start_indicator, + const uint8* buf, int size) { + // Ignore partial PES. + if (wait_for_pusi_ && !payload_unit_start_indicator) + return true; + + bool parse_result = true; + if (payload_unit_start_indicator) { + // Try emitting a packet since we might have a pending PES packet + // with an undefined size. + // In this case, a unit is emitted when the next unit is coming. + int raw_pes_size; + const uint8* raw_pes; + pes_byte_queue_.Peek(&raw_pes, &raw_pes_size); + if (raw_pes_size > 0) + parse_result = Emit(true); + + // Reset the state. + ResetPesState(); + + // Update the state. + wait_for_pusi_ = false; + } + + // Add the data to the parser state. + if (size > 0) + pes_byte_queue_.Push(buf, size); + + // Try emitting the current PES packet. + return (parse_result && Emit(false)); +} + +void TsSectionPes::Flush() { + // Try emitting a packet since we might have a pending PES packet + // with an undefined size. + Emit(true); + + // Flush the underlying ES parser. 
// Parses the reassembled PES packet made of the |raw_pes_size| bytes at
// |raw_pes| and forwards its elementary stream payload, with the PTS / DTS
// found in the PES header, to |es_parser_|.
// PES packets whose stream_id is neither audio nor video are skipped and
// reported as a success.
// Returns false for a malformed PES header or when the ES parser fails.
// NOTE(review): RCHECK is defined elsewhere; presumably it returns false
// from this function when its condition does not hold -- confirm.
bool TsSectionPes::ParseInternal(const uint8* raw_pes, int raw_pes_size) {
  BitReader bit_reader(raw_pes, raw_pes_size);

  // Read up to the pes_packet_length (6 bytes).
  int packet_start_code_prefix;
  int stream_id;
  int pes_packet_length;
  RCHECK(bit_reader.ReadBits(24, &packet_start_code_prefix));
  RCHECK(bit_reader.ReadBits(8, &stream_id));
  RCHECK(bit_reader.ReadBits(16, &pes_packet_length));

  RCHECK(packet_start_code_prefix == kPesStartCode);
  DVLOG(LOG_LEVEL_PES) << "stream_id=" << std::hex << stream_id << std::dec;
  // A length of 0 means "unknown size": use everything that is left.
  if (pes_packet_length == 0)
    pes_packet_length = bit_reader.bits_available() / 8;

  // Ignore the PES for unknown stream IDs.
  // See ITU H.222 Table 2-22 "Stream_id assignments"
  bool is_audio_stream_id = ((stream_id & 0xe0) == 0xc0);
  bool is_video_stream_id = ((stream_id & 0xf0) == 0xe0);
  if (!is_audio_stream_id && !is_video_stream_id)
    return true;

  // Read up to "pes_header_data_length".
  int dummy_2;
  int PES_scrambling_control;
  int PES_priority;
  int data_alignment_indicator;
  int copyright;
  int original_or_copy;
  int pts_dts_flags;
  int escr_flag;
  int es_rate_flag;
  int dsm_trick_mode_flag;
  int additional_copy_info_flag;
  int pes_crc_flag;
  int pes_extension_flag;
  int pes_header_data_length;
  RCHECK(bit_reader.ReadBits(2, &dummy_2));
  RCHECK(dummy_2 == 0x2);
  RCHECK(bit_reader.ReadBits(2, &PES_scrambling_control));
  RCHECK(bit_reader.ReadBits(1, &PES_priority));
  RCHECK(bit_reader.ReadBits(1, &data_alignment_indicator));
  RCHECK(bit_reader.ReadBits(1, &copyright));
  RCHECK(bit_reader.ReadBits(1, &original_or_copy));
  RCHECK(bit_reader.ReadBits(2, &pts_dts_flags));
  RCHECK(bit_reader.ReadBits(1, &escr_flag));
  RCHECK(bit_reader.ReadBits(1, &es_rate_flag));
  RCHECK(bit_reader.ReadBits(1, &dsm_trick_mode_flag));
  RCHECK(bit_reader.ReadBits(1, &additional_copy_info_flag));
  RCHECK(bit_reader.ReadBits(1, &pes_crc_flag));
  RCHECK(bit_reader.ReadBits(1, &pes_extension_flag));
  RCHECK(bit_reader.ReadBits(8, &pes_header_data_length));
  // Remember how many bytes were left at the start of the optional header
  // fields, to measure later how many of them have been consumed.
  int pes_header_start_size = bit_reader.bits_available() / 8;

  // Compute the size and the offset of the ES payload.
  // "6" for the 6 bytes read before and including |pes_packet_length|.
  // "3" for the 3 bytes read before and including |pes_header_data_length|.
  int es_size = pes_packet_length - 3 - pes_header_data_length;
  int es_offset = 6 + 3 + pes_header_data_length;
  // Guard the read of |raw_pes| done at the end of this function.
  RCHECK(es_size >= 0);
  RCHECK(es_offset + es_size <= raw_pes_size);

  // Read the timing information section.
  // pts_dts_flags: 0x2 = PTS only, 0x3 = PTS followed by DTS.
  // The top 4 bits of each 40-bit section are a fixed prefix which is
  // checked along with the marker bits.
  bool is_pts_valid = false;
  bool is_dts_valid = false;
  int64 pts_section = 0;
  int64 dts_section = 0;
  if (pts_dts_flags == 0x2) {
    RCHECK(bit_reader.ReadBits(40, &pts_section));
    RCHECK((((pts_section >> 36) & 0xf) == 0x2) &&
           IsTimestampSectionValid(pts_section));
    is_pts_valid = true;
  }
  if (pts_dts_flags == 0x3) {
    RCHECK(bit_reader.ReadBits(40, &pts_section));
    RCHECK(bit_reader.ReadBits(40, &dts_section));
    RCHECK((((pts_section >> 36) & 0xf) == 0x3) &&
           IsTimestampSectionValid(pts_section));
    RCHECK((((dts_section >> 36) & 0xf) == 0x1) &&
           IsTimestampSectionValid(dts_section));
    is_pts_valid = true;
    is_dts_valid = true;
  }

  // Convert and unroll the timestamps.
  // A timestamp missing from the PES header stays at kNoTimestamp().
  // The very first timestamp is taken as is; the following ones are unrolled
  // against the previous value. (1000 * t) / 90 turns a count of 1/90000
  // second ticks into microseconds.
  base::TimeDelta media_pts(kNoTimestamp());
  base::TimeDelta media_dts(kNoTimestamp());
  if (is_pts_valid) {
    int64 pts = ConvertTimestampSectionToTimestamp(pts_section);
    if (previous_pts_valid_)
      pts = UnrollTimestamp(previous_pts_, pts);
    previous_pts_ = pts;
    previous_pts_valid_ = true;
    media_pts = base::TimeDelta::FromMicroseconds((1000 * pts) / 90);
  }
  if (is_dts_valid) {
    int64 dts = ConvertTimestampSectionToTimestamp(dts_section);
    if (previous_dts_valid_)
      dts = UnrollTimestamp(previous_dts_, dts);
    previous_dts_ = dts;
    previous_dts_valid_ = true;
    media_dts = base::TimeDelta::FromMicroseconds((1000 * dts) / 90);
  }

  // Discard the rest of the PES packet header.
  // TODO(damienv): check if some info of the PES packet header are useful.
  DCHECK_EQ(bit_reader.bits_available() % 8, 0);
  // A negative value means the timestamps read above did not fit in the
  // header size announced by |pes_header_data_length|.
  int pes_header_remaining_size = pes_header_data_length -
      (pes_header_start_size - bit_reader.bits_available() / 8);
  RCHECK(pes_header_remaining_size >= 0);

  // Read the PES packet.
  // The payload is addressed through |es_offset| directly in |raw_pes|,
  // |bit_reader| is not used any further.
  DVLOG(LOG_LEVEL_PES)
      << "Emit a reassembled PES:"
      << " size=" << es_size
      << " pts=" << media_pts.InMilliseconds()
      << " dts=" << media_dts.InMilliseconds()
      << " data_alignment_indicator=" << data_alignment_indicator;
  return es_parser_->Parse(&raw_pes[es_offset], es_size, media_pts, media_dts);
}
// Parses one Program Map Table section read from |bit_reader| and reports
// each (elementary stream PID, stream type) pair it lists through
// |register_pes_cb_|.
// The callback only runs once the whole section has been read, so nothing
// is registered for a section that turns out to be malformed.
// NOTE(review): RCHECK is defined elsewhere; presumably it returns false
// from this function when its condition does not hold -- confirm.
bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) {
  // Read up to |last_section_number|.
  int table_id;
  int section_syntax_indicator;
  int dummy_zero;
  int reserved;
  int section_length;
  int program_number;
  int version_number;
  int current_next_indicator;
  int section_number;
  int last_section_number;
  RCHECK(bit_reader->ReadBits(8, &table_id));
  RCHECK(bit_reader->ReadBits(1, &section_syntax_indicator));
  RCHECK(bit_reader->ReadBits(1, &dummy_zero));
  RCHECK(bit_reader->ReadBits(2, &reserved));
  RCHECK(bit_reader->ReadBits(12, &section_length));
  // |section_length| counts the bytes following this field: remember how
  // many bytes are left at this point to locate the end of the section.
  int section_start_marker = bit_reader->bits_available() / 8;

  RCHECK(bit_reader->ReadBits(16, &program_number));
  RCHECK(bit_reader->ReadBits(2, &reserved));
  RCHECK(bit_reader->ReadBits(5, &version_number));
  RCHECK(bit_reader->ReadBits(1, &current_next_indicator));
  RCHECK(bit_reader->ReadBits(8, &section_number));
  RCHECK(bit_reader->ReadBits(8, &last_section_number));

  // Perform a few verifications:
  // - table ID should be 2 for a PMT.
  // - section_syntax_indicator should be one.
  // - section length should not exceed 1021.
  // - section_number and last_section_number should both be zero.
  RCHECK(table_id == 0x2);
  RCHECK(section_syntax_indicator);
  RCHECK(!dummy_zero);
  RCHECK(section_length <= 1021);
  RCHECK(section_number == 0);
  RCHECK(last_section_number == 0);

  // TODO(damienv):
  // Verify that there is no mismatch between the program number
  // and the program number that was provided in a PAT for the current PMT.

  // Read the end of the fixed length section.
  int pcr_pid;
  int program_info_length;
  RCHECK(bit_reader->ReadBits(3, &reserved));
  RCHECK(bit_reader->ReadBits(13, &pcr_pid));
  RCHECK(bit_reader->ReadBits(4, &reserved));
  RCHECK(bit_reader->ReadBits(12, &program_info_length));
  RCHECK(program_info_length < 1024);

  // Read the program info descriptor.
  // TODO(damienv): check whether any of the descriptors could be useful.
  // Defined in section 2.6 of ISO-13818.
  RCHECK(bit_reader->SkipBits(8 * program_info_length));

  // Read the ES description table.
  // The end of the PID map is 4 bytes away from the end of the section
  // (4 bytes = size of the CRC).
  // The marker is expressed as a number of bytes left in |bit_reader|.
  int pid_map_end_marker = section_start_marker - section_length + 4;
  std::map<int, int> pid_map;
  while (bit_reader->bits_available() > 8 * pid_map_end_marker) {
    int stream_type;
    int reserved;
    int pid_es;
    int es_info_length;
    RCHECK(bit_reader->ReadBits(8, &stream_type));
    RCHECK(bit_reader->ReadBits(3, &reserved));
    RCHECK(bit_reader->ReadBits(13, &pid_es));
    RCHECK(bit_reader->ReadBits(4, &reserved));
    RCHECK(bit_reader->ReadBits(12, &es_info_length));

    // Do not register the PID right away.
    // Wait for the end of the section to be fully parsed
    // to make sure there is no error.
    // std::map::insert keeps the first stream type seen for a given PID.
    pid_map.insert(std::pair<int, int>(pid_es, stream_type));

    // Read the ES info descriptors.
    // TODO(damienv): check whether any of the descriptors could be useful.
    // Defined in section 2.6 of ISO-13818.
    RCHECK(bit_reader->SkipBits(8 * es_info_length));
  }

  // Read the CRC.
  // The value is only consumed here, it is not checked in this function.
  int crc32;
  RCHECK(bit_reader->ReadBits(32, &crc32));

  // Once the PMT has been proved to be correct, register the PIDs.
  for (std::map<int, int>::iterator it = pid_map.begin();
       it != pid_map.end(); ++it)
    register_pes_cb_.Run(it->first, it->second);

  return true;
}
// PSI section parser for the Program Map Table: reports the elementary
// streams listed by a PMT through a callback.
class TsSectionPmt : public TsSectionPsi {
 public:
  // RegisterPesCb::Run(int pes_pid, int stream_type);
  // Stream type is defined in
  // "Table 2-34 - Stream type assignments" in H.222
  // TODO(damienv): add the program number.
  typedef base::Callback<void(int, int)> RegisterPesCb;

  // |register_pes_cb| is copied into the parser.
  explicit TsSectionPmt(const RegisterPesCb& register_pes_cb);
  virtual ~TsSectionPmt();

  // TsSectionPsi implementation.
  virtual bool ParsePsiSection(BitReader* bit_reader) OVERRIDE;
  virtual void ResetPsiSection() OVERRIDE;

 private:
  // Callback used to report each elementary stream.
  RegisterPesCb register_pes_cb_;

  DISALLOW_COPY_AND_ASSIGN(TsSectionPmt);
};
+ +#include "media/formats/mp2t/ts_section_psi.h" + +#include "base/basictypes.h" +#include "base/logging.h" +#include "media/base/bit_reader.h" +#include "media/formats/mp2t/mp2t_common.h" + +static bool IsCrcValid(const uint8* buf, int size) { + uint32 crc = 0xffffffffu; + const uint32 kCrcPoly = 0x4c11db7; + + for (int k = 0; k < size; k++) { + int nbits = 8; + uint32 data_msb_aligned = buf[k]; + data_msb_aligned <<= (32 - nbits); + + while (nbits > 0) { + if ((data_msb_aligned ^ crc) & 0x80000000) { + crc <<= 1; + crc ^= kCrcPoly; + } else { + crc <<= 1; + } + + data_msb_aligned <<= 1; + nbits--; + } + } + + return (crc == 0); +} + +namespace media { +namespace mp2t { + +TsSectionPsi::TsSectionPsi() + : wait_for_pusi_(true), + leading_bytes_to_discard_(0) { +} + +TsSectionPsi::~TsSectionPsi() { +} + +bool TsSectionPsi::Parse(bool payload_unit_start_indicator, + const uint8* buf, int size) { + // Ignore partial PSI. + if (wait_for_pusi_ && !payload_unit_start_indicator) + return true; + + if (payload_unit_start_indicator) { + // Reset the state of the PSI section. + ResetPsiState(); + + // Update the state. + wait_for_pusi_ = false; + DCHECK_GE(size, 1); + int pointer_field = buf[0]; + leading_bytes_to_discard_ = pointer_field; + buf++; + size--; + } + + // Discard some leading bytes if needed. + if (leading_bytes_to_discard_ > 0) { + int nbytes_to_discard = std::min(leading_bytes_to_discard_, size); + buf += nbytes_to_discard; + size -= nbytes_to_discard; + leading_bytes_to_discard_ -= nbytes_to_discard; + } + if (size == 0) + return true; + + // Add the data to the parser state. + psi_byte_queue_.Push(buf, size); + int raw_psi_size; + const uint8* raw_psi; + psi_byte_queue_.Peek(&raw_psi, &raw_psi_size); + + // Check whether we have enough data to start parsing. 
+ if (raw_psi_size < 3) + return true; + int section_length = + ((static_cast<int>(raw_psi[1]) << 8) | + (static_cast<int>(raw_psi[2]))) & 0xfff; + if (section_length >= 1021) + return false; + int psi_length = section_length + 3; + if (raw_psi_size < psi_length) { + // Don't throw an error when there is not enough data, + // just wait for more data to come. + return true; + } + + // There should not be any trailing bytes after a PMT. + // Instead, the pointer field should be used to stuff bytes. + DVLOG_IF(1, raw_psi_size > psi_length) + << "Trailing bytes after a PSI section: " + << psi_length << " vs " << raw_psi_size; + + // Verify the CRC. + RCHECK(IsCrcValid(raw_psi, psi_length)); + + // Parse the PSI section. + BitReader bit_reader(raw_psi, raw_psi_size); + bool status = ParsePsiSection(&bit_reader); + if (status) + ResetPsiState(); + + return status; +} + +void TsSectionPsi::Flush() { +} + +void TsSectionPsi::Reset() { + ResetPsiSection(); + ResetPsiState(); +} + +void TsSectionPsi::ResetPsiState() { + wait_for_pusi_ = true; + psi_byte_queue_.Reset(); + leading_bytes_to_discard_ = 0; +} + +} // namespace mp2t +} // namespace media + diff --git a/media/formats/mp2t/ts_section_psi.h b/media/formats/mp2t/ts_section_psi.h new file mode 100644 index 0000000..1b81884 --- /dev/null +++ b/media/formats/mp2t/ts_section_psi.h @@ -0,0 +1,54 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_MP2T_TS_SECTION_PSI_H_ +#define MEDIA_FORMATS_MP2T_TS_SECTION_PSI_H_ + +#include "base/compiler_specific.h" +#include "media/base/byte_queue.h" +#include "media/formats/mp2t/ts_section.h" + +namespace media { + +class BitReader; + +namespace mp2t { + +class TsSectionPsi : public TsSection { + public: + TsSectionPsi(); + virtual ~TsSectionPsi(); + + // TsSection implementation. 
+ virtual bool Parse(bool payload_unit_start_indicator, + const uint8* buf, int size) OVERRIDE; + virtual void Flush() OVERRIDE; + virtual void Reset() OVERRIDE; + + // Parse the content of the PSI section. + virtual bool ParsePsiSection(BitReader* bit_reader) = 0; + + // Reset the state of the PSI section. + virtual void ResetPsiSection() = 0; + + private: + void ResetPsiState(); + + // Bytes of the current PSI. + ByteQueue psi_byte_queue_; + + // Do not start parsing before getting a unit start indicator. + bool wait_for_pusi_; + + // Number of leading bytes to discard (pointer field). + int leading_bytes_to_discard_; + + DISALLOW_COPY_AND_ASSIGN(TsSectionPsi); +}; + +} // namespace mp2t +} // namespace media + +#endif + diff --git a/media/formats/mp3/mp3_stream_parser.cc b/media/formats/mp3/mp3_stream_parser.cc new file mode 100644 index 0000000..3834f7a --- /dev/null +++ b/media/formats/mp3/mp3_stream_parser.cc @@ -0,0 +1,613 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/mp3/mp3_stream_parser.h" + +#include "base/bind.h" +#include "base/callback_helpers.h" +#include "base/message_loop/message_loop.h" +#include "media/base/bit_reader.h" +#include "media/base/buffers.h" +#include "media/base/stream_parser_buffer.h" +#include "media/base/text_track_config.h" +#include "media/base/video_decoder_config.h" + +namespace media { + +static const uint32 kMP3StartCodeMask = 0xffe00000; +static const uint32 kICYStartCode = 0x49435920; // 'ICY ' + +// Arbitrary upper bound on the size of an IceCast header before it +// triggers an error. 
+static const int kMaxIcecastHeaderSize = 4096; + +static const uint32 kID3StartCodeMask = 0xffffff00; +static const uint32 kID3v1StartCode = 0x54414700; // 'TAG\0' +static const int kID3v1Size = 128; +static const int kID3v1ExtendedSize = 227; +static const uint32 kID3v2StartCode = 0x49443300; // 'ID3\0' + +// Map that determines which bitrate_index & channel_mode combinations +// are allowed. +// Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html +static const bool kIsAllowed[17][4] = { + { true, true, true, true }, // free + { true, false, false, false }, // 32 + { true, false, false, false }, // 48 + { true, false, false, false }, // 56 + { true, true, true, true }, // 64 + { true, false, false, false }, // 80 + { true, true, true, true }, // 96 + { true, true, true, true }, // 112 + { true, true, true, true }, // 128 + { true, true, true, true }, // 160 + { true, true, true, true }, // 192 + { false, true, true, true }, // 224 + { false, true, true, true }, // 256 + { false, true, true, true }, // 320 + { false, true, true, true }, // 384 + { false, false, false, false } // bad +}; + +// Maps version and layer information in the frame header +// into an index for the |kBitrateMap|. +// Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html +static const int kVersionLayerMap[4][4] = { + // { reserved, L3, L2, L1 } + { 5, 4, 4, 3 }, // MPEG 2.5 + { 5, 5, 5, 5 }, // reserved + { 5, 4, 4, 3 }, // MPEG 2 + { 5, 2, 1, 0 } // MPEG 1 +}; + +// Maps the bitrate index field in the header and an index +// from |kVersionLayerMap| to a frame bitrate. 
// Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html
// Rows are indexed by the 4-bit bitrate index, columns by the value looked
// up in |kVersionLayerMap|. Values are in kbit/s; 0 marks an entry that has
// no usable bitrate.
static const int kBitrateMap[16][6] = {
  // { V1L1, V1L2, V1L3, V2L1, V2L2 & V2L3, reserved }
  { 0,   0,   0,   0,   0,   0 },
  { 32,  32,  32,  32,  8,   0 },
  { 64,  48,  40,  48,  16,  0 },
  { 96,  56,  48,  56,  24,  0 },
  { 128, 64,  56,  64,  32,  0 },
  { 160, 80,  64,  80,  40,  0 },
  { 192, 96,  80,  96,  48,  0 },
  { 224, 112, 96,  112, 56,  0 },
  { 256, 128, 112, 128, 64,  0 },
  { 288, 160, 128, 144, 80,  0 },
  { 320, 192, 160, 160, 96,  0 },
  { 352, 224, 192, 176, 112, 0 },
  { 384, 256, 224, 192, 128, 0 },
  { 416, 320, 256, 224, 144, 0 },
  { 448, 384, 320, 256, 160, 0 },
  { 0,   0,   0,   0,   0,   0 }
};

// Maps the sample rate index and version fields from the frame header
// to a sample rate.
// Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html
static const int kSampleRateMap[4][4] = {
  // { V2.5, reserved, V2, V1 }
  { 11025, 0, 22050, 44100 },
  { 12000, 0, 24000, 48000 },
  { 8000,  0, 16000, 32000 },
  { 0,     0, 0,     0 }
};

// Frame header field constants.
+static const int kVersion2 = 2; +static const int kVersionReserved = 1; +static const int kVersion2_5 = 0; +static const int kLayerReserved = 0; +static const int kLayer1 = 3; +static const int kLayer2 = 2; +static const int kLayer3 = 1; +static const int kBitrateFree = 0; +static const int kBitrateBad = 0xf; +static const int kSampleRateReserved = 3; + +MP3StreamParser::MP3StreamParser() + : state_(UNINITIALIZED), + in_media_segment_(false) { +} + +MP3StreamParser::~MP3StreamParser() {} + +void MP3StreamParser::Init(const InitCB& init_cb, + const NewConfigCB& config_cb, + const NewBuffersCB& new_buffers_cb, + const NewTextBuffersCB& text_cb, + const NeedKeyCB& need_key_cb, + const NewMediaSegmentCB& new_segment_cb, + const base::Closure& end_of_segment_cb, + const LogCB& log_cb) { + DVLOG(1) << __FUNCTION__; + DCHECK_EQ(state_, UNINITIALIZED); + init_cb_ = init_cb; + config_cb_ = config_cb; + new_buffers_cb_ = new_buffers_cb; + new_segment_cb_ = new_segment_cb; + end_of_segment_cb_ = end_of_segment_cb; + log_cb_ = log_cb; + + ChangeState(INITIALIZED); +} + +void MP3StreamParser::Flush() { + DVLOG(1) << __FUNCTION__; + DCHECK_NE(state_, UNINITIALIZED); + queue_.Reset(); + timestamp_helper_->SetBaseTimestamp(base::TimeDelta()); + in_media_segment_ = false; +} + +bool MP3StreamParser::Parse(const uint8* buf, int size) { + DVLOG(1) << __FUNCTION__ << "(" << size << ")"; + DCHECK(buf); + DCHECK_GT(size, 0); + DCHECK_NE(state_, UNINITIALIZED); + + if (state_ == PARSE_ERROR) + return false; + + DCHECK_EQ(state_, INITIALIZED); + + queue_.Push(buf, size); + + bool end_of_segment = true; + BufferQueue buffers; + for (;;) { + const uint8* data; + int data_size; + queue_.Peek(&data, &data_size); + + if (data_size < 4) + break; + + uint32 start_code = data[0] << 24 | data[1] << 16 | data[2] << 8 | data[3]; + int bytes_read = 0; + bool parsed_metadata = true; + if ((start_code & kMP3StartCodeMask) == kMP3StartCodeMask) { + bytes_read = ParseMP3Frame(data, data_size, &buffers); 
+ + // Only allow the current segment to end if a full frame has been parsed. + end_of_segment = bytes_read > 0; + parsed_metadata = false; + } else if (start_code == kICYStartCode) { + bytes_read = ParseIcecastHeader(data, data_size); + } else if ((start_code & kID3StartCodeMask) == kID3v1StartCode) { + bytes_read = ParseID3v1(data, data_size); + } else if ((start_code & kID3StartCodeMask) == kID3v2StartCode) { + bytes_read = ParseID3v2(data, data_size); + } else { + bytes_read = FindNextValidStartCode(data, data_size); + + if (bytes_read > 0) { + DVLOG(1) << "Unexpected start code 0x" << std::hex << start_code; + DVLOG(1) << "SKIPPING " << bytes_read << " bytes of garbage."; + } + } + + CHECK_LE(bytes_read, data_size); + + if (bytes_read < 0) { + ChangeState(PARSE_ERROR); + return false; + } else if (bytes_read == 0) { + // Need more data. + break; + } + + // Send pending buffers if we have encountered metadata. + if (parsed_metadata && !buffers.empty() && !SendBuffers(&buffers, true)) + return false; + + queue_.Pop(bytes_read); + end_of_segment = true; + } + + if (buffers.empty()) + return true; + + // Send buffers collected in this append that haven't been sent yet. 
+ return SendBuffers(&buffers, end_of_segment); +} + +void MP3StreamParser::ChangeState(State state) { + DVLOG(1) << __FUNCTION__ << "() : " << state_ << " -> " << state; + state_ = state; +} + +int MP3StreamParser::ParseFrameHeader(const uint8* data, int size, + int* frame_size, + int* sample_rate, + ChannelLayout* channel_layout, + int* sample_count) const { + DCHECK(data); + DCHECK_GE(size, 0); + DCHECK(frame_size); + + if (size < 4) + return 0; + + BitReader reader(data, size); + int sync; + int version; + int layer; + int is_protected; + int bitrate_index; + int sample_rate_index; + int has_padding; + int is_private; + int channel_mode; + int other_flags; + + if (!reader.ReadBits(11, &sync) || + !reader.ReadBits(2, &version) || + !reader.ReadBits(2, &layer) || + !reader.ReadBits(1, &is_protected) || + !reader.ReadBits(4, &bitrate_index) || + !reader.ReadBits(2, &sample_rate_index) || + !reader.ReadBits(1, &has_padding) || + !reader.ReadBits(1, &is_private) || + !reader.ReadBits(2, &channel_mode) || + !reader.ReadBits(6, &other_flags)) { + return -1; + } + + DVLOG(2) << "Header data :" << std::hex + << " sync 0x" << sync + << " version 0x" << version + << " layer 0x" << layer + << " bitrate_index 0x" << bitrate_index + << " sample_rate_index 0x" << sample_rate_index + << " channel_mode 0x" << channel_mode; + + if (sync != 0x7ff || + version == kVersionReserved || + layer == kLayerReserved || + bitrate_index == kBitrateFree || bitrate_index == kBitrateBad || + sample_rate_index == kSampleRateReserved) { + MEDIA_LOG(log_cb_) << "Invalid header data :" << std::hex + << " sync 0x" << sync + << " version 0x" << version + << " layer 0x" << layer + << " bitrate_index 0x" << bitrate_index + << " sample_rate_index 0x" << sample_rate_index + << " channel_mode 0x" << channel_mode; + return -1; + } + + if (layer == kLayer2 && kIsAllowed[bitrate_index][channel_mode]) { + MEDIA_LOG(log_cb_) << "Invalid (bitrate_index, channel_mode) combination :" + << std::hex + << " 
bitrate_index " << bitrate_index + << " channel_mode " << channel_mode; + return -1; + } + + int bitrate = kBitrateMap[bitrate_index][kVersionLayerMap[version][layer]]; + + if (bitrate == 0) { + MEDIA_LOG(log_cb_) << "Invalid bitrate :" << std::hex + << " version " << version + << " layer " << layer + << " bitrate_index " << bitrate_index; + return -1; + } + + DVLOG(2) << " bitrate " << bitrate; + + int frame_sample_rate = kSampleRateMap[sample_rate_index][version]; + if (frame_sample_rate == 0) { + MEDIA_LOG(log_cb_) << "Invalid sample rate :" << std::hex + << " version " << version + << " sample_rate_index " << sample_rate_index; + return -1; + } + + if (sample_rate) + *sample_rate = frame_sample_rate; + + // http://teslabs.com/openplayer/docs/docs/specs/mp3_structure2.pdf + // Table 2.1.5 + int samples_per_frame; + switch (layer) { + case kLayer1: + samples_per_frame = 384; + break; + + case kLayer2: + samples_per_frame = 1152; + break; + + case kLayer3: + if (version == kVersion2 || version == kVersion2_5) + samples_per_frame = 576; + else + samples_per_frame = 1152; + break; + + default: + return -1; + } + + if (sample_count) + *sample_count = samples_per_frame; + + // http://teslabs.com/openplayer/docs/docs/specs/mp3_structure2.pdf + // Text just below Table 2.1.5. + if (layer == kLayer1) { + // This formulation is a slight variation on the equation below, + // but has slightly different truncation characteristics to deal + // with the fact that Layer 1 has 4 byte "slots" instead of single + // byte ones. + *frame_size = 4 * (12 * bitrate * 1000 / frame_sample_rate); + } else { + *frame_size = + ((samples_per_frame / 8) * bitrate * 1000) / frame_sample_rate; + } + + if (has_padding) + *frame_size += (layer == kLayer1) ? 4 : 1; + + if (channel_layout) { + // Map Stereo(0), Joint Stereo(1), and Dual Channel (2) to + // CHANNEL_LAYOUT_STEREO and Single Channel (3) to CHANNEL_LAYOUT_MONO. + *channel_layout = + (channel_mode == 3) ? 
CHANNEL_LAYOUT_MONO : CHANNEL_LAYOUT_STEREO; + } + + return 4; +} + +int MP3StreamParser::ParseMP3Frame(const uint8* data, + int size, + BufferQueue* buffers) { + DVLOG(2) << __FUNCTION__ << "(" << size << ")"; + + int sample_rate; + ChannelLayout channel_layout; + int frame_size; + int sample_count; + int bytes_read = ParseFrameHeader( + data, size, &frame_size, &sample_rate, &channel_layout, &sample_count); + + if (bytes_read <= 0) + return bytes_read; + + // Make sure data contains the entire frame. + if (size < frame_size) + return 0; + + DVLOG(2) << " sample_rate " << sample_rate + << " channel_layout " << channel_layout + << " frame_size " << frame_size; + + if (config_.IsValidConfig() && + (config_.samples_per_second() != sample_rate || + config_.channel_layout() != channel_layout)) { + // Clear config data so that a config change is initiated. + config_ = AudioDecoderConfig(); + + // Send all buffers associated with the previous config. + if (!buffers->empty() && !SendBuffers(buffers, true)) + return -1; + } + + if (!config_.IsValidConfig()) { + config_.Initialize(kCodecMP3, kSampleFormatF32, channel_layout, + sample_rate, NULL, 0, false, false, + base::TimeDelta(), base::TimeDelta()); + + base::TimeDelta base_timestamp; + if (timestamp_helper_) + base_timestamp = timestamp_helper_->GetTimestamp(); + + timestamp_helper_.reset(new AudioTimestampHelper(sample_rate)); + timestamp_helper_->SetBaseTimestamp(base_timestamp); + + VideoDecoderConfig video_config; + bool success = config_cb_.Run(config_, video_config, TextTrackConfigMap()); + + if (!init_cb_.is_null()) + base::ResetAndReturn(&init_cb_).Run(success, kInfiniteDuration()); + + if (!success) + return -1; + } + + scoped_refptr<StreamParserBuffer> buffer = + StreamParserBuffer::CopyFrom(data, frame_size, true); + buffer->set_timestamp(timestamp_helper_->GetTimestamp()); + buffer->set_duration(timestamp_helper_->GetFrameDuration(sample_count)); + buffers->push_back(buffer); + + 
timestamp_helper_->AddFrames(sample_count); + + return frame_size; +} + +static int LocateEndOfHeaders(const uint8_t* buf, int buf_len, int i) { + bool was_lf = false; + char last_c = '\0'; + for (; i < buf_len; ++i) { + char c = buf[i]; + if (c == '\n') { + if (was_lf) + return i + 1; + was_lf = true; + } else if (c != '\r' || last_c != '\n') { + was_lf = false; + } + last_c = c; + } + return -1; +} + + +int MP3StreamParser::ParseIcecastHeader(const uint8* data, int size) { + DVLOG(1) << __FUNCTION__ << "(" << size << ")"; + + if (size < 4) + return 0; + + if (memcmp("ICY ", data, 4)) + return -1; + + int locate_size = std::min(size, kMaxIcecastHeaderSize); + int offset = LocateEndOfHeaders(data, locate_size, 4); + if (offset < 0) { + if (locate_size == kMaxIcecastHeaderSize) { + MEDIA_LOG(log_cb_) << "Icecast header is too large."; + return -1; + } + + return 0; + } + + return offset; +} + +int MP3StreamParser::ParseID3v1(const uint8* data, int size) { + DVLOG(1) << __FUNCTION__ << "(" << size << ")"; + + if (size < kID3v1Size) + return 0; + + // TODO(acolwell): Add code to actually validate ID3v1 data and + // expose it as a metadata text track. + return !memcmp(data, "TAG+", 4) ? kID3v1ExtendedSize : kID3v1Size; +} + +int MP3StreamParser::ParseID3v2(const uint8* data, int size) { + DVLOG(1) << __FUNCTION__ << "(" << size << ")"; + + if (size < 10) + return 0; + + BitReader reader(data, size); + int32 id; + int version; + uint8 flags; + int32 id3_size; + + if (!reader.ReadBits(24, &id) || + !reader.ReadBits(16, &version) || + !reader.ReadBits(8, &flags) || + !ParseSyncSafeInt(&reader, &id3_size)) { + return -1; + } + + int32 actual_tag_size = 10 + id3_size; + + // Increment size if 'Footer present' flag is set. + if (flags & 0x10) + actual_tag_size += 10; + + // Make sure we have the entire tag. + if (size < actual_tag_size) + return 0; + + // TODO(acolwell): Add code to actually validate ID3v2 data and + // expose it as a metadata text track. 
+ return actual_tag_size; +} + +bool MP3StreamParser::ParseSyncSafeInt(BitReader* reader, int32* value) { + *value = 0; + for (int i = 0; i < 4; ++i) { + uint8 tmp; + if (!reader->ReadBits(1, &tmp) || tmp != 0) { + MEDIA_LOG(log_cb_) << "ID3 syncsafe integer byte MSb is not 0!"; + return false; + } + + if (!reader->ReadBits(7, &tmp)) + return false; + + *value <<= 7; + *value += tmp; + } + + return true; +} + +int MP3StreamParser::FindNextValidStartCode(const uint8* data, int size) const { + const uint8* start = data; + const uint8* end = data + size; + + while (start < end) { + int bytes_left = end - start; + const uint8* candidate_start_code = + static_cast<const uint8*>(memchr(start, 0xff, bytes_left)); + + if (!candidate_start_code) + return 0; + + bool parse_header_failed = false; + const uint8* sync = candidate_start_code; + // Try to find 3 valid frames in a row. 3 was selected to decrease + // the probability of false positives. + for (int i = 0; i < 3; ++i) { + int sync_size = end - sync; + int frame_size; + int sync_bytes = ParseFrameHeader( + sync, sync_size, &frame_size, NULL, NULL, NULL); + + if (sync_bytes == 0) + return 0; + + if (sync_bytes > 0) { + DCHECK_LT(sync_bytes, sync_size); + + // Skip over this frame so we can check the next one. + sync += frame_size; + + // Make sure the next frame starts inside the buffer. + if (sync >= end) + return 0; + } else { + DVLOG(1) << "ParseFrameHeader() " << i << " failed @" << (sync - data); + parse_header_failed = true; + break; + } + } + + if (parse_header_failed) { + // One of the frame header parses failed so |candidate_start_code| + // did not point to the start of a real frame. Move |start| forward + // so we can find the next candidate. 
+ start = candidate_start_code + 1; + continue; + } + + return candidate_start_code - data; + } + + return 0; +} + +bool MP3StreamParser::SendBuffers(BufferQueue* buffers, bool end_of_segment) { + DCHECK(!buffers->empty()); + + if (!in_media_segment_) { + in_media_segment_ = true; + new_segment_cb_.Run(); + } + + BufferQueue empty_video_buffers; + if (!new_buffers_cb_.Run(*buffers, empty_video_buffers)) + return false; + buffers->clear(); + + if (end_of_segment) { + in_media_segment_ = false; + end_of_segment_cb_.Run(); + } + + return true; +} + +} // namespace media diff --git a/media/formats/mp3/mp3_stream_parser.h b/media/formats/mp3/mp3_stream_parser.h new file mode 100644 index 0000000..5d1778c --- /dev/null +++ b/media/formats/mp3/mp3_stream_parser.h @@ -0,0 +1,126 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_MP3_MP3_STREAM_PARSER_H_ +#define MEDIA_FORMATS_MP3_MP3_STREAM_PARSER_H_ + +#include <set> +#include <vector> + +#include "base/basictypes.h" +#include "base/callback.h" +#include "media/base/audio_decoder_config.h" +#include "media/base/audio_timestamp_helper.h" +#include "media/base/byte_queue.h" +#include "media/base/media_export.h" +#include "media/base/stream_parser.h" + +namespace media { + +class BitReader; + +class MEDIA_EXPORT MP3StreamParser : public StreamParser { + public: + MP3StreamParser(); + virtual ~MP3StreamParser(); + + // StreamParser implementation. 
+ virtual void Init(const InitCB& init_cb, const NewConfigCB& config_cb, + const NewBuffersCB& new_buffers_cb, + const NewTextBuffersCB& text_cb, + const NeedKeyCB& need_key_cb, + const NewMediaSegmentCB& new_segment_cb, + const base::Closure& end_of_segment_cb, + const LogCB& log_cb) OVERRIDE; + virtual void Flush() OVERRIDE; + virtual bool Parse(const uint8* buf, int size) OVERRIDE; + + private: + enum State { + UNINITIALIZED, + INITIALIZED, + PARSE_ERROR + }; + + State state_; + + InitCB init_cb_; + NewConfigCB config_cb_; + NewBuffersCB new_buffers_cb_; + NewMediaSegmentCB new_segment_cb_; + base::Closure end_of_segment_cb_; + LogCB log_cb_; + + ByteQueue queue_; + + AudioDecoderConfig config_; + scoped_ptr<AudioTimestampHelper> timestamp_helper_; + bool in_media_segment_; + + void ChangeState(State state); + + // Parsing functions for various byte stream elements. + // |data| & |size| describe the data available for parsing. + // These functions are expected to consume an entire frame/header. + // It should only return a value greater than 0 when |data| has + // enough bytes to successfully parse & consume the entire element. + // + // |frame_size| - Required parameter that is set to the size of the frame, in + // bytes, including the frame header if the function returns a value > 0. + // |sample_rate| - Optional parameter that is set to the sample rate + // of the frame if this function returns a value > 0. + // |channel_layout| - Optional parameter that is set to the channel_layout + // of the frame if this function returns a value > 0. + // |sample_count| - Optional parameter that is set to the number of samples + // in the frame if this function returns a value > 0. + // + // |sample_rate|, |channel_layout|, |sample_count| may be NULL if the caller + // is not interested in receiving these values from the frame header. + // + // Returns: + // > 0 : The number of bytes parsed. + // 0 : If more data is needed to parse the entire element. 
+ // < 0 : An error was encountered during parsing. + int ParseFrameHeader(const uint8* data, int size, + int* frame_size, + int* sample_rate, + ChannelLayout* channel_layout, + int* sample_count) const; + int ParseMP3Frame(const uint8* data, int size, BufferQueue* buffers); + int ParseIcecastHeader(const uint8* data, int size); + int ParseID3v1(const uint8* data, int size); + int ParseID3v2(const uint8* data, int size); + + // Parses an ID3v2 "sync safe" integer. + // |reader| - A BitReader to read from. + // |value| - Set to the integer value read, if true is returned. + // + // Returns true if the integer was successfully parsed and |value| + // was set. + // Returns false if an error was encountered. The state of |value| is + // undefined when false is returned. + bool ParseSyncSafeInt(BitReader* reader, int32* value); + + // Scans |data| for the next valid start code. + // Returns: + // > 0 : The number of bytes that should be skipped to reach the + // next start code.. + // 0 : If a valid start code was not found and more data is needed. + // < 0 : An error was encountered during parsing. + int FindNextValidStartCode(const uint8* data, int size) const; + + // Sends the buffers in |buffers| to |new_buffers_cb_| and then clears + // |buffers|. + // If |end_of_segment| is set to true, then |end_of_segment_cb_| is called + // after |new_buffers_cb_| to signal that these buffers represent the end of a + // media segment. + // Returns true if the buffers are sent successfully. + bool SendBuffers(BufferQueue* buffers, bool end_of_segment); + + DISALLOW_COPY_AND_ASSIGN(MP3StreamParser); +}; + +} // namespace media + +#endif // MEDIA_FORMATS_MP3_MP3_STREAM_PARSER_H_ diff --git a/media/formats/mp3/mp3_stream_parser_unittest.cc b/media/formats/mp3/mp3_stream_parser_unittest.cc new file mode 100644 index 0000000..a279a1c --- /dev/null +++ b/media/formats/mp3/mp3_stream_parser_unittest.cc @@ -0,0 +1,165 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/bind.h" +#include "media/base/audio_decoder_config.h" +#include "media/base/decoder_buffer.h" +#include "media/base/stream_parser_buffer.h" +#include "media/base/test_data_util.h" +#include "media/base/text_track_config.h" +#include "media/base/video_decoder_config.h" +#include "media/formats/mp3/mp3_stream_parser.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace media { + +class MP3StreamParserTest : public testing::Test { + public: + MP3StreamParserTest() {} + + protected: + MP3StreamParser parser_; + std::stringstream results_stream_; + + bool AppendData(const uint8* data, size_t length) { + return parser_.Parse(data, length); + } + + bool AppendDataInPieces(const uint8* data, size_t length, size_t piece_size) { + const uint8* start = data; + const uint8* end = data + length; + while (start < end) { + size_t append_size = + std::min(piece_size, static_cast<size_t>(end - start)); + if (!AppendData(start, append_size)) + return false; + start += append_size; + } + return true; + } + + void OnInitDone(bool success, base::TimeDelta duration) { + DVLOG(1) << __FUNCTION__ << "(" << success << ", " + << duration.InMilliseconds() << ")"; + } + + bool OnNewConfig(const AudioDecoderConfig& audio_config, + const VideoDecoderConfig& video_config, + const StreamParser::TextTrackConfigMap& text_config) { + DVLOG(1) << __FUNCTION__ << "(" << audio_config.IsValidConfig() << ", " + << video_config.IsValidConfig() << ")"; + EXPECT_TRUE(audio_config.IsValidConfig()); + EXPECT_FALSE(video_config.IsValidConfig()); + return true; + } + + std::string BufferQueueToString(const StreamParser::BufferQueue& buffers) { + std::stringstream ss; + + ss << "{"; + for (StreamParser::BufferQueue::const_iterator itr = buffers.begin(); + itr != buffers.end(); + ++itr) { + ss << " " << (*itr)->timestamp().InMilliseconds(); + if ((*itr)->IsKeyframe()) + ss << "K"; 
+ } + ss << " }"; + + return ss.str(); + } + + bool OnNewBuffers(const StreamParser::BufferQueue& audio_buffers, + const StreamParser::BufferQueue& video_buffers) { + EXPECT_FALSE(audio_buffers.empty()); + EXPECT_TRUE(video_buffers.empty()); + + std::string buffers_str = BufferQueueToString(audio_buffers); + DVLOG(1) << __FUNCTION__ << " : " << buffers_str; + results_stream_ << buffers_str; + return true; + } + + void OnKeyNeeded(const std::string& type, + const std::vector<uint8>& init_data) { + DVLOG(1) << __FUNCTION__ << "(" << type << ", " << init_data.size() << ")"; + } + + void OnNewSegment() { + DVLOG(1) << __FUNCTION__; + results_stream_ << "NewSegment"; + } + + void OnEndOfSegment() { + DVLOG(1) << __FUNCTION__; + results_stream_ << "EndOfSegment"; + } + + void InitializeParser() { + parser_.Init( + base::Bind(&MP3StreamParserTest::OnInitDone, base::Unretained(this)), + base::Bind(&MP3StreamParserTest::OnNewConfig, base::Unretained(this)), + base::Bind(&MP3StreamParserTest::OnNewBuffers, base::Unretained(this)), + StreamParser::NewTextBuffersCB(), + base::Bind(&MP3StreamParserTest::OnKeyNeeded, base::Unretained(this)), + base::Bind(&MP3StreamParserTest::OnNewSegment, base::Unretained(this)), + base::Bind(&MP3StreamParserTest::OnEndOfSegment, + base::Unretained(this)), + LogCB()); + } + + std::string ParseFile(const std::string& filename, int append_bytes) { + results_stream_.clear(); + InitializeParser(); + + scoped_refptr<DecoderBuffer> buffer = ReadTestDataFile(filename); + EXPECT_TRUE( + AppendDataInPieces(buffer->data(), buffer->data_size(), append_bytes)); + return results_stream_.str(); + } +}; + +// Test parsing with small prime sized chunks to smoke out "power of +// 2" field size assumptions. 
// Feeds sfx.mp3 to the parser 17 bytes at a time and checks the exact trace
// of segment boundaries and buffer timestamps.
TEST_F(MP3StreamParserTest, UnalignedAppend) {
  std::string expected =
      "NewSegment"
      "{ 0K }"
      "{ 26K }"
      "{ 52K }"
      "{ 78K }"
      "{ 104K }"
      "{ 130K }"
      "{ 156K }"
      "{ 182K }"
      "EndOfSegment"
      "NewSegment"
      "{ 208K }"
      "{ 235K }"
      "{ 261K }"
      "EndOfSegment"
      "NewSegment"
      "{ 287K }"
      "{ 313K }"
      "EndOfSegment";
  EXPECT_EQ(expected, ParseFile("sfx.mp3", 17));
}

// Test parsing with a larger piece size to verify that multiple buffers
// are passed to |new_buffer_cb_|.
TEST_F(MP3StreamParserTest, UnalignedAppend512) {
  std::string expected =
      "NewSegment"
      "{ 0K }"
      "{ 26K 52K 78K 104K }"
      "EndOfSegment"
      "NewSegment"
      "{ 130K 156K 182K }"
      "{ 208K 235K 261K 287K }"
      "{ 313K }"
      "EndOfSegment";
  EXPECT_EQ(expected, ParseFile("sfx.mp3", 512));
}

}  // namespace media

// diff --git a/media/formats/mp4/aac.cc b/media/formats/mp4/aac.cc new file mode 100644 index 0000000..fedb33f --- /dev/null +++ b/media/formats/mp4/aac.cc @@ -0,0 +1,275 @@
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/formats/mp4/aac.h"

#include <algorithm>

#include "base/logging.h"
#include "media/base/bit_reader.h"
#include "media/formats/mp4/rcheck.h"

// The following conversion table is extracted from ISO 14496 Part 3 -
// Table 1.16 - Sampling Frequency Index.
static const int kFrequencyMap[] = {
  96000, 88200, 64000, 48000, 44100, 32000, 24000,
  22050, 16000, 12000, 11025, 8000, 7350
};

namespace media {

// Maps the channel configuration field to a ChannelLayout. Values without a
// case below yield CHANNEL_LAYOUT_UNSUPPORTED.
// NOTE(review): value 8 is handled here, but AAC::Parse() only accepts
// channel_config_ <= 7, so that case looks unreachable through Parse() —
// confirm which value is meant to select 7.1.
static ChannelLayout ConvertChannelConfigToLayout(uint8 channel_config) {
  switch (channel_config) {
    case 1:
      return CHANNEL_LAYOUT_MONO;
    case 2:
      return CHANNEL_LAYOUT_STEREO;
    case 3:
      return CHANNEL_LAYOUT_SURROUND;
    case 4:
      return CHANNEL_LAYOUT_4_0;
    case 5:
      return CHANNEL_LAYOUT_5_0;
    case 6:
      return CHANNEL_LAYOUT_5_1;
    case 8:
      return CHANNEL_LAYOUT_7_1;
    default:
      break;
  }

  return CHANNEL_LAYOUT_UNSUPPORTED;
}

namespace mp4 {

AAC::AAC()
    : profile_(0), frequency_index_(0), channel_config_(0), frequency_(0),
      extension_frequency_(0), channel_layout_(CHANNEL_LAYOUT_UNSUPPORTED) {
}

AAC::~AAC() {
}

// Parses an AudioSpecificConfig from |data| and fills in the profile,
// sampling frequencies, channel configuration and channel layout members.
// Returns false if |data| is empty, truncated, or describes a configuration
// rejected by the final check at the bottom of this function.
bool AAC::Parse(const std::vector<uint8>& data) {
#if defined(OS_ANDROID)
  codec_specific_data_ = data;
#endif
  if (data.empty())
    return false;

  BitReader reader(&data[0], data.size());
  uint8 extension_type = 0;
  bool ps_present = false;
  // 0xff means "no extension frequency index was read".
  uint8 extension_frequency_index = 0xff;

  frequency_ = 0;
  extension_frequency_ = 0;

  // The following code is written according to ISO 14496 Part 3 Table 1.13 -
  // Syntax of AudioSpecificConfig.

  // Read base configuration
  RCHECK(reader.ReadBits(5, &profile_));
  RCHECK(reader.ReadBits(4, &frequency_index_));
  // Index 0xf means the frequency follows explicitly as a 24-bit value.
  if (frequency_index_ == 0xf)
    RCHECK(reader.ReadBits(24, &frequency_));
  RCHECK(reader.ReadBits(4, &channel_config_));

  // Read extension configuration.
  if (profile_ == 5 || profile_ == 29) {
    ps_present = (profile_ == 29);
    extension_type = 5;
    RCHECK(reader.ReadBits(4, &extension_frequency_index));
    if (extension_frequency_index == 0xf)
      RCHECK(reader.ReadBits(24, &extension_frequency_));
    // The profile read earlier is replaced by the one that follows the
    // extension data.
    RCHECK(reader.ReadBits(5, &profile_));
  }

  RCHECK(SkipDecoderGASpecificConfig(&reader));
  RCHECK(SkipErrorSpecificConfig());

  // Read extension configuration again
  // Note: The check for 16 available bits comes from the AAC spec.
  if (extension_type != 5 && reader.bits_available() >= 16) {
    uint16 sync_extension_type;
    uint8 sbr_present_flag;
    uint8 ps_present_flag;

    if (reader.ReadBits(11, &sync_extension_type) &&
        sync_extension_type == 0x2b7) {
      if (reader.ReadBits(5, &extension_type) && extension_type == 5) {
        RCHECK(reader.ReadBits(1, &sbr_present_flag));

        if (sbr_present_flag) {
          RCHECK(reader.ReadBits(4, &extension_frequency_index));

          if (extension_frequency_index == 0xf)
            RCHECK(reader.ReadBits(24, &extension_frequency_));

          // Note: The check for 12 available bits comes from the AAC spec.
          if (reader.bits_available() >= 12) {
            RCHECK(reader.ReadBits(11, &sync_extension_type));
            if (sync_extension_type == 0x548) {
              RCHECK(reader.ReadBits(1, &ps_present_flag));
              ps_present = ps_present_flag != 0;
            }
          }
        }
      }
    }
  }

  // Resolve table-based frequencies that were not given explicitly.
  if (frequency_ == 0) {
    RCHECK(frequency_index_ < arraysize(kFrequencyMap));
    frequency_ = kFrequencyMap[frequency_index_];
  }

  if (extension_frequency_ == 0 && extension_frequency_index != 0xff) {
    RCHECK(extension_frequency_index < arraysize(kFrequencyMap));
    extension_frequency_ = kFrequencyMap[extension_frequency_index];
  }

  // When Parametric Stereo is on, mono will be played as stereo.
  if (ps_present && channel_config_ == 1)
    channel_layout_ = CHANNEL_LAYOUT_STEREO;
  else
    channel_layout_ = ConvertChannelConfigToLayout(channel_config_);

  // These are the same constraints ConvertEsdsToADTS() DCHECKs before it
  // packs the values into the ADTS header.
  return frequency_ != 0 && channel_layout_ != CHANNEL_LAYOUT_UNSUPPORTED &&
      profile_ >= 1 && profile_ <= 4 && frequency_index_ != 0xf &&
      channel_config_ <= 7;
}

// Returns the extension frequency when one was parsed; otherwise the base
// frequency, doubled and capped at 48000 when |sbr_in_mimetype| is set.
int AAC::GetOutputSamplesPerSecond(bool sbr_in_mimetype) const {
  if (extension_frequency_ > 0)
    return extension_frequency_;

  if (!sbr_in_mimetype)
    return frequency_;

  // The following code is written according to ISO 14496 Part 3 Table 1.11 and
  // Table 1.22. (Table 1.11 refers to the capping to 48000, Table 1.22 refers
  // to SBR doubling the AAC sample rate.)
  // TODO(acolwell) : Extend sample rate cap to 96kHz for Level 5 content.
  DCHECK_GT(frequency_, 0);
  return std::min(2 * frequency_, 48000);
}

ChannelLayout AAC::GetChannelLayout(bool sbr_in_mimetype) const {
  // Check for implicit signalling of HE-AAC and indicate stereo output
  // if the mono channel configuration is signalled.
  // See ISO-14496-3 Section 1.6.6.1.2 for details about this special casing.
  if (sbr_in_mimetype && channel_config_ == 1)
    return CHANNEL_LAYOUT_STEREO;

  return channel_layout_;
}

// Prepends a kADTSHeaderSize-byte ADTS header to |buffer| in place.
// Returns false, leaving |buffer| untouched, if the resulting packet size
// does not fit in the header's 13-bit size field.
bool AAC::ConvertEsdsToADTS(std::vector<uint8>* buffer) const {
  size_t size = buffer->size() + kADTSHeaderSize;

  DCHECK(profile_ >= 1 && profile_ <= 4 && frequency_index_ != 0xf &&
         channel_config_ <= 7);

  // ADTS header uses 13 bits for packet size.
  if (size >= (1 << 13))
    return false;

  std::vector<uint8>& adts = *buffer;

  adts.insert(buffer->begin(), kADTSHeaderSize, 0);
  adts[0] = 0xff;
  adts[1] = 0xf1;
  adts[2] = ((profile_ - 1) << 6) + (frequency_index_ << 2) +
      (channel_config_ >> 2);
  // |size| is split across bytes 3-5: top 2 bits, middle 8 bits, low 3 bits.
  adts[3] = ((channel_config_ & 0x3) << 6) + (size >> 11);
  adts[4] = (size & 0x7ff) >> 3;
  adts[5] = ((size & 7) << 5) + 0x1f;
  adts[6] = 0xfc;

  return true;
}

// Currently this function only support GASpecificConfig defined in
// ISO 14496 Part 3 Table 4.1 - Syntax of GASpecificConfig()
bool AAC::SkipDecoderGASpecificConfig(BitReader* bit_reader) const {
  switch (profile_) {
    case 1:
    case 2:
    case 3:
    case 4:
    case 6:
    case 7:
    case 17:
    case 19:
    case 20:
    case 21:
    case 22:
    case 23:
      return SkipGASpecificConfig(bit_reader);
    default:
      break;
  }

  return false;
}

// Returns false for the profiles listed below, which makes Parse() fail for
// them; returns true for every other profile. No bits are consumed.
bool AAC::SkipErrorSpecificConfig() const {
  switch (profile_) {
    case 17:
    case 19:
    case 20:
    case 21:
    case 22:
    case 23:
    case 24:
    case 25:
    case 26:
    case 27:
      return false;
    default:
      break;
  }

  return true;
}

// The following code is written according to ISO 14496 part 3 Table 4.1 -
// GASpecificConfig.
// Consumes the GASpecificConfig fields from |bit_reader| without storing
// them. Returns false if a read fails or if channel_config_ is 0.
bool AAC::SkipGASpecificConfig(BitReader* bit_reader) const {
  uint8 extension_flag = 0;
  uint8 depends_on_core_coder;
  uint16 dummy;

  RCHECK(bit_reader->ReadBits(1, &dummy));  // frameLengthFlag
  RCHECK(bit_reader->ReadBits(1, &depends_on_core_coder));
  if (depends_on_core_coder == 1)
    RCHECK(bit_reader->ReadBits(14, &dummy));  // coreCoderDelay

  RCHECK(bit_reader->ReadBits(1, &extension_flag));
  RCHECK(channel_config_ != 0);

  if (profile_ == 6 || profile_ == 20)
    RCHECK(bit_reader->ReadBits(3, &dummy));  // layerNr

  if (extension_flag) {
    if (profile_ == 22) {
      RCHECK(bit_reader->ReadBits(5, &dummy));  // numOfSubFrame
      RCHECK(bit_reader->ReadBits(11, &dummy));  // layer_length
    }

    if (profile_ == 17 || profile_ == 19 || profile_ == 20 || profile_ == 23) {
      RCHECK(bit_reader->ReadBits(3, &dummy));  // resilience flags
    }

    RCHECK(bit_reader->ReadBits(1, &dummy));  // extensionFlag3
  }

  return true;
}

}  // namespace mp4

}  // namespace media
diff --git a/media/formats/mp4/aac.h b/media/formats/mp4/aac.h
new file mode 100644
index 0000000..333d621
--- /dev/null
+++ b/media/formats/mp4/aac.h
@@ -0,0 +1,94 @@
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef MEDIA_FORMATS_MP4_AAC_H_
#define MEDIA_FORMATS_MP4_AAC_H_

#include <vector>

#include "base/basictypes.h"
#include "media/base/channel_layout.h"
#include "media/base/media_export.h"

namespace media {

class BitReader;

namespace mp4 {

// This class parses the AAC information from decoder specific information
// embedded in the esds box in an ISO BMFF file.
// Please refer to ISO 14496 Part 3 Table 1.13 - Syntax of AudioSpecificConfig
// for more details.
class MEDIA_EXPORT AAC {
 public:
  AAC();
  ~AAC();

  // Parses the AAC config from the raw decoder specific information bytes
  // embedded in the esds box (an AudioSpecificConfig) and extracts the audio
  // stream configuration from it.
  // Returns true on success, false if |data| is empty, malformed or describes
  // a configuration this class does not support.
  bool Parse(const std::vector<uint8>& data);

  // Gets the output sample rate for the AAC stream.
  // |sbr_in_mimetype| should be set to true if the SBR mode is
  // signalled in the mimetype. (ie mp4a.40.5 in the codecs parameter).
  // Returns the samples_per_second value that should be used in an
  // AudioDecoderConfig.
  int GetOutputSamplesPerSecond(bool sbr_in_mimetype) const;

  // Gets the channel layout for the AAC stream.
  // |sbr_in_mimetype| should be set to true if the SBR mode is
  // signalled in the mimetype. (ie mp4a.40.5 in the codecs parameter).
  // Returns the channel_layout value that should be used in an
  // AudioDecoderConfig.
  ChannelLayout GetChannelLayout(bool sbr_in_mimetype) const;

  // This function converts a raw AAC frame into an AAC frame with an ADTS
  // header. On success, the function returns true and stores the converted data
  // in the buffer. The function returns false on failure and leaves the buffer
  // unchanged.
  bool ConvertEsdsToADTS(std::vector<uint8>* buffer) const;

#if defined(OS_ANDROID)
  // Returns the codec specific data needed by android MediaCodec.
  std::vector<uint8> codec_specific_data() const {
    return codec_specific_data_;
  }
#endif

  // Size in bytes of the ADTS header added by ConvertEsdsToADTS().
  static const size_t kADTSHeaderSize = 7;

 private:
  // Helpers used by Parse() to step over the parts of the config that are
  // not stored. Each returns false when parsing cannot continue.
  bool SkipDecoderGASpecificConfig(BitReader* bit_reader) const;
  bool SkipErrorSpecificConfig() const;
  bool SkipGASpecificConfig(BitReader* bit_reader) const;

  // The following variables store the AAC specific configuration information
  // that are used to generate the ADTS header.
  uint8 profile_;
  uint8 frequency_index_;
  uint8 channel_config_;

#if defined(OS_ANDROID)
  // The codec specific data needed by the android MediaCodec.
  std::vector<uint8> codec_specific_data_;
#endif

  // The following variables store audio configuration information that
  // can be used by Chromium. They are based on the AAC specific
  // configuration but can be overridden by extensions in elementary
  // stream descriptor.
  int frequency_;
  int extension_frequency_;
  ChannelLayout channel_layout_;
};

}  // namespace mp4

}  // namespace media

#endif  // MEDIA_FORMATS_MP4_AAC_H_
diff --git a/media/formats/mp4/aac_unittest.cc b/media/formats/mp4/aac_unittest.cc
new file mode 100644
index 0000000..6c2e797
--- /dev/null
+++ b/media/formats/mp4/aac_unittest.cc
@@ -0,0 +1,146 @@
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/formats/mp4/aac.h"

#include "testing/gtest/include/gtest/gtest.h"

namespace media {

namespace mp4 {

// A plain two-byte config parses and reports 44100 Hz stereo.
TEST(AACTest, BasicProfileTest) {
  AAC aac;
  uint8 buffer[] = {0x12, 0x10};
  std::vector<uint8> data;

  data.assign(buffer, buffer + sizeof(buffer));

  EXPECT_TRUE(aac.Parse(data));
  EXPECT_EQ(aac.GetOutputSamplesPerSecond(false), 44100);
  EXPECT_EQ(aac.GetChannelLayout(false), CHANNEL_LAYOUT_STEREO);
}

// A config carrying trailing extension data reports 48000 Hz whether or not
// SBR is signalled in the mimetype.
TEST(AACTest, ExtensionTest) {
  AAC aac;
  uint8 buffer[] = {0x13, 0x08, 0x56, 0xe5, 0x9d, 0x48, 0x80};
  std::vector<uint8> data;

  data.assign(buffer, buffer + sizeof(buffer));

  EXPECT_TRUE(aac.Parse(data));
  EXPECT_EQ(aac.GetOutputSamplesPerSecond(false), 48000);
  EXPECT_EQ(aac.GetOutputSamplesPerSecond(true), 48000);
  EXPECT_EQ(aac.GetChannelLayout(false), CHANNEL_LAYOUT_STEREO);
}

// Test implicit SBR with mono channel config.
// Mono channel layout should only be reported if SBR is not
// specified. Otherwise stereo should be reported.
// See ISO-14496-3 Section 1.6.6.1.2 for details about this special casing.
+TEST(AACTest, ImplicitSBR_ChannelConfig0) { + AAC aac; + uint8 buffer[] = {0x13, 0x08}; + std::vector<uint8> data; + + data.assign(buffer, buffer + sizeof(buffer)); + + EXPECT_TRUE(aac.Parse(data)); + + // Test w/o implict SBR. + EXPECT_EQ(aac.GetOutputSamplesPerSecond(false), 24000); + EXPECT_EQ(aac.GetChannelLayout(false), CHANNEL_LAYOUT_MONO); + + // Test implicit SBR. + EXPECT_EQ(aac.GetOutputSamplesPerSecond(true), 48000); + EXPECT_EQ(aac.GetChannelLayout(true), CHANNEL_LAYOUT_STEREO); +} + +// Tests implicit SBR with a stereo channel config. +TEST(AACTest, ImplicitSBR_ChannelConfig1) { + AAC aac; + uint8 buffer[] = {0x13, 0x10}; + std::vector<uint8> data; + + data.assign(buffer, buffer + sizeof(buffer)); + + EXPECT_TRUE(aac.Parse(data)); + + // Test w/o implict SBR. + EXPECT_EQ(aac.GetOutputSamplesPerSecond(false), 24000); + EXPECT_EQ(aac.GetChannelLayout(false), CHANNEL_LAYOUT_STEREO); + + // Test implicit SBR. + EXPECT_EQ(aac.GetOutputSamplesPerSecond(true), 48000); + EXPECT_EQ(aac.GetChannelLayout(true), CHANNEL_LAYOUT_STEREO); +} + +TEST(AACTest, SixChannelTest) { + AAC aac; + uint8 buffer[] = {0x11, 0xb0}; + std::vector<uint8> data; + + data.assign(buffer, buffer + sizeof(buffer)); + + EXPECT_TRUE(aac.Parse(data)); + EXPECT_EQ(aac.GetOutputSamplesPerSecond(false), 48000); + EXPECT_EQ(aac.GetChannelLayout(false), CHANNEL_LAYOUT_5_1); +} + +TEST(AACTest, DataTooShortTest) { + AAC aac; + std::vector<uint8> data; + + EXPECT_FALSE(aac.Parse(data)); + + data.push_back(0x12); + EXPECT_FALSE(aac.Parse(data)); +} + +TEST(AACTest, IncorrectProfileTest) { + AAC aac; + uint8 buffer[] = {0x0, 0x08}; + std::vector<uint8> data; + + data.assign(buffer, buffer + sizeof(buffer)); + + EXPECT_FALSE(aac.Parse(data)); + + data[0] = 0x08; + EXPECT_TRUE(aac.Parse(data)); + + data[0] = 0x28; + EXPECT_FALSE(aac.Parse(data)); +} + +TEST(AACTest, IncorrectFrequencyTest) { + AAC aac; + uint8 buffer[] = {0x0f, 0x88}; + std::vector<uint8> data; + + data.assign(buffer, buffer + 
sizeof(buffer)); + + EXPECT_FALSE(aac.Parse(data)); + + data[0] = 0x0e; + data[1] = 0x08; + EXPECT_TRUE(aac.Parse(data)); +} + +TEST(AACTest, IncorrectChannelTest) { + AAC aac; + uint8 buffer[] = {0x0e, 0x00}; + std::vector<uint8> data; + + data.assign(buffer, buffer + sizeof(buffer)); + + EXPECT_FALSE(aac.Parse(data)); + + data[1] = 0x08; + EXPECT_TRUE(aac.Parse(data)); +} + +} // namespace mp4 + +} // namespace media diff --git a/media/formats/mp4/avc.cc b/media/formats/mp4/avc.cc new file mode 100644 index 0000000..6b670fe --- /dev/null +++ b/media/formats/mp4/avc.cc @@ -0,0 +1,91 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/mp4/avc.h" + +#include <algorithm> +#include <vector> + +#include "media/formats/mp4/box_definitions.h" +#include "media/formats/mp4/box_reader.h" + +namespace media { +namespace mp4 { + +static const uint8 kAnnexBStartCode[] = {0, 0, 0, 1}; +static const int kAnnexBStartCodeSize = 4; + +static bool ConvertAVCToAnnexBInPlaceForLengthSize4(std::vector<uint8>* buf) { + const int kLengthSize = 4; + size_t pos = 0; + while (pos + kLengthSize < buf->size()) { + int nal_size = (*buf)[pos]; + nal_size = (nal_size << 8) + (*buf)[pos+1]; + nal_size = (nal_size << 8) + (*buf)[pos+2]; + nal_size = (nal_size << 8) + (*buf)[pos+3]; + std::copy(kAnnexBStartCode, kAnnexBStartCode + kAnnexBStartCodeSize, + buf->begin() + pos); + pos += kLengthSize + nal_size; + } + return pos == buf->size(); +} + +// static +bool AVC::ConvertFrameToAnnexB(int length_size, std::vector<uint8>* buffer) { + RCHECK(length_size == 1 || length_size == 2 || length_size == 4); + + if (length_size == 4) + return ConvertAVCToAnnexBInPlaceForLengthSize4(buffer); + + std::vector<uint8> temp; + temp.swap(*buffer); + buffer->reserve(temp.size() + 32); + + size_t pos = 0; + while (pos + length_size < temp.size()) { + int nal_size = 
temp[pos]; + if (length_size == 2) nal_size = (nal_size << 8) + temp[pos+1]; + pos += length_size; + + RCHECK(pos + nal_size <= temp.size()); + buffer->insert(buffer->end(), kAnnexBStartCode, + kAnnexBStartCode + kAnnexBStartCodeSize); + buffer->insert(buffer->end(), temp.begin() + pos, + temp.begin() + pos + nal_size); + pos += nal_size; + } + return pos == temp.size(); +} + +// static +bool AVC::ConvertConfigToAnnexB( + const AVCDecoderConfigurationRecord& avc_config, + std::vector<uint8>* buffer) { + DCHECK(buffer->empty()); + buffer->clear(); + int total_size = 0; + for (size_t i = 0; i < avc_config.sps_list.size(); i++) + total_size += avc_config.sps_list[i].size() + kAnnexBStartCodeSize; + for (size_t i = 0; i < avc_config.pps_list.size(); i++) + total_size += avc_config.pps_list[i].size() + kAnnexBStartCodeSize; + buffer->reserve(total_size); + + for (size_t i = 0; i < avc_config.sps_list.size(); i++) { + buffer->insert(buffer->end(), kAnnexBStartCode, + kAnnexBStartCode + kAnnexBStartCodeSize); + buffer->insert(buffer->end(), avc_config.sps_list[i].begin(), + avc_config.sps_list[i].end()); + } + + for (size_t i = 0; i < avc_config.pps_list.size(); i++) { + buffer->insert(buffer->end(), kAnnexBStartCode, + kAnnexBStartCode + kAnnexBStartCodeSize); + buffer->insert(buffer->end(), avc_config.pps_list[i].begin(), + avc_config.pps_list[i].end()); + } + return true; +} + +} // namespace mp4 +} // namespace media diff --git a/media/formats/mp4/avc.h b/media/formats/mp4/avc.h new file mode 100644 index 0000000..731e401 --- /dev/null +++ b/media/formats/mp4/avc.h @@ -0,0 +1,30 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef MEDIA_FORMATS_MP4_AVC_H_ +#define MEDIA_FORMATS_MP4_AVC_H_ + +#include <vector> + +#include "base/basictypes.h" +#include "media/base/media_export.h" + +namespace media { +namespace mp4 { + +struct AVCDecoderConfigurationRecord; + +class MEDIA_EXPORT AVC { + public: + static bool ConvertFrameToAnnexB(int length_size, std::vector<uint8>* buffer); + + static bool ConvertConfigToAnnexB( + const AVCDecoderConfigurationRecord& avc_config, + std::vector<uint8>* buffer); +}; + +} // namespace mp4 +} // namespace media + +#endif // MEDIA_FORMATS_MP4_AVC_H_ diff --git a/media/formats/mp4/avc_unittest.cc b/media/formats/mp4/avc_unittest.cc new file mode 100644 index 0000000..f6a1d569 --- /dev/null +++ b/media/formats/mp4/avc_unittest.cc @@ -0,0 +1,94 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <string.h> + +#include "base/basictypes.h" +#include "media/base/stream_parser_buffer.h" +#include "media/formats/mp4/avc.h" +#include "media/formats/mp4/box_definitions.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace media { +namespace mp4 { + +static const uint8 kNALU1[] = { 0x01, 0x02, 0x03 }; +static const uint8 kNALU2[] = { 0x04, 0x05, 0x06, 0x07 }; +static const uint8 kExpected[] = { + 0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x03, + 0x00, 0x00, 0x00, 0x01, 0x04, 0x05, 0x06, 0x07 }; + +static const uint8 kExpectedParamSets[] = { + 0x00, 0x00, 0x00, 0x01, 0x67, 0x12, + 0x00, 0x00, 0x00, 0x01, 0x67, 0x34, + 0x00, 0x00, 0x00, 0x01, 0x68, 0x56, 0x78}; + +class AVCConversionTest : public testing::TestWithParam<int> { + protected: + void MakeInputForLength(int length_size, std::vector<uint8>* buf) { + buf->clear(); + for (int i = 1; i < length_size; i++) + buf->push_back(0); + buf->push_back(sizeof(kNALU1)); + buf->insert(buf->end(), kNALU1, kNALU1 + sizeof(kNALU1)); + + for (int i = 1; i < length_size; i++) + 
buf->push_back(0); + buf->push_back(sizeof(kNALU2)); + buf->insert(buf->end(), kNALU2, kNALU2 + sizeof(kNALU2)); + } +}; + +TEST_P(AVCConversionTest, ParseCorrectly) { + std::vector<uint8> buf; + MakeInputForLength(GetParam(), &buf); + EXPECT_TRUE(AVC::ConvertFrameToAnnexB(GetParam(), &buf)); + EXPECT_EQ(buf.size(), sizeof(kExpected)); + EXPECT_EQ(0, memcmp(kExpected, &buf[0], sizeof(kExpected))); +} + +TEST_P(AVCConversionTest, ParsePartial) { + std::vector<uint8> buf; + MakeInputForLength(GetParam(), &buf); + buf.pop_back(); + EXPECT_FALSE(AVC::ConvertFrameToAnnexB(GetParam(), &buf)); + // This tests a buffer ending in the middle of a NAL length. For length size + // of one, this can't happen, so we skip that case. + if (GetParam() != 1) { + MakeInputForLength(GetParam(), &buf); + buf.erase(buf.end() - (sizeof(kNALU2) + 1), buf.end()); + EXPECT_FALSE(AVC::ConvertFrameToAnnexB(GetParam(), &buf)); + } +} + +TEST_P(AVCConversionTest, ParseEmpty) { + std::vector<uint8> buf; + EXPECT_TRUE(AVC::ConvertFrameToAnnexB(GetParam(), &buf)); + EXPECT_EQ(0u, buf.size()); +} + +INSTANTIATE_TEST_CASE_P(AVCConversionTestValues, + AVCConversionTest, + ::testing::Values(1, 2, 4)); + +TEST_F(AVCConversionTest, ConvertConfigToAnnexB) { + AVCDecoderConfigurationRecord avc_config; + avc_config.sps_list.resize(2); + avc_config.sps_list[0].push_back(0x67); + avc_config.sps_list[0].push_back(0x12); + avc_config.sps_list[1].push_back(0x67); + avc_config.sps_list[1].push_back(0x34); + avc_config.pps_list.resize(1); + avc_config.pps_list[0].push_back(0x68); + avc_config.pps_list[0].push_back(0x56); + avc_config.pps_list[0].push_back(0x78); + + std::vector<uint8> buf; + EXPECT_TRUE(AVC::ConvertConfigToAnnexB(avc_config, &buf)); + EXPECT_EQ(0, memcmp(kExpectedParamSets, &buf[0], + sizeof(kExpectedParamSets))); +} + +} // namespace mp4 +} // namespace media diff --git a/media/formats/mp4/box_definitions.cc b/media/formats/mp4/box_definitions.cc new file mode 100644 index 0000000..8367fd4 --- 
/dev/null +++ b/media/formats/mp4/box_definitions.cc @@ -0,0 +1,824 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/mp4/box_definitions.h" + +#include "base/logging.h" +#include "media/formats/mp4/es_descriptor.h" +#include "media/formats/mp4/rcheck.h" + +namespace media { +namespace mp4 { + +FileType::FileType() {} +FileType::~FileType() {} +FourCC FileType::BoxType() const { return FOURCC_FTYP; } + +bool FileType::Parse(BoxReader* reader) { + RCHECK(reader->ReadFourCC(&major_brand) && reader->Read4(&minor_version)); + size_t num_brands = (reader->size() - reader->pos()) / sizeof(FourCC); + return reader->SkipBytes(sizeof(FourCC) * num_brands); // compatible_brands +} + +ProtectionSystemSpecificHeader::ProtectionSystemSpecificHeader() {} +ProtectionSystemSpecificHeader::~ProtectionSystemSpecificHeader() {} +FourCC ProtectionSystemSpecificHeader::BoxType() const { return FOURCC_PSSH; } + +bool ProtectionSystemSpecificHeader::Parse(BoxReader* reader) { + // Validate the box's contents and hang on to the system ID. + uint32 size; + RCHECK(reader->ReadFullBoxHeader() && + reader->ReadVec(&system_id, 16) && + reader->Read4(&size) && + reader->HasBytes(size)); + + // Copy the entire box, including the header, for passing to EME as initData. 
+ DCHECK(raw_box.empty()); + raw_box.assign(reader->data(), reader->data() + reader->size()); + return true; +} + +SampleAuxiliaryInformationOffset::SampleAuxiliaryInformationOffset() {} +SampleAuxiliaryInformationOffset::~SampleAuxiliaryInformationOffset() {} +FourCC SampleAuxiliaryInformationOffset::BoxType() const { return FOURCC_SAIO; } + +bool SampleAuxiliaryInformationOffset::Parse(BoxReader* reader) { + RCHECK(reader->ReadFullBoxHeader()); + if (reader->flags() & 1) + RCHECK(reader->SkipBytes(8)); + + uint32 count; + RCHECK(reader->Read4(&count) && + reader->HasBytes(count * (reader->version() == 1 ? 8 : 4))); + offsets.resize(count); + + for (uint32 i = 0; i < count; i++) { + if (reader->version() == 1) { + RCHECK(reader->Read8(&offsets[i])); + } else { + RCHECK(reader->Read4Into8(&offsets[i])); + } + } + return true; +} + +SampleAuxiliaryInformationSize::SampleAuxiliaryInformationSize() + : default_sample_info_size(0), sample_count(0) { +} +SampleAuxiliaryInformationSize::~SampleAuxiliaryInformationSize() {} +FourCC SampleAuxiliaryInformationSize::BoxType() const { return FOURCC_SAIZ; } + +bool SampleAuxiliaryInformationSize::Parse(BoxReader* reader) { + RCHECK(reader->ReadFullBoxHeader()); + if (reader->flags() & 1) + RCHECK(reader->SkipBytes(8)); + + RCHECK(reader->Read1(&default_sample_info_size) && + reader->Read4(&sample_count)); + if (default_sample_info_size == 0) + return reader->ReadVec(&sample_info_sizes, sample_count); + return true; +} + +OriginalFormat::OriginalFormat() : format(FOURCC_NULL) {} +OriginalFormat::~OriginalFormat() {} +FourCC OriginalFormat::BoxType() const { return FOURCC_FRMA; } + +bool OriginalFormat::Parse(BoxReader* reader) { + return reader->ReadFourCC(&format); +} + +SchemeType::SchemeType() : type(FOURCC_NULL), version(0) {} +SchemeType::~SchemeType() {} +FourCC SchemeType::BoxType() const { return FOURCC_SCHM; } + +bool SchemeType::Parse(BoxReader* reader) { + RCHECK(reader->ReadFullBoxHeader() && + 
reader->ReadFourCC(&type) && + reader->Read4(&version)); + return true; +} + +TrackEncryption::TrackEncryption() + : is_encrypted(false), default_iv_size(0) { +} +TrackEncryption::~TrackEncryption() {} +FourCC TrackEncryption::BoxType() const { return FOURCC_TENC; } + +bool TrackEncryption::Parse(BoxReader* reader) { + uint8 flag; + RCHECK(reader->ReadFullBoxHeader() && + reader->SkipBytes(2) && + reader->Read1(&flag) && + reader->Read1(&default_iv_size) && + reader->ReadVec(&default_kid, 16)); + is_encrypted = (flag != 0); + if (is_encrypted) { + RCHECK(default_iv_size == 8 || default_iv_size == 16); + } else { + RCHECK(default_iv_size == 0); + } + return true; +} + +SchemeInfo::SchemeInfo() {} +SchemeInfo::~SchemeInfo() {} +FourCC SchemeInfo::BoxType() const { return FOURCC_SCHI; } + +bool SchemeInfo::Parse(BoxReader* reader) { + return reader->ScanChildren() && reader->ReadChild(&track_encryption); +} + +ProtectionSchemeInfo::ProtectionSchemeInfo() {} +ProtectionSchemeInfo::~ProtectionSchemeInfo() {} +FourCC ProtectionSchemeInfo::BoxType() const { return FOURCC_SINF; } + +bool ProtectionSchemeInfo::Parse(BoxReader* reader) { + RCHECK(reader->ScanChildren() && + reader->ReadChild(&format) && + reader->ReadChild(&type)); + if (type.type == FOURCC_CENC) + RCHECK(reader->ReadChild(&info)); + // Other protection schemes are silently ignored. Since the protection scheme + // type can't be determined until this box is opened, we return 'true' for + // non-CENC protection scheme types. It is the parent box's responsibility to + // ensure that this scheme type is a supported one. 
+ return true; +} + +MovieHeader::MovieHeader() + : creation_time(0), + modification_time(0), + timescale(0), + duration(0), + rate(-1), + volume(-1), + next_track_id(0) {} +MovieHeader::~MovieHeader() {} +FourCC MovieHeader::BoxType() const { return FOURCC_MVHD; } + +bool MovieHeader::Parse(BoxReader* reader) { + RCHECK(reader->ReadFullBoxHeader()); + + if (reader->version() == 1) { + RCHECK(reader->Read8(&creation_time) && + reader->Read8(&modification_time) && + reader->Read4(×cale) && + reader->Read8(&duration)); + } else { + RCHECK(reader->Read4Into8(&creation_time) && + reader->Read4Into8(&modification_time) && + reader->Read4(×cale) && + reader->Read4Into8(&duration)); + } + + RCHECK(reader->Read4s(&rate) && + reader->Read2s(&volume) && + reader->SkipBytes(10) && // reserved + reader->SkipBytes(36) && // matrix + reader->SkipBytes(24) && // predefined zero + reader->Read4(&next_track_id)); + return true; +} + +TrackHeader::TrackHeader() + : creation_time(0), + modification_time(0), + track_id(0), + duration(0), + layer(-1), + alternate_group(-1), + volume(-1), + width(0), + height(0) {} +TrackHeader::~TrackHeader() {} +FourCC TrackHeader::BoxType() const { return FOURCC_TKHD; } + +bool TrackHeader::Parse(BoxReader* reader) { + RCHECK(reader->ReadFullBoxHeader()); + if (reader->version() == 1) { + RCHECK(reader->Read8(&creation_time) && + reader->Read8(&modification_time) && + reader->Read4(&track_id) && + reader->SkipBytes(4) && // reserved + reader->Read8(&duration)); + } else { + RCHECK(reader->Read4Into8(&creation_time) && + reader->Read4Into8(&modification_time) && + reader->Read4(&track_id) && + reader->SkipBytes(4) && // reserved + reader->Read4Into8(&duration)); + } + + RCHECK(reader->SkipBytes(8) && // reserved + reader->Read2s(&layer) && + reader->Read2s(&alternate_group) && + reader->Read2s(&volume) && + reader->SkipBytes(2) && // reserved + reader->SkipBytes(36) && // matrix + reader->Read4(&width) && + reader->Read4(&height)); + width >>= 16; + 
height >>= 16; + return true; +} + +SampleDescription::SampleDescription() : type(kInvalid) {} +SampleDescription::~SampleDescription() {} +FourCC SampleDescription::BoxType() const { return FOURCC_STSD; } + +bool SampleDescription::Parse(BoxReader* reader) { + uint32 count; + RCHECK(reader->SkipBytes(4) && + reader->Read4(&count)); + video_entries.clear(); + audio_entries.clear(); + + // Note: this value is preset before scanning begins. See comments in the + // Parse(Media*) function. + if (type == kVideo) { + RCHECK(reader->ReadAllChildren(&video_entries)); + } else if (type == kAudio) { + RCHECK(reader->ReadAllChildren(&audio_entries)); + } + return true; +} + +SyncSample::SyncSample() : is_present(false) {} +SyncSample::~SyncSample() {} +FourCC SyncSample::BoxType() const { return FOURCC_STSS; } + +bool SyncSample::Parse(BoxReader* reader) { + uint32 entry_count; + RCHECK(reader->ReadFullBoxHeader() && + reader->Read4(&entry_count)); + + is_present = true; + + if (entry_count == 0) + return true; + + // Skip over the entries since we don't actually care about + // them right now. In most fragmented files with an stss, there + // aren't any entries anyways because the random access point info + // is signalled in the fragments. 
+ int64 skip_size = 4 * entry_count; + if (skip_size > INT_MAX) + return false; + + RCHECK(reader->SkipBytes(skip_size)); + + return true; +} + +SampleTable::SampleTable() {} +SampleTable::~SampleTable() {} +FourCC SampleTable::BoxType() const { return FOURCC_STBL; } + +bool SampleTable::Parse(BoxReader* reader) { + return reader->ScanChildren() && + reader->ReadChild(&description) && + reader->MaybeReadChild(&sync_sample); +} + +EditList::EditList() {} +EditList::~EditList() {} +FourCC EditList::BoxType() const { return FOURCC_ELST; } + +bool EditList::Parse(BoxReader* reader) { + uint32 count; + RCHECK(reader->ReadFullBoxHeader() && reader->Read4(&count)); + + if (reader->version() == 1) { + RCHECK(reader->HasBytes(count * 20)); + } else { + RCHECK(reader->HasBytes(count * 12)); + } + edits.resize(count); + + for (std::vector<EditListEntry>::iterator edit = edits.begin(); + edit != edits.end(); ++edit) { + if (reader->version() == 1) { + RCHECK(reader->Read8(&edit->segment_duration) && + reader->Read8s(&edit->media_time)); + } else { + RCHECK(reader->Read4Into8(&edit->segment_duration) && + reader->Read4sInto8s(&edit->media_time)); + } + RCHECK(reader->Read2s(&edit->media_rate_integer) && + reader->Read2s(&edit->media_rate_fraction)); + } + return true; +} + +Edit::Edit() {} +Edit::~Edit() {} +FourCC Edit::BoxType() const { return FOURCC_EDTS; } + +bool Edit::Parse(BoxReader* reader) { + return reader->ScanChildren() && reader->ReadChild(&list); +} + +HandlerReference::HandlerReference() : type(kInvalid) {} +HandlerReference::~HandlerReference() {} +FourCC HandlerReference::BoxType() const { return FOURCC_HDLR; } + +bool HandlerReference::Parse(BoxReader* reader) { + FourCC hdlr_type; + RCHECK(reader->SkipBytes(8) && reader->ReadFourCC(&hdlr_type)); + // Note: remaining fields in box ignored + if (hdlr_type == FOURCC_VIDE) { + type = kVideo; + } else if (hdlr_type == FOURCC_SOUN) { + type = kAudio; + } else { + type = kInvalid; + } + return true; +} + 
+AVCDecoderConfigurationRecord::AVCDecoderConfigurationRecord() + : version(0), + profile_indication(0), + profile_compatibility(0), + avc_level(0), + length_size(0) {} + +AVCDecoderConfigurationRecord::~AVCDecoderConfigurationRecord() {} +FourCC AVCDecoderConfigurationRecord::BoxType() const { return FOURCC_AVCC; } + +bool AVCDecoderConfigurationRecord::Parse(BoxReader* reader) { + RCHECK(reader->Read1(&version) && version == 1 && + reader->Read1(&profile_indication) && + reader->Read1(&profile_compatibility) && + reader->Read1(&avc_level)); + + uint8 length_size_minus_one; + RCHECK(reader->Read1(&length_size_minus_one) && + (length_size_minus_one & 0xfc) == 0xfc); + length_size = (length_size_minus_one & 0x3) + 1; + + uint8 num_sps; + RCHECK(reader->Read1(&num_sps) && (num_sps & 0xe0) == 0xe0); + num_sps &= 0x1f; + + sps_list.resize(num_sps); + for (int i = 0; i < num_sps; i++) { + uint16 sps_length; + RCHECK(reader->Read2(&sps_length) && + reader->ReadVec(&sps_list[i], sps_length)); + } + + uint8 num_pps; + RCHECK(reader->Read1(&num_pps)); + + pps_list.resize(num_pps); + for (int i = 0; i < num_pps; i++) { + uint16 pps_length; + RCHECK(reader->Read2(&pps_length) && + reader->ReadVec(&pps_list[i], pps_length)); + } + + return true; +} + +PixelAspectRatioBox::PixelAspectRatioBox() : h_spacing(1), v_spacing(1) {} +PixelAspectRatioBox::~PixelAspectRatioBox() {} +FourCC PixelAspectRatioBox::BoxType() const { return FOURCC_PASP; } + +bool PixelAspectRatioBox::Parse(BoxReader* reader) { + RCHECK(reader->Read4(&h_spacing) && + reader->Read4(&v_spacing)); + return true; +} + +VideoSampleEntry::VideoSampleEntry() + : format(FOURCC_NULL), + data_reference_index(0), + width(0), + height(0) {} + +VideoSampleEntry::~VideoSampleEntry() {} +FourCC VideoSampleEntry::BoxType() const { + DCHECK(false) << "VideoSampleEntry should be parsed according to the " + << "handler type recovered in its Media ancestor."; + return FOURCC_NULL; +} + +bool VideoSampleEntry::Parse(BoxReader* 
reader) { + format = reader->type(); + RCHECK(reader->SkipBytes(6) && + reader->Read2(&data_reference_index) && + reader->SkipBytes(16) && + reader->Read2(&width) && + reader->Read2(&height) && + reader->SkipBytes(50)); + + RCHECK(reader->ScanChildren() && + reader->MaybeReadChild(&pixel_aspect)); + + if (format == FOURCC_ENCV) { + // Continue scanning until a recognized protection scheme is found, or until + // we run out of protection schemes. + while (sinf.type.type != FOURCC_CENC) { + if (!reader->ReadChild(&sinf)) + return false; + } + } + + if (IsFormatValid()) + RCHECK(reader->ReadChild(&avcc)); + + return true; +} + +bool VideoSampleEntry::IsFormatValid() const { + return format == FOURCC_AVC1 || format == FOURCC_AVC3 || + (format == FOURCC_ENCV && (sinf.format.format == FOURCC_AVC1 || + sinf.format.format == FOURCC_AVC3)); +} + +ElementaryStreamDescriptor::ElementaryStreamDescriptor() + : object_type(kForbidden) {} + +ElementaryStreamDescriptor::~ElementaryStreamDescriptor() {} + +FourCC ElementaryStreamDescriptor::BoxType() const { + return FOURCC_ESDS; +} + +bool ElementaryStreamDescriptor::Parse(BoxReader* reader) { + std::vector<uint8> data; + ESDescriptor es_desc; + + RCHECK(reader->ReadFullBoxHeader()); + RCHECK(reader->ReadVec(&data, reader->size() - reader->pos())); + RCHECK(es_desc.Parse(data)); + + object_type = es_desc.object_type(); + + RCHECK(aac.Parse(es_desc.decoder_specific_info())); + + return true; +} + +AudioSampleEntry::AudioSampleEntry() + : format(FOURCC_NULL), + data_reference_index(0), + channelcount(0), + samplesize(0), + samplerate(0) {} + +AudioSampleEntry::~AudioSampleEntry() {} + +FourCC AudioSampleEntry::BoxType() const { + DCHECK(false) << "AudioSampleEntry should be parsed according to the " + << "handler type recovered in its Media ancestor."; + return FOURCC_NULL; +} + +bool AudioSampleEntry::Parse(BoxReader* reader) { + format = reader->type(); + RCHECK(reader->SkipBytes(6) && + reader->Read2(&data_reference_index) && + 
reader->SkipBytes(8) && + reader->Read2(&channelcount) && + reader->Read2(&samplesize) && + reader->SkipBytes(4) && + reader->Read4(&samplerate)); + // Convert from 16.16 fixed point to integer + samplerate >>= 16; + + RCHECK(reader->ScanChildren()); + if (format == FOURCC_ENCA) { + // Continue scanning until a recognized protection scheme is found, or until + // we run out of protection schemes. + while (sinf.type.type != FOURCC_CENC) { + if (!reader->ReadChild(&sinf)) + return false; + } + } + + // ESDS is not valid in case of EAC3. + RCHECK(reader->MaybeReadChild(&esds)); + return true; +} + +MediaHeader::MediaHeader() + : creation_time(0), + modification_time(0), + timescale(0), + duration(0) {} +MediaHeader::~MediaHeader() {} +FourCC MediaHeader::BoxType() const { return FOURCC_MDHD; } + +bool MediaHeader::Parse(BoxReader* reader) { + RCHECK(reader->ReadFullBoxHeader()); + + if (reader->version() == 1) { + RCHECK(reader->Read8(&creation_time) && + reader->Read8(&modification_time) && + reader->Read4(&timescale) && + reader->Read8(&duration)); + } else { + RCHECK(reader->Read4Into8(&creation_time) && + reader->Read4Into8(&modification_time) && + reader->Read4(&timescale) && + reader->Read4Into8(&duration)); + } + // Skip language information + return reader->SkipBytes(4); +} + +MediaInformation::MediaInformation() {} +MediaInformation::~MediaInformation() {} +FourCC MediaInformation::BoxType() const { return FOURCC_MINF; } + +bool MediaInformation::Parse(BoxReader* reader) { + return reader->ScanChildren() && + reader->ReadChild(&sample_table); +} + +Media::Media() {} +Media::~Media() {} +FourCC Media::BoxType() const { return FOURCC_MDIA; } + +bool Media::Parse(BoxReader* reader) { + RCHECK(reader->ScanChildren() && + reader->ReadChild(&header) && + reader->ReadChild(&handler)); + + // Maddeningly, the HandlerReference box specifies how to parse the + // SampleDescription box, making the latter the only box (of those that we + // support) which cannot be parsed 
correctly on its own (or even with + // information from its strict ancestor tree). We thus copy the handler type + // to the sample description box *before* parsing it to provide this + // information while parsing. + information.sample_table.description.type = handler.type; + RCHECK(reader->ReadChild(&information)); + return true; +} + +Track::Track() {} +Track::~Track() {} +FourCC Track::BoxType() const { return FOURCC_TRAK; } + +bool Track::Parse(BoxReader* reader) { + RCHECK(reader->ScanChildren() && + reader->ReadChild(&header) && + reader->ReadChild(&media) && + reader->MaybeReadChild(&edit)); + return true; +} + +MovieExtendsHeader::MovieExtendsHeader() : fragment_duration(0) {} +MovieExtendsHeader::~MovieExtendsHeader() {} +FourCC MovieExtendsHeader::BoxType() const { return FOURCC_MEHD; } + +bool MovieExtendsHeader::Parse(BoxReader* reader) { + RCHECK(reader->ReadFullBoxHeader()); + if (reader->version() == 1) { + RCHECK(reader->Read8(&fragment_duration)); + } else { + RCHECK(reader->Read4Into8(&fragment_duration)); + } + return true; +} + +TrackExtends::TrackExtends() + : track_id(0), + default_sample_description_index(0), + default_sample_duration(0), + default_sample_size(0), + default_sample_flags(0) {} +TrackExtends::~TrackExtends() {} +FourCC TrackExtends::BoxType() const { return FOURCC_TREX; } + +bool TrackExtends::Parse(BoxReader* reader) { + RCHECK(reader->ReadFullBoxHeader() && + reader->Read4(&track_id) && + reader->Read4(&default_sample_description_index) && + reader->Read4(&default_sample_duration) && + reader->Read4(&default_sample_size) && + reader->Read4(&default_sample_flags)); + return true; +} + +MovieExtends::MovieExtends() {} +MovieExtends::~MovieExtends() {} +FourCC MovieExtends::BoxType() const { return FOURCC_MVEX; } + +bool MovieExtends::Parse(BoxReader* reader) { + header.fragment_duration = 0; + return reader->ScanChildren() && + reader->MaybeReadChild(&header) && + reader->ReadChildren(&tracks); +} + +Movie::Movie() : 
fragmented(false) {} +Movie::~Movie() {} +FourCC Movie::BoxType() const { return FOURCC_MOOV; } + +bool Movie::Parse(BoxReader* reader) { + return reader->ScanChildren() && + reader->ReadChild(&header) && + reader->ReadChildren(&tracks) && + // Media Source specific: 'mvex' required + reader->ReadChild(&extends) && + reader->MaybeReadChildren(&pssh); +} + +TrackFragmentDecodeTime::TrackFragmentDecodeTime() : decode_time(0) {} +TrackFragmentDecodeTime::~TrackFragmentDecodeTime() {} +FourCC TrackFragmentDecodeTime::BoxType() const { return FOURCC_TFDT; } + +bool TrackFragmentDecodeTime::Parse(BoxReader* reader) { + RCHECK(reader->ReadFullBoxHeader()); + if (reader->version() == 1) + return reader->Read8(&decode_time); + else + return reader->Read4Into8(&decode_time); +} + +MovieFragmentHeader::MovieFragmentHeader() : sequence_number(0) {} +MovieFragmentHeader::~MovieFragmentHeader() {} +FourCC MovieFragmentHeader::BoxType() const { return FOURCC_MFHD; } + +bool MovieFragmentHeader::Parse(BoxReader* reader) { + return reader->SkipBytes(4) && reader->Read4(&sequence_number); +} + +TrackFragmentHeader::TrackFragmentHeader() + : track_id(0), + sample_description_index(0), + default_sample_duration(0), + default_sample_size(0), + default_sample_flags(0), + has_default_sample_flags(false) {} + +TrackFragmentHeader::~TrackFragmentHeader() {} +FourCC TrackFragmentHeader::BoxType() const { return FOURCC_TFHD; } + +bool TrackFragmentHeader::Parse(BoxReader* reader) { + RCHECK(reader->ReadFullBoxHeader() && reader->Read4(&track_id)); + + // Media Source specific: reject tracks that set 'base-data-offset-present'. + // Although the Media Source requires that 'default-base-is-moof' (14496-12 + // Amendment 2) be set, we omit this check as many otherwise-valid files in + // the wild don't set it. 
+ // + // RCHECK((flags & 0x020000) && !(flags & 0x1)); + RCHECK(!(reader->flags() & 0x1)); + + if (reader->flags() & 0x2) { + RCHECK(reader->Read4(&sample_description_index)); + } else { + sample_description_index = 0; + } + + if (reader->flags() & 0x8) { + RCHECK(reader->Read4(&default_sample_duration)); + } else { + default_sample_duration = 0; + } + + if (reader->flags() & 0x10) { + RCHECK(reader->Read4(&default_sample_size)); + } else { + default_sample_size = 0; + } + + if (reader->flags() & 0x20) { + RCHECK(reader->Read4(&default_sample_flags)); + has_default_sample_flags = true; + } else { + has_default_sample_flags = false; + } + + return true; +} + +TrackFragmentRun::TrackFragmentRun() + : sample_count(0), data_offset(0) {} +TrackFragmentRun::~TrackFragmentRun() {} +FourCC TrackFragmentRun::BoxType() const { return FOURCC_TRUN; } + +bool TrackFragmentRun::Parse(BoxReader* reader) { + RCHECK(reader->ReadFullBoxHeader() && + reader->Read4(&sample_count)); + const uint32 flags = reader->flags(); + + bool data_offset_present = (flags & 0x1) != 0; + bool first_sample_flags_present = (flags & 0x4) != 0; + bool sample_duration_present = (flags & 0x100) != 0; + bool sample_size_present = (flags & 0x200) != 0; + bool sample_flags_present = (flags & 0x400) != 0; + bool sample_composition_time_offsets_present = (flags & 0x800) != 0; + + if (data_offset_present) { + RCHECK(reader->Read4(&data_offset)); + } else { + data_offset = 0; + } + + uint32 first_sample_flags; + if (first_sample_flags_present) + RCHECK(reader->Read4(&first_sample_flags)); + + int fields = sample_duration_present + sample_size_present + + sample_flags_present + sample_composition_time_offsets_present; + RCHECK(reader->HasBytes(fields * sample_count)); + + if (sample_duration_present) + sample_durations.resize(sample_count); + if (sample_size_present) + sample_sizes.resize(sample_count); + if (sample_flags_present) + sample_flags.resize(sample_count); + if 
(sample_composition_time_offsets_present) + sample_composition_time_offsets.resize(sample_count); + + for (uint32 i = 0; i < sample_count; ++i) { + if (sample_duration_present) + RCHECK(reader->Read4(&sample_durations[i])); + if (sample_size_present) + RCHECK(reader->Read4(&sample_sizes[i])); + if (sample_flags_present) + RCHECK(reader->Read4(&sample_flags[i])); + if (sample_composition_time_offsets_present) + RCHECK(reader->Read4s(&sample_composition_time_offsets[i])); + } + + if (first_sample_flags_present) { + if (sample_flags.size() == 0) { + sample_flags.push_back(first_sample_flags); + } else { + sample_flags[0] = first_sample_flags; + } + } + return true; +} + +TrackFragment::TrackFragment() {} +TrackFragment::~TrackFragment() {} +FourCC TrackFragment::BoxType() const { return FOURCC_TRAF; } + +bool TrackFragment::Parse(BoxReader* reader) { + return reader->ScanChildren() && + reader->ReadChild(&header) && + // Media Source specific: 'tfdt' required + reader->ReadChild(&decode_time) && + reader->MaybeReadChildren(&runs) && + reader->MaybeReadChild(&auxiliary_offset) && + reader->MaybeReadChild(&auxiliary_size) && + reader->MaybeReadChild(&sdtp); +} + +MovieFragment::MovieFragment() {} +MovieFragment::~MovieFragment() {} +FourCC MovieFragment::BoxType() const { return FOURCC_MOOF; } + +bool MovieFragment::Parse(BoxReader* reader) { + RCHECK(reader->ScanChildren() && + reader->ReadChild(&header) && + reader->ReadChildren(&tracks) && + reader->MaybeReadChildren(&pssh)); + return true; +} + +IndependentAndDisposableSamples::IndependentAndDisposableSamples() {} +IndependentAndDisposableSamples::~IndependentAndDisposableSamples() {} +FourCC IndependentAndDisposableSamples::BoxType() const { return FOURCC_SDTP; } + +bool IndependentAndDisposableSamples::Parse(BoxReader* reader) { + RCHECK(reader->ReadFullBoxHeader()); + RCHECK(reader->version() == 0); + RCHECK(reader->flags() == 0); + + int sample_count = reader->size() - reader->pos(); + 
sample_depends_on_.resize(sample_count); + for (int i = 0; i < sample_count; ++i) { + uint8 sample_info; + RCHECK(reader->Read1(&sample_info)); + RCHECK((sample_info >> 6) == 0); // reserved. + + sample_depends_on_[i] = + static_cast<SampleDependsOn>((sample_info >> 4) & 0x3); + + RCHECK(sample_depends_on_[i] != kSampleDependsOnReserved); + } + + return true; +} + +SampleDependsOn IndependentAndDisposableSamples::sample_depends_on( + size_t i) const { + if (i >= sample_depends_on_.size()) + return kSampleDependsOnUnknown; + + return sample_depends_on_[i]; +} + +} // namespace mp4 +} // namespace media diff --git a/media/formats/mp4/box_definitions.h b/media/formats/mp4/box_definitions.h new file mode 100644 index 0000000..9daa82e --- /dev/null +++ b/media/formats/mp4/box_definitions.h @@ -0,0 +1,382 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_ +#define MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_ + +#include <string> +#include <vector> + +#include "base/basictypes.h" +#include "base/compiler_specific.h" +#include "media/base/media_export.h" +#include "media/formats/mp4/aac.h" +#include "media/formats/mp4/avc.h" +#include "media/formats/mp4/box_reader.h" +#include "media/formats/mp4/fourccs.h" + +namespace media { +namespace mp4 { + +enum TrackType { + kInvalid = 0, + kVideo, + kAudio, + kHint +}; + +#define DECLARE_BOX_METHODS(T) \ + T(); \ + virtual ~T(); \ + virtual bool Parse(BoxReader* reader) OVERRIDE; \ + virtual FourCC BoxType() const OVERRIDE; \ + +struct MEDIA_EXPORT FileType : Box { + DECLARE_BOX_METHODS(FileType); + + FourCC major_brand; + uint32 minor_version; +}; + +struct MEDIA_EXPORT ProtectionSystemSpecificHeader : Box { + DECLARE_BOX_METHODS(ProtectionSystemSpecificHeader); + + std::vector<uint8> system_id; + std::vector<uint8> raw_box; +}; + +struct MEDIA_EXPORT 
SampleAuxiliaryInformationOffset : Box { + DECLARE_BOX_METHODS(SampleAuxiliaryInformationOffset); + + std::vector<uint64> offsets; +}; + +struct MEDIA_EXPORT SampleAuxiliaryInformationSize : Box { + DECLARE_BOX_METHODS(SampleAuxiliaryInformationSize); + + uint8 default_sample_info_size; + uint32 sample_count; + std::vector<uint8> sample_info_sizes; +}; + +struct MEDIA_EXPORT OriginalFormat : Box { + DECLARE_BOX_METHODS(OriginalFormat); + + FourCC format; +}; + +struct MEDIA_EXPORT SchemeType : Box { + DECLARE_BOX_METHODS(SchemeType); + + FourCC type; + uint32 version; +}; + +struct MEDIA_EXPORT TrackEncryption : Box { + DECLARE_BOX_METHODS(TrackEncryption); + + // Note: this definition is specific to the CENC protection type. + bool is_encrypted; + uint8 default_iv_size; + std::vector<uint8> default_kid; +}; + +struct MEDIA_EXPORT SchemeInfo : Box { + DECLARE_BOX_METHODS(SchemeInfo); + + TrackEncryption track_encryption; +}; + +struct MEDIA_EXPORT ProtectionSchemeInfo : Box { + DECLARE_BOX_METHODS(ProtectionSchemeInfo); + + OriginalFormat format; + SchemeType type; + SchemeInfo info; +}; + +struct MEDIA_EXPORT MovieHeader : Box { + DECLARE_BOX_METHODS(MovieHeader); + + uint64 creation_time; + uint64 modification_time; + uint32 timescale; + uint64 duration; + int32 rate; + int16 volume; + uint32 next_track_id; +}; + +struct MEDIA_EXPORT TrackHeader : Box { + DECLARE_BOX_METHODS(TrackHeader); + + uint64 creation_time; + uint64 modification_time; + uint32 track_id; + uint64 duration; + int16 layer; + int16 alternate_group; + int16 volume; + uint32 width; + uint32 height; +}; + +struct MEDIA_EXPORT EditListEntry { + uint64 segment_duration; + int64 media_time; + int16 media_rate_integer; + int16 media_rate_fraction; +}; + +struct MEDIA_EXPORT EditList : Box { + DECLARE_BOX_METHODS(EditList); + + std::vector<EditListEntry> edits; +}; + +struct MEDIA_EXPORT Edit : Box { + DECLARE_BOX_METHODS(Edit); + + EditList list; +}; + +struct MEDIA_EXPORT HandlerReference : Box { + 
DECLARE_BOX_METHODS(HandlerReference); + + TrackType type; +}; + +struct MEDIA_EXPORT AVCDecoderConfigurationRecord : Box { + DECLARE_BOX_METHODS(AVCDecoderConfigurationRecord); + + uint8 version; + uint8 profile_indication; + uint8 profile_compatibility; + uint8 avc_level; + uint8 length_size; + + typedef std::vector<uint8> SPS; + typedef std::vector<uint8> PPS; + + std::vector<SPS> sps_list; + std::vector<PPS> pps_list; +}; + +struct MEDIA_EXPORT PixelAspectRatioBox : Box { + DECLARE_BOX_METHODS(PixelAspectRatioBox); + + uint32 h_spacing; + uint32 v_spacing; +}; + +struct MEDIA_EXPORT VideoSampleEntry : Box { + DECLARE_BOX_METHODS(VideoSampleEntry); + + FourCC format; + uint16 data_reference_index; + uint16 width; + uint16 height; + + PixelAspectRatioBox pixel_aspect; + ProtectionSchemeInfo sinf; + + // Currently expected to be present regardless of format. + AVCDecoderConfigurationRecord avcc; + + bool IsFormatValid() const; +}; + +struct MEDIA_EXPORT ElementaryStreamDescriptor : Box { + DECLARE_BOX_METHODS(ElementaryStreamDescriptor); + + uint8 object_type; + AAC aac; +}; + +struct MEDIA_EXPORT AudioSampleEntry : Box { + DECLARE_BOX_METHODS(AudioSampleEntry); + + FourCC format; + uint16 data_reference_index; + uint16 channelcount; + uint16 samplesize; + uint32 samplerate; + + ProtectionSchemeInfo sinf; + ElementaryStreamDescriptor esds; +}; + +struct MEDIA_EXPORT SampleDescription : Box { + DECLARE_BOX_METHODS(SampleDescription); + + TrackType type; + std::vector<VideoSampleEntry> video_entries; + std::vector<AudioSampleEntry> audio_entries; +}; + +struct MEDIA_EXPORT SyncSample : Box { + DECLARE_BOX_METHODS(SyncSample); + + bool is_present; +}; + +struct MEDIA_EXPORT SampleTable : Box { + DECLARE_BOX_METHODS(SampleTable); + + // Media Source specific: we ignore many of the sub-boxes in this box, + // including some that are required to be present in the BMFF spec. 
This + // includes the 'stts', 'stsc', and 'stco' boxes, which must contain no + // samples in order to be compliant files. + SampleDescription description; + SyncSample sync_sample; +}; + +struct MEDIA_EXPORT MediaHeader : Box { + DECLARE_BOX_METHODS(MediaHeader); + + uint64 creation_time; + uint64 modification_time; + uint32 timescale; + uint64 duration; +}; + +struct MEDIA_EXPORT MediaInformation : Box { + DECLARE_BOX_METHODS(MediaInformation); + + SampleTable sample_table; +}; + +struct MEDIA_EXPORT Media : Box { + DECLARE_BOX_METHODS(Media); + + MediaHeader header; + HandlerReference handler; + MediaInformation information; +}; + +struct MEDIA_EXPORT Track : Box { + DECLARE_BOX_METHODS(Track); + + TrackHeader header; + Media media; + Edit edit; +}; + +struct MEDIA_EXPORT MovieExtendsHeader : Box { + DECLARE_BOX_METHODS(MovieExtendsHeader); + + uint64 fragment_duration; +}; + +struct MEDIA_EXPORT TrackExtends : Box { + DECLARE_BOX_METHODS(TrackExtends); + + uint32 track_id; + uint32 default_sample_description_index; + uint32 default_sample_duration; + uint32 default_sample_size; + uint32 default_sample_flags; +}; + +struct MEDIA_EXPORT MovieExtends : Box { + DECLARE_BOX_METHODS(MovieExtends); + + MovieExtendsHeader header; + std::vector<TrackExtends> tracks; +}; + +struct MEDIA_EXPORT Movie : Box { + DECLARE_BOX_METHODS(Movie); + + bool fragmented; + MovieHeader header; + MovieExtends extends; + std::vector<Track> tracks; + std::vector<ProtectionSystemSpecificHeader> pssh; +}; + +struct MEDIA_EXPORT TrackFragmentDecodeTime : Box { + DECLARE_BOX_METHODS(TrackFragmentDecodeTime); + + uint64 decode_time; +}; + +struct MEDIA_EXPORT MovieFragmentHeader : Box { + DECLARE_BOX_METHODS(MovieFragmentHeader); + + uint32 sequence_number; +}; + +struct MEDIA_EXPORT TrackFragmentHeader : Box { + DECLARE_BOX_METHODS(TrackFragmentHeader); + + uint32 track_id; + + uint32 sample_description_index; + uint32 default_sample_duration; + uint32 default_sample_size; + uint32 
default_sample_flags; + + // As 'flags' might be all zero, we cannot use zeroness alone to identify + // when default_sample_flags wasn't specified, unlike the other values. + bool has_default_sample_flags; +}; + +struct MEDIA_EXPORT TrackFragmentRun : Box { + DECLARE_BOX_METHODS(TrackFragmentRun); + + uint32 sample_count; + uint32 data_offset; + std::vector<uint32> sample_flags; + std::vector<uint32> sample_sizes; + std::vector<uint32> sample_durations; + std::vector<int32> sample_composition_time_offsets; +}; + +// sample_depends_on values in ISO/IEC 14496-12 Section 8.40.2.3. +enum SampleDependsOn { + kSampleDependsOnUnknown = 0, + kSampleDependsOnOthers = 1, + kSampleDependsOnNoOther = 2, + kSampleDependsOnReserved = 3, +}; + +class MEDIA_EXPORT IndependentAndDisposableSamples : public Box { + public: + DECLARE_BOX_METHODS(IndependentAndDisposableSamples); + + // Returns the SampleDependsOn value for the |i|'th value + // in the track. If no data was parsed for the |i|'th sample, + // then |kSampleDependsOnUnknown| is returned. 
+ SampleDependsOn sample_depends_on(size_t i) const; + + private: + std::vector<SampleDependsOn> sample_depends_on_; +}; + +struct MEDIA_EXPORT TrackFragment : Box { + DECLARE_BOX_METHODS(TrackFragment); + + TrackFragmentHeader header; + std::vector<TrackFragmentRun> runs; + TrackFragmentDecodeTime decode_time; + SampleAuxiliaryInformationOffset auxiliary_offset; + SampleAuxiliaryInformationSize auxiliary_size; + IndependentAndDisposableSamples sdtp; +}; + +struct MEDIA_EXPORT MovieFragment : Box { + DECLARE_BOX_METHODS(MovieFragment); + + MovieFragmentHeader header; + std::vector<TrackFragment> tracks; + std::vector<ProtectionSystemSpecificHeader> pssh; +}; + +#undef DECLARE_BOX + +} // namespace mp4 +} // namespace media + +#endif // MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_ diff --git a/media/formats/mp4/box_reader.cc b/media/formats/mp4/box_reader.cc new file mode 100644 index 0000000..57e095a --- /dev/null +++ b/media/formats/mp4/box_reader.cc @@ -0,0 +1,240 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "media/formats/mp4/box_reader.h" + +#include <string.h> +#include <algorithm> +#include <map> +#include <set> + +#include "base/logging.h" +#include "base/memory/scoped_ptr.h" +#include "media/formats/mp4/box_definitions.h" +#include "media/formats/mp4/rcheck.h" + +namespace media { +namespace mp4 { + +Box::~Box() {} + +bool BufferReader::Read1(uint8* v) { + RCHECK(HasBytes(1)); + *v = buf_[pos_++]; + return true; +} + +// Internal implementation of multi-byte reads +template<typename T> bool BufferReader::Read(T* v) { + RCHECK(HasBytes(sizeof(T))); + + T tmp = 0; + for (size_t i = 0; i < sizeof(T); i++) { + tmp <<= 8; + tmp += buf_[pos_++]; + } + *v = tmp; + return true; +} + +bool BufferReader::Read2(uint16* v) { return Read(v); } +bool BufferReader::Read2s(int16* v) { return Read(v); } +bool BufferReader::Read4(uint32* v) { return Read(v); } +bool BufferReader::Read4s(int32* v) { return Read(v); } +bool BufferReader::Read8(uint64* v) { return Read(v); } +bool BufferReader::Read8s(int64* v) { return Read(v); } + +bool BufferReader::ReadFourCC(FourCC* v) { + return Read4(reinterpret_cast<uint32*>(v)); +} + +bool BufferReader::ReadVec(std::vector<uint8>* vec, int count) { + RCHECK(HasBytes(count)); + vec->clear(); + vec->insert(vec->end(), buf_ + pos_, buf_ + pos_ + count); + pos_ += count; + return true; +} + +bool BufferReader::SkipBytes(int bytes) { + RCHECK(HasBytes(bytes)); + pos_ += bytes; + return true; +} + +bool BufferReader::Read4Into8(uint64* v) { + uint32 tmp; + RCHECK(Read4(&tmp)); + *v = tmp; + return true; +} + +bool BufferReader::Read4sInto8s(int64* v) { + // Beware of the need for sign extension. 
+ int32 tmp; + RCHECK(Read4s(&tmp)); + *v = tmp; + return true; +} + + +BoxReader::BoxReader(const uint8* buf, const int size, + const LogCB& log_cb) + : BufferReader(buf, size), + log_cb_(log_cb), + type_(FOURCC_NULL), + version_(0), + flags_(0), + scanned_(false) { +} + +BoxReader::~BoxReader() { + if (scanned_ && !children_.empty()) { + for (ChildMap::iterator itr = children_.begin(); + itr != children_.end(); ++itr) { + DVLOG(1) << "Skipping unknown box: " << FourCCToString(itr->first); + } + } +} + +// static +BoxReader* BoxReader::ReadTopLevelBox(const uint8* buf, + const int buf_size, + const LogCB& log_cb, + bool* err) { + scoped_ptr<BoxReader> reader(new BoxReader(buf, buf_size, log_cb)); + if (!reader->ReadHeader(err)) + return NULL; + + if (!IsValidTopLevelBox(reader->type(), log_cb)) { + *err = true; + return NULL; + } + + if (reader->size() <= buf_size) + return reader.release(); + + return NULL; +} + +// static +bool BoxReader::StartTopLevelBox(const uint8* buf, + const int buf_size, + const LogCB& log_cb, + FourCC* type, + int* box_size, + bool* err) { + BoxReader reader(buf, buf_size, log_cb); + if (!reader.ReadHeader(err)) return false; + if (!IsValidTopLevelBox(reader.type(), log_cb)) { + *err = true; + return false; + } + *type = reader.type(); + *box_size = reader.size(); + return true; +} + +// static +bool BoxReader::IsValidTopLevelBox(const FourCC& type, + const LogCB& log_cb) { + switch (type) { + case FOURCC_FTYP: + case FOURCC_PDIN: + case FOURCC_BLOC: + case FOURCC_MOOV: + case FOURCC_MOOF: + case FOURCC_MFRA: + case FOURCC_MDAT: + case FOURCC_FREE: + case FOURCC_SKIP: + case FOURCC_META: + case FOURCC_MECO: + case FOURCC_STYP: + case FOURCC_SIDX: + case FOURCC_SSIX: + case FOURCC_PRFT: + return true; + default: + // Hex is used to show nonprintable characters and aid in debugging + MEDIA_LOG(log_cb) << "Unrecognized top-level box type 0x" + << std::hex << type; + return false; + } +} + +bool BoxReader::ScanChildren() { + 
DCHECK(!scanned_); + scanned_ = true; + + bool err = false; + while (pos() < size()) { + BoxReader child(&buf_[pos_], size_ - pos_, log_cb_); + if (!child.ReadHeader(&err)) break; + + children_.insert(std::pair<FourCC, BoxReader>(child.type(), child)); + pos_ += child.size(); + } + + DCHECK(!err); + return !err && pos() == size(); +} + +bool BoxReader::ReadChild(Box* child) { + DCHECK(scanned_); + FourCC child_type = child->BoxType(); + + ChildMap::iterator itr = children_.find(child_type); + RCHECK(itr != children_.end()); + DVLOG(2) << "Found a " << FourCCToString(child_type) << " box."; + RCHECK(child->Parse(&itr->second)); + children_.erase(itr); + return true; +} + +bool BoxReader::MaybeReadChild(Box* child) { + if (!children_.count(child->BoxType())) return true; + return ReadChild(child); +} + +bool BoxReader::ReadFullBoxHeader() { + uint32 vflags; + RCHECK(Read4(&vflags)); + version_ = vflags >> 24; + flags_ = vflags & 0xffffff; + return true; +} + +bool BoxReader::ReadHeader(bool* err) { + uint64 size = 0; + *err = false; + + if (!HasBytes(8)) return false; + CHECK(Read4Into8(&size) && ReadFourCC(&type_)); + + if (size == 0) { + // Media Source specific: we do not support boxes that run to EOS. + *err = true; + return false; + } else if (size == 1) { + if (!HasBytes(8)) return false; + CHECK(Read8(&size)); + } + + // Implementation-specific: support for boxes larger than 2^31 has been + // removed. + if (size < static_cast<uint64>(pos_) || + size > static_cast<uint64>(kint32max)) { + *err = true; + return false; + } + + // Note that the pos_ head has advanced to the byte immediately after the + // header, which is where we want it. + size_ = size; + return true; +} + +} // namespace mp4 +} // namespace media diff --git a/media/formats/mp4/box_reader.h b/media/formats/mp4/box_reader.h new file mode 100644 index 0000000..d4b608e7 --- /dev/null +++ b/media/formats/mp4/box_reader.h @@ -0,0 +1,214 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_MP4_BOX_READER_H_ +#define MEDIA_FORMATS_MP4_BOX_READER_H_ + +#include <map> +#include <vector> + +#include "base/compiler_specific.h" +#include "base/logging.h" +#include "media/base/media_export.h" +#include "media/base/media_log.h" +#include "media/formats/mp4/fourccs.h" +#include "media/formats/mp4/rcheck.h" + +namespace media { +namespace mp4 { + +class BoxReader; + +struct MEDIA_EXPORT Box { + virtual ~Box(); + virtual bool Parse(BoxReader* reader) = 0; + virtual FourCC BoxType() const = 0; +}; + +class MEDIA_EXPORT BufferReader { + public: + BufferReader(const uint8* buf, const int size) + : buf_(buf), size_(size), pos_(0) {} + + bool HasBytes(int count) { return (pos() + count <= size()); } + + // Read a value from the stream, performing endian correction, and advance the + // stream pointer. + bool Read1(uint8* v) WARN_UNUSED_RESULT; + bool Read2(uint16* v) WARN_UNUSED_RESULT; + bool Read2s(int16* v) WARN_UNUSED_RESULT; + bool Read4(uint32* v) WARN_UNUSED_RESULT; + bool Read4s(int32* v) WARN_UNUSED_RESULT; + bool Read8(uint64* v) WARN_UNUSED_RESULT; + bool Read8s(int64* v) WARN_UNUSED_RESULT; + + bool ReadFourCC(FourCC* v) WARN_UNUSED_RESULT; + + bool ReadVec(std::vector<uint8>* t, int count) WARN_UNUSED_RESULT; + + // These variants read a 4-byte integer of the corresponding signedness and + // store it in the 8-byte return type. + bool Read4Into8(uint64* v) WARN_UNUSED_RESULT; + bool Read4sInto8s(int64* v) WARN_UNUSED_RESULT; + + // Advance the stream by this many bytes. 
+ bool SkipBytes(int nbytes) WARN_UNUSED_RESULT; + + const uint8* data() const { return buf_; } + int size() const { return size_; } + int pos() const { return pos_; } + + protected: + const uint8* buf_; + int size_; + int pos_; + + template<typename T> bool Read(T* t) WARN_UNUSED_RESULT; +}; + +class MEDIA_EXPORT BoxReader : public BufferReader { + public: + ~BoxReader(); + + // Create a BoxReader from a buffer. Note that this function may return NULL + // if an intact, complete box was not available in the buffer. If |*err| is + // set, there was a stream-level error when creating the box; otherwise, NULL + // values are only expected when insufficient data is available. + // + // |buf| is retained but not owned, and must outlive the BoxReader instance. + static BoxReader* ReadTopLevelBox(const uint8* buf, + const int buf_size, + const LogCB& log_cb, + bool* err); + + // Read the box header from the current buffer. This function returns true if + // there is enough data to read the header and the header is sane; that is, it + // does not check to ensure the entire box is in the buffer before returning + // true. The semantics of |*err| are the same as above. + // + // |buf| is not retained. + static bool StartTopLevelBox(const uint8* buf, + const int buf_size, + const LogCB& log_cb, + FourCC* type, + int* box_size, + bool* err) WARN_UNUSED_RESULT; + + // Returns true if |type| is recognized to be a top-level box, false + // otherwise. This returns true for some boxes which we do not parse. + // Helpful in debugging misaligned appends. + static bool IsValidTopLevelBox(const FourCC& type, + const LogCB& log_cb); + + // Scan through all boxes within the current box, starting at the current + // buffer position. Must be called before any of the *Child functions work. + bool ScanChildren() WARN_UNUSED_RESULT; + + // Read exactly one child box from the set of children. The type of the child + // will be determined by the BoxType() method of |child|. 
+ bool ReadChild(Box* child) WARN_UNUSED_RESULT; + + // Read one child if available. Returns false on error, true on successful + // read or on child absent. + bool MaybeReadChild(Box* child) WARN_UNUSED_RESULT; + + // Read at least one child. False means error or no such child present. + template<typename T> bool ReadChildren( + std::vector<T>* children) WARN_UNUSED_RESULT; + + // Read any number of children. False means error. + template<typename T> bool MaybeReadChildren( + std::vector<T>* children) WARN_UNUSED_RESULT; + + // Read all children, regardless of FourCC. This is used from exactly one box, + // corresponding to a rather significant inconsistency in the BMFF spec. + // Note that this method is mutually exclusive with ScanChildren(). + template<typename T> bool ReadAllChildren( + std::vector<T>* children) WARN_UNUSED_RESULT; + + // Populate the values of 'version()' and 'flags()' from a full box header. + // Many boxes, but not all, use these values. This call should happen after + // the box has been initialized, and does not re-read the main box header. + bool ReadFullBoxHeader() WARN_UNUSED_RESULT; + + FourCC type() const { return type_; } + uint8 version() const { return version_; } + uint32 flags() const { return flags_; } + + private: + BoxReader(const uint8* buf, const int size, const LogCB& log_cb); + + // Must be called immediately after init. If the return is false, this + // indicates that the box header and its contents were not available in the + // stream or were nonsensical, and that the box must not be used further. In + // this case, if |*err| is false, the problem was simply a lack of data, and + // should only be an error condition if some higher-level component knows that + // no more data is coming (i.e. EOS or end of containing box). If |*err| is + // true, the error is unrecoverable and the stream should be aborted. 
+ bool ReadHeader(bool* err); + + LogCB log_cb_; + FourCC type_; + uint8 version_; + uint32 flags_; + + typedef std::multimap<FourCC, BoxReader> ChildMap; + + // The set of child box FourCCs and their corresponding buffer readers. Only + // valid if scanned_ is true. + ChildMap children_; + bool scanned_; +}; + +// Template definitions +template<typename T> bool BoxReader::ReadChildren(std::vector<T>* children) { + RCHECK(MaybeReadChildren(children) && !children->empty()); + return true; +} + +template<typename T> +bool BoxReader::MaybeReadChildren(std::vector<T>* children) { + DCHECK(scanned_); + DCHECK(children->empty()); + + children->resize(1); + FourCC child_type = (*children)[0].BoxType(); + + ChildMap::iterator start_itr = children_.lower_bound(child_type); + ChildMap::iterator end_itr = children_.upper_bound(child_type); + children->resize(std::distance(start_itr, end_itr)); + typename std::vector<T>::iterator child_itr = children->begin(); + for (ChildMap::iterator itr = start_itr; itr != end_itr; ++itr) { + RCHECK(child_itr->Parse(&itr->second)); + ++child_itr; + } + children_.erase(start_itr, end_itr); + + DVLOG(2) << "Found " << children->size() << " " + << FourCCToString(child_type) << " boxes."; + return true; +} + +template<typename T> +bool BoxReader::ReadAllChildren(std::vector<T>* children) { + DCHECK(!scanned_); + scanned_ = true; + + bool err = false; + while (pos() < size()) { + BoxReader child_reader(&buf_[pos_], size_ - pos_, log_cb_); + if (!child_reader.ReadHeader(&err)) break; + T child; + RCHECK(child.Parse(&child_reader)); + children->push_back(child); + pos_ += child_reader.size(); + } + + return !err; +} + +} // namespace mp4 +} // namespace media + +#endif // MEDIA_FORMATS_MP4_BOX_READER_H_ diff --git a/media/formats/mp4/box_reader_unittest.cc b/media/formats/mp4/box_reader_unittest.cc new file mode 100644 index 0000000..eae3fa5 --- /dev/null +++ b/media/formats/mp4/box_reader_unittest.cc @@ -0,0 +1,201 @@ +// Copyright 2014 The 
Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <string.h> + +#include "base/basictypes.h" +#include "base/logging.h" +#include "base/memory/scoped_ptr.h" +#include "media/formats/mp4/box_reader.h" +#include "media/formats/mp4/rcheck.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace media { +namespace mp4 { + +static const uint8 kSkipBox[] = { + // Top-level test box containing three children + 0x00, 0x00, 0x00, 0x40, 's', 'k', 'i', 'p', + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0xf9, 0x0a, 0x0b, 0x0c, 0xfd, 0x0e, 0x0f, 0x10, + // Ordinary (8-byte header) child box + 0x00, 0x00, 0x00, 0x0c, 'p', 's', 's', 'h', 0xde, 0xad, 0xbe, 0xef, + // Extended-size header child box + 0x00, 0x00, 0x00, 0x01, 'p', 's', 's', 'h', + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, + 0xfa, 0xce, 0xca, 0xfe, + // Empty free box + 0x00, 0x00, 0x00, 0x08, 'f', 'r', 'e', 'e', + // Trailing garbage + 0x00 }; + +struct FreeBox : Box { + virtual bool Parse(BoxReader* reader) OVERRIDE { + return true; + } + virtual FourCC BoxType() const OVERRIDE { return FOURCC_FREE; } +}; + +struct PsshBox : Box { + uint32 val; + + virtual bool Parse(BoxReader* reader) OVERRIDE { + return reader->Read4(&val); + } + virtual FourCC BoxType() const OVERRIDE { return FOURCC_PSSH; } +}; + +struct SkipBox : Box { + uint8 a, b; + uint16 c; + int32 d; + int64 e; + + std::vector<PsshBox> kids; + FreeBox mpty; + + virtual bool Parse(BoxReader* reader) OVERRIDE { + RCHECK(reader->ReadFullBoxHeader() && + reader->Read1(&a) && + reader->Read1(&b) && + reader->Read2(&c) && + reader->Read4s(&d) && + reader->Read4sInto8s(&e)); + return reader->ScanChildren() && + reader->ReadChildren(&kids) && + reader->MaybeReadChild(&mpty); + } + virtual FourCC BoxType() const OVERRIDE { return FOURCC_SKIP; } + + SkipBox(); + virtual ~SkipBox(); +}; + +SkipBox::SkipBox() {} +SkipBox::~SkipBox() {} + +class 
BoxReaderTest : public testing::Test { + protected: + std::vector<uint8> GetBuf() { + return std::vector<uint8>(kSkipBox, kSkipBox + sizeof(kSkipBox)); + } +}; + +TEST_F(BoxReaderTest, ExpectedOperationTest) { + std::vector<uint8> buf = GetBuf(); + bool err; + scoped_ptr<BoxReader> reader( + BoxReader::ReadTopLevelBox(&buf[0], buf.size(), LogCB(), &err)); + EXPECT_FALSE(err); + EXPECT_TRUE(reader.get()); + + SkipBox box; + EXPECT_TRUE(box.Parse(reader.get())); + EXPECT_EQ(0x01, reader->version()); + EXPECT_EQ(0x020304u, reader->flags()); + EXPECT_EQ(0x05, box.a); + EXPECT_EQ(0x06, box.b); + EXPECT_EQ(0x0708, box.c); + EXPECT_EQ(static_cast<int32>(0xf90a0b0c), box.d); + EXPECT_EQ(static_cast<int32>(0xfd0e0f10), box.e); + + EXPECT_EQ(2u, box.kids.size()); + EXPECT_EQ(0xdeadbeef, box.kids[0].val); + EXPECT_EQ(0xfacecafe, box.kids[1].val); + + // Accounting for the extra byte outside of the box above + EXPECT_EQ(buf.size(), static_cast<uint64>(reader->size() + 1)); +} + +TEST_F(BoxReaderTest, OuterTooShortTest) { + std::vector<uint8> buf = GetBuf(); + bool err; + + // Create a soft failure by truncating the outer box. + scoped_ptr<BoxReader> r( + BoxReader::ReadTopLevelBox(&buf[0], buf.size() - 2, LogCB(), &err)); + + EXPECT_FALSE(err); + EXPECT_FALSE(r.get()); +} + +TEST_F(BoxReaderTest, InnerTooLongTest) { + std::vector<uint8> buf = GetBuf(); + bool err; + + // Make an inner box too big for its outer box. + buf[25] = 1; + scoped_ptr<BoxReader> reader( + BoxReader::ReadTopLevelBox(&buf[0], buf.size(), LogCB(), &err)); + + SkipBox box; + EXPECT_FALSE(box.Parse(reader.get())); +} + +TEST_F(BoxReaderTest, WrongFourCCTest) { + std::vector<uint8> buf = GetBuf(); + bool err; + + // Set an unrecognized top-level FourCC. 
+ buf[5] = 1; + scoped_ptr<BoxReader> reader( + BoxReader::ReadTopLevelBox(&buf[0], buf.size(), LogCB(), &err)); + EXPECT_FALSE(reader.get()); + EXPECT_TRUE(err); +} + +TEST_F(BoxReaderTest, ScanChildrenTest) { + std::vector<uint8> buf = GetBuf(); + bool err; + scoped_ptr<BoxReader> reader( + BoxReader::ReadTopLevelBox(&buf[0], buf.size(), LogCB(), &err)); + + EXPECT_TRUE(reader->SkipBytes(16) && reader->ScanChildren()); + + FreeBox free; + EXPECT_TRUE(reader->ReadChild(&free)); + EXPECT_FALSE(reader->ReadChild(&free)); + EXPECT_TRUE(reader->MaybeReadChild(&free)); + + std::vector<PsshBox> kids; + + EXPECT_TRUE(reader->ReadChildren(&kids)); + EXPECT_EQ(2u, kids.size()); + kids.clear(); + EXPECT_FALSE(reader->ReadChildren(&kids)); + EXPECT_TRUE(reader->MaybeReadChildren(&kids)); +} + +TEST_F(BoxReaderTest, ReadAllChildrenTest) { + std::vector<uint8> buf = GetBuf(); + // Modify buffer to exclude its last 'free' box + buf[3] = 0x38; + bool err; + scoped_ptr<BoxReader> reader( + BoxReader::ReadTopLevelBox(&buf[0], buf.size(), LogCB(), &err)); + + std::vector<PsshBox> kids; + EXPECT_TRUE(reader->SkipBytes(16) && reader->ReadAllChildren(&kids)); + EXPECT_EQ(2u, kids.size()); + EXPECT_EQ(kids[0].val, 0xdeadbeef); // Ensure order is preserved +} + +TEST_F(BoxReaderTest, SkippingBloc) { + static const uint8 kData[] = { + 0x00, 0x00, 0x00, 0x09, 'b', 'l', 'o', 'c', 0x00 + }; + + std::vector<uint8> buf(kData, kData + sizeof(kData)); + + bool err; + scoped_ptr<BoxReader> reader( + BoxReader::ReadTopLevelBox(&buf[0], buf.size(), LogCB(), &err)); + + EXPECT_FALSE(err); + EXPECT_TRUE(reader); + EXPECT_EQ(FOURCC_BLOC, reader->type()); +} + +} // namespace mp4 +} // namespace media diff --git a/media/formats/mp4/cenc.cc b/media/formats/mp4/cenc.cc new file mode 100644 index 0000000..001b6d8 --- /dev/null +++ b/media/formats/mp4/cenc.cc @@ -0,0 +1,58 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/mp4/cenc.h" + +#include <cstring> + +#include "media/formats/mp4/box_reader.h" +#include "media/formats/mp4/rcheck.h" + +namespace media { +namespace mp4 { + +FrameCENCInfo::FrameCENCInfo() {} +FrameCENCInfo::~FrameCENCInfo() {} + +bool FrameCENCInfo::Parse(int iv_size, BufferReader* reader) { + const int kEntrySize = 6; + // Mandated by CENC spec + RCHECK(iv_size == 8 || iv_size == 16); + + memset(iv, 0, sizeof(iv)); + for (int i = 0; i < iv_size; i++) + RCHECK(reader->Read1(&iv[i])); + + if (!reader->HasBytes(1)) return true; + + uint16 subsample_count; + RCHECK(reader->Read2(&subsample_count) && + reader->HasBytes(subsample_count * kEntrySize)); + + subsamples.resize(subsample_count); + for (int i = 0; i < subsample_count; i++) { + uint16 clear_bytes; + uint32 cypher_bytes; + RCHECK(reader->Read2(&clear_bytes) && + reader->Read4(&cypher_bytes)); + subsamples[i].clear_bytes = clear_bytes; + subsamples[i].cypher_bytes = cypher_bytes; + } + return true; +} + +bool FrameCENCInfo::GetTotalSizeOfSubsamples(size_t* total_size) const { + size_t size = 0; + for (size_t i = 0; i < subsamples.size(); i++) { + size += subsamples[i].clear_bytes; + RCHECK(size >= subsamples[i].clear_bytes); // overflow + size += subsamples[i].cypher_bytes; + RCHECK(size >= subsamples[i].cypher_bytes); // overflow + } + *total_size = size; + return true; +} + +} // namespace mp4 +} // namespace media diff --git a/media/formats/mp4/cenc.h b/media/formats/mp4/cenc.h new file mode 100644 index 0000000..9eb3358 --- /dev/null +++ b/media/formats/mp4/cenc.h @@ -0,0 +1,32 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 

#ifndef MEDIA_FORMATS_MP4_CENC_H_
#define MEDIA_FORMATS_MP4_CENC_H_

#include <vector>

#include "base/basictypes.h"
#include "media/base/decrypt_config.h"

namespace media {
namespace mp4 {

class BufferReader;

// Per-sample Common Encryption (CENC) information: the initialization vector
// and the list of clear/cypher subsample ranges for one frame.
struct FrameCENCInfo {
  // Initialization vector. Parse() zero-fills all 16 bytes before reading
  // |iv_size| bytes, so an 8-byte IV is zero-padded.
  uint8 iv[16];
  // Subsample ranges in stream order; empty when the sample has none.
  std::vector<SubsampleEntry> subsamples;

  FrameCENCInfo();
  ~FrameCENCInfo();
  // Reads the IV (|iv_size| must be 8 or 16) and any subsample table from |r|.
  // Returns false on a bad |iv_size| or truncated data.
  bool Parse(int iv_size, BufferReader* r) WARN_UNUSED_RESULT;
  // Writes the sum of all clear and cypher byte counts to |*total_size|.
  // Returns false if the sum overflows size_t.
  bool GetTotalSizeOfSubsamples(size_t* total_size) const WARN_UNUSED_RESULT;
};


}  // namespace mp4
}  // namespace media

#endif  // MEDIA_FORMATS_MP4_CENC_H_
diff --git a/media/formats/mp4/es_descriptor.cc b/media/formats/mp4/es_descriptor.cc new file mode 100644 index 0000000..e1da28a --- /dev/null +++ b/media/formats/mp4/es_descriptor.cc @@ -0,0 +1,117 @@
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/formats/mp4/es_descriptor.h"

#include "media/base/bit_reader.h"
#include "media/formats/mp4/rcheck.h"

// The elementary stream size is specified by up to 4 bytes.
// The MSB of a byte indicates if there are more bytes for the size.
+static bool ReadESSize(media::BitReader* reader, uint32* size) { + uint8 msb; + uint8 byte; + + *size = 0; + + for (size_t i = 0; i < 4; ++i) { + RCHECK(reader->ReadBits(1, &msb)); + RCHECK(reader->ReadBits(7, &byte)); + *size = (*size << 7) + byte; + + if (msb == 0) + break; + } + + return true; +} + +namespace media { + +namespace mp4 { + +// static +bool ESDescriptor::IsAAC(uint8 object_type) { + return object_type == kISO_14496_3 || object_type == kISO_13818_7_AAC_LC; +} + +ESDescriptor::ESDescriptor() + : object_type_(kForbidden) { +} + +ESDescriptor::~ESDescriptor() {} + +bool ESDescriptor::Parse(const std::vector<uint8>& data) { + BitReader reader(&data[0], data.size()); + uint8 tag; + uint32 size; + uint8 stream_dependency_flag; + uint8 url_flag; + uint8 ocr_stream_flag; + uint16 dummy; + + RCHECK(reader.ReadBits(8, &tag)); + RCHECK(tag == kESDescrTag); + RCHECK(ReadESSize(&reader, &size)); + + RCHECK(reader.ReadBits(16, &dummy)); // ES_ID + RCHECK(reader.ReadBits(1, &stream_dependency_flag)); + RCHECK(reader.ReadBits(1, &url_flag)); + RCHECK(!url_flag); // We don't support url flag + RCHECK(reader.ReadBits(1, &ocr_stream_flag)); + RCHECK(reader.ReadBits(5, &dummy)); // streamPriority + + if (stream_dependency_flag) + RCHECK(reader.ReadBits(16, &dummy)); // dependsOn_ES_ID + if (ocr_stream_flag) + RCHECK(reader.ReadBits(16, &dummy)); // OCR_ES_Id + + RCHECK(ParseDecoderConfigDescriptor(&reader)); + + return true; +} + +uint8 ESDescriptor::object_type() const { + return object_type_; +} + +const std::vector<uint8>& ESDescriptor::decoder_specific_info() const { + return decoder_specific_info_; +} + +bool ESDescriptor::ParseDecoderConfigDescriptor(BitReader* reader) { + uint8 tag; + uint32 size; + uint64 dummy; + + RCHECK(reader->ReadBits(8, &tag)); + RCHECK(tag == kDecoderConfigDescrTag); + RCHECK(ReadESSize(reader, &size)); + + RCHECK(reader->ReadBits(8, &object_type_)); + RCHECK(reader->ReadBits(64, &dummy)); + RCHECK(reader->ReadBits(32, &dummy)); + 
RCHECK(ParseDecoderSpecificInfo(reader)); + + return true; +} + +bool ESDescriptor::ParseDecoderSpecificInfo(BitReader* reader) { + uint8 tag; + uint32 size; + + RCHECK(reader->ReadBits(8, &tag)); + RCHECK(tag == kDecoderSpecificInfoTag); + RCHECK(ReadESSize(reader, &size)); + + decoder_specific_info_.resize(size); + for (uint32 i = 0; i < size; ++i) + RCHECK(reader->ReadBits(8, &decoder_specific_info_[i])); + + return true; +} + +} // namespace mp4 + +} // namespace media diff --git a/media/formats/mp4/es_descriptor.h b/media/formats/mp4/es_descriptor.h new file mode 100644 index 0000000..1df4526 --- /dev/null +++ b/media/formats/mp4/es_descriptor.h @@ -0,0 +1,61 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_MP4_ES_DESCRIPTOR_H_ +#define MEDIA_FORMATS_MP4_ES_DESCRIPTOR_H_ + +#include <vector> + +#include "base/basictypes.h" +#include "media/base/media_export.h" + +namespace media { + +class BitReader; + +namespace mp4 { + +// The following values are extracted from ISO 14496 Part 1 Table 5 - +// objectTypeIndication Values. Only values currently in use are included. +enum ObjectType { + kForbidden = 0, + kISO_14496_3 = 0x40, // MPEG4 AAC + kISO_13818_7_AAC_LC = 0x67 // MPEG2 AAC-LC +}; + +// This class parse object type and decoder specific information from an +// elementary stream descriptor, which is usually contained in an esds box. +// Please refer to ISO 14496 Part 1 7.2.6.5 for more details. +class MEDIA_EXPORT ESDescriptor { + public: + // Utility function to check if the given object type is AAC. 
+ static bool IsAAC(uint8 object_type); + + ESDescriptor(); + ~ESDescriptor(); + + bool Parse(const std::vector<uint8>& data); + + uint8 object_type() const; + const std::vector<uint8>& decoder_specific_info() const; + + private: + enum Tag { + kESDescrTag = 0x03, + kDecoderConfigDescrTag = 0x04, + kDecoderSpecificInfoTag = 0x05 + }; + + bool ParseDecoderConfigDescriptor(BitReader* reader); + bool ParseDecoderSpecificInfo(BitReader* reader); + + uint8 object_type_; + std::vector<uint8> decoder_specific_info_; +}; + +} // namespace mp4 + +} // namespace media + +#endif // MEDIA_FORMATS_MP4_ES_DESCRIPTOR_H_ diff --git a/media/formats/mp4/es_descriptor_unittest.cc b/media/formats/mp4/es_descriptor_unittest.cc new file mode 100644 index 0000000..6334f5b --- /dev/null +++ b/media/formats/mp4/es_descriptor_unittest.cc @@ -0,0 +1,92 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "media/formats/mp4/es_descriptor.h" + +#include "testing/gtest/include/gtest/gtest.h" + +namespace media { + +namespace mp4 { + +TEST(ESDescriptorTest, SingleByteLengthTest) { + ESDescriptor es_desc; + uint8 buffer[] = { + 0x03, 0x19, 0x00, 0x01, 0x00, 0x04, 0x11, 0x40, + 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x02, 0x12, 0x10, + 0x06, 0x01, 0x02 + }; + std::vector<uint8> data; + + data.assign(buffer, buffer + sizeof(buffer)); + + EXPECT_EQ(es_desc.object_type(), kForbidden); + EXPECT_TRUE(es_desc.Parse(data)); + EXPECT_EQ(es_desc.object_type(), kISO_14496_3); + EXPECT_EQ(es_desc.decoder_specific_info().size(), 2u); + EXPECT_EQ(es_desc.decoder_specific_info()[0], 0x12); + EXPECT_EQ(es_desc.decoder_specific_info()[1], 0x10); +} + +TEST(ESDescriptorTest, NonAACTest) { + ESDescriptor es_desc; + uint8 buffer[] = { + 0x03, 0x19, 0x00, 0x01, 0x00, 0x04, 0x11, 0x66, + 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x02, 0x12, 0x10, + 0x06, 0x01, 0x02 + }; + std::vector<uint8> data; + + data.assign(buffer, buffer + sizeof(buffer)); + + EXPECT_TRUE(es_desc.Parse(data)); + EXPECT_NE(es_desc.object_type(), kISO_14496_3); + EXPECT_EQ(es_desc.decoder_specific_info().size(), 2u); + EXPECT_EQ(es_desc.decoder_specific_info()[0], 0x12); + EXPECT_EQ(es_desc.decoder_specific_info()[1], 0x10); +} + +TEST(ESDescriptorTest, MultiByteLengthTest) { + ESDescriptor es_desc; + uint8 buffer[] = { + 0x03, 0x80, 0x19, 0x00, 0x01, 0x00, 0x04, 0x80, + 0x80, 0x11, 0x40, 0x15, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, + 0x80, 0x80, 0x80, 0x02, 0x12, 0x10, 0x06, 0x01, + 0x02 + }; + std::vector<uint8> data; + + data.assign(buffer, buffer + sizeof(buffer)); + + EXPECT_TRUE(es_desc.Parse(data)); + EXPECT_EQ(es_desc.object_type(), kISO_14496_3); + EXPECT_EQ(es_desc.decoder_specific_info().size(), 2u); + EXPECT_EQ(es_desc.decoder_specific_info()[0], 0x12); + 
EXPECT_EQ(es_desc.decoder_specific_info()[1], 0x10); +} + +TEST(ESDescriptorTest, FiveByteLengthTest) { + ESDescriptor es_desc; + uint8 buffer[] = { + 0x03, 0x80, 0x19, 0x00, 0x01, 0x00, 0x04, 0x80, + 0x80, 0x11, 0x40, 0x15, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, + 0x80, 0x80, 0x80, 0x80, 0x02, 0x12, 0x10, 0x06, + 0x01, 0x02 + }; + std::vector<uint8> data; + + data.assign(buffer, buffer + sizeof(buffer)); + + EXPECT_TRUE(es_desc.Parse(data)); + EXPECT_EQ(es_desc.object_type(), kISO_14496_3); + EXPECT_EQ(es_desc.decoder_specific_info().size(), 0u); +} + +} // namespace mp4 + +} // namespace media diff --git a/media/formats/mp4/fourccs.h b/media/formats/mp4/fourccs.h new file mode 100644 index 0000000..3914221 --- /dev/null +++ b/media/formats/mp4/fourccs.h @@ -0,0 +1,100 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_MP4_FOURCCS_H_ +#define MEDIA_FORMATS_MP4_FOURCCS_H_ + +#include <string> + +namespace media { +namespace mp4 { + +enum FourCC { + FOURCC_NULL = 0, + FOURCC_AVC1 = 0x61766331, + FOURCC_AVC3 = 0x61766333, + FOURCC_AVCC = 0x61766343, + FOURCC_BLOC = 0x626C6F63, + FOURCC_CENC = 0x63656e63, + FOURCC_CO64 = 0x636f3634, + FOURCC_CTTS = 0x63747473, + FOURCC_DINF = 0x64696e66, + FOURCC_EDTS = 0x65647473, + FOURCC_ELST = 0x656c7374, + FOURCC_ENCA = 0x656e6361, + FOURCC_ENCV = 0x656e6376, + FOURCC_ESDS = 0x65736473, + FOURCC_FREE = 0x66726565, + FOURCC_FRMA = 0x66726d61, + FOURCC_FTYP = 0x66747970, + FOURCC_HDLR = 0x68646c72, + FOURCC_HINT = 0x68696e74, + FOURCC_IODS = 0x696f6473, + FOURCC_MDAT = 0x6d646174, + FOURCC_MDHD = 0x6d646864, + FOURCC_MDIA = 0x6d646961, + FOURCC_MECO = 0x6d65636f, + FOURCC_MEHD = 0x6d656864, + FOURCC_META = 0x6d657461, + FOURCC_MFHD = 0x6d666864, + FOURCC_MFRA = 0x6d667261, + FOURCC_MINF = 0x6d696e66, + FOURCC_MOOF = 0x6d6f6f66, + FOURCC_MOOV = 0x6d6f6f76, 
+ FOURCC_MP4A = 0x6d703461, + FOURCC_MP4V = 0x6d703476, + FOURCC_MVEX = 0x6d766578, + FOURCC_MVHD = 0x6d766864, + FOURCC_PASP = 0x70617370, + FOURCC_PDIN = 0x7064696e, + FOURCC_PRFT = 0x70726674, + FOURCC_PSSH = 0x70737368, + FOURCC_SAIO = 0x7361696f, + FOURCC_SAIZ = 0x7361697a, + FOURCC_SCHI = 0x73636869, + FOURCC_SCHM = 0x7363686d, + FOURCC_SDTP = 0x73647470, + FOURCC_SIDX = 0x73696478, + FOURCC_SINF = 0x73696e66, + FOURCC_SKIP = 0x736b6970, + FOURCC_SMHD = 0x736d6864, + FOURCC_SOUN = 0x736f756e, + FOURCC_SSIX = 0x73736978, + FOURCC_STBL = 0x7374626c, + FOURCC_STCO = 0x7374636f, + FOURCC_STSC = 0x73747363, + FOURCC_STSD = 0x73747364, + FOURCC_STSS = 0x73747373, + FOURCC_STSZ = 0x7374737a, + FOURCC_STTS = 0x73747473, + FOURCC_STYP = 0x73747970, + FOURCC_TENC = 0x74656e63, + FOURCC_TFDT = 0x74666474, + FOURCC_TFHD = 0x74666864, + FOURCC_TKHD = 0x746b6864, + FOURCC_TRAF = 0x74726166, + FOURCC_TRAK = 0x7472616b, + FOURCC_TREX = 0x74726578, + FOURCC_TRUN = 0x7472756e, + FOURCC_UDTA = 0x75647461, + FOURCC_UUID = 0x75756964, + FOURCC_VIDE = 0x76696465, + FOURCC_VMHD = 0x766d6864, + FOURCC_WIDE = 0x77696465, +}; + +const inline std::string FourCCToString(FourCC fourcc) { + char buf[5]; + buf[0] = (fourcc >> 24) & 0xff; + buf[1] = (fourcc >> 16) & 0xff; + buf[2] = (fourcc >> 8) & 0xff; + buf[3] = (fourcc) & 0xff; + buf[4] = 0; + return std::string(buf); +} + +} // namespace mp4 +} // namespace media + +#endif // MEDIA_FORMATS_MP4_FOURCCS_H_ diff --git a/media/formats/mp4/mp4_stream_parser.cc b/media/formats/mp4/mp4_stream_parser.cc new file mode 100644 index 0000000..a8038ea --- /dev/null +++ b/media/formats/mp4/mp4_stream_parser.cc @@ -0,0 +1,568 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "media/formats/mp4/mp4_stream_parser.h" + +#include "base/callback.h" +#include "base/callback_helpers.h" +#include "base/logging.h" +#include "base/time/time.h" +#include "media/base/audio_decoder_config.h" +#include "media/base/stream_parser_buffer.h" +#include "media/base/text_track_config.h" +#include "media/base/video_decoder_config.h" +#include "media/base/video_util.h" +#include "media/formats/mp4/box_definitions.h" +#include "media/formats/mp4/box_reader.h" +#include "media/formats/mp4/es_descriptor.h" +#include "media/formats/mp4/rcheck.h" + +namespace media { +namespace mp4 { + +// TODO(xhwang): Figure out the init data type appropriately once it's spec'ed. +static const char kMp4InitDataType[] = "video/mp4"; + +MP4StreamParser::MP4StreamParser(const std::set<int>& audio_object_types, + bool has_sbr) + : state_(kWaitingForInit), + moof_head_(0), + mdat_tail_(0), + has_audio_(false), + has_video_(false), + audio_track_id_(0), + video_track_id_(0), + audio_object_types_(audio_object_types), + has_sbr_(has_sbr), + is_audio_track_encrypted_(false), + is_video_track_encrypted_(false) { +} + +MP4StreamParser::~MP4StreamParser() {} + +void MP4StreamParser::Init(const InitCB& init_cb, + const NewConfigCB& config_cb, + const NewBuffersCB& new_buffers_cb, + const NewTextBuffersCB& /* text_cb */ , + const NeedKeyCB& need_key_cb, + const NewMediaSegmentCB& new_segment_cb, + const base::Closure& end_of_segment_cb, + const LogCB& log_cb) { + DCHECK_EQ(state_, kWaitingForInit); + DCHECK(init_cb_.is_null()); + DCHECK(!init_cb.is_null()); + DCHECK(!config_cb.is_null()); + DCHECK(!new_buffers_cb.is_null()); + DCHECK(!need_key_cb.is_null()); + DCHECK(!end_of_segment_cb.is_null()); + + ChangeState(kParsingBoxes); + init_cb_ = init_cb; + config_cb_ = config_cb; + new_buffers_cb_ = new_buffers_cb; + need_key_cb_ = need_key_cb; + new_segment_cb_ = new_segment_cb; + end_of_segment_cb_ = end_of_segment_cb; + log_cb_ = log_cb; +} + +void MP4StreamParser::Reset() { + 
queue_.Reset(); + runs_.reset(); + moof_head_ = 0; + mdat_tail_ = 0; +} + +void MP4StreamParser::Flush() { + DCHECK_NE(state_, kWaitingForInit); + Reset(); + ChangeState(kParsingBoxes); +} + +bool MP4StreamParser::Parse(const uint8* buf, int size) { + DCHECK_NE(state_, kWaitingForInit); + + if (state_ == kError) + return false; + + queue_.Push(buf, size); + + BufferQueue audio_buffers; + BufferQueue video_buffers; + + bool result, err = false; + + do { + if (state_ == kParsingBoxes) { + result = ParseBox(&err); + } else { + DCHECK_EQ(kEmittingSamples, state_); + result = EnqueueSample(&audio_buffers, &video_buffers, &err); + if (result) { + int64 max_clear = runs_->GetMaxClearOffset() + moof_head_; + err = !ReadAndDiscardMDATsUntil(max_clear); + } + } + } while (result && !err); + + if (!err) + err = !SendAndFlushSamples(&audio_buffers, &video_buffers); + + if (err) { + DLOG(ERROR) << "Error while parsing MP4"; + moov_.reset(); + Reset(); + ChangeState(kError); + return false; + } + + return true; +} + +bool MP4StreamParser::ParseBox(bool* err) { + const uint8* buf; + int size; + queue_.Peek(&buf, &size); + if (!size) return false; + + scoped_ptr<BoxReader> reader( + BoxReader::ReadTopLevelBox(buf, size, log_cb_, err)); + if (reader.get() == NULL) return false; + + if (reader->type() == FOURCC_MOOV) { + *err = !ParseMoov(reader.get()); + } else if (reader->type() == FOURCC_MOOF) { + moof_head_ = queue_.head(); + *err = !ParseMoof(reader.get()); + + // Set up first mdat offset for ReadMDATsUntil(). + mdat_tail_ = queue_.head() + reader->size(); + + // Return early to avoid evicting 'moof' data from queue. Auxiliary info may + // be located anywhere in the file, including inside the 'moof' itself. + // (Since 'default-base-is-moof' is mandated, no data references can come + // before the head of the 'moof', so keeping this box around is sufficient.) 
+ return !(*err); + } else { + MEDIA_LOG(log_cb_) << "Skipping unrecognized top-level box: " + << FourCCToString(reader->type()); + } + + queue_.Pop(reader->size()); + return !(*err); +} + + +bool MP4StreamParser::ParseMoov(BoxReader* reader) { + moov_.reset(new Movie); + RCHECK(moov_->Parse(reader)); + runs_.reset(); + + has_audio_ = false; + has_video_ = false; + + AudioDecoderConfig audio_config; + VideoDecoderConfig video_config; + + for (std::vector<Track>::const_iterator track = moov_->tracks.begin(); + track != moov_->tracks.end(); ++track) { + // TODO(strobe): Only the first audio and video track present in a file are + // used. (Track selection is better accomplished via Source IDs, though, so + // adding support for track selection within a stream is low-priority.) + const SampleDescription& samp_descr = + track->media.information.sample_table.description; + + // TODO(strobe): When codec reconfigurations are supported, detect and send + // a codec reconfiguration for fragments using a sample description index + // different from the previous one + size_t desc_idx = 0; + for (size_t t = 0; t < moov_->extends.tracks.size(); t++) { + const TrackExtends& trex = moov_->extends.tracks[t]; + if (trex.track_id == track->header.track_id) { + desc_idx = trex.default_sample_description_index; + break; + } + } + RCHECK(desc_idx > 0); + desc_idx -= 1; // BMFF descriptor index is one-based + + if (track->media.handler.type == kAudio && !audio_config.IsValidConfig()) { + RCHECK(!samp_descr.audio_entries.empty()); + + // It is not uncommon to find otherwise-valid files with incorrect sample + // description indices, so we fail gracefully in that case. 
+ if (desc_idx >= samp_descr.audio_entries.size()) + desc_idx = 0; + const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx]; + const AAC& aac = entry.esds.aac; + + if (!(entry.format == FOURCC_MP4A || + (entry.format == FOURCC_ENCA && + entry.sinf.format.format == FOURCC_MP4A))) { + MEDIA_LOG(log_cb_) << "Unsupported audio format 0x" + << std::hex << entry.format << " in stsd box."; + return false; + } + + uint8 audio_type = entry.esds.object_type; + DVLOG(1) << "audio_type " << std::hex << audio_type; + if (audio_object_types_.find(audio_type) == audio_object_types_.end()) { + MEDIA_LOG(log_cb_) << "audio object type 0x" << std::hex << audio_type + << " does not match what is specified in the" + << " mimetype."; + return false; + } + + AudioCodec codec = kUnknownAudioCodec; + ChannelLayout channel_layout = CHANNEL_LAYOUT_NONE; + int sample_per_second = 0; + std::vector<uint8> extra_data; + // Check if it is MPEG4 AAC defined in ISO 14496 Part 3 or + // supported MPEG2 AAC varients. + if (ESDescriptor::IsAAC(audio_type)) { + codec = kCodecAAC; + channel_layout = aac.GetChannelLayout(has_sbr_); + sample_per_second = aac.GetOutputSamplesPerSecond(has_sbr_); +#if defined(OS_ANDROID) + extra_data = aac.codec_specific_data(); +#endif + } else { + MEDIA_LOG(log_cb_) << "Unsupported audio object type 0x" << std::hex + << audio_type << " in esds."; + return false; + } + + SampleFormat sample_format; + if (entry.samplesize == 8) { + sample_format = kSampleFormatU8; + } else if (entry.samplesize == 16) { + sample_format = kSampleFormatS16; + } else if (entry.samplesize == 32) { + sample_format = kSampleFormatS32; + } else { + LOG(ERROR) << "Unsupported sample size."; + return false; + } + + is_audio_track_encrypted_ = entry.sinf.info.track_encryption.is_encrypted; + DVLOG(1) << "is_audio_track_encrypted_: " << is_audio_track_encrypted_; + audio_config.Initialize( + codec, sample_format, channel_layout, sample_per_second, + extra_data.size() ? 
&extra_data[0] : NULL, extra_data.size(), + is_audio_track_encrypted_, false, base::TimeDelta(), + base::TimeDelta()); + has_audio_ = true; + audio_track_id_ = track->header.track_id; + } + if (track->media.handler.type == kVideo && !video_config.IsValidConfig()) { + RCHECK(!samp_descr.video_entries.empty()); + if (desc_idx >= samp_descr.video_entries.size()) + desc_idx = 0; + const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx]; + + if (!entry.IsFormatValid()) { + MEDIA_LOG(log_cb_) << "Unsupported video format 0x" + << std::hex << entry.format << " in stsd box."; + return false; + } + + // TODO(strobe): Recover correct crop box + gfx::Size coded_size(entry.width, entry.height); + gfx::Rect visible_rect(coded_size); + gfx::Size natural_size = GetNaturalSize(visible_rect.size(), + entry.pixel_aspect.h_spacing, + entry.pixel_aspect.v_spacing); + is_video_track_encrypted_ = entry.sinf.info.track_encryption.is_encrypted; + DVLOG(1) << "is_video_track_encrypted_: " << is_video_track_encrypted_; + video_config.Initialize(kCodecH264, H264PROFILE_MAIN, VideoFrame::YV12, + coded_size, visible_rect, natural_size, + // No decoder-specific buffer needed for AVC; + // SPS/PPS are embedded in the video stream + NULL, 0, is_video_track_encrypted_, true); + has_video_ = true; + video_track_id_ = track->header.track_id; + } + } + + RCHECK(config_cb_.Run(audio_config, video_config, TextTrackConfigMap())); + + base::TimeDelta duration; + if (moov_->extends.header.fragment_duration > 0) { + duration = TimeDeltaFromRational(moov_->extends.header.fragment_duration, + moov_->header.timescale); + } else if (moov_->header.duration > 0 && + moov_->header.duration != kuint64max) { + duration = TimeDeltaFromRational(moov_->header.duration, + moov_->header.timescale); + } else { + duration = kInfiniteDuration(); + } + + if (!init_cb_.is_null()) + base::ResetAndReturn(&init_cb_).Run(true, duration); + + EmitNeedKeyIfNecessary(moov_->pssh); + return true; +} + +bool 
MP4StreamParser::ParseMoof(BoxReader* reader) { + RCHECK(moov_.get()); // Must already have initialization segment + MovieFragment moof; + RCHECK(moof.Parse(reader)); + if (!runs_) + runs_.reset(new TrackRunIterator(moov_.get(), log_cb_)); + RCHECK(runs_->Init(moof)); + EmitNeedKeyIfNecessary(moof.pssh); + new_segment_cb_.Run(); + ChangeState(kEmittingSamples); + return true; +} + +void MP4StreamParser::EmitNeedKeyIfNecessary( + const std::vector<ProtectionSystemSpecificHeader>& headers) { + // TODO(strobe): ensure that the value of init_data (all PSSH headers + // concatenated in arbitrary order) matches the EME spec. + // See https://www.w3.org/Bugs/Public/show_bug.cgi?id=17673. + if (headers.empty()) + return; + + size_t total_size = 0; + for (size_t i = 0; i < headers.size(); i++) + total_size += headers[i].raw_box.size(); + + std::vector<uint8> init_data(total_size); + size_t pos = 0; + for (size_t i = 0; i < headers.size(); i++) { + memcpy(&init_data[pos], &headers[i].raw_box[0], + headers[i].raw_box.size()); + pos += headers[i].raw_box.size(); + } + need_key_cb_.Run(kMp4InitDataType, init_data); +} + +bool MP4StreamParser::PrepareAVCBuffer( + const AVCDecoderConfigurationRecord& avc_config, + std::vector<uint8>* frame_buf, + std::vector<SubsampleEntry>* subsamples) const { + // Convert the AVC NALU length fields to Annex B headers, as expected by + // decoding libraries. Since this may enlarge the size of the buffer, we also + // update the clear byte count for each subsample if encryption is used to + // account for the difference in size between the length prefix and Annex B + // start code. 
+ RCHECK(AVC::ConvertFrameToAnnexB(avc_config.length_size, frame_buf)); + if (!subsamples->empty()) { + const int nalu_size_diff = 4 - avc_config.length_size; + size_t expected_size = runs_->sample_size() + + subsamples->size() * nalu_size_diff; + RCHECK(frame_buf->size() == expected_size); + for (size_t i = 0; i < subsamples->size(); i++) + (*subsamples)[i].clear_bytes += nalu_size_diff; + } + + if (runs_->is_keyframe()) { + // If this is a keyframe, we (re-)inject SPS and PPS headers at the start of + // a frame. If subsample info is present, we also update the clear byte + // count for that first subsample. + std::vector<uint8> param_sets; + RCHECK(AVC::ConvertConfigToAnnexB(avc_config, ¶m_sets)); + frame_buf->insert(frame_buf->begin(), + param_sets.begin(), param_sets.end()); + if (!subsamples->empty()) + (*subsamples)[0].clear_bytes += param_sets.size(); + } + return true; +} + +bool MP4StreamParser::PrepareAACBuffer( + const AAC& aac_config, std::vector<uint8>* frame_buf, + std::vector<SubsampleEntry>* subsamples) const { + // Append an ADTS header to every audio sample. + RCHECK(aac_config.ConvertEsdsToADTS(frame_buf)); + + // As above, adjust subsample information to account for the headers. AAC is + // not required to use subsample encryption, so we may need to add an entry. 
+ if (subsamples->empty()) { + SubsampleEntry entry; + entry.clear_bytes = AAC::kADTSHeaderSize; + entry.cypher_bytes = frame_buf->size() - AAC::kADTSHeaderSize; + subsamples->push_back(entry); + } else { + (*subsamples)[0].clear_bytes += AAC::kADTSHeaderSize; + } + return true; +} + +bool MP4StreamParser::EnqueueSample(BufferQueue* audio_buffers, + BufferQueue* video_buffers, + bool* err) { + if (!runs_->IsRunValid()) { + // Flush any buffers we've gotten in this chunk so that buffers don't + // cross NewSegment() calls + *err = !SendAndFlushSamples(audio_buffers, video_buffers); + if (*err) + return false; + + // Remain in kEnqueueingSamples state, discarding data, until the end of + // the current 'mdat' box has been appended to the queue. + if (!queue_.Trim(mdat_tail_)) + return false; + + ChangeState(kParsingBoxes); + end_of_segment_cb_.Run(); + return true; + } + + if (!runs_->IsSampleValid()) { + runs_->AdvanceRun(); + return true; + } + + DCHECK(!(*err)); + + const uint8* buf; + int buf_size; + queue_.Peek(&buf, &buf_size); + if (!buf_size) return false; + + bool audio = has_audio_ && audio_track_id_ == runs_->track_id(); + bool video = has_video_ && video_track_id_ == runs_->track_id(); + + // Skip this entire track if it's not one we're interested in + if (!audio && !video) + runs_->AdvanceRun(); + + // Attempt to cache the auxiliary information first. Aux info is usually + // placed in a contiguous block before the sample data, rather than being + // interleaved. If we didn't cache it, this would require that we retain the + // start of the segment buffer while reading samples. Aux info is typically + // quite small compared to sample data, so this pattern is useful on + // memory-constrained devices where the source buffer consumes a substantial + // portion of the total system memory. 
+ if (runs_->AuxInfoNeedsToBeCached()) { + queue_.PeekAt(runs_->aux_info_offset() + moof_head_, &buf, &buf_size); + if (buf_size < runs_->aux_info_size()) return false; + *err = !runs_->CacheAuxInfo(buf, buf_size); + return !*err; + } + + queue_.PeekAt(runs_->sample_offset() + moof_head_, &buf, &buf_size); + if (buf_size < runs_->sample_size()) return false; + + scoped_ptr<DecryptConfig> decrypt_config; + std::vector<SubsampleEntry> subsamples; + if (runs_->is_encrypted()) { + decrypt_config = runs_->GetDecryptConfig(); + if (!decrypt_config) { + *err = true; + return false; + } + subsamples = decrypt_config->subsamples(); + } + + std::vector<uint8> frame_buf(buf, buf + runs_->sample_size()); + if (video) { + if (!PrepareAVCBuffer(runs_->video_description().avcc, + &frame_buf, &subsamples)) { + MEDIA_LOG(log_cb_) << "Failed to prepare AVC sample for decode"; + *err = true; + return false; + } + } + + if (audio) { + if (ESDescriptor::IsAAC(runs_->audio_description().esds.object_type) && + !PrepareAACBuffer(runs_->audio_description().esds.aac, + &frame_buf, &subsamples)) { + MEDIA_LOG(log_cb_) << "Failed to prepare AAC sample for decode"; + *err = true; + return false; + } + } + + if (decrypt_config) { + if (!subsamples.empty()) { + // Create a new config with the updated subsamples. + decrypt_config.reset(new DecryptConfig( + decrypt_config->key_id(), + decrypt_config->iv(), + subsamples)); + } + // else, use the existing config. + } else if ((audio && is_audio_track_encrypted_) || + (video && is_video_track_encrypted_)) { + // The media pipeline requires a DecryptConfig with an empty |iv|. 
+ // TODO(ddorwin): Refactor so we do not need a fake key ID ("1"); + decrypt_config.reset( + new DecryptConfig("1", "", std::vector<SubsampleEntry>())); + } + + scoped_refptr<StreamParserBuffer> stream_buf = + StreamParserBuffer::CopyFrom(&frame_buf[0], frame_buf.size(), + runs_->is_keyframe()); + + if (decrypt_config) + stream_buf->set_decrypt_config(decrypt_config.Pass()); + + stream_buf->set_duration(runs_->duration()); + stream_buf->set_timestamp(runs_->cts()); + stream_buf->SetDecodeTimestamp(runs_->dts()); + + DVLOG(3) << "Pushing frame: aud=" << audio + << ", key=" << runs_->is_keyframe() + << ", dur=" << runs_->duration().InMilliseconds() + << ", dts=" << runs_->dts().InMilliseconds() + << ", cts=" << runs_->cts().InMilliseconds() + << ", size=" << runs_->sample_size(); + + if (audio) { + audio_buffers->push_back(stream_buf); + } else { + video_buffers->push_back(stream_buf); + } + + runs_->AdvanceSample(); + return true; +} + +bool MP4StreamParser::SendAndFlushSamples(BufferQueue* audio_buffers, + BufferQueue* video_buffers) { + if (audio_buffers->empty() && video_buffers->empty()) + return true; + + bool success = new_buffers_cb_.Run(*audio_buffers, *video_buffers); + audio_buffers->clear(); + video_buffers->clear(); + return success; +} + +bool MP4StreamParser::ReadAndDiscardMDATsUntil(const int64 offset) { + bool err = false; + while (mdat_tail_ < offset) { + const uint8* buf; + int size; + queue_.PeekAt(mdat_tail_, &buf, &size); + + FourCC type; + int box_sz; + if (!BoxReader::StartTopLevelBox(buf, size, log_cb_, + &type, &box_sz, &err)) + break; + + if (type != FOURCC_MDAT) { + MEDIA_LOG(log_cb_) << "Unexpected box type while parsing MDATs: " + << FourCCToString(type); + } + mdat_tail_ += box_sz; + } + queue_.Trim(std::min(mdat_tail_, offset)); + return !err; +} + +void MP4StreamParser::ChangeState(State new_state) { + DVLOG(2) << "Changing state: " << new_state; + state_ = new_state; +} + +} // namespace mp4 +} // namespace media diff --git 
a/media/formats/mp4/mp4_stream_parser.h b/media/formats/mp4/mp4_stream_parser.h new file mode 100644 index 0000000..d229852 --- /dev/null +++ b/media/formats/mp4/mp4_stream_parser.h @@ -0,0 +1,121 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_MP4_MP4_STREAM_PARSER_H_ +#define MEDIA_FORMATS_MP4_MP4_STREAM_PARSER_H_ + +#include <set> +#include <vector> + +#include "base/basictypes.h" +#include "base/callback.h" +#include "base/compiler_specific.h" +#include "base/memory/scoped_ptr.h" +#include "media/base/media_export.h" +#include "media/base/stream_parser.h" +#include "media/formats/mp4/offset_byte_queue.h" +#include "media/formats/mp4/track_run_iterator.h" + +namespace media { +namespace mp4 { + +struct Movie; +class BoxReader; + +class MEDIA_EXPORT MP4StreamParser : public StreamParser { + public: + MP4StreamParser(const std::set<int>& audio_object_types, bool has_sbr); + virtual ~MP4StreamParser(); + + virtual void Init(const InitCB& init_cb, const NewConfigCB& config_cb, + const NewBuffersCB& new_buffers_cb, + const NewTextBuffersCB& text_cb, + const NeedKeyCB& need_key_cb, + const NewMediaSegmentCB& new_segment_cb, + const base::Closure& end_of_segment_cb, + const LogCB& log_cb) OVERRIDE; + virtual void Flush() OVERRIDE; + virtual bool Parse(const uint8* buf, int size) OVERRIDE; + + private: + enum State { + kWaitingForInit, + kParsingBoxes, + kEmittingSamples, + kError + }; + + bool ParseBox(bool* err); + bool ParseMoov(mp4::BoxReader* reader); + bool ParseMoof(mp4::BoxReader* reader); + + void EmitNeedKeyIfNecessary( + const std::vector<ProtectionSystemSpecificHeader>& headers); + + // To retain proper framing, each 'mdat' atom must be read; to limit memory + // usage, the atom's data needs to be discarded incrementally as frames are + // extracted from the stream. 
This function discards data from the stream up + // to |offset|, updating the |mdat_tail_| value so that framing can be + // retained after all 'mdat' information has been read. + // Returns 'true' on success, 'false' if there was an error. + bool ReadAndDiscardMDATsUntil(const int64 offset); + + void ChangeState(State new_state); + + bool EmitConfigs(); + bool PrepareAVCBuffer(const AVCDecoderConfigurationRecord& avc_config, + std::vector<uint8>* frame_buf, + std::vector<SubsampleEntry>* subsamples) const; + bool PrepareAACBuffer(const AAC& aac_config, + std::vector<uint8>* frame_buf, + std::vector<SubsampleEntry>* subsamples) const; + bool EnqueueSample(BufferQueue* audio_buffers, + BufferQueue* video_buffers, + bool* err); + bool SendAndFlushSamples(BufferQueue* audio_buffers, + BufferQueue* video_buffers); + + void Reset(); + + State state_; + InitCB init_cb_; + NewConfigCB config_cb_; + NewBuffersCB new_buffers_cb_; + NeedKeyCB need_key_cb_; + NewMediaSegmentCB new_segment_cb_; + base::Closure end_of_segment_cb_; + LogCB log_cb_; + + OffsetByteQueue queue_; + + // These two parameters are only valid in the |kEmittingSamples| state. + // + // |moof_head_| is the offset of the start of the most recently parsed moof + // block. All byte offsets in sample information are relative to this offset, + // as mandated by the Media Source spec. + int64 moof_head_; + // |mdat_tail_| is the stream offset of the end of the current 'mdat' box. + // Valid iff it is greater than the head of the queue. + int64 mdat_tail_; + + scoped_ptr<mp4::Movie> moov_; + scoped_ptr<mp4::TrackRunIterator> runs_; + + bool has_audio_; + bool has_video_; + uint32 audio_track_id_; + uint32 video_track_id_; + // The object types allowed for audio tracks. 
+ std::set<int> audio_object_types_; + bool has_sbr_; + bool is_audio_track_encrypted_; + bool is_video_track_encrypted_; + + DISALLOW_COPY_AND_ASSIGN(MP4StreamParser); +}; + +} // namespace mp4 +} // namespace media + +#endif // MEDIA_FORMATS_MP4_MP4_STREAM_PARSER_H_ diff --git a/media/formats/mp4/mp4_stream_parser_unittest.cc b/media/formats/mp4/mp4_stream_parser_unittest.cc new file mode 100644 index 0000000..0f9260a --- /dev/null +++ b/media/formats/mp4/mp4_stream_parser_unittest.cc @@ -0,0 +1,204 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <algorithm> +#include <string> + +#include "base/bind.h" +#include "base/bind_helpers.h" +#include "base/logging.h" +#include "base/memory/ref_counted.h" +#include "base/time/time.h" +#include "media/base/audio_decoder_config.h" +#include "media/base/decoder_buffer.h" +#include "media/base/stream_parser_buffer.h" +#include "media/base/test_data_util.h" +#include "media/base/text_track_config.h" +#include "media/base/video_decoder_config.h" +#include "media/formats/mp4/es_descriptor.h" +#include "media/formats/mp4/mp4_stream_parser.h" +#include "testing/gtest/include/gtest/gtest.h" + +using base::TimeDelta; + +namespace media { +namespace mp4 { + +// TODO(xhwang): Figure out the init data type appropriately once it's spec'ed. 
+static const char kMp4InitDataType[] = "video/mp4"; + +class MP4StreamParserTest : public testing::Test { + public: + MP4StreamParserTest() + : configs_received_(false) { + std::set<int> audio_object_types; + audio_object_types.insert(kISO_14496_3); + parser_.reset(new MP4StreamParser(audio_object_types, false)); + } + + protected: + scoped_ptr<MP4StreamParser> parser_; + bool configs_received_; + + bool AppendData(const uint8* data, size_t length) { + return parser_->Parse(data, length); + } + + bool AppendDataInPieces(const uint8* data, size_t length, size_t piece_size) { + const uint8* start = data; + const uint8* end = data + length; + while (start < end) { + size_t append_size = std::min(piece_size, + static_cast<size_t>(end - start)); + if (!AppendData(start, append_size)) + return false; + start += append_size; + } + return true; + } + + void InitF(bool init_ok, base::TimeDelta duration) { + DVLOG(1) << "InitF: ok=" << init_ok + << ", dur=" << duration.InMilliseconds(); + } + + bool NewConfigF(const AudioDecoderConfig& ac, + const VideoDecoderConfig& vc, + const StreamParser::TextTrackConfigMap& tc) { + DVLOG(1) << "NewConfigF: audio=" << ac.IsValidConfig() + << ", video=" << vc.IsValidConfig(); + configs_received_ = true; + return true; + } + + + void DumpBuffers(const std::string& label, + const StreamParser::BufferQueue& buffers) { + DVLOG(2) << "DumpBuffers: " << label << " size " << buffers.size(); + for (StreamParser::BufferQueue::const_iterator buf = buffers.begin(); + buf != buffers.end(); buf++) { + DVLOG(3) << " n=" << buf - buffers.begin() + << ", size=" << (*buf)->data_size() + << ", dur=" << (*buf)->duration().InMilliseconds(); + } + } + + bool NewBuffersF(const StreamParser::BufferQueue& audio_buffers, + const StreamParser::BufferQueue& video_buffers) { + DumpBuffers("audio_buffers", audio_buffers); + DumpBuffers("video_buffers", video_buffers); + return true; + } + + void KeyNeededF(const std::string& type, + const std::vector<uint8>& 
init_data) { + DVLOG(1) << "KeyNeededF: " << init_data.size(); + EXPECT_EQ(kMp4InitDataType, type); + EXPECT_FALSE(init_data.empty()); + } + + void NewSegmentF() { + DVLOG(1) << "NewSegmentF"; + } + + void EndOfSegmentF() { + DVLOG(1) << "EndOfSegmentF()"; + } + + void InitializeParser() { + parser_->Init( + base::Bind(&MP4StreamParserTest::InitF, base::Unretained(this)), + base::Bind(&MP4StreamParserTest::NewConfigF, base::Unretained(this)), + base::Bind(&MP4StreamParserTest::NewBuffersF, base::Unretained(this)), + StreamParser::NewTextBuffersCB(), + base::Bind(&MP4StreamParserTest::KeyNeededF, base::Unretained(this)), + base::Bind(&MP4StreamParserTest::NewSegmentF, base::Unretained(this)), + base::Bind(&MP4StreamParserTest::EndOfSegmentF, + base::Unretained(this)), + LogCB()); + } + + bool ParseMP4File(const std::string& filename, int append_bytes) { + InitializeParser(); + + scoped_refptr<DecoderBuffer> buffer = ReadTestDataFile(filename); + EXPECT_TRUE(AppendDataInPieces(buffer->data(), + buffer->data_size(), + append_bytes)); + return true; + } +}; + +TEST_F(MP4StreamParserTest, UnalignedAppend) { + // Test small, non-segment-aligned appends (small enough to exercise + // incremental append system) + ParseMP4File("bear-1280x720-av_frag.mp4", 512); +} + +TEST_F(MP4StreamParserTest, BytewiseAppend) { + // Ensure no incremental errors occur when parsing + ParseMP4File("bear-1280x720-av_frag.mp4", 1); +} + +TEST_F(MP4StreamParserTest, MultiFragmentAppend) { + // Large size ensures multiple fragments are appended in one call (size is + // larger than this particular test file) + ParseMP4File("bear-1280x720-av_frag.mp4", 768432); +} + +TEST_F(MP4StreamParserTest, Flush) { + // Flush while reading sample data, then start a new stream. 
+ InitializeParser(); + + scoped_refptr<DecoderBuffer> buffer = + ReadTestDataFile("bear-1280x720-av_frag.mp4"); + EXPECT_TRUE(AppendDataInPieces(buffer->data(), 65536, 512)); + parser_->Flush(); + EXPECT_TRUE(AppendDataInPieces(buffer->data(), + buffer->data_size(), + 512)); +} + +TEST_F(MP4StreamParserTest, Reinitialization) { + InitializeParser(); + + scoped_refptr<DecoderBuffer> buffer = + ReadTestDataFile("bear-1280x720-av_frag.mp4"); + EXPECT_TRUE(AppendDataInPieces(buffer->data(), + buffer->data_size(), + 512)); + EXPECT_TRUE(AppendDataInPieces(buffer->data(), + buffer->data_size(), + 512)); +} + +TEST_F(MP4StreamParserTest, MPEG2_AAC_LC) { + std::set<int> audio_object_types; + audio_object_types.insert(kISO_13818_7_AAC_LC); + parser_.reset(new MP4StreamParser(audio_object_types, false)); + ParseMP4File("bear-mpeg2-aac-only_frag.mp4", 512); +} + +// Test that a moov box is not always required after Flush() is called. +TEST_F(MP4StreamParserTest, NoMoovAfterFlush) { + InitializeParser(); + + scoped_refptr<DecoderBuffer> buffer = + ReadTestDataFile("bear-1280x720-av_frag.mp4"); + EXPECT_TRUE(AppendDataInPieces(buffer->data(), + buffer->data_size(), + 512)); + parser_->Flush(); + + const int kFirstMoofOffset = 1307; + EXPECT_TRUE(AppendDataInPieces(buffer->data() + kFirstMoofOffset, + buffer->data_size() - kFirstMoofOffset, + 512)); +} + +// TODO(strobe): Create and test media which uses CENC auxiliary info stored +// inside a private box + +} // namespace mp4 +} // namespace media diff --git a/media/formats/mp4/offset_byte_queue.cc b/media/formats/mp4/offset_byte_queue.cc new file mode 100644 index 0000000..a23a634 --- /dev/null +++ b/media/formats/mp4/offset_byte_queue.cc @@ -0,0 +1,64 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "media/formats/mp4/offset_byte_queue.h" + +#include "base/basictypes.h" +#include "base/logging.h" + +namespace media { + +OffsetByteQueue::OffsetByteQueue() : buf_(NULL), size_(0), head_(0) {} +OffsetByteQueue::~OffsetByteQueue() {} + +void OffsetByteQueue::Reset() { + queue_.Reset(); + buf_ = NULL; + size_ = 0; + head_ = 0; +} + +void OffsetByteQueue::Push(const uint8* buf, int size) { + queue_.Push(buf, size); + Sync(); + DVLOG(4) << "Buffer pushed. head=" << head() << " tail=" << tail(); +} + +void OffsetByteQueue::Peek(const uint8** buf, int* size) { + *buf = size_ > 0 ? buf_ : NULL; + *size = size_; +} + +void OffsetByteQueue::Pop(int count) { + queue_.Pop(count); + head_ += count; + Sync(); +} + +void OffsetByteQueue::PeekAt(int64 offset, const uint8** buf, int* size) { + DCHECK(offset >= head()); + if (offset < head() || offset >= tail()) { + *buf = NULL; + *size = 0; + return; + } + *buf = &buf_[offset - head()]; + *size = tail() - offset; +} + +bool OffsetByteQueue::Trim(int64 max_offset) { + if (max_offset < head_) return true; + if (max_offset > tail()) { + Pop(size_); + return false; + } + Pop(max_offset - head_); + return true; +} + +void OffsetByteQueue::Sync() { + queue_.Peek(&buf_, &size_); +} + +} // namespace media diff --git a/media/formats/mp4/offset_byte_queue.h b/media/formats/mp4/offset_byte_queue.h new file mode 100644 index 0000000..a54ee8f --- /dev/null +++ b/media/formats/mp4/offset_byte_queue.h @@ -0,0 +1,66 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_MP4_OFFSET_BYTE_QUEUE_H_ +#define MEDIA_FORMATS_MP4_OFFSET_BYTE_QUEUE_H_ + +#include "base/basictypes.h" +#include "media/base/byte_queue.h" +#include "media/base/media_export.h" + +namespace media { + +// A wrapper around a ByteQueue which maintains a notion of a +// monotonically-increasing offset. 
All buffer access is done by passing these +// offsets into this class, going some way towards preventing the proliferation +// of many different meanings of "offset", "head", etc. +class MEDIA_EXPORT OffsetByteQueue { + public: + OffsetByteQueue(); + ~OffsetByteQueue(); + + // These work like their underlying ByteQueue counterparts. + void Reset(); + void Push(const uint8* buf, int size); + void Peek(const uint8** buf, int* size); + void Pop(int count); + + // Sets |buf| to point at the first buffered byte corresponding to |offset|, + // and |size| to the number of bytes available starting from that offset. + // + // It is an error if the offset is before the current head. It's not an error + // if the current offset is beyond tail(), but you will of course get back + // a null |buf| and a |size| of zero. + void PeekAt(int64 offset, const uint8** buf, int* size); + + // Marks the bytes up to (but not including) |max_offset| as ready for + // deletion. This is relatively inexpensive, but will not necessarily reduce + // the resident buffer size right away (or ever). + // + // Returns true if the full range of bytes were successfully trimmed, + // including the case where |max_offset| is less than the current head. + // Returns false if |max_offset| > tail() (although all bytes currently + // buffered are still cleared). + bool Trim(int64 max_offset); + + // The head and tail positions, in terms of the file's absolute offsets. + // tail() is an exclusive bound. + int64 head() { return head_; } + int64 tail() { return head_ + size_; } + + private: + // Synchronize |buf_| and |size_| with |queue_|. 
+ void Sync(); + + ByteQueue queue_; + const uint8* buf_; + int size_; + int64 head_; + + DISALLOW_COPY_AND_ASSIGN(OffsetByteQueue); +}; + +} // namespace media + +#endif // MEDIA_FORMATS_MP4_OFFSET_BYTE_QUEUE_H_ diff --git a/media/formats/mp4/offset_byte_queue_unittest.cc b/media/formats/mp4/offset_byte_queue_unittest.cc new file mode 100644 index 0000000..d3ce32d --- /dev/null +++ b/media/formats/mp4/offset_byte_queue_unittest.cc @@ -0,0 +1,92 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <string.h> + +#include "base/basictypes.h" +#include "base/memory/scoped_ptr.h" +#include "media/formats/mp4/offset_byte_queue.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace media { + +class OffsetByteQueueTest : public testing::Test { + public: + virtual void SetUp() OVERRIDE { + uint8 buf[256]; + for (int i = 0; i < 256; i++) { + buf[i] = i; + } + queue_.reset(new OffsetByteQueue); + queue_->Push(buf, sizeof(buf)); + queue_->Push(buf, sizeof(buf)); + queue_->Pop(384); + + // Queue will start with 128 bytes of data and an offset of 384 bytes. + // These values are used throughout the test. 
+ } + + protected: + scoped_ptr<OffsetByteQueue> queue_; +}; + +TEST_F(OffsetByteQueueTest, SetUp) { + EXPECT_EQ(384, queue_->head()); + EXPECT_EQ(512, queue_->tail()); + + const uint8* buf; + int size; + + queue_->Peek(&buf, &size); + EXPECT_EQ(128, size); + EXPECT_EQ(128, buf[0]); + EXPECT_EQ(255, buf[size-1]); +} + +TEST_F(OffsetByteQueueTest, PeekAt) { + const uint8* buf; + int size; + + queue_->PeekAt(400, &buf, &size); + EXPECT_EQ(queue_->tail() - 400, size); + EXPECT_EQ(400 - 256, buf[0]); + + queue_->PeekAt(512, &buf, &size); + EXPECT_EQ(NULL, buf); + EXPECT_EQ(0, size); +} + +TEST_F(OffsetByteQueueTest, Trim) { + EXPECT_TRUE(queue_->Trim(128)); + EXPECT_TRUE(queue_->Trim(384)); + EXPECT_EQ(384, queue_->head()); + EXPECT_EQ(512, queue_->tail()); + + EXPECT_TRUE(queue_->Trim(400)); + EXPECT_EQ(400, queue_->head()); + EXPECT_EQ(512, queue_->tail()); + + const uint8* buf; + int size; + queue_->PeekAt(400, &buf, &size); + EXPECT_EQ(queue_->tail() - 400, size); + EXPECT_EQ(400 - 256, buf[0]); + + // Trimming to the exact end of the buffer should return 'true'. This + // accommodates EOS cases. + EXPECT_TRUE(queue_->Trim(512)); + EXPECT_EQ(512, queue_->head()); + queue_->Peek(&buf, &size); + EXPECT_EQ(NULL, buf); + + // Trimming past the end of the buffer should return 'false'; we haven't seen + // the preceding bytes. + EXPECT_FALSE(queue_->Trim(513)); + + // However, doing that shouldn't affect the EOS case. Only adding new data + // should alter this behavior. + EXPECT_TRUE(queue_->Trim(512)); +} + +} // namespace media diff --git a/media/formats/mp4/rcheck.h b/media/formats/mp4/rcheck.h new file mode 100644 index 0000000..fb0f8f2 --- /dev/null +++ b/media/formats/mp4/rcheck.h @@ -0,0 +1,18 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef MEDIA_FORMATS_MP4_RCHECK_H_ +#define MEDIA_FORMATS_MP4_RCHECK_H_ + +#include "base/logging.h" + +#define RCHECK(x) \ + do { \ + if (!(x)) { \ + DLOG(ERROR) << "Failure while parsing MP4: " << #x; \ + return false; \ + } \ + } while (0) + +#endif // MEDIA_FORMATS_MP4_RCHECK_H_ diff --git a/media/formats/mp4/track_run_iterator.cc b/media/formats/mp4/track_run_iterator.cc new file mode 100644 index 0000000..e2a145e --- /dev/null +++ b/media/formats/mp4/track_run_iterator.cc @@ -0,0 +1,467 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/mp4/track_run_iterator.h" + +#include <algorithm> + +#include "media/base/buffers.h" +#include "media/base/stream_parser_buffer.h" +#include "media/formats/mp4/rcheck.h" + +namespace { +static const uint32 kSampleIsDifferenceSampleFlagMask = 0x10000; +} + +namespace media { +namespace mp4 { + +struct SampleInfo { + int size; + int duration; + int cts_offset; + bool is_keyframe; +}; + +struct TrackRunInfo { + uint32 track_id; + std::vector<SampleInfo> samples; + int64 timescale; + int64 start_dts; + int64 sample_start_offset; + + bool is_audio; + const AudioSampleEntry* audio_description; + const VideoSampleEntry* video_description; + + int64 aux_info_start_offset; // Only valid if aux_info_total_size > 0. + int aux_info_default_size; + std::vector<uint8> aux_info_sizes; // Populated if default_size == 0. + int aux_info_total_size; + + TrackRunInfo(); + ~TrackRunInfo(); +}; + +TrackRunInfo::TrackRunInfo() + : track_id(0), + timescale(-1), + start_dts(-1), + sample_start_offset(-1), + is_audio(false), + aux_info_start_offset(-1), + aux_info_default_size(-1), + aux_info_total_size(-1) { +} +TrackRunInfo::~TrackRunInfo() {} + +TimeDelta TimeDeltaFromRational(int64 numer, int64 denom) { + DCHECK_LT((numer > 0 ? 
numer : -numer), + kint64max / base::Time::kMicrosecondsPerSecond); + return TimeDelta::FromMicroseconds( + base::Time::kMicrosecondsPerSecond * numer / denom); +} + +TrackRunIterator::TrackRunIterator(const Movie* moov, + const LogCB& log_cb) + : moov_(moov), log_cb_(log_cb), sample_offset_(0) { + CHECK(moov); +} + +TrackRunIterator::~TrackRunIterator() {} + +static void PopulateSampleInfo(const TrackExtends& trex, + const TrackFragmentHeader& tfhd, + const TrackFragmentRun& trun, + const int64 edit_list_offset, + const uint32 i, + SampleInfo* sample_info, + const SampleDependsOn sample_depends_on) { + if (i < trun.sample_sizes.size()) { + sample_info->size = trun.sample_sizes[i]; + } else if (tfhd.default_sample_size > 0) { + sample_info->size = tfhd.default_sample_size; + } else { + sample_info->size = trex.default_sample_size; + } + + if (i < trun.sample_durations.size()) { + sample_info->duration = trun.sample_durations[i]; + } else if (tfhd.default_sample_duration > 0) { + sample_info->duration = tfhd.default_sample_duration; + } else { + sample_info->duration = trex.default_sample_duration; + } + + if (i < trun.sample_composition_time_offsets.size()) { + sample_info->cts_offset = trun.sample_composition_time_offsets[i]; + } else { + sample_info->cts_offset = 0; + } + sample_info->cts_offset += edit_list_offset; + + uint32 flags; + if (i < trun.sample_flags.size()) { + flags = trun.sample_flags[i]; + } else if (tfhd.has_default_sample_flags) { + flags = tfhd.default_sample_flags; + } else { + flags = trex.default_sample_flags; + } + + switch (sample_depends_on) { + case kSampleDependsOnUnknown: + sample_info->is_keyframe = !(flags & kSampleIsDifferenceSampleFlagMask); + break; + + case kSampleDependsOnOthers: + sample_info->is_keyframe = false; + break; + + case kSampleDependsOnNoOther: + sample_info->is_keyframe = true; + break; + + case kSampleDependsOnReserved: + CHECK(false); + } +} + +// In well-structured encrypted media, each track run will be 
immediately +// preceded by its auxiliary information; this is the only optimal storage +// pattern in terms of minimum number of bytes from a serial stream needed to +// begin playback. It also allows us to optimize caching on memory-constrained +// architectures, because we can cache the relatively small auxiliary +// information for an entire run and then discard data from the input stream, +// instead of retaining the entire 'mdat' box. +// +// We optimize for this situation (with no loss of generality) by sorting track +// runs during iteration in order of their first data offset (either sample data +// or auxiliary data). +class CompareMinTrackRunDataOffset { + public: + bool operator()(const TrackRunInfo& a, const TrackRunInfo& b) { + int64 a_aux = a.aux_info_total_size ? a.aux_info_start_offset : kint64max; + int64 b_aux = b.aux_info_total_size ? b.aux_info_start_offset : kint64max; + + int64 a_lesser = std::min(a_aux, a.sample_start_offset); + int64 a_greater = std::max(a_aux, a.sample_start_offset); + int64 b_lesser = std::min(b_aux, b.sample_start_offset); + int64 b_greater = std::max(b_aux, b.sample_start_offset); + + if (a_lesser == b_lesser) return a_greater < b_greater; + return a_lesser < b_lesser; + } +}; + +bool TrackRunIterator::Init(const MovieFragment& moof) { + runs_.clear(); + + for (size_t i = 0; i < moof.tracks.size(); i++) { + const TrackFragment& traf = moof.tracks[i]; + + const Track* trak = NULL; + for (size_t t = 0; t < moov_->tracks.size(); t++) { + if (moov_->tracks[t].header.track_id == traf.header.track_id) + trak = &moov_->tracks[t]; + } + RCHECK(trak); + + const TrackExtends* trex = NULL; + for (size_t t = 0; t < moov_->extends.tracks.size(); t++) { + if (moov_->extends.tracks[t].track_id == traf.header.track_id) + trex = &moov_->extends.tracks[t]; + } + RCHECK(trex); + + const SampleDescription& stsd = + trak->media.information.sample_table.description; + if (stsd.type != kAudio && stsd.type != kVideo) { + DVLOG(1) << "Skipping 
unhandled track type"; + continue; + } + size_t desc_idx = traf.header.sample_description_index; + if (!desc_idx) desc_idx = trex->default_sample_description_index; + RCHECK(desc_idx > 0); // Descriptions are one-indexed in the file + desc_idx -= 1; + + // Process edit list to remove CTS offset introduced in the presence of + // B-frames (those that contain a single edit with a nonnegative media + // time). Other uses of edit lists are not supported, as they are + // both uncommon and better served by higher-level protocols. + int64 edit_list_offset = 0; + const std::vector<EditListEntry>& edits = trak->edit.list.edits; + if (!edits.empty()) { + if (edits.size() > 1) + DVLOG(1) << "Multi-entry edit box detected; some components ignored."; + + if (edits[0].media_time < 0) { + DVLOG(1) << "Empty edit list entry ignored."; + } else { + edit_list_offset = -edits[0].media_time; + } + } + + int64 run_start_dts = traf.decode_time.decode_time; + int sample_count_sum = 0; + bool is_sync_sample_box_present = + trak->media.information.sample_table.sync_sample.is_present; + for (size_t j = 0; j < traf.runs.size(); j++) { + const TrackFragmentRun& trun = traf.runs[j]; + TrackRunInfo tri; + tri.track_id = traf.header.track_id; + tri.timescale = trak->media.header.timescale; + tri.start_dts = run_start_dts; + tri.sample_start_offset = trun.data_offset; + + tri.is_audio = (stsd.type == kAudio); + if (tri.is_audio) { + RCHECK(!stsd.audio_entries.empty()); + // |desc_idx| is zero-based here, so size() itself is already out of + // range; fall back to the first entry in that case. + if (desc_idx >= stsd.audio_entries.size()) + desc_idx = 0; + tri.audio_description = &stsd.audio_entries[desc_idx]; + } else { + RCHECK(!stsd.video_entries.empty()); + // Same zero-based bounds check as for the audio entries above. + if (desc_idx >= stsd.video_entries.size()) + desc_idx = 0; + tri.video_description = &stsd.video_entries[desc_idx]; + } + + // Collect information from the auxiliary_offset entry with the same index + // in the 'saiz' container as the current run's index in the 'trun' + // container, if it is present. 
+ if (traf.auxiliary_offset.offsets.size() > j) { + // There should be an auxiliary info entry corresponding to each sample + // in the auxiliary offset entry's corresponding track run. + RCHECK(traf.auxiliary_size.sample_count >= + sample_count_sum + trun.sample_count); + tri.aux_info_start_offset = traf.auxiliary_offset.offsets[j]; + tri.aux_info_default_size = + traf.auxiliary_size.default_sample_info_size; + if (tri.aux_info_default_size == 0) { + const std::vector<uint8>& sizes = + traf.auxiliary_size.sample_info_sizes; + tri.aux_info_sizes.insert(tri.aux_info_sizes.begin(), + sizes.begin() + sample_count_sum, + sizes.begin() + sample_count_sum + trun.sample_count); + } + + // If the default info size is positive, find the total size of the aux + // info block from it, otherwise sum over the individual sizes of each + // aux info entry in the aux_offset entry. + if (tri.aux_info_default_size) { + tri.aux_info_total_size = + tri.aux_info_default_size * trun.sample_count; + } else { + tri.aux_info_total_size = 0; + for (size_t k = 0; k < trun.sample_count; k++) { + tri.aux_info_total_size += tri.aux_info_sizes[k]; + } + } + } else { + tri.aux_info_start_offset = -1; + tri.aux_info_total_size = 0; + } + + tri.samples.resize(trun.sample_count); + for (size_t k = 0; k < trun.sample_count; k++) { + PopulateSampleInfo(*trex, traf.header, trun, edit_list_offset, + k, &tri.samples[k], traf.sdtp.sample_depends_on(k)); + run_start_dts += tri.samples[k].duration; + + // ISO-14496-12 Section 8.20.1 : If the sync sample box is not present, + // every sample is a random access point. + // + // NOTE: MPEG's "is random access point" concept is equivalent to this + // and downstream code's "is keyframe" concept. 
+ if (!is_sync_sample_box_present) + tri.samples[k].is_keyframe = true; + } + runs_.push_back(tri); + sample_count_sum += trun.sample_count; + } + } + + std::sort(runs_.begin(), runs_.end(), CompareMinTrackRunDataOffset()); + run_itr_ = runs_.begin(); + ResetRun(); + return true; +} + +void TrackRunIterator::AdvanceRun() { + ++run_itr_; + ResetRun(); +} + +void TrackRunIterator::ResetRun() { + if (!IsRunValid()) return; + sample_dts_ = run_itr_->start_dts; + sample_offset_ = run_itr_->sample_start_offset; + sample_itr_ = run_itr_->samples.begin(); + cenc_info_.clear(); +} + +void TrackRunIterator::AdvanceSample() { + DCHECK(IsSampleValid()); + sample_dts_ += sample_itr_->duration; + sample_offset_ += sample_itr_->size; + ++sample_itr_; +} + +// This implementation only indicates a need for caching if CENC auxiliary +// info is available in the stream. +bool TrackRunIterator::AuxInfoNeedsToBeCached() { + DCHECK(IsRunValid()); + return is_encrypted() && aux_info_size() > 0 && cenc_info_.size() == 0; +} + +// This implementation currently only caches CENC auxiliary info. 
+bool TrackRunIterator::CacheAuxInfo(const uint8* buf, int buf_size) { + RCHECK(AuxInfoNeedsToBeCached() && buf_size >= aux_info_size()); + + cenc_info_.resize(run_itr_->samples.size()); + int64 pos = 0; + for (size_t i = 0; i < run_itr_->samples.size(); i++) { + int info_size = run_itr_->aux_info_default_size; + if (!info_size) + info_size = run_itr_->aux_info_sizes[i]; + + BufferReader reader(buf + pos, info_size); + RCHECK(cenc_info_[i].Parse(track_encryption().default_iv_size, &reader)); + pos += info_size; + } + + return true; +} + +bool TrackRunIterator::IsRunValid() const { + return run_itr_ != runs_.end(); +} + +bool TrackRunIterator::IsSampleValid() const { + return IsRunValid() && (sample_itr_ != run_itr_->samples.end()); +} + +// Because tracks are in sorted order and auxiliary information is cached when +// returning samples, it is guaranteed that no data will be required before the +// lesser of the minimum data offset of this track and the next in sequence. +// (The stronger condition - that no data is required before the minimum data +// offset of this track alone - is not guaranteed, because the BMFF spec does +// not have any inter-run ordering restrictions.) 
+int64 TrackRunIterator::GetMaxClearOffset() { + int64 offset = kint64max; + + if (IsSampleValid()) { + offset = std::min(offset, sample_offset_); + if (AuxInfoNeedsToBeCached()) + offset = std::min(offset, aux_info_offset()); + } + if (run_itr_ != runs_.end()) { + std::vector<TrackRunInfo>::const_iterator next_run = run_itr_ + 1; + if (next_run != runs_.end()) { + offset = std::min(offset, next_run->sample_start_offset); + if (next_run->aux_info_total_size) + offset = std::min(offset, next_run->aux_info_start_offset); + } + } + if (offset == kint64max) return 0; + return offset; +} + +uint32 TrackRunIterator::track_id() const { + DCHECK(IsRunValid()); + return run_itr_->track_id; +} + +bool TrackRunIterator::is_encrypted() const { + DCHECK(IsRunValid()); + return track_encryption().is_encrypted; +} + +int64 TrackRunIterator::aux_info_offset() const { + return run_itr_->aux_info_start_offset; +} + +int TrackRunIterator::aux_info_size() const { + return run_itr_->aux_info_total_size; +} + +bool TrackRunIterator::is_audio() const { + DCHECK(IsRunValid()); + return run_itr_->is_audio; +} + +const AudioSampleEntry& TrackRunIterator::audio_description() const { + DCHECK(is_audio()); + DCHECK(run_itr_->audio_description); + return *run_itr_->audio_description; +} + +const VideoSampleEntry& TrackRunIterator::video_description() const { + DCHECK(!is_audio()); + DCHECK(run_itr_->video_description); + return *run_itr_->video_description; +} + +int64 TrackRunIterator::sample_offset() const { + DCHECK(IsSampleValid()); + return sample_offset_; +} + +int TrackRunIterator::sample_size() const { + DCHECK(IsSampleValid()); + return sample_itr_->size; +} + +TimeDelta TrackRunIterator::dts() const { + DCHECK(IsSampleValid()); + return TimeDeltaFromRational(sample_dts_, run_itr_->timescale); +} + +TimeDelta TrackRunIterator::cts() const { + DCHECK(IsSampleValid()); + return TimeDeltaFromRational(sample_dts_ + sample_itr_->cts_offset, + run_itr_->timescale); +} + +TimeDelta 
TrackRunIterator::duration() const { + DCHECK(IsSampleValid()); + return TimeDeltaFromRational(sample_itr_->duration, run_itr_->timescale); +} + +bool TrackRunIterator::is_keyframe() const { + DCHECK(IsSampleValid()); + return sample_itr_->is_keyframe; +} + +const TrackEncryption& TrackRunIterator::track_encryption() const { + if (is_audio()) + return audio_description().sinf.info.track_encryption; + return video_description().sinf.info.track_encryption; +} + +scoped_ptr<DecryptConfig> TrackRunIterator::GetDecryptConfig() { + size_t sample_idx = sample_itr_ - run_itr_->samples.begin(); + DCHECK(sample_idx < cenc_info_.size()); + const FrameCENCInfo& cenc_info = cenc_info_[sample_idx]; + DCHECK(is_encrypted() && !AuxInfoNeedsToBeCached()); + + size_t total_size = 0; + if (!cenc_info.subsamples.empty() && + (!cenc_info.GetTotalSizeOfSubsamples(&total_size) || + total_size != static_cast<size_t>(sample_size()))) { + MEDIA_LOG(log_cb_) << "Incorrect CENC subsample size."; + return scoped_ptr<DecryptConfig>(); + } + + const std::vector<uint8>& kid = track_encryption().default_kid; + return scoped_ptr<DecryptConfig>(new DecryptConfig( + std::string(reinterpret_cast<const char*>(&kid[0]), kid.size()), + std::string(reinterpret_cast<const char*>(cenc_info.iv), + arraysize(cenc_info.iv)), + cenc_info.subsamples)); +} + +} // namespace mp4 +} // namespace media diff --git a/media/formats/mp4/track_run_iterator.h b/media/formats/mp4/track_run_iterator.h new file mode 100644 index 0000000..829dd11 --- /dev/null +++ b/media/formats/mp4/track_run_iterator.h @@ -0,0 +1,108 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef MEDIA_FORMATS_MP4_TRACK_RUN_ITERATOR_H_ +#define MEDIA_FORMATS_MP4_TRACK_RUN_ITERATOR_H_ + +#include <vector> + +#include "base/memory/scoped_ptr.h" +#include "base/time/time.h" +#include "media/base/media_export.h" +#include "media/base/media_log.h" +#include "media/formats/mp4/box_definitions.h" +#include "media/formats/mp4/cenc.h" + +namespace media { + +class DecryptConfig; + +namespace mp4 { + +using base::TimeDelta; +base::TimeDelta MEDIA_EXPORT TimeDeltaFromRational(int64 numer, int64 denom); + +struct SampleInfo; +struct TrackRunInfo; + +class MEDIA_EXPORT TrackRunIterator { + public: + // Create a new TrackRunIterator. A reference to |moov| will be retained for + // the lifetime of this object. + TrackRunIterator(const Movie* moov, const LogCB& log_cb); + ~TrackRunIterator(); + + // Sets up the iterator to handle all the runs from the current fragment. + bool Init(const MovieFragment& moof); + + // Returns true if the properties of the current run or sample are valid. + bool IsRunValid() const; + bool IsSampleValid() const; + + // Advance the properties to refer to the next run or sample. Requires that + // the current sample be valid. + void AdvanceRun(); + void AdvanceSample(); + + // Returns true if this track run has auxiliary information and has not yet + // been cached. Only valid if IsRunValid(). + bool AuxInfoNeedsToBeCached(); + + // Caches the CENC data from the given buffer. |buf| must be a buffer starting + // at the offset given by aux_info_offset(), with a |size| of at least + // aux_info_size(). Returns true on success, false on error. + bool CacheAuxInfo(const uint8* buf, int size); + + // Returns the maximum buffer location at which no data earlier in the stream + // will be required in order to read the current or any subsequent sample. You + // may clear all data up to this offset before reading the current sample + // safely. 
Result is in the same units as offset() (for Media Source this is + // in bytes past the head of the MOOF box). + int64 GetMaxClearOffset(); + + // Property of the current run. Only valid if IsRunValid(). + uint32 track_id() const; + int64 aux_info_offset() const; + int aux_info_size() const; + bool is_encrypted() const; + bool is_audio() const; + // Only one is valid, based on the value of is_audio(). + const AudioSampleEntry& audio_description() const; + const VideoSampleEntry& video_description() const; + + // Properties of the current sample. Only valid if IsSampleValid(). + int64 sample_offset() const; + int sample_size() const; + TimeDelta dts() const; + TimeDelta cts() const; + TimeDelta duration() const; + bool is_keyframe() const; + + // Only call when is_encrypted() is true and AuxInfoNeedsToBeCached() is + // false. Result is owned by caller. + scoped_ptr<DecryptConfig> GetDecryptConfig(); + + private: + void ResetRun(); + const TrackEncryption& track_encryption() const; + + const Movie* moov_; + LogCB log_cb_; + + std::vector<TrackRunInfo> runs_; + std::vector<TrackRunInfo>::const_iterator run_itr_; + std::vector<SampleInfo>::const_iterator sample_itr_; + + std::vector<FrameCENCInfo> cenc_info_; + + int64 sample_dts_; + int64 sample_offset_; + + DISALLOW_COPY_AND_ASSIGN(TrackRunIterator); +}; + +} // namespace mp4 +} // namespace media + +#endif // MEDIA_FORMATS_MP4_TRACK_RUN_ITERATOR_H_ diff --git a/media/formats/mp4/track_run_iterator_unittest.cc b/media/formats/mp4/track_run_iterator_unittest.cc new file mode 100644 index 0000000..ea37bab --- /dev/null +++ b/media/formats/mp4/track_run_iterator_unittest.cc @@ -0,0 +1,514 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "base/basictypes.h" +#include "base/logging.h" +#include "base/memory/scoped_ptr.h" +#include "media/formats/mp4/box_definitions.h" +#include "media/formats/mp4/rcheck.h" +#include "media/formats/mp4/track_run_iterator.h" +#include "testing/gtest/include/gtest/gtest.h" + +// The sum of the elements in a vector initialized with SetAscending, +// less the value of the last element. +static const int kSumAscending1 = 45; + +static const int kAudioScale = 48000; +static const int kVideoScale = 25; + +static const uint32 kSampleIsDifferenceSampleFlagMask = 0x10000; + +static const uint8 kAuxInfo[] = { + 0x41, 0x54, 0x65, 0x73, 0x74, 0x49, 0x76, 0x31, + 0x41, 0x54, 0x65, 0x73, 0x74, 0x49, 0x76, 0x32, + 0x00, 0x02, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x04 +}; + +static const char kIv1[] = { + 0x41, 0x54, 0x65, 0x73, 0x74, 0x49, 0x76, 0x31, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +static const uint8 kKeyId[] = { + 0x41, 0x47, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x54, + 0x65, 0x73, 0x74, 0x4b, 0x65, 0x79, 0x49, 0x44 +}; + +namespace media { +namespace mp4 { + +class TrackRunIteratorTest : public testing::Test { + public: + TrackRunIteratorTest() { + CreateMovie(); + } + + protected: + Movie moov_; + LogCB log_cb_; + scoped_ptr<TrackRunIterator> iter_; + + void CreateMovie() { + moov_.header.timescale = 1000; + moov_.tracks.resize(3); + moov_.extends.tracks.resize(2); + moov_.tracks[0].header.track_id = 1; + moov_.tracks[0].media.header.timescale = kAudioScale; + SampleDescription& desc1 = + moov_.tracks[0].media.information.sample_table.description; + AudioSampleEntry aud_desc; + aud_desc.format = FOURCC_MP4A; + aud_desc.sinf.info.track_encryption.is_encrypted = false; + desc1.type = kAudio; + desc1.audio_entries.push_back(aud_desc); + moov_.extends.tracks[0].track_id = 1; + moov_.extends.tracks[0].default_sample_description_index = 1; + moov_.tracks[0].media.information.sample_table.sync_sample.is_present = + true; + 
moov_.tracks[1].header.track_id = 2; + moov_.tracks[1].media.header.timescale = kVideoScale; + SampleDescription& desc2 = + moov_.tracks[1].media.information.sample_table.description; + VideoSampleEntry vid_desc; + vid_desc.format = FOURCC_AVC1; + vid_desc.sinf.info.track_encryption.is_encrypted = false; + desc2.type = kVideo; + desc2.video_entries.push_back(vid_desc); + moov_.extends.tracks[1].track_id = 2; + moov_.extends.tracks[1].default_sample_description_index = 1; + moov_.tracks[1].media.information.sample_table.sync_sample.is_present = + true; + + moov_.tracks[2].header.track_id = 3; + moov_.tracks[2].media.information.sample_table.description.type = kHint; + } + + MovieFragment CreateFragment() { + MovieFragment moof; + moof.tracks.resize(2); + moof.tracks[0].decode_time.decode_time = 0; + moof.tracks[0].header.track_id = 1; + moof.tracks[0].header.has_default_sample_flags = true; + moof.tracks[0].header.default_sample_duration = 1024; + moof.tracks[0].header.default_sample_size = 4; + moof.tracks[0].runs.resize(2); + moof.tracks[0].runs[0].sample_count = 10; + moof.tracks[0].runs[0].data_offset = 100; + SetAscending(&moof.tracks[0].runs[0].sample_sizes); + + moof.tracks[0].runs[1].sample_count = 10; + moof.tracks[0].runs[1].data_offset = 10000; + + moof.tracks[1].header.track_id = 2; + moof.tracks[1].header.has_default_sample_flags = false; + moof.tracks[1].decode_time.decode_time = 10; + moof.tracks[1].runs.resize(1); + moof.tracks[1].runs[0].sample_count = 10; + moof.tracks[1].runs[0].data_offset = 200; + SetAscending(&moof.tracks[1].runs[0].sample_sizes); + SetAscending(&moof.tracks[1].runs[0].sample_durations); + moof.tracks[1].runs[0].sample_flags.resize(10); + for (size_t i = 1; i < moof.tracks[1].runs[0].sample_flags.size(); i++) { + moof.tracks[1].runs[0].sample_flags[i] = + kSampleIsDifferenceSampleFlagMask; + } + + return moof; + } + + // Update the first sample description of a Track to indicate encryption + void AddEncryption(Track* track) { + 
SampleDescription* stsd = + &track->media.information.sample_table.description; + ProtectionSchemeInfo* sinf; + if (!stsd->video_entries.empty()) { + sinf = &stsd->video_entries[0].sinf; + } else { + sinf = &stsd->audio_entries[0].sinf; + } + + sinf->type.type = FOURCC_CENC; + sinf->info.track_encryption.is_encrypted = true; + sinf->info.track_encryption.default_iv_size = 8; + sinf->info.track_encryption.default_kid.insert( + sinf->info.track_encryption.default_kid.begin(), + kKeyId, kKeyId + arraysize(kKeyId)); + } + + // Add aux info covering the first track run to a TrackFragment, and update + // the run to ensure it matches length and subsample information. + void AddAuxInfoHeaders(int offset, TrackFragment* frag) { + frag->auxiliary_offset.offsets.push_back(offset); + frag->auxiliary_size.sample_count = 2; + frag->auxiliary_size.sample_info_sizes.push_back(8); + frag->auxiliary_size.sample_info_sizes.push_back(22); + frag->runs[0].sample_count = 2; + frag->runs[0].sample_sizes[1] = 10; + } + + void SetAscending(std::vector<uint32>* vec) { + vec->resize(10); + for (size_t i = 0; i < vec->size(); i++) + (*vec)[i] = i+1; + } +}; + +TEST_F(TrackRunIteratorTest, NoRunsTest) { + iter_.reset(new TrackRunIterator(&moov_, log_cb_)); + ASSERT_TRUE(iter_->Init(MovieFragment())); + EXPECT_FALSE(iter_->IsRunValid()); + EXPECT_FALSE(iter_->IsSampleValid()); +} + +TEST_F(TrackRunIteratorTest, BasicOperationTest) { + iter_.reset(new TrackRunIterator(&moov_, log_cb_)); + MovieFragment moof = CreateFragment(); + + // Test that runs are sorted correctly, and that properties of the initial + // sample of the first run are correct + ASSERT_TRUE(iter_->Init(moof)); + EXPECT_TRUE(iter_->IsRunValid()); + EXPECT_FALSE(iter_->is_encrypted()); + EXPECT_EQ(iter_->track_id(), 1u); + EXPECT_EQ(iter_->sample_offset(), 100); + EXPECT_EQ(iter_->sample_size(), 1); + EXPECT_EQ(iter_->dts(), TimeDeltaFromRational(0, kAudioScale)); + EXPECT_EQ(iter_->cts(), TimeDeltaFromRational(0, kAudioScale)); 
+ EXPECT_EQ(iter_->duration(), TimeDeltaFromRational(1024, kAudioScale)); + EXPECT_TRUE(iter_->is_keyframe()); + + // Advance to the last sample in the current run, and test its properties + for (int i = 0; i < 9; i++) iter_->AdvanceSample(); + EXPECT_EQ(iter_->track_id(), 1u); + EXPECT_EQ(iter_->sample_offset(), 100 + kSumAscending1); + EXPECT_EQ(iter_->sample_size(), 10); + EXPECT_EQ(iter_->dts(), TimeDeltaFromRational(1024 * 9, kAudioScale)); + EXPECT_EQ(iter_->duration(), TimeDeltaFromRational(1024, kAudioScale)); + EXPECT_TRUE(iter_->is_keyframe()); + + // Test end-of-run + iter_->AdvanceSample(); + EXPECT_FALSE(iter_->IsSampleValid()); + + // Test last sample of next run + iter_->AdvanceRun(); + EXPECT_TRUE(iter_->is_keyframe()); + for (int i = 0; i < 9; i++) iter_->AdvanceSample(); + EXPECT_EQ(iter_->track_id(), 2u); + EXPECT_EQ(iter_->sample_offset(), 200 + kSumAscending1); + EXPECT_EQ(iter_->sample_size(), 10); + int64 base_dts = kSumAscending1 + moof.tracks[1].decode_time.decode_time; + EXPECT_EQ(iter_->dts(), TimeDeltaFromRational(base_dts, kVideoScale)); + EXPECT_EQ(iter_->duration(), TimeDeltaFromRational(10, kVideoScale)); + EXPECT_FALSE(iter_->is_keyframe()); + + // Test final run + iter_->AdvanceRun(); + EXPECT_EQ(iter_->track_id(), 1u); + EXPECT_EQ(iter_->dts(), TimeDeltaFromRational(1024 * 10, kAudioScale)); + iter_->AdvanceSample(); + EXPECT_EQ(moof.tracks[0].runs[1].data_offset + + moof.tracks[0].header.default_sample_size, + iter_->sample_offset()); + iter_->AdvanceRun(); + EXPECT_FALSE(iter_->IsRunValid()); +} + +TEST_F(TrackRunIteratorTest, TrackExtendsDefaultsTest) { + moov_.extends.tracks[0].default_sample_duration = 50; + moov_.extends.tracks[0].default_sample_size = 3; + moov_.extends.tracks[0].default_sample_flags = + kSampleIsDifferenceSampleFlagMask; + iter_.reset(new TrackRunIterator(&moov_, log_cb_)); + MovieFragment moof = CreateFragment(); + moof.tracks[0].header.has_default_sample_flags = false; + 
moof.tracks[0].header.default_sample_size = 0; + moof.tracks[0].header.default_sample_duration = 0; + moof.tracks[0].runs[0].sample_sizes.clear(); + ASSERT_TRUE(iter_->Init(moof)); + iter_->AdvanceSample(); + EXPECT_FALSE(iter_->is_keyframe()); + EXPECT_EQ(iter_->sample_size(), 3); + EXPECT_EQ(iter_->sample_offset(), moof.tracks[0].runs[0].data_offset + 3); + EXPECT_EQ(iter_->duration(), TimeDeltaFromRational(50, kAudioScale)); + EXPECT_EQ(iter_->dts(), TimeDeltaFromRational(50, kAudioScale)); +} + +TEST_F(TrackRunIteratorTest, FirstSampleFlagTest) { + // Ensure that keyframes are flagged correctly in the face of BMFF boxes which + // explicitly specify the flags for the first sample in a run and rely on + // defaults for all subsequent samples + iter_.reset(new TrackRunIterator(&moov_, log_cb_)); + MovieFragment moof = CreateFragment(); + moof.tracks[1].header.has_default_sample_flags = true; + moof.tracks[1].header.default_sample_flags = + kSampleIsDifferenceSampleFlagMask; + moof.tracks[1].runs[0].sample_flags.resize(1); + ASSERT_TRUE(iter_->Init(moof)); + iter_->AdvanceRun(); + EXPECT_TRUE(iter_->is_keyframe()); + iter_->AdvanceSample(); + EXPECT_FALSE(iter_->is_keyframe()); +} + +TEST_F(TrackRunIteratorTest, ReorderingTest) { + // Test frame reordering and edit list support. The frames have the following + // decode timestamps: + // + // 0ms 40ms 120ms 240ms + // | 0 | 1 - | 2 - - | + // + // ...and these composition timestamps, after edit list adjustment: + // + // 0ms 40ms 160ms 240ms + // | 0 | 2 - - | 1 - | + + // Create an edit list with one entry, with an initial start time of 80ms + // (that is, 2 / kVideoScale) and a duration of zero (which is treated as + // infinite according to 14496-12:2012). This will cause the first 80ms of the + // media timeline - which will be empty, due to CTS biasing - to be discarded. 
+ iter_.reset(new TrackRunIterator(&moov_, log_cb_)); + EditListEntry entry; + entry.segment_duration = 0; + entry.media_time = 2; + entry.media_rate_integer = 1; + entry.media_rate_fraction = 0; + moov_.tracks[1].edit.list.edits.push_back(entry); + + // Add CTS offsets. Without bias, the CTS offsets for the first three frames + // would simply be [0, 3, -2]. Since CTS offsets should be non-negative for + // maximum compatibility, these values are biased up to [2, 5, 0], and the + // extra 80ms is removed via the edit list. + MovieFragment moof = CreateFragment(); + std::vector<int32>& cts_offsets = + moof.tracks[1].runs[0].sample_composition_time_offsets; + cts_offsets.resize(10); + cts_offsets[0] = 2; + cts_offsets[1] = 5; + cts_offsets[2] = 0; + moof.tracks[1].decode_time.decode_time = 0; + + ASSERT_TRUE(iter_->Init(moof)); + iter_->AdvanceRun(); + EXPECT_EQ(iter_->dts(), TimeDeltaFromRational(0, kVideoScale)); + EXPECT_EQ(iter_->cts(), TimeDeltaFromRational(0, kVideoScale)); + EXPECT_EQ(iter_->duration(), TimeDeltaFromRational(1, kVideoScale)); + iter_->AdvanceSample(); + EXPECT_EQ(iter_->dts(), TimeDeltaFromRational(1, kVideoScale)); + EXPECT_EQ(iter_->cts(), TimeDeltaFromRational(4, kVideoScale)); + EXPECT_EQ(iter_->duration(), TimeDeltaFromRational(2, kVideoScale)); + iter_->AdvanceSample(); + EXPECT_EQ(iter_->dts(), TimeDeltaFromRational(3, kVideoScale)); + EXPECT_EQ(iter_->cts(), TimeDeltaFromRational(1, kVideoScale)); + EXPECT_EQ(iter_->duration(), TimeDeltaFromRational(3, kVideoScale)); +} + +TEST_F(TrackRunIteratorTest, IgnoreUnknownAuxInfoTest) { + iter_.reset(new TrackRunIterator(&moov_, log_cb_)); + MovieFragment moof = CreateFragment(); + moof.tracks[1].auxiliary_offset.offsets.push_back(50); + moof.tracks[1].auxiliary_size.default_sample_info_size = 2; + moof.tracks[1].auxiliary_size.sample_count = 2; + moof.tracks[1].runs[0].sample_count = 2; + ASSERT_TRUE(iter_->Init(moof)); + iter_->AdvanceRun(); + EXPECT_FALSE(iter_->AuxInfoNeedsToBeCached()); 
+} + +TEST_F(TrackRunIteratorTest, DecryptConfigTest) { + AddEncryption(&moov_.tracks[1]); + iter_.reset(new TrackRunIterator(&moov_, log_cb_)); + + MovieFragment moof = CreateFragment(); + AddAuxInfoHeaders(50, &moof.tracks[1]); + + ASSERT_TRUE(iter_->Init(moof)); + + // The run for track 2 will be first, since its aux info offset is the first + // element in the file. + EXPECT_EQ(iter_->track_id(), 2u); + EXPECT_TRUE(iter_->is_encrypted()); + EXPECT_TRUE(iter_->AuxInfoNeedsToBeCached()); + EXPECT_EQ(static_cast<uint32>(iter_->aux_info_size()), arraysize(kAuxInfo)); + EXPECT_EQ(iter_->aux_info_offset(), 50); + EXPECT_EQ(iter_->GetMaxClearOffset(), 50); + EXPECT_FALSE(iter_->CacheAuxInfo(NULL, 0)); + EXPECT_FALSE(iter_->CacheAuxInfo(kAuxInfo, 3)); + EXPECT_TRUE(iter_->AuxInfoNeedsToBeCached()); + EXPECT_TRUE(iter_->CacheAuxInfo(kAuxInfo, arraysize(kAuxInfo))); + EXPECT_FALSE(iter_->AuxInfoNeedsToBeCached()); + EXPECT_EQ(iter_->sample_offset(), 200); + EXPECT_EQ(iter_->GetMaxClearOffset(), moof.tracks[0].runs[0].data_offset); + scoped_ptr<DecryptConfig> config = iter_->GetDecryptConfig(); + ASSERT_EQ(arraysize(kKeyId), config->key_id().size()); + EXPECT_TRUE(!memcmp(kKeyId, config->key_id().data(), + config->key_id().size())); + ASSERT_EQ(arraysize(kIv1), config->iv().size()); + EXPECT_TRUE(!memcmp(kIv1, config->iv().data(), config->iv().size())); + EXPECT_TRUE(config->subsamples().empty()); + iter_->AdvanceSample(); + config = iter_->GetDecryptConfig(); + EXPECT_EQ(config->subsamples().size(), 2u); + EXPECT_EQ(config->subsamples()[0].clear_bytes, 1u); + EXPECT_EQ(config->subsamples()[1].cypher_bytes, 4u); +} + +// It is legal for aux info blocks to be shared among multiple formats. 
+TEST_F(TrackRunIteratorTest, SharedAuxInfoTest) { + AddEncryption(&moov_.tracks[0]); + AddEncryption(&moov_.tracks[1]); + iter_.reset(new TrackRunIterator(&moov_, log_cb_)); + + MovieFragment moof = CreateFragment(); + moof.tracks[0].runs.resize(1); + AddAuxInfoHeaders(50, &moof.tracks[0]); + AddAuxInfoHeaders(50, &moof.tracks[1]); + moof.tracks[0].auxiliary_size.default_sample_info_size = 8; + + ASSERT_TRUE(iter_->Init(moof)); + EXPECT_EQ(iter_->track_id(), 1u); + EXPECT_EQ(iter_->aux_info_offset(), 50); + EXPECT_TRUE(iter_->CacheAuxInfo(kAuxInfo, arraysize(kAuxInfo))); + scoped_ptr<DecryptConfig> config = iter_->GetDecryptConfig(); + ASSERT_EQ(arraysize(kIv1), config->iv().size()); + EXPECT_TRUE(!memcmp(kIv1, config->iv().data(), config->iv().size())); + iter_->AdvanceSample(); + EXPECT_EQ(iter_->GetMaxClearOffset(), 50); + iter_->AdvanceRun(); + EXPECT_EQ(iter_->GetMaxClearOffset(), 50); + EXPECT_EQ(iter_->aux_info_offset(), 50); + EXPECT_TRUE(iter_->CacheAuxInfo(kAuxInfo, arraysize(kAuxInfo))); + EXPECT_EQ(iter_->GetMaxClearOffset(), 200); + ASSERT_EQ(arraysize(kIv1), config->iv().size()); + EXPECT_TRUE(!memcmp(kIv1, config->iv().data(), config->iv().size())); + iter_->AdvanceSample(); + EXPECT_EQ(iter_->GetMaxClearOffset(), 201); +} + +// Sensible files are expected to place auxiliary information for a run +// immediately before the main data for that run. 
Alternative schemes are +// possible, however, including the somewhat reasonable behavior of placing all +// aux info at the head of the 'mdat' box together, and the completely +// unreasonable behavior demonstrated here: +// byte 50: track 2, run 1 aux info +// byte 100: track 1, run 1 data +// byte 200: track 2, run 1 data +// byte 201: track 1, run 2 aux info (*inside* track 2, run 1 data) +// byte 10000: track 1, run 2 data +// byte 20000: track 1, run 1 aux info +TEST_F(TrackRunIteratorTest, UnexpectedOrderingTest) { + AddEncryption(&moov_.tracks[0]); + AddEncryption(&moov_.tracks[1]); + iter_.reset(new TrackRunIterator(&moov_, log_cb_)); + + MovieFragment moof = CreateFragment(); + AddAuxInfoHeaders(20000, &moof.tracks[0]); + moof.tracks[0].auxiliary_offset.offsets.push_back(201); + moof.tracks[0].auxiliary_size.sample_count += 2; + moof.tracks[0].auxiliary_size.default_sample_info_size = 8; + moof.tracks[0].runs[1].sample_count = 2; + AddAuxInfoHeaders(50, &moof.tracks[1]); + moof.tracks[1].runs[0].sample_sizes[0] = 5; + + ASSERT_TRUE(iter_->Init(moof)); + EXPECT_EQ(iter_->track_id(), 2u); + EXPECT_EQ(iter_->aux_info_offset(), 50); + EXPECT_EQ(iter_->sample_offset(), 200); + EXPECT_TRUE(iter_->CacheAuxInfo(kAuxInfo, arraysize(kAuxInfo))); + EXPECT_EQ(iter_->GetMaxClearOffset(), 100); + iter_->AdvanceRun(); + EXPECT_EQ(iter_->track_id(), 1u); + EXPECT_EQ(iter_->aux_info_offset(), 20000); + EXPECT_EQ(iter_->sample_offset(), 100); + EXPECT_TRUE(iter_->CacheAuxInfo(kAuxInfo, arraysize(kAuxInfo))); + EXPECT_EQ(iter_->GetMaxClearOffset(), 100); + iter_->AdvanceSample(); + EXPECT_EQ(iter_->GetMaxClearOffset(), 101); + iter_->AdvanceRun(); + EXPECT_EQ(iter_->track_id(), 1u); + EXPECT_EQ(iter_->aux_info_offset(), 201); + EXPECT_EQ(iter_->sample_offset(), 10000); + EXPECT_EQ(iter_->GetMaxClearOffset(), 201); + EXPECT_TRUE(iter_->CacheAuxInfo(kAuxInfo, arraysize(kAuxInfo))); + EXPECT_EQ(iter_->GetMaxClearOffset(), 10000); +} + +TEST_F(TrackRunIteratorTest, 
MissingStssMakesAllSamplesKeyframes) { + iter_.reset(new TrackRunIterator(&moov_, log_cb_)); + MovieFragment moof = CreateFragment(); + + // Test that runs are sorted correctly, and that properties of the initial + // sample of the first run are correct + ASSERT_TRUE(iter_->Init(moof)); + EXPECT_TRUE(iter_->IsRunValid()); + + // Count the number of non-keyframes for each run with the default + // sync_sample info. + + EXPECT_TRUE(moov_.tracks[0].media.information.sample_table.sync_sample. + is_present); + EXPECT_TRUE(moov_.tracks[1].media.information.sample_table.sync_sample. + is_present); + + int first_run_non_keyframe_count = 0; + int second_run_non_keyframe_count = 0; + + EXPECT_TRUE(iter_->IsRunValid()); + while (iter_->IsSampleValid()) { + EXPECT_EQ(iter_->track_id(), 1u); + + if (!iter_->is_keyframe()) + first_run_non_keyframe_count++; + + iter_->AdvanceSample(); + } + + iter_->AdvanceRun(); + + EXPECT_TRUE(iter_->IsRunValid()); + while (iter_->IsSampleValid()) { + EXPECT_EQ(iter_->track_id(), 2u); + + if (!iter_->is_keyframe()) + second_run_non_keyframe_count++; + + iter_->AdvanceSample(); + } + + EXPECT_EQ(0, first_run_non_keyframe_count); + EXPECT_EQ(9, second_run_non_keyframe_count); + + // Update sync_sample info to reflect that the box is not present on + // both tracks. + moov_.tracks[0].media.information.sample_table.sync_sample.is_present = + false; + moov_.tracks[1].media.information.sample_table.sync_sample.is_present = + false; + + // Reinitialize the iterator and verify that all samples are keyframes + // on both tracks now. 
+ + ASSERT_TRUE(iter_->Init(moof)); + EXPECT_TRUE(iter_->IsRunValid()); + while (iter_->IsSampleValid()) { + EXPECT_EQ(iter_->track_id(), 1u); + EXPECT_TRUE(iter_->is_keyframe()); + iter_->AdvanceSample(); + } + + iter_->AdvanceRun(); + EXPECT_TRUE(iter_->IsRunValid()); + + while (iter_->IsSampleValid()) { + EXPECT_EQ(iter_->track_id(), 2u); + EXPECT_TRUE(iter_->is_keyframe()); + iter_->AdvanceSample(); + } +} + + +} // namespace mp4 +} // namespace media diff --git a/media/formats/webm/chromeos/DEPS b/media/formats/webm/chromeos/DEPS new file mode 100644 index 0000000..a4378dc --- /dev/null +++ b/media/formats/webm/chromeos/DEPS @@ -0,0 +1,4 @@ +include_rules = [ + "+libyuv", + "+third_party/libvpx", +] diff --git a/media/formats/webm/chromeos/ebml_writer.cc b/media/formats/webm/chromeos/ebml_writer.cc new file mode 100644 index 0000000..c00063f --- /dev/null +++ b/media/formats/webm/chromeos/ebml_writer.cc @@ -0,0 +1,33 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/webm/chromeos/ebml_writer.h" + +#include "media/base/media_export.h" + +extern "C" { +#include "third_party/libvpx/source/libvpx/third_party/libmkv/EbmlWriter.h" + +EbmlGlobal::EbmlGlobal() { +} + +EbmlGlobal::~EbmlGlobal() { +} + +// These functions must be in the global namespace and visible to libmkv. 
+ +void MEDIA_EXPORT Ebml_Write(EbmlGlobal* glob, + const void* buffer, + unsigned long len) { + glob->write_cb.Run(buffer, len); +} + +void MEDIA_EXPORT Ebml_Serialize(EbmlGlobal* glob, + const void* buffer, + int buffer_size, + unsigned long len) { + glob->serialize_cb.Run(buffer, buffer_size, len); +} + +} // extern "C" diff --git a/media/formats/webm/chromeos/ebml_writer.h b/media/formats/webm/chromeos/ebml_writer.h new file mode 100644 index 0000000..3c1faa0 --- /dev/null +++ b/media/formats/webm/chromeos/ebml_writer.h @@ -0,0 +1,21 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_WEBM_CHROMEOS_EBML_WRITER_H_ +#define MEDIA_FORMATS_WEBM_CHROMEOS_EBML_WRITER_H_ + +#include "base/callback.h" + +// This struct serves as a bridge between static libmkv interface and Chrome's +// base::Callback. Must be in the global namespace. See EbmlWriter.h. +struct EbmlGlobal { + EbmlGlobal(); + ~EbmlGlobal(); + + base::Callback<void(const void* buffer, unsigned long len)> write_cb; + base::Callback<void(const void* buffer, int buffer_size, unsigned long len)> + serialize_cb; +}; + +#endif // MEDIA_FORMATS_WEBM_CHROMEOS_EBML_WRITER_H_ diff --git a/media/formats/webm/chromeos/webm_encoder.cc b/media/formats/webm/chromeos/webm_encoder.cc new file mode 100644 index 0000000..4b5c782 --- /dev/null +++ b/media/formats/webm/chromeos/webm_encoder.cc @@ -0,0 +1,321 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "media/formats/webm/chromeos/webm_encoder.h" + +#include "base/bind.h" +#include "base/file_util.h" +#include "base/logging.h" +#include "base/memory/scoped_ptr.h" +#include "libyuv/convert.h" +#include "libyuv/video_common.h" +#include "third_party/skia/include/core/SkBitmap.h" + +extern "C" { +// Getting the right degree of C compatibility has been a constant struggle. +// - Stroustrup, C++ Report, 12(7), July/August 2000. +#define private priv +#include "third_party/libvpx/source/libvpx/third_party/libmkv/EbmlIDs.h" +#include "third_party/libvpx/source/libvpx/third_party/libmkv/EbmlWriter.h" +#undef private +} + +// Number of encoder threads to use. +static const int kNumEncoderThreads = 2; + +// Need a fixed size serializer for the track ID. libmkv provides a 64 bit +// one, but not a 32 bit one. +static void Ebml_SerializeUnsigned32(EbmlGlobal* ebml, + unsigned long class_id, + uint64_t value) { + uint8 size_serialized = 4 | 0x80; + Ebml_WriteID(ebml, class_id); + Ebml_Serialize(ebml, &size_serialized, sizeof(size_serialized), 1); + Ebml_Serialize(ebml, &value, sizeof(value), 4); +} + +// Wrapper functor for vpx_codec_destroy(). +struct VpxCodecDeleter { + void operator()(vpx_codec_ctx_t* codec) { + vpx_codec_destroy(codec); + } +}; + +// Wrapper functor for vpx_img_free(). +struct VpxImgDeleter { + void operator()(vpx_image_t* image) { + vpx_img_free(image); + } +}; + +namespace media { + +namespace chromeos { + +WebmEncoder::WebmEncoder(const base::FilePath& output_path, + int bitrate, + bool realtime) + : bitrate_(bitrate), + deadline_(realtime ? 
VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY), + output_path_(output_path), + has_errors_(false) { + ebml_writer_.write_cb = base::Bind( + &WebmEncoder::EbmlWrite, base::Unretained(this)); + ebml_writer_.serialize_cb = base::Bind( + &WebmEncoder::EbmlSerialize, base::Unretained(this)); +} + +WebmEncoder::~WebmEncoder() { +} + +bool WebmEncoder::EncodeFromSprite(const SkBitmap& sprite, + int fps_n, + int fps_d) { + DCHECK(!sprite.isNull()); + DCHECK(!sprite.empty()); + + has_errors_ = false; + width_ = sprite.width(); + height_ = sprite.width(); + fps_.num = fps_n; + fps_.den = fps_d; + + // Sprite is tiled vertically. + frame_count_ = sprite.height() / width_; + + vpx_image_t image; + vpx_img_alloc(&image, VPX_IMG_FMT_I420, width_, height_, 16); + // Ensure that image is freed after return. + scoped_ptr<vpx_image_t, VpxImgDeleter> image_ptr(&image); + + const vpx_codec_iface_t* codec_iface = vpx_codec_vp8_cx(); + DCHECK(codec_iface); + vpx_codec_err_t ret = vpx_codec_enc_config_default(codec_iface, &config_, 0); + DCHECK_EQ(VPX_CODEC_OK, ret); + + config_.rc_target_bitrate = bitrate_; + config_.g_w = width_; + config_.g_h = height_; + config_.g_pass = VPX_RC_ONE_PASS; + config_.g_profile = 0; // Default profile. + config_.g_threads = kNumEncoderThreads; + config_.rc_min_quantizer = 0; + config_.rc_max_quantizer = 63; // Maximum possible range. + config_.g_timebase.num = fps_.den; + config_.g_timebase.den = fps_.num; + config_.kf_mode = VPX_KF_AUTO; // Auto key frames. + + vpx_codec_ctx_t codec; + ret = vpx_codec_enc_init(&codec, codec_iface, &config_, 0); + if (ret != VPX_CODEC_OK) + return false; + // Ensure that codec context is freed after return. 
+ scoped_ptr<vpx_codec_ctx_t, VpxCodecDeleter> codec_ptr(&codec); + + SkAutoLockPixels lock_sprite(sprite); + + const uint8* src = reinterpret_cast<const uint8*>(sprite.getAddr32(0, 0)); + size_t src_frame_size = sprite.getSize(); + int crop_y = 0; + + if (!WriteWebmHeader()) + return false; + + for (size_t frame = 0; frame < frame_count_ && !has_errors_; ++frame) { + int res = libyuv::ConvertToI420( + src, src_frame_size, + image.planes[VPX_PLANE_Y], image.stride[VPX_PLANE_Y], + image.planes[VPX_PLANE_U], image.stride[VPX_PLANE_U], + image.planes[VPX_PLANE_V], image.stride[VPX_PLANE_V], + 0, crop_y, // src origin + width_, sprite.height(), // src size + width_, height_, // dest size + libyuv::kRotate0, + libyuv::FOURCC_ARGB); + if (res) { + has_errors_ = true; + break; + } + crop_y += height_; + + ret = vpx_codec_encode(&codec, &image, frame, 1, 0, deadline_); + if (ret != VPX_CODEC_OK) { + has_errors_ = true; + break; + } + + vpx_codec_iter_t iter = NULL; + const vpx_codec_cx_pkt_t* packet; + while (!has_errors_ && (packet = vpx_codec_get_cx_data(&codec, &iter))) { + if (packet->kind == VPX_CODEC_CX_FRAME_PKT) + WriteWebmBlock(packet); + } + } + + return WriteWebmFooter(); +} + +bool WebmEncoder::WriteWebmHeader() { + output_ = base::OpenFile(output_path_, "wb"); + if (!output_) + return false; + + // Global header. + StartSubElement(EBML); + { + Ebml_SerializeUnsigned(&ebml_writer_, EBMLVersion, 1); + Ebml_SerializeUnsigned(&ebml_writer_, EBMLReadVersion, 1); + Ebml_SerializeUnsigned(&ebml_writer_, EBMLMaxIDLength, 4); + Ebml_SerializeUnsigned(&ebml_writer_, EBMLMaxSizeLength, 8); + Ebml_SerializeString(&ebml_writer_, DocType, "webm"); + Ebml_SerializeUnsigned(&ebml_writer_, DocTypeVersion, 2); + Ebml_SerializeUnsigned(&ebml_writer_, DocTypeReadVersion, 2); + } + EndSubElement(); // EBML + + // Single segment with a video track. + StartSubElement(Segment); + { + StartSubElement(Info); + { + // All timecodes in the segment will be expressed in milliseconds. 
+ Ebml_SerializeUnsigned(&ebml_writer_, TimecodeScale, 1000000); + double duration = 1000. * frame_count_ * fps_.den / fps_.num; + Ebml_SerializeFloat(&ebml_writer_, Segment_Duration, duration); + } + EndSubElement(); // Info + + StartSubElement(Tracks); + { + StartSubElement(TrackEntry); + { + Ebml_SerializeUnsigned(&ebml_writer_, TrackNumber, 1); + Ebml_SerializeUnsigned32(&ebml_writer_, TrackUID, 1); + Ebml_SerializeUnsigned(&ebml_writer_, TrackType, 1); // Video + Ebml_SerializeString(&ebml_writer_, CodecID, "V_VP8"); + + StartSubElement(Video); + { + Ebml_SerializeUnsigned(&ebml_writer_, PixelWidth, width_); + Ebml_SerializeUnsigned(&ebml_writer_, PixelHeight, height_); + Ebml_SerializeUnsigned(&ebml_writer_, StereoMode, 0); // Mono + float fps = static_cast<float>(fps_.num) / fps_.den; + Ebml_SerializeFloat(&ebml_writer_, FrameRate, fps); + } + EndSubElement(); // Video + } + EndSubElement(); // TrackEntry + } + EndSubElement(); // Tracks + + StartSubElement(Cluster); { + Ebml_SerializeUnsigned(&ebml_writer_, Timecode, 0); + } // Cluster left open. + } // Segment left open. + + // No check for |has_errors_| here because |false| is only returned when + // opening file fails. + return true; +} + +void WebmEncoder::WriteWebmBlock(const vpx_codec_cx_pkt_t* packet) { + bool is_keyframe = packet->data.frame.flags & VPX_FRAME_IS_KEY; + int64_t pts_ms = 1000 * packet->data.frame.pts * fps_.den / fps_.num; + + DVLOG(1) << "Video packet @" << pts_ms << " ms " + << packet->data.frame.sz << " bytes " + << (is_keyframe ? 
"K" : ""); + + Ebml_WriteID(&ebml_writer_, SimpleBlock); + + uint32 block_length = (packet->data.frame.sz + 4) | 0x10000000; + EbmlSerializeHelper(&block_length, 4); + + uint8 track_number = 1 | 0x80; + EbmlSerializeHelper(&track_number, 1); + + EbmlSerializeHelper(&pts_ms, 2); + + uint8 flags = 0; + if (is_keyframe) + flags |= 0x80; + if (packet->data.frame.flags & VPX_FRAME_IS_INVISIBLE) + flags |= 0x08; + EbmlSerializeHelper(&flags, 1); + + EbmlWrite(packet->data.frame.buf, packet->data.frame.sz); +} + +bool WebmEncoder::WriteWebmFooter() { + EndSubElement(); // Cluster + EndSubElement(); // Segment + DCHECK(ebml_sub_elements_.empty()); + return base::CloseFile(output_) && !has_errors_; +} + +void WebmEncoder::StartSubElement(unsigned long class_id) { + Ebml_WriteID(&ebml_writer_, class_id); + ebml_sub_elements_.push(ftell(output_)); + static const uint64_t kUnknownLen = 0x01FFFFFFFFFFFFFFLLU; + EbmlSerializeHelper(&kUnknownLen, 8); +} + +void WebmEncoder::EndSubElement() { + DCHECK(!ebml_sub_elements_.empty()); + + long int end_pos = ftell(output_); + long int start_pos = ebml_sub_elements_.top(); + ebml_sub_elements_.pop(); + + uint64_t size = (end_pos - start_pos - 8) | 0x0100000000000000ULL; + // Seek to the beginning of the sub-element and patch in the calculated size. + if (fseek(output_, start_pos, SEEK_SET)) { + has_errors_ = true; + LOG(ERROR) << "Error writing to " << output_path_.value(); + } + EbmlSerializeHelper(&size, 8); + + // Restore write position. 
+ if (fseek(output_, end_pos, SEEK_SET)) { + has_errors_ = true; + LOG(ERROR) << "Error writing to " << output_path_.value(); + } +} + +void WebmEncoder::EbmlWrite(const void* buffer, + unsigned long len) { + if (fwrite(buffer, 1, len, output_) != len) { + has_errors_ = true; + LOG(ERROR) << "Error writing to " << output_path_.value(); + } +} + +template <class T> +void WebmEncoder::EbmlSerializeHelper(const T* buffer, unsigned long len) { + for (int i = len - 1; i >= 0; i--) { + uint8 c = *buffer >> (i * CHAR_BIT); + EbmlWrite(&c, 1); + } +} + +void WebmEncoder::EbmlSerialize(const void* buffer, + int buffer_size, + unsigned long len) { + switch (buffer_size) { + case 1: + return EbmlSerializeHelper(static_cast<const int8_t*>(buffer), len); + case 2: + return EbmlSerializeHelper(static_cast<const int16_t*>(buffer), len); + case 4: + return EbmlSerializeHelper(static_cast<const int32_t*>(buffer), len); + case 8: + return EbmlSerializeHelper(static_cast<const int64_t*>(buffer), len); + default: + NOTREACHED() << "Invalid EbmlSerialize length: " << len; + } +} + +} // namespace chromeos + +} // namespace media diff --git a/media/formats/webm/chromeos/webm_encoder.h b/media/formats/webm/chromeos/webm_encoder.h new file mode 100644 index 0000000..fd0fc75 --- /dev/null +++ b/media/formats/webm/chromeos/webm_encoder.h @@ -0,0 +1,106 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
// WebM encoder using libvpx. Currently only supports one-pass, constant bitrate
// encoding of short files consisting of a single video track. Seek info and
// cues are not supported, so generated .webm file does not strictly adhere to
// WebM standard (http://www.webmproject.org/code/specs/container/).
class MEDIA_EXPORT WebmEncoder {
 public:
  // Create new instance for writing to |output_path|. If |realtime| is |true|,
  // uses realtime deadline, otherwise - "good quality" deadline.
  // |bitrate| is passed to libvpx as the target bitrate.
  WebmEncoder(const base::FilePath& output_path, int bitrate, bool realtime);
  ~WebmEncoder();

  // Encodes video from a Nx(N*M) sprite, having M frames of size NxN with FPS
  // |fps_n/fps_d|. Must be called on a thread that allows disk IO.
  // Returns |true| iff encoding and writing to file is successful.
  bool EncodeFromSprite(const SkBitmap& sprite, int fps_n, int fps_d);

 private:
  // Writes global WebM header and starts a single video track. Returns |false|
  // if there was an error opening file for writing.
  bool WriteWebmHeader();

  // Writes VPX packet to output file.
  void WriteWebmBlock(const vpx_codec_cx_pkt_t* packet);

  // Finishes video track and closes output file. Returns |false| if there were
  // any errors during encoding/writing file.
  bool WriteWebmFooter();

  // Starts a new WebM sub-element of given type. Those can be nested.
  // The element is written with a placeholder size that EndSubElement()
  // patches later.
  void StartSubElement(unsigned long class_id);

  // Closes the most recently opened sub-element, seeking back to fill in its
  // real size.
  void EndSubElement();

  // libmkv callbacks.
  // EbmlWrite() writes |len| raw bytes from |buffer| to |output_| and sets
  // |has_errors_| on a short write.
  void EbmlWrite(const void* buffer, unsigned long len);
  // EbmlSerialize() writes the |len| low-order bytes of the |buffer_size|-byte
  // integer at |buffer|, most significant byte first.
  void EbmlSerialize(const void* buffer, int buffer_size, unsigned long len);

  // Writes the |len| low-order bytes of |*buffer|, most significant byte
  // first, one byte at a time through EbmlWrite().
  template <typename T>
  void EbmlSerializeHelper(const T* buffer, unsigned long len);

  // Video dimensions and FPS. Not set by the constructor; assigned at the
  // start of EncodeFromSprite().
  size_t width_;
  size_t height_;
  vpx_rational_t fps_;

  // Number of frames in video. Assigned in EncodeFromSprite().
  size_t frame_count_;

  // VPX config in use.
  vpx_codec_enc_cfg_t config_;

  // VPX parameters.
  int bitrate_;
  unsigned long deadline_;

  // EbmlWriter context.
  EbmlGlobal ebml_writer_;

  // Stack with start offsets of currently open sub-elements.
  std::stack<long int> ebml_sub_elements_;

  // Destination file. |output_| is opened by WriteWebmHeader() and closed by
  // WriteWebmFooter(); it is not set by the constructor.
  base::FilePath output_path_;
  FILE* output_;

  // True if an error occurred while encoding/writing to file.
  bool has_errors_;

  DISALLOW_COPY_AND_ASSIGN(WebmEncoder);
};
+ +#include "media/formats/webm/cluster_builder.h" + +#include "base/logging.h" +#include "media/base/data_buffer.h" + +namespace media { + +static const uint8 kClusterHeader[] = { + 0x1F, 0x43, 0xB6, 0x75, // CLUSTER ID + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // cluster(size = 0) + 0xE7, // Timecode ID + 0x88, // timecode(size=8) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // timecode value +}; + +static const uint8 kSimpleBlockHeader[] = { + 0xA3, // SimpleBlock ID + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // SimpleBlock(size = 0) +}; + +static const uint8 kBlockGroupHeader[] = { + 0xA0, // BlockGroup ID + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // BlockGroup(size = 0) + 0x9B, // BlockDuration ID + 0x88, // BlockDuration(size = 8) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // duration + 0xA1, // Block ID + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Block(size = 0) +}; + +enum { + kClusterSizeOffset = 4, + kClusterTimecodeOffset = 14, + + kSimpleBlockSizeOffset = 1, + + kBlockGroupSizeOffset = 1, + kBlockGroupDurationOffset = 11, + kBlockGroupBlockSizeOffset = 20, + + kInitialBufferSize = 32768, +}; + +Cluster::Cluster(scoped_ptr<uint8[]> data, int size) + : data_(data.Pass()), size_(size) {} +Cluster::~Cluster() {} + +ClusterBuilder::ClusterBuilder() { Reset(); } +ClusterBuilder::~ClusterBuilder() {} + +void ClusterBuilder::SetClusterTimecode(int64 cluster_timecode) { + DCHECK_EQ(cluster_timecode_, -1); + + cluster_timecode_ = cluster_timecode; + + // Write the timecode into the header. 
+ uint8* buf = buffer_.get() + kClusterTimecodeOffset; + for (int i = 7; i >= 0; --i) { + buf[i] = cluster_timecode & 0xff; + cluster_timecode >>= 8; + } +} + +void ClusterBuilder::AddSimpleBlock(int track_num, int64 timecode, int flags, + const uint8* data, int size) { + int block_size = size + 4; + int bytes_needed = sizeof(kSimpleBlockHeader) + block_size; + if (bytes_needed > (buffer_size_ - bytes_used_)) + ExtendBuffer(bytes_needed); + + uint8* buf = buffer_.get() + bytes_used_; + int block_offset = bytes_used_; + memcpy(buf, kSimpleBlockHeader, sizeof(kSimpleBlockHeader)); + UpdateUInt64(block_offset + kSimpleBlockSizeOffset, block_size); + buf += sizeof(kSimpleBlockHeader); + + WriteBlock(buf, track_num, timecode, flags, data, size); + + bytes_used_ += bytes_needed; +} + +void ClusterBuilder::AddBlockGroup(int track_num, int64 timecode, int duration, + int flags, const uint8* data, int size) { + int block_size = size + 4; + int bytes_needed = sizeof(kBlockGroupHeader) + block_size; + int block_group_size = bytes_needed - 9; + + if (bytes_needed > (buffer_size_ - bytes_used_)) + ExtendBuffer(bytes_needed); + + uint8* buf = buffer_.get() + bytes_used_; + int block_group_offset = bytes_used_; + memcpy(buf, kBlockGroupHeader, sizeof(kBlockGroupHeader)); + UpdateUInt64(block_group_offset + kBlockGroupSizeOffset, block_group_size); + UpdateUInt64(block_group_offset + kBlockGroupDurationOffset, duration); + UpdateUInt64(block_group_offset + kBlockGroupBlockSizeOffset, block_size); + buf += sizeof(kBlockGroupHeader); + + // Make sure the 4 most-significant bits are 0. 
+ // http://www.matroska.org/technical/specs/index.html#block_structure + flags &= 0x0f; + + WriteBlock(buf, track_num, timecode, flags, data, size); + + bytes_used_ += bytes_needed; +} + +void ClusterBuilder::WriteBlock(uint8* buf, int track_num, int64 timecode, + int flags, const uint8* data, int size) { + DCHECK_GE(track_num, 0); + DCHECK_LE(track_num, 126); + DCHECK_GE(flags, 0); + DCHECK_LE(flags, 0xff); + DCHECK(data); + DCHECK_GT(size, 0); + DCHECK_NE(cluster_timecode_, -1); + + int64 timecode_delta = timecode - cluster_timecode_; + DCHECK_GE(timecode_delta, -32768); + DCHECK_LE(timecode_delta, 32767); + + buf[0] = 0x80 | (track_num & 0x7F); + buf[1] = (timecode_delta >> 8) & 0xff; + buf[2] = timecode_delta & 0xff; + buf[3] = flags & 0xff; + memcpy(buf + 4, data, size); +} + +scoped_ptr<Cluster> ClusterBuilder::Finish() { + DCHECK_NE(cluster_timecode_, -1); + + UpdateUInt64(kClusterSizeOffset, bytes_used_ - (kClusterSizeOffset + 8)); + + scoped_ptr<Cluster> ret(new Cluster(buffer_.Pass(), bytes_used_)); + Reset(); + return ret.Pass(); +} + +void ClusterBuilder::Reset() { + buffer_size_ = kInitialBufferSize; + buffer_.reset(new uint8[buffer_size_]); + memcpy(buffer_.get(), kClusterHeader, sizeof(kClusterHeader)); + bytes_used_ = sizeof(kClusterHeader); + cluster_timecode_ = -1; +} + +void ClusterBuilder::ExtendBuffer(int bytes_needed) { + int new_buffer_size = 2 * buffer_size_; + + while ((new_buffer_size - bytes_used_) < bytes_needed) + new_buffer_size *= 2; + + scoped_ptr<uint8[]> new_buffer(new uint8[new_buffer_size]); + + memcpy(new_buffer.get(), buffer_.get(), bytes_used_); + buffer_.reset(new_buffer.release()); + buffer_size_ = new_buffer_size; +} + +void ClusterBuilder::UpdateUInt64(int offset, int64 value) { + DCHECK_LE(offset + 7, buffer_size_); + uint8* buf = buffer_.get() + offset; + + // Fill the last 7 bytes of size field in big-endian order. 
+ for (int i = 7; i > 0; i--) { + buf[i] = value & 0xff; + value >>= 8; + } +} + +} // namespace media diff --git a/media/formats/webm/cluster_builder.h b/media/formats/webm/cluster_builder.h new file mode 100644 index 0000000..98f0f6c --- /dev/null +++ b/media/formats/webm/cluster_builder.h @@ -0,0 +1,59 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_WEBM_CLUSTER_BUILDER_H_ +#define MEDIA_FORMATS_WEBM_CLUSTER_BUILDER_H_ + +#include "base/basictypes.h" +#include "base/memory/scoped_ptr.h" +#include "media/base/buffers.h" + +namespace media { + +class Cluster { + public: + Cluster(scoped_ptr<uint8[]> data, int size); + ~Cluster(); + + const uint8* data() const { return data_.get(); } + int size() const { return size_; } + + private: + scoped_ptr<uint8[]> data_; + int size_; + + DISALLOW_IMPLICIT_CONSTRUCTORS(Cluster); +}; + +class ClusterBuilder { + public: + ClusterBuilder(); + ~ClusterBuilder(); + + void SetClusterTimecode(int64 cluster_timecode); + void AddSimpleBlock(int track_num, int64 timecode, int flags, + const uint8* data, int size); + void AddBlockGroup(int track_num, int64 timecode, int duration, int flags, + const uint8* data, int size); + + scoped_ptr<Cluster> Finish(); + + private: + void Reset(); + void ExtendBuffer(int bytes_needed); + void UpdateUInt64(int offset, int64 value); + void WriteBlock(uint8* buf, int track_num, int64 timecode, int flags, + const uint8* data, int size); + + scoped_ptr<uint8[]> buffer_; + int buffer_size_; + int bytes_used_; + int64 cluster_timecode_; + + DISALLOW_COPY_AND_ASSIGN(ClusterBuilder); +}; + +} // namespace media + +#endif // MEDIA_FORMATS_WEBM_CLUSTER_BUILDER_H_ diff --git a/media/formats/webm/tracks_builder.cc b/media/formats/webm/tracks_builder.cc new file mode 100644 index 0000000..5a49ce0 --- /dev/null +++ b/media/formats/webm/tracks_builder.cc @@ -0,0 +1,216 
@@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/webm/tracks_builder.h" + +#include "media/formats/webm/webm_constants.h" + +namespace media { + +// Returns size of an integer, formatted using Matroska serialization. +static int GetUIntMkvSize(uint64 value) { + if (value < 0x07FULL) + return 1; + if (value < 0x03FFFULL) + return 2; + if (value < 0x01FFFFFULL) + return 3; + if (value < 0x0FFFFFFFULL) + return 4; + if (value < 0x07FFFFFFFFULL) + return 5; + if (value < 0x03FFFFFFFFFFULL) + return 6; + if (value < 0x01FFFFFFFFFFFFULL) + return 7; + return 8; +} + +// Returns the minimium size required to serialize an integer value. +static int GetUIntSize(uint64 value) { + if (value < 0x0100ULL) + return 1; + if (value < 0x010000ULL) + return 2; + if (value < 0x01000000ULL) + return 3; + if (value < 0x0100000000ULL) + return 4; + if (value < 0x010000000000ULL) + return 5; + if (value < 0x01000000000000ULL) + return 6; + if (value < 0x0100000000000000ULL) + return 7; + return 8; +} + +static int MasterElementSize(int element_id, int payload_size) { + return GetUIntSize(element_id) + GetUIntMkvSize(payload_size) + payload_size; +} + +static int IntElementSize(int element_id, int value) { + return GetUIntSize(element_id) + 1 + GetUIntSize(value); +} + +static int StringElementSize(int element_id, const std::string& value) { + return GetUIntSize(element_id) + + GetUIntMkvSize(value.length()) + + value.length(); +} + +static void SerializeInt(uint8** buf_ptr, int* buf_size_ptr, + int64 value, int size) { + uint8*& buf = *buf_ptr; + int& buf_size = *buf_size_ptr; + + for (int idx = 1; idx <= size; ++idx) { + *buf++ = static_cast<uint8>(value >> ((size - idx) * 8)); + --buf_size; + } +} + +static void WriteElementId(uint8** buf, int* buf_size, int element_id) { + SerializeInt(buf, buf_size, element_id, GetUIntSize(element_id)); 
+} + +static void WriteUInt(uint8** buf, int* buf_size, uint64 value) { + const int size = GetUIntMkvSize(value); + value |= (1ULL << (size * 7)); // Matroska formatting + SerializeInt(buf, buf_size, value, size); +} + +static void WriteMasterElement(uint8** buf, int* buf_size, + int element_id, int payload_size) { + WriteElementId(buf, buf_size, element_id); + WriteUInt(buf, buf_size, payload_size); +} + +static void WriteIntElement(uint8** buf, int* buf_size, + int element_id, int value) { + WriteElementId(buf, buf_size, element_id); + + const int size = GetUIntSize(value); + WriteUInt(buf, buf_size, size); + + SerializeInt(buf, buf_size, value, size); +} + +static void WriteStringElement(uint8** buf_ptr, int* buf_size_ptr, + int element_id, const std::string& value) { + uint8*& buf = *buf_ptr; + int& buf_size = *buf_size_ptr; + + WriteElementId(&buf, &buf_size, element_id); + + const uint64 size = value.length(); + WriteUInt(&buf, &buf_size, size); + + memcpy(buf, value.data(), size); + buf += size; + buf_size -= size; +} + +TracksBuilder::TracksBuilder() {} +TracksBuilder::~TracksBuilder() {} + +void TracksBuilder::AddTrack( + int track_num, + int track_type, + int track_uid, + const std::string& codec_id, + const std::string& name, + const std::string& language) { + tracks_.push_back(Track(track_num, track_type, track_uid, codec_id, name, + language)); +} + +std::vector<uint8> TracksBuilder::Finish() { + // Allocate the storage + std::vector<uint8> buffer; + buffer.resize(GetTracksSize()); + + // Populate the storage with a tracks header + WriteTracks(&buffer[0], buffer.size()); + + return buffer; +} + +int TracksBuilder::GetTracksSize() const { + return MasterElementSize(kWebMIdTracks, GetTracksPayloadSize()); +} + +int TracksBuilder::GetTracksPayloadSize() const { + int payload_size = 0; + + for (TrackList::const_iterator itr = tracks_.begin(); + itr != tracks_.end(); ++itr) { + payload_size += itr->GetSize(); + } + + return payload_size; +} + +void 
TracksBuilder::WriteTracks(uint8* buf, int buf_size) const { + WriteMasterElement(&buf, &buf_size, kWebMIdTracks, GetTracksPayloadSize()); + + for (TrackList::const_iterator itr = tracks_.begin(); + itr != tracks_.end(); ++itr) { + itr->Write(&buf, &buf_size); + } +} + +TracksBuilder::Track::Track(int track_num, int track_type, int track_uid, + const std::string& codec_id, + const std::string& name, + const std::string& language) + : track_num_(track_num), + track_type_(track_type), + track_uid_(track_uid), + codec_id_(codec_id), + name_(name), + language_(language) { +} + +int TracksBuilder::Track::GetSize() const { + return MasterElementSize(kWebMIdTrackEntry, GetPayloadSize()); +} + +int TracksBuilder::Track::GetPayloadSize() const { + int size = 0; + + size += IntElementSize(kWebMIdTrackNumber, track_num_); + size += IntElementSize(kWebMIdTrackType, track_type_); + size += IntElementSize(kWebMIdTrackUID, track_uid_); + + if (!codec_id_.empty()) + size += StringElementSize(kWebMIdCodecID, codec_id_); + + if (!name_.empty()) + size += StringElementSize(kWebMIdName, name_); + + if (!language_.empty()) + size += StringElementSize(kWebMIdLanguage, language_); + + return size; +} + +void TracksBuilder::Track::Write(uint8** buf, int* buf_size) const { + WriteMasterElement(buf, buf_size, kWebMIdTrackEntry, GetPayloadSize()); + + WriteIntElement(buf, buf_size, kWebMIdTrackNumber, track_num_); + WriteIntElement(buf, buf_size, kWebMIdTrackType, track_type_); + WriteIntElement(buf, buf_size, kWebMIdTrackUID, track_uid_); + + if (!codec_id_.empty()) + WriteStringElement(buf, buf_size, kWebMIdCodecID, codec_id_); + + if (!name_.empty()) + WriteStringElement(buf, buf_size, kWebMIdName, name_); + + if (!language_.empty()) + WriteStringElement(buf, buf_size, kWebMIdLanguage, language_); +} + +} // namespace media diff --git a/media/formats/webm/tracks_builder.h b/media/formats/webm/tracks_builder.h new file mode 100644 index 0000000..ecd0fe5 --- /dev/null +++ 
// Builds a serialized WebM Tracks element from a list of track descriptions.
// Add tracks with AddTrack(), then call Finish() to obtain the bytes.
class TracksBuilder {
 public:
  TracksBuilder();
  ~TracksBuilder();

  // Appends a track description. |codec_id|, |name| and |language| are
  // optional: an empty string means the corresponding element is not written.
  void AddTrack(int track_num, int track_type, int track_uid,
                const std::string& codec_id, const std::string& name,
                const std::string& language);

  // Returns the serialized Tracks element containing every track added so
  // far, in the order they were added.
  std::vector<uint8> Finish();

 private:
  // Total size in bytes of the Tracks element, including its ID and size
  // fields.
  int GetTracksSize() const;
  // Combined size in bytes of all serialized TrackEntry elements.
  int GetTracksPayloadSize() const;
  // Writes the Tracks header followed by every track into |buffer|.
  void WriteTracks(uint8* buffer, int buffer_size) const;

  // Description of a single track and its TrackEntry serialization.
  class Track {
   public:
    Track(int track_num, int track_type, int track_uid,
          const std::string& codec_id, const std::string& name,
          const std::string& language);

    // Size in bytes of the whole TrackEntry element for this track.
    int GetSize() const;
    // Writes the TrackEntry at |*buf|, advancing |*buf| and decreasing
    // |*buf_size| by the number of bytes written.
    void Write(uint8** buf, int* buf_size) const;
   private:
    // Size in bytes of the TrackEntry's child elements only.
    int GetPayloadSize() const;

    int track_num_;
    int track_type_;
    int track_uid_;
    std::string codec_id_;
    std::string name_;
    std::string language_;
  };

  typedef std::list<Track> TrackList;
  TrackList tracks_;

  DISALLOW_COPY_AND_ASSIGN(TracksBuilder);
};
+ +#include "media/formats/webm/webm_audio_client.h" + +#include "media/base/audio_decoder_config.h" +#include "media/base/channel_layout.h" +#include "media/formats/webm/webm_constants.h" + +namespace media { + +WebMAudioClient::WebMAudioClient(const LogCB& log_cb) + : log_cb_(log_cb) { + Reset(); +} + +WebMAudioClient::~WebMAudioClient() { +} + +void WebMAudioClient::Reset() { + channels_ = -1; + samples_per_second_ = -1; + output_samples_per_second_ = -1; +} + +bool WebMAudioClient::InitializeConfig( + const std::string& codec_id, const std::vector<uint8>& codec_private, + int64 seek_preroll, int64 codec_delay, bool is_encrypted, + AudioDecoderConfig* config) { + DCHECK(config); + + AudioCodec audio_codec = kUnknownAudioCodec; + if (codec_id == "A_VORBIS") { + audio_codec = kCodecVorbis; + } else if (codec_id == "A_OPUS") { + audio_codec = kCodecOpus; + } else { + MEDIA_LOG(log_cb_) << "Unsupported audio codec_id " << codec_id; + return false; + } + + if (samples_per_second_ <= 0) + return false; + + // Set channel layout default if a Channels element was not present. + if (channels_ == -1) + channels_ = 1; + + ChannelLayout channel_layout = GuessChannelLayout(channels_); + + if (channel_layout == CHANNEL_LAYOUT_UNSUPPORTED) { + MEDIA_LOG(log_cb_) << "Unsupported channel count " << channels_; + return false; + } + + int samples_per_second = samples_per_second_; + if (output_samples_per_second_ > 0) + samples_per_second = output_samples_per_second_; + + const uint8* extra_data = NULL; + size_t extra_data_size = 0; + if (codec_private.size() > 0) { + extra_data = &codec_private[0]; + extra_data_size = codec_private.size(); + } + + config->Initialize( + audio_codec, + (audio_codec == kCodecOpus) ? kSampleFormatS16 : kSampleFormatPlanarF32, + channel_layout, + samples_per_second, extra_data, extra_data_size, is_encrypted, true, + base::TimeDelta::FromMicroseconds( + (seek_preroll != -1 ? 
seek_preroll : 0) / 1000), + base::TimeDelta::FromMicroseconds( + (codec_delay != -1 ? codec_delay : 0) / 1000)); + return config->IsValidConfig(); +} + +bool WebMAudioClient::OnUInt(int id, int64 val) { + if (id == kWebMIdChannels) { + if (channels_ != -1) { + MEDIA_LOG(log_cb_) << "Multiple values for id " << std::hex << id + << " specified. (" << channels_ << " and " << val + << ")"; + return false; + } + + channels_ = val; + } + return true; +} + +bool WebMAudioClient::OnFloat(int id, double val) { + double* dst = NULL; + + switch (id) { + case kWebMIdSamplingFrequency: + dst = &samples_per_second_; + break; + case kWebMIdOutputSamplingFrequency: + dst = &output_samples_per_second_; + break; + default: + return true; + } + + if (val <= 0) + return false; + + if (*dst != -1) { + MEDIA_LOG(log_cb_) << "Multiple values for id " << std::hex << id + << " specified (" << *dst << " and " << val << ")"; + return false; + } + + *dst = val; + return true; +} + +} // namespace media diff --git a/media/formats/webm/webm_audio_client.h b/media/formats/webm/webm_audio_client.h new file mode 100644 index 0000000..a723b0d --- /dev/null +++ b/media/formats/webm/webm_audio_client.h @@ -0,0 +1,54 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_WEBM_WEBM_AUDIO_CLIENT_H_ +#define MEDIA_FORMATS_WEBM_WEBM_AUDIO_CLIENT_H_ + +#include <string> +#include <vector> + +#include "media/base/media_log.h" +#include "media/formats/webm/webm_parser.h" + +namespace media { +class AudioDecoderConfig; + +// Helper class used to parse an Audio element inside a TrackEntry element. +class WebMAudioClient : public WebMParserClient { + public: + explicit WebMAudioClient(const LogCB& log_cb); + virtual ~WebMAudioClient(); + + // Reset this object's state so it can process a new audio track element. 
+ void Reset(); + + // Initialize |config| with the data in |codec_id|, |codec_private|, + // |is_encrypted| and the fields parsed from the last audio track element this + // object was used to parse. + // Returns true if |config| was successfully initialized. + // Returns false if there was unexpected values in the provided parameters or + // audio track element fields. + bool InitializeConfig(const std::string& codec_id, + const std::vector<uint8>& codec_private, + const int64 seek_preroll, + const int64 codec_delay, + bool is_encrypted, + AudioDecoderConfig* config); + + private: + // WebMParserClient implementation. + virtual bool OnUInt(int id, int64 val) OVERRIDE; + virtual bool OnFloat(int id, double val) OVERRIDE; + + LogCB log_cb_; + int channels_; + double samples_per_second_; + double output_samples_per_second_; + + DISALLOW_COPY_AND_ASSIGN(WebMAudioClient); +}; + +} // namespace media + +#endif // MEDIA_FORMATS_WEBM_WEBM_AUDIO_CLIENT_H_ diff --git a/media/formats/webm/webm_cluster_parser.cc b/media/formats/webm/webm_cluster_parser.cc new file mode 100644 index 0000000..e662f83 --- /dev/null +++ b/media/formats/webm/webm_cluster_parser.cc @@ -0,0 +1,471 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+
+#include "media/formats/webm/webm_cluster_parser.h"
+
+#include <vector>
+
+#include "base/logging.h"
+#include "base/sys_byteorder.h"
+#include "media/base/buffers.h"
+#include "media/base/decrypt_config.h"
+#include "media/filters/webvtt_util.h"
+#include "media/formats/webm/webm_constants.h"
+#include "media/formats/webm/webm_crypto_helpers.h"
+#include "media/formats/webm/webm_webvtt_parser.h"
+
+namespace media {
+
+// Starts the iteration at the first entry of |text_track_map|.
+WebMClusterParser::TextTrackIterator::TextTrackIterator(
+    const TextTrackMap& text_track_map) :
+    iterator_(text_track_map.begin()),
+    iterator_end_(text_track_map.end()) {
+}
+
+WebMClusterParser::TextTrackIterator::TextTrackIterator(
+    const TextTrackIterator& rhs) :
+    iterator_(rhs.iterator_),
+    iterator_end_(rhs.iterator_end_) {
+}
+
+WebMClusterParser::TextTrackIterator::~TextTrackIterator() {
+}
+
+// Yields the next text track: writes its number and buffer queue to the
+// out-params and returns true, or writes 0/NULL and returns false once the
+// iterator is exhausted.
+bool WebMClusterParser::TextTrackIterator::operator()(
+    int* track_num,
+    const BufferQueue** buffers) {
+  if (iterator_ == iterator_end_) {
+    *track_num = 0;
+    *buffers = NULL;
+
+    return false;
+  }
+
+  *track_num = iterator_->first;
+  *buffers = &iterator_->second.buffers();
+
+  ++iterator_;
+  return true;
+}
+
+// |timecode_scale| is divided by 1000.0 to obtain the multiplier that converts
+// cluster/block timecodes into microseconds. One Track is created per entry
+// in |text_tracks|; blocks for tracks in |ignored_tracks| are dropped.
+WebMClusterParser::WebMClusterParser(
+    int64 timecode_scale, int audio_track_num, int video_track_num,
+    const WebMTracksParser::TextTracks& text_tracks,
+    const std::set<int64>& ignored_tracks,
+    const std::string& audio_encryption_key_id,
+    const std::string& video_encryption_key_id,
+    const LogCB& log_cb)
+    : timecode_multiplier_(timecode_scale / 1000.0),
+      ignored_tracks_(ignored_tracks),
+      audio_encryption_key_id_(audio_encryption_key_id),
+      video_encryption_key_id_(video_encryption_key_id),
+      parser_(kWebMIdCluster, this),
+      last_block_timecode_(-1),
+      block_data_size_(-1),
+      block_duration_(-1),
+      block_add_id_(-1),
+      block_additional_data_size_(-1),
+      discard_padding_(-1),
+      // Must be initialized: OnUInt() reads this flag for any DiscardPadding
+      // element, including one that shows up before the first BlockGroup.
+      discard_padding_set_(false),
+      cluster_timecode_(-1),
+      cluster_start_time_(kNoTimestamp()),
+      cluster_ended_(false),
+      audio_(audio_track_num, false),
+      video_(video_track_num, true),
+      log_cb_(log_cb) {
+  for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
+       it != text_tracks.end();
+       ++it) {
+    text_track_map_.insert(std::make_pair(it->first, Track(it->first, false)));
+  }
+}
+
+WebMClusterParser::~WebMClusterParser() {}
+
+// Returns the parser to its initial state so that it can accept a new
+// cluster, discarding all buffered output.
+void WebMClusterParser::Reset() {
+  last_block_timecode_ = -1;
+  cluster_timecode_ = -1;
+  cluster_start_time_ = kNoTimestamp();
+  cluster_ended_ = false;
+
+  // Drop any partially parsed BlockGroup. Otherwise block data, BlockAdditional
+  // side data or discard padding left over from an aborted parse could be
+  // attached to the first BlockGroup of the next cluster.
+  block_data_.reset();
+  block_data_size_ = -1;
+  block_duration_ = -1;
+  block_add_id_ = -1;
+  block_additional_data_.reset();
+  block_additional_data_size_ = -1;
+  discard_padding_ = -1;
+  discard_padding_set_ = false;
+
+  parser_.Reset();
+  audio_.Reset();
+  video_.Reset();
+  ResetTextTracks();
+}
+
+int WebMClusterParser::Parse(const uint8* buf, int size) {
+  audio_.Reset();
+  video_.Reset();
+  ResetTextTracks();
+
+  int result = parser_.Parse(buf, size);
+
+  if (result < 0) {
+    cluster_ended_ = false;
+    return result;
+  }
+
+  cluster_ended_ = parser_.IsParsingComplete();
+  if (cluster_ended_) {
+    // If there were no buffers in this cluster, set the cluster start time to
+    // be the |cluster_timecode_|.
+    if (cluster_start_time_ == kNoTimestamp()) {
+      // If the cluster did not even have a |cluster_timecode_|, signal parse
+      // error.
+      if (cluster_timecode_ < 0)
+        return -1;
+
+      cluster_start_time_ = base::TimeDelta::FromMicroseconds(
+          cluster_timecode_ * timecode_multiplier_);
+    }
+
+    // Reset the parser if we're done parsing so that
+    // it is ready to accept another cluster on the next
+    // call.
+ parser_.Reset(); + + last_block_timecode_ = -1; + cluster_timecode_ = -1; + } + + return result; +} + +WebMClusterParser::TextTrackIterator +WebMClusterParser::CreateTextTrackIterator() const { + return TextTrackIterator(text_track_map_); +} + +WebMParserClient* WebMClusterParser::OnListStart(int id) { + if (id == kWebMIdCluster) { + cluster_timecode_ = -1; + cluster_start_time_ = kNoTimestamp(); + } else if (id == kWebMIdBlockGroup) { + block_data_.reset(); + block_data_size_ = -1; + block_duration_ = -1; + discard_padding_ = -1; + discard_padding_set_ = false; + } else if (id == kWebMIdBlockAdditions) { + block_add_id_ = -1; + block_additional_data_.reset(); + block_additional_data_size_ = -1; + } + + return this; +} + +bool WebMClusterParser::OnListEnd(int id) { + if (id != kWebMIdBlockGroup) + return true; + + // Make sure the BlockGroup actually had a Block. + if (block_data_size_ == -1) { + MEDIA_LOG(log_cb_) << "Block missing from BlockGroup."; + return false; + } + + bool result = ParseBlock(false, block_data_.get(), block_data_size_, + block_additional_data_.get(), + block_additional_data_size_, block_duration_, + discard_padding_set_ ? 
discard_padding_ : 0); + block_data_.reset(); + block_data_size_ = -1; + block_duration_ = -1; + block_add_id_ = -1; + block_additional_data_.reset(); + block_additional_data_size_ = -1; + discard_padding_ = -1; + discard_padding_set_ = false; + return result; +} + +bool WebMClusterParser::OnUInt(int id, int64 val) { + int64* dst; + switch (id) { + case kWebMIdTimecode: + dst = &cluster_timecode_; + break; + case kWebMIdBlockDuration: + dst = &block_duration_; + break; + case kWebMIdBlockAddID: + dst = &block_add_id_; + break; + case kWebMIdDiscardPadding: + if (discard_padding_set_) + return false; + discard_padding_set_ = true; + discard_padding_ = val; + return true; + default: + return true; + } + if (*dst != -1) + return false; + *dst = val; + return true; +} + +bool WebMClusterParser::ParseBlock(bool is_simple_block, const uint8* buf, + int size, const uint8* additional, + int additional_size, int duration, + int64 discard_padding) { + if (size < 4) + return false; + + // Return an error if the trackNum > 127. We just aren't + // going to support large track numbers right now. + if (!(buf[0] & 0x80)) { + MEDIA_LOG(log_cb_) << "TrackNumber over 127 not supported"; + return false; + } + + int track_num = buf[0] & 0x7f; + int timecode = buf[1] << 8 | buf[2]; + int flags = buf[3] & 0xff; + int lacing = (flags >> 1) & 0x3; + + if (lacing) { + MEDIA_LOG(log_cb_) << "Lacing " << lacing << " is not supported yet."; + return false; + } + + // Sign extend negative timecode offsets. 
+ if (timecode & 0x8000) + timecode |= ~0xffff; + + const uint8* frame_data = buf + 4; + int frame_size = size - (frame_data - buf); + return OnBlock(is_simple_block, track_num, timecode, duration, flags, + frame_data, frame_size, additional, additional_size, + discard_padding); +} + +bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) { + switch (id) { + case kWebMIdSimpleBlock: + return ParseBlock(true, data, size, NULL, -1, -1, 0); + + case kWebMIdBlock: + if (block_data_) { + MEDIA_LOG(log_cb_) << "More than 1 Block in a BlockGroup is not " + "supported."; + return false; + } + block_data_.reset(new uint8[size]); + memcpy(block_data_.get(), data, size); + block_data_size_ = size; + return true; + + case kWebMIdBlockAdditional: { + uint64 block_add_id = base::HostToNet64(block_add_id_); + if (block_additional_data_) { + // TODO(vigneshv): Technically, more than 1 BlockAdditional is allowed + // as per matroska spec. But for now we don't have a use case to + // support parsing of such files. Take a look at this again when such a + // case arises. + MEDIA_LOG(log_cb_) << "More than 1 BlockAdditional in a BlockGroup is " + "not supported."; + return false; + } + // First 8 bytes of side_data in DecoderBuffer is the BlockAddID + // element's value in Big Endian format. This is done to mimic ffmpeg + // demuxer's behavior. 
+ block_additional_data_size_ = size + sizeof(block_add_id); + block_additional_data_.reset(new uint8[block_additional_data_size_]); + memcpy(block_additional_data_.get(), &block_add_id, + sizeof(block_add_id)); + memcpy(block_additional_data_.get() + 8, data, size); + return true; + } + + default: + return true; + } +} + +bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, + int timecode, + int block_duration, + int flags, + const uint8* data, int size, + const uint8* additional, int additional_size, + int64 discard_padding) { + DCHECK_GE(size, 0); + if (cluster_timecode_ == -1) { + MEDIA_LOG(log_cb_) << "Got a block before cluster timecode."; + return false; + } + + // TODO(acolwell): Should relative negative timecode offsets be rejected? Or + // only when the absolute timecode is negative? See http://crbug.com/271794 + if (timecode < 0) { + MEDIA_LOG(log_cb_) << "Got a block with negative timecode offset " + << timecode; + return false; + } + + if (last_block_timecode_ != -1 && timecode < last_block_timecode_) { + MEDIA_LOG(log_cb_) + << "Got a block with a timecode before the previous block."; + return false; + } + + Track* track = NULL; + bool is_text = false; + std::string encryption_key_id; + if (track_num == audio_.track_num()) { + track = &audio_; + encryption_key_id = audio_encryption_key_id_; + } else if (track_num == video_.track_num()) { + track = &video_; + encryption_key_id = video_encryption_key_id_; + } else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) { + return true; + } else if (Track* const text_track = FindTextTrack(track_num)) { + if (is_simple_block) // BlockGroup is required for WebVTT cues + return false; + if (block_duration < 0) // not specified + return false; + track = text_track; + is_text = true; + } else { + MEDIA_LOG(log_cb_) << "Unexpected track number " << track_num; + return false; + } + + last_block_timecode_ = timecode; + + base::TimeDelta timestamp = base::TimeDelta::FromMicroseconds( + 
(cluster_timecode_ + timecode) * timecode_multiplier_); + + scoped_refptr<StreamParserBuffer> buffer; + if (!is_text) { + // The first bit of the flags is set when a SimpleBlock contains only + // keyframes. If this is a Block, then inspection of the payload is + // necessary to determine whether it contains a keyframe or not. + // http://www.matroska.org/technical/specs/index.html + bool is_keyframe = + is_simple_block ? (flags & 0x80) != 0 : track->IsKeyframe(data, size); + + // Every encrypted Block has a signal byte and IV prepended to it. Current + // encrypted WebM request for comments specification is here + // http://wiki.webmproject.org/encryption/webm-encryption-rfc + scoped_ptr<DecryptConfig> decrypt_config; + int data_offset = 0; + if (!encryption_key_id.empty() && + !WebMCreateDecryptConfig( + data, size, + reinterpret_cast<const uint8*>(encryption_key_id.data()), + encryption_key_id.size(), + &decrypt_config, &data_offset)) { + return false; + } + + buffer = StreamParserBuffer::CopyFrom( + data + data_offset, size - data_offset, + additional, additional_size, + is_keyframe); + + if (decrypt_config) + buffer->set_decrypt_config(decrypt_config.Pass()); + } else { + std::string id, settings, content; + WebMWebVTTParser::Parse(data, size, &id, &settings, &content); + + std::vector<uint8> side_data; + MakeSideData(id.begin(), id.end(), + settings.begin(), settings.end(), + &side_data); + + buffer = StreamParserBuffer::CopyFrom( + reinterpret_cast<const uint8*>(content.data()), + content.length(), + &side_data[0], + side_data.size(), + true); + } + + buffer->set_timestamp(timestamp); + if (cluster_start_time_ == kNoTimestamp()) + cluster_start_time_ = timestamp; + + if (block_duration >= 0) { + buffer->set_duration(base::TimeDelta::FromMicroseconds( + block_duration * timecode_multiplier_)); + } + + if (discard_padding != 0) { + buffer->set_discard_padding(base::TimeDelta::FromMicroseconds( + discard_padding / 1000)); + } + + return 
track->AddBuffer(buffer); +} + +WebMClusterParser::Track::Track(int track_num, bool is_video) + : track_num_(track_num), + is_video_(is_video) { +} + +WebMClusterParser::Track::~Track() {} + +bool WebMClusterParser::Track::AddBuffer( + const scoped_refptr<StreamParserBuffer>& buffer) { + DVLOG(2) << "AddBuffer() : " << track_num_ + << " ts " << buffer->timestamp().InSecondsF() + << " dur " << buffer->duration().InSecondsF() + << " kf " << buffer->IsKeyframe() + << " size " << buffer->data_size(); + + buffers_.push_back(buffer); + return true; +} + +void WebMClusterParser::Track::Reset() { + buffers_.clear(); +} + +bool WebMClusterParser::Track::IsKeyframe(const uint8* data, int size) const { + // For now, assume that all blocks are keyframes for datatypes other than + // video. This is a valid assumption for Vorbis, WebVTT, & Opus. + if (!is_video_) + return true; + + // Make sure the block is big enough for the minimal keyframe header size. + if (size < 7) + return false; + + // The LSb of the first byte must be a 0 for a keyframe. + // http://tools.ietf.org/html/rfc6386 Section 19.1 + if ((data[0] & 0x01) != 0) + return false; + + // Verify VP8 keyframe startcode. 
+ // http://tools.ietf.org/html/rfc6386 Section 19.1 + if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a) + return false; + + return true; +} + +void WebMClusterParser::ResetTextTracks() { + for (TextTrackMap::iterator it = text_track_map_.begin(); + it != text_track_map_.end(); + ++it) { + it->second.Reset(); + } +} + +WebMClusterParser::Track* +WebMClusterParser::FindTextTrack(int track_num) { + const TextTrackMap::iterator it = text_track_map_.find(track_num); + + if (it == text_track_map_.end()) + return NULL; + + return &it->second; +} + +} // namespace media diff --git a/media/formats/webm/webm_cluster_parser.h b/media/formats/webm/webm_cluster_parser.h new file mode 100644 index 0000000..db683f6 --- /dev/null +++ b/media/formats/webm/webm_cluster_parser.h @@ -0,0 +1,159 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_ +#define MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_ + +#include <deque> +#include <map> +#include <set> +#include <string> + +#include "base/memory/scoped_ptr.h" +#include "media/base/media_export.h" +#include "media/base/media_log.h" +#include "media/base/stream_parser_buffer.h" +#include "media/formats/webm/webm_parser.h" +#include "media/formats/webm/webm_tracks_parser.h" + +namespace media { + +class MEDIA_EXPORT WebMClusterParser : public WebMParserClient { + private: + // Helper class that manages per-track state. + class Track { + public: + Track(int track_num, bool is_video); + ~Track(); + + int track_num() const { return track_num_; } + const std::deque<scoped_refptr<StreamParserBuffer> >& buffers() const { + return buffers_; + } + + bool AddBuffer(const scoped_refptr<StreamParserBuffer>& buffer); + + // Clears all buffer state. + void Reset(); + + // Helper function used to inspect block data to determine if the + // block is a keyframe. 
+ // |data| contains the bytes in the block. + // |size| indicates the number of bytes in |data|. + bool IsKeyframe(const uint8* data, int size) const; + + private: + int track_num_; + std::deque<scoped_refptr<StreamParserBuffer> > buffers_; + bool is_video_; + }; + + typedef std::map<int, Track> TextTrackMap; + + public: + typedef std::deque<scoped_refptr<StreamParserBuffer> > BufferQueue; + + class MEDIA_EXPORT TextTrackIterator { + public: + explicit TextTrackIterator(const TextTrackMap& text_track_map); + TextTrackIterator(const TextTrackIterator& rhs); + ~TextTrackIterator(); + + // To visit each text track. If the iterator is exhausted, it returns + // as parameters the values 0 and NULL, and the function returns false. + // Otherwise, it returns the buffers for the associated track, and the + // function returns true. + bool operator()(int* track_num, const BufferQueue** buffers); + private: + TextTrackIterator& operator=(const TextTrackIterator&); + + TextTrackMap::const_iterator iterator_; + const TextTrackMap::const_iterator iterator_end_; + }; + + WebMClusterParser(int64 timecode_scale, + int audio_track_num, + int video_track_num, + const WebMTracksParser::TextTracks& text_tracks, + const std::set<int64>& ignored_tracks, + const std::string& audio_encryption_key_id, + const std::string& video_encryption_key_id, + const LogCB& log_cb); + virtual ~WebMClusterParser(); + + // Resets the parser state so it can accept a new cluster. + void Reset(); + + // Parses a WebM cluster element in |buf|. + // + // Returns -1 if the parse fails. + // Returns 0 if more data is needed. + // Returns the number of bytes parsed on success. + int Parse(const uint8* buf, int size); + + base::TimeDelta cluster_start_time() const { return cluster_start_time_; } + const BufferQueue& audio_buffers() const { return audio_.buffers(); } + const BufferQueue& video_buffers() const { return video_.buffers(); } + + // Returns an iterator object, allowing each text track to be visited. 
+ TextTrackIterator CreateTextTrackIterator() const; + + // Returns true if the last Parse() call stopped at the end of a cluster. + bool cluster_ended() const { return cluster_ended_; } + + private: + // WebMParserClient methods. + virtual WebMParserClient* OnListStart(int id) OVERRIDE; + virtual bool OnListEnd(int id) OVERRIDE; + virtual bool OnUInt(int id, int64 val) OVERRIDE; + virtual bool OnBinary(int id, const uint8* data, int size) OVERRIDE; + + bool ParseBlock(bool is_simple_block, const uint8* buf, int size, + const uint8* additional, int additional_size, int duration, + int64 discard_padding); + bool OnBlock(bool is_simple_block, int track_num, int timecode, int duration, + int flags, const uint8* data, int size, + const uint8* additional, int additional_size, + int64 discard_padding); + + // Resets the Track objects associated with each text track. + void ResetTextTracks(); + + // Search for the indicated track_num among the text tracks. Returns NULL + // if that track num is not a text track. + Track* FindTextTrack(int track_num); + + double timecode_multiplier_; // Multiplier used to convert timecodes into + // microseconds. 
+ std::set<int64> ignored_tracks_; + std::string audio_encryption_key_id_; + std::string video_encryption_key_id_; + + WebMListParser parser_; + + int64 last_block_timecode_; + scoped_ptr<uint8[]> block_data_; + int block_data_size_; + int64 block_duration_; + int64 block_add_id_; + scoped_ptr<uint8[]> block_additional_data_; + int block_additional_data_size_; + int64 discard_padding_; + bool discard_padding_set_; + + int64 cluster_timecode_; + base::TimeDelta cluster_start_time_; + bool cluster_ended_; + + Track audio_; + Track video_; + TextTrackMap text_track_map_; + LogCB log_cb_; + + DISALLOW_IMPLICIT_CONSTRUCTORS(WebMClusterParser); +}; + +} // namespace media + +#endif // MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_ diff --git a/media/formats/webm/webm_cluster_parser_unittest.cc b/media/formats/webm/webm_cluster_parser_unittest.cc new file mode 100644 index 0000000..a358cfe --- /dev/null +++ b/media/formats/webm/webm_cluster_parser_unittest.cc @@ -0,0 +1,545 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <algorithm> + +#include "base/bind.h" +#include "base/logging.h" +#include "media/base/decrypt_config.h" +#include "media/formats/webm/cluster_builder.h" +#include "media/formats/webm/webm_cluster_parser.h" +#include "media/formats/webm/webm_constants.h" +#include "testing/gmock/include/gmock/gmock.h" +#include "testing/gtest/include/gtest/gtest.h" + +using ::testing::InSequence; +using ::testing::Return; +using ::testing::_; + +namespace media { + +enum { + kTimecodeScale = 1000000, // Timecode scale for millisecond timestamps. 
+  kAudioTrackNum = 1,
+  kVideoTrackNum = 2,
+  kTextTrackNum = 3,
+};
+
+// Describes one block to put into (or expect out of) a test cluster.
+struct BlockInfo {
+  int track_num;
+  int timestamp;
+  int duration;
+  bool use_simple_block;
+};
+
+static const BlockInfo kDefaultBlockInfo[] = {
+  { kAudioTrackNum, 0, 23, true },
+  { kAudioTrackNum, 23, 23, true },
+  { kVideoTrackNum, 33, 34, true },
+  { kAudioTrackNum, 46, 23, true },
+  { kVideoTrackNum, 67, 33, false },
+  { kAudioTrackNum, 69, 23, false },
+  { kVideoTrackNum, 100, 33, false },
+};
+
+static const uint8 kEncryptedFrame[] = {
+  0x01,  // Block is encrypted
+  0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08  // IV
+};
+
+// Builds a Cluster whose cluster timecode is |timecode| and that contains one
+// single-byte block per entry of |block_info|: a SimpleBlock when
+// |use_simple_block| is set, otherwise a BlockGroup carrying the entry's
+// duration (which must then be non-negative).
+static scoped_ptr<Cluster> CreateCluster(int timecode,
+                                         const BlockInfo* block_info,
+                                         int block_count) {
+  ClusterBuilder cb;
+  // Honor the caller-supplied timecode instead of silently forcing it to 0.
+  cb.SetClusterTimecode(timecode);
+
+  for (int i = 0; i < block_count; i++) {
+    uint8 data[] = { 0x00 };
+    if (block_info[i].use_simple_block) {
+      cb.AddSimpleBlock(block_info[i].track_num,
+                        block_info[i].timestamp,
+                        0, data, sizeof(data));
+      continue;
+    }
+
+    CHECK_GE(block_info[i].duration, 0);
+    cb.AddBlockGroup(block_info[i].track_num,
+                     block_info[i].timestamp,
+                     block_info[i].duration,
+                     0, data, sizeof(data));
+  }
+
+  return cb.Finish();
+}
+
+// Creates a Cluster with one encrypted Block. |bytes_to_write| is number of
+// bytes of the encrypted frame to write.
+static scoped_ptr<Cluster> CreateEncryptedCluster(int bytes_to_write) {
+  CHECK_GT(bytes_to_write, 0);
+  CHECK_LE(bytes_to_write, static_cast<int>(sizeof(kEncryptedFrame)));
+
+  ClusterBuilder cb;
+  cb.SetClusterTimecode(0);
+  cb.AddSimpleBlock(kVideoTrackNum, 0, 0, kEncryptedFrame, bytes_to_write);
+  return cb.Finish();
+}
+
+// Walks |block_info| in order and checks that each entry matches the next
+// unconsumed buffer of the queue belonging to its track. Returns false when a
+// track number is unknown or a queue runs out of buffers; timestamp/duration
+// mismatches are reported through non-fatal EXPECT_* failures.
+static bool VerifyBuffers(const WebMClusterParser::BufferQueue& audio_buffers,
+                          const WebMClusterParser::BufferQueue& video_buffers,
+                          const WebMClusterParser::BufferQueue& text_buffers,
+                          const BlockInfo* block_info,
+                          int block_count) {
+  size_t audio_offset = 0;
+  size_t video_offset = 0;
+  size_t text_offset = 0;
+  for (int i = 0; i < block_count; i++) {
+    const WebMClusterParser::BufferQueue* buffers = NULL;
+    size_t* offset;
+
+    if (block_info[i].track_num == kAudioTrackNum) {
+      buffers = &audio_buffers;
+      offset = &audio_offset;
+    } else if (block_info[i].track_num == kVideoTrackNum) {
+      buffers = &video_buffers;
+      offset = &video_offset;
+    } else if (block_info[i].track_num == kTextTrackNum) {
+      buffers = &text_buffers;
+      offset = &text_offset;
+    } else {
+      LOG(ERROR) << "Unexpected track number " << block_info[i].track_num;
+      return false;
+    }
+
+    if (*offset >= buffers->size())
+      return false;
+
+    scoped_refptr<StreamParserBuffer> buffer = (*buffers)[(*offset)++];
+
+
+    EXPECT_EQ(buffer->timestamp().InMilliseconds(), block_info[i].timestamp);
+
+    if (!block_info[i].use_simple_block)
+      EXPECT_NE(buffer->duration(), kNoTimestamp());
+
+    if (buffer->duration() != kNoTimestamp())
+      EXPECT_EQ(buffer->duration().InMilliseconds(), block_info[i].duration);
+  }
+
+  return true;
+}
+
+// Convenience overload: verifies |parser|'s audio and video queues plus the
+// queue of its first text track (an empty queue if it has no text tracks).
+static bool VerifyBuffers(const scoped_ptr<WebMClusterParser>& parser,
+                          const BlockInfo* block_info,
+                          int block_count) {
+  typedef WebMClusterParser::TextTrackIterator TextTrackIterator;
+  TextTrackIterator text_it = parser->CreateTextTrackIterator();
+
+  int text_track_num;
+  const WebMClusterParser::BufferQueue* text_buffers = NULL;
+
+  // Only the first text track is inspected. A single call is enough: the
+  // iterator stores NULL in |text_buffers| when there are no text tracks.
+  text_it(&text_track_num, &text_buffers);
+
+  const WebMClusterParser::BufferQueue no_text_buffers;
+
+  if (text_buffers == NULL)
+    text_buffers = &no_text_buffers;
+
+  return VerifyBuffers(parser->audio_buffers(),
+                       parser->video_buffers(),
+                       *text_buffers,
+                       block_info,
+                       block_count);
+}
+
+// Checks that |text_buffers| holds exactly the blocks of |block_info_ptr| that
+// belong to |text_track_num|, in order. Returns false if the queue has fewer
+// buffers than expected; other mismatches are reported through non-fatal
+// EXPECT_* failures.
+static bool VerifyTextBuffers(
+    const scoped_ptr<WebMClusterParser>& parser,
+    const BlockInfo* block_info_ptr,
+    int block_count,
+    int text_track_num,
+    const WebMClusterParser::BufferQueue& text_buffers) {
+  const BlockInfo* const block_info_end = block_info_ptr + block_count;
+
+  typedef WebMClusterParser::BufferQueue::const_iterator TextBufferIter;
+  TextBufferIter buffer_iter = text_buffers.begin();
+  const TextBufferIter buffer_end = text_buffers.end();
+
+  while (block_info_ptr != block_info_end) {
+    const BlockInfo& block_info = *block_info_ptr++;
+
+    if (block_info.track_num != text_track_num)
+      continue;
+
+    EXPECT_FALSE(block_info.use_simple_block);
+    EXPECT_FALSE(buffer_iter == buffer_end);
+    // EXPECT_* is non-fatal, so bail out explicitly rather than dereferencing
+    // the end iterator below.
+    if (buffer_iter == buffer_end)
+      return false;
+
+    const scoped_refptr<StreamParserBuffer> buffer = *buffer_iter++;
+    EXPECT_EQ(buffer->timestamp().InMilliseconds(), block_info.timestamp);
+    EXPECT_EQ(buffer->duration().InMilliseconds(), block_info.duration);
+  }
+
+  EXPECT_TRUE(buffer_iter == buffer_end);
+  return true;
+}
+
+// Expects |buffer| to carry a DecryptConfig whose IV has the decryption key
+// size.
+static void VerifyEncryptedBuffer(
+    scoped_refptr<StreamParserBuffer> buffer) {
+  EXPECT_TRUE(buffer->decrypt_config());
+  EXPECT_EQ(static_cast<unsigned long>(DecryptConfig::kDecryptionKeySize),
+            buffer->decrypt_config()->iv().length());
+}
+
+// Appends every buffer of |src| to |dest|, preserving order.
+static void AppendToEnd(const WebMClusterParser::BufferQueue& src,
+                        WebMClusterParser::BufferQueue* dest) {
+  for (WebMClusterParser::BufferQueue::const_iterator itr = src.begin();
+       itr != src.end(); ++itr) {
+    dest->push_back(*itr);
+  }
+}
+
+class WebMClusterParserTest : public testing::Test {
+ public:
+  WebMClusterParserTest()
+      : parser_(new WebMClusterParser(kTimecodeScale,
+                                      kAudioTrackNum,
+                                      kVideoTrackNum,
+
WebMTracksParser::TextTracks(), + std::set<int64>(), + std::string(), + std::string(), + LogCB())) {} + + protected: + scoped_ptr<WebMClusterParser> parser_; +}; + +TEST_F(WebMClusterParserTest, Reset) { + InSequence s; + + int block_count = arraysize(kDefaultBlockInfo); + scoped_ptr<Cluster> cluster(CreateCluster(0, kDefaultBlockInfo, block_count)); + + // Send slightly less than the full cluster so all but the last block is + // parsed. + int result = parser_->Parse(cluster->data(), cluster->size() - 1); + EXPECT_GT(result, 0); + EXPECT_LT(result, cluster->size()); + + ASSERT_TRUE(VerifyBuffers(parser_, kDefaultBlockInfo, block_count - 1)); + parser_->Reset(); + + // Now parse a whole cluster to verify that all the blocks will get parsed. + result = parser_->Parse(cluster->data(), cluster->size()); + EXPECT_EQ(result, cluster->size()); + ASSERT_TRUE(VerifyBuffers(parser_, kDefaultBlockInfo, block_count)); +} + +TEST_F(WebMClusterParserTest, ParseClusterWithSingleCall) { + int block_count = arraysize(kDefaultBlockInfo); + scoped_ptr<Cluster> cluster(CreateCluster(0, kDefaultBlockInfo, block_count)); + + int result = parser_->Parse(cluster->data(), cluster->size()); + EXPECT_EQ(cluster->size(), result); + ASSERT_TRUE(VerifyBuffers(parser_, kDefaultBlockInfo, block_count)); +} + +TEST_F(WebMClusterParserTest, ParseClusterWithMultipleCalls) { + int block_count = arraysize(kDefaultBlockInfo); + scoped_ptr<Cluster> cluster(CreateCluster(0, kDefaultBlockInfo, block_count)); + + WebMClusterParser::BufferQueue audio_buffers; + WebMClusterParser::BufferQueue video_buffers; + const WebMClusterParser::BufferQueue no_text_buffers; + + const uint8* data = cluster->data(); + int size = cluster->size(); + int default_parse_size = 3; + int parse_size = std::min(default_parse_size, size); + + while (size > 0) { + int result = parser_->Parse(data, parse_size); + ASSERT_GE(result, 0); + ASSERT_LE(result, parse_size); + + if (result == 0) { + // The parser needs more data so increase 
the parse_size a little. + parse_size += default_parse_size; + parse_size = std::min(parse_size, size); + continue; + } + + AppendToEnd(parser_->audio_buffers(), &audio_buffers); + AppendToEnd(parser_->video_buffers(), &video_buffers); + + parse_size = default_parse_size; + + data += result; + size -= result; + } + ASSERT_TRUE(VerifyBuffers(audio_buffers, video_buffers, + no_text_buffers, kDefaultBlockInfo, + block_count)); +} + +// Verify that both BlockGroups with the BlockDuration before the Block +// and BlockGroups with the BlockDuration after the Block are supported +// correctly. +// Note: Raw bytes are use here because ClusterBuilder only generates +// one of these scenarios. +TEST_F(WebMClusterParserTest, ParseBlockGroup) { + const BlockInfo kBlockInfo[] = { + { kAudioTrackNum, 0, 23, false }, + { kVideoTrackNum, 33, 34, false }, + }; + int block_count = arraysize(kBlockInfo); + + const uint8 kClusterData[] = { + 0x1F, 0x43, 0xB6, 0x75, 0x9B, // Cluster(size=27) + 0xE7, 0x81, 0x00, // Timecode(size=1, value=0) + // BlockGroup with BlockDuration before Block. + 0xA0, 0x8A, // BlockGroup(size=10) + 0x9B, 0x81, 0x17, // BlockDuration(size=1, value=23) + 0xA1, 0x85, 0x81, 0x00, 0x00, 0x00, 0xaa, // Block(size=5, track=1, ts=0) + // BlockGroup with BlockDuration after Block. 
+ 0xA0, 0x8A, // BlockGroup(size=10) + 0xA1, 0x85, 0x82, 0x00, 0x21, 0x00, 0x55, // Block(size=5, track=2, ts=33) + 0x9B, 0x81, 0x22, // BlockDuration(size=1, value=34) + }; + const int kClusterSize = sizeof(kClusterData); + + int result = parser_->Parse(kClusterData, kClusterSize); + EXPECT_EQ(result, kClusterSize); + ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count)); +} + +TEST_F(WebMClusterParserTest, ParseSimpleBlockAndBlockGroupMixture) { + const BlockInfo kBlockInfo[] = { + { kAudioTrackNum, 0, 23, true }, + { kAudioTrackNum, 23, 23, false }, + { kVideoTrackNum, 33, 34, true }, + { kAudioTrackNum, 46, 23, false }, + { kVideoTrackNum, 67, 33, false }, + }; + int block_count = arraysize(kBlockInfo); + scoped_ptr<Cluster> cluster(CreateCluster(0, kBlockInfo, block_count)); + + int result = parser_->Parse(cluster->data(), cluster->size()); + EXPECT_EQ(cluster->size(), result); + ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count)); +} + +TEST_F(WebMClusterParserTest, IgnoredTracks) { + std::set<int64> ignored_tracks; + ignored_tracks.insert(kTextTrackNum); + + parser_.reset(new WebMClusterParser(kTimecodeScale, + kAudioTrackNum, + kVideoTrackNum, + WebMTracksParser::TextTracks(), + ignored_tracks, + std::string(), + std::string(), + LogCB())); + + const BlockInfo kInputBlockInfo[] = { + { kAudioTrackNum, 0, 23, true }, + { kAudioTrackNum, 23, 23, true }, + { kVideoTrackNum, 33, 33, true }, + { kTextTrackNum, 33, 99, true }, + { kAudioTrackNum, 46, 23, true }, + { kVideoTrackNum, 67, 33, true }, + }; + int input_block_count = arraysize(kInputBlockInfo); + + const BlockInfo kOutputBlockInfo[] = { + { kAudioTrackNum, 0, 23, true }, + { kAudioTrackNum, 23, 23, true }, + { kVideoTrackNum, 33, 33, true }, + { kAudioTrackNum, 46, 23, true }, + { kVideoTrackNum, 67, 33, true }, + }; + int output_block_count = arraysize(kOutputBlockInfo); + + scoped_ptr<Cluster> cluster( + CreateCluster(0, kInputBlockInfo, input_block_count)); + + int result = 
parser_->Parse(cluster->data(), cluster->size()); + EXPECT_EQ(cluster->size(), result); + ASSERT_TRUE(VerifyBuffers(parser_, kOutputBlockInfo, output_block_count)); +} + +TEST_F(WebMClusterParserTest, ParseTextTracks) { + typedef WebMTracksParser::TextTracks TextTracks; + TextTracks text_tracks; + + text_tracks.insert(std::make_pair(TextTracks::key_type(kTextTrackNum), + TextTrackConfig(kTextSubtitles, "", "", + ""))); + + parser_.reset(new WebMClusterParser(kTimecodeScale, + kAudioTrackNum, + kVideoTrackNum, + text_tracks, + std::set<int64>(), + std::string(), + std::string(), + LogCB())); + + const BlockInfo kInputBlockInfo[] = { + { kAudioTrackNum, 0, 23, true }, + { kAudioTrackNum, 23, 23, true }, + { kVideoTrackNum, 33, 33, true }, + { kTextTrackNum, 33, 42, false }, + { kAudioTrackNum, 46, 23, true }, + { kTextTrackNum, 55, 44, false }, + { kVideoTrackNum, 67, 33, true }, + }; + int input_block_count = arraysize(kInputBlockInfo); + + scoped_ptr<Cluster> cluster( + CreateCluster(0, kInputBlockInfo, input_block_count)); + + int result = parser_->Parse(cluster->data(), cluster->size()); + EXPECT_EQ(cluster->size(), result); + ASSERT_TRUE(VerifyBuffers(parser_, kInputBlockInfo, input_block_count)); +} + +TEST_F(WebMClusterParserTest, TextTracksSimpleBlock) { + typedef WebMTracksParser::TextTracks TextTracks; + WebMTracksParser::TextTracks text_tracks; + + text_tracks.insert(std::make_pair(TextTracks::key_type(kTextTrackNum), + TextTrackConfig(kTextSubtitles, "", "", + ""))); + + parser_.reset(new WebMClusterParser(kTimecodeScale, + kAudioTrackNum, + kVideoTrackNum, + text_tracks, + std::set<int64>(), + std::string(), + std::string(), + LogCB())); + + const BlockInfo kInputBlockInfo[] = { + { kTextTrackNum, 33, 42, true }, + }; + int input_block_count = arraysize(kInputBlockInfo); + + scoped_ptr<Cluster> cluster( + CreateCluster(0, kInputBlockInfo, input_block_count)); + + int result = parser_->Parse(cluster->data(), cluster->size()); + EXPECT_LT(result, 0); +} + 
+TEST_F(WebMClusterParserTest, ParseMultipleTextTracks) { + typedef WebMTracksParser::TextTracks TextTracks; + TextTracks text_tracks; + + const int kSubtitleTextTrackNum = kTextTrackNum; + const int kCaptionTextTrackNum = kTextTrackNum + 1; + + text_tracks.insert(std::make_pair(TextTracks::key_type(kSubtitleTextTrackNum), + TextTrackConfig(kTextSubtitles, "", "", + ""))); + + text_tracks.insert(std::make_pair(TextTracks::key_type(kCaptionTextTrackNum), + TextTrackConfig(kTextCaptions, "", "", + ""))); + + parser_.reset(new WebMClusterParser(kTimecodeScale, + kAudioTrackNum, + kVideoTrackNum, + text_tracks, + std::set<int64>(), + std::string(), + std::string(), + LogCB())); + + const BlockInfo kInputBlockInfo[] = { + { kAudioTrackNum, 0, 23, true }, + { kAudioTrackNum, 23, 23, true }, + { kVideoTrackNum, 33, 33, true }, + { kSubtitleTextTrackNum, 33, 42, false }, + { kAudioTrackNum, 46, 23, true }, + { kCaptionTextTrackNum, 55, 44, false }, + { kVideoTrackNum, 67, 33, true }, + { kSubtitleTextTrackNum, 67, 33, false }, + }; + int input_block_count = arraysize(kInputBlockInfo); + + scoped_ptr<Cluster> cluster( + CreateCluster(0, kInputBlockInfo, input_block_count)); + + int result = parser_->Parse(cluster->data(), cluster->size()); + EXPECT_EQ(cluster->size(), result); + + WebMClusterParser::TextTrackIterator text_it = + parser_->CreateTextTrackIterator(); + + int text_track_num; + const WebMClusterParser::BufferQueue* text_buffers; + + while (text_it(&text_track_num, &text_buffers)) { + const WebMTracksParser::TextTracks::const_iterator find_result = + text_tracks.find(text_track_num); + ASSERT_TRUE(find_result != text_tracks.end()); + ASSERT_TRUE(VerifyTextBuffers(parser_, kInputBlockInfo, input_block_count, + text_track_num, *text_buffers)); + } +} + +TEST_F(WebMClusterParserTest, ParseEncryptedBlock) { + scoped_ptr<Cluster> cluster(CreateEncryptedCluster(sizeof(kEncryptedFrame))); + + parser_.reset(new WebMClusterParser(kTimecodeScale, + kAudioTrackNum, + 
kVideoTrackNum, + WebMTracksParser::TextTracks(), + std::set<int64>(), + std::string(), + "video_key_id", + LogCB())); + int result = parser_->Parse(cluster->data(), cluster->size()); + EXPECT_EQ(cluster->size(), result); + ASSERT_EQ(1UL, parser_->video_buffers().size()); + scoped_refptr<StreamParserBuffer> buffer = parser_->video_buffers()[0]; + VerifyEncryptedBuffer(buffer); +} + +TEST_F(WebMClusterParserTest, ParseBadEncryptedBlock) { + scoped_ptr<Cluster> cluster( + CreateEncryptedCluster(sizeof(kEncryptedFrame) - 1)); + + parser_.reset(new WebMClusterParser(kTimecodeScale, + kAudioTrackNum, + kVideoTrackNum, + WebMTracksParser::TextTracks(), + std::set<int64>(), + std::string(), + "video_key_id", + LogCB())); + int result = parser_->Parse(cluster->data(), cluster->size()); + EXPECT_EQ(-1, result); +} + +TEST_F(WebMClusterParserTest, ParseInvalidZeroSizedCluster) { + const uint8 kBuffer[] = { + 0x1F, 0x43, 0xB6, 0x75, 0x80, // CLUSTER (size = 0) + }; + + EXPECT_EQ(parser_->Parse(kBuffer, sizeof(kBuffer)), -1); +} + +TEST_F(WebMClusterParserTest, ParseInvalidUnknownButActuallyZeroSizedCluster) { + const uint8 kBuffer[] = { + 0x1F, 0x43, 0xB6, 0x75, 0xFF, // CLUSTER (size = "unknown") + 0x1F, 0x43, 0xB6, 0x75, 0x85, // CLUSTER (size = 5) + }; + + EXPECT_EQ(parser_->Parse(kBuffer, sizeof(kBuffer)), -1); +} + +} // namespace media diff --git a/media/formats/webm/webm_constants.cc b/media/formats/webm/webm_constants.cc new file mode 100644 index 0000000..d6c5536 --- /dev/null +++ b/media/formats/webm/webm_constants.cc @@ -0,0 +1,14 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "media/formats/webm/webm_constants.h" + +namespace media { + +// Codec identifier strings (D_WEBVTT/*), one per WebVTT text track kind. +const char kWebMCodecSubtitles[] = "D_WEBVTT/SUBTITLES"; +const char kWebMCodecCaptions[] = "D_WEBVTT/CAPTIONS"; +const char kWebMCodecDescriptions[] = "D_WEBVTT/DESCRIPTIONS"; +const char kWebMCodecMetadata[] = "D_WEBVTT/METADATA"; + +} // namespace media diff --git a/media/formats/webm/webm_constants.h b/media/formats/webm/webm_constants.h new file mode 100644 index 0000000..6803bf7 --- /dev/null +++ b/media/formats/webm/webm_constants.h @@ -0,0 +1,229 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_WEBM_WEBM_CONSTANTS_H_ +#define MEDIA_FORMATS_WEBM_WEBM_CONSTANTS_H_ + +#include "base/basictypes.h" +#include "media/base/media_export.h" + +namespace media { + +// WebM element IDs. +// This is a subset of the IDs in the Matroska spec. +// http://www.matroska.org/technical/specs/index.html +// The list is kept in alphabetical order by element name. +const int kWebMIdAESSettingsCipherMode = 0x47E8; +const int kWebMIdAlphaMode = 0x53C0; +const int kWebMIdAspectRatioType = 0x54B3; +const int kWebMIdAttachedFile = 0x61A7; +const int kWebMIdAttachmentLink = 0x7446; +const int kWebMIdAttachments = 0x1941A469; +const int kWebMIdAudio = 0xE1; +const int kWebMIdBitDepth = 0x6264; +const int kWebMIdBlock = 0xA1; +const int kWebMIdBlockAddID = 0xEE; +const int kWebMIdBlockAdditions = 0x75A1; +const int kWebMIdBlockAdditional = 0xA5; +const int kWebMIdBlockDuration = 0x9B; +const int kWebMIdBlockGroup = 0xA0; +const int kWebMIdBlockMore = 0xA6; +const int kWebMIdChannels = 0x9F; +const int kWebMIdChapCountry = 0x437E; +const int kWebMIdChapLanguage = 0x437C; +const int kWebMIdChapProcess = 0x6944; +const int kWebMIdChapProcessCodecID = 0x6955; +const int kWebMIdChapProcessCommand = 0x6911; +const int kWebMIdChapProcessData = 0x6933; +const int kWebMIdChapProcessPrivate = 0x450D; +const int kWebMIdChapProcessTime = 
0x6922; +const int kWebMIdChapString = 0x85; +const int kWebMIdChapterAtom = 0xB6; +const int kWebMIdChapterDisplay = 0x80; +const int kWebMIdChapterFlagEnabled = 0x4598; +const int kWebMIdChapterFlagHidden = 0x98; +const int kWebMIdChapterPhysicalEquiv = 0x63C3; +const int kWebMIdChapters = 0x1043A770; +const int kWebMIdChapterSegmentEditionUID = 0x6EBC; +const int kWebMIdChapterSegmentUID = 0x6E67; +const int kWebMIdChapterTimeEnd = 0x92; +const int kWebMIdChapterTimeStart = 0x91; +const int kWebMIdChapterTrack = 0x8F; +const int kWebMIdChapterTrackNumber = 0x89; +const int kWebMIdChapterTranslate = 0x6924; +const int kWebMIdChapterTranslateCodec = 0x69BF; +const int kWebMIdChapterTranslateEditionUID = 0x69FC; +const int kWebMIdChapterTranslateID = 0x69A5; +const int kWebMIdChapterUID = 0x73C4; +const int kWebMIdCluster = 0x1F43B675; +const int kWebMIdCodecDecodeAll = 0xAA; +const int kWebMIdCodecDelay = 0x56AA; +const int kWebMIdCodecID = 0x86; +const int kWebMIdCodecName = 0x258688; +const int kWebMIdCodecPrivate = 0x63A2; +const int kWebMIdCodecState = 0xA4; +const int kWebMIdColorSpace = 0x2EB524; +const int kWebMIdContentCompAlgo = 0x4254; +const int kWebMIdContentCompression = 0x5034; +const int kWebMIdContentCompSettings = 0x4255; +const int kWebMIdContentEncAESSettings = 0x47E7; +const int kWebMIdContentEncAlgo = 0x47E1; +const int kWebMIdContentEncKeyID = 0x47E2; +const int kWebMIdContentEncoding = 0x6240; +const int kWebMIdContentEncodingOrder = 0x5031; +const int kWebMIdContentEncodings = 0x6D80; +const int kWebMIdContentEncodingScope = 0x5032; +const int kWebMIdContentEncodingType = 0x5033; +const int kWebMIdContentEncryption = 0x5035; +const int kWebMIdContentSigAlgo = 0x47E5; +const int kWebMIdContentSigHashAlgo = 0x47E6; +const int kWebMIdContentSigKeyID = 0x47E4; +const int kWebMIdContentSignature = 0x47E3; +const int kWebMIdCRC32 = 0xBF; +const int kWebMIdCueBlockNumber = 0x5378; +const int kWebMIdCueClusterPosition = 0xF1; +const int 
kWebMIdCueCodecState = 0xEA; +const int kWebMIdCuePoint = 0xBB; +const int kWebMIdCueReference = 0xDB; +const int kWebMIdCueRefTime = 0x96; +const int kWebMIdCues = 0x1C53BB6B; +const int kWebMIdCueTime = 0xB3; +const int kWebMIdCueTrack = 0xF7; +const int kWebMIdCueTrackPositions = 0xB7; +const int kWebMIdDateUTC = 0x4461; +const int kWebMIdDefaultDuration = 0x23E383; +const int kWebMIdDiscardPadding = 0x75A2; +const int kWebMIdDisplayHeight = 0x54BA; +const int kWebMIdDisplayUnit = 0x54B2; +const int kWebMIdDisplayWidth = 0x54B0; +const int kWebMIdDocType = 0x4282; +const int kWebMIdDocTypeReadVersion = 0x4285; +const int kWebMIdDocTypeVersion = 0x4287; +const int kWebMIdDuration = 0x4489; +const int kWebMIdEBMLHeader = 0x1A45DFA3; +const int kWebMIdEBMLMaxIDLength = 0x42F2; +const int kWebMIdEBMLMaxSizeLength = 0x42F3; +const int kWebMIdEBMLReadVersion = 0x42F7; +const int kWebMIdEBMLVersion = 0x4286; +const int kWebMIdEditionEntry = 0x45B9; +const int kWebMIdEditionFlagDefault = 0x45DB; +const int kWebMIdEditionFlagHidden = 0x45BD; +const int kWebMIdEditionFlagOrdered = 0x45DD; +const int kWebMIdEditionUID = 0x45BC; +const int kWebMIdFileData = 0x465C; +const int kWebMIdFileDescription = 0x467E; +const int kWebMIdFileMimeType = 0x4660; +const int kWebMIdFileName = 0x466E; +const int kWebMIdFileUID = 0x46AE; +const int kWebMIdFlagDefault = 0x88; +const int kWebMIdFlagEnabled = 0xB9; +const int kWebMIdFlagForced = 0x55AA; +const int kWebMIdFlagInterlaced = 0x9A; +const int kWebMIdFlagLacing = 0x9C; +const int kWebMIdFrameRate = 0x2383E3; +const int kWebMIdInfo = 0x1549A966; +const int kWebMIdJoinBlocks = 0xE9; +const int kWebMIdLaceNumber = 0xCC; +const int kWebMIdLanguage = 0x22B59C; +const int kWebMIdMaxBlockAdditionId = 0x55EE; +const int kWebMIdMaxCache = 0x6DF8; +const int kWebMIdMinCache = 0x6DE7; +const int kWebMIdMuxingApp = 0x4D80; +const int kWebMIdName = 0x536E; +const int kWebMIdNextFilename = 0x3E83BB; +const int kWebMIdNextUID = 0x3EB923; +const int 
kWebMIdOutputSamplingFrequency = 0x78B5; +const int kWebMIdPixelCropBottom = 0x54AA; +const int kWebMIdPixelCropLeft = 0x54CC; +const int kWebMIdPixelCropRight = 0x54DD; +const int kWebMIdPixelCropTop = 0x54BB; +const int kWebMIdPixelHeight = 0xBA; +const int kWebMIdPixelWidth = 0xB0; +const int kWebMIdPosition = 0xA7; +const int kWebMIdPrevFilename = 0x3C83AB; +const int kWebMIdPrevSize = 0xAB; +const int kWebMIdPrevUID = 0x3CB923; +const int kWebMIdReferenceBlock = 0xFB; +const int kWebMIdReferencePriority = 0xFA; +const int kWebMIdSamplingFrequency = 0xB5; +const int kWebMIdSeek = 0x4DBB; +const int kWebMIdSeekHead = 0x114D9B74; +const int kWebMIdSeekID = 0x53AB; +const int kWebMIdSeekPosition = 0x53AC; +const int kWebMIdSeekPreRoll = 0x56BB; +const int kWebMIdSegment = 0x18538067; +const int kWebMIdSegmentFamily = 0x4444; +const int kWebMIdSegmentFilename = 0x7384; +const int kWebMIdSegmentUID = 0x73A4; +const int kWebMIdSilentTrackNumber = 0x58D7; +const int kWebMIdSilentTracks = 0x5854; +const int kWebMIdSimpleBlock = 0xA3; +const int kWebMIdSimpleTag = 0x67C8; +const int kWebMIdSlices = 0x8E; +const int kWebMIdStereoMode = 0x53B8; +const int kWebMIdTag = 0x7373; +const int kWebMIdTagAttachmentUID = 0x63C6; +const int kWebMIdTagBinary = 0x4485; +const int kWebMIdTagChapterUID = 0x63C4; +const int kWebMIdTagDefault = 0x4484; +const int kWebMIdTagEditionUID = 0x63C9; +const int kWebMIdTagLanguage = 0x447A; +const int kWebMIdTagName = 0x45A3; +const int kWebMIdTags = 0x1254C367; +const int kWebMIdTagString = 0x4487; +const int kWebMIdTagTrackUID = 0x63C5; +const int kWebMIdTargets = 0x63C0; +const int kWebMIdTargetType = 0x63CA; +const int kWebMIdTargetTypeValue = 0x68CA; +const int kWebMIdTimecode = 0xE7; +const int kWebMIdTimecodeScale = 0x2AD7B1; +const int kWebMIdTimeSlice = 0xE8; +const int kWebMIdTitle = 0x7BA9; +const int kWebMIdTrackCombinePlanes = 0xE3; +const int kWebMIdTrackEntry = 0xAE; +const int kWebMIdTrackJoinUID = 0xED; +const int 
kWebMIdTrackNumber = 0xD7; +const int kWebMIdTrackOperation = 0xE2; +const int kWebMIdTrackOverlay = 0x6FAB; +const int kWebMIdTrackPlane = 0xE4; +const int kWebMIdTrackPlaneType = 0xE6; +const int kWebMIdTrackPlaneUID = 0xE5; +const int kWebMIdTracks = 0x1654AE6B; +const int kWebMIdTrackTimecodeScale = 0x23314F; +const int kWebMIdTrackTranslate = 0x6624; +const int kWebMIdTrackTranslateCodec = 0x66BF; +const int kWebMIdTrackTranslateEditionUID = 0x66FC; +const int kWebMIdTrackTranslateTrackID = 0x66A5; +const int kWebMIdTrackType = 0x83; +const int kWebMIdTrackUID = 0x73C5; +const int kWebMIdVideo = 0xE0; +const int kWebMIdVoid = 0xEC; +const int kWebMIdWritingApp = 0x5741; + +const int64 kWebMReservedId = 0x1FFFFFFF; +const int64 kWebMUnknownSize = GG_LONGLONG(0x00FFFFFFFFFFFFFF); + +const uint8 kWebMFlagKeyframe = 0x80; + +// Current encrypted WebM request for comments specification is here +// http://wiki.webmproject.org/encryption/webm-encryption-rfc +// kWebMFlagEncryptedFrame is a bit tested in the signal byte that starts each +// block's data; kWebMIvSize and kWebMSignalByteSize are byte counts. +const uint8 kWebMFlagEncryptedFrame = 0x1; +const int kWebMIvSize = 8; +const int kWebMSignalByteSize = 1; + +// Current specification for WebVTT embedded in WebM +// http://wiki.webmproject.org/webm-metadata/temporal-metadata/webvtt-in-webm + +const int kWebMTrackTypeVideo = 1; +const int kWebMTrackTypeAudio = 2; +const int kWebMTrackTypeSubtitlesOrCaptions = 0x11; +const int kWebMTrackTypeDescriptionsOrMetadata = 0x21; + +// Codec identifier strings for WebVTT text tracks; defined in +// webm_constants.cc. +MEDIA_EXPORT extern const char kWebMCodecSubtitles[]; +MEDIA_EXPORT extern const char kWebMCodecCaptions[]; +MEDIA_EXPORT extern const char kWebMCodecDescriptions[]; +MEDIA_EXPORT extern const char kWebMCodecMetadata[]; + +} // namespace media + +#endif // MEDIA_FORMATS_WEBM_WEBM_CONSTANTS_H_ diff --git a/media/formats/webm/webm_content_encodings.cc b/media/formats/webm/webm_content_encodings.cc new file mode 100644 index 0000000..157c6ac --- /dev/null +++ b/media/formats/webm/webm_content_encodings.cc @@ -0,0 +1,28 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/logging.h" +#include "media/formats/webm/webm_content_encodings.h" + +namespace media { + +// Every field starts at its "invalid" sentinel so that a caller can tell +// which values have not been set yet. +ContentEncoding::ContentEncoding() + : order_(kOrderInvalid), + scope_(kScopeInvalid), + type_(kTypeInvalid), + encryption_algo_(kEncAlgoInvalid), + cipher_mode_(kCipherModeInvalid) { +} + +ContentEncoding::~ContentEncoding() {} + +// Stores a copy of the |size| bytes at |encryption_key_id| as the key ID. +// A non-NULL pointer and a positive size are DCHECKed. +void ContentEncoding::SetEncryptionKeyId(const uint8* encryption_key_id, + int size) { + DCHECK(encryption_key_id); + DCHECK_GT(size, 0); + encryption_key_id_.assign(reinterpret_cast<const char*>(encryption_key_id), + size); +} + +} // namespace media diff --git a/media/formats/webm/webm_content_encodings.h b/media/formats/webm/webm_content_encodings.h new file mode 100644 index 0000000..5890ecf --- /dev/null +++ b/media/formats/webm/webm_content_encodings.h @@ -0,0 +1,88 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_WEBM_WEBM_CONTENT_ENCODINGS_H_ +#define MEDIA_FORMATS_WEBM_WEBM_CONTENT_ENCODINGS_H_ + +#include <string> + +#include "base/basictypes.h" +#include "base/memory/scoped_ptr.h" +#include "media/base/media_export.h" + +namespace media { + +// Plain value holder for the fields of a single ContentEncoding element. +// Not copyable. +class MEDIA_EXPORT ContentEncoding { + public: + // The following enum definitions are based on the ContentEncoding element + // specified in the Matroska spec. 
+ + static const int kOrderInvalid = -1; + + // Bit flags: values may be OR-ed together, kScopeMax being all three bits. + enum Scope { + kScopeInvalid = 0, + kScopeAllFrameContents = 1, + kScopeTrackPrivateData = 2, + kScopeNextContentEncodingData = 4, + kScopeMax = 7, + }; + + enum Type { + kTypeInvalid = -1, + kTypeCompression = 0, + kTypeEncryption = 1, + }; + + enum EncryptionAlgo { + kEncAlgoInvalid = -1, + kEncAlgoNotEncrypted = 0, + kEncAlgoDes = 1, + kEncAlgo3des = 2, + kEncAlgoTwofish = 3, + kEncAlgoBlowfish = 4, + kEncAlgoAes = 5, + }; + + enum CipherMode { + kCipherModeInvalid = 0, + kCipherModeCtr = 1, + }; + + ContentEncoding(); + ~ContentEncoding(); + + int64 order() const { return order_; } + void set_order(int64 order) { order_ = order; } + + Scope scope() const { return scope_; } + void set_scope(Scope scope) { scope_ = scope; } + + Type type() const { return type_; } + void set_type(Type type) { type_ = type; } + + EncryptionAlgo encryption_algo() const { return encryption_algo_; } + void set_encryption_algo(EncryptionAlgo encryption_algo) { + encryption_algo_ = encryption_algo; + } + + // The key ID is held as raw bytes in a std::string; empty until set. + const std::string& encryption_key_id() const { return encryption_key_id_; } + void SetEncryptionKeyId(const uint8* encryption_key_id, int size); + + CipherMode cipher_mode() const { return cipher_mode_; } + void set_cipher_mode(CipherMode mode) { cipher_mode_ = mode; } + + private: + int64 order_; + Scope scope_; + Type type_; + EncryptionAlgo encryption_algo_; + std::string encryption_key_id_; + CipherMode cipher_mode_; + + DISALLOW_COPY_AND_ASSIGN(ContentEncoding); +}; + +} // namespace media + +#endif // MEDIA_FORMATS_WEBM_WEBM_CONTENT_ENCODINGS_H_ diff --git a/media/formats/webm/webm_content_encodings_client.cc b/media/formats/webm/webm_content_encodings_client.cc new file mode 100644 index 0000000..f2294de --- /dev/null +++ b/media/formats/webm/webm_content_encodings_client.cc @@ -0,0 +1,265 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/webm/webm_content_encodings_client.h" + +#include "base/logging.h" +#include "base/stl_util.h" +#include "media/formats/webm/webm_constants.h" + +namespace media { + +WebMContentEncodingsClient::WebMContentEncodingsClient(const LogCB& log_cb) + : log_cb_(log_cb), + content_encryption_encountered_(false), + content_encodings_ready_(false) { +} + +WebMContentEncodingsClient::~WebMContentEncodingsClient() { + STLDeleteElements(&content_encodings_); +} + +const ContentEncodings& WebMContentEncodingsClient::content_encodings() const { + DCHECK(content_encodings_ready_); + return content_encodings_; +} + +WebMParserClient* WebMContentEncodingsClient::OnListStart(int id) { + if (id == kWebMIdContentEncodings) { + DCHECK(!cur_content_encoding_.get()); + DCHECK(!content_encryption_encountered_); + STLDeleteElements(&content_encodings_); + content_encodings_ready_ = false; + return this; + } + + if (id == kWebMIdContentEncoding) { + DCHECK(!cur_content_encoding_.get()); + DCHECK(!content_encryption_encountered_); + cur_content_encoding_.reset(new ContentEncoding()); + return this; + } + + if (id == kWebMIdContentEncryption) { + DCHECK(cur_content_encoding_.get()); + if (content_encryption_encountered_) { + MEDIA_LOG(log_cb_) << "Unexpected multiple ContentEncryption."; + return NULL; + } + content_encryption_encountered_ = true; + return this; + } + + if (id == kWebMIdContentEncAESSettings) { + DCHECK(cur_content_encoding_.get()); + return this; + } + + // This should not happen if WebMListParser is working properly. + DCHECK(false); + return NULL; +} + +// Mandatory occurrence restriction is checked in this function. Multiple +// occurrence restriction is checked in OnUInt and OnBinary. +bool WebMContentEncodingsClient::OnListEnd(int id) { + if (id == kWebMIdContentEncodings) { + // ContentEncoding element is mandatory. Check this! 
+ if (content_encodings_.empty()) { + MEDIA_LOG(log_cb_) << "Missing ContentEncoding."; + return false; + } + content_encodings_ready_ = true; + return true; + } + + if (id == kWebMIdContentEncoding) { + DCHECK(cur_content_encoding_.get()); + + // + // Specify default values to missing mandatory elements. + // + + if (cur_content_encoding_->order() == ContentEncoding::kOrderInvalid) { + // Default value of encoding order is 0, which should only be used on the + // first ContentEncoding. + if (!content_encodings_.empty()) { + MEDIA_LOG(log_cb_) << "Missing ContentEncodingOrder."; + return false; + } + cur_content_encoding_->set_order(0); + } + + if (cur_content_encoding_->scope() == ContentEncoding::kScopeInvalid) + cur_content_encoding_->set_scope(ContentEncoding::kScopeAllFrameContents); + + if (cur_content_encoding_->type() == ContentEncoding::kTypeInvalid) + cur_content_encoding_->set_type(ContentEncoding::kTypeCompression); + + // Check for elements valid in spec but not supported for now. + if (cur_content_encoding_->type() == ContentEncoding::kTypeCompression) { + MEDIA_LOG(log_cb_) << "ContentCompression not supported."; + return false; + } + + // Enforce mandatory elements without default values. + DCHECK(cur_content_encoding_->type() == ContentEncoding::kTypeEncryption); + if (!content_encryption_encountered_) { + MEDIA_LOG(log_cb_) << "ContentEncodingType is encryption but" + << " ContentEncryption is missing."; + return false; + } + + content_encodings_.push_back(cur_content_encoding_.release()); + content_encryption_encountered_ = false; + return true; + } + + if (id == kWebMIdContentEncryption) { + DCHECK(cur_content_encoding_.get()); + // Specify default value for elements that are not present. 
+ if (cur_content_encoding_->encryption_algo() == + ContentEncoding::kEncAlgoInvalid) { + cur_content_encoding_->set_encryption_algo( + ContentEncoding::kEncAlgoNotEncrypted); + } + return true; + } + + if (id == kWebMIdContentEncAESSettings) { + if (cur_content_encoding_->cipher_mode() == + ContentEncoding::kCipherModeInvalid) + cur_content_encoding_->set_cipher_mode(ContentEncoding::kCipherModeCtr); + return true; + } + + // This should not happen if WebMListParser is working properly. + DCHECK(false); + return false; +} + +// Multiple occurrence restriction and range are checked in this function. +// Mandatory occurrence restriction is checked in OnListEnd. +bool WebMContentEncodingsClient::OnUInt(int id, int64 val) { + DCHECK(cur_content_encoding_.get()); + + if (id == kWebMIdContentEncodingOrder) { + if (cur_content_encoding_->order() != ContentEncoding::kOrderInvalid) { + MEDIA_LOG(log_cb_) << "Unexpected multiple ContentEncodingOrder."; + return false; + } + + if (val != static_cast<int64>(content_encodings_.size())) { + // According to the spec, encoding order starts with 0 and counts upwards. 
+ MEDIA_LOG(log_cb_) << "Unexpected ContentEncodingOrder."; + return false; + } + + cur_content_encoding_->set_order(val); + return true; + } + + if (id == kWebMIdContentEncodingScope) { + if (cur_content_encoding_->scope() != ContentEncoding::kScopeInvalid) { + MEDIA_LOG(log_cb_) << "Unexpected multiple ContentEncodingScope."; + return false; + } + + if (val == ContentEncoding::kScopeInvalid || + val > ContentEncoding::kScopeMax) { + MEDIA_LOG(log_cb_) << "Unexpected ContentEncodingScope."; + return false; + } + + if (val & ContentEncoding::kScopeNextContentEncodingData) { + MEDIA_LOG(log_cb_) << "Encoded next ContentEncoding is not supported."; + return false; + } + + cur_content_encoding_->set_scope(static_cast<ContentEncoding::Scope>(val)); + return true; + } + + if (id == kWebMIdContentEncodingType) { + if (cur_content_encoding_->type() != ContentEncoding::kTypeInvalid) { + MEDIA_LOG(log_cb_) << "Unexpected multiple ContentEncodingType."; + return false; + } + + if (val == ContentEncoding::kTypeCompression) { + MEDIA_LOG(log_cb_) << "ContentCompression not supported."; + return false; + } + + if (val != ContentEncoding::kTypeEncryption) { + MEDIA_LOG(log_cb_) << "Unexpected ContentEncodingType " << val << "."; + return false; + } + + cur_content_encoding_->set_type(static_cast<ContentEncoding::Type>(val)); + return true; + } + + if (id == kWebMIdContentEncAlgo) { + if (cur_content_encoding_->encryption_algo() != + ContentEncoding::kEncAlgoInvalid) { + MEDIA_LOG(log_cb_) << "Unexpected multiple ContentEncAlgo."; + return false; + } + + if (val < ContentEncoding::kEncAlgoNotEncrypted || + val > ContentEncoding::kEncAlgoAes) { + MEDIA_LOG(log_cb_) << "Unexpected ContentEncAlgo " << val << "."; + return false; + } + + cur_content_encoding_->set_encryption_algo( + static_cast<ContentEncoding::EncryptionAlgo>(val)); + return true; + } + + if (id == kWebMIdAESSettingsCipherMode) { + if (cur_content_encoding_->cipher_mode() != + ContentEncoding::kCipherModeInvalid) { 
+ MEDIA_LOG(log_cb_) << "Unexpected multiple AESSettingsCipherMode."; + return false; + } + + if (val != ContentEncoding::kCipherModeCtr) { + MEDIA_LOG(log_cb_) << "Unexpected AESSettingsCipherMode " << val << "."; + return false; + } + + cur_content_encoding_->set_cipher_mode( + static_cast<ContentEncoding::CipherMode>(val)); + return true; + } + + // This should not happen if WebMListParser is working properly. + DCHECK(false); + return false; +} + +// Multiple occurrence restriction is checked in this function. Mandatory +// restriction is checked in OnListEnd. +bool WebMContentEncodingsClient::OnBinary(int id, const uint8* data, int size) { + DCHECK(cur_content_encoding_.get()); + DCHECK(data); + DCHECK_GT(size, 0); + + if (id == kWebMIdContentEncKeyID) { + if (!cur_content_encoding_->encryption_key_id().empty()) { + MEDIA_LOG(log_cb_) << "Unexpected multiple ContentEncKeyID"; + return false; + } + cur_content_encoding_->SetEncryptionKeyId(data, size); + return true; + } + + // This should not happen if WebMListParser is working properly. + DCHECK(false); + return false; +} + +} // namespace media diff --git a/media/formats/webm/webm_content_encodings_client.h b/media/formats/webm/webm_content_encodings_client.h new file mode 100644 index 0000000..d00281e --- /dev/null +++ b/media/formats/webm/webm_content_encodings_client.h @@ -0,0 +1,50 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef MEDIA_FORMATS_WEBM_WEBM_CONTENT_ENCODINGS_CLIENT_H_ +#define MEDIA_FORMATS_WEBM_WEBM_CONTENT_ENCODINGS_CLIENT_H_ + +#include <vector> + +#include "base/callback.h" +#include "base/compiler_specific.h" +#include "base/memory/scoped_ptr.h" +#include "media/base/media_export.h" +#include "media/base/media_log.h" +#include "media/formats/webm/webm_content_encodings.h" +#include "media/formats/webm/webm_parser.h" + +namespace media { + +// Raw pointers; the WebMContentEncodingsClient that filled the vector owns +// the elements and deletes them. +typedef std::vector<ContentEncoding*> ContentEncodings; + +// Parser for WebM ContentEncodings element. +class MEDIA_EXPORT WebMContentEncodingsClient : public WebMParserClient { + public: + explicit WebMContentEncodingsClient(const LogCB& log_cb); + virtual ~WebMContentEncodingsClient(); + + // Must only be called once a ContentEncodings list has been parsed to its + // end (DCHECKed). The returned elements stay owned by this object. + const ContentEncodings& content_encodings() const; + + // WebMParserClient methods + virtual WebMParserClient* OnListStart(int id) OVERRIDE; + virtual bool OnListEnd(int id) OVERRIDE; + virtual bool OnUInt(int id, int64 val) OVERRIDE; + virtual bool OnBinary(int id, const uint8* data, int size) OVERRIDE; + + private: + LogCB log_cb_; + // The ContentEncoding currently being filled in, if any. + scoped_ptr<ContentEncoding> cur_content_encoding_; + // True once a ContentEncryption list has started in the current encoding. + bool content_encryption_encountered_; + // Completed encodings; elements are owned and deleted by this object. + ContentEncodings content_encodings_; + + // |content_encodings_| is ready. For debugging purpose. + bool content_encodings_ready_; + + DISALLOW_COPY_AND_ASSIGN(WebMContentEncodingsClient); +}; + +} // namespace media + +#endif // MEDIA_FORMATS_WEBM_WEBM_CONTENT_ENCODINGS_CLIENT_H_ diff --git a/media/formats/webm/webm_content_encodings_client_unittest.cc b/media/formats/webm/webm_content_encodings_client_unittest.cc new file mode 100644 index 0000000..e124f2d --- /dev/null +++ b/media/formats/webm/webm_content_encodings_client_unittest.cc @@ -0,0 +1,238 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "base/bind.h" +#include "media/formats/webm/webm_constants.h" +#include "media/formats/webm/webm_content_encodings_client.h" +#include "media/formats/webm/webm_parser.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace media { + +class WebMContentEncodingsClientTest : public testing::Test { + public: + WebMContentEncodingsClientTest() + : client_(LogCB()), + parser_(kWebMIdContentEncodings, &client_) {} + + void ParseAndExpectToFail(const uint8* buf, int size) { + int result = parser_.Parse(buf, size); + EXPECT_EQ(-1, result); + } + + protected: + WebMContentEncodingsClient client_; + WebMListParser parser_; +}; + +TEST_F(WebMContentEncodingsClientTest, EmptyContentEncodings) { + const uint8 kContentEncodings[] = { + 0x6D, 0x80, 0x80, // ContentEncodings (size = 0) + }; + int size = sizeof(kContentEncodings); + ParseAndExpectToFail(kContentEncodings, size); +} + +TEST_F(WebMContentEncodingsClientTest, EmptyContentEncoding) { + const uint8 kContentEncodings[] = { + 0x6D, 0x80, 0x83, // ContentEncodings (size = 3) + 0x63, 0x40, 0x80, // ContentEncoding (size = 0) + }; + int size = sizeof(kContentEncodings); + ParseAndExpectToFail(kContentEncodings, size); +} + +TEST_F(WebMContentEncodingsClientTest, SingleContentEncoding) { + const uint8 kContentEncodings[] = { + 0x6D, 0x80, 0xA1, // ContentEncodings (size = 33) + 0x62, 0x40, 0x9e, // ContentEncoding (size = 30) + 0x50, 0x31, 0x81, 0x00, // ContentEncodingOrder (size = 1) + 0x50, 0x32, 0x81, 0x01, // ContentEncodingScope (size = 1) + 0x50, 0x33, 0x81, 0x01, // ContentEncodingType (size = 1) + 0x50, 0x35, 0x8F, // ContentEncryption (size = 15) + 0x47, 0xE1, 0x81, 0x05, // ContentEncAlgo (size = 1) + 0x47, 0xE2, 0x88, // ContentEncKeyID (size = 8) + 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, + }; + int size = sizeof(kContentEncodings); + + int result = parser_.Parse(kContentEncodings, size); + ASSERT_EQ(size, result); + const ContentEncodings& content_encodings = 
client_.content_encodings(); + + ASSERT_EQ(1u, content_encodings.size()); + ASSERT_TRUE(content_encodings[0]); + EXPECT_EQ(0, content_encodings[0]->order()); + EXPECT_EQ(ContentEncoding::kScopeAllFrameContents, + content_encodings[0]->scope()); + EXPECT_EQ(ContentEncoding::kTypeEncryption, content_encodings[0]->type()); + EXPECT_EQ(ContentEncoding::kEncAlgoAes, + content_encodings[0]->encryption_algo()); + EXPECT_EQ(8u, content_encodings[0]->encryption_key_id().size()); +} + +TEST_F(WebMContentEncodingsClientTest, MultipleContentEncoding) { + const uint8 kContentEncodings[] = { + 0x6D, 0x80, 0xC2, // ContentEncodings (size = 66) + 0x62, 0x40, 0x9e, // ContentEncoding (size = 30) + 0x50, 0x31, 0x81, 0x00, // ContentEncodingOrder (size = 1) + 0x50, 0x32, 0x81, 0x03, // ContentEncodingScope (size = 1) + 0x50, 0x33, 0x81, 0x01, // ContentEncodingType (size = 1) + 0x50, 0x35, 0x8F, // ContentEncryption (size = 15) + 0x47, 0xE1, 0x81, 0x05, // ContentEncAlgo (size = 1) + 0x47, 0xE2, 0x88, // ContentEncKeyID (size = 8) + 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, + 0x62, 0x40, 0x9e, // ContentEncoding (size = 30) + 0x50, 0x31, 0x81, 0x01, // ContentEncodingOrder (size = 1) + 0x50, 0x32, 0x81, 0x03, // ContentEncodingScope (size = 1) + 0x50, 0x33, 0x81, 0x01, // ContentEncodingType (size = 1) + 0x50, 0x35, 0x8F, // ContentEncryption (size = 15) + 0x47, 0xE1, 0x81, 0x01, // ContentEncAlgo (size = 1) + 0x47, 0xE2, 0x88, // ContentEncKeyID (size = 8) + 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, + }; + int size = sizeof(kContentEncodings); + + int result = parser_.Parse(kContentEncodings, size); + ASSERT_EQ(size, result); + const ContentEncodings& content_encodings = client_.content_encodings(); + ASSERT_EQ(2u, content_encodings.size()); + + for (int i = 0; i < 2; ++i) { + ASSERT_TRUE(content_encodings[i]); + EXPECT_EQ(i, content_encodings[i]->order()); + EXPECT_EQ(ContentEncoding::kScopeAllFrameContents | + ContentEncoding::kScopeTrackPrivateData, + 
content_encodings[i]->scope()); + EXPECT_EQ(ContentEncoding::kTypeEncryption, content_encodings[i]->type()); + EXPECT_EQ(!i ? ContentEncoding::kEncAlgoAes : ContentEncoding::kEncAlgoDes, + content_encodings[i]->encryption_algo()); + EXPECT_EQ(8u, content_encodings[i]->encryption_key_id().size()); + } +} + +TEST_F(WebMContentEncodingsClientTest, DefaultValues) { + const uint8 kContentEncodings[] = { + 0x6D, 0x80, 0x8A, // ContentEncodings (size = 10) + 0x62, 0x40, 0x87, // ContentEncoding (size = 7) + // ContentEncodingOrder missing + // ContentEncodingScope missing + 0x50, 0x33, 0x81, 0x01, // ContentEncodingType (size = 1) + 0x50, 0x35, 0x80, // ContentEncryption (size = 0) + // ContentEncAlgo missing + }; + int size = sizeof(kContentEncodings); + + int result = parser_.Parse(kContentEncodings, size); + ASSERT_EQ(size, result); + const ContentEncodings& content_encodings = client_.content_encodings(); + + ASSERT_EQ(1u, content_encodings.size()); + ASSERT_TRUE(content_encodings[0]); + EXPECT_EQ(0, content_encodings[0]->order()); + EXPECT_EQ(ContentEncoding::kScopeAllFrameContents, + content_encodings[0]->scope()); + EXPECT_EQ(ContentEncoding::kTypeEncryption, content_encodings[0]->type()); + EXPECT_EQ(ContentEncoding::kEncAlgoNotEncrypted, + content_encodings[0]->encryption_algo()); + EXPECT_TRUE(content_encodings[0]->encryption_key_id().empty()); +} + +TEST_F(WebMContentEncodingsClientTest, ContentEncodingsClientReuse) { + const uint8 kContentEncodings[] = { + 0x6D, 0x80, 0xA1, // ContentEncodings (size = 33) + 0x62, 0x40, 0x9e, // ContentEncoding (size = 30) + 0x50, 0x31, 0x81, 0x00, // ContentEncodingOrder (size = 1) + 0x50, 0x32, 0x81, 0x01, // ContentEncodingScope (size = 1) + 0x50, 0x33, 0x81, 0x01, // ContentEncodingType (size = 1) + 0x50, 0x35, 0x8F, // ContentEncryption (size = 15) + 0x47, 0xE1, 0x81, 0x05, // ContentEncAlgo (size = 1) + 0x47, 0xE2, 0x88, // ContentEncKeyID (size = 8) + 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, + }; + int size = 
sizeof(kContentEncodings); + + // Parse for the first time. + int result = parser_.Parse(kContentEncodings, size); + ASSERT_EQ(size, result); + + // Parse again. + parser_.Reset(); + result = parser_.Parse(kContentEncodings, size); + ASSERT_EQ(size, result); + const ContentEncodings& content_encodings = client_.content_encodings(); + + ASSERT_EQ(1u, content_encodings.size()); + ASSERT_TRUE(content_encodings[0]); + EXPECT_EQ(0, content_encodings[0]->order()); + EXPECT_EQ(ContentEncoding::kScopeAllFrameContents, + content_encodings[0]->scope()); + EXPECT_EQ(ContentEncoding::kTypeEncryption, content_encodings[0]->type()); + EXPECT_EQ(ContentEncoding::kEncAlgoAes, + content_encodings[0]->encryption_algo()); + EXPECT_EQ(8u, content_encodings[0]->encryption_key_id().size()); +} + +TEST_F(WebMContentEncodingsClientTest, InvalidContentEncodingOrder) { + const uint8 kContentEncodings[] = { + 0x6D, 0x80, 0x8E, // ContentEncodings (size = 14) + 0x62, 0x40, 0x8B, // ContentEncoding (size = 11) + 0x50, 0x31, 0x81, 0xEE, // ContentEncodingOrder (size = 1), invalid + 0x50, 0x33, 0x81, 0x01, // ContentEncodingType (size = 1) + 0x50, 0x35, 0x80, // ContentEncryption (size = 0) + }; + int size = sizeof(kContentEncodings); + ParseAndExpectToFail(kContentEncodings, size); +} + +TEST_F(WebMContentEncodingsClientTest, InvalidContentEncodingScope) { + const uint8 kContentEncodings[] = { + 0x6D, 0x80, 0x8E, // ContentEncodings (size = 14) + 0x62, 0x40, 0x8B, // ContentEncoding (size = 11) + 0x50, 0x32, 0x81, 0xEE, // ContentEncodingScope (size = 1), invalid + 0x50, 0x33, 0x81, 0x01, // ContentEncodingType (size = 1) + 0x50, 0x35, 0x80, // ContentEncryption (size = 0) + }; + int size = sizeof(kContentEncodings); + ParseAndExpectToFail(kContentEncodings, size); +} + +TEST_F(WebMContentEncodingsClientTest, InvalidContentEncodingType) { + const uint8 kContentEncodings[] = { + 0x6D, 0x80, 0x8E, // ContentEncodings (size = 14) + 0x62, 0x40, 0x8B, // ContentEncoding (size = 11) + 0x50, 0x33, 
0x81, 0x00, // ContentEncodingType (size = 1), invalid + 0x50, 0x35, 0x80, // ContentEncryption (size = 0) + }; + int size = sizeof(kContentEncodings); + ParseAndExpectToFail(kContentEncodings, size); +} + +// ContentEncodingType is encryption but no ContentEncryption present. +TEST_F(WebMContentEncodingsClientTest, MissingContentEncryption) { + const uint8 kContentEncodings[] = { + 0x6D, 0x80, 0x87, // ContentEncodings (size = 7) + 0x62, 0x40, 0x84, // ContentEncoding (size = 4) + 0x50, 0x33, 0x81, 0x01, // ContentEncodingType (size = 1) + // ContentEncryption missing + }; + int size = sizeof(kContentEncodings); + ParseAndExpectToFail(kContentEncodings, size); +} + +TEST_F(WebMContentEncodingsClientTest, InvalidContentEncAlgo) { + const uint8 kContentEncodings[] = { + 0x6D, 0x80, 0x99, // ContentEncodings (size = 25) + 0x62, 0x40, 0x96, // ContentEncoding (size = 22) + 0x50, 0x33, 0x81, 0x01, // ContentEncodingType (size = 1) + 0x50, 0x35, 0x8F, // ContentEncryption (size = 15) + 0x47, 0xE1, 0x81, 0xEE, // ContentEncAlgo (size = 1), invalid + 0x47, 0xE2, 0x88, // ContentEncKeyID (size = 8) + 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, + }; + int size = sizeof(kContentEncodings); + ParseAndExpectToFail(kContentEncodings, size); +} + +} // namespace media diff --git a/media/formats/webm/webm_crypto_helpers.cc b/media/formats/webm/webm_crypto_helpers.cc new file mode 100644 index 0000000..bd473bc --- /dev/null +++ b/media/formats/webm/webm_crypto_helpers.cc @@ -0,0 +1,62 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/webm/webm_crypto_helpers.h" + +#include "base/logging.h" +#include "base/sys_byteorder.h" +#include "media/base/decrypt_config.h" +#include "media/formats/webm/webm_constants.h" + +namespace media { +namespace { + +// Generates a 16 byte CTR counter block. 
The CTR counter block format is a +// CTR IV appended with a CTR block counter. |iv| is an 8 byte CTR IV. +// |iv_size| is the size of |iv| in btyes. Returns a string of +// kDecryptionKeySize bytes. +std::string GenerateWebMCounterBlock(const uint8* iv, int iv_size) { + std::string counter_block(reinterpret_cast<const char*>(iv), iv_size); + counter_block.append(DecryptConfig::kDecryptionKeySize - iv_size, 0); + return counter_block; +} + +} // namespace anonymous + +bool WebMCreateDecryptConfig(const uint8* data, int data_size, + const uint8* key_id, int key_id_size, + scoped_ptr<DecryptConfig>* decrypt_config, + int* data_offset) { + if (data_size < kWebMSignalByteSize) { + DVLOG(1) << "Got a block from an encrypted stream with no data."; + return false; + } + + uint8 signal_byte = data[0]; + int frame_offset = sizeof(signal_byte); + + // Setting the DecryptConfig object of the buffer while leaving the + // initialization vector empty will tell the decryptor that the frame is + // unencrypted. + std::string counter_block; + + if (signal_byte & kWebMFlagEncryptedFrame) { + if (data_size < kWebMSignalByteSize + kWebMIvSize) { + DVLOG(1) << "Got an encrypted block with not enough data " << data_size; + return false; + } + counter_block = GenerateWebMCounterBlock(data + frame_offset, kWebMIvSize); + frame_offset += kWebMIvSize; + } + + decrypt_config->reset(new DecryptConfig( + std::string(reinterpret_cast<const char*>(key_id), key_id_size), + counter_block, + std::vector<SubsampleEntry>())); + *data_offset = frame_offset; + + return true; +} + +} // namespace media diff --git a/media/formats/webm/webm_crypto_helpers.h b/media/formats/webm/webm_crypto_helpers.h new file mode 100644 index 0000000..23095f3 --- /dev/null +++ b/media/formats/webm/webm_crypto_helpers.h @@ -0,0 +1,33 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
#ifndef MEDIA_FORMATS_WEBM_WEBM_CRYPTO_HELPERS_H_
#define MEDIA_FORMATS_WEBM_WEBM_CRYPTO_HELPERS_H_

#include "base/basictypes.h"
#include "base/memory/scoped_ptr.h"
#include "media/base/decoder_buffer.h"

namespace media {

// TODO(xhwang): Figure out the init data type appropriately once it's spec'ed.
// See https://www.w3.org/Bugs/Public/show_bug.cgi?id=19096 for more
// information.
const char kWebMEncryptInitDataType[] = "video/webm";

// Fills an initialized DecryptConfig, which can be sent to the Decryptor if
// the stream has potentially encrypted frames. Also sets |data_offset|, which
// indicates where the encrypted data starts. Leaving the IV empty will tell
// the decryptor that the frame is unencrypted.
//
// |data| / |data_size| - The block contents, starting with the signal byte.
// |key_id| / |key_id_size| - The key ID to store in the DecryptConfig.
//
// Returns true if |data| is valid, false otherwise, in which case
// |decrypt_config| and |data_offset| will not be changed.
//
// The current encrypted WebM request for comments specification is here:
// http://wiki.webmproject.org/encryption/webm-encryption-rfc
bool WebMCreateDecryptConfig(const uint8* data, int data_size,
                             const uint8* key_id, int key_id_size,
                             scoped_ptr<DecryptConfig>* decrypt_config,
                             int* data_offset);

}  // namespace media

#endif  // MEDIA_FORMATS_WEBM_WEBM_CRYPTO_HELPERS_H_
diff --git a/media/formats/webm/webm_info_parser.cc b/media/formats/webm/webm_info_parser.cc new file mode 100644 index 0000000..ac4f08c --- /dev/null +++ b/media/formats/webm/webm_info_parser.cc @@ -0,0 +1,84 @@
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/formats/webm/webm_info_parser.h"

#include "base/logging.h"
#include "media/formats/webm/webm_constants.h"

namespace media {

// Default timecode scale if the TimecodeScale element is
// not specified in the INFO element.
+static const int kWebMDefaultTimecodeScale = 1000000; + +WebMInfoParser::WebMInfoParser() + : timecode_scale_(-1), + duration_(-1) { +} + +WebMInfoParser::~WebMInfoParser() {} + +int WebMInfoParser::Parse(const uint8* buf, int size) { + timecode_scale_ = -1; + duration_ = -1; + + WebMListParser parser(kWebMIdInfo, this); + int result = parser.Parse(buf, size); + + if (result <= 0) + return result; + + // For now we do all or nothing parsing. + return parser.IsParsingComplete() ? result : 0; +} + +WebMParserClient* WebMInfoParser::OnListStart(int id) { return this; } + +bool WebMInfoParser::OnListEnd(int id) { + if (id == kWebMIdInfo && timecode_scale_ == -1) { + // Set timecode scale to default value if it isn't present in + // the Info element. + timecode_scale_ = kWebMDefaultTimecodeScale; + } + return true; +} + +bool WebMInfoParser::OnUInt(int id, int64 val) { + if (id != kWebMIdTimecodeScale) + return true; + + if (timecode_scale_ != -1) { + DVLOG(1) << "Multiple values for id " << std::hex << id << " specified"; + return false; + } + + timecode_scale_ = val; + return true; +} + +bool WebMInfoParser::OnFloat(int id, double val) { + if (id != kWebMIdDuration) { + DVLOG(1) << "Unexpected float for id" << std::hex << id; + return false; + } + + if (duration_ != -1) { + DVLOG(1) << "Multiple values for duration."; + return false; + } + + duration_ = val; + return true; +} + +bool WebMInfoParser::OnBinary(int id, const uint8* data, int size) { + return true; +} + +bool WebMInfoParser::OnString(int id, const std::string& str) { + return true; +} + +} // namespace media diff --git a/media/formats/webm/webm_info_parser.h b/media/formats/webm/webm_info_parser.h new file mode 100644 index 0000000..504b927 --- /dev/null +++ b/media/formats/webm/webm_info_parser.h @@ -0,0 +1,47 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
#ifndef MEDIA_FORMATS_WEBM_WEBM_INFO_PARSER_H_
#define MEDIA_FORMATS_WEBM_WEBM_INFO_PARSER_H_

#include "base/compiler_specific.h"
#include "media/base/media_export.h"
#include "media/formats/webm/webm_parser.h"

namespace media {

// Parser for WebM Info element. It records the TimecodeScale and Duration
// values found in the element and ignores the other children.
class MEDIA_EXPORT WebMInfoParser : public WebMParserClient {
 public:
  WebMInfoParser();
  virtual ~WebMInfoParser();

  // Parses a WebM Info element in |buf|.
  //
  // Returns -1 if the parse fails.
  // Returns 0 if more data is needed.
  // Returns the number of bytes parsed on success.
  int Parse(const uint8* buf, int size);

  // Value of the TimecodeScale element, or the default timecode scale when
  // the parsed Info element did not contain one. -1 until an Info element has
  // been parsed.
  int64 timecode_scale() const { return timecode_scale_; }

  // Value of the Duration element, or -1 if none has been parsed.
  double duration() const { return duration_; }

 private:
  // WebMParserClient methods
  virtual WebMParserClient* OnListStart(int id) OVERRIDE;
  virtual bool OnListEnd(int id) OVERRIDE;
  virtual bool OnUInt(int id, int64 val) OVERRIDE;
  virtual bool OnFloat(int id, double val) OVERRIDE;
  virtual bool OnBinary(int id, const uint8* data, int size) OVERRIDE;
  virtual bool OnString(int id, const std::string& str) OVERRIDE;

  // -1 means "not seen yet" for both members.
  int64 timecode_scale_;
  double duration_;

  DISALLOW_COPY_AND_ASSIGN(WebMInfoParser);
};

}  // namespace media

#endif  // MEDIA_FORMATS_WEBM_WEBM_INFO_PARSER_H_
diff --git a/media/formats/webm/webm_parser.cc b/media/formats/webm/webm_parser.cc new file mode 100644 index 0000000..2f2a1d2 --- /dev/null +++ b/media/formats/webm/webm_parser.cc @@ -0,0 +1,947 @@
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/formats/webm/webm_parser.h"

// This file contains code to parse WebM file elements. It was created
// from information in the Matroska spec.
// http://www.matroska.org/technical/specs/index.html
// This file contains code for encrypted WebM.
Current WebM +// encrypted request for comments specification is here +// http://wiki.webmproject.org/encryption/webm-encryption-rfc + +#include <iomanip> + +#include "base/logging.h" +#include "media/formats/webm/webm_constants.h" + +namespace media { + +enum ElementType { + UNKNOWN, + LIST, // Referred to as Master Element in the Matroska spec. + UINT, + FLOAT, + BINARY, + STRING, + SKIP, +}; + +struct ElementIdInfo { + ElementType type_; + int id_; +}; + +struct ListElementInfo { + int id_; + int level_; + const ElementIdInfo* id_info_; + int id_info_count_; +}; + +// The following are tables indicating what IDs are valid sub-elements +// of particular elements. If an element is encountered that doesn't +// appear in the list, a parsing error is signalled. Some elements are +// marked as SKIP because they are valid, but we don't care about them +// right now. +static const ElementIdInfo kEBMLHeaderIds[] = { + {UINT, kWebMIdEBMLVersion}, + {UINT, kWebMIdEBMLReadVersion}, + {UINT, kWebMIdEBMLMaxIDLength}, + {UINT, kWebMIdEBMLMaxSizeLength}, + {STRING, kWebMIdDocType}, + {UINT, kWebMIdDocTypeVersion}, + {UINT, kWebMIdDocTypeReadVersion}, +}; + +static const ElementIdInfo kSegmentIds[] = { + {LIST, kWebMIdSeekHead}, + {LIST, kWebMIdInfo}, + {LIST, kWebMIdCluster}, + {LIST, kWebMIdTracks}, + {LIST, kWebMIdCues}, + {LIST, kWebMIdAttachments}, + {LIST, kWebMIdChapters}, + {LIST, kWebMIdTags}, +}; + +static const ElementIdInfo kSeekHeadIds[] = { + {LIST, kWebMIdSeek}, +}; + +static const ElementIdInfo kSeekIds[] = { + {BINARY, kWebMIdSeekID}, + {UINT, kWebMIdSeekPosition}, +}; + +static const ElementIdInfo kInfoIds[] = { + {BINARY, kWebMIdSegmentUID}, + {STRING, kWebMIdSegmentFilename}, + {BINARY, kWebMIdPrevUID}, + {STRING, kWebMIdPrevFilename}, + {BINARY, kWebMIdNextUID}, + {STRING, kWebMIdNextFilename}, + {BINARY, kWebMIdSegmentFamily}, + {LIST, kWebMIdChapterTranslate}, + {UINT, kWebMIdTimecodeScale}, + {FLOAT, kWebMIdDuration}, + {BINARY, kWebMIdDateUTC}, + 
{STRING, kWebMIdTitle}, + {STRING, kWebMIdMuxingApp}, + {STRING, kWebMIdWritingApp}, +}; + +static const ElementIdInfo kChapterTranslateIds[] = { + {UINT, kWebMIdChapterTranslateEditionUID}, + {UINT, kWebMIdChapterTranslateCodec}, + {BINARY, kWebMIdChapterTranslateID}, +}; + +static const ElementIdInfo kClusterIds[] = { + {BINARY, kWebMIdSimpleBlock}, + {UINT, kWebMIdTimecode}, + {LIST, kWebMIdSilentTracks}, + {UINT, kWebMIdPosition}, + {UINT, kWebMIdPrevSize}, + {LIST, kWebMIdBlockGroup}, +}; + +static const ElementIdInfo kSilentTracksIds[] = { + {UINT, kWebMIdSilentTrackNumber}, +}; + +static const ElementIdInfo kBlockGroupIds[] = { + {BINARY, kWebMIdBlock}, + {LIST, kWebMIdBlockAdditions}, + {UINT, kWebMIdBlockDuration}, + {UINT, kWebMIdReferencePriority}, + {BINARY, kWebMIdReferenceBlock}, + {BINARY, kWebMIdCodecState}, + {UINT, kWebMIdDiscardPadding}, + {LIST, kWebMIdSlices}, +}; + +static const ElementIdInfo kBlockAdditionsIds[] = { + {LIST, kWebMIdBlockMore}, +}; + +static const ElementIdInfo kBlockMoreIds[] = { + {UINT, kWebMIdBlockAddID}, + {BINARY, kWebMIdBlockAdditional}, +}; + +static const ElementIdInfo kSlicesIds[] = { + {LIST, kWebMIdTimeSlice}, +}; + +static const ElementIdInfo kTimeSliceIds[] = { + {UINT, kWebMIdLaceNumber}, +}; + +static const ElementIdInfo kTracksIds[] = { + {LIST, kWebMIdTrackEntry}, +}; + +static const ElementIdInfo kTrackEntryIds[] = { + {UINT, kWebMIdTrackNumber}, + {UINT, kWebMIdTrackUID}, + {UINT, kWebMIdTrackType}, + {UINT, kWebMIdFlagEnabled}, + {UINT, kWebMIdFlagDefault}, + {UINT, kWebMIdFlagForced}, + {UINT, kWebMIdFlagLacing}, + {UINT, kWebMIdMinCache}, + {UINT, kWebMIdMaxCache}, + {UINT, kWebMIdDefaultDuration}, + {FLOAT, kWebMIdTrackTimecodeScale}, + {UINT, kWebMIdMaxBlockAdditionId}, + {STRING, kWebMIdName}, + {STRING, kWebMIdLanguage}, + {STRING, kWebMIdCodecID}, + {BINARY, kWebMIdCodecPrivate}, + {STRING, kWebMIdCodecName}, + {UINT, kWebMIdAttachmentLink}, + {UINT, kWebMIdCodecDecodeAll}, + {UINT, 
kWebMIdTrackOverlay}, + {UINT, kWebMIdCodecDelay}, + {UINT, kWebMIdSeekPreRoll}, + {LIST, kWebMIdTrackTranslate}, + {LIST, kWebMIdVideo}, + {LIST, kWebMIdAudio}, + {LIST, kWebMIdTrackOperation}, + {LIST, kWebMIdContentEncodings}, +}; + +static const ElementIdInfo kTrackTranslateIds[] = { + {UINT, kWebMIdTrackTranslateEditionUID}, + {UINT, kWebMIdTrackTranslateCodec}, + {BINARY, kWebMIdTrackTranslateTrackID}, +}; + +static const ElementIdInfo kVideoIds[] = { + {UINT, kWebMIdFlagInterlaced}, + {UINT, kWebMIdStereoMode}, + {UINT, kWebMIdAlphaMode}, + {UINT, kWebMIdPixelWidth}, + {UINT, kWebMIdPixelHeight}, + {UINT, kWebMIdPixelCropBottom}, + {UINT, kWebMIdPixelCropTop}, + {UINT, kWebMIdPixelCropLeft}, + {UINT, kWebMIdPixelCropRight}, + {UINT, kWebMIdDisplayWidth}, + {UINT, kWebMIdDisplayHeight}, + {UINT, kWebMIdDisplayUnit}, + {UINT, kWebMIdAspectRatioType}, + {BINARY, kWebMIdColorSpace}, + {FLOAT, kWebMIdFrameRate}, +}; + +static const ElementIdInfo kAudioIds[] = { + {FLOAT, kWebMIdSamplingFrequency}, + {FLOAT, kWebMIdOutputSamplingFrequency}, + {UINT, kWebMIdChannels}, + {UINT, kWebMIdBitDepth}, +}; + +static const ElementIdInfo kTrackOperationIds[] = { + {LIST, kWebMIdTrackCombinePlanes}, + {LIST, kWebMIdJoinBlocks}, +}; + +static const ElementIdInfo kTrackCombinePlanesIds[] = { + {LIST, kWebMIdTrackPlane}, +}; + +static const ElementIdInfo kTrackPlaneIds[] = { + {UINT, kWebMIdTrackPlaneUID}, + {UINT, kWebMIdTrackPlaneType}, +}; + +static const ElementIdInfo kJoinBlocksIds[] = { + {UINT, kWebMIdTrackJoinUID}, +}; + +static const ElementIdInfo kContentEncodingsIds[] = { + {LIST, kWebMIdContentEncoding}, +}; + +static const ElementIdInfo kContentEncodingIds[] = { + {UINT, kWebMIdContentEncodingOrder}, + {UINT, kWebMIdContentEncodingScope}, + {UINT, kWebMIdContentEncodingType}, + {LIST, kWebMIdContentCompression}, + {LIST, kWebMIdContentEncryption}, +}; + +static const ElementIdInfo kContentCompressionIds[] = { + {UINT, kWebMIdContentCompAlgo}, + {BINARY, 
kWebMIdContentCompSettings}, +}; + +static const ElementIdInfo kContentEncryptionIds[] = { + {LIST, kWebMIdContentEncAESSettings}, + {UINT, kWebMIdContentEncAlgo}, + {BINARY, kWebMIdContentEncKeyID}, + {BINARY, kWebMIdContentSignature}, + {BINARY, kWebMIdContentSigKeyID}, + {UINT, kWebMIdContentSigAlgo}, + {UINT, kWebMIdContentSigHashAlgo}, +}; + +static const ElementIdInfo kContentEncAESSettingsIds[] = { + {UINT, kWebMIdAESSettingsCipherMode}, +}; + +static const ElementIdInfo kCuesIds[] = { + {LIST, kWebMIdCuePoint}, +}; + +static const ElementIdInfo kCuePointIds[] = { + {UINT, kWebMIdCueTime}, + {LIST, kWebMIdCueTrackPositions}, +}; + +static const ElementIdInfo kCueTrackPositionsIds[] = { + {UINT, kWebMIdCueTrack}, + {UINT, kWebMIdCueClusterPosition}, + {UINT, kWebMIdCueBlockNumber}, + {UINT, kWebMIdCueCodecState}, + {LIST, kWebMIdCueReference}, +}; + +static const ElementIdInfo kCueReferenceIds[] = { + {UINT, kWebMIdCueRefTime}, +}; + +static const ElementIdInfo kAttachmentsIds[] = { + {LIST, kWebMIdAttachedFile}, +}; + +static const ElementIdInfo kAttachedFileIds[] = { + {STRING, kWebMIdFileDescription}, + {STRING, kWebMIdFileName}, + {STRING, kWebMIdFileMimeType}, + {BINARY, kWebMIdFileData}, + {UINT, kWebMIdFileUID}, +}; + +static const ElementIdInfo kChaptersIds[] = { + {LIST, kWebMIdEditionEntry}, +}; + +static const ElementIdInfo kEditionEntryIds[] = { + {UINT, kWebMIdEditionUID}, + {UINT, kWebMIdEditionFlagHidden}, + {UINT, kWebMIdEditionFlagDefault}, + {UINT, kWebMIdEditionFlagOrdered}, + {LIST, kWebMIdChapterAtom}, +}; + +static const ElementIdInfo kChapterAtomIds[] = { + {UINT, kWebMIdChapterUID}, + {UINT, kWebMIdChapterTimeStart}, + {UINT, kWebMIdChapterTimeEnd}, + {UINT, kWebMIdChapterFlagHidden}, + {UINT, kWebMIdChapterFlagEnabled}, + {BINARY, kWebMIdChapterSegmentUID}, + {UINT, kWebMIdChapterSegmentEditionUID}, + {UINT, kWebMIdChapterPhysicalEquiv}, + {LIST, kWebMIdChapterTrack}, + {LIST, kWebMIdChapterDisplay}, + {LIST, kWebMIdChapProcess}, +}; 
+ +static const ElementIdInfo kChapterTrackIds[] = { + {UINT, kWebMIdChapterTrackNumber}, +}; + +static const ElementIdInfo kChapterDisplayIds[] = { + {STRING, kWebMIdChapString}, + {STRING, kWebMIdChapLanguage}, + {STRING, kWebMIdChapCountry}, +}; + +static const ElementIdInfo kChapProcessIds[] = { + {UINT, kWebMIdChapProcessCodecID}, + {BINARY, kWebMIdChapProcessPrivate}, + {LIST, kWebMIdChapProcessCommand}, +}; + +static const ElementIdInfo kChapProcessCommandIds[] = { + {UINT, kWebMIdChapProcessTime}, + {BINARY, kWebMIdChapProcessData}, +}; + +static const ElementIdInfo kTagsIds[] = { + {LIST, kWebMIdTag}, +}; + +static const ElementIdInfo kTagIds[] = { + {LIST, kWebMIdTargets}, + {LIST, kWebMIdSimpleTag}, +}; + +static const ElementIdInfo kTargetsIds[] = { + {UINT, kWebMIdTargetTypeValue}, + {STRING, kWebMIdTargetType}, + {UINT, kWebMIdTagTrackUID}, + {UINT, kWebMIdTagEditionUID}, + {UINT, kWebMIdTagChapterUID}, + {UINT, kWebMIdTagAttachmentUID}, +}; + +static const ElementIdInfo kSimpleTagIds[] = { + {STRING, kWebMIdTagName}, + {STRING, kWebMIdTagLanguage}, + {UINT, kWebMIdTagDefault}, + {STRING, kWebMIdTagString}, + {BINARY, kWebMIdTagBinary}, +}; + +#define LIST_ELEMENT_INFO(id, level, id_info) \ + { (id), (level), (id_info), arraysize(id_info) } + +static const ListElementInfo kListElementInfo[] = { + LIST_ELEMENT_INFO(kWebMIdCluster, 1, kClusterIds), + LIST_ELEMENT_INFO(kWebMIdEBMLHeader, 0, kEBMLHeaderIds), + LIST_ELEMENT_INFO(kWebMIdSegment, 0, kSegmentIds), + LIST_ELEMENT_INFO(kWebMIdSeekHead, 1, kSeekHeadIds), + LIST_ELEMENT_INFO(kWebMIdSeek, 2, kSeekIds), + LIST_ELEMENT_INFO(kWebMIdInfo, 1, kInfoIds), + LIST_ELEMENT_INFO(kWebMIdChapterTranslate, 2, kChapterTranslateIds), + LIST_ELEMENT_INFO(kWebMIdSilentTracks, 2, kSilentTracksIds), + LIST_ELEMENT_INFO(kWebMIdBlockGroup, 2, kBlockGroupIds), + LIST_ELEMENT_INFO(kWebMIdBlockAdditions, 3, kBlockAdditionsIds), + LIST_ELEMENT_INFO(kWebMIdBlockMore, 4, kBlockMoreIds), + LIST_ELEMENT_INFO(kWebMIdSlices, 3, 
kSlicesIds), + LIST_ELEMENT_INFO(kWebMIdTimeSlice, 4, kTimeSliceIds), + LIST_ELEMENT_INFO(kWebMIdTracks, 1, kTracksIds), + LIST_ELEMENT_INFO(kWebMIdTrackEntry, 2, kTrackEntryIds), + LIST_ELEMENT_INFO(kWebMIdTrackTranslate, 3, kTrackTranslateIds), + LIST_ELEMENT_INFO(kWebMIdVideo, 3, kVideoIds), + LIST_ELEMENT_INFO(kWebMIdAudio, 3, kAudioIds), + LIST_ELEMENT_INFO(kWebMIdTrackOperation, 3, kTrackOperationIds), + LIST_ELEMENT_INFO(kWebMIdTrackCombinePlanes, 4, kTrackCombinePlanesIds), + LIST_ELEMENT_INFO(kWebMIdTrackPlane, 5, kTrackPlaneIds), + LIST_ELEMENT_INFO(kWebMIdJoinBlocks, 4, kJoinBlocksIds), + LIST_ELEMENT_INFO(kWebMIdContentEncodings, 3, kContentEncodingsIds), + LIST_ELEMENT_INFO(kWebMIdContentEncoding, 4, kContentEncodingIds), + LIST_ELEMENT_INFO(kWebMIdContentCompression, 5, kContentCompressionIds), + LIST_ELEMENT_INFO(kWebMIdContentEncryption, 5, kContentEncryptionIds), + LIST_ELEMENT_INFO(kWebMIdContentEncAESSettings, 6, kContentEncAESSettingsIds), + LIST_ELEMENT_INFO(kWebMIdCues, 1, kCuesIds), + LIST_ELEMENT_INFO(kWebMIdCuePoint, 2, kCuePointIds), + LIST_ELEMENT_INFO(kWebMIdCueTrackPositions, 3, kCueTrackPositionsIds), + LIST_ELEMENT_INFO(kWebMIdCueReference, 4, kCueReferenceIds), + LIST_ELEMENT_INFO(kWebMIdAttachments, 1, kAttachmentsIds), + LIST_ELEMENT_INFO(kWebMIdAttachedFile, 2, kAttachedFileIds), + LIST_ELEMENT_INFO(kWebMIdChapters, 1, kChaptersIds), + LIST_ELEMENT_INFO(kWebMIdEditionEntry, 2, kEditionEntryIds), + LIST_ELEMENT_INFO(kWebMIdChapterAtom, 3, kChapterAtomIds), + LIST_ELEMENT_INFO(kWebMIdChapterTrack, 4, kChapterTrackIds), + LIST_ELEMENT_INFO(kWebMIdChapterDisplay, 4, kChapterDisplayIds), + LIST_ELEMENT_INFO(kWebMIdChapProcess, 4, kChapProcessIds), + LIST_ELEMENT_INFO(kWebMIdChapProcessCommand, 5, kChapProcessCommandIds), + LIST_ELEMENT_INFO(kWebMIdTags, 1, kTagsIds), + LIST_ELEMENT_INFO(kWebMIdTag, 2, kTagIds), + LIST_ELEMENT_INFO(kWebMIdTargets, 3, kTargetsIds), + LIST_ELEMENT_INFO(kWebMIdSimpleTag, 3, kSimpleTagIds), +}; + +// Parses 
an element header id or size field. These fields are variable length +// encoded. The first byte indicates how many bytes the field occupies. +// |buf| - The buffer to parse. +// |size| - The number of bytes in |buf| +// |max_bytes| - The maximum number of bytes the field can be. ID fields +// set this to 4 & element size fields set this to 8. If the +// first byte indicates a larger field size than this it is a +// parser error. +// |mask_first_byte| - For element size fields the field length encoding bits +// need to be masked off. This parameter is true for +// element size fields and is false for ID field values. +// +// Returns: The number of bytes parsed on success. -1 on error. +static int ParseWebMElementHeaderField(const uint8* buf, int size, + int max_bytes, bool mask_first_byte, + int64* num) { + DCHECK(buf); + DCHECK(num); + + if (size < 0) + return -1; + + if (size == 0) + return 0; + + int mask = 0x80; + uint8 ch = buf[0]; + int extra_bytes = -1; + bool all_ones = false; + for (int i = 0; i < max_bytes; ++i) { + if ((ch & mask) != 0) { + mask = ~mask & 0xff; + *num = mask_first_byte ? ch & mask : ch; + all_ones = (ch & mask) == mask; + extra_bytes = i; + break; + } + mask = 0x80 | mask >> 1; + } + + if (extra_bytes == -1) + return -1; + + // Return 0 if we need more data. 
+ if ((1 + extra_bytes) > size) + return 0; + + int bytes_used = 1; + + for (int i = 0; i < extra_bytes; ++i) { + ch = buf[bytes_used++]; + all_ones &= (ch == 0xff); + *num = (*num << 8) | ch; + } + + if (all_ones) + *num = kint64max; + + return bytes_used; +} + +int WebMParseElementHeader(const uint8* buf, int size, + int* id, int64* element_size) { + DCHECK(buf); + DCHECK_GE(size, 0); + DCHECK(id); + DCHECK(element_size); + + if (size == 0) + return 0; + + int64 tmp = 0; + int num_id_bytes = ParseWebMElementHeaderField(buf, size, 4, false, &tmp); + + if (num_id_bytes <= 0) + return num_id_bytes; + + if (tmp == kint64max) + tmp = kWebMReservedId; + + *id = static_cast<int>(tmp); + + int num_size_bytes = ParseWebMElementHeaderField(buf + num_id_bytes, + size - num_id_bytes, + 8, true, &tmp); + + if (num_size_bytes <= 0) + return num_size_bytes; + + if (tmp == kint64max) + tmp = kWebMUnknownSize; + + *element_size = tmp; + DVLOG(3) << "WebMParseElementHeader() : id " << std::hex << *id << std::dec + << " size " << *element_size; + return num_id_bytes + num_size_bytes; +} + +// Finds ElementType for a specific ID. +static ElementType FindIdType(int id, + const ElementIdInfo* id_info, + int id_info_count) { + + // Check for global element IDs that can be anywhere. + if (id == kWebMIdVoid || id == kWebMIdCRC32) + return SKIP; + + for (int i = 0; i < id_info_count; ++i) { + if (id == id_info[i].id_) + return id_info[i].type_; + } + + return UNKNOWN; +} + +// Finds ListElementInfo for a specific ID. 
+static const ListElementInfo* FindListInfo(int id) { + for (size_t i = 0; i < arraysize(kListElementInfo); ++i) { + if (id == kListElementInfo[i].id_) + return &kListElementInfo[i]; + } + + return NULL; +} + +static int FindListLevel(int id) { + const ListElementInfo* list_info = FindListInfo(id); + if (list_info) + return list_info->level_; + + return -1; +} + +static int ParseUInt(const uint8* buf, int size, int id, + WebMParserClient* client) { + if ((size <= 0) || (size > 8)) + return -1; + + // Read in the big-endian integer. + int64 value = 0; + for (int i = 0; i < size; ++i) + value = (value << 8) | buf[i]; + + if (!client->OnUInt(id, value)) + return -1; + + return size; +} + +static int ParseFloat(const uint8* buf, int size, int id, + WebMParserClient* client) { + + if ((size != 4) && (size != 8)) + return -1; + + double value = -1; + + // Read the bytes from big-endian form into a native endian integer. + int64 tmp = 0; + for (int i = 0; i < size; ++i) + tmp = (tmp << 8) | buf[i]; + + // Use a union to convert the integer bit pattern into a floating point + // number. + if (size == 4) { + union { + int32 src; + float dst; + } tmp2; + tmp2.src = static_cast<int32>(tmp); + value = tmp2.dst; + } else if (size == 8) { + union { + int64 src; + double dst; + } tmp2; + tmp2.src = tmp; + value = tmp2.dst; + } else { + return -1; + } + + if (!client->OnFloat(id, value)) + return -1; + + return size; +} + +static int ParseBinary(const uint8* buf, int size, int id, + WebMParserClient* client) { + return client->OnBinary(id, buf, size) ? size : -1; +} + +static int ParseString(const uint8* buf, int size, int id, + WebMParserClient* client) { + const uint8* end = static_cast<const uint8*>(memchr(buf, '\0', size)); + int length = (end != NULL) ? static_cast<int>(end - buf) : size; + std::string str(reinterpret_cast<const char*>(buf), length); + return client->OnString(id, str) ? 
size : -1; +} + +static int ParseNonListElement(ElementType type, int id, int64 element_size, + const uint8* buf, int size, + WebMParserClient* client) { + DCHECK_GE(size, element_size); + + int result = -1; + switch(type) { + case LIST: + NOTIMPLEMENTED(); + result = -1; + break; + case UINT: + result = ParseUInt(buf, element_size, id, client); + break; + case FLOAT: + result = ParseFloat(buf, element_size, id, client); + break; + case BINARY: + result = ParseBinary(buf, element_size, id, client); + break; + case STRING: + result = ParseString(buf, element_size, id, client); + break; + case SKIP: + result = element_size; + break; + default: + DVLOG(1) << "Unhandled ID type " << type; + return -1; + }; + + DCHECK_LE(result, size); + return result; +} + +WebMParserClient::WebMParserClient() {} +WebMParserClient::~WebMParserClient() {} + +WebMParserClient* WebMParserClient::OnListStart(int id) { + DVLOG(1) << "Unexpected list element start with ID " << std::hex << id; + return NULL; +} + +bool WebMParserClient::OnListEnd(int id) { + DVLOG(1) << "Unexpected list element end with ID " << std::hex << id; + return false; +} + +bool WebMParserClient::OnUInt(int id, int64 val) { + DVLOG(1) << "Unexpected unsigned integer element with ID " << std::hex << id; + return false; +} + +bool WebMParserClient::OnFloat(int id, double val) { + DVLOG(1) << "Unexpected float element with ID " << std::hex << id; + return false; +} + +bool WebMParserClient::OnBinary(int id, const uint8* data, int size) { + DVLOG(1) << "Unexpected binary element with ID " << std::hex << id; + return false; +} + +bool WebMParserClient::OnString(int id, const std::string& str) { + DVLOG(1) << "Unexpected string element with ID " << std::hex << id; + return false; +} + +WebMListParser::WebMListParser(int id, WebMParserClient* client) + : state_(NEED_LIST_HEADER), + root_id_(id), + root_level_(FindListLevel(id)), + root_client_(client) { + DCHECK_GE(root_level_, 0); + DCHECK(client); +} + 
+WebMListParser::~WebMListParser() {} + +void WebMListParser::Reset() { + ChangeState(NEED_LIST_HEADER); + list_state_stack_.clear(); +} + +int WebMListParser::Parse(const uint8* buf, int size) { + DCHECK(buf); + + if (size < 0 || state_ == PARSE_ERROR || state_ == DONE_PARSING_LIST) + return -1; + + if (size == 0) + return 0; + + const uint8* cur = buf; + int cur_size = size; + int bytes_parsed = 0; + + while (cur_size > 0 && state_ != PARSE_ERROR && state_ != DONE_PARSING_LIST) { + int element_id = 0; + int64 element_size = 0; + int result = WebMParseElementHeader(cur, cur_size, &element_id, + &element_size); + + if (result < 0) + return result; + + if (result == 0) + return bytes_parsed; + + switch(state_) { + case NEED_LIST_HEADER: { + if (element_id != root_id_) { + ChangeState(PARSE_ERROR); + return -1; + } + + // Only allow Segment & Cluster to have an unknown size. + if (element_size == kWebMUnknownSize && + (element_id != kWebMIdSegment) && + (element_id != kWebMIdCluster)) { + ChangeState(PARSE_ERROR); + return -1; + } + + ChangeState(INSIDE_LIST); + if (!OnListStart(root_id_, element_size)) + return -1; + + break; + } + + case INSIDE_LIST: { + int header_size = result; + const uint8* element_data = cur + header_size; + int element_data_size = cur_size - header_size; + + if (element_size < element_data_size) + element_data_size = element_size; + + result = ParseListElement(header_size, element_id, element_size, + element_data, element_data_size); + + DCHECK_LE(result, header_size + element_data_size); + if (result < 0) { + ChangeState(PARSE_ERROR); + return -1; + } + + if (result == 0) + return bytes_parsed; + + break; + } + case DONE_PARSING_LIST: + case PARSE_ERROR: + // Shouldn't be able to get here. + NOTIMPLEMENTED(); + break; + } + + cur += result; + cur_size -= result; + bytes_parsed += result; + } + + return (state_ == PARSE_ERROR) ? 
-1 : bytes_parsed; +} + +bool WebMListParser::IsParsingComplete() const { + return state_ == DONE_PARSING_LIST; +} + +void WebMListParser::ChangeState(State new_state) { + state_ = new_state; +} + +int WebMListParser::ParseListElement(int header_size, + int id, int64 element_size, + const uint8* data, int size) { + DCHECK_GT(list_state_stack_.size(), 0u); + + ListState& list_state = list_state_stack_.back(); + DCHECK(list_state.element_info_); + + const ListElementInfo* element_info = list_state.element_info_; + ElementType id_type = + FindIdType(id, element_info->id_info_, element_info->id_info_count_); + + // Unexpected ID. + if (id_type == UNKNOWN) { + if (list_state.size_ != kWebMUnknownSize || + !IsSiblingOrAncestor(list_state.id_, id)) { + DVLOG(1) << "No ElementType info for ID 0x" << std::hex << id; + return -1; + } + + // We've reached the end of a list of unknown size. Update the size now that + // we know it and dispatch the end of list calls. + list_state.size_ = list_state.bytes_parsed_; + + if (!OnListEnd()) + return -1; + + // Check to see if all open lists have ended. + if (list_state_stack_.size() == 0) + return 0; + + list_state = list_state_stack_.back(); + } + + // Make sure the whole element can fit inside the current list. + int64 total_element_size = header_size + element_size; + if (list_state.size_ != kWebMUnknownSize && + list_state.size_ < list_state.bytes_parsed_ + total_element_size) { + return -1; + } + + if (id_type == LIST) { + list_state.bytes_parsed_ += header_size; + + if (!OnListStart(id, element_size)) + return -1; + return header_size; + } + + // Make sure we have the entire element before trying to parse a non-list + // element. + if (size < element_size) + return 0; + + int bytes_parsed = ParseNonListElement(id_type, id, element_size, + data, size, list_state.client_); + DCHECK_LE(bytes_parsed, size); + + // Return if an error occurred or we need more data. 
+ // Note: bytes_parsed is 0 for a successful parse of a size 0 element. We + // need to check the element_size to disambiguate the "need more data" case + // from a successful parse. + if (bytes_parsed < 0 || (bytes_parsed == 0 && element_size != 0)) + return bytes_parsed; + + int result = header_size + bytes_parsed; + list_state.bytes_parsed_ += result; + + // See if we have reached the end of the current list. + if (list_state.bytes_parsed_ == list_state.size_) { + if (!OnListEnd()) + return -1; + } + + return result; +} + +bool WebMListParser::OnListStart(int id, int64 size) { + const ListElementInfo* element_info = FindListInfo(id); + if (!element_info) + return false; + + int current_level = root_level_ + list_state_stack_.size() - 1; + if (current_level + 1 != element_info->level_) + return false; + + WebMParserClient* current_list_client = NULL; + if (!list_state_stack_.empty()) { + // Make sure the new list doesn't go past the end of the current list. + ListState current_list_state = list_state_stack_.back(); + if (current_list_state.size_ != kWebMUnknownSize && + current_list_state.size_ < current_list_state.bytes_parsed_ + size) + return false; + current_list_client = current_list_state.client_; + } else { + current_list_client = root_client_; + } + + WebMParserClient* new_list_client = current_list_client->OnListStart(id); + if (!new_list_client) + return false; + + ListState new_list_state = { id, size, 0, element_info, new_list_client }; + list_state_stack_.push_back(new_list_state); + + if (size == 0) + return OnListEnd(); + + return true; +} + +bool WebMListParser::OnListEnd() { + int lists_ended = 0; + for (; !list_state_stack_.empty(); ++lists_ended) { + const ListState& list_state = list_state_stack_.back(); + + if (list_state.bytes_parsed_ != list_state.size_) + break; + + list_state_stack_.pop_back(); + + int64 bytes_parsed = list_state.bytes_parsed_; + WebMParserClient* client = NULL; + if (!list_state_stack_.empty()) { + // Update the 
bytes_parsed_ for the parent element. + list_state_stack_.back().bytes_parsed_ += bytes_parsed; + client = list_state_stack_.back().client_; + } else { + client = root_client_; + } + + if (!client->OnListEnd(list_state.id_)) + return false; + } + + DCHECK_GE(lists_ended, 1); + + if (list_state_stack_.empty()) + ChangeState(DONE_PARSING_LIST); + + return true; +} + +bool WebMListParser::IsSiblingOrAncestor(int id_a, int id_b) const { + DCHECK((id_a == kWebMIdSegment) || (id_a == kWebMIdCluster)); + + if (id_a == kWebMIdCluster) { + // kWebMIdCluster siblings. + for (size_t i = 0; i < arraysize(kSegmentIds); i++) { + if (kSegmentIds[i].id_ == id_b) + return true; + } + } + + // kWebMIdSegment siblings. + return ((id_b == kWebMIdSegment) || (id_b == kWebMIdEBMLHeader)); +} + +} // namespace media diff --git a/media/formats/webm/webm_parser.h b/media/formats/webm/webm_parser.h new file mode 100644 index 0000000..854db68 --- /dev/null +++ b/media/formats/webm/webm_parser.h @@ -0,0 +1,158 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_WEBM_WEBM_PARSER_H_ +#define MEDIA_FORMATS_WEBM_WEBM_PARSER_H_ + +#include <string> +#include <vector> + +#include "base/basictypes.h" +#include "media/base/media_export.h" + +namespace media { + +// Interface for receiving WebM parser events. +// +// Each method is called when an element of the specified type is parsed. +// The ID of the element that was parsed is given along with the value +// stored in the element. List elements generate calls at the start and +// end of the list. Any pointers passed to these methods are only guaranteed +// to be valid for the life of that call. Each method (except for OnListStart) +// returns a bool that indicates whether the parsed data is valid. 
OnListStart +// returns a pointer to a WebMParserClient object, which should be used to +// handle elements parsed out of the list being started. If false (or NULL by +// OnListStart) is returned then the parse is immediately terminated and an +// error is reported by the parser. +class MEDIA_EXPORT WebMParserClient { + public: + virtual ~WebMParserClient(); + + virtual WebMParserClient* OnListStart(int id); + virtual bool OnListEnd(int id); + virtual bool OnUInt(int id, int64 val); + virtual bool OnFloat(int id, double val); + virtual bool OnBinary(int id, const uint8* data, int size); + virtual bool OnString(int id, const std::string& str); + + protected: + WebMParserClient(); + + DISALLOW_COPY_AND_ASSIGN(WebMParserClient); +}; + +struct ListElementInfo; + +// Parses a WebM list element and all of its children. This +// class supports incremental parsing of the list so Parse() +// can be called multiple times with pieces of the list. +// IsParsingComplete() will return true once the entire list has +// been parsed. +class MEDIA_EXPORT WebMListParser { + public: + // |id| - Element ID of the list we intend to parse. + // |client| - Called as different elements in the list are parsed. + WebMListParser(int id, WebMParserClient* client); + ~WebMListParser(); + + // Resets the state of the parser so it can start parsing a new list. + void Reset(); + + // Parses list data contained in |buf|. + // + // Returns < 0 if the parse fails. + // Returns 0 if more data is needed. + // Returning > 0 indicates success & the number of bytes parsed. + int Parse(const uint8* buf, int size); + + // Returns true if the entire list has been parsed. 
+ bool IsParsingComplete() const; + + private: + enum State { + NEED_LIST_HEADER, + INSIDE_LIST, + DONE_PARSING_LIST, + PARSE_ERROR, + }; + + struct ListState { + int id_; + int64 size_; + int64 bytes_parsed_; + const ListElementInfo* element_info_; + WebMParserClient* client_; + }; + + void ChangeState(State new_state); + + // Parses a single element in the current list. + // + // |header_size| - The size of the element header + // |id| - The ID of the element being parsed. + // |element_size| - The size of the element body. + // |data| - Pointer to the element contents. + // |size| - Number of bytes in |data| + // |client| - Client to pass the parsed data to. + // + // Returns < 0 if the parse fails. + // Returns 0 if more data is needed. + // Returning > 0 indicates success & the number of bytes parsed. + int ParseListElement(int header_size, + int id, int64 element_size, + const uint8* data, int size); + + // Called when starting to parse a new list. + // + // |id| - The ID of the new list. + // |size| - The size of the new list. + // |client| - The client object to notify that a new list is being parsed. + // + // Returns true if this list can be started in the current context. False + // if starting this list causes some sort of parse error. + bool OnListStart(int id, int64 size); + + // Called when the end of the current list has been reached. This may also + // signal the end of the current list's ancestors if the current list happens + // to be at the end of its parent. + // + // Returns true if no errors occurred while ending this list(s). + bool OnListEnd(); + + // Checks to see if |id_b| is a sibling or ancestor of |id_a|. + bool IsSiblingOrAncestor(int id_a, int id_b) const; + + State state_; + + // Element ID passed to the constructor. + const int root_id_; + + // Element level for |root_id_|. Used to verify that elements appear at + // the correct level. + const int root_level_; + + // WebMParserClient to handle the root list. 
+ WebMParserClient* const root_client_; + + // Stack of state for all the lists currently being parsed. Lists are + // added and removed from this stack as they are parsed. + std::vector<ListState> list_state_stack_; + + DISALLOW_COPY_AND_ASSIGN(WebMListParser); +}; + +// Parses an element header & returns the ID and element size. +// +// Returns < 0 if the parse fails. +// Returns 0 if more data is needed. +// Returning > 0 indicates success & the number of bytes parsed. +// |*id| contains the element ID on success and is undefined otherwise. +// |*element_size| contains the element size on success and is undefined +// otherwise. +int MEDIA_EXPORT WebMParseElementHeader(const uint8* buf, int size, + int* id, int64* element_size); + +} // namespace media + +#endif // MEDIA_FORMATS_WEBM_WEBM_PARSER_H_ diff --git a/media/formats/webm/webm_parser_unittest.cc b/media/formats/webm/webm_parser_unittest.cc new file mode 100644 index 0000000..a1249e8 --- /dev/null +++ b/media/formats/webm/webm_parser_unittest.cc @@ -0,0 +1,412 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/webm/cluster_builder.h" +#include "media/formats/webm/webm_constants.h" +#include "media/formats/webm/webm_parser.h" +#include "testing/gmock/include/gmock/gmock.h" +#include "testing/gtest/include/gtest/gtest.h" + +using ::testing::InSequence; +using ::testing::Return; +using ::testing::ReturnNull; +using ::testing::StrictMock; +using ::testing::_; + +namespace media { + +enum { kBlockCount = 5 }; + +class MockWebMParserClient : public WebMParserClient { + public: + virtual ~MockWebMParserClient() {} + + // WebMParserClient methods. 
+ MOCK_METHOD1(OnListStart, WebMParserClient*(int)); + MOCK_METHOD1(OnListEnd, bool(int)); + MOCK_METHOD2(OnUInt, bool(int, int64)); + MOCK_METHOD2(OnFloat, bool(int, double)); + MOCK_METHOD3(OnBinary, bool(int, const uint8*, int)); + MOCK_METHOD2(OnString, bool(int, const std::string&)); +}; + +class WebMParserTest : public testing::Test { + protected: + StrictMock<MockWebMParserClient> client_; +}; + +static scoped_ptr<Cluster> CreateCluster(int block_count) { + ClusterBuilder cb; + cb.SetClusterTimecode(0); + + for (int i = 0; i < block_count; i++) { + uint8 data[] = { 0x00 }; + cb.AddSimpleBlock(0, i, 0, data, sizeof(data)); + } + + return cb.Finish(); +} + +static void CreateClusterExpectations(int block_count, + bool is_complete_cluster, + MockWebMParserClient* client) { + + InSequence s; + EXPECT_CALL(*client, OnListStart(kWebMIdCluster)).WillOnce(Return(client)); + EXPECT_CALL(*client, OnUInt(kWebMIdTimecode, 0)) + .WillOnce(Return(true)); + + for (int i = 0; i < block_count; i++) { + EXPECT_CALL(*client, OnBinary(kWebMIdSimpleBlock, _, _)) + .WillOnce(Return(true)); + } + + if (is_complete_cluster) + EXPECT_CALL(*client, OnListEnd(kWebMIdCluster)).WillOnce(Return(true)); +} + +TEST_F(WebMParserTest, EmptyCluster) { + const uint8 kEmptyCluster[] = { + 0x1F, 0x43, 0xB6, 0x75, 0x80 // CLUSTER (size = 0) + }; + int size = sizeof(kEmptyCluster); + + InSequence s; + EXPECT_CALL(client_, OnListStart(kWebMIdCluster)).WillOnce(Return(&client_)); + EXPECT_CALL(client_, OnListEnd(kWebMIdCluster)).WillOnce(Return(true)); + + WebMListParser parser(kWebMIdCluster, &client_); + EXPECT_EQ(size, parser.Parse(kEmptyCluster, size)); + EXPECT_TRUE(parser.IsParsingComplete()); +} + +TEST_F(WebMParserTest, EmptyClusterInSegment) { + const uint8 kBuffer[] = { + 0x18, 0x53, 0x80, 0x67, 0x85, // SEGMENT (size = 5) + 0x1F, 0x43, 0xB6, 0x75, 0x80, // CLUSTER (size = 0) + }; + int size = sizeof(kBuffer); + + InSequence s; + EXPECT_CALL(client_, 
OnListStart(kWebMIdSegment)).WillOnce(Return(&client_)); + EXPECT_CALL(client_, OnListStart(kWebMIdCluster)).WillOnce(Return(&client_)); + EXPECT_CALL(client_, OnListEnd(kWebMIdCluster)).WillOnce(Return(true)); + EXPECT_CALL(client_, OnListEnd(kWebMIdSegment)).WillOnce(Return(true)); + + WebMListParser parser(kWebMIdSegment, &client_); + EXPECT_EQ(size, parser.Parse(kBuffer, size)); + EXPECT_TRUE(parser.IsParsingComplete()); +} + +// Test the case where a non-list child element has a size +// that is beyond the end of the parent. +TEST_F(WebMParserTest, ChildNonListLargerThanParent) { + const uint8 kBuffer[] = { + 0x1F, 0x43, 0xB6, 0x75, 0x81, // CLUSTER (size = 1) + 0xE7, 0x81, 0x01, // Timecode (size=1, value=1) + }; + + InSequence s; + EXPECT_CALL(client_, OnListStart(kWebMIdCluster)).WillOnce(Return(&client_)); + + WebMListParser parser(kWebMIdCluster, &client_); + EXPECT_EQ(-1, parser.Parse(kBuffer, sizeof(kBuffer))); + EXPECT_FALSE(parser.IsParsingComplete()); +} + +// Test the case where a list child element has a size +// that is beyond the end of the parent. +TEST_F(WebMParserTest, ChildListLargerThanParent) { + const uint8 kBuffer[] = { + 0x18, 0x53, 0x80, 0x67, 0x85, // SEGMENT (size = 5) + 0x1F, 0x43, 0xB6, 0x75, 0x81, 0x11 // CLUSTER (size = 1) + }; + + InSequence s; + EXPECT_CALL(client_, OnListStart(kWebMIdSegment)).WillOnce(Return(&client_)); + + WebMListParser parser(kWebMIdSegment, &client_); + EXPECT_EQ(-1, parser.Parse(kBuffer, sizeof(kBuffer))); + EXPECT_FALSE(parser.IsParsingComplete()); +} + +// Expecting to parse a Cluster, but get a Segment. 
+TEST_F(WebMParserTest, ListIdDoesNotMatch) { + const uint8 kBuffer[] = { + 0x18, 0x53, 0x80, 0x67, 0x80, // SEGMENT (size = 0) + }; + + WebMListParser parser(kWebMIdCluster, &client_); + EXPECT_EQ(-1, parser.Parse(kBuffer, sizeof(kBuffer))); + EXPECT_FALSE(parser.IsParsingComplete()); +} + +TEST_F(WebMParserTest, InvalidElementInList) { + const uint8 kBuffer[] = { + 0x18, 0x53, 0x80, 0x67, 0x82, // SEGMENT (size = 2) + 0xAE, 0x80, // TrackEntry (size = 0) + }; + + InSequence s; + EXPECT_CALL(client_, OnListStart(kWebMIdSegment)).WillOnce(Return(&client_)); + + WebMListParser parser(kWebMIdSegment, &client_); + EXPECT_EQ(-1, parser.Parse(kBuffer, sizeof(kBuffer))); + EXPECT_FALSE(parser.IsParsingComplete()); +} + +// Test specific case of InvalidElementInList to verify EBMLHEADER within +// known-sized cluster causes parse error. +TEST_F(WebMParserTest, InvalidEBMLHeaderInCluster) { + const uint8 kBuffer[] = { + 0x1F, 0x43, 0xB6, 0x75, 0x85, // CLUSTER (size = 5) + 0x1A, 0x45, 0xDF, 0xA3, 0x80, // EBMLHEADER (size = 0) + }; + + InSequence s; + EXPECT_CALL(client_, OnListStart(kWebMIdCluster)).WillOnce(Return(&client_)); + + WebMListParser parser(kWebMIdCluster, &client_); + EXPECT_EQ(-1, parser.Parse(kBuffer, sizeof(kBuffer))); + EXPECT_FALSE(parser.IsParsingComplete()); +} + +// Verify that EBMLHEADER ends a preceding "unknown"-sized CLUSTER. +TEST_F(WebMParserTest, UnknownSizeClusterFollowedByEBMLHeader) { + const uint8 kBuffer[] = { + 0x1F, 0x43, 0xB6, 0x75, 0xFF, // CLUSTER (size = unknown; really 0 due to:) + 0x1A, 0x45, 0xDF, 0xA3, 0x80, // EBMLHEADER (size = 0) + }; + + InSequence s; + EXPECT_CALL(client_, OnListStart(kWebMIdCluster)).WillOnce(Return(&client_)); + EXPECT_CALL(client_, OnListEnd(kWebMIdCluster)).WillOnce(Return(true)); + + WebMListParser parser(kWebMIdCluster, &client_); + + // List parse should consume the CLUSTER but not the EBMLHEADER. 
+ EXPECT_EQ(5, parser.Parse(kBuffer, sizeof(kBuffer))); + EXPECT_TRUE(parser.IsParsingComplete()); +} + +TEST_F(WebMParserTest, VoidAndCRC32InList) { + const uint8 kBuffer[] = { + 0x18, 0x53, 0x80, 0x67, 0x99, // SEGMENT (size = 25) + 0xEC, 0x83, 0x00, 0x00, 0x00, // Void (size = 3) + 0xBF, 0x83, 0x00, 0x00, 0x00, // CRC32 (size = 3) + 0x1F, 0x43, 0xB6, 0x75, 0x8A, // CLUSTER (size = 10) + 0xEC, 0x83, 0x00, 0x00, 0x00, // Void (size = 3) + 0xBF, 0x83, 0x00, 0x00, 0x00, // CRC32 (size = 3) + }; + int size = sizeof(kBuffer); + + InSequence s; + EXPECT_CALL(client_, OnListStart(kWebMIdSegment)).WillOnce(Return(&client_)); + EXPECT_CALL(client_, OnListStart(kWebMIdCluster)).WillOnce(Return(&client_)); + EXPECT_CALL(client_, OnListEnd(kWebMIdCluster)).WillOnce(Return(true)); + EXPECT_CALL(client_, OnListEnd(kWebMIdSegment)).WillOnce(Return(true)); + + WebMListParser parser(kWebMIdSegment, &client_); + EXPECT_EQ(size, parser.Parse(kBuffer, size)); + EXPECT_TRUE(parser.IsParsingComplete()); +} + + +TEST_F(WebMParserTest, ParseListElementWithSingleCall) { + scoped_ptr<Cluster> cluster(CreateCluster(kBlockCount)); + CreateClusterExpectations(kBlockCount, true, &client_); + + WebMListParser parser(kWebMIdCluster, &client_); + EXPECT_EQ(cluster->size(), parser.Parse(cluster->data(), cluster->size())); + EXPECT_TRUE(parser.IsParsingComplete()); +} + +TEST_F(WebMParserTest, ParseListElementWithMultipleCalls) { + scoped_ptr<Cluster> cluster(CreateCluster(kBlockCount)); + CreateClusterExpectations(kBlockCount, true, &client_); + + const uint8* data = cluster->data(); + int size = cluster->size(); + int default_parse_size = 3; + WebMListParser parser(kWebMIdCluster, &client_); + int parse_size = std::min(default_parse_size, size); + + while (size > 0) { + int result = parser.Parse(data, parse_size); + ASSERT_GE(result, 0); + ASSERT_LE(result, parse_size); + + if (result == 0) { + // The parser needs more data so increase the parse_size a little. 
+ EXPECT_FALSE(parser.IsParsingComplete()); + parse_size += default_parse_size; + parse_size = std::min(parse_size, size); + continue; + } + + parse_size = default_parse_size; + + data += result; + size -= result; + + EXPECT_EQ((size == 0), parser.IsParsingComplete()); + } + EXPECT_TRUE(parser.IsParsingComplete()); +} + +TEST_F(WebMParserTest, Reset) { + InSequence s; + scoped_ptr<Cluster> cluster(CreateCluster(kBlockCount)); + + // First expect all but the last block. + CreateClusterExpectations(kBlockCount - 1, false, &client_); + + // Now expect all blocks. + CreateClusterExpectations(kBlockCount, true, &client_); + + WebMListParser parser(kWebMIdCluster, &client_); + + // Send slightly less than the full cluster so all but the last block is + // parsed. + int result = parser.Parse(cluster->data(), cluster->size() - 1); + EXPECT_GT(result, 0); + EXPECT_LT(result, cluster->size()); + EXPECT_FALSE(parser.IsParsingComplete()); + + parser.Reset(); + + // Now parse a whole cluster to verify that all the blocks will get parsed. + EXPECT_EQ(cluster->size(), parser.Parse(cluster->data(), cluster->size())); + EXPECT_TRUE(parser.IsParsingComplete()); +} + +// Test the case where multiple clients are used for different lists. 
+TEST_F(WebMParserTest, MultipleClients) { + const uint8 kBuffer[] = { + 0x18, 0x53, 0x80, 0x67, 0x94, // SEGMENT (size = 20) + 0x16, 0x54, 0xAE, 0x6B, 0x85, // TRACKS (size = 5) + 0xAE, 0x83, // TRACKENTRY (size = 3) + 0xD7, 0x81, 0x01, // TRACKNUMBER (size = 1) + 0x1F, 0x43, 0xB6, 0x75, 0x85, // CLUSTER (size = 5) + 0xEC, 0x83, 0x00, 0x00, 0x00, // Void (size = 3) + }; + int size = sizeof(kBuffer); + + StrictMock<MockWebMParserClient> c1_; + StrictMock<MockWebMParserClient> c2_; + StrictMock<MockWebMParserClient> c3_; + + InSequence s; + EXPECT_CALL(client_, OnListStart(kWebMIdSegment)).WillOnce(Return(&c1_)); + EXPECT_CALL(c1_, OnListStart(kWebMIdTracks)).WillOnce(Return(&c2_)); + EXPECT_CALL(c2_, OnListStart(kWebMIdTrackEntry)).WillOnce(Return(&c3_)); + EXPECT_CALL(c3_, OnUInt(kWebMIdTrackNumber, 1)).WillOnce(Return(true)); + EXPECT_CALL(c2_, OnListEnd(kWebMIdTrackEntry)).WillOnce(Return(true)); + EXPECT_CALL(c1_, OnListEnd(kWebMIdTracks)).WillOnce(Return(true)); + EXPECT_CALL(c1_, OnListStart(kWebMIdCluster)).WillOnce(Return(&c2_)); + EXPECT_CALL(c1_, OnListEnd(kWebMIdCluster)).WillOnce(Return(true)); + EXPECT_CALL(client_, OnListEnd(kWebMIdSegment)).WillOnce(Return(true)); + + WebMListParser parser(kWebMIdSegment, &client_); + EXPECT_EQ(size, parser.Parse(kBuffer, size)); + EXPECT_TRUE(parser.IsParsingComplete()); +} + +// Test the case where multiple clients are used for different lists. 
+TEST_F(WebMParserTest, InvalidClient) { + const uint8 kBuffer[] = { + 0x18, 0x53, 0x80, 0x67, 0x85, // SEGMENT (size = 20) + 0x16, 0x54, 0xAE, 0x6B, 0x80, // TRACKS (size = 5) + }; + + InSequence s; + EXPECT_CALL(client_, OnListStart(kWebMIdSegment)).WillOnce(ReturnNull()); + + WebMListParser parser(kWebMIdSegment, &client_); + EXPECT_EQ(-1, parser.Parse(kBuffer, sizeof(kBuffer))); + EXPECT_FALSE(parser.IsParsingComplete()); +} + +TEST_F(WebMParserTest, ReservedIds) { + const uint8 k1ByteReservedId[] = { 0xFF, 0x81 }; + const uint8 k2ByteReservedId[] = { 0x7F, 0xFF, 0x81 }; + const uint8 k3ByteReservedId[] = { 0x3F, 0xFF, 0xFF, 0x81 }; + const uint8 k4ByteReservedId[] = { 0x1F, 0xFF, 0xFF, 0xFF, 0x81 }; + const uint8* kBuffers[] = { + k1ByteReservedId, + k2ByteReservedId, + k3ByteReservedId, + k4ByteReservedId + }; + + for (size_t i = 0; i < arraysize(kBuffers); i++) { + int id; + int64 element_size; + int buffer_size = 2 + i; + EXPECT_EQ(buffer_size, WebMParseElementHeader(kBuffers[i], buffer_size, + &id, &element_size)); + EXPECT_EQ(id, kWebMReservedId); + EXPECT_EQ(element_size, 1); + } +} + +TEST_F(WebMParserTest, ReservedSizes) { + const uint8 k1ByteReservedSize[] = { 0xA3, 0xFF }; + const uint8 k2ByteReservedSize[] = { 0xA3, 0x7F, 0xFF }; + const uint8 k3ByteReservedSize[] = { 0xA3, 0x3F, 0xFF, 0xFF }; + const uint8 k4ByteReservedSize[] = { 0xA3, 0x1F, 0xFF, 0xFF, 0xFF }; + const uint8 k5ByteReservedSize[] = { 0xA3, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF }; + const uint8 k6ByteReservedSize[] = { 0xA3, 0x07, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF }; + const uint8 k7ByteReservedSize[] = { 0xA3, 0x03, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF }; + const uint8 k8ByteReservedSize[] = { 0xA3, 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF }; + const uint8* kBuffers[] = { + k1ByteReservedSize, + k2ByteReservedSize, + k3ByteReservedSize, + k4ByteReservedSize, + k5ByteReservedSize, + k6ByteReservedSize, + k7ByteReservedSize, + k8ByteReservedSize + }; + + for (size_t i = 0; i < 
arraysize(kBuffers); i++) { + int id; + int64 element_size; + int buffer_size = 2 + i; + EXPECT_EQ(buffer_size, WebMParseElementHeader(kBuffers[i], buffer_size, + &id, &element_size)); + EXPECT_EQ(id, 0xA3); + EXPECT_EQ(element_size, kWebMUnknownSize); + } +} + +TEST_F(WebMParserTest, ZeroPaddedStrings) { + const uint8 kBuffer[] = { + 0x1A, 0x45, 0xDF, 0xA3, 0x91, // EBMLHEADER (size = 17) + 0x42, 0x82, 0x80, // DocType (size = 0) + 0x42, 0x82, 0x81, 0x00, // DocType (size = 1) "" + 0x42, 0x82, 0x81, 'a', // DocType (size = 1) "a" + 0x42, 0x82, 0x83, 'a', 0x00, 0x00 // DocType (size = 3) "a" + }; + int size = sizeof(kBuffer); + + InSequence s; + EXPECT_CALL(client_, OnListStart(kWebMIdEBMLHeader)) + .WillOnce(Return(&client_)); + EXPECT_CALL(client_, OnString(kWebMIdDocType, "")).WillOnce(Return(true)); + EXPECT_CALL(client_, OnString(kWebMIdDocType, "")).WillOnce(Return(true)); + EXPECT_CALL(client_, OnString(kWebMIdDocType, "a")).WillOnce(Return(true)); + EXPECT_CALL(client_, OnString(kWebMIdDocType, "a")).WillOnce(Return(true)); + EXPECT_CALL(client_, OnListEnd(kWebMIdEBMLHeader)).WillOnce(Return(true)); + + WebMListParser parser(kWebMIdEBMLHeader, &client_); + EXPECT_EQ(size, parser.Parse(kBuffer, size)); + EXPECT_TRUE(parser.IsParsingComplete()); +} + +} // namespace media diff --git a/media/formats/webm/webm_stream_parser.cc b/media/formats/webm/webm_stream_parser.cc new file mode 100644 index 0000000..f39a8e3 --- /dev/null +++ b/media/formats/webm/webm_stream_parser.cc @@ -0,0 +1,300 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "media/formats/webm/webm_stream_parser.h" + +#include <string> + +#include "base/callback.h" +#include "base/logging.h" +#include "media/formats/webm/webm_cluster_parser.h" +#include "media/formats/webm/webm_constants.h" +#include "media/formats/webm/webm_content_encodings.h" +#include "media/formats/webm/webm_crypto_helpers.h" +#include "media/formats/webm/webm_info_parser.h" +#include "media/formats/webm/webm_tracks_parser.h" + +namespace media { + +WebMStreamParser::WebMStreamParser() + : state_(kWaitingForInit), + parsing_cluster_(false) { +} + +WebMStreamParser::~WebMStreamParser() { +} + +void WebMStreamParser::Init(const InitCB& init_cb, + const NewConfigCB& config_cb, + const NewBuffersCB& new_buffers_cb, + const NewTextBuffersCB& text_cb, + const NeedKeyCB& need_key_cb, + const NewMediaSegmentCB& new_segment_cb, + const base::Closure& end_of_segment_cb, + const LogCB& log_cb) { + DCHECK_EQ(state_, kWaitingForInit); + DCHECK(init_cb_.is_null()); + DCHECK(!init_cb.is_null()); + DCHECK(!config_cb.is_null()); + DCHECK(!new_buffers_cb.is_null()); + DCHECK(!need_key_cb.is_null()); + DCHECK(!new_segment_cb.is_null()); + DCHECK(!end_of_segment_cb.is_null()); + + ChangeState(kParsingHeaders); + init_cb_ = init_cb; + config_cb_ = config_cb; + new_buffers_cb_ = new_buffers_cb; + text_cb_ = text_cb; + need_key_cb_ = need_key_cb; + new_segment_cb_ = new_segment_cb; + end_of_segment_cb_ = end_of_segment_cb; + log_cb_ = log_cb; +} + +void WebMStreamParser::Flush() { + DCHECK_NE(state_, kWaitingForInit); + + byte_queue_.Reset(); + parsing_cluster_ = false; + + if (state_ != kParsingClusters) + return; + + cluster_parser_->Reset(); +} + +bool WebMStreamParser::Parse(const uint8* buf, int size) { + DCHECK_NE(state_, kWaitingForInit); + + if (state_ == kError) + return false; + + byte_queue_.Push(buf, size); + + int result = 0; + int bytes_parsed = 0; + const uint8* cur = NULL; + int cur_size = 0; + + byte_queue_.Peek(&cur, &cur_size); + while (cur_size > 0) { + 
State oldState = state_; + switch (state_) { + case kParsingHeaders: + result = ParseInfoAndTracks(cur, cur_size); + break; + + case kParsingClusters: + result = ParseCluster(cur, cur_size); + break; + + case kWaitingForInit: + case kError: + return false; + } + + if (result < 0) { + ChangeState(kError); + return false; + } + + if (state_ == oldState && result == 0) + break; + + DCHECK_GE(result, 0); + cur += result; + cur_size -= result; + bytes_parsed += result; + } + + byte_queue_.Pop(bytes_parsed); + return true; +} + +void WebMStreamParser::ChangeState(State new_state) { + DVLOG(1) << "ChangeState() : " << state_ << " -> " << new_state; + state_ = new_state; +} + +int WebMStreamParser::ParseInfoAndTracks(const uint8* data, int size) { + DVLOG(2) << "ParseInfoAndTracks()"; + DCHECK(data); + DCHECK_GT(size, 0); + + const uint8* cur = data; + int cur_size = size; + int bytes_parsed = 0; + + int id; + int64 element_size; + int result = WebMParseElementHeader(cur, cur_size, &id, &element_size); + + if (result <= 0) + return result; + + switch (id) { + case kWebMIdEBMLHeader: + case kWebMIdSeekHead: + case kWebMIdVoid: + case kWebMIdCRC32: + case kWebMIdCues: + case kWebMIdChapters: + if (cur_size < (result + element_size)) { + // We don't have the whole element yet. Signal we need more data. + return 0; + } + // Skip the element. + return result + element_size; + break; + case kWebMIdSegment: + // Just consume the segment header. + return result; + break; + case kWebMIdInfo: + // We've found the element we are looking for. 
+ break; + default: { + MEDIA_LOG(log_cb_) << "Unexpected element ID 0x" << std::hex << id; + return -1; + } + } + + WebMInfoParser info_parser; + result = info_parser.Parse(cur, cur_size); + + if (result <= 0) + return result; + + cur += result; + cur_size -= result; + bytes_parsed += result; + + WebMTracksParser tracks_parser(log_cb_, text_cb_.is_null()); + result = tracks_parser.Parse(cur, cur_size); + + if (result <= 0) + return result; + + bytes_parsed += result; + + base::TimeDelta duration = kInfiniteDuration(); + + if (info_parser.duration() > 0) { + double mult = info_parser.timecode_scale() / 1000.0; + int64 duration_in_us = info_parser.duration() * mult; + duration = base::TimeDelta::FromMicroseconds(duration_in_us); + } + + const AudioDecoderConfig& audio_config = tracks_parser.audio_decoder_config(); + if (audio_config.is_encrypted()) + FireNeedKey(tracks_parser.audio_encryption_key_id()); + + const VideoDecoderConfig& video_config = tracks_parser.video_decoder_config(); + if (video_config.is_encrypted()) + FireNeedKey(tracks_parser.video_encryption_key_id()); + + if (!config_cb_.Run(audio_config, + video_config, + tracks_parser.text_tracks())) { + DVLOG(1) << "New config data isn't allowed."; + return -1; + } + + cluster_parser_.reset(new WebMClusterParser( + info_parser.timecode_scale(), + tracks_parser.audio_track_num(), + tracks_parser.video_track_num(), + tracks_parser.text_tracks(), + tracks_parser.ignored_tracks(), + tracks_parser.audio_encryption_key_id(), + tracks_parser.video_encryption_key_id(), + log_cb_)); + + ChangeState(kParsingClusters); + + if (!init_cb_.is_null()) { + init_cb_.Run(true, duration); + init_cb_.Reset(); + } + + return bytes_parsed; +} + +int WebMStreamParser::ParseCluster(const uint8* data, int size) { + if (!cluster_parser_) + return -1; + + int id; + int64 element_size; + int result = WebMParseElementHeader(data, size, &id, &element_size); + + if (result <= 0) + return result; + + // TODO(matthewjheaney): implement 
support for chapters + if (id == kWebMIdCues || id == kWebMIdChapters) { + // TODO(wolenetz): Handle unknown-sized cluster parse completion correctly. + // See http://crbug.com/335676. + if (size < (result + element_size)) { + // We don't have the whole element yet. Signal we need more data. + return 0; + } + // Skip the element. + return result + element_size; + } + + if (id == kWebMIdEBMLHeader) { + // TODO(wolenetz): Handle unknown-sized cluster parse completion correctly. + // See http://crbug.com/335676. + ChangeState(kParsingHeaders); + return 0; + } + + int bytes_parsed = cluster_parser_->Parse(data, size); + + if (bytes_parsed <= 0) + return bytes_parsed; + + // If cluster detected, immediately notify new segment if we have not already + // done this. + if (id == kWebMIdCluster && !parsing_cluster_) { + parsing_cluster_ = true; + new_segment_cb_.Run(); + } + + const BufferQueue& audio_buffers = cluster_parser_->audio_buffers(); + const BufferQueue& video_buffers = cluster_parser_->video_buffers(); + bool cluster_ended = cluster_parser_->cluster_ended(); + + if ((!audio_buffers.empty() || !video_buffers.empty()) && + !new_buffers_cb_.Run(audio_buffers, video_buffers)) { + return -1; + } + + WebMClusterParser::TextTrackIterator text_track_iter = + cluster_parser_->CreateTextTrackIterator(); + + int text_track_num; + const BufferQueue* text_buffers; + + while (text_track_iter(&text_track_num, &text_buffers)) { + if (!text_buffers->empty() && !text_cb_.Run(text_track_num, *text_buffers)) + return -1; + } + + if (cluster_ended) { + parsing_cluster_ = false; + end_of_segment_cb_.Run(); + } + + return bytes_parsed; +} + +void WebMStreamParser::FireNeedKey(const std::string& key_id) { + std::vector<uint8> key_id_vector(key_id.begin(), key_id.end()); + need_key_cb_.Run(kWebMEncryptInitDataType, key_id_vector); +} + +} // namespace media diff --git a/media/formats/webm/webm_stream_parser.h b/media/formats/webm/webm_stream_parser.h new file mode 100644 index 
0000000..5a4173f --- /dev/null +++ b/media/formats/webm/webm_stream_parser.h @@ -0,0 +1,90 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_WEBM_WEBM_STREAM_PARSER_H_ +#define MEDIA_FORMATS_WEBM_WEBM_STREAM_PARSER_H_ + +#include "base/callback_forward.h" +#include "base/memory/ref_counted.h" +#include "media/base/audio_decoder_config.h" +#include "media/base/buffers.h" +#include "media/base/byte_queue.h" +#include "media/base/stream_parser.h" +#include "media/base/video_decoder_config.h" + +namespace media { + +class WebMClusterParser; + +class WebMStreamParser : public StreamParser { + public: + WebMStreamParser(); + virtual ~WebMStreamParser(); + + // StreamParser implementation. + virtual void Init(const InitCB& init_cb, const NewConfigCB& config_cb, + const NewBuffersCB& new_buffers_cb, + const NewTextBuffersCB& text_cb, + const NeedKeyCB& need_key_cb, + const NewMediaSegmentCB& new_segment_cb, + const base::Closure& end_of_segment_cb, + const LogCB& log_cb) OVERRIDE; + virtual void Flush() OVERRIDE; + virtual bool Parse(const uint8* buf, int size) OVERRIDE; + + private: + enum State { + kWaitingForInit, + kParsingHeaders, + kParsingClusters, + kError + }; + + void ChangeState(State new_state); + + // Parses WebM Header, Info, Tracks elements. It also skips other level 1 + // elements that are not used right now. Once the Info & Tracks elements have + // been parsed, this method will transition the parser from PARSING_HEADERS to + // PARSING_CLUSTERS. + // + // Returns < 0 if the parse fails. + // Returns 0 if more data is needed. + // Returning > 0 indicates success & the number of bytes parsed. + int ParseInfoAndTracks(const uint8* data, int size); + + // Incrementally parses WebM cluster elements. 
This method also skips + // CUES elements if they are encountered since we currently don't use the + // data in these elements. + // + // Returns < 0 if the parse fails. + // Returns 0 if more data is needed. + // Returning > 0 indicates success & the number of bytes parsed. + int ParseCluster(const uint8* data, int size); + + // Fire needkey event through the |need_key_cb_|. + void FireNeedKey(const std::string& key_id); + + State state_; + InitCB init_cb_; + NewConfigCB config_cb_; + NewBuffersCB new_buffers_cb_; + NewTextBuffersCB text_cb_; + NeedKeyCB need_key_cb_; + + NewMediaSegmentCB new_segment_cb_; + base::Closure end_of_segment_cb_; + LogCB log_cb_; + + // True if a new cluster id has been seen and its end has not yet been parsed. + bool parsing_cluster_; + + scoped_ptr<WebMClusterParser> cluster_parser_; + ByteQueue byte_queue_; + + DISALLOW_COPY_AND_ASSIGN(WebMStreamParser); +}; + +} // namespace media + +#endif // MEDIA_FORMATS_WEBM_WEBM_STREAM_PARSER_H_ diff --git a/media/formats/webm/webm_tracks_parser.cc b/media/formats/webm/webm_tracks_parser.cc new file mode 100644 index 0000000..01fc83bf --- /dev/null +++ b/media/formats/webm/webm_tracks_parser.cc @@ -0,0 +1,301 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "media/formats/webm/webm_tracks_parser.h" + +#include "base/logging.h" +#include "base/strings/string_number_conversions.h" +#include "base/strings/string_util.h" +#include "media/base/buffers.h" +#include "media/formats/webm/webm_constants.h" +#include "media/formats/webm/webm_content_encodings.h" + +namespace media { + +static TextKind CodecIdToTextKind(const std::string& codec_id) { + if (codec_id == kWebMCodecSubtitles) + return kTextSubtitles; + + if (codec_id == kWebMCodecCaptions) + return kTextCaptions; + + if (codec_id == kWebMCodecDescriptions) + return kTextDescriptions; + + if (codec_id == kWebMCodecMetadata) + return kTextMetadata; + + return kTextNone; +} + +WebMTracksParser::WebMTracksParser(const LogCB& log_cb, bool ignore_text_tracks) + : track_type_(-1), + track_num_(-1), + track_uid_(-1), + seek_preroll_(-1), + codec_delay_(-1), + audio_track_num_(-1), + video_track_num_(-1), + ignore_text_tracks_(ignore_text_tracks), + log_cb_(log_cb), + audio_client_(log_cb), + video_client_(log_cb) { +} + +WebMTracksParser::~WebMTracksParser() {} + +int WebMTracksParser::Parse(const uint8* buf, int size) { + track_type_ =-1; + track_num_ = -1; + track_uid_ = -1; + track_name_.clear(); + track_language_.clear(); + audio_track_num_ = -1; + audio_decoder_config_ = AudioDecoderConfig(); + video_track_num_ = -1; + video_decoder_config_ = VideoDecoderConfig(); + text_tracks_.clear(); + ignored_tracks_.clear(); + + WebMListParser parser(kWebMIdTracks, this); + int result = parser.Parse(buf, size); + + if (result <= 0) + return result; + + // For now we do all or nothing parsing. + return parser.IsParsingComplete() ? 
result : 0; +} + +WebMParserClient* WebMTracksParser::OnListStart(int id) { + if (id == kWebMIdContentEncodings) { + DCHECK(!track_content_encodings_client_.get()); + track_content_encodings_client_.reset( + new WebMContentEncodingsClient(log_cb_)); + return track_content_encodings_client_->OnListStart(id); + } + + if (id == kWebMIdTrackEntry) { + track_type_ = -1; + track_num_ = -1; + track_name_.clear(); + track_language_.clear(); + codec_id_ = ""; + codec_private_.clear(); + audio_client_.Reset(); + video_client_.Reset(); + return this; + } + + if (id == kWebMIdAudio) + return &audio_client_; + + if (id == kWebMIdVideo) + return &video_client_; + + return this; +} + +bool WebMTracksParser::OnListEnd(int id) { + if (id == kWebMIdContentEncodings) { + DCHECK(track_content_encodings_client_.get()); + return track_content_encodings_client_->OnListEnd(id); + } + + if (id == kWebMIdTrackEntry) { + if (track_type_ == -1 || track_num_ == -1 || track_uid_ == -1) { + MEDIA_LOG(log_cb_) << "Missing TrackEntry data for " + << " TrackType " << track_type_ + << " TrackNum " << track_num_ + << " TrackUID " << track_uid_; + return false; + } + + if (track_type_ != kWebMTrackTypeAudio && + track_type_ != kWebMTrackTypeVideo && + track_type_ != kWebMTrackTypeSubtitlesOrCaptions && + track_type_ != kWebMTrackTypeDescriptionsOrMetadata) { + MEDIA_LOG(log_cb_) << "Unexpected TrackType " << track_type_; + return false; + } + + TextKind text_track_kind = kTextNone; + if (track_type_ == kWebMTrackTypeSubtitlesOrCaptions) { + text_track_kind = CodecIdToTextKind(codec_id_); + if (text_track_kind == kTextNone) { + MEDIA_LOG(log_cb_) << "Missing TrackEntry CodecID" + << " TrackNum " << track_num_; + return false; + } + + if (text_track_kind != kTextSubtitles && + text_track_kind != kTextCaptions) { + MEDIA_LOG(log_cb_) << "Wrong TrackEntry CodecID" + << " TrackNum " << track_num_; + return false; + } + } else if (track_type_ == kWebMTrackTypeDescriptionsOrMetadata) { + text_track_kind = 
CodecIdToTextKind(codec_id_); + if (text_track_kind == kTextNone) { + MEDIA_LOG(log_cb_) << "Missing TrackEntry CodecID" + << " TrackNum " << track_num_; + return false; + } + + if (text_track_kind != kTextDescriptions && + text_track_kind != kTextMetadata) { + MEDIA_LOG(log_cb_) << "Wrong TrackEntry CodecID" + << " TrackNum " << track_num_; + return false; + } + } + + std::string encryption_key_id; + if (track_content_encodings_client_) { + DCHECK(!track_content_encodings_client_->content_encodings().empty()); + // If we have multiple ContentEncoding in one track. Always choose the + // key id in the first ContentEncoding as the key id of the track. + encryption_key_id = track_content_encodings_client_-> + content_encodings()[0]->encryption_key_id(); + } + + if (track_type_ == kWebMTrackTypeAudio) { + if (audio_track_num_ == -1) { + audio_track_num_ = track_num_; + audio_encryption_key_id_ = encryption_key_id; + + DCHECK(!audio_decoder_config_.IsValidConfig()); + if (!audio_client_.InitializeConfig( + codec_id_, codec_private_, seek_preroll_, codec_delay_, + !audio_encryption_key_id_.empty(), &audio_decoder_config_)) { + return false; + } + } else { + MEDIA_LOG(log_cb_) << "Ignoring audio track " << track_num_; + ignored_tracks_.insert(track_num_); + } + } else if (track_type_ == kWebMTrackTypeVideo) { + if (video_track_num_ == -1) { + video_track_num_ = track_num_; + video_encryption_key_id_ = encryption_key_id; + + DCHECK(!video_decoder_config_.IsValidConfig()); + if (!video_client_.InitializeConfig( + codec_id_, codec_private_, !video_encryption_key_id_.empty(), + &video_decoder_config_)) { + return false; + } + } else { + MEDIA_LOG(log_cb_) << "Ignoring video track " << track_num_; + ignored_tracks_.insert(track_num_); + } + } else if (track_type_ == kWebMTrackTypeSubtitlesOrCaptions || + track_type_ == kWebMTrackTypeDescriptionsOrMetadata) { + if (ignore_text_tracks_) { + MEDIA_LOG(log_cb_) << "Ignoring text track " << track_num_; + 
ignored_tracks_.insert(track_num_); + } else { + std::string track_uid = base::Int64ToString(track_uid_); + text_tracks_[track_num_] = TextTrackConfig(text_track_kind, + track_name_, + track_language_, + track_uid); + } + } else { + MEDIA_LOG(log_cb_) << "Unexpected TrackType " << track_type_; + return false; + } + + track_type_ = -1; + track_num_ = -1; + track_uid_ = -1; + track_name_.clear(); + track_language_.clear(); + codec_id_ = ""; + codec_private_.clear(); + track_content_encodings_client_.reset(); + + audio_client_.Reset(); + video_client_.Reset(); + return true; + } + + return true; +} + +bool WebMTracksParser::OnUInt(int id, int64 val) { + int64* dst = NULL; + + switch (id) { + case kWebMIdTrackNumber: + dst = &track_num_; + break; + case kWebMIdTrackType: + dst = &track_type_; + break; + case kWebMIdTrackUID: + dst = &track_uid_; + break; + case kWebMIdSeekPreRoll: + dst = &seek_preroll_; + break; + case kWebMIdCodecDelay: + dst = &codec_delay_; + break; + default: + return true; + } + + if (*dst != -1) { + MEDIA_LOG(log_cb_) << "Multiple values for id " << std::hex << id + << " specified"; + return false; + } + + *dst = val; + return true; +} + +bool WebMTracksParser::OnFloat(int id, double val) { + return true; +} + +bool WebMTracksParser::OnBinary(int id, const uint8* data, int size) { + if (id == kWebMIdCodecPrivate) { + if (!codec_private_.empty()) { + MEDIA_LOG(log_cb_) << "Multiple CodecPrivate fields in a track."; + return false; + } + + codec_private_.assign(data, data + size); + return true; + } + return true; +} + +bool WebMTracksParser::OnString(int id, const std::string& str) { + if (id == kWebMIdCodecID) { + if (!codec_id_.empty()) { + MEDIA_LOG(log_cb_) << "Multiple CodecID fields in a track"; + return false; + } + + codec_id_ = str; + return true; + } + + if (id == kWebMIdName) { + track_name_ = str; + return true; + } + + if (id == kWebMIdLanguage) { + track_language_ = str; + return true; + } + + return true; +} + +} // namespace media 
diff --git a/media/formats/webm/webm_tracks_parser.h b/media/formats/webm/webm_tracks_parser.h new file mode 100644 index 0000000..4ace29d --- /dev/null +++ b/media/formats/webm/webm_tracks_parser.h @@ -0,0 +1,105 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_WEBM_WEBM_TRACKS_PARSER_H_ +#define MEDIA_FORMATS_WEBM_WEBM_TRACKS_PARSER_H_ + +#include <map> +#include <set> +#include <string> +#include <vector> + +#include "base/compiler_specific.h" +#include "base/memory/scoped_ptr.h" +#include "media/base/audio_decoder_config.h" +#include "media/base/media_log.h" +#include "media/base/text_track_config.h" +#include "media/base/video_decoder_config.h" +#include "media/formats/webm/webm_audio_client.h" +#include "media/formats/webm/webm_content_encodings_client.h" +#include "media/formats/webm/webm_parser.h" +#include "media/formats/webm/webm_video_client.h" + +namespace media { + +// Parser for WebM Tracks element. +class MEDIA_EXPORT WebMTracksParser : public WebMParserClient { + public: + explicit WebMTracksParser(const LogCB& log_cb, bool ignore_text_tracks); + virtual ~WebMTracksParser(); + + // Parses a WebM Tracks element in |buf|. + // + // Returns -1 if the parse fails. + // Returns 0 if more data is needed. + // Returns the number of bytes parsed on success. 
+ int Parse(const uint8* buf, int size); + + int64 audio_track_num() const { return audio_track_num_; } + int64 video_track_num() const { return video_track_num_; } + const std::set<int64>& ignored_tracks() const { return ignored_tracks_; } + + const std::string& audio_encryption_key_id() const { + return audio_encryption_key_id_; + } + + const AudioDecoderConfig& audio_decoder_config() { + return audio_decoder_config_; + } + + const std::string& video_encryption_key_id() const { + return video_encryption_key_id_; + } + + const VideoDecoderConfig& video_decoder_config() { + return video_decoder_config_; + } + + typedef std::map<int, TextTrackConfig> TextTracks; + + const TextTracks& text_tracks() const { + return text_tracks_; + } + + private: + // WebMParserClient implementation. + virtual WebMParserClient* OnListStart(int id) OVERRIDE; + virtual bool OnListEnd(int id) OVERRIDE; + virtual bool OnUInt(int id, int64 val) OVERRIDE; + virtual bool OnFloat(int id, double val) OVERRIDE; + virtual bool OnBinary(int id, const uint8* data, int size) OVERRIDE; + virtual bool OnString(int id, const std::string& str) OVERRIDE; + + int64 track_type_; + int64 track_num_; + int64 track_uid_; + std::string track_name_; + std::string track_language_; + std::string codec_id_; + std::vector<uint8> codec_private_; + int64 seek_preroll_; + int64 codec_delay_; + scoped_ptr<WebMContentEncodingsClient> track_content_encodings_client_; + + int64 audio_track_num_; + int64 video_track_num_; + bool ignore_text_tracks_; + TextTracks text_tracks_; + std::set<int64> ignored_tracks_; + std::string audio_encryption_key_id_; + std::string video_encryption_key_id_; + LogCB log_cb_; + + WebMAudioClient audio_client_; + AudioDecoderConfig audio_decoder_config_; + + WebMVideoClient video_client_; + VideoDecoderConfig video_decoder_config_; + + DISALLOW_COPY_AND_ASSIGN(WebMTracksParser); +}; + +} // namespace media + +#endif // MEDIA_FORMATS_WEBM_WEBM_TRACKS_PARSER_H_ diff --git 
a/media/formats/webm/webm_tracks_parser_unittest.cc b/media/formats/webm/webm_tracks_parser_unittest.cc new file mode 100644 index 0000000..0f8e351 --- /dev/null +++ b/media/formats/webm/webm_tracks_parser_unittest.cc @@ -0,0 +1,122 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/logging.h" +#include "media/formats/webm/tracks_builder.h" +#include "media/formats/webm/webm_constants.h" +#include "media/formats/webm/webm_tracks_parser.h" +#include "testing/gmock/include/gmock/gmock.h" +#include "testing/gtest/include/gtest/gtest.h" + +using ::testing::InSequence; +using ::testing::Return; +using ::testing::_; + +namespace media { + +class WebMTracksParserTest : public testing::Test { + public: + WebMTracksParserTest() {} +}; + +static void VerifyTextTrackInfo(const uint8* buffer, + int buffer_size, + TextKind text_kind, + const std::string& name, + const std::string& language) { + scoped_ptr<WebMTracksParser> parser(new WebMTracksParser(LogCB(), false)); + + int result = parser->Parse(buffer, buffer_size); + EXPECT_GT(result, 0); + EXPECT_EQ(result, buffer_size); + + const WebMTracksParser::TextTracks& text_tracks = parser->text_tracks(); + EXPECT_EQ(text_tracks.size(), WebMTracksParser::TextTracks::size_type(1)); + + const WebMTracksParser::TextTracks::const_iterator itr = text_tracks.begin(); + EXPECT_EQ(itr->first, 1); // track num + + const TextTrackConfig& config = itr->second; + EXPECT_EQ(config.kind(), text_kind); + EXPECT_TRUE(config.label() == name); + EXPECT_TRUE(config.language() == language); +} + +TEST_F(WebMTracksParserTest, SubtitleNoNameNoLang) { + InSequence s; + + TracksBuilder tb; + tb.AddTrack(1, kWebMTrackTypeSubtitlesOrCaptions, 1, + kWebMCodecSubtitles, "", ""); + + const std::vector<uint8> buf = tb.Finish(); + VerifyTextTrackInfo(&buf[0], buf.size(), kTextSubtitles, "", ""); +} + 
+TEST_F(WebMTracksParserTest, SubtitleYesNameNoLang) { + InSequence s; + + TracksBuilder tb; + tb.AddTrack(1, kWebMTrackTypeSubtitlesOrCaptions, 1, + kWebMCodecSubtitles, "Spock", ""); + + const std::vector<uint8> buf = tb.Finish(); + VerifyTextTrackInfo(&buf[0], buf.size(), kTextSubtitles, "Spock", ""); +} + +TEST_F(WebMTracksParserTest, SubtitleNoNameYesLang) { + InSequence s; + + TracksBuilder tb; + tb.AddTrack(1, kWebMTrackTypeSubtitlesOrCaptions, 1, + kWebMCodecSubtitles, "", "eng"); + + const std::vector<uint8> buf = tb.Finish(); + VerifyTextTrackInfo(&buf[0], buf.size(), kTextSubtitles, "", "eng"); +} + +TEST_F(WebMTracksParserTest, SubtitleYesNameYesLang) { + InSequence s; + + TracksBuilder tb; + tb.AddTrack(1, kWebMTrackTypeSubtitlesOrCaptions, 1, + kWebMCodecSubtitles, "Picard", "fre"); + + const std::vector<uint8> buf = tb.Finish(); + VerifyTextTrackInfo(&buf[0], buf.size(), kTextSubtitles, "Picard", "fre"); +} + +TEST_F(WebMTracksParserTest, IgnoringTextTracks) { + InSequence s; + + TracksBuilder tb; + tb.AddTrack(1, kWebMTrackTypeSubtitlesOrCaptions, 1, + kWebMCodecSubtitles, "Subtitles", "fre"); + tb.AddTrack(2, kWebMTrackTypeSubtitlesOrCaptions, 2, + kWebMCodecSubtitles, "Commentary", "fre"); + + const std::vector<uint8> buf = tb.Finish(); + scoped_ptr<WebMTracksParser> parser(new WebMTracksParser(LogCB(), true)); + + int result = parser->Parse(&buf[0], buf.size()); + EXPECT_GT(result, 0); + EXPECT_EQ(result, static_cast<int>(buf.size())); + + EXPECT_EQ(parser->text_tracks().size(), 0u); + + const std::set<int64>& ignored_tracks = parser->ignored_tracks(); + EXPECT_TRUE(ignored_tracks.find(1) != ignored_tracks.end()); + EXPECT_TRUE(ignored_tracks.find(2) != ignored_tracks.end()); + + // Test again w/o ignoring the test tracks. 
+ parser.reset(new WebMTracksParser(LogCB(), false)); + + result = parser->Parse(&buf[0], buf.size()); + EXPECT_GT(result, 0); + + EXPECT_EQ(parser->ignored_tracks().size(), 0u); + EXPECT_EQ(parser->text_tracks().size(), 2u); +} + +} // namespace media diff --git a/media/formats/webm/webm_video_client.cc b/media/formats/webm/webm_video_client.cc new file mode 100644 index 0000000..5ea398e --- /dev/null +++ b/media/formats/webm/webm_video_client.cc @@ -0,0 +1,163 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/webm/webm_video_client.h" + +#include "media/base/video_decoder_config.h" +#include "media/formats/webm/webm_constants.h" + +namespace media { + +WebMVideoClient::WebMVideoClient(const LogCB& log_cb) + : log_cb_(log_cb) { + Reset(); +} + +WebMVideoClient::~WebMVideoClient() { +} + +void WebMVideoClient::Reset() { + pixel_width_ = -1; + pixel_height_ = -1; + crop_bottom_ = -1; + crop_top_ = -1; + crop_left_ = -1; + crop_right_ = -1; + display_width_ = -1; + display_height_ = -1; + display_unit_ = -1; + alpha_mode_ = -1; +} + +bool WebMVideoClient::InitializeConfig( + const std::string& codec_id, const std::vector<uint8>& codec_private, + bool is_encrypted, VideoDecoderConfig* config) { + DCHECK(config); + + VideoCodec video_codec = kUnknownVideoCodec; + VideoCodecProfile profile = VIDEO_CODEC_PROFILE_UNKNOWN; + if (codec_id == "V_VP8") { + video_codec = kCodecVP8; + profile = VP8PROFILE_MAIN; + } else if (codec_id == "V_VP9") { + video_codec = kCodecVP9; + profile = VP9PROFILE_MAIN; + } else { + MEDIA_LOG(log_cb_) << "Unsupported video codec_id " << codec_id; + return false; + } + + VideoFrame::Format format = + (alpha_mode_ == 1) ? VideoFrame::YV12A : VideoFrame::YV12; + + if (pixel_width_ <= 0 || pixel_height_ <= 0) + return false; + + // Set crop and display unit defaults if these elements are not present. 
+ if (crop_bottom_ == -1) + crop_bottom_ = 0; + + if (crop_top_ == -1) + crop_top_ = 0; + + if (crop_left_ == -1) + crop_left_ = 0; + + if (crop_right_ == -1) + crop_right_ = 0; + + if (display_unit_ == -1) + display_unit_ = 0; + + gfx::Size coded_size(pixel_width_, pixel_height_); + gfx::Rect visible_rect(crop_top_, crop_left_, + pixel_width_ - (crop_left_ + crop_right_), + pixel_height_ - (crop_top_ + crop_bottom_)); + gfx::Size natural_size = coded_size; + if (display_unit_ == 0) { + if (display_width_ <= 0) + display_width_ = pixel_width_; + if (display_height_ <= 0) + display_height_ = pixel_height_; + natural_size = gfx::Size(display_width_, display_height_); + } else if (display_unit_ == 3) { + if (display_width_ <= 0 || display_height_ <= 0) + return false; + natural_size = gfx::Size(display_width_, display_height_); + } else { + MEDIA_LOG(log_cb_) << "Unsupported display unit type " << display_unit_; + return false; + } + const uint8* extra_data = NULL; + size_t extra_data_size = 0; + if (codec_private.size() > 0) { + extra_data = &codec_private[0]; + extra_data_size = codec_private.size(); + } + + config->Initialize( + video_codec, profile, format, coded_size, visible_rect, natural_size, + extra_data, extra_data_size, is_encrypted, true); + return config->IsValidConfig(); +} + +bool WebMVideoClient::OnUInt(int id, int64 val) { + int64* dst = NULL; + + switch (id) { + case kWebMIdPixelWidth: + dst = &pixel_width_; + break; + case kWebMIdPixelHeight: + dst = &pixel_height_; + break; + case kWebMIdPixelCropTop: + dst = &crop_top_; + break; + case kWebMIdPixelCropBottom: + dst = &crop_bottom_; + break; + case kWebMIdPixelCropLeft: + dst = &crop_left_; + break; + case kWebMIdPixelCropRight: + dst = &crop_right_; + break; + case kWebMIdDisplayWidth: + dst = &display_width_; + break; + case kWebMIdDisplayHeight: + dst = &display_height_; + break; + case kWebMIdDisplayUnit: + dst = &display_unit_; + break; + case kWebMIdAlphaMode: + dst = &alpha_mode_; + break; + 
default: + return true; + } + + if (*dst != -1) { + MEDIA_LOG(log_cb_) << "Multiple values for id " << std::hex << id + << " specified (" << *dst << " and " << val << ")"; + return false; + } + + *dst = val; + return true; +} + +bool WebMVideoClient::OnBinary(int id, const uint8* data, int size) { + // Accept binary fields we don't care about for now. + return true; +} + +bool WebMVideoClient::OnFloat(int id, double val) { + // Accept float fields we don't care about for now. + return true; +} + +} // namespace media diff --git a/media/formats/webm/webm_video_client.h b/media/formats/webm/webm_video_client.h new file mode 100644 index 0000000..5545e0a --- /dev/null +++ b/media/formats/webm/webm_video_client.h @@ -0,0 +1,61 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_WEBM_WEBM_VIDEO_CLIENT_H_ +#define MEDIA_FORMATS_WEBM_WEBM_VIDEO_CLIENT_H_ + +#include <string> +#include <vector> + +#include "media/base/media_log.h" +#include "media/formats/webm/webm_parser.h" + +namespace media { +class VideoDecoderConfig; + +// Helper class used to parse a Video element inside a TrackEntry element. +class WebMVideoClient : public WebMParserClient { + public: + explicit WebMVideoClient(const LogCB& log_cb); + virtual ~WebMVideoClient(); + + // Reset this object's state so it can process a new video track element. + void Reset(); + + // Initialize |config| with the data in |codec_id|, |codec_private|, + // |is_encrypted| and the fields parsed from the last video track element this + // object was used to parse. + // Returns true if |config| was successfully initialized. + // Returns false if there was unexpected values in the provided parameters or + // video track element fields. The contents of |config| are undefined in this + // case and should not be relied upon. 
+ bool InitializeConfig(const std::string& codec_id, + const std::vector<uint8>& codec_private, + bool is_encrypted, + VideoDecoderConfig* config); + + private: + // WebMParserClient implementation. + virtual bool OnUInt(int id, int64 val) OVERRIDE; + virtual bool OnBinary(int id, const uint8* data, int size) OVERRIDE; + virtual bool OnFloat(int id, double val) OVERRIDE; + + LogCB log_cb_; + int64 pixel_width_; + int64 pixel_height_; + int64 crop_bottom_; + int64 crop_top_; + int64 crop_left_; + int64 crop_right_; + int64 display_width_; + int64 display_height_; + int64 display_unit_; + int64 alpha_mode_; + + DISALLOW_COPY_AND_ASSIGN(WebMVideoClient); +}; + +} // namespace media + +#endif // MEDIA_FORMATS_WEBM_WEBM_VIDEO_CLIENT_H_ diff --git a/media/formats/webm/webm_webvtt_parser.cc b/media/formats/webm/webm_webvtt_parser.cc new file mode 100644 index 0000000..64de1ef --- /dev/null +++ b/media/formats/webm/webm_webvtt_parser.cc @@ -0,0 +1,78 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "media/formats/webm/webm_webvtt_parser.h" + +namespace media { + +void WebMWebVTTParser::Parse(const uint8* payload, int payload_size, + std::string* id, + std::string* settings, + std::string* content) { + WebMWebVTTParser parser(payload, payload_size); + parser.Parse(id, settings, content); +} + +WebMWebVTTParser::WebMWebVTTParser(const uint8* payload, int payload_size) + : ptr_(payload), + ptr_end_(payload + payload_size) { +} + +void WebMWebVTTParser::Parse(std::string* id, + std::string* settings, + std::string* content) { + ParseLine(id); + ParseLine(settings); + content->assign(ptr_, ptr_end_); +} + +bool WebMWebVTTParser::GetByte(uint8* byte) { + if (ptr_ >= ptr_end_) + return false; // indicates end-of-stream + + *byte = *ptr_++; + return true; +} + +void WebMWebVTTParser::UngetByte() { + --ptr_; +} + +void WebMWebVTTParser::ParseLine(std::string* line) { + line->clear(); + + // Consume characters from the stream, until we reach end-of-line. + + // The WebVTT spec states that lines may be terminated in any of the following + // three ways: + // LF + // CR + // CR LF + + // The spec is here: + // http://wiki.webmproject.org/webm-metadata/temporal-metadata/webvtt-in-webm + + enum { + kLF = '\x0A', + kCR = '\x0D' + }; + + for (;;) { + uint8 byte; + + if (!GetByte(&byte) || byte == kLF) + return; + + if (byte == kCR) { + if (GetByte(&byte) && byte != kLF) + UngetByte(); + + return; + } + + line->push_back(byte); + } +} + +} // namespace media diff --git a/media/formats/webm/webm_webvtt_parser.h b/media/formats/webm/webm_webvtt_parser.h new file mode 100644 index 0000000..12bbbd4 --- /dev/null +++ b/media/formats/webm/webm_webvtt_parser.h @@ -0,0 +1,49 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef MEDIA_FORMATS_WEBM_WEBM_WEBVTT_PARSER_H_ +#define MEDIA_FORMATS_WEBM_WEBM_WEBVTT_PARSER_H_ + +#include <string> + +#include "base/basictypes.h" +#include "media/base/media_export.h" + +namespace media { + +class MEDIA_EXPORT WebMWebVTTParser { + public: + // Utility function to parse the WebVTT cue from a byte stream. + static void Parse(const uint8* payload, int payload_size, + std::string* id, + std::string* settings, + std::string* content); + + private: + // The payload is the embedded WebVTT cue, stored in a WebM block. + // The parser treats this as a UTF-8 byte stream. + WebMWebVTTParser(const uint8* payload, int payload_size); + + // Parse the cue identifier, settings, and content from the stream. + void Parse(std::string* id, std::string* settings, std::string* content); + // Remove a byte from the stream, advancing the stream pointer. + // Returns true if a character was returned; false means "end of stream". + bool GetByte(uint8* byte); + + // Backup the stream pointer. + void UngetByte(); + + // Parse a line of text from the stream. + void ParseLine(std::string* line); + + // Represents the portion of the stream that has not been consumed yet. + const uint8* ptr_; + const uint8* const ptr_end_; + + DISALLOW_COPY_AND_ASSIGN(WebMWebVTTParser); +}; + +} // namespace media + +#endif // MEDIA_FORMATS_WEBM_WEBM_WEBVTT_PARSER_H_ diff --git a/media/formats/webm/webm_webvtt_parser_unittest.cc b/media/formats/webm/webm_webvtt_parser_unittest.cc new file mode 100644 index 0000000..ecdabd4 --- /dev/null +++ b/media/formats/webm/webm_webvtt_parser_unittest.cc @@ -0,0 +1,105 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "media/formats/webm/webm_webvtt_parser.h" +#include "testing/gmock/include/gmock/gmock.h" +#include "testing/gtest/include/gtest/gtest.h" + +using ::testing::InSequence; + +namespace media { + +typedef std::vector<uint8> Cue; + +static Cue EncodeCue(const std::string& id, + const std::string& settings, + const std::string& content) { + const std::string result = id + '\n' + settings + '\n' + content; + const uint8* const buf = reinterpret_cast<const uint8*>(result.data()); + return Cue(buf, buf + result.length()); +} + +static void DecodeCue(const Cue& cue, + std::string* id, + std::string* settings, + std::string* content) { + WebMWebVTTParser::Parse(&cue[0], static_cast<int>(cue.size()), + id, settings, content); +} + +class WebMWebVTTParserTest : public testing::Test { + public: + WebMWebVTTParserTest() {} +}; + +TEST_F(WebMWebVTTParserTest, Blank) { + InSequence s; + + const Cue cue = EncodeCue("", "", "Subtitle"); + std::string id, settings, content; + + DecodeCue(cue, &id, &settings, &content); + EXPECT_EQ(id, ""); + EXPECT_EQ(settings, ""); + EXPECT_EQ(content, "Subtitle"); +} + +TEST_F(WebMWebVTTParserTest, Id) { + InSequence s; + + for (int i = 1; i <= 9; ++i) { + const std::string idsrc(1, '0'+i); + const Cue cue = EncodeCue(idsrc, "", "Subtitle"); + std::string id, settings, content; + + DecodeCue(cue, &id, &settings, &content); + EXPECT_EQ(id, idsrc); + EXPECT_EQ(settings, ""); + EXPECT_EQ(content, "Subtitle"); + } +} + +TEST_F(WebMWebVTTParserTest, Settings) { + InSequence s; + + enum { kSettingsCount = 4 }; + const char* const settings_str[kSettingsCount] = { + "vertical:lr", + "line:50%", + "position:42%", + "vertical:rl line:42% position:100%" }; + + for (int i = 0; i < kSettingsCount; ++i) { + const Cue cue = EncodeCue("", settings_str[i], "Subtitle"); + std::string id, settings, content; + + DecodeCue(cue, &id, &settings, &content); + EXPECT_EQ(id, ""); + EXPECT_EQ(settings, settings_str[i]); + EXPECT_EQ(content, "Subtitle"); + } +} + 
+TEST_F(WebMWebVTTParserTest, Content) { + InSequence s; + + enum { kContentCount = 4 }; + const char* const content_str[kContentCount] = { + "Subtitle", + "Another Subtitle", + "Yet Another Subtitle", + "Another Subtitle\nSplit Across Two Lines" }; + + for (int i = 0; i < kContentCount; ++i) { + const Cue cue = EncodeCue("", "", content_str[i]); + std::string id, settings, content; + + DecodeCue(cue, &id, &settings, &content); + EXPECT_EQ(id, ""); + EXPECT_EQ(settings, ""); + EXPECT_EQ(content, content_str[i]); + } +} + +} // namespace media |