diff options
author | damienv@chromium.org <damienv@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-02-11 04:41:20 +0000 |
---|---|---|
committer | damienv@chromium.org <damienv@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-02-11 04:41:20 +0000 |
commit | 8788b3d11f8942b89f7593d29d9dd94a36d09c78 (patch) | |
tree | 2be8ff94c02590a713517bdd5b12578b84bf1241 /media | |
parent | 89aaa13b2bbd747391cb71ad2f6d22e457991443 (diff) | |
download | chromium_src-8788b3d11f8942b89f7593d29d9dd94a36d09c78.zip chromium_src-8788b3d11f8942b89f7593d29d9dd94a36d09c78.tar.gz chromium_src-8788b3d11f8942b89f7593d29d9dd94a36d09c78.tar.bz2 |
Refactoring to use a single H264 parser in media code.
The H264 parser used by the Mpeg2 TS stream parser is now using internally the H264 parser provided in media/filters.
BUG=340426
Review URL: https://codereview.chromium.org/141033003
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@250345 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'media')
-rw-r--r-- | media/filters/h264_parser.cc | 95 | ||||
-rw-r--r-- | media/filters/h264_parser.h | 26 | ||||
-rw-r--r-- | media/formats/mp2t/es_parser_h264.cc | 622 | ||||
-rw-r--r-- | media/formats/mp2t/es_parser_h264.h | 53 |
4 files changed, 308 insertions, 488 deletions
diff --git a/media/filters/h264_parser.cc b/media/filters/h264_parser.cc index b559363..c03ee88 100644 --- a/media/filters/h264_parser.cc +++ b/media/filters/h264_parser.cc @@ -155,14 +155,13 @@ static inline bool IsStartCode(const uint8* data) { return data[0] == 0x00 && data[1] == 0x00 && data[2] == 0x01; } -// Find offset from start of data to next NALU start code -// and size of found start code (3 or 4 bytes). -static bool FindStartCode(const uint8* data, off_t data_size, - off_t* offset, - off_t* start_code_size) { +// static +bool H264Parser::FindStartCode(const uint8* data, off_t data_size, + off_t* offset, off_t* start_code_size) { + DCHECK_GE(data_size, 0); off_t bytes_left = data_size; - while (bytes_left > 3) { + while (bytes_left >= 3) { if (IsStartCode(data)) { // Found three-byte start code, set pointer at its beginning. *offset = data_size - bytes_left; @@ -182,44 +181,51 @@ static bool FindStartCode(const uint8* data, off_t data_size, --bytes_left; } - // End of data. + // End of data: offset is pointing to the first byte that was not considered + // as a possible start of a start code. + // Note: there is no security issue when receiving a negative |data_size| + // since in this case, |bytes_left| is equal to |data_size| and thus + // |*offset| is equal to 0 (valid offset). + *offset = data_size - bytes_left; + *start_code_size = 0; return false; } -// Find the next NALU in stream, returning its start offset without the start -// code (i.e. at the beginning of NALU data). -// Size will include trailing zero bits, and will be from start offset to -// before the start code of the next NALU (or end of stream). -static bool LocateNALU(const uint8* stream, off_t stream_size, - off_t* nalu_start_off, off_t* nalu_size) { - off_t start_code_size; - - // Find start code of the next NALU. - if (!FindStartCode(stream, stream_size, nalu_start_off, &start_code_size)) { +bool H264Parser::LocateNALU(off_t* nalu_size, off_t* start_code_size) { + // Find the start code of next NALU. + off_t nalu_start_off = 0; + off_t annexb_start_code_size = 0; + if (!FindStartCode(stream_, bytes_left_, + &nalu_start_off, &annexb_start_code_size)) { DVLOG(4) << "Could not find start code, end of stream?"; return false; } - // Discard its start code. - *nalu_start_off += start_code_size; - // Move the stream to the beginning of it (skip the start code). - stream_size -= *nalu_start_off; - stream += *nalu_start_off; - - // Find the start code of next NALU; if successful, NALU size is the number - // of bytes from after previous start code to before this one; - // if next start code is not found, it is still a valid NALU if there - // are still some bytes left after the first start code. - // nalu_size is the offset to the next start code - if (!FindStartCode(stream, stream_size, nalu_size, &start_code_size)) { - // end of stream (no next NALU), but still valid NALU if any bytes left - *nalu_size = stream_size; - if (*nalu_size < 1) { - DVLOG(3) << "End of stream"; - return false; - } + // Move the stream to the beginning of the NALU (pointing at the start code). + stream_ += nalu_start_off; + bytes_left_ -= nalu_start_off; + + const uint8* nalu_data = stream_ + annexb_start_code_size; + off_t max_nalu_data_size = bytes_left_ - annexb_start_code_size; + if (max_nalu_data_size <= 0) { + DVLOG(3) << "End of stream"; + return false; } + // Find the start code of next NALU; + // if successful, |nalu_size_without_start_code| is the number of bytes from + // after previous start code to before this one; + // if next start code is not found, it is still a valid NALU since there + // are some bytes left after the first start code: all the remaining bytes + // belong to the current NALU. + off_t next_start_code_size = 0; + off_t nalu_size_without_start_code = 0; + if (!FindStartCode(nalu_data, max_nalu_data_size, + &nalu_size_without_start_code, &next_start_code_size)) { + nalu_size_without_start_code = max_nalu_data_size; + } + *nalu_size = nalu_size_without_start_code + annexb_start_code_size; + *start_code_size = annexb_start_code_size; return true; } @@ -266,32 +272,31 @@ H264Parser::Result H264Parser::ReadSE(int* val) { } H264Parser::Result H264Parser::AdvanceToNextNALU(H264NALU* nalu) { - int data; - off_t off_to_nalu_start; - - if (!LocateNALU(stream_, bytes_left_, &off_to_nalu_start, &nalu->size)) { + off_t start_code_size; + off_t nalu_size_with_start_code; + if (!LocateNALU(&nalu_size_with_start_code, &start_code_size)) { DVLOG(4) << "Could not find next NALU, bytes left in stream: " << bytes_left_; return kEOStream; } - nalu->data = stream_ + off_to_nalu_start; + nalu->data = stream_ + start_code_size; + nalu->size = nalu_size_with_start_code - start_code_size; + DVLOG(4) << "NALU found: size=" << nalu_size_with_start_code; // Initialize bit reader at the start of found NALU. if (!br_.Initialize(nalu->data, nalu->size)) return kEOStream; - DVLOG(4) << "Looking for NALU, Stream bytes left: " << bytes_left_ - << " off to next nalu: " << off_to_nalu_start; - // Move parser state to after this NALU, so next time AdvanceToNextNALU // is called, we will effectively be skipping it; // other parsing functions will use the position saved // in bit reader for parsing, so we don't have to remember it here. - stream_ += off_to_nalu_start + nalu->size; - bytes_left_ -= off_to_nalu_start + nalu->size; + stream_ += nalu_size_with_start_code; + bytes_left_ -= nalu_size_with_start_code; // Read NALU header, skip the forbidden_zero_bit, but check for it. + int data; READ_BITS_OR_RETURN(1, &data); TRUE_OR_RETURN(data == 0); diff --git a/media/filters/h264_parser.h b/media/filters/h264_parser.h index f336032..2f214c7 100644 --- a/media/filters/h264_parser.h +++ b/media/filters/h264_parser.h @@ -29,7 +29,8 @@ struct MEDIA_EXPORT H264NALU { kSEIMessage = 6, kSPS = 7, kPPS = 8, - kEOSeq = 9, + kAUD = 9, + kEOSeq = 10, kEOStream = 11, kCodedSliceExtension = 20, }; @@ -256,6 +257,20 @@ class MEDIA_EXPORT H264Parser { kEOStream, // end of stream }; + // Find offset from start of data to next NALU start code + // and size of found start code (3 or 4 bytes). + // If no start code is found, offset is pointing to the first unprocessed byte + // (i.e. the first byte that was not considered as a possible start of a start + // code) and |*start_code_size| is set to 0. + // Preconditions: + // - |data_size| >= 0 + // Postconditions: + // - |*offset| is between 0 and |data_size| included. + // It is strictly less than |data_size| if |data_size| > 0. + // - |*start_code_size| is either 0, 3 or 4. + static bool FindStartCode(const uint8* data, off_t data_size, + off_t* offset, off_t* start_code_size); + H264Parser(); ~H264Parser(); @@ -308,6 +323,15 @@ class MEDIA_EXPORT H264Parser { Result ParseSEI(H264SEIMessage* sei_msg); private: + // Move the stream pointer to the beginning of the next NALU, + // i.e. pointing at the next start code. + // Return true if a NALU has been found. + // If a NALU is found: + // - its size in bytes is returned in |*nalu_size| and includes + // the start code as well as the trailing zero bits. + // - the size in bytes of the start code is returned in |*start_code_size|. + bool LocateNALU(off_t* nalu_size, off_t* start_code_size); + // Exp-Golomb code parsing as specified in chapter 9.1 of the spec. // Read one unsigned exp-Golomb code from the stream and return in |*val|. Result ReadUE(int* val); diff --git a/media/formats/mp2t/es_parser_h264.cc b/media/formats/mp2t/es_parser_h264.cc index b685c84..51f789e 100644 --- a/media/formats/mp2t/es_parser_h264.cc +++ b/media/formats/mp2t/es_parser_h264.cc @@ -6,104 +6,32 @@ #include "base/basictypes.h" #include "base/logging.h" -#include "media/base/bit_reader.h" +#include "base/numerics/safe_conversions.h" #include "media/base/buffers.h" #include "media/base/stream_parser_buffer.h" #include "media/base/video_frame.h" +#include "media/filters/h264_parser.h" +#include "media/formats/common/offset_byte_queue.h" #include "media/formats/mp2t/mp2t_common.h" #include "ui/gfx/rect.h" #include "ui/gfx/size.h" -static const int kExtendedSar = 255; - -// ISO 14496 part 10 -// VUI parameters: Table E-1 "Meaning of sample aspect ratio indicator" -static const int kSarTableSize = 17; -static const int kTableSarWidth[kSarTableSize] = { - 0, 1, 12, 10, 16, 40, 24, 20, 32, 80, 18, 15, 64, 160, 4, 3, 2 -}; -static const int kTableSarHeight[kSarTableSize] = { - 0, 1, 11, 11, 11, 33, 11, 11, 11, 33, 11, 11, 33, 99, 3, 2, 1 -}; - -// Remove the start code emulation prevention ( 0x000003 ) -// and return the size of the converted buffer. -// Note: Size of |buf_rbsp| should be at least |size| to accomodate -// the worst case. -static int ConvertToRbsp(const uint8* buf, int size, uint8* buf_rbsp) { - int rbsp_size = 0; - int zero_count = 0; - for (int k = 0; k < size; k++) { - if (buf[k] == 0x3 && zero_count >= 2) { - zero_count = 0; - continue; - } - if (buf[k] == 0) - zero_count++; - else - zero_count = 0; - buf_rbsp[rbsp_size++] = buf[k]; - } - return rbsp_size; -} - namespace media { namespace mp2t { -// ISO 14496 - Part 10: Table 7-1 "NAL unit type codes" -enum NalUnitType { - kNalUnitTypeNonIdrSlice = 1, - kNalUnitTypeIdrSlice = 5, - kNalUnitTypeSPS = 7, - kNalUnitTypePPS = 8, - kNalUnitTypeAUD = 9, -}; - -class BitReaderH264 : public BitReader { - public: - BitReaderH264(const uint8* data, off_t size) - : BitReader(data, size) { } - - // Read an unsigned exp-golomb value. - // Return true if successful. - bool ReadBitsExpGolomb(uint32* exp_golomb_value); -}; - -bool BitReaderH264::ReadBitsExpGolomb(uint32* exp_golomb_value) { - // Get the number of leading zeros. - int zero_count = 0; - while (true) { - int one_bit; - RCHECK(ReadBits(1, &one_bit)); - if (one_bit != 0) - break; - zero_count++; - } - - // If zero_count is greater than 31, the calculated value will overflow. - if (zero_count > 31) { - SkipBits(zero_count); - return false; - } - - // Read the actual value. - uint32 base = (1 << zero_count) - 1; - uint32 offset; - RCHECK(ReadBits(zero_count, &offset)); - *exp_golomb_value = base + offset; - - return true; -} +// An AUD NALU is at least 4 bytes: +// 3 bytes for the start code + 1 byte for the NALU type. +const int kMinAUDSize = 4; EsParserH264::EsParserH264( const NewVideoConfigCB& new_video_config_cb, const EmitBufferCB& emit_buffer_cb) : new_video_config_cb_(new_video_config_cb), emit_buffer_cb_(emit_buffer_cb), - es_pos_(0), - current_nal_pos_(-1), - current_access_unit_pos_(-1), - is_key_frame_(false) { + es_queue_(new media::OffsetByteQueue()), + h264_parser_(new H264Parser()), + current_access_unit_pos_(0), + next_access_unit_pos_(0) { } EsParserH264::~EsParserH264() { @@ -118,7 +46,6 @@ bool EsParserH264::Parse(const uint8* buf, int size, // for each access unit (but this is just a recommendation and some streams // do not comply with this recommendation). - // Link position |raw_es_size| in the ES stream with a timing descriptor. // HLS recommendation: "In AVC video, you should have both a DTS and a // PTS in each PES header". if (dts == kNoTimestamp() && pts == kNoTimestamp()) { @@ -129,391 +56,254 @@ bool EsParserH264::Parse(const uint8* buf, int size, timing_desc.pts = pts; timing_desc.dts = (dts != kNoTimestamp()) ? dts : pts; - int raw_es_size; - const uint8* raw_es; - es_byte_queue_.Peek(&raw_es, &raw_es_size); + // Link the end of the byte queue with the incoming timing descriptor. timing_desc_list_.push_back( - std::pair<int, TimingDesc>(raw_es_size, timing_desc)); + std::pair<int64, TimingDesc>(es_queue_->tail(), timing_desc)); // Add the incoming bytes to the ES queue. - es_byte_queue_.Push(buf, size); - - // Add NALs from the incoming buffer. - if (!ParseInternal()) - return false; - - // Discard emitted frames - // or every byte that was parsed so far if there is no current frame. - int skip_count = - (current_access_unit_pos_ >= 0) ? current_access_unit_pos_ : es_pos_; - DiscardEs(skip_count); - - return true; + es_queue_->Push(buf, size); + return ParseInternal(); } void EsParserH264::Flush() { - if (current_access_unit_pos_ < 0) + DVLOG(1) << "EsParserH264::Flush"; + if (!FindAUD(¤t_access_unit_pos_)) return; - // Force emitting the last access unit. - int next_aud_pos; - const uint8* raw_es; - es_byte_queue_.Peek(&raw_es, &next_aud_pos); - EmitFrameIfNeeded(next_aud_pos); - current_nal_pos_ = -1; - StartFrame(-1); - - // Discard the emitted frame. - DiscardEs(next_aud_pos); + // Simulate an additional AUD to force emitting the last access unit + // which is assumed to be complete at this point. + uint8 aud[] = { 0x00, 0x00, 0x01, 0x09 }; + es_queue_->Push(aud, sizeof(aud)); + ParseInternal(); } void EsParserH264::Reset() { DVLOG(1) << "EsParserH264::Reset"; - es_byte_queue_.Reset(); + es_queue_.reset(new media::OffsetByteQueue()); + h264_parser_.reset(new H264Parser()); + current_access_unit_pos_ = 0; + next_access_unit_pos_ = 0; timing_desc_list_.clear(); - es_pos_ = 0; - current_nal_pos_ = -1; - StartFrame(-1); last_video_decoder_config_ = VideoDecoderConfig(); } -bool EsParserH264::ParseInternal() { - int raw_es_size; - const uint8* raw_es; - es_byte_queue_.Peek(&raw_es, &raw_es_size); - - DCHECK_GE(es_pos_, 0); - DCHECK_LT(es_pos_, raw_es_size); - - // Resume h264 es parsing where it was left. - for ( ; es_pos_ < raw_es_size - 4; es_pos_++) { - // Make sure the syncword is either 00 00 00 01 or 00 00 01 - if (raw_es[es_pos_ + 0] != 0 || raw_es[es_pos_ + 1] != 0) - continue; - int syncword_length = 0; - if (raw_es[es_pos_ + 2] == 0 && raw_es[es_pos_ + 3] == 1) - syncword_length = 4; - else if (raw_es[es_pos_ + 2] == 1) - syncword_length = 3; - else - continue; - - // Parse the current NAL (and the new NAL then becomes the current one). - if (current_nal_pos_ >= 0) { - int nal_size = es_pos_ - current_nal_pos_; - DCHECK_GT(nal_size, 0); - RCHECK(NalParser(&raw_es[current_nal_pos_], nal_size)); - } - current_nal_pos_ = es_pos_ + syncword_length; - - // Retrieve the NAL type. - int nal_header = raw_es[current_nal_pos_]; - int forbidden_zero_bit = (nal_header >> 7) & 0x1; - RCHECK(forbidden_zero_bit == 0); - NalUnitType nal_unit_type = static_cast<NalUnitType>(nal_header & 0x1f); - DVLOG(LOG_LEVEL_ES) << "nal: offset=" << es_pos_ - << " type=" << nal_unit_type; - - // Emit a frame if needed. - if (nal_unit_type == kNalUnitTypeAUD) - RCHECK(EmitFrameIfNeeded(es_pos_)); - - // Skip the syncword. - es_pos_ += syncword_length; +bool EsParserH264::FindAUD(int64* stream_pos) { + while (true) { + const uint8* es; + int size; + es_queue_->PeekAt(*stream_pos, &es, &size); + + // Find a start code and move the stream to the start code parser position. + off_t start_code_offset; + off_t start_code_size; + bool start_code_found = H264Parser::FindStartCode( + es, size, &start_code_offset, &start_code_size); + *stream_pos += start_code_offset; + + // No H264 start code found or NALU type not available yet. + if (!start_code_found || start_code_offset + start_code_size >= size) + return false; + + // Exit the parser loop when an AUD is found. + // Note: NALU header for an AUD: + // - nal_ref_idc must be 0 + // - nal_unit_type must be H264NALU::kAUD + if (es[start_code_offset + start_code_size] == H264NALU::kAUD) + break; + + // The current NALU is not an AUD, skip the start code + // and continue parsing the stream. + *stream_pos += start_code_size; } return true; } -bool EsParserH264::EmitFrameIfNeeded(int next_aud_pos) { - // There is no current frame: start a new frame. - if (current_access_unit_pos_ < 0) { - StartFrame(next_aud_pos); +bool EsParserH264::ParseInternal() { + DCHECK_LE(es_queue_->head(), current_access_unit_pos_); + DCHECK_LE(current_access_unit_pos_, next_access_unit_pos_); + DCHECK_LE(next_access_unit_pos_, es_queue_->tail()); + + // Find the next AUD located at or after |current_access_unit_pos_|. This is + // needed since initially |current_access_unit_pos_| might not point to + // an AUD. + // Discard all the data before the updated |current_access_unit_pos_| + // since it won't be used again. + bool aud_found = FindAUD(¤t_access_unit_pos_); + es_queue_->Trim(current_access_unit_pos_); + if (next_access_unit_pos_ < current_access_unit_pos_) + next_access_unit_pos_ = current_access_unit_pos_; + + // Resume parsing later if no AUD was found. + if (!aud_found) + return true; + + // Find the next AUD to make sure we have a complete access unit. + if (next_access_unit_pos_ < current_access_unit_pos_ + kMinAUDSize) { + next_access_unit_pos_ = current_access_unit_pos_ + kMinAUDSize; + DCHECK_LE(next_access_unit_pos_, es_queue_->tail()); + } + if (!FindAUD(&next_access_unit_pos_)) return true; + + // At this point, we know we have a full access unit. + bool is_key_frame = false; + int pps_id_for_access_unit = -1; + + const uint8* es; + int size; + es_queue_->PeekAt(current_access_unit_pos_, &es, &size); + int access_unit_size = base::checked_cast<int, int64>( + next_access_unit_pos_ - current_access_unit_pos_); + DCHECK_LE(access_unit_size, size); + h264_parser_->SetStream(es, access_unit_size); + + while (true) { + bool is_eos = false; + H264NALU nalu; + switch (h264_parser_->AdvanceToNextNALU(&nalu)) { + case H264Parser::kOk: + break; + case H264Parser::kInvalidStream: + case H264Parser::kUnsupportedStream: + return false; + case H264Parser::kEOStream: + is_eos = true; + break; + } + if (is_eos) + break; + + switch (nalu.nal_unit_type) { + case H264NALU::kAUD: { + DVLOG(LOG_LEVEL_ES) << "NALU: AUD"; + break; + } + case H264NALU::kSPS: { + DVLOG(LOG_LEVEL_ES) << "NALU: SPS"; + int sps_id; + if (h264_parser_->ParseSPS(&sps_id) != H264Parser::kOk) + return false; + break; + } + case H264NALU::kPPS: { + DVLOG(LOG_LEVEL_ES) << "NALU: PPS"; + int pps_id; + if (h264_parser_->ParsePPS(&pps_id) != H264Parser::kOk) + return false; + break; + } + case H264NALU::kIDRSlice: + case H264NALU::kNonIDRSlice: { + is_key_frame = (nalu.nal_unit_type == H264NALU::kIDRSlice); + DVLOG(LOG_LEVEL_ES) << "NALU: slice IDR=" << is_key_frame; + H264SliceHeader shdr; + if (h264_parser_->ParseSliceHeader(nalu, &shdr) != H264Parser::kOk) { + // Only accept an invalid SPS/PPS at the beginning when the stream + // does not necessarily start with an SPS/PPS/IDR. + // TODO(damienv): Should be able to differentiate a missing SPS/PPS + // from a slice header parsing error. + if (last_video_decoder_config_.IsValidConfig()) + return false; + } else { + pps_id_for_access_unit = shdr.pic_parameter_set_id; + } + break; + } + default: { + DVLOG(LOG_LEVEL_ES) << "NALU: " << nalu.nal_unit_type; + } + } } + // Emit a frame and move the stream to the next AUD position. + RCHECK(EmitFrame(current_access_unit_pos_, access_unit_size, + is_key_frame, pps_id_for_access_unit)); + current_access_unit_pos_ = next_access_unit_pos_; + es_queue_->Trim(current_access_unit_pos_); + + return true; +} + +bool EsParserH264::EmitFrame(int64 access_unit_pos, int access_unit_size, + bool is_key_frame, int pps_id) { // Get the access unit timing info. TimingDesc current_timing_desc = {kNoTimestamp(), kNoTimestamp()}; while (!timing_desc_list_.empty() && - timing_desc_list_.front().first <= current_access_unit_pos_) { + timing_desc_list_.front().first <= access_unit_pos) { current_timing_desc = timing_desc_list_.front().second; timing_desc_list_.pop_front(); } - if (current_timing_desc.pts == kNoTimestamp()) return false; + // Update the video decoder configuration if needed. + const H264PPS* pps = h264_parser_->GetPPS(pps_id); + if (!pps) { + // Only accept an invalid PPS at the beginning when the stream + // does not necessarily start with an SPS/PPS/IDR. + // In this case, the initial frames are conveyed to the upper layer with + // an invalid VideoDecoderConfig and it's up to the upper layer + // to process this kind of frame accordingly. + if (last_video_decoder_config_.IsValidConfig()) + return false; + } else { + const H264SPS* sps = h264_parser_->GetSPS(pps->seq_parameter_set_id); + if (!sps) + return false; + RCHECK(UpdateVideoDecoderConfig(sps)); + } + // Emit a frame. - int raw_es_size; - const uint8* raw_es; - es_byte_queue_.Peek(&raw_es, &raw_es_size); - int access_unit_size = next_aud_pos - current_access_unit_pos_; + DVLOG(LOG_LEVEL_ES) << "Emit frame: stream_pos=" << current_access_unit_pos_ + << " size=" << access_unit_size; + int es_size; + const uint8* es; + es_queue_->PeekAt(current_access_unit_pos_, &es, &es_size); + CHECK_GE(es_size, access_unit_size); // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId // type and allow multiple video tracks. See https://crbug.com/341581. scoped_refptr<StreamParserBuffer> stream_parser_buffer = StreamParserBuffer::CopyFrom( - &raw_es[current_access_unit_pos_], + es, access_unit_size, - is_key_frame_, + is_key_frame, DemuxerStream::VIDEO, 0); stream_parser_buffer->SetDecodeTimestamp(current_timing_desc.dts); stream_parser_buffer->set_timestamp(current_timing_desc.pts); emit_buffer_cb_.Run(stream_parser_buffer); - - // Set the current frame position to the next AUD position. - StartFrame(next_aud_pos); return true; } -void EsParserH264::StartFrame(int aud_pos) { - // Two cases: - // - if aud_pos < 0, clear the current frame and set |is_key_frame| to a - // default value (false). - // - if aud_pos >= 0, start a new frame and set |is_key_frame| to true - // |is_key_frame_| will be updated while parsing the NALs of that frame. - // If any NAL is a non IDR NAL, it will be set to false. - current_access_unit_pos_ = aud_pos; - is_key_frame_ = (aud_pos >= 0); -} - -void EsParserH264::DiscardEs(int nbytes) { - DCHECK_GE(nbytes, 0); - if (nbytes == 0) - return; - - // Update the position of - // - the parser, - // - the current NAL, - // - the current access unit. - es_pos_ -= nbytes; - if (es_pos_ < 0) - es_pos_ = 0; - - if (current_nal_pos_ >= 0) { - DCHECK_GE(current_nal_pos_, nbytes); - current_nal_pos_ -= nbytes; - } - if (current_access_unit_pos_ >= 0) { - DCHECK_GE(current_access_unit_pos_, nbytes); - current_access_unit_pos_ -= nbytes; - } - - // Update the timing information accordingly. - std::list<std::pair<int, TimingDesc> >::iterator timing_it - = timing_desc_list_.begin(); - for (; timing_it != timing_desc_list_.end(); ++timing_it) - timing_it->first -= nbytes; - - // Discard |nbytes| of ES. - es_byte_queue_.Pop(nbytes); -} - -bool EsParserH264::NalParser(const uint8* buf, int size) { - // Get the NAL header. - if (size < 1) { - DVLOG(1) << "NalParser: incomplete NAL"; - return false; - } - int nal_header = buf[0]; - buf += 1; - size -= 1; - - int forbidden_zero_bit = (nal_header >> 7) & 0x1; - if (forbidden_zero_bit != 0) - return false; - int nal_ref_idc = (nal_header >> 5) & 0x3; - int nal_unit_type = nal_header & 0x1f; - - // Process the NAL content. - switch (nal_unit_type) { - case kNalUnitTypeSPS: - DVLOG(LOG_LEVEL_ES) << "NAL: SPS"; - // |nal_ref_idc| should not be 0 for a SPS. - if (nal_ref_idc == 0) - return false; - return ProcessSPS(buf, size); - case kNalUnitTypeIdrSlice: - DVLOG(LOG_LEVEL_ES) << "NAL: IDR slice"; - return true; - case kNalUnitTypeNonIdrSlice: - DVLOG(LOG_LEVEL_ES) << "NAL: Non IDR slice"; - is_key_frame_ = false; - return true; - case kNalUnitTypePPS: - DVLOG(LOG_LEVEL_ES) << "NAL: PPS"; - return true; - case kNalUnitTypeAUD: - DVLOG(LOG_LEVEL_ES) << "NAL: AUD"; - return true; - default: - DVLOG(LOG_LEVEL_ES) << "NAL: " << nal_unit_type; - return true; - } - - NOTREACHED(); - return false; -} - -bool EsParserH264::ProcessSPS(const uint8* buf, int size) { - if (size <= 0) - return false; - - // Removes start code emulation prevention. - // TODO(damienv): refactoring in media/base - // so as to have a unique H264 bit reader in Chrome. - scoped_ptr<uint8[]> buf_rbsp(new uint8[size]); - int rbsp_size = ConvertToRbsp(buf, size, buf_rbsp.get()); - - BitReaderH264 bit_reader(buf_rbsp.get(), rbsp_size); - - int profile_idc; - int constraint_setX_flag; - int level_idc; - uint32 seq_parameter_set_id; - uint32 log2_max_frame_num_minus4; - uint32 pic_order_cnt_type; - RCHECK(bit_reader.ReadBits(8, &profile_idc)); - RCHECK(bit_reader.ReadBits(8, &constraint_setX_flag)); - RCHECK(bit_reader.ReadBits(8, &level_idc)); - RCHECK(bit_reader.ReadBitsExpGolomb(&seq_parameter_set_id)); - - if (profile_idc == 100 || profile_idc == 110 || - profile_idc == 122 || profile_idc == 244 || - profile_idc == 44 || profile_idc == 83 || - profile_idc == 86 || profile_idc == 118 || - profile_idc == 128) { - uint32 chroma_format_idc; - RCHECK(bit_reader.ReadBitsExpGolomb(&chroma_format_idc)); - if (chroma_format_idc == 3) { - int separate_colour_plane_flag; - RCHECK(bit_reader.ReadBits(1, &separate_colour_plane_flag)); - } - uint32 bit_depth_luma_minus8; - uint32 bit_depth_chroma_minus8; - int qpprime_y_zero_transform_bypass_flag; - int seq_scaling_matrix_present_flag; - RCHECK(bit_reader.ReadBitsExpGolomb(&bit_depth_luma_minus8)); - RCHECK(bit_reader.ReadBitsExpGolomb(&bit_depth_chroma_minus8)); - RCHECK(bit_reader.ReadBits(1, &qpprime_y_zero_transform_bypass_flag)); - RCHECK(bit_reader.ReadBits(1, &seq_scaling_matrix_present_flag)); - if (seq_scaling_matrix_present_flag) { - int skip_count = (chroma_format_idc != 3) ? 8 : 12; - RCHECK(bit_reader.SkipBits(skip_count)); - } - } - - RCHECK(bit_reader.ReadBitsExpGolomb(&log2_max_frame_num_minus4)); - RCHECK(bit_reader.ReadBitsExpGolomb(&pic_order_cnt_type)); - - // |pic_order_cnt_type| shall be in the range of 0 to 2. - RCHECK(pic_order_cnt_type <= 2); - if (pic_order_cnt_type == 0) { - uint32 log2_max_pic_order_cnt_lsb_minus4; - RCHECK(bit_reader.ReadBitsExpGolomb(&log2_max_pic_order_cnt_lsb_minus4)); - } else if (pic_order_cnt_type == 1) { - // Note: |offset_for_non_ref_pic| and |offset_for_top_to_bottom_field| - // corresponds to their codenum not to their actual value. - int delta_pic_order_always_zero_flag; - uint32 offset_for_non_ref_pic; - uint32 offset_for_top_to_bottom_field; - uint32 num_ref_frames_in_pic_order_cnt_cycle; - RCHECK(bit_reader.ReadBits(1, &delta_pic_order_always_zero_flag)); - RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_non_ref_pic)); - RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_top_to_bottom_field)); - RCHECK( - bit_reader.ReadBitsExpGolomb(&num_ref_frames_in_pic_order_cnt_cycle)); - for (uint32 i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++) { - uint32 offset_for_ref_frame_codenum; - RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_ref_frame_codenum)); - } - } - - uint32 num_ref_frames; - int gaps_in_frame_num_value_allowed_flag; - uint32 pic_width_in_mbs_minus1; - uint32 pic_height_in_map_units_minus1; - RCHECK(bit_reader.ReadBitsExpGolomb(&num_ref_frames)); - RCHECK(bit_reader.ReadBits(1, &gaps_in_frame_num_value_allowed_flag)); - RCHECK(bit_reader.ReadBitsExpGolomb(&pic_width_in_mbs_minus1)); - RCHECK(bit_reader.ReadBitsExpGolomb(&pic_height_in_map_units_minus1)); - - int frame_mbs_only_flag; - RCHECK(bit_reader.ReadBits(1, &frame_mbs_only_flag)); - if (!frame_mbs_only_flag) { - int mb_adaptive_frame_field_flag; - RCHECK(bit_reader.ReadBits(1, &mb_adaptive_frame_field_flag)); - } - - int direct_8x8_inference_flag; - RCHECK(bit_reader.ReadBits(1, &direct_8x8_inference_flag)); - - int frame_cropping_flag; - uint32 frame_crop_left_offset = 0; - uint32 frame_crop_right_offset = 0; - uint32 frame_crop_top_offset = 0; - uint32 frame_crop_bottom_offset = 0; - RCHECK(bit_reader.ReadBits(1, &frame_cropping_flag)); - if (frame_cropping_flag) { - RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_left_offset)); - RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_right_offset)); - RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_top_offset)); - RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_bottom_offset)); - } - - int vui_parameters_present_flag; - RCHECK(bit_reader.ReadBits(1, &vui_parameters_present_flag)); - int sar_width = 1; - int sar_height = 1; - if (vui_parameters_present_flag) { - // Read only the aspect ratio information from the VUI section. - // TODO(damienv): check whether other VUI info are useful. - int aspect_ratio_info_present_flag; - RCHECK(bit_reader.ReadBits(1, &aspect_ratio_info_present_flag)); - if (aspect_ratio_info_present_flag) { - int aspect_ratio_idc; - RCHECK(bit_reader.ReadBits(8, &aspect_ratio_idc)); - if (aspect_ratio_idc == kExtendedSar) { - RCHECK(bit_reader.ReadBits(16, &sar_width)); - RCHECK(bit_reader.ReadBits(16, &sar_height)); - } else if (aspect_ratio_idc < kSarTableSize) { - sar_width = kTableSarWidth[aspect_ratio_idc]; - sar_height = kTableSarHeight[aspect_ratio_idc]; - } - } - } - - if (sar_width == 0 || sar_height == 0) { - DVLOG(1) << "Unspecified SAR not supported"; - return false; - } +bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) { + // Set the SAR to 1 when not specified in the H264 stream. + int sar_width = (sps->sar_width == 0) ? 1 : sps->sar_width; + int sar_height = (sps->sar_height == 0) ? 1 : sps->sar_height; // TODO(damienv): a MAP unit can be either 16 or 32 pixels. // although it's 16 pixels for progressive non MBAFF frames. - gfx::Size coded_size((pic_width_in_mbs_minus1 + 1) * 16, - (pic_height_in_map_units_minus1 + 1) * 16); + gfx::Size coded_size((sps->pic_width_in_mbs_minus1 + 1) * 16, + (sps->pic_height_in_map_units_minus1 + 1) * 16); gfx::Rect visible_rect( - frame_crop_left_offset, - frame_crop_top_offset, - (coded_size.width() - frame_crop_right_offset) - frame_crop_left_offset, - (coded_size.height() - frame_crop_bottom_offset) - frame_crop_top_offset); + sps->frame_crop_left_offset, + sps->frame_crop_top_offset, + (coded_size.width() - sps->frame_crop_right_offset) - + sps->frame_crop_left_offset, + (coded_size.height() - sps->frame_crop_bottom_offset) - + sps->frame_crop_top_offset); if (visible_rect.width() <= 0 || visible_rect.height() <= 0) return false; - gfx::Size natural_size((visible_rect.width() * sar_width) / sar_height, - visible_rect.height()); + gfx::Size natural_size( + (visible_rect.width() * sar_width) / sar_height, + visible_rect.height()); if (natural_size.width() == 0) return false; - // TODO(damienv): - // Assuming the SPS is used right away by the PPS - // and the slice headers is a strong assumption. - // In theory, we should process the SPS and PPS - // and only when one of the slice header is switching - // the PPS id, the video decoder config should be changed. VideoDecoderConfig video_decoder_config( kCodecH264, - VIDEO_CODEC_PROFILE_UNKNOWN, // TODO(damienv) + VIDEO_CODEC_PROFILE_UNKNOWN, VideoFrame::YV12, coded_size, visible_rect, @@ -522,12 +312,14 @@ bool EsParserH264::ProcessSPS(const uint8* buf, int size) { false); if (!video_decoder_config.Matches(last_video_decoder_config_)) { - DVLOG(1) << "Profile IDC: " << profile_idc; - DVLOG(1) << "Level IDC: " << level_idc; - DVLOG(1) << "Pic width: " << (pic_width_in_mbs_minus1 + 1) * 16; - DVLOG(1) << "Pic height: " << (pic_height_in_map_units_minus1 + 1) * 16; - DVLOG(1) << "log2_max_frame_num_minus4: " << log2_max_frame_num_minus4; - DVLOG(1) << "SAR: width=" << sar_width << " height=" << sar_height; + DVLOG(1) << "Profile IDC: " << sps->profile_idc; + DVLOG(1) << "Level IDC: " << sps->level_idc; + DVLOG(1) << "Pic width: " << coded_size.width(); + DVLOG(1) << "Pic height: " << coded_size.height(); + DVLOG(1) << "log2_max_frame_num_minus4: " + << sps->log2_max_frame_num_minus4; + DVLOG(1) << "SAR: width=" << sps->sar_width + << " height=" << sps->sar_height; last_video_decoder_config_ = video_decoder_config; new_video_config_cb_.Run(video_decoder_config); } diff --git a/media/formats/mp2t/es_parser_h264.h b/media/formats/mp2t/es_parser_h264.h index b3da98c..2c58420 100644 --- a/media/formats/mp2t/es_parser_h264.h +++ b/media/formats/mp2t/es_parser_h264.h @@ -11,14 +11,15 @@ #include "base/basictypes.h" #include "base/callback.h" #include "base/compiler_specific.h" +#include "base/memory/scoped_ptr.h" #include "base/time/time.h" -#include "media/base/byte_queue.h" #include "media/base/video_decoder_config.h" #include "media/formats/mp2t/es_parser.h" namespace media { -class BitReader; -class StreamParserBuffer; +class H264Parser; +struct H264SPS; +class OffsetByteQueue; } namespace media { @@ -50,42 +51,40 @@ class EsParserH264 : public EsParser { base::TimeDelta pts; }; - // H264 parser. - // It resumes parsing from byte position |es_pos_|. + // Find the AUD located at or after |*stream_pos|. + // Return true if an AUD is found. + // If found, |*stream_pos| corresponds to the position of the AUD start code + // in the stream. Otherwise, |*stream_pos| corresponds to the last position + // of the start code parser. + bool FindAUD(int64* stream_pos); + + // Resumes the H264 ES parsing. + // Return true if successful. bool ParseInternal(); - // Emit a frame if a frame has been started earlier. + // Emit a frame whose position in the ES queue starts at |access_unit_pos|. // Returns true if successful, false if no PTS is available for the frame. - bool EmitFrameIfNeeded(int next_aud_pos); - - // Start a new frame. - // Note: if aud_pos < 0, clear the current frame. - void StartFrame(int aud_pos); - - // Discard |nbytes| of ES from the ES byte queue. - void DiscardEs(int nbytes); + bool EmitFrame(int64 access_unit_pos, int access_unit_size, + bool is_key_frame, int pps_id); - // Parse a NAL / SPS. - // Returns true if successful (compliant bitstream). - bool NalParser(const uint8* buf, int size); - bool ProcessSPS(const uint8* buf, int size); + // Update the video decoder config based on an H264 SPS. + // Return true if successful. + bool UpdateVideoDecoderConfig(const H264SPS* sps); // Callbacks to pass the stream configuration and the frames. NewVideoConfigCB new_video_config_cb_; EmitBufferCB emit_buffer_cb_; // Bytes of the ES stream that have not been emitted yet. - ByteQueue es_byte_queue_; - std::list<std::pair<int, TimingDesc> > timing_desc_list_; + scoped_ptr<media::OffsetByteQueue> es_queue_; + std::list<std::pair<int64, TimingDesc> > timing_desc_list_; // H264 parser state. - // Note: |current_access_unit_pos_| is pointing to an annexB syncword - // while |current_nal_pos_| is pointing to the NAL unit - // (i.e. does not include the annexB syncword). - int es_pos_; - int current_nal_pos_; - int current_access_unit_pos_; - bool is_key_frame_; + // - |current_access_unit_pos_| is pointing to an annexB syncword + // representing the first NALU of an H264 access unit. + scoped_ptr<H264Parser> h264_parser_; + int64 current_access_unit_pos_; + int64 next_access_unit_pos_; // Last video decoder config. VideoDecoderConfig last_video_decoder_config_; |