summaryrefslogtreecommitdiffstats
path: root/media/formats/mp2t/es_parser_h264.cc
blob: 3b47fec055a42fcfc2ee352f66a575c1ce7710c7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/formats/mp2t/es_parser_h264.h"

#include "base/logging.h"
#include "base/numerics/safe_conversions.h"
#include "media/base/encryption_scheme.h"
#include "media/base/media_util.h"
#include "media/base/stream_parser_buffer.h"
#include "media/base/timestamp_constants.h"
#include "media/base/video_frame.h"
#include "media/filters/h264_parser.h"
#include "media/formats/common/offset_byte_queue.h"
#include "media/formats/mp2t/mp2t_common.h"
#include "ui/gfx/geometry/rect.h"
#include "ui/gfx/geometry/size.h"

namespace media {
namespace mp2t {

namespace {

VideoCodecProfile ProfileIDCToVideoCodecProfile(int profile_idc) {
  switch (profile_idc) {
    case H264SPS::kProfileIDCBaseline:
      return H264PROFILE_BASELINE;
    case H264SPS::kProfileIDCMain:
      return H264PROFILE_MAIN;
    case H264SPS::kProfileIDCHigh:
      return H264PROFILE_HIGH;
    case H264SPS::kProfileIDHigh10:
      return H264PROFILE_HIGH10PROFILE;
    case H264SPS::kProfileIDHigh422:
      return H264PROFILE_HIGH422PROFILE;
    case H264SPS::kProfileIDHigh444Predictive:
      return H264PROFILE_HIGH444PREDICTIVEPROFILE;
    case H264SPS::kProfileIDScalableBaseline:
      return H264PROFILE_SCALABLEBASELINE;
    case H264SPS::kProfileIDScalableHigh:
      return H264PROFILE_SCALABLEHIGH;
    case H264SPS::kProfileIDStereoHigh:
      return H264PROFILE_STEREOHIGH;
    case H264SPS::kProfileIDSMultiviewHigh:
      return H264PROFILE_MULTIVIEWHIGH;
  }
  NOTREACHED() << "unknown video profile: " << profile_idc;
  return VIDEO_CODEC_PROFILE_UNKNOWN;
}

}  // namespace

// An AUD NALU is at least 4 bytes:
// 3 bytes for the start code + 1 byte for the NALU type.
const int kMinAUDSize = 4;

EsParserH264::EsParserH264(
    const NewVideoConfigCB& new_video_config_cb,
    const EmitBufferCB& emit_buffer_cb)
    : es_adapter_(new_video_config_cb, emit_buffer_cb),
      h264_parser_(new H264Parser()),
      current_access_unit_pos_(0),
      next_access_unit_pos_(0) {
}

EsParserH264::~EsParserH264() {
}

void EsParserH264::Flush() {
  DVLOG(1) << __FUNCTION__;
  if (!FindAUD(&current_access_unit_pos_))
    return;

  // Simulate an additional AUD to force emitting the last access unit
  // which is assumed to be complete at this point.
  uint8_t aud[] = {0x00, 0x00, 0x01, 0x09};
  es_queue_->Push(aud, sizeof(aud));
  ParseFromEsQueue();

  es_adapter_.Flush();
}

void EsParserH264::ResetInternal() {
  DVLOG(1) << __FUNCTION__;
  h264_parser_.reset(new H264Parser());
  current_access_unit_pos_ = 0;
  next_access_unit_pos_ = 0;
  last_video_decoder_config_ = VideoDecoderConfig();
  es_adapter_.Reset();
}

bool EsParserH264::FindAUD(int64_t* stream_pos) {
  while (true) {
    const uint8_t* es;
    int size;
    es_queue_->PeekAt(*stream_pos, &es, &size);

    // Find a start code and move the stream to the start code parser position.
    off_t start_code_offset;
    off_t start_code_size;
    bool start_code_found = H264Parser::FindStartCode(
        es, size, &start_code_offset, &start_code_size);
    *stream_pos += start_code_offset;

    // No H264 start code found or NALU type not available yet.
    if (!start_code_found || start_code_offset + start_code_size >= size)
      return false;

    // Exit the parser loop when an AUD is found.
    // Note: NALU header for an AUD:
    // - nal_ref_idc must be 0
    // - nal_unit_type must be H264NALU::kAUD
    if (es[start_code_offset + start_code_size] == H264NALU::kAUD)
      break;

    // The current NALU is not an AUD, skip the start code
    // and continue parsing the stream.
    *stream_pos += start_code_size;
  }

  return true;
}

bool EsParserH264::ParseFromEsQueue() {
  DCHECK_LE(es_queue_->head(), current_access_unit_pos_);
  DCHECK_LE(current_access_unit_pos_, next_access_unit_pos_);
  DCHECK_LE(next_access_unit_pos_, es_queue_->tail());

  // Find the next AUD located at or after |current_access_unit_pos_|. This is
  // needed since initially |current_access_unit_pos_| might not point to
  // an AUD.
  // Discard all the data before the updated |current_access_unit_pos_|
  // since it won't be used again.
  bool aud_found = FindAUD(&current_access_unit_pos_);
  es_queue_->Trim(current_access_unit_pos_);
  if (next_access_unit_pos_ < current_access_unit_pos_)
    next_access_unit_pos_ = current_access_unit_pos_;

  // Resume parsing later if no AUD was found.
  if (!aud_found)
    return true;

  // Find the next AUD to make sure we have a complete access unit.
  if (next_access_unit_pos_ < current_access_unit_pos_ + kMinAUDSize) {
    next_access_unit_pos_ = current_access_unit_pos_ + kMinAUDSize;
    DCHECK_LE(next_access_unit_pos_, es_queue_->tail());
  }
  if (!FindAUD(&next_access_unit_pos_))
    return true;

  // At this point, we know we have a full access unit.
  bool is_key_frame = false;
  int pps_id_for_access_unit = -1;

  const uint8_t* es;
  int size;
  es_queue_->PeekAt(current_access_unit_pos_, &es, &size);
  int access_unit_size = base::checked_cast<int, int64_t>(
      next_access_unit_pos_ - current_access_unit_pos_);
  DCHECK_LE(access_unit_size, size);
  h264_parser_->SetStream(es, access_unit_size);

  while (true) {
    bool is_eos = false;
    H264NALU nalu;
    switch (h264_parser_->AdvanceToNextNALU(&nalu)) {
      case H264Parser::kOk:
        break;
      case H264Parser::kInvalidStream:
      case H264Parser::kUnsupportedStream:
        return false;
      case H264Parser::kEOStream:
        is_eos = true;
        break;
    }
    if (is_eos)
      break;

    switch (nalu.nal_unit_type) {
      case H264NALU::kAUD: {
        DVLOG(LOG_LEVEL_ES) << "NALU: AUD";
        break;
      }
      case H264NALU::kSPS: {
        DVLOG(LOG_LEVEL_ES) << "NALU: SPS";
        int sps_id;
        if (h264_parser_->ParseSPS(&sps_id) != H264Parser::kOk)
          return false;
        break;
      }
      case H264NALU::kPPS: {
        DVLOG(LOG_LEVEL_ES) << "NALU: PPS";
        int pps_id;
        if (h264_parser_->ParsePPS(&pps_id) != H264Parser::kOk)
          return false;
        break;
      }
      case H264NALU::kIDRSlice:
      case H264NALU::kNonIDRSlice: {
        is_key_frame = (nalu.nal_unit_type == H264NALU::kIDRSlice);
        DVLOG(LOG_LEVEL_ES) << "NALU: slice IDR=" << is_key_frame;
        H264SliceHeader shdr;
        if (h264_parser_->ParseSliceHeader(nalu, &shdr) != H264Parser::kOk) {
          // Only accept an invalid SPS/PPS at the beginning when the stream
          // does not necessarily start with an SPS/PPS/IDR.
          // TODO(damienv): Should be able to differentiate a missing SPS/PPS
          // from a slice header parsing error.
          if (last_video_decoder_config_.IsValidConfig())
            return false;
        } else {
          pps_id_for_access_unit = shdr.pic_parameter_set_id;
        }
        break;
      }
      default: {
        DVLOG(LOG_LEVEL_ES) << "NALU: " << nalu.nal_unit_type;
      }
    }
  }

  // Emit a frame and move the stream to the next AUD position.
  RCHECK(EmitFrame(current_access_unit_pos_, access_unit_size,
                   is_key_frame, pps_id_for_access_unit));
  current_access_unit_pos_ = next_access_unit_pos_;
  es_queue_->Trim(current_access_unit_pos_);

  return true;
}

bool EsParserH264::EmitFrame(int64_t access_unit_pos,
                             int access_unit_size,
                             bool is_key_frame,
                             int pps_id) {
  // Get the access unit timing info.
  // Note: |current_timing_desc.pts| might be |kNoTimestamp()| at this point
  // if:
  // - the stream is not fully MPEG-2 compliant.
  // - or if the stream relies on H264 VUI parameters to compute the timestamps.
  //   See H.222 spec: section 2.7.5 "Conditional coding of timestamps".
  //   This part is not yet implemented in EsParserH264.
  // |es_adapter_| will take care of the missing timestamps.
  TimingDesc current_timing_desc = GetTimingDescriptor(access_unit_pos);
  DVLOG_IF(1, current_timing_desc.pts == kNoTimestamp())
      << "Missing timestamp";

  // If only the PTS is provided, copy the PTS into the DTS.
  if (current_timing_desc.dts == kNoDecodeTimestamp()) {
    current_timing_desc.dts =
        DecodeTimestamp::FromPresentationTime(current_timing_desc.pts);
  }

  // Update the video decoder configuration if needed.
  const H264PPS* pps = h264_parser_->GetPPS(pps_id);
  if (!pps) {
    // Only accept an invalid PPS at the beginning when the stream
    // does not necessarily start with an SPS/PPS/IDR.
    // In this case, the initial frames are conveyed to the upper layer with
    // an invalid VideoDecoderConfig and it's up to the upper layer
    // to process this kind of frame accordingly.
    if (last_video_decoder_config_.IsValidConfig())
      return false;
  } else {
    const H264SPS* sps = h264_parser_->GetSPS(pps->seq_parameter_set_id);
    if (!sps)
      return false;
    RCHECK(UpdateVideoDecoderConfig(sps, Unencrypted()));
  }

  // Emit a frame.
  DVLOG(LOG_LEVEL_ES) << "Emit frame: stream_pos=" << current_access_unit_pos_
                      << " size=" << access_unit_size;
  int es_size;
  const uint8_t* es;
  es_queue_->PeekAt(current_access_unit_pos_, &es, &es_size);
  CHECK_GE(es_size, access_unit_size);

  // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
  // type and allow multiple video tracks. See https://crbug.com/341581.
  scoped_refptr<StreamParserBuffer> stream_parser_buffer =
      StreamParserBuffer::CopyFrom(
          es,
          access_unit_size,
          is_key_frame,
          DemuxerStream::VIDEO,
          0);
  stream_parser_buffer->SetDecodeTimestamp(current_timing_desc.dts);
  stream_parser_buffer->set_timestamp(current_timing_desc.pts);
  return es_adapter_.OnNewBuffer(stream_parser_buffer);
}

bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps,
                                            const EncryptionScheme& scheme) {
  // Set the SAR to 1 when not specified in the H264 stream.
  int sar_width = (sps->sar_width == 0) ? 1 : sps->sar_width;
  int sar_height = (sps->sar_height == 0) ? 1 : sps->sar_height;

  // TODO(damienv): a MAP unit can be either 16 or 32 pixels.
  // although it's 16 pixels for progressive non MBAFF frames.
  gfx::Size coded_size((sps->pic_width_in_mbs_minus1 + 1) * 16,
                       (sps->pic_height_in_map_units_minus1 + 1) * 16);
  gfx::Rect visible_rect(
      sps->frame_crop_left_offset,
      sps->frame_crop_top_offset,
      (coded_size.width() - sps->frame_crop_right_offset) -
      sps->frame_crop_left_offset,
      (coded_size.height() - sps->frame_crop_bottom_offset) -
      sps->frame_crop_top_offset);
  if (visible_rect.width() <= 0 || visible_rect.height() <= 0)
    return false;
  gfx::Size natural_size(
      (visible_rect.width() * sar_width) / sar_height,
      visible_rect.height());
  if (natural_size.width() == 0)
    return false;

  VideoDecoderConfig video_decoder_config(
      kCodecH264, ProfileIDCToVideoCodecProfile(sps->profile_idc),
      PIXEL_FORMAT_YV12, COLOR_SPACE_HD_REC709, coded_size, visible_rect,
      natural_size, EmptyExtraData(), scheme);

  if (!video_decoder_config.Matches(last_video_decoder_config_)) {
    DVLOG(1) << "Profile IDC: " << sps->profile_idc;
    DVLOG(1) << "Level IDC: " << sps->level_idc;
    DVLOG(1) << "Pic width: " << coded_size.width();
    DVLOG(1) << "Pic height: " << coded_size.height();
    DVLOG(1) << "log2_max_frame_num_minus4: "
             << sps->log2_max_frame_num_minus4;
    DVLOG(1) << "SAR: width=" << sps->sar_width
             << " height=" << sps->sar_height;
    last_video_decoder_config_ = video_decoder_config;
    es_adapter_.OnConfigChanged(video_decoder_config);
  }

  return true;
}

}  // namespace mp2t
}  // namespace media