summaryrefslogtreecommitdiffstats
path: root/media/filters/vp9_parser.h
blob: 17fbe51fd12d5fbc3a9d36238b0aa7977d2cd8c3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This file contains an implementation of a VP9 bitstream parser. The main
// purpose of this parser is to support hardware decode acceleration. Some
// accelerators, e.g. libva which implements VA-API, require the caller
// (chrome) to feed them parsed VP9 frame header.
//
// See content::VP9Decoder for example usage.
//
#ifndef MEDIA_FILTERS_VP9_PARSER_H_
#define MEDIA_FILTERS_VP9_PARSER_H_

#include <stddef.h>
#include <stdint.h>

#include <deque>

#include "base/macros.h"
#include "media/base/media_export.h"
#include "media/filters/vp9_raw_bits_reader.h"

namespace media {

// NOTE(review): presumably the number of VP9 profiles (an exclusive upper bound
// for Vp9FrameHeader::profile) rather than the largest valid profile — confirm
// against the check in vp9_parser.cc.
constexpr int kVp9MaxProfile = 4;

// log2 of kVp9NumRefFrames.
constexpr int kVp9NumRefFramesLog2 = 3;

// Number of reference slots tracked by the parser; this is the length of
// Vp9Parser::ref_slots_.
constexpr size_t kVp9NumRefFrames = static_cast<size_t>(1)
                                    << kVp9NumRefFramesLog2;

// NOTE(review): presumably the largest value an 8-bit probability can take.
constexpr uint8_t kVp9MaxProb = 255;

// Number of references a single frame carries; this is the length of
// Vp9FrameHeader::frame_refs and Vp9FrameHeader::ref_sign_biases.
constexpr size_t kVp9NumRefsPerFrame = 3;

// Color space of a frame; this is the type of Vp9FrameHeader::color_space.
// Every enumerator has an explicit numeric value.
// NOTE(review): the values presumably mirror the color space field coded in
// the VP9 bitstream — confirm against the spec before renumbering.
enum class Vp9ColorSpace {
  UNKNOWN = 0,
  BT_601 = 1,
  BT_709 = 2,
  SMPTE_170 = 3,
  SMPTE_240 = 4,
  BT_2020 = 5,
  RESERVED = 6,
  SRGB = 7,
};

// Interpolation filter of a frame; this is the type of
// Vp9FrameHeader::interp_filter and the return type of
// Vp9Parser::ReadInterpFilter().
enum Vp9InterpFilter {
  EIGHTTAP = 0,
  EIGHTTAP_SMOOTH = 1,
  EIGHTTAP_SHARP = 2,
  BILINEAR = 3,
  SWITCHABLE = 4,
  // Misspelled original name of SWITCHABLE. Kept as an alias with the same
  // value so that existing callers keep compiling; prefer SWITCHABLE in new
  // code.
  SWICHABLE = SWITCHABLE,
};

// Segmentation state. Vp9Parser owns one instance (Vp9Parser::segmentation_)
// and exposes it read-only through Vp9Parser::GetSegmentation().
// NOTE(review): the fields are filled in by code that is not in this header;
// their names appear to follow the VP9 segmentation syntax — confirm in
// vp9_parser.cc before relying on a particular field's meaning.
struct MEDIA_EXPORT Vp9Segmentation {
  // Number of segments; first dimension of the per-segment tables below.
  static const size_t kNumSegments = 8;
  // Length of |tree_probs|.
  static const size_t kNumTreeProbs = kNumSegments - 1;
  // Length of |pred_probs|.
  static const size_t kNumPredictionProbs = 3;
  // Per-segment features; used as the second index of |feature_enabled| and
  // |feature_data|. SEG_LVL_MAX is the number of features, not a feature.
  enum SegmentLevelFeature {
    SEG_LVL_ALT_Q = 0,
    SEG_LVL_ALT_LF = 1,
    SEG_LVL_REF_FRAME = 2,
    SEG_LVL_SKIP = 3,
    SEG_LVL_MAX
  };

  bool enabled;

  // Segmentation map update flags and their probability tables.
  bool update_map;
  uint8_t tree_probs[kNumTreeProbs];
  bool temporal_update;
  uint8_t pred_probs[kNumPredictionProbs];

  // Segmentation data update flags and the per-segment feature tables,
  // indexed as [segment id][SegmentLevelFeature].
  bool update_data;
  bool abs_delta;
  bool feature_enabled[kNumSegments][SEG_LVL_MAX];
  int16_t feature_data[kNumSegments][SEG_LVL_MAX];

  // Per-segment dequantization values, two entries per segment.
  // NOTE(review): presumably ordered {DC, AC} and computed by
  // Vp9Parser::SetupSegmentationDequant() — confirm in vp9_parser.cc.
  int16_t y_dequant[kNumSegments][2];
  int16_t uv_dequant[kNumSegments][2];

  // Returns feature_enabled[seg_id][feature]. No bounds checking is done:
  // |seg_id| must be below kNumSegments and |feature| below SEG_LVL_MAX.
  bool FeatureEnabled(size_t seg_id, SegmentLevelFeature feature) const {
    return feature_enabled[seg_id][feature];
  }

  // Returns feature_data[seg_id][feature]. No bounds checking is done:
  // |seg_id| must be below kNumSegments and |feature| below SEG_LVL_MAX.
  int16_t FeatureData(size_t seg_id, SegmentLevelFeature feature) const {
    return feature_data[seg_id][feature];
  }
};

// Loop filter state. Vp9Parser owns one instance (Vp9Parser::loop_filter_)
// and exposes it read-only through Vp9Parser::GetLoopFilter().
// NOTE(review): the fields are filled in by code that is not in this header —
// confirm individual field semantics in vp9_parser.cc.
struct MEDIA_EXPORT Vp9LoopFilter {
  // Reference frame kinds; index into |update_ref_deltas| and |ref_deltas|,
  // and the second index of |lvl|. VP9_FRAME_MAX is the number of kinds.
  enum Vp9FrameType {
    VP9_FRAME_INTRA = 0,
    VP9_FRAME_LAST = 1,
    VP9_FRAME_GOLDEN = 2,
    VP9_FRAME_ALTREF = 3,
    VP9_FRAME_MAX = 4,
  };

  // Length of |update_mode_deltas| and |mode_deltas|, and the third dimension
  // of |lvl|.
  static const size_t kNumModeDeltas = 2;

  uint8_t filter_level;
  uint8_t sharpness_level;

  // Delta tables: one entry per Vp9FrameType for the ref deltas and one entry
  // per mode for the mode deltas, each with a matching update flag.
  bool mode_ref_delta_enabled;
  bool mode_ref_delta_update;
  bool update_ref_deltas[VP9_FRAME_MAX];
  int8_t ref_deltas[VP9_FRAME_MAX];
  bool update_mode_deltas[kNumModeDeltas];
  int8_t mode_deltas[kNumModeDeltas];

  // Filter level table indexed as [segment id][Vp9FrameType][mode].
  // NOTE(review): presumably computed by Vp9Parser::SetupLoopFilter() — confirm.
  uint8_t lvl[Vp9Segmentation::kNumSegments][VP9_FRAME_MAX][kNumModeDeltas];
};

// Quantization parameters of a frame, stored as Vp9FrameHeader::quant_params.
// Members of Vp9FrameHeader, this struct included, will be 0-initialized by
// Vp9Parser::ParseNextFrame.
struct MEDIA_EXPORT Vp9QuantizationParams {
  // Returns true only when the base quantizer index and all three deltas are
  // zero.
  bool IsLossless() const {
    if (base_qindex != 0)
      return false;

    const bool y_dc_is_zero = (y_dc_delta == 0);
    const bool uv_is_zero = (uv_dc_delta == 0) && (uv_ac_delta == 0);
    return y_dc_is_zero && uv_is_zero;
  }

  // Base quantizer index and the deltas applied on top of it.
  uint8_t base_qindex;
  int8_t y_dc_delta;
  int8_t uv_dc_delta;
  int8_t uv_ac_delta;
};

// VP9 frame header.
// Filled in by Vp9Parser::ParseNextFrame(), which 0-initializes its members
// before parsing.
struct MEDIA_EXPORT Vp9FrameHeader {
  // Values of |frame_type|.
  enum FrameType {
    KEYFRAME = 0,
    INTERFRAME = 1,
  };

  // Defined out of line; the definition is not visible in this header.
  // NOTE(review): presumably tests |frame_type| against KEYFRAME — confirm in
  // vp9_parser.cc.
  bool IsKeyframe() const;
  // Returns true if bit |i| of |refresh_flags| is set. |refresh_flags| is 8
  // bits wide, so callers should pass i < 8; the result is false for
  // 8 <= i < 32 and the shift is undefined for larger values.
  bool RefreshFlag(size_t i) const { return !!(refresh_flags & (1u << i)); }

  uint8_t profile;

  bool show_existing_frame;
  uint8_t frame_to_show;

  FrameType frame_type;

  bool show_frame;
  bool error_resilient_mode;

  // Sample format of the frame.
  uint8_t bit_depth;
  Vp9ColorSpace color_space;
  bool yuv_range;
  uint8_t subsampling_x;
  uint8_t subsampling_y;

  // The range of width and height is 1..2^16.
  uint32_t width;
  uint32_t height;
  uint32_t display_width;
  uint32_t display_height;

  bool intra_only;
  uint8_t reset_context;
  // Bitmask read through RefreshFlag().
  uint8_t refresh_flags;
  // One entry per reference used by the frame (kVp9NumRefsPerFrame entries).
  uint8_t frame_refs[kVp9NumRefsPerFrame];
  bool ref_sign_biases[kVp9NumRefsPerFrame];
  bool allow_high_precision_mv;
  Vp9InterpFilter interp_filter;

  bool refresh_frame_context;
  bool frame_parallel_decoding_mode;
  uint8_t frame_context_idx;

  Vp9QuantizationParams quant_params;

  uint8_t log2_tile_cols;
  uint8_t log2_tile_rows;

  // Pointer to the beginning of frame data. It is a responsibility of the
  // client of the Vp9Parser to maintain validity of this data while it is
  // being used outside of that class.
  const uint8_t* data;

  // Size of |data| in bytes.
  size_t frame_size;

  // Size of compressed header in bytes.
  size_t first_partition_size;

  // Size of uncompressed header in bytes.
  size_t uncompressed_header_size;
};

// A parser for VP9 bitstream.
//
// Usage, as far as this header shows: call SetStream() with a buffer holding
// one frame or one superframe, then call ParseNextFrame() until it stops
// returning kOk. Segmentation and loop filter state persist across frames and
// are available through GetSegmentation() and GetLoopFilter(). The class is
// not copyable.
//
// NOTE(review): |off_t| is used below, but this header includes only
// <stddef.h> and <stdint.h> from the C library; presumably <sys/types.h>
// arrives transitively — confirm, or include it directly.
class MEDIA_EXPORT Vp9Parser {
 public:
  // ParseNextFrame() return values. See documentation for ParseNextFrame().
  enum Result {
    kOk,
    kInvalidStream,
    kEOStream,
  };

  Vp9Parser();
  ~Vp9Parser();

  // Set a new stream buffer to read from, starting at |stream| and of size
  // |stream_size| in bytes. |stream| must point to the beginning of a single
  // frame or a single superframe, is owned by caller and must remain valid
  // until the next call to SetStream().
  void SetStream(const uint8_t* stream, off_t stream_size);

  // Parse the next frame in the current stream buffer, filling |fhdr| with
  // the parsed frame header and updating current segmentation and loop filter
  // state. Return kOk if a frame has successfully been parsed, kEOStream if
  // there is no more data in the current stream buffer, or kInvalidStream
  // on error.
  Result ParseNextFrame(Vp9FrameHeader* fhdr);

  // Return current segmentation state. The reference points at a member of
  // this parser, so it is only usable while the parser is alive.
  const Vp9Segmentation& GetSegmentation() const { return segmentation_; }

  // Return current loop filter state. The reference points at a member of
  // this parser, so it is only usable while the parser is alive.
  const Vp9LoopFilter& GetLoopFilter() const { return loop_filter_; }

  // Clear parser state and return to an initialized state.
  void Reset();

 private:
  // The parsing context to keep track of references. Holds the dimensions
  // recorded for one reference slot.
  struct ReferenceSlot {
    uint32_t width;
    uint32_t height;
  };

  // Parsing helpers. Their definitions are not in this header.
  // NOTE(review): presumably ParseSuperframe() fills |frames_| and each
  // Read*() helper consumes one part of the uncompressed header through
  // |reader_|; the bool-returning ones presumably report failure with false —
  // confirm in vp9_parser.cc.
  bool ParseSuperframe();
  uint8_t ReadProfile();
  bool VerifySyncCode();
  bool ReadBitDepthColorSpaceSampling(Vp9FrameHeader* fhdr);
  void ReadFrameSize(Vp9FrameHeader* fhdr);
  bool ReadFrameSizeFromRefs(Vp9FrameHeader* fhdr);
  void ReadDisplayFrameSize(Vp9FrameHeader* fhdr);
  Vp9InterpFilter ReadInterpFilter();
  void ReadLoopFilter();
  void ReadQuantization(Vp9QuantizationParams* quants);
  void ReadSegmentationMap();
  void ReadSegmentationData();
  void ReadSegmentation();
  void ReadTiles(Vp9FrameHeader* fhdr);
  bool ParseUncompressedHeader(const uint8_t* stream,
                               off_t frame_size,
                               Vp9FrameHeader* fhdr);
  // NOTE(review): presumably records the frame's dimensions in |ref_slots_|
  // for the slots it refreshes — confirm in vp9_parser.cc.
  void UpdateSlots(const Vp9FrameHeader* fhdr);

  // State setup helpers operating on |segmentation_| and |loop_filter_|.
  // Their definitions are not in this header.
  void ResetLoopfilter();
  void SetupPastIndependence();
  size_t GetQIndex(const Vp9QuantizationParams& quant, size_t segid) const;
  void SetupSegmentationDequant(const Vp9QuantizationParams& quant);
  void SetupLoopFilter();

  // Current address in the bitstream buffer.
  const uint8_t* stream_;

  // Remaining bytes in stream_.
  off_t bytes_left_;

  // Stores start pointer and size of each frame within the current superframe.
  struct FrameInfo {
    FrameInfo(const uint8_t* ptr, off_t size);

    // Starting address of the frame.
    const uint8_t* ptr;

    // Size of the frame in bytes.
    off_t size;
  };

  // FrameInfo for the remaining frames in the current superframe to be parsed.
  std::deque<FrameInfo> frames_;

  // Raw bits decoder for uncompressed frame header.
  Vp9RawBitsReader reader_;

  // Segmentation and loop filter state that persists across frames.
  Vp9Segmentation segmentation_;
  Vp9LoopFilter loop_filter_;

  // The parsing context to keep track of references. One slot per reference
  // frame (kVp9NumRefFrames slots).
  ReferenceSlot ref_slots_[kVp9NumRefFrames];

  DISALLOW_COPY_AND_ASSIGN(Vp9Parser);
};

}  // namespace media

#endif  // MEDIA_FILTERS_VP9_PARSER_H_