summaryrefslogtreecommitdiffstats
path: root/content/common/gpu/media/exynos_video_decode_accelerator.h
blob: 52ae989aa91888824cf5780dcd41d39f3519d057 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This file contains an implementation of VideoDecoderAccelerator
// that utilizes the hardware video decoder present on the Exynos SoC.

#ifndef CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_
#define CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_

#include <queue>
#include <vector>

#include "base/callback_forward.h"
#include "base/memory/linked_ptr.h"
#include "base/memory/scoped_ptr.h"
#include "base/threading/thread.h"
#include "content/common/content_export.h"
#include "content/common/gpu/media/video_decode_accelerator_impl.h"
#include "media/base/limits.h"
#include "media/base/video_decoder_config.h"
#include "media/video/picture.h"
#include "ui/gfx/size.h"
#include "ui/gl/gl_bindings.h"

namespace base {
class MessageLoopProxy;
}

namespace content {
class H264Parser;

// This class handles Exynos video acceleration directly through the V4L2
// device exported by the Multi Format Codec hardware block.
//
// The threading model of this class is driven by the fact that it needs to
// interface two fundamentally different event queues -- the one Chromium
// provides through MessageLoop, and the one driven by the V4L2 devices which
// is waited on with epoll().  There are three threads involved in this class:
//
// * The child thread, which is the main GPU process thread which calls the
//   media::VideoDecodeAccelerator entry points.  Calls from this thread
//   generally do not block (with the exception of Initialize() and Destroy()).
//   They post tasks to the decoder_thread_, which actually services the task
//   and calls back when complete through the
//   media::VideoDecodeAccelerator::Client interface.
// * The decoder_thread_, owned by this class.  It services API tasks, through
//   the *Task() routines, as well as V4L2 device events, through
//   ServiceDeviceTask().  Almost all state modification is done on this thread.
// * The device_poll_thread_, owned by this class.  All it does is epoll() on
//   the V4L2 in DevicePollTask() and schedule a ServiceDeviceTask() on the
//   decoder_thread_ when something interesting happens.
//   TODO(sheu): replace this thread with an TYPE_IO decoder_thread_.
//
// Note that this class has no locks!  Everything's serviced on the
// decoder_thread_, so there are no synchronization issues.
// ... well, there are, but it's a matter of getting messages posted in the
// right order, not fiddling with locks.
class CONTENT_EXPORT ExynosVideoDecodeAccelerator
    : public VideoDecodeAcceleratorImpl {
 public:
  ExynosVideoDecodeAccelerator(
      EGLDisplay egl_display,
      Client* client,
      const base::WeakPtr<Client>& io_client_,
      const base::Callback<bool(void)>& make_context_current,
      const scoped_refptr<base::MessageLoopProxy>& io_message_loop_proxy);
  virtual ~ExynosVideoDecodeAccelerator();

  // media::VideoDecodeAccelerator implementation.
  // Note: Initialize() and Destroy() are synchronous.
  virtual bool Initialize(media::VideoCodecProfile profile) OVERRIDE;
  virtual void Decode(const media::BitstreamBuffer& bitstream_buffer) OVERRIDE;
  virtual void AssignPictureBuffers(
      const std::vector<media::PictureBuffer>& buffers) OVERRIDE;
  virtual void ReusePictureBuffer(int32 picture_buffer_id) OVERRIDE;
  virtual void Flush() OVERRIDE;
  virtual void Reset() OVERRIDE;
  virtual void Destroy() OVERRIDE;

  // VideoDecodeAcceleratorImpl implementation.
  virtual bool CanDecodeOnIOThread() OVERRIDE;

 private:
  // These are rather subjectively tuned.
  enum {
    kMfcInputBufferCount = 8,
    // TODO(posciak): determine MFC input buffer size based on level limits.
    // See http://crbug.com/255116.
    kMfcInputBufferMaxSize = 1024 * 1024,
    // Number of output buffers to use for each VDA stage above what's required
    // by the decoder (e.g. DPB size, in H264).  We need
    // media::limits::kMaxVideoFrames to fill up the GpuVideoDecode pipeline,
    // and +1 for a frame in transit.
    kDpbOutputBufferExtraCount = media::limits::kMaxVideoFrames + 1,
  };

  // Internal state of the decoder.
  enum State {
    kUninitialized,      // Initialize() not yet called.
    kInitialized,        // Initialize() returned true; ready to start decoding.
    kDecoding,           // DecodeBufferInitial() successful; decoding frames.
    kResetting,          // Presently resetting.
    kAfterReset,         // After Reset(), ready to start decoding again.
    kChangingResolution, // Performing resolution change, all remaining
                         // pre-change frames decoded and processed.
    kError,              // Error in kDecoding state.
  };

  enum BufferId {
    kFlushBufferId = -2  // Buffer id for flush buffer, queued by FlushTask().
  };

  // File descriptors we need to poll.
  enum PollFds {
    kPollMfc = (1 << 0),
  };

  // Auto-destruction reference for BitstreamBuffer, for message-passing from
  // Decode() to DecodeTask().
  struct BitstreamBufferRef;

  // Auto-destruction reference for an array of PictureBuffer, for
  // message-passing from AssignPictureBuffers() to AssignPictureBuffersTask().
  struct PictureBufferArrayRef;

  // Auto-destruction reference for EGLSync (for message-passing).
  struct EGLSyncKHRRef;

  // Record for decoded pictures that can be sent to PictureReady.
  struct PictureRecord;

  // Record for MFC input buffers.
  struct MfcInputRecord {
    MfcInputRecord();
    ~MfcInputRecord();
    bool at_device;         // held by device.
    void* address;          // mmap() address.
    size_t length;          // mmap() length.
    off_t bytes_used;       // bytes filled in the mmap() segment.
    int32 input_id;         // triggering input_id as given to Decode().
  };

  // Record for MFC output buffers.
  struct MfcOutputRecord {
    MfcOutputRecord();
    ~MfcOutputRecord();
    bool at_device;         // held by device.
    bool at_client;         // held by client.
    int fds[2];             // file descriptors for each plane.
    EGLImageKHR egl_image;  // EGLImageKHR for the output buffer.
    EGLSyncKHR egl_sync;    // sync the compositor's use of the EGLImage.
    int32 picture_id;       // picture buffer id as returned to PictureReady().
    bool cleared;           // Whether the texture is cleared and safe to render
                            // from. See TextureManager for details.
  };

  //
  // Decoding tasks, to be run on decode_thread_.
  //

  // Enqueue a BitstreamBuffer to decode.  This will enqueue a buffer to the
  // decoder_input_queue_, then queue a DecodeBufferTask() to actually decode
  // the buffer.
  void DecodeTask(const media::BitstreamBuffer& bitstream_buffer);

  // Decode from the buffers queued in decoder_input_queue_.  Calls
  // DecodeBufferInitial() or DecodeBufferContinue() as appropriate.
  void DecodeBufferTask();
  // Advance to the next fragment that begins a frame.
  bool AdvanceFrameFragment(const uint8* data, size_t size, size_t* endpos);
  // Schedule another DecodeBufferTask() if we're behind.
  void ScheduleDecodeBufferTaskIfNeeded();

  // Return true if we should continue to schedule DecodeBufferTask()s after
  // completion.  Store the amount of input actually consumed in |endpos|.
  bool DecodeBufferInitial(const void* data, size_t size, size_t* endpos);
  bool DecodeBufferContinue(const void* data, size_t size);

  // Accumulate data for the next frame to decode.  May return false in
  // non-error conditions; for example when pipeline is full and should be
  // retried later.
  bool AppendToInputFrame(const void* data, size_t size);
  // Flush data for one decoded frame.
  bool FlushInputFrame();

  // Process an AssignPictureBuffers() API call.  After this, the
  // device_poll_thread_ can be started safely, since we have all our
  // buffers.
  void AssignPictureBuffersTask(scoped_ptr<PictureBufferArrayRef> pic_buffers);

  // Service I/O on the V4L2 devices.  This task should only be scheduled from
  // DevicePollTask().  If |mfc_event_pending| is true, one or more events
  // on MFC file descriptor are pending.
  void ServiceDeviceTask(bool mfc_event_pending);
  // Handle the various device queues.
  void EnqueueMfc();
  void DequeueMfc();
  // Handle incoming MFC events.
  void DequeueMfcEvents();
  // Enqueue a buffer on the corresponding queue.
  bool EnqueueMfcInputRecord();
  bool EnqueueMfcOutputRecord();

  // Process a ReusePictureBuffer() API call.  The API call create an EGLSync
  // object on the main (GPU process) thread; we will record this object so we
  // can wait on it before reusing the buffer.
  void ReusePictureBufferTask(int32 picture_buffer_id,
                              scoped_ptr<EGLSyncKHRRef> egl_sync_ref);

  // Flush() task.  Child thread should not submit any more buffers until it
  // receives the NotifyFlushDone callback.  This task will schedule an empty
  // BitstreamBufferRef (with input_id == kFlushBufferId) to perform the flush.
  void FlushTask();
  // Notify the client of a flush completion, if required.  This should be
  // called any time a relevant queue could potentially be emptied: see
  // function definition.
  void NotifyFlushDoneIfNeeded();

  // Reset() task.  This task will schedule a ResetDoneTask() that will send
  // the NotifyResetDone callback, then set the decoder state to kResetting so
  // that all intervening tasks will drain.
  void ResetTask();
  // ResetDoneTask() will set the decoder state back to kAfterReset, so
  // subsequent decoding can continue.
  void ResetDoneTask();

  // Device destruction task.
  void DestroyTask();

  // Attempt to start/stop device_poll_thread_.
  bool StartDevicePoll();
  // If |keep_mfc_input_state| is true, don't reset MFC input state; used during
  // resolution change.
  bool StopDevicePoll(bool keep_mfc_input_state);
  // Set/clear the device poll interrupt (using device_poll_interrupt_fd_).
  bool SetDevicePollInterrupt();
  bool ClearDevicePollInterrupt();

  void StartResolutionChangeIfNeeded();
  void FinishResolutionChange();
  void ResumeAfterResolutionChange();

  // Try to get output format from MFC, detected after parsing the beginning
  // of the stream. Sets |again| to true if more parsing is needed.
  bool GetFormatInfo(struct v4l2_format* format, bool* again);
  // Create MFC output buffers for the given |format|.
  bool CreateBuffersForFormat(const struct v4l2_format& format);

  //
  // Device tasks, to be run on device_poll_thread_.
  //

  // The device task.
  void DevicePollTask(unsigned int poll_fds);

  //
  // Safe from any thread.
  //

  // Error notification (using PostTask() to child thread, if necessary).
  void NotifyError(Error error);

  // Set the decoder_thread_ state (using PostTask to decoder thread, if
  // necessary).
  void SetDecoderState(State state);

  //
  // Other utility functions.  Called on decoder_thread_, unless
  // decoder_thread_ is not yet started, in which case the child thread can call
  // these (e.g. in Initialize() or Destroy()).
  //

  // Create the buffers we need.
  bool CreateMfcInputBuffers();
  bool CreateMfcOutputBuffers();

  //
  // Methods run on child thread.
  //

  // Destroy buffers.
  void DestroyMfcInputBuffers();
  void DestroyMfcOutputBuffers();
  void ResolutionChangeDestroyBuffers();

  // Send decoded pictures to PictureReady.
  void SendPictureReady();

  // Callback that indicates a picture has been cleared.
  void PictureCleared();

  // Our original calling message loop for the child thread.
  scoped_refptr<base::MessageLoopProxy> child_message_loop_proxy_;

  // Message loop of the IO thread.
  scoped_refptr<base::MessageLoopProxy> io_message_loop_proxy_;

  // WeakPtr<> pointing to |this| for use in posting tasks from the decoder or
  // device worker threads back to the child thread.  Because the worker threads
  // are members of this class, any task running on those threads is guaranteed
  // that this object is still alive.  As a result, tasks posted from the child
  // thread to the decoder or device thread should use base::Unretained(this),
  // and tasks posted the other way should use |weak_this_|.
  base::WeakPtr<ExynosVideoDecodeAccelerator> weak_this_;

  // To expose client callbacks from VideoDecodeAccelerator.
  // NOTE: all calls to these objects *MUST* be executed on
  // child_message_loop_proxy_.
  base::WeakPtrFactory<Client> client_ptr_factory_;
  base::WeakPtr<Client> client_;
  // Callbacks to |io_client_| must be executed on |io_message_loop_proxy_|.
  base::WeakPtr<Client> io_client_;

  //
  // Decoder state, owned and operated by decoder_thread_.
  // Before decoder_thread_ has started, the decoder state is managed by
  // the child (main) thread.  After decoder_thread_ has started, the decoder
  // thread should be the only one managing these.
  //

  // This thread services tasks posted from the VDA API entry points by the
  // child thread and device service callbacks posted from the device thread.
  base::Thread decoder_thread_;
  // Decoder state machine state.
  State decoder_state_;
  // BitstreamBuffer we're presently reading.
  scoped_ptr<BitstreamBufferRef> decoder_current_bitstream_buffer_;
  // FlushTask() and ResetTask() should not affect buffers that have been
  // queued afterwards.  For flushing or resetting the pipeline then, we will
  // delay these buffers until after the flush or reset completes.
  int decoder_delay_bitstream_buffer_id_;
  // MFC input buffer we're presently filling.
  int decoder_current_input_buffer_;
  // We track the number of buffer decode tasks we have scheduled, since each
  // task execution should complete one buffer.  If we fall behind (due to
  // resource backpressure, etc.), we'll have to schedule more to catch up.
  int decoder_decode_buffer_tasks_scheduled_;
  // Picture buffers held by the client.
  int decoder_frames_at_client_;
  // Are we flushing?
  bool decoder_flushing_;
  // Got a notification from driver that it reached resolution change point
  // in the stream.
  bool resolution_change_pending_;
  // Got a reset request while we were performing resolution change.
  bool resolution_change_reset_pending_;
  // Input queue for decoder_thread_: BitstreamBuffers in.
  std::queue<linked_ptr<BitstreamBufferRef> > decoder_input_queue_;
  // For H264 decode, hardware requires that we send it frame-sized chunks.
  // We'll need to parse the stream.
  scoped_ptr<content::H264Parser> decoder_h264_parser_;
  // Set if the decoder has a pending incomplete frame in an input buffer.
  bool decoder_partial_frame_pending_;

  //
  // Hardware state and associated queues.  Since decoder_thread_ services
  // the hardware, decoder_thread_ owns these too.
  //

  // Completed decode buffers, waiting for MFC.
  std::queue<int> mfc_input_ready_queue_;

  // MFC decode device.
  int mfc_fd_;

  // MFC input buffer state.
  bool mfc_input_streamon_;
  // MFC input buffers enqueued to device.
  int mfc_input_buffer_queued_count_;
  // Input buffers ready to use, as a LIFO since we don't care about ordering.
  std::vector<int> mfc_free_input_buffers_;
  // Mapping of int index to MFC input buffer record.
  std::vector<MfcInputRecord> mfc_input_buffer_map_;

  // MFC output buffer state.
  bool mfc_output_streamon_;
  // MFC output buffers enqueued to device.
  int mfc_output_buffer_queued_count_;
  // Output buffers ready to use, as a FIFO since we want oldest-first to hide
  // synchronization latency with GL.
  std::queue<int> mfc_free_output_buffers_;
  // Mapping of int index to MFC output buffer record.
  std::vector<MfcOutputRecord> mfc_output_buffer_map_;
  // MFC output pixel format.
  uint32 mfc_output_buffer_pixelformat_;
  // Required size of DPB for decoding.
  int mfc_output_dpb_size_;

  // Pictures that are ready but not sent to PictureReady yet.
  std::queue<PictureRecord> pending_picture_ready_;

  // The number of pictures that are sent to PictureReady and will be cleared.
  int picture_clearing_count_;

  // Output picture size.
  gfx::Size frame_buffer_size_;

  //
  // The device polling thread handles notifications of V4L2 device changes.
  //

  // The thread.
  base::Thread device_poll_thread_;
  // eventfd fd to signal device poll thread when its poll() should be
  // interrupted.
  int device_poll_interrupt_fd_;

  //
  // Other state, held by the child (main) thread.
  //

  // Make our context current before running any EGL entry points.
  base::Callback<bool(void)> make_context_current_;

  // EGL state
  EGLDisplay egl_display_;

  // The codec we'll be decoding for.
  media::VideoCodecProfile video_profile_;

  DISALLOW_COPY_AND_ASSIGN(ExynosVideoDecodeAccelerator);
};

}  // namespace content

#endif  // CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_