From 245c2589c6a7203e68a18039c6aa1f328dd45542 Mon Sep 17 00:00:00 2001
From: mcasas <mcasas@chromium.org>
Date: Mon, 1 Feb 2016 18:00:44 -0800
Subject: MediaRecorder: make WebmMuxer hold on to all video frames while
 waiting for audio

Currently WebmMuxer holds on to the last encoded video key frame
while waiting for audio to arrive. This causes corruption on the
field if there is a large distance between the keyframe and the
non-keyframe where recording is started.

This CL holds on to all encoded video frames while waiting for
audio. To avoid this buffering from getting out of control, the
std::queue used to store the encoded data is flushed on key frame
arrival.

Unittests updated.

BUG=582165, 547948

Review URL: https://codereview.chromium.org/1659443003

Cr-Commit-Position: refs/heads/master@{#372855}
---
 media/capture/webm_muxer.cc          | 47 ++++++++++++++++++++++--------------
 media/capture/webm_muxer.h           | 21 +++++++++++++---
 media/capture/webm_muxer_unittest.cc | 36 +++++++++++++++------------
 3 files changed, 67 insertions(+), 37 deletions(-)

(limited to 'media/capture')
diff --git a/media/capture/webm_muxer.cc b/media/capture/webm_muxer.cc
index 44376ce..c017032 100644
--- a/media/capture/webm_muxer.cc
+++ b/media/capture/webm_muxer.cc
@@ -121,22 +121,24 @@ void WebmMuxer::OnEncodedVideo(const scoped_refptr<VideoFrame>& video_frame,
       first_frame_timestamp_ = timestamp;
   }
 
-  // TODO(ajose): Don't drop data. http://crbug.com/547948
-  // TODO(ajose): Update this when we support multiple tracks.
-  // http://crbug.com/528523
+  // TODO(ajose): Support multiple tracks: http://crbug.com/528523
   if (has_audio_ && !audio_track_index_) {
     DVLOG(1) << __FUNCTION__ << ": delaying until audio track ready.";
-    if (is_key_frame) {
-      most_recent_encoded_video_keyframe_ = std::move(encoded_data);
-      saved_keyframe_timestamp_ = timestamp;
-    }
+    if (is_key_frame)  // Upon Key frame reception, empty the encoded queue.
+      encoded_frames_queue_.clear();
+
+    encoded_frames_queue_.push_back(make_scoped_ptr(new EncodedVideoFrame(
+        std::move(encoded_data), timestamp, is_key_frame)));
     return;
   }
 
-  // If have a saved keyframe, add it first.
-  if (most_recent_encoded_video_keyframe_.get())
-    AddFrame(std::move(most_recent_encoded_video_keyframe_), video_track_index_,
-             saved_keyframe_timestamp_, true /* is_key_frame */);
+  // Dump all saved encoded video frames if any.
+  while (!encoded_frames_queue_.empty()) {
+    AddFrame(std::move(encoded_frames_queue_.front()->data), video_track_index_,
+             encoded_frames_queue_.front()->timestamp,
+             encoded_frames_queue_.front()->is_keyframe);
+    encoded_frames_queue_.pop_front();
+  }
 
   AddFrame(std::move(encoded_data), video_track_index_, timestamp,
            is_key_frame);
@@ -154,18 +156,20 @@ void WebmMuxer::OnEncodedAudio(const media::AudioParameters& params,
       first_frame_timestamp_ = timestamp;
   }
 
-  // TODO(ajose): Don't drop data. http://crbug.com/547948
-  // TODO(ajose): Update this when we support multiple tracks.
-  // http://crbug.com/528523
+  // TODO(ajose): Don't drop audio data: http://crbug.com/547948
+  // TODO(ajose): Support multiple tracks: http://crbug.com/528523
   if (has_video_ && !video_track_index_) {
     DVLOG(1) << __FUNCTION__ << ": delaying until video track ready.";
     return;
   }
 
-  // If have a saved keyframe, add it first.
-  if (most_recent_encoded_video_keyframe_.get())
-    AddFrame(std::move(most_recent_encoded_video_keyframe_), video_track_index_,
-             saved_keyframe_timestamp_, true /* is_key_frame */);
+  // Dump all saved encoded video frames if any.
+  while (!encoded_frames_queue_.empty()) {
+    AddFrame(std::move(encoded_frames_queue_.front()->data), video_track_index_,
+             encoded_frames_queue_.front()->timestamp,
+             encoded_frames_queue_.front()->is_keyframe);
+    encoded_frames_queue_.pop_front();
+  }
 
   AddFrame(std::move(encoded_data), audio_track_index_, timestamp,
            true /* is_key_frame -- always true for audio */);
@@ -275,4 +279,11 @@ void WebmMuxer::AddFrame(scoped_ptr<std::string> encoded_data,
                     is_key_frame);
 }
 
+WebmMuxer::EncodedVideoFrame::EncodedVideoFrame(scoped_ptr<std::string> data,
+                                                base::TimeTicks timestamp,
+                                                bool is_keyframe)
+    : data(std::move(data)), timestamp(timestamp), is_keyframe(is_keyframe) {}
+
+WebmMuxer::EncodedVideoFrame::~EncodedVideoFrame() {}
+
 }  // namespace media
diff --git a/media/capture/webm_muxer.h b/media/capture/webm_muxer.h
index 796d8d9..1f5e3d3 100644
--- a/media/capture/webm_muxer.h
+++ b/media/capture/webm_muxer.h
@@ -7,6 +7,8 @@
 
 #include <stdint.h>
 
+#include <deque>
+
 #include "base/callback.h"
 #include "base/macros.h"
 #include "base/numerics/safe_math.h"
@@ -115,9 +117,22 @@ class MEDIA_EXPORT WebmMuxer : public NON_EXPORTED_BASE(mkvmuxer::IMkvWriter) {
   // The MkvMuxer active element.
   mkvmuxer::Segment segment_;
 
-  // Save the most recent video keyframe if we're waiting for audio to come in.
-  scoped_ptr<std::string> most_recent_encoded_video_keyframe_;
-  base::TimeTicks saved_keyframe_timestamp_;
+  // Hold on to all encoded video frames to dump them with and when audio is
+  // received, if expected, since WebM headers can only be written once.
+  struct EncodedVideoFrame {
+    EncodedVideoFrame(scoped_ptr<std::string> data,
+                      base::TimeTicks timestamp,
+                      bool is_keyframe);
+    ~EncodedVideoFrame();
+
+    scoped_ptr<std::string> data;
+    base::TimeTicks timestamp;
+    bool is_keyframe;
+
+   private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(EncodedVideoFrame);
+  };
+  std::deque<scoped_ptr<EncodedVideoFrame>> encoded_frames_queue_;
 
   DISALLOW_COPY_AND_ASSIGN(WebmMuxer);
 };
diff --git a/media/capture/webm_muxer_unittest.cc b/media/capture/webm_muxer_unittest.cc
index b5e59be..19b1e70 100644
--- a/media/capture/webm_muxer_unittest.cc
+++ b/media/capture/webm_muxer_unittest.cc
@@ -144,9 +144,9 @@ TEST_P(WebmMuxerTest, OnEncodedAudioTwoFrames) {
   if (GetParam().num_video_tracks > 0)
     return;
 
-  int sample_rate = 48000;
-  int bits_per_sample = 16;
-  int frames_per_buffer = 480;
+  const int sample_rate = 48000;
+  const int bits_per_sample = 16;
+  const int frames_per_buffer = 480;
   media::AudioParameters audio_params(
       media::AudioParameters::Format::AUDIO_PCM_LOW_LATENCY,
       media::CHANNEL_LAYOUT_MONO, sample_rate, bits_per_sample,
@@ -188,13 +188,9 @@ TEST_P(WebmMuxerTest, OnEncodedAudioTwoFrames) {
             accumulated_position_);
 }
 
-// Currently, when WebmMuxer is told it will have both audio and video tracks,
-// it drops data until it's receiving data for _both_ tracks, to avoid issues
-// related to writing the webm header.
 // This test verifies that when video data comes before audio data, we save the
-// most recent video keyframe and add it to the video track as soon as audio
-// data comes.
-TEST_P(WebmMuxerTest, VideoKeyframeIsSaved) {
+// encoded video frames and add it to the video track when audio data arrives.
+TEST_P(WebmMuxerTest, VideoIsStoredWhileWaitingForAudio) {
   // This test is only relevant if we have both kinds of tracks.
   if (GetParam().num_video_tracks == 0 || GetParam().num_audio_tracks == 0)
     return;
@@ -204,25 +200,33 @@ TEST_P(WebmMuxerTest, VideoKeyframeIsSaved) {
   const scoped_refptr<VideoFrame> video_frame =
       VideoFrame::CreateBlackFrame(frame_size);
   const std::string encoded_video("thisisanencodedvideopacket");
-  // Won't write anything.
   webm_muxer_.OnEncodedVideo(video_frame,
                              make_scoped_ptr(new std::string(encoded_video)),
                              base::TimeTicks::Now(), true /* keyframe */);
+  // A few encoded non key frames.
+  const int kNumNonKeyFrames = 2;
+  for (int i = 0; i < kNumNonKeyFrames; ++i) {
+    webm_muxer_.OnEncodedVideo(video_frame,
+                               make_scoped_ptr(new std::string(encoded_video)),
+                               base::TimeTicks::Now(), false /* keyframe */);
+  }
 
-  // Then send some audio. The header will be written and muxing will proceed
+  // Send some audio. The header will be written and muxing will proceed
   // normally.
-  int sample_rate = 48000;
-  int bits_per_sample = 16;
-  int frames_per_buffer = 480;
+  const int sample_rate = 48000;
+  const int bits_per_sample = 16;
+  const int frames_per_buffer = 480;
   media::AudioParameters audio_params(
       media::AudioParameters::Format::AUDIO_PCM_LOW_LATENCY,
       media::CHANNEL_LAYOUT_MONO, sample_rate, bits_per_sample,
       frames_per_buffer);
   const std::string encoded_audio("thisisanencodedaudiopacket");
 
-  // We should first get the video keyframe, then the audio frame.
+  // We should first get the encoded video frames, then the encoded audio frame.
   Sequence s;
-  EXPECT_CALL(*this, WriteCallback(Eq(encoded_video))).Times(1).InSequence(s);
+  EXPECT_CALL(*this, WriteCallback(Eq(encoded_video)))
+      .Times(1 + kNumNonKeyFrames)
+      .InSequence(s);
   EXPECT_CALL(*this, WriteCallback(Eq(encoded_audio))).Times(1).InSequence(s);
   // We'll also get lots of other header-related stuff.
   EXPECT_CALL(*this, WriteCallback(
-- 
cgit v1.1