author     jfroy <jfroy@chromium.org>            2014-10-23 18:02:34 -0700
committer  Commit bot <commit-bot@chromium.org>  2014-10-24 01:02:53 +0000
commit     488b1dc973220b5f529fa95955c14961502c0269 (patch)
tree       902a2f7a9ee091d0b169ed788061dbfed575afd0 /media
parent     a38764be36077683d9fd7e5190fbae0ce8733d4c (diff)
[cast] Allow audio encoder implementations to specify the frame length.
The previous implementation hardcoded 100 audio frames per second, which
yielded 10 ms frames. In this context, a frame is understood as a grouping
of audio samples (across all channels) sent to the receiver in some
format (raw PCM or encoded/compressed) with an associated RTP timestamp.
The implementation accumulates samples submitted by the sender in a
buffer until it holds one frame's worth of samples, after which it encodes
them and sends the data.
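As a rough sketch of that flow, simplified to a single channel and with invented names (the real encoder consumes multi-channel AudioBus data and capture timestamps), the accumulate-then-encode loop looks like this:

```cpp
#include <cstdint>
#include <functional>
#include <utility>
#include <vector>

// Minimal illustration only, not Chromium code: append samples to a buffer;
// every time the buffer reaches one frame's worth, emit the frame and advance
// the RTP timestamp by the number of samples per frame.
class FrameAccumulator {
 public:
  using FrameCallback =
      std::function<void(const std::vector<float>& frame, uint32_t rtp_timestamp)>;

  FrameAccumulator(int samples_per_frame, FrameCallback callback)
      : samples_per_frame_(samples_per_frame),
        callback_(std::move(callback)) {}

  // Accumulate incoming samples and emit complete frames.
  void Insert(const std::vector<float>& samples) {
    for (float sample : samples) {
      buffer_.push_back(sample);
      if (static_cast<int>(buffer_.size()) == samples_per_frame_) {
        callback_(buffer_, rtp_timestamp_);
        rtp_timestamp_ += static_cast<uint32_t>(samples_per_frame_);
        buffer_.clear();
      }
    }
  }

 private:
  const int samples_per_frame_;
  FrameCallback callback_;
  std::vector<float> buffer_;
  uint32_t rtp_timestamp_ = 0;
};
```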
10 ms is conveniently one of the supported Opus frame lengths, and it
obviously also works for raw PCM. However, other codecs may have
different frame lengths. In particular, AAC uses 1024- or 960-sample
frames, regardless of the sampling rate.
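To make the mismatch concrete, a small illustration of the arithmetic (the values are worked examples, not code from this patch):

```cpp
#include <cstdio>

int main() {
  // Frame duration implied by a fixed number of samples per frame.
  auto duration_ms = [](int samples, int rate) {
    return 1000.0 * samples / rate;
  };
  std::printf("10 ms at 48 kHz    : %d samples/frame\n", 48000 / 100);           // 480
  std::printf("AAC 1024 @ 48 kHz  : %.2f ms/frame\n", duration_ms(1024, 48000)); // ~21.33
  std::printf("AAC  960 @ 48 kHz  : %.2f ms/frame\n", duration_ms(960, 48000));  // 20.00
  std::printf("AAC 1024 @ 44.1 kHz: %.2f ms/frame\n", duration_ms(1024, 44100)); // ~23.22
  return 0;
}
```

A hardcoded 10 ms frame cannot represent the AAC cases, which is what this patch addresses.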
This patch changes the audio encoder to allow implementations to specify
the number of samples per Cast audio frame. Existing implementations
specify |sampling_rate / 100|, which yields the same 10 ms length
regardless of sampling rate. An AAC implementation could specify 1024 or
960 instead.
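For illustration only, the per-codec choice could be summarized as follows; the enum and helper below are hypothetical and not part of the patch:

```cpp
// Hypothetical sketch of how each implementation might choose the
// samples-per-frame value it hands to the encoder base class.
enum class AudioCodec { kOpus, kPcm16, kAac };

int SamplesPerCastFrame(AudioCodec codec, int sampling_rate) {
  switch (codec) {
    case AudioCodec::kOpus:
    case AudioCodec::kPcm16:
      // Existing implementations keep the old behavior: 100 frames per
      // second, i.e. 10 ms frames (480 samples at 48 kHz).
      return sampling_rate / 100;
    case AudioCodec::kAac:
      // AAC codes a fixed number of samples per frame, independent of rate.
      return 1024;  // Or 960, depending on the AAC variant.
  }
  return 0;
}
```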
BUG=417861
Review URL: https://codereview.chromium.org/605803004
Cr-Commit-Position: refs/heads/master@{#301011}
Diffstat (limited to 'media')
-rw-r--r--  media/cast/sender/audio_encoder.cc           | 63
-rw-r--r--  media/cast/sender/audio_encoder.h            |  1
-rw-r--r--  media/cast/sender/audio_encoder_unittest.cc  | 15
-rw-r--r--  media/cast/sender/audio_sender.cc            | 35
4 files changed, 69 insertions, 45 deletions
diff --git a/media/cast/sender/audio_encoder.cc b/media/cast/sender/audio_encoder.cc
index bf1d3ac..992e519 100644
--- a/media/cast/sender/audio_encoder.cc
+++ b/media/cast/sender/audio_encoder.cc
@@ -22,18 +22,11 @@ namespace cast {
 
 namespace {
 
-// The fixed number of audio frames per second and, inversely, the duration of
-// one frame's worth of samples.
-const int kFramesPerSecond = 100;
-const int kFrameDurationMillis = 1000 / kFramesPerSecond;  // No remainder!
-
-// Threshold used to decide whether audio being delivered to the encoder is
-// coming in too slow with respect to the capture timestamps.
-const int kUnderrunThresholdMillis = 3 * kFrameDurationMillis;
+const int kUnderrunSkipThreshold = 3;
+const int kDefaultFramesPerSecond = 100;
 
 }  // namespace
 
-
 // Base class that handles the common problem of feeding one or more AudioBus'
 // data into a buffer and then, once the buffer is full, encoding the signal and
 // emitting an EncodedFrame via the FrameEncodedCallback.
@@ -47,13 +40,17 @@ class AudioEncoder::ImplBase
            Codec codec,
            int num_channels,
            int sampling_rate,
+           int samples_per_frame,
            const FrameEncodedCallback& callback)
       : cast_environment_(cast_environment),
         codec_(codec),
         num_channels_(num_channels),
-        samples_per_frame_(sampling_rate / kFramesPerSecond),
+        samples_per_frame_(samples_per_frame),
         callback_(callback),
         cast_initialization_status_(STATUS_AUDIO_UNINITIALIZED),
+        frame_duration_(base::TimeDelta::FromMicroseconds(
+            base::Time::kMicrosecondsPerSecond * samples_per_frame_ /
+            sampling_rate)),
         buffer_fill_end_(0),
         frame_id_(0),
         frame_rtp_timestamp_(0),
@@ -61,7 +58,7 @@ class AudioEncoder::ImplBase
     // Support for max sampling rate of 48KHz, 2 channels, 100 ms duration.
     const int kMaxSamplesTimesChannelsPerFrame = 48 * 2 * 100;
     if (num_channels_ <= 0 || samples_per_frame_ <= 0 ||
-        sampling_rate % kFramesPerSecond != 0 ||
+        frame_duration_ == base::TimeDelta() ||
         samples_per_frame_ * num_channels_ > kMaxSamplesTimesChannelsPerFrame) {
       cast_initialization_status_ = STATUS_INVALID_AUDIO_CONFIGURATION;
     }
@@ -75,6 +72,8 @@ class AudioEncoder::ImplBase
     return samples_per_frame_;
   }
 
+  base::TimeDelta frame_duration() const { return frame_duration_; }
+
   void EncodeAudio(scoped_ptr<AudioBus> audio_bus,
                    const base::TimeTicks& recorded_time) {
     DCHECK_EQ(cast_initialization_status_, STATUS_AUDIO_INITIALIZED);
@@ -86,20 +85,16 @@ class AudioEncoder::ImplBase
     // frame's RTP timestamp by the estimated number of frames missed. On the
     // other hand, don't attempt to resolve overruns: A receiver should
     // gracefully deal with an excess of audio data.
-    const base::TimeDelta frame_duration =
-        base::TimeDelta::FromMilliseconds(kFrameDurationMillis);
     base::TimeDelta buffer_fill_duration =
-        buffer_fill_end_ * frame_duration / samples_per_frame_;
+        buffer_fill_end_ * frame_duration_ / samples_per_frame_;
     if (!frame_capture_time_.is_null()) {
       const base::TimeDelta amount_ahead_by =
           recorded_time - (frame_capture_time_ + buffer_fill_duration);
-      if (amount_ahead_by >
-          base::TimeDelta::FromMilliseconds(kUnderrunThresholdMillis)) {
+      const int64 num_frames_missed = amount_ahead_by / frame_duration_;
+      if (num_frames_missed > kUnderrunSkipThreshold) {
         samples_dropped_from_buffer_ += buffer_fill_end_;
         buffer_fill_end_ = 0;
         buffer_fill_duration = base::TimeDelta();
-        const int64 num_frames_missed = amount_ahead_by /
-            base::TimeDelta::FromMilliseconds(kFrameDurationMillis);
         frame_rtp_timestamp_ +=
             static_cast<uint32>(num_frames_missed * samples_per_frame_);
         DVLOG(1) << "Skipping RTP timestamp ahead to account for "
@@ -145,7 +140,7 @@ class AudioEncoder::ImplBase
       buffer_fill_end_ = 0;
       ++frame_id_;
       frame_rtp_timestamp_ += samples_per_frame_;
-      frame_capture_time_ += frame_duration;
+      frame_capture_time_ += frame_duration_;
     }
   }
 
@@ -168,6 +163,10 @@ class AudioEncoder::ImplBase
   // Subclass' ctor is expected to set this to STATUS_AUDIO_INITIALIZED.
   CastInitializationStatus cast_initialization_status_;
 
+  // The duration of one frame of encoded audio samples. Derived from
+  // |samples_per_frame_| and the sampling rate.
+  const base::TimeDelta frame_duration_;
+
  private:
   // In the case where a call to EncodeAudio() cannot completely fill the
   // buffer, this points to the position at which to populate data in a later
@@ -209,12 +208,16 @@ class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase {
                  CODEC_AUDIO_OPUS,
                  num_channels,
                  sampling_rate,
+                 sampling_rate / kDefaultFramesPerSecond, /* 10 ms frames */
                  callback),
         encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]),
         opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())),
         buffer_(new float[num_channels * samples_per_frame_]) {
-    if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED)
+    if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED ||
+        sampling_rate % samples_per_frame_ != 0 ||
+        !IsValidFrameDuration(frame_duration_)) {
       return;
+    }
     if (opus_encoder_init(opus_encoder_,
                           sampling_rate,
                           num_channels,
@@ -274,6 +277,16 @@ class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase {
     }
   }
 
+  static bool IsValidFrameDuration(base::TimeDelta duration) {
+    // See https://tools.ietf.org/html/rfc6716#section-2.1.4
+    return duration == base::TimeDelta::FromMicroseconds(2500) ||
+           duration == base::TimeDelta::FromMilliseconds(5) ||
+           duration == base::TimeDelta::FromMilliseconds(10) ||
+           duration == base::TimeDelta::FromMilliseconds(20) ||
+           duration == base::TimeDelta::FromMilliseconds(40) ||
+           duration == base::TimeDelta::FromMilliseconds(60);
+  }
+
   const scoped_ptr<uint8[]> encoder_memory_;
   OpusEncoder* const opus_encoder_;
   const scoped_ptr<float[]> buffer_;
@@ -299,6 +312,7 @@ class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase {
                  CODEC_AUDIO_PCM16,
                  num_channels,
                  sampling_rate,
+                 sampling_rate / kDefaultFramesPerSecond, /* 10 ms frames */
                  callback),
         buffer_(new int16[num_channels * samples_per_frame_]) {
     if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED)
@@ -387,6 +401,15 @@ int AudioEncoder::GetSamplesPerFrame() const {
   return impl_->samples_per_frame();
 }
 
+base::TimeDelta AudioEncoder::GetFrameDuration() const {
+  DCHECK(insert_thread_checker_.CalledOnValidThread());
+  if (InitializationResult() != STATUS_AUDIO_INITIALIZED) {
+    NOTREACHED();
+    return base::TimeDelta();
+  }
+  return impl_->frame_duration();
+}
+
 void AudioEncoder::InsertAudio(scoped_ptr<AudioBus> audio_bus,
                                const base::TimeTicks& recorded_time) {
   DCHECK(insert_thread_checker_.CalledOnValidThread());
diff --git a/media/cast/sender/audio_encoder.h b/media/cast/sender/audio_encoder.h
index e0a3d8a..f0d041d 100644
--- a/media/cast/sender/audio_encoder.h
+++ b/media/cast/sender/audio_encoder.h
@@ -36,6 +36,7 @@ class AudioEncoder {
 
   CastInitializationStatus InitializationResult() const;
   int GetSamplesPerFrame() const;
+  base::TimeDelta GetFrameDuration() const;
   void InsertAudio(scoped_ptr<AudioBus> audio_bus,
                    const base::TimeTicks& recorded_time);
 
diff --git a/media/cast/sender/audio_encoder_unittest.cc b/media/cast/sender/audio_encoder_unittest.cc
index a33ed3b..3868f0d 100644
--- a/media/cast/sender/audio_encoder_unittest.cc
+++ b/media/cast/sender/audio_encoder_unittest.cc
@@ -39,6 +39,10 @@ class TestEncodedAudioFrameReceiver {
     upper_bound_ = upper_bound;
   }
 
+  void SetSamplesPerFrame(int samples_per_frame) {
+    samples_per_frame_ = samples_per_frame;
+  }
+
   void FrameEncoded(scoped_ptr<EncodedFrame> encoded_frame,
                     int samples_skipped) {
     EXPECT_EQ(encoded_frame->dependency, EncodedFrame::KEY);
@@ -49,9 +53,7 @@ class TestEncodedAudioFrameReceiver {
     // of the fixed frame size.
     EXPECT_LE(rtp_lower_bound_, encoded_frame->rtp_timestamp);
     rtp_lower_bound_ = encoded_frame->rtp_timestamp;
-    // Note: In audio_encoder.cc, 100 is the fixed audio frame rate.
-    const int kSamplesPerFrame = kDefaultAudioSamplingRate / 100;
-    EXPECT_EQ(0u, encoded_frame->rtp_timestamp % kSamplesPerFrame);
+    EXPECT_EQ(0u, encoded_frame->rtp_timestamp % samples_per_frame_);
     EXPECT_TRUE(!encoded_frame->data.empty());
 
     EXPECT_LE(lower_bound_, encoded_frame->reference_time);
@@ -65,6 +67,7 @@ class TestEncodedAudioFrameReceiver {
   const Codec codec_;
   int frames_received_;
   uint32 rtp_lower_bound_;
+  int samples_per_frame_;
   base::TimeTicks lower_bound_;
   base::TimeTicks upper_bound_;
 
@@ -116,9 +119,7 @@ class AudioEncoderTest : public ::testing::TestWithParam<TestScenario> {
 
     CreateObjectsForCodec(codec);
 
-    // Note: In audio_encoder.cc, 10 ms is the fixed frame duration.
-    const base::TimeDelta frame_duration =
-        base::TimeDelta::FromMilliseconds(10);
+    const base::TimeDelta frame_duration = audio_encoder_->GetFrameDuration();
 
     for (size_t i = 0; i < scenario.num_durations; ++i) {
       const bool simulate_missing_data = scenario.durations_in_ms[i] < 0;
@@ -160,6 +161,8 @@ class AudioEncoderTest : public ::testing::TestWithParam<TestScenario> {
         codec,
         base::Bind(&TestEncodedAudioFrameReceiver::FrameEncoded,
                    base::Unretained(receiver_.get()))));
+
+    receiver_->SetSamplesPerFrame(audio_encoder_->GetSamplesPerFrame());
   }
 
   base::SimpleTestTickClock* testing_clock_;  // Owned by CastEnvironment.
diff --git a/media/cast/sender/audio_sender.cc b/media/cast/sender/audio_sender.cc
index 8916a17..4748218c 100644
--- a/media/cast/sender/audio_sender.cc
+++ b/media/cast/sender/audio_sender.cc
@@ -13,29 +13,20 @@ namespace media {
 namespace cast {
 
-namespace {
-
-// TODO(miu): This should be specified in AudioSenderConfig, but currently it is
-// fixed to 100 FPS (i.e., 10 ms per frame), and AudioEncoder assumes this as
-// well.
-const int kAudioFrameRate = 100;
-
-}  // namespace
 
 
 AudioSender::AudioSender(scoped_refptr<CastEnvironment> cast_environment,
                          const AudioSenderConfig& audio_config,
                          CastTransportSender* const transport_sender)
-    : FrameSender(
-          cast_environment,
-          true,
-          transport_sender,
-          base::TimeDelta::FromMilliseconds(audio_config.rtcp_interval),
-          audio_config.frequency,
-          audio_config.ssrc,
-          kAudioFrameRate,
-          audio_config.min_playout_delay,
-          audio_config.max_playout_delay,
-          NewFixedCongestionControl(audio_config.bitrate)),
+    : FrameSender(cast_environment,
+                  true,
+                  transport_sender,
+                  base::TimeDelta::FromMilliseconds(audio_config.rtcp_interval),
+                  audio_config.frequency,
+                  audio_config.ssrc,
+                  0,  // |max_frame_rate_| is set after encoder initialization.
+                  audio_config.min_playout_delay,
+                  audio_config.max_playout_delay,
+                  NewFixedCongestionControl(audio_config.bitrate)),
       samples_in_encoder_(0),
       weak_factory_(this) {
   cast_initialization_status_ = STATUS_AUDIO_UNINITIALIZED;
@@ -56,6 +47,12 @@ AudioSender::AudioSender(scoped_refptr<CastEnvironment> cast_environment,
     cast_initialization_status_ = STATUS_AUDIO_UNINITIALIZED;
   }
 
+  // The number of samples per encoded audio frame depends on the codec and its
+  // initialization parameters. Now that we have an encoder, we can calculate
+  // the maximum frame rate.
+  max_frame_rate_ =
+      audio_config.frequency / audio_encoder_->GetSamplesPerFrame();
+
   media::cast::CastTransportRtpConfig transport_config;
   transport_config.ssrc = audio_config.ssrc;
   transport_config.feedback_ssrc = audio_config.incoming_feedback_ssrc;
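As a quick sanity check of the sender's new |max_frame_rate_| computation above (illustrative arithmetic, not code from the patch):

```cpp
// Frame rates the sender would now derive from the encoder instead of
// assuming a fixed 100 fps.
constexpr int kOpusOrPcmFps = 48000 / 480;   // = 100, same as before this patch.
constexpr int kAacFpsAt48k  = 48000 / 1024;  // = 46 (integer division).
constexpr int kAacFpsAt44k  = 44100 / 1024;  // = 43.
```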