author     jfroy <jfroy@chromium.org>            2014-10-23 18:02:34 -0700
committer  Commit bot <commit-bot@chromium.org>  2014-10-24 01:02:53 +0000
commit     488b1dc973220b5f529fa95955c14961502c0269 (patch)
tree       902a2f7a9ee091d0b169ed788061dbfed575afd0 /media
parent     a38764be36077683d9fd7e5190fbae0ce8733d4c (diff)
[cast] Allow audio encoder implementations to specify the frame length.
The previous implementation hardcoded 100 audio frames per second, which
yielded 10 ms frames. In this context, a frame is understood as a grouping
of audio samples (across all channels) sent to the receiver in some
format (raw PCM or encoded/compressed) with an associated RTP timestamp.
The implementation accumulates samples submitted by the sender in a
buffer until it holds one frame's worth of samples, after which it encodes
them and sends the data.
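As a rough sketch of that flow, simplified to a single channel and with invented names (the real encoder consumes multi-channel AudioBus data and capture timestamps), the accumulate-then-encode loop looks like this:

```cpp
#include <cstdint>
#include <functional>
#include <utility>
#include <vector>

// Minimal illustration only, not Chromium code: append samples to a buffer;
// every time the buffer reaches one frame's worth, emit the frame and advance
// the RTP timestamp by the number of samples per frame.
class FrameAccumulator {
 public:
  using FrameCallback =
      std::function<void(const std::vector<float>& frame, uint32_t rtp_timestamp)>;

  FrameAccumulator(int samples_per_frame, FrameCallback callback)
      : samples_per_frame_(samples_per_frame),
        callback_(std::move(callback)) {}

  // Accumulate incoming samples and emit complete frames.
  void Insert(const std::vector<float>& samples) {
    for (float sample : samples) {
      buffer_.push_back(sample);
      if (static_cast<int>(buffer_.size()) == samples_per_frame_) {
        callback_(buffer_, rtp_timestamp_);
        rtp_timestamp_ += static_cast<uint32_t>(samples_per_frame_);
        buffer_.clear();
      }
    }
  }

 private:
  const int samples_per_frame_;
  FrameCallback callback_;
  std::vector<float> buffer_;
  uint32_t rtp_timestamp_ = 0;
};
```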
10 ms is conveniently one of the supported Opus frame lengths, and it
obviously also works for raw PCM. However, other codecs may have
different frame lengths. In particular, AAC uses 1024- or 960-sample
frames, regardless of the sampling rate.
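To make the mismatch concrete, a small illustration of the arithmetic (the values are worked examples, not code from this patch):

```cpp
#include <cstdio>

int main() {
  // Frame duration implied by a fixed number of samples per frame.
  auto duration_ms = [](int samples, int rate) {
    return 1000.0 * samples / rate;
  };
  std::printf("10 ms at 48 kHz    : %d samples/frame\n", 48000 / 100);           // 480
  std::printf("AAC 1024 @ 48 kHz  : %.2f ms/frame\n", duration_ms(1024, 48000)); // ~21.33
  std::printf("AAC  960 @ 48 kHz  : %.2f ms/frame\n", duration_ms(960, 48000));  // 20.00
  std::printf("AAC 1024 @ 44.1 kHz: %.2f ms/frame\n", duration_ms(1024, 44100)); // ~23.22
  return 0;
}
```

A hardcoded 10 ms frame cannot represent the AAC cases, which is what this patch addresses.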
This patch changes the audio encoder to allow implementations to specify
the number of samples per Cast audio frame. Existing implementations
specify |sampling_rate / 100|, which yields the same 10 ms length
regardless of sampling rate. An AAC implementation could specify 1024 or
960 instead.
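For illustration only, the per-codec choice could be summarized as follows; the enum and helper below are hypothetical and not part of the patch:

```cpp
// Hypothetical sketch of how each implementation might choose the
// samples-per-frame value it hands to the encoder base class.
enum class AudioCodec { kOpus, kPcm16, kAac };

int SamplesPerCastFrame(AudioCodec codec, int sampling_rate) {
  switch (codec) {
    case AudioCodec::kOpus:
    case AudioCodec::kPcm16:
      // Existing implementations keep the old behavior: 100 frames per
      // second, i.e. 10 ms frames (480 samples at 48 kHz).
      return sampling_rate / 100;
    case AudioCodec::kAac:
      // AAC codes a fixed number of samples per frame, independent of rate.
      return 1024;  // Or 960, depending on the AAC variant.
  }
  return 0;
}
```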
BUG=417861
Review URL: https://codereview.chromium.org/605803004
Cr-Commit-Position: refs/heads/master@{#301011}
Diffstat (limited to 'media')
-rw-r--r--  media/cast/sender/audio_encoder.cc           | 63
-rw-r--r--  media/cast/sender/audio_encoder.h            |  1
-rw-r--r--  media/cast/sender/audio_encoder_unittest.cc  | 15
-rw-r--r--  media/cast/sender/audio_sender.cc            | 35
4 files changed, 69 insertions, 45 deletions
diff --git a/media/cast/sender/audio_encoder.cc b/media/cast/sender/audio_encoder.cc
index bf1d3ac..992e519 100644
--- a/media/cast/sender/audio_encoder.cc
+++ b/media/cast/sender/audio_encoder.cc
@@ -22,18 +22,11 @@ namespace cast {
 
 namespace {
 
-// The fixed number of audio frames per second and, inversely, the duration of
-// one frame's worth of samples.
-const int kFramesPerSecond = 100;
-const int kFrameDurationMillis = 1000 / kFramesPerSecond;  // No remainder!
-
-// Threshold used to decide whether audio being delivered to the encoder is
-// coming in too slow with respect to the capture timestamps.
-const int kUnderrunThresholdMillis = 3 * kFrameDurationMillis;
+const int kUnderrunSkipThreshold = 3;
+const int kDefaultFramesPerSecond = 100;
 
 }  // namespace
 
-
 // Base class that handles the common problem of feeding one or more AudioBus'
 // data into a buffer and then, once the buffer is full, encoding the signal and
 // emitting an EncodedFrame via the FrameEncodedCallback.
@@ -47,13 +40,17 @@ class AudioEncoder::ImplBase
            Codec codec,
            int num_channels,
            int sampling_rate,
+           int samples_per_frame,
            const FrameEncodedCallback& callback)
       : cast_environment_(cast_environment),
         codec_(codec),
         num_channels_(num_channels),
-        samples_per_frame_(sampling_rate / kFramesPerSecond),
+        samples_per_frame_(samples_per_frame),
         callback_(callback),
         cast_initialization_status_(STATUS_AUDIO_UNINITIALIZED),
+        frame_duration_(base::TimeDelta::FromMicroseconds(
+            base::Time::kMicrosecondsPerSecond * samples_per_frame_ /
+            sampling_rate)),
         buffer_fill_end_(0),
         frame_id_(0),
         frame_rtp_timestamp_(0),
@@ -61,7 +58,7 @@ class AudioEncoder::ImplBase
     // Support for max sampling rate of 48KHz, 2 channels, 100 ms duration.
     const int kMaxSamplesTimesChannelsPerFrame = 48 * 2 * 100;
     if (num_channels_ <= 0 || samples_per_frame_ <= 0 ||
-        sampling_rate % kFramesPerSecond != 0 ||
+        frame_duration_ == base::TimeDelta() ||
         samples_per_frame_ * num_channels_ > kMaxSamplesTimesChannelsPerFrame) {
       cast_initialization_status_ = STATUS_INVALID_AUDIO_CONFIGURATION;
     }
@@ -75,6 +72,8 @@ class AudioEncoder::ImplBase
     return samples_per_frame_;
   }
 
+  base::TimeDelta frame_duration() const { return frame_duration_; }
+
   void EncodeAudio(scoped_ptr<AudioBus> audio_bus,
                    const base::TimeTicks& recorded_time) {
     DCHECK_EQ(cast_initialization_status_, STATUS_AUDIO_INITIALIZED);
@@ -86,20 +85,16 @@ class AudioEncoder::ImplBase
     // frame's RTP timestamp by the estimated number of frames missed. On the
     // other hand, don't attempt to resolve overruns: A receiver should
     // gracefully deal with an excess of audio data.
-    const base::TimeDelta frame_duration =
-        base::TimeDelta::FromMilliseconds(kFrameDurationMillis);
     base::TimeDelta buffer_fill_duration =
-        buffer_fill_end_ * frame_duration / samples_per_frame_;
+        buffer_fill_end_ * frame_duration_ / samples_per_frame_;
     if (!frame_capture_time_.is_null()) {
       const base::TimeDelta amount_ahead_by =
           recorded_time - (frame_capture_time_ + buffer_fill_duration);
-      if (amount_ahead_by >
-          base::TimeDelta::FromMilliseconds(kUnderrunThresholdMillis)) {
+      const int64 num_frames_missed = amount_ahead_by / frame_duration_;
+      if (num_frames_missed > kUnderrunSkipThreshold) {
         samples_dropped_from_buffer_ += buffer_fill_end_;
         buffer_fill_end_ = 0;
         buffer_fill_duration = base::TimeDelta();
-        const int64 num_frames_missed = amount_ahead_by /
-            base::TimeDelta::FromMilliseconds(kFrameDurationMillis);
         frame_rtp_timestamp_ +=
             static_cast<uint32>(num_frames_missed * samples_per_frame_);
         DVLOG(1) << "Skipping RTP timestamp ahead to account for "
@@ -145,7 +140,7 @@ class AudioEncoder::ImplBase
       buffer_fill_end_ = 0;
       ++frame_id_;
       frame_rtp_timestamp_ += samples_per_frame_;
-      frame_capture_time_ += frame_duration;
+      frame_capture_time_ += frame_duration_;
     }
   }
 
@@ -168,6 +163,10 @@ class AudioEncoder::ImplBase
   // Subclass' ctor is expected to set this to STATUS_AUDIO_INITIALIZED.
   CastInitializationStatus cast_initialization_status_;
 
+  // The duration of one frame of encoded audio samples. Derived from
+  // |samples_per_frame_| and the sampling rate.
+  const base::TimeDelta frame_duration_;
+
  private:
   // In the case where a call to EncodeAudio() cannot completely fill the
   // buffer, this points to the position at which to populate data in a later
@@ -209,12 +208,16 @@ class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase {
                  CODEC_AUDIO_OPUS,
                  num_channels,
                  sampling_rate,
+                 sampling_rate / kDefaultFramesPerSecond, /* 10 ms frames */
                  callback),
         encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]),
         opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())),
         buffer_(new float[num_channels * samples_per_frame_]) {
-    if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED)
+    if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED ||
+        sampling_rate % samples_per_frame_ != 0 ||
+        !IsValidFrameDuration(frame_duration_)) {
       return;
+    }
     if (opus_encoder_init(opus_encoder_,
                           sampling_rate,
                           num_channels,
@@ -274,6 +277,16 @@ class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase {
     }
   }
 
+  static bool IsValidFrameDuration(base::TimeDelta duration) {
+    // See https://tools.ietf.org/html/rfc6716#section-2.1.4
+    return duration == base::TimeDelta::FromMicroseconds(2500) ||
+           duration == base::TimeDelta::FromMilliseconds(5) ||
+           duration == base::TimeDelta::FromMilliseconds(10) ||
+           duration == base::TimeDelta::FromMilliseconds(20) ||
+           duration == base::TimeDelta::FromMilliseconds(40) ||
+           duration == base::TimeDelta::FromMilliseconds(60);
+  }
+
   const scoped_ptr<uint8[]> encoder_memory_;
   OpusEncoder* const opus_encoder_;
   const scoped_ptr<float[]> buffer_;
@@ -299,6 +312,7 @@ class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase {
                  CODEC_AUDIO_PCM16,
                  num_channels,
                  sampling_rate,
+                 sampling_rate / kDefaultFramesPerSecond, /* 10 ms frames */
                  callback),
         buffer_(new int16[num_channels * samples_per_frame_]) {
     if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED)
@@ -387,6 +401,15 @@ int AudioEncoder::GetSamplesPerFrame() const {
   return impl_->samples_per_frame();
 }
 
+base::TimeDelta AudioEncoder::GetFrameDuration() const {
+  DCHECK(insert_thread_checker_.CalledOnValidThread());
+  if (InitializationResult() != STATUS_AUDIO_INITIALIZED) {
+    NOTREACHED();
+    return base::TimeDelta();
+  }
+  return impl_->frame_duration();
+}
+
 void AudioEncoder::InsertAudio(scoped_ptr<AudioBus> audio_bus,
                                const base::TimeTicks& recorded_time) {
   DCHECK(insert_thread_checker_.CalledOnValidThread());
diff --git a/media/cast/sender/audio_encoder.h b/media/cast/sender/audio_encoder.h
index e0a3d8a..f0d041d 100644
--- a/media/cast/sender/audio_encoder.h
+++ b/media/cast/sender/audio_encoder.h
@@ -36,6 +36,7 @@ class AudioEncoder {
 
   CastInitializationStatus InitializationResult() const;
   int GetSamplesPerFrame() const;
+  base::TimeDelta GetFrameDuration() const;
   void InsertAudio(scoped_ptr<AudioBus> audio_bus,
                    const base::TimeTicks& recorded_time);
 
diff --git a/media/cast/sender/audio_encoder_unittest.cc b/media/cast/sender/audio_encoder_unittest.cc
index a33ed3b..3868f0d 100644
--- a/media/cast/sender/audio_encoder_unittest.cc
+++ b/media/cast/sender/audio_encoder_unittest.cc
@@ -39,6 +39,10 @@ class TestEncodedAudioFrameReceiver {
     upper_bound_ = upper_bound;
   }
 
+  void SetSamplesPerFrame(int samples_per_frame) {
+    samples_per_frame_ = samples_per_frame;
+  }
+
   void FrameEncoded(scoped_ptr<EncodedFrame> encoded_frame,
                     int samples_skipped) {
     EXPECT_EQ(encoded_frame->dependency, EncodedFrame::KEY);
@@ -49,9 +53,7 @@ class TestEncodedAudioFrameReceiver {
     // of the fixed frame size.
     EXPECT_LE(rtp_lower_bound_, encoded_frame->rtp_timestamp);
     rtp_lower_bound_ = encoded_frame->rtp_timestamp;
-    // Note: In audio_encoder.cc, 100 is the fixed audio frame rate.
-    const int kSamplesPerFrame = kDefaultAudioSamplingRate / 100;
-    EXPECT_EQ(0u, encoded_frame->rtp_timestamp % kSamplesPerFrame);
+    EXPECT_EQ(0u, encoded_frame->rtp_timestamp % samples_per_frame_);
     EXPECT_TRUE(!encoded_frame->data.empty());
 
     EXPECT_LE(lower_bound_, encoded_frame->reference_time);
@@ -65,6 +67,7 @@ class TestEncodedAudioFrameReceiver {
   const Codec codec_;
   int frames_received_;
   uint32 rtp_lower_bound_;
+  int samples_per_frame_;
   base::TimeTicks lower_bound_;
   base::TimeTicks upper_bound_;
 
@@ -116,9 +119,7 @@ class AudioEncoderTest : public ::testing::TestWithParam<TestScenario> {
 
     CreateObjectsForCodec(codec);
 
-    // Note: In audio_encoder.cc, 10 ms is the fixed frame duration.
-    const base::TimeDelta frame_duration =
-        base::TimeDelta::FromMilliseconds(10);
+    const base::TimeDelta frame_duration = audio_encoder_->GetFrameDuration();
 
     for (size_t i = 0; i < scenario.num_durations; ++i) {
       const bool simulate_missing_data = scenario.durations_in_ms[i] < 0;
@@ -160,6 +161,8 @@ class AudioEncoderTest : public ::testing::TestWithParam<TestScenario> {
         codec,
         base::Bind(&TestEncodedAudioFrameReceiver::FrameEncoded,
                    base::Unretained(receiver_.get()))));
+
+    receiver_->SetSamplesPerFrame(audio_encoder_->GetSamplesPerFrame());
   }
 
   base::SimpleTestTickClock* testing_clock_;  // Owned by CastEnvironment.
diff --git a/media/cast/sender/audio_sender.cc b/media/cast/sender/audio_sender.cc
index 8916a17..4748218c 100644
--- a/media/cast/sender/audio_sender.cc
+++ b/media/cast/sender/audio_sender.cc
@@ -13,29 +13,20 @@ namespace media {
 namespace cast {
 
-namespace {
-
-// TODO(miu): This should be specified in AudioSenderConfig, but currently it is
-// fixed to 100 FPS (i.e., 10 ms per frame), and AudioEncoder assumes this as
-// well.
-const int kAudioFrameRate = 100;
-
-}  // namespace
 
 
 AudioSender::AudioSender(scoped_refptr<CastEnvironment> cast_environment,
                          const AudioSenderConfig& audio_config,
                          CastTransportSender* const transport_sender)
-    : FrameSender(
-          cast_environment,
-          true,
-          transport_sender,
-          base::TimeDelta::FromMilliseconds(audio_config.rtcp_interval),
-          audio_config.frequency,
-          audio_config.ssrc,
-          kAudioFrameRate,
-          audio_config.min_playout_delay,
-          audio_config.max_playout_delay,
-          NewFixedCongestionControl(audio_config.bitrate)),
+    : FrameSender(cast_environment,
+                  true,
+                  transport_sender,
+                  base::TimeDelta::FromMilliseconds(audio_config.rtcp_interval),
+                  audio_config.frequency,
+                  audio_config.ssrc,
+                  0,  // |max_frame_rate_| is set after encoder initialization.
+                  audio_config.min_playout_delay,
+                  audio_config.max_playout_delay,
+                  NewFixedCongestionControl(audio_config.bitrate)),
       samples_in_encoder_(0),
       weak_factory_(this) {
   cast_initialization_status_ = STATUS_AUDIO_UNINITIALIZED;
@@ -56,6 +47,12 @@ AudioSender::AudioSender(scoped_refptr<CastEnvironment> cast_environment,
     cast_initialization_status_ = STATUS_AUDIO_UNINITIALIZED;
   }
 
+  // The number of samples per encoded audio frame depends on the codec and its
+  // initialization parameters. Now that we have an encoder, we can calculate
+  // the maximum frame rate.
+  max_frame_rate_ =
+      audio_config.frequency / audio_encoder_->GetSamplesPerFrame();
+
   media::cast::CastTransportRtpConfig transport_config;
   transport_config.ssrc = audio_config.ssrc;
   transport_config.feedback_ssrc = audio_config.incoming_feedback_ssrc;
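As a quick sanity check of the sender's new |max_frame_rate_| computation above (illustrative arithmetic, not code from the patch):

```cpp
// Frame rates the sender would now derive from the encoder instead of
// assuming a fixed 100 fps.
constexpr int kOpusOrPcmFps = 48000 / 480;   // = 100, same as before this patch.
constexpr int kAacFpsAt48k  = 48000 / 1024;  // = 46 (integer division).
constexpr int kAacFpsAt44k  = 44100 / 1024;  // = 43.
```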