diff options
-rw-r--r-- | media/base/audio_buffer.cc | 43 | ||||
-rw-r--r-- | media/base/audio_buffer.h | 4 | ||||
-rw-r--r-- | media/base/audio_buffer_unittest.cc | 119 | ||||
-rw-r--r-- | media/base/audio_discard_helper.cc | 99 | ||||
-rw-r--r-- | media/base/audio_discard_helper.h | 22 | ||||
-rw-r--r-- | media/base/audio_discard_helper_unittest.cc | 150 | ||||
-rw-r--r-- | media/base/decoder_buffer.cc | 3 | ||||
-rw-r--r-- | media/base/decoder_buffer.h | 11 | ||||
-rw-r--r-- | media/filters/ffmpeg_audio_decoder.cc | 3 | ||||
-rw-r--r-- | media/filters/ffmpeg_demuxer.cc | 37 | ||||
-rw-r--r-- | media/filters/opus_audio_decoder.cc | 3 | ||||
-rw-r--r-- | media/filters/pipeline_integration_test.cc | 11 | ||||
-rw-r--r-- | media/formats/webm/webm_cluster_parser.cc | 5 |
13 files changed, 460 insertions, 50 deletions
diff --git a/media/base/audio_buffer.cc b/media/base/audio_buffer.cc index 08bb1e3..33d4ecb 100644 --- a/media/base/audio_buffer.cc +++ b/media/base/audio_buffer.cc @@ -266,4 +266,47 @@ void AudioBuffer::TrimEnd(int frames_to_trim) { duration_ = CalculateDuration(adjusted_frame_count_, sample_rate_); } +void AudioBuffer::TrimRange(int start, int end) { + CHECK_GE(start, 0); + CHECK_LE(end, adjusted_frame_count_); + + const int frames_to_trim = end - start; + CHECK_GE(frames_to_trim, 0); + CHECK_LE(frames_to_trim, adjusted_frame_count_); + + const int bytes_per_channel = SampleFormatToBytesPerChannel(sample_format_); + const int frames_to_copy = adjusted_frame_count_ - end; + if (frames_to_copy > 0) { + switch (sample_format_) { + case kSampleFormatPlanarS16: + case kSampleFormatPlanarF32: + // Planar data must be shifted per channel. + for (int ch = 0; ch < channel_count_; ++ch) { + memmove(channel_data_[ch] + (trim_start_ + start) * bytes_per_channel, + channel_data_[ch] + (trim_start_ + end) * bytes_per_channel, + bytes_per_channel * frames_to_copy); + } + break; + case kSampleFormatU8: + case kSampleFormatS16: + case kSampleFormatS32: + case kSampleFormatF32: { + // Interleaved data can be shifted all at once. + const int frame_size = channel_count_ * bytes_per_channel; + memmove(channel_data_[0] + (trim_start_ + start) * frame_size, + channel_data_[0] + (trim_start_ + end) * frame_size, + frame_size * frames_to_copy); + break; + } + case kUnknownSampleFormat: + NOTREACHED() << "Invalid sample format!"; + } + } else { + CHECK_EQ(frames_to_copy, 0); + } + + // Trim the leftover data off the end of the buffer and update duration. + TrimEnd(frames_to_trim); +} + } // namespace media diff --git a/media/base/audio_buffer.h b/media/base/audio_buffer.h index 4ccd3a8f..a07985c 100644 --- a/media/base/audio_buffer.h +++ b/media/base/audio_buffer.h @@ -83,6 +83,10 @@ class MEDIA_EXPORT AudioBuffer // Duration is adjusted to reflect the fewer frames. 
void TrimEnd(int frames_to_trim); + // Trim an AudioBuffer by removing |end - start| frames from [|start|, |end|). + // Even if |start| is zero, timestamp() is not adjusted, only duration(). + void TrimRange(int start, int end); + // Return the number of channels. int channel_count() const { return channel_count_; } diff --git a/media/base/audio_buffer_unittest.cc b/media/base/audio_buffer_unittest.cc index 55ff4ed..c0fbf6b 100644 --- a/media/base/audio_buffer_unittest.cc +++ b/media/base/audio_buffer_unittest.cc @@ -11,16 +11,123 @@ namespace media { static const int kSampleRate = 48000; -static void VerifyBus(AudioBus* bus, int frames, float start, float increment) { + +static void VerifyBusWithOffset(AudioBus* bus, + int offset, + int frames, + float start, + float increment) { for (int ch = 0; ch < bus->channels(); ++ch) { const float v = start + ch * bus->frames() * increment; - for (int i = 0; i < frames; ++i) { + for (int i = offset; i < frames; ++i) { ASSERT_FLOAT_EQ(v + i * increment, bus->channel(ch)[i]) << "i=" << i << ", ch=" << ch; } } } +static void VerifyBus(AudioBus* bus, int frames, float start, float increment) { + VerifyBusWithOffset(bus, 0, frames, start, increment); +} + +static void TrimRangeTest(SampleFormat sample_format) { + const ChannelLayout channel_layout = CHANNEL_LAYOUT_4_0; + const int channels = ChannelLayoutToChannelCount(channel_layout); + const int frames = kSampleRate / 10; + const base::TimeDelta timestamp = base::TimeDelta(); + const base::TimeDelta duration = base::TimeDelta::FromMilliseconds(100); + scoped_refptr<AudioBuffer> buffer = MakeAudioBuffer<float>(sample_format, + channel_layout, + channels, + kSampleRate, + 0, + 1, + frames, + timestamp); + EXPECT_EQ(frames, buffer->frame_count()); + EXPECT_EQ(timestamp, buffer->timestamp()); + EXPECT_EQ(duration, buffer->duration()); + + scoped_ptr<AudioBus> bus = AudioBus::Create(channels, frames); + + // Verify all frames before trimming. 
+ buffer->ReadFrames(frames, 0, 0, bus.get()); + VerifyBus(bus.get(), frames, 0, 1); + + // Trim 10ms of frames from the middle of the buffer. + int trim_start = frames / 2; + const int trim_length = kSampleRate / 100; + const base::TimeDelta trim_duration = base::TimeDelta::FromMilliseconds(10); + buffer->TrimRange(trim_start, trim_start + trim_length); + EXPECT_EQ(frames - trim_length, buffer->frame_count()); + EXPECT_EQ(timestamp, buffer->timestamp()); + EXPECT_EQ(duration - trim_duration, buffer->duration()); + bus->Zero(); + buffer->ReadFrames(buffer->frame_count(), 0, 0, bus.get()); + VerifyBus(bus.get(), trim_start, 0, 1); + VerifyBusWithOffset( + bus.get(), trim_start, buffer->frame_count() - trim_start, 0, 1); + + // Trim 10ms of frames from the start, which just adjusts the buffer's + // internal start offset. + buffer->TrimStart(trim_length); + trim_start -= trim_length; + EXPECT_EQ(frames - 2 * trim_length, buffer->frame_count()); + EXPECT_EQ(timestamp + trim_duration, buffer->timestamp()); + EXPECT_EQ(duration - 2 * trim_duration, buffer->duration()); + bus->Zero(); + buffer->ReadFrames(buffer->frame_count(), 0, 0, bus.get()); + VerifyBus(bus.get(), trim_start, trim_length, 1); + VerifyBusWithOffset( + bus.get(), trim_start, buffer->frame_count() - trim_start, 0, 1); + + // Trim 10ms of frames from the end, which just adjusts the buffer's frame + // count. + buffer->TrimEnd(trim_length); + EXPECT_EQ(frames - 3 * trim_length, buffer->frame_count()); + EXPECT_EQ(timestamp + trim_duration, buffer->timestamp()); + EXPECT_EQ(duration - 3 * trim_duration, buffer->duration()); + bus->Zero(); + buffer->ReadFrames(buffer->frame_count(), 0, 0, bus.get()); + VerifyBus(bus.get(), trim_start, trim_length, 1); + VerifyBusWithOffset( + bus.get(), trim_start, buffer->frame_count() - trim_start, 0, 1); + + // Trim another 10ms from the inner portion of the buffer. 
+ buffer->TrimRange(trim_start, trim_start + trim_length); + EXPECT_EQ(frames - 4 * trim_length, buffer->frame_count()); + EXPECT_EQ(timestamp + trim_duration, buffer->timestamp()); + EXPECT_EQ(duration - 4 * trim_duration, buffer->duration()); + bus->Zero(); + buffer->ReadFrames(buffer->frame_count(), 0, 0, bus.get()); + VerifyBus(bus.get(), trim_start, trim_length, 1); + VerifyBusWithOffset( + bus.get(), trim_start, buffer->frame_count() - trim_start, 0, 1); + + // Trim off the end using TrimRange() to ensure end index is exclusive. + buffer->TrimRange(buffer->frame_count() - trim_length, buffer->frame_count()); + EXPECT_EQ(frames - 5 * trim_length, buffer->frame_count()); + EXPECT_EQ(timestamp + trim_duration, buffer->timestamp()); + EXPECT_EQ(duration - 5 * trim_duration, buffer->duration()); + bus->Zero(); + buffer->ReadFrames(buffer->frame_count(), 0, 0, bus.get()); + VerifyBus(bus.get(), trim_start, trim_length, 1); + VerifyBusWithOffset( + bus.get(), trim_start, buffer->frame_count() - trim_start, 0, 1); + + // Trim off the start using TrimRange() to ensure start index is inclusive. 
+ buffer->TrimRange(0, trim_length); + trim_start -= trim_length; + EXPECT_EQ(frames - 6 * trim_length, buffer->frame_count()); + EXPECT_EQ(timestamp + trim_duration, buffer->timestamp()); + EXPECT_EQ(duration - 6 * trim_duration, buffer->duration()); + bus->Zero(); + buffer->ReadFrames(buffer->frame_count(), 0, 0, bus.get()); + VerifyBus(bus.get(), trim_start, 2 * trim_length, 1); + VerifyBusWithOffset( + bus.get(), trim_start, buffer->frame_count() - trim_start, 0, 1); +} + TEST(AudioBufferTest, CopyFrom) { const ChannelLayout kChannelLayout = CHANNEL_LAYOUT_MONO; scoped_refptr<AudioBuffer> original_buffer = @@ -312,4 +419,12 @@ TEST(AudioBufferTest, Trim) { EXPECT_EQ(base::TimeDelta(), buffer->duration()); } +TEST(AudioBufferTest, TrimRangePlanar) { + TrimRangeTest(kSampleFormatPlanarF32); +} + +TEST(AudioBufferTest, TrimRangeInterleaved) { + TrimRangeTest(kSampleFormatF32); +} + } // namespace media diff --git a/media/base/audio_discard_helper.cc b/media/base/audio_discard_helper.cc index d868382..f7279d7 100644 --- a/media/base/audio_discard_helper.cc +++ b/media/base/audio_discard_helper.cc @@ -9,7 +9,6 @@ #include "base/logging.h" #include "media/base/audio_buffer.h" #include "media/base/buffers.h" -#include "media/base/decoder_buffer.h" namespace media { @@ -24,11 +23,13 @@ static void WarnOnNonMonotonicTimestamps(base::TimeDelta last_timestamp, << " diff " << diff.InMicroseconds() << " us"; } -AudioDiscardHelper::AudioDiscardHelper(int sample_rate) +AudioDiscardHelper::AudioDiscardHelper(int sample_rate, size_t decoder_delay) : sample_rate_(sample_rate), + decoder_delay_(decoder_delay), timestamp_helper_(sample_rate_), discard_frames_(0), - last_input_timestamp_(kNoTimestamp()) { + last_input_timestamp_(kNoTimestamp()), + delayed_discard_(false) { DCHECK_GT(sample_rate_, 0); } @@ -44,6 +45,8 @@ void AudioDiscardHelper::Reset(size_t initial_discard) { discard_frames_ = initial_discard; last_input_timestamp_ = kNoTimestamp(); 
timestamp_helper_.SetBaseTimestamp(kNoTimestamp()); + delayed_discard_ = false; + delayed_discard_padding_ = DecoderBuffer::DiscardPadding(); } bool AudioDiscardHelper::ProcessBuffers( @@ -59,15 +62,32 @@ bool AudioDiscardHelper::ProcessBuffers( last_input_timestamp_ = encoded_buffer->timestamp(); // If this is the first buffer seen, setup the timestamp helper. - if (!initialized()) { + const bool first_buffer = !initialized(); + if (first_buffer) { // Clamp the base timestamp to zero. timestamp_helper_.SetBaseTimestamp( std::max(base::TimeDelta(), encoded_buffer->timestamp())); } DCHECK(initialized()); - if (!decoded_buffer || !decoded_buffer->frame_count()) + if (!decoded_buffer) { + // If there's a one buffer delay for decoding, we need to save it so it can + // be processed with the next decoder buffer. + if (first_buffer) { + delayed_discard_ = true; + delayed_discard_padding_ = encoded_buffer->discard_padding(); + } return false; + } + + const size_t original_frame_count = decoded_buffer->frame_count(); + + // If there's a one buffer delay for decoding, pick up the last encoded + // buffer's discard padding for processing with the current decoded buffer. + DecoderBuffer::DiscardPadding current_discard_padding = + encoded_buffer->discard_padding(); + if (delayed_discard_) + std::swap(current_discard_padding, delayed_discard_padding_); if (discard_frames_ > 0) { const size_t decoded_frames = decoded_buffer->frame_count(); @@ -75,20 +95,73 @@ bool AudioDiscardHelper::ProcessBuffers( discard_frames_ -= frames_to_discard; // If everything would be discarded, indicate a new buffer is required. - if (frames_to_discard == decoded_frames) + if (frames_to_discard == decoded_frames) { + // For simplicity disallow cases where a buffer with discard padding is + // present. Doing so allows us to avoid complexity around tracking + // discards across buffers. 
+ DCHECK(current_discard_padding.first == base::TimeDelta()); + DCHECK(current_discard_padding.second == base::TimeDelta()); return false; + } decoded_buffer->TrimStart(frames_to_discard); } - // TODO(dalecurtis): Applying the current buffer's discard padding doesn't - // make sense in the Vorbis case because there is a delay of one buffer before - // decoded buffers are returned. Fix and add support for more than just end - // trimming. See http://crbug.com/360961. - if (encoded_buffer->discard_padding() > base::TimeDelta()) { + // Handle front discard padding. + if (current_discard_padding.first > base::TimeDelta()) { + const size_t decoded_frames = decoded_buffer->frame_count(); + const size_t start_frames_to_discard = + TimeDeltaToFrames(current_discard_padding.first); + + // Regardless of the timestamp on the encoded buffer, the corresponding + // decoded output will appear |decoder_delay_| frames later. + size_t discard_start = decoder_delay_; + if (decoder_delay_ > 0) { + // If we have a |decoder_delay_| and have already discarded frames from + // this buffer, the |discard_start| must be adjusted by the number of + // frames already discarded. + const size_t frames_discarded_so_far = + original_frame_count - decoded_buffer->frame_count(); + CHECK_LE(frames_discarded_so_far, decoder_delay_); + discard_start -= frames_discarded_so_far; + } + + // For simplicity require the start of the discard to be within the current + // buffer. Doing so allows us to avoid complexity around tracking discards + // across buffers. + CHECK_LT(discard_start, decoded_frames); + + const size_t frames_to_discard = + std::min(start_frames_to_discard, decoded_frames - discard_start); + + // Carry over any frames which need to be discarded from the front of the + // next buffer. + DCHECK(!discard_frames_); + discard_frames_ = start_frames_to_discard - frames_to_discard; + + // If everything would be discarded, indicate a new buffer is required. 
+ if (frames_to_discard == decoded_frames) { + // The buffer should not have been marked with end discard if the front + // discard removes everything. + DCHECK(current_discard_padding.second == base::TimeDelta()); + return false; + } + + decoded_buffer->TrimRange(discard_start, discard_start + frames_to_discard); + } else { + DCHECK(current_discard_padding.first == base::TimeDelta()); + } + + // Handle end discard padding. + if (current_discard_padding.second > base::TimeDelta()) { + // Limit end discarding to when there is no |decoder_delay_|, otherwise it's + // non-trivial determining where to start discarding end frames. + CHECK(!decoder_delay_); + const size_t decoded_frames = decoded_buffer->frame_count(); const size_t end_frames_to_discard = - TimeDeltaToFrames(encoded_buffer->discard_padding()); + TimeDeltaToFrames(current_discard_padding.second); + if (end_frames_to_discard > decoded_frames) { DLOG(ERROR) << "Encountered invalid discard padding value."; return false; @@ -100,7 +173,7 @@ bool AudioDiscardHelper::ProcessBuffers( decoded_buffer->TrimEnd(end_frames_to_discard); } else { - DCHECK(encoded_buffer->discard_padding() == base::TimeDelta()); + DCHECK(current_discard_padding.second == base::TimeDelta()); } // Assign timestamp to the buffer. diff --git a/media/base/audio_discard_helper.h b/media/base/audio_discard_helper.h index 388cbd4..deeb45f 100644 --- a/media/base/audio_discard_helper.h +++ b/media/base/audio_discard_helper.h @@ -9,17 +9,31 @@ #include "base/time/time.h" #include "media/base/audio_timestamp_helper.h" #include "media/base/buffers.h" +#include "media/base/decoder_buffer.h" #include "media/base/media_export.h" namespace media { class AudioBuffer; -class DecoderBuffer; // Helper class for managing timestamps and discard events around decoding. 
class MEDIA_EXPORT AudioDiscardHelper { public: - explicit AudioDiscardHelper(int sample_rate); + // |sample_rate| is the sample rate of decoded data which will be handed into + // the ProcessBuffers() call. + // + // |decoder_delay| is the number of frames a decoder will output before data + // corresponding to the first encoded buffer is output. Callers only need to + // specify this if the decoder inserts frames which have no corresponding + // encoded buffer. + // + // For example, most MP3 decoders will output 529 junk frames before the data + // corresponding to the first encoded buffer is output. These frames are not + // represented in the encoded data stream and instead are an artifact of how + // most MP3 decoders work. See http://lame.sourceforge.net/tech-FAQ.txt + // + // NOTE: End discard is only supported when there is no |decoder_delay|. + AudioDiscardHelper(int sample_rate, size_t decoder_delay); ~AudioDiscardHelper(); // Converts a TimeDelta to a frame count based on the constructed sample rate. 
@@ -50,11 +64,15 @@ class MEDIA_EXPORT AudioDiscardHelper { private: const int sample_rate_; + const size_t decoder_delay_; AudioTimestampHelper timestamp_helper_; size_t discard_frames_; base::TimeDelta last_input_timestamp_; + bool delayed_discard_; + DecoderBuffer::DiscardPadding delayed_discard_padding_; + DISALLOW_IMPLICIT_CONSTRUCTORS(AudioDiscardHelper); }; diff --git a/media/base/audio_discard_helper_unittest.cc b/media/base/audio_discard_helper_unittest.cc index e3f21c0..55d2b61 100644 --- a/media/base/audio_discard_helper_unittest.cc +++ b/media/base/audio_discard_helper_unittest.cc @@ -46,7 +46,7 @@ static float ExtractDecodedData(const scoped_refptr<AudioBuffer>& buffer, } TEST(AudioDiscardHelperTest, TimeDeltaToFrames) { - AudioDiscardHelper discard_helper(kSampleRate); + AudioDiscardHelper discard_helper(kSampleRate, 0); EXPECT_EQ(0u, discard_helper.TimeDeltaToFrames(base::TimeDelta())); EXPECT_EQ( @@ -70,7 +70,7 @@ TEST(AudioDiscardHelperTest, TimeDeltaToFrames) { } TEST(AudioDiscardHelperTest, BasicProcessBuffers) { - AudioDiscardHelper discard_helper(kSampleRate); + AudioDiscardHelper discard_helper(kSampleRate, 0); ASSERT_FALSE(discard_helper.initialized()); const base::TimeDelta kTimestamp = base::TimeDelta(); @@ -102,7 +102,7 @@ TEST(AudioDiscardHelperTest, BasicProcessBuffers) { } TEST(AudioDiscardHelperTest, NegativeTimestampClampsToZero) { - AudioDiscardHelper discard_helper(kSampleRate); + AudioDiscardHelper discard_helper(kSampleRate, 0); ASSERT_FALSE(discard_helper.initialized()); const base::TimeDelta kTimestamp = -base::TimeDelta::FromSeconds(1); @@ -122,7 +122,7 @@ TEST(AudioDiscardHelperTest, NegativeTimestampClampsToZero) { } TEST(AudioDiscardHelperTest, ProcessBuffersWithInitialDiscard) { - AudioDiscardHelper discard_helper(kSampleRate); + AudioDiscardHelper discard_helper(kSampleRate, 0); ASSERT_FALSE(discard_helper.initialized()); const base::TimeDelta kTimestamp = base::TimeDelta(); @@ -148,7 +148,7 @@ TEST(AudioDiscardHelperTest, 
ProcessBuffersWithInitialDiscard) { } TEST(AudioDiscardHelperTest, ProcessBuffersWithLargeInitialDiscard) { - AudioDiscardHelper discard_helper(kSampleRate); + AudioDiscardHelper discard_helper(kSampleRate, 0); ASSERT_FALSE(discard_helper.initialized()); const base::TimeDelta kTimestamp = base::TimeDelta(); @@ -181,7 +181,7 @@ TEST(AudioDiscardHelperTest, ProcessBuffersWithLargeInitialDiscard) { } TEST(AudioDiscardHelperTest, AllowNonMonotonicTimestamps) { - AudioDiscardHelper discard_helper(kSampleRate); + AudioDiscardHelper discard_helper(kSampleRate, 0); ASSERT_FALSE(discard_helper.initialized()); const base::TimeDelta kTimestamp = base::TimeDelta(); @@ -206,8 +206,8 @@ TEST(AudioDiscardHelperTest, AllowNonMonotonicTimestamps) { EXPECT_EQ(kTestFrames, decoded_buffer->frame_count()); } -TEST(AudioDiscardHelperTest, DiscardPadding) { - AudioDiscardHelper discard_helper(kSampleRate); +TEST(AudioDiscardHelperTest, DiscardEndPadding) { + AudioDiscardHelper discard_helper(kSampleRate, 0); ASSERT_FALSE(discard_helper.initialized()); const base::TimeDelta kTimestamp = base::TimeDelta(); @@ -219,18 +219,39 @@ TEST(AudioDiscardHelperTest, DiscardPadding) { scoped_refptr<AudioBuffer> decoded_buffer = CreateDecodedBuffer(kTestFrames); // Set a discard padding equivalent to half the buffer. 
- encoded_buffer->set_discard_padding(kDuration / 2); + encoded_buffer->set_discard_padding( + std::make_pair(base::TimeDelta(), kDuration / 2)); ASSERT_TRUE(discard_helper.ProcessBuffers(encoded_buffer, decoded_buffer)); ASSERT_TRUE(discard_helper.initialized()); EXPECT_EQ(kTimestamp, decoded_buffer->timestamp()); EXPECT_EQ(kDuration / 2, decoded_buffer->duration()); EXPECT_EQ(kTestFrames / 2, decoded_buffer->frame_count()); - ASSERT_FLOAT_EQ(0, ExtractDecodedData(decoded_buffer, 0)); } -TEST(AudioDiscardHelperTest, InitialDiscardAndDiscardPadding) { - AudioDiscardHelper discard_helper(kSampleRate); +TEST(AudioDiscardHelperTest, BadDiscardEndPadding) { + AudioDiscardHelper discard_helper(kSampleRate, 0); + ASSERT_FALSE(discard_helper.initialized()); + + const base::TimeDelta kTimestamp = base::TimeDelta(); + const base::TimeDelta kDuration = base::TimeDelta::FromMilliseconds(10); + const int kTestFrames = discard_helper.TimeDeltaToFrames(kDuration); + + scoped_refptr<DecoderBuffer> encoded_buffer = + CreateEncodedBuffer(kTimestamp, kDuration); + scoped_refptr<AudioBuffer> decoded_buffer = CreateDecodedBuffer(kTestFrames); + + // Set a discard padding equivalent to double the buffer size. + encoded_buffer->set_discard_padding( + std::make_pair(base::TimeDelta(), kDuration * 2)); + + // Verify the end discard padding is rejected. + ASSERT_FALSE(discard_helper.ProcessBuffers(encoded_buffer, decoded_buffer)); + ASSERT_TRUE(discard_helper.initialized()); +} + +TEST(AudioDiscardHelperTest, InitialDiscardAndDiscardEndPadding) { + AudioDiscardHelper discard_helper(kSampleRate, 0); ASSERT_FALSE(discard_helper.initialized()); const base::TimeDelta kTimestamp = base::TimeDelta(); @@ -242,7 +263,8 @@ TEST(AudioDiscardHelperTest, InitialDiscardAndDiscardPadding) { scoped_refptr<AudioBuffer> decoded_buffer = CreateDecodedBuffer(kTestFrames); // Set a discard padding equivalent to a quarter of the buffer. 
- encoded_buffer->set_discard_padding(kDuration / 4); + encoded_buffer->set_discard_padding( + std::make_pair(base::TimeDelta(), kDuration / 4)); // Set an initial discard of a quarter of the buffer. const int kDiscardFrames = kTestFrames / 4; @@ -257,4 +279,106 @@ TEST(AudioDiscardHelperTest, InitialDiscardAndDiscardPadding) { ExtractDecodedData(decoded_buffer, 0)); } +TEST(AudioDiscardHelperTest, InitialDiscardAndDiscardPadding) { + AudioDiscardHelper discard_helper(kSampleRate, 0); + ASSERT_FALSE(discard_helper.initialized()); + + const base::TimeDelta kTimestamp = base::TimeDelta(); + const base::TimeDelta kDuration = base::TimeDelta::FromMilliseconds(10); + const int kTestFrames = discard_helper.TimeDeltaToFrames(kDuration); + + scoped_refptr<DecoderBuffer> encoded_buffer = + CreateEncodedBuffer(kTimestamp, kDuration); + scoped_refptr<AudioBuffer> decoded_buffer = CreateDecodedBuffer(kTestFrames); + + // Set all the discard values to be different to ensure each is properly used. + const int kDiscardFrames = kTestFrames / 4; + encoded_buffer->set_discard_padding( + std::make_pair(kDuration / 8, kDuration / 16)); + discard_helper.Reset(kDiscardFrames); + + ASSERT_TRUE(discard_helper.ProcessBuffers(encoded_buffer, decoded_buffer)); + ASSERT_TRUE(discard_helper.initialized()); + EXPECT_EQ(kTimestamp, decoded_buffer->timestamp()); + EXPECT_EQ(kDuration - kDuration / 4 - kDuration / 8 - kDuration / 16, + decoded_buffer->duration()); + EXPECT_EQ(kTestFrames - kTestFrames / 4 - kTestFrames / 8 - kTestFrames / 16, + decoded_buffer->frame_count()); +} + +TEST(AudioDiscardHelperTest, InitialDiscardAndDiscardPaddingAndCodecDelay) { + // Use a codec delay of 5ms. 
+ const int kCodecDelay = kSampleRate / 100 / 2; + AudioDiscardHelper discard_helper(kSampleRate, kCodecDelay); + ASSERT_FALSE(discard_helper.initialized()); + discard_helper.Reset(kCodecDelay); + + const base::TimeDelta kTimestamp = base::TimeDelta(); + const base::TimeDelta kDuration = base::TimeDelta::FromMilliseconds(10); + const int kTestFrames = discard_helper.TimeDeltaToFrames(kDuration); + + scoped_refptr<DecoderBuffer> encoded_buffer = + CreateEncodedBuffer(kTimestamp, kDuration); + scoped_refptr<AudioBuffer> decoded_buffer = CreateDecodedBuffer(kTestFrames); + + // Set a discard padding equivalent to half of the buffer. + encoded_buffer->set_discard_padding( + std::make_pair(kDuration / 2, base::TimeDelta())); + + // All of the first buffer should be discarded. + ASSERT_FALSE(discard_helper.ProcessBuffers(encoded_buffer, decoded_buffer)); + ASSERT_TRUE(discard_helper.initialized()); + + // Processing another buffer (with the same discard padding) should discard + // the back half of the buffer since kCodecDelay is half a buffer. + encoded_buffer->set_timestamp(kTimestamp + kDuration); + decoded_buffer = CreateDecodedBuffer(kTestFrames); + ASSERT_FLOAT_EQ(0.0f, ExtractDecodedData(decoded_buffer, 0)); + ASSERT_NEAR(kCodecDelay * kDataStep, + ExtractDecodedData(decoded_buffer, kCodecDelay), + kDataStep * 1000); + ASSERT_TRUE(discard_helper.ProcessBuffers(encoded_buffer, decoded_buffer)); + EXPECT_EQ(kTimestamp, decoded_buffer->timestamp()); + EXPECT_EQ(kDuration / 2, decoded_buffer->duration()); + EXPECT_EQ(kTestFrames / 2, decoded_buffer->frame_count()); + + // Verify it was actually the latter half of the buffer that was removed. 
+ ASSERT_FLOAT_EQ(0.0f, ExtractDecodedData(decoded_buffer, 0)); +} + +TEST(AudioDiscardHelperTest, DelayedDiscardInitialDiscardAndDiscardPadding) { + AudioDiscardHelper discard_helper(kSampleRate, 0); + ASSERT_FALSE(discard_helper.initialized()); + + const base::TimeDelta kTimestamp = base::TimeDelta(); + const base::TimeDelta kDuration = base::TimeDelta::FromMilliseconds(10); + const int kTestFrames = discard_helper.TimeDeltaToFrames(kDuration); + + scoped_refptr<DecoderBuffer> encoded_buffer = + CreateEncodedBuffer(kTimestamp, kDuration); + + // Set all the discard values to be different to ensure each is properly used. + const int kDiscardFrames = kTestFrames / 4; + encoded_buffer->set_discard_padding( + std::make_pair(kDuration / 8, kDuration / 16)); + discard_helper.Reset(kDiscardFrames); + + // Verify nothing is output for the first buffer, yet initialized is true. + ASSERT_FALSE(discard_helper.ProcessBuffers(encoded_buffer, NULL)); + ASSERT_TRUE(discard_helper.initialized()); + + // Create an encoded buffer with no discard padding. + encoded_buffer = CreateEncodedBuffer(kTimestamp + kDuration, kDuration); + scoped_refptr<AudioBuffer> decoded_buffer = CreateDecodedBuffer(kTestFrames); + + // Verify that when the decoded buffer is consumed, the discards from the + // previous encoded buffer are applied. 
+ ASSERT_TRUE(discard_helper.ProcessBuffers(encoded_buffer, decoded_buffer)); + EXPECT_EQ(kTimestamp, decoded_buffer->timestamp()); + EXPECT_EQ(kDuration - kDuration / 4 - kDuration / 8 - kDuration / 16, + decoded_buffer->duration()); + EXPECT_EQ(kTestFrames - kTestFrames / 4 - kTestFrames / 8 - kTestFrames / 16, + decoded_buffer->frame_count()); +} + } // namespace media diff --git a/media/base/decoder_buffer.cc b/media/base/decoder_buffer.cc index b6d7d26..2059817 100644 --- a/media/base/decoder_buffer.cc +++ b/media/base/decoder_buffer.cc @@ -83,7 +83,8 @@ std::string DecoderBuffer::AsHumanReadableString() { << " size: " << size_ << " side_data_size: " << side_data_size_ << " encrypted: " << (decrypt_config_ != NULL) - << " discard_padding (ms): " << discard_padding_.InMilliseconds(); + << " discard_padding (ms): (" << discard_padding_.first.InMilliseconds() + << ", " << discard_padding_.second.InMilliseconds() << ")"; return s.str(); } diff --git a/media/base/decoder_buffer.h b/media/base/decoder_buffer.h index 8edc539..4ff836d 100644 --- a/media/base/decoder_buffer.h +++ b/media/base/decoder_buffer.h @@ -6,6 +6,7 @@ #define MEDIA_BASE_DECODER_BUFFER_H_ #include <string> +#include <utility> #include "base/logging.h" #include "base/memory/aligned_memory.h" @@ -105,12 +106,16 @@ class MEDIA_EXPORT DecoderBuffer return side_data_size_; } - base::TimeDelta discard_padding() const { + // A discard window indicates the amount of data which should be discarded from + // this buffer after decoding. The first value is the amount off the front and + // the second the amount off the back. 
+ typedef std::pair<base::TimeDelta, base::TimeDelta> DiscardPadding; + const DiscardPadding& discard_padding() const { DCHECK(!end_of_stream()); return discard_padding_; } - void set_discard_padding(const base::TimeDelta discard_padding) { + void set_discard_padding(const DiscardPadding& discard_padding) { DCHECK(!end_of_stream()); discard_padding_ = discard_padding; } @@ -166,7 +171,7 @@ class MEDIA_EXPORT DecoderBuffer int side_data_size_; scoped_ptr<uint8, base::AlignedFreeDeleter> side_data_; scoped_ptr<DecryptConfig> decrypt_config_; - base::TimeDelta discard_padding_; + DiscardPadding discard_padding_; base::TimeDelta splice_timestamp_; // Constructor helper method for memory allocations. diff --git a/media/filters/ffmpeg_audio_decoder.cc b/media/filters/ffmpeg_audio_decoder.cc index fec5da52..5c9a29c 100644 --- a/media/filters/ffmpeg_audio_decoder.cc +++ b/media/filters/ffmpeg_audio_decoder.cc @@ -427,7 +427,8 @@ bool FFmpegAudioDecoder::ConfigureDecoder() { // Success! av_frame_.reset(av_frame_alloc()); - discard_helper_.reset(new AudioDiscardHelper(config_.samples_per_second())); + discard_helper_.reset(new AudioDiscardHelper(config_.samples_per_second(), + config_.codec_delay())); av_sample_format_ = codec_context_->sample_fmt; if (codec_context_->channels != diff --git a/media/filters/ffmpeg_demuxer.cc b/media/filters/ffmpeg_demuxer.cc index 40a8c91..aa829d7 100644 --- a/media/filters/ffmpeg_demuxer.cc +++ b/media/filters/ffmpeg_demuxer.cc @@ -50,6 +50,11 @@ static base::Time ExtractTimelineOffset(AVFormatContext* format_context) { return base::Time(); } +static base::TimeDelta FramesToTimeDelta(int frames, double sample_rate) { + return base::TimeDelta::FromMicroseconds( + frames * base::Time::kMicrosecondsPerSecond / sample_rate); +} + // // FFmpegDemuxerStream // @@ -189,20 +194,28 @@ void FFmpegDemuxerStream::EnqueuePacket(ScopedAVPacket packet) { } int skip_samples_size = 0; - uint8* skip_samples = av_packet_get_side_data(packet.get(), - 
AV_PKT_DATA_SKIP_SAMPLES, - &skip_samples_size); + const uint32* skip_samples_ptr = + reinterpret_cast<const uint32*>(av_packet_get_side_data( + packet.get(), AV_PKT_DATA_SKIP_SAMPLES, &skip_samples_size)); const int kSkipSamplesValidSize = 10; - const int kSkipSamplesOffset = 4; + const int kSkipEndSamplesOffset = 1; if (skip_samples_size >= kSkipSamplesValidSize) { - int discard_padding_samples = base::ByteSwapToLE32( - *(reinterpret_cast<const uint32*>(skip_samples + - kSkipSamplesOffset))); - // TODO(vigneshv): Change decoder buffer to use number of samples so that - // this conversion can be avoided. - buffer->set_discard_padding(base::TimeDelta::FromMicroseconds( - discard_padding_samples * 1000000.0 / - audio_decoder_config().samples_per_second())); + // Because FFmpeg rolls codec delay and skip samples into one we can only + // allow front discard padding on the first buffer. Otherwise the discard + // helper can't figure out which data to discard. See AudioDiscardHelper. + int discard_front_samples = base::ByteSwapToLE32(*skip_samples_ptr); + if (last_packet_timestamp_ != kNoTimestamp()) { + DLOG(ERROR) << "Skip samples are only allowed for the first packet."; + discard_front_samples = 0; + } + + const int discard_end_samples = + base::ByteSwapToLE32(*(skip_samples_ptr + kSkipEndSamplesOffset)); + const int samples_per_second = + audio_decoder_config().samples_per_second(); + buffer->set_discard_padding(std::make_pair( + FramesToTimeDelta(discard_front_samples, samples_per_second), + FramesToTimeDelta(discard_end_samples, samples_per_second))); } if (decrypt_config) diff --git a/media/filters/opus_audio_decoder.cc b/media/filters/opus_audio_decoder.cc index bbdcb3f..27c58c6 100644 --- a/media/filters/opus_audio_decoder.cc +++ b/media/filters/opus_audio_decoder.cc @@ -418,7 +418,8 @@ bool OpusAudioDecoder::ConfigureDecoder() { return false; } - discard_helper_.reset(new AudioDiscardHelper(config_.samples_per_second())); + discard_helper_.reset( + new 
AudioDiscardHelper(config_.samples_per_second(), 0)); start_input_timestamp_ = kNoTimestamp(); return true; } diff --git a/media/filters/pipeline_integration_test.cc b/media/filters/pipeline_integration_test.cc index 96a4e34..7ff2f0f 100644 --- a/media/filters/pipeline_integration_test.cc +++ b/media/filters/pipeline_integration_test.cc @@ -855,6 +855,17 @@ TEST_P(PipelineIntegrationTest, MediaSource_ADTS_TimestampOffset) { EXPECT_TRUE(WaitUntilOnEnded()); } +TEST_F(PipelineIntegrationTest, BasicPlaybackHashed_MP3) { + ASSERT_TRUE(Start(GetTestDataFilePath("sfx.mp3"), PIPELINE_OK, kHashed)); + + Play(); + + ASSERT_TRUE(WaitUntilOnEnded()); + + // Verify codec delay and preroll are stripped. + EXPECT_EQ("3.05,2.87,3.00,3.32,3.58,4.08,", GetAudioHash()); +} + TEST_P(PipelineIntegrationTest, MediaSource_MP3) { MockMediaSource source("sfx.mp3", kMP3, kAppendWholeFile, GetParam()); StartHashedPipelineWithMediaSource(&source); diff --git a/media/formats/webm/webm_cluster_parser.cc b/media/formats/webm/webm_cluster_parser.cc index 3816fdb..172eafa 100644 --- a/media/formats/webm/webm_cluster_parser.cc +++ b/media/formats/webm/webm_cluster_parser.cc @@ -416,8 +416,9 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, } if (discard_padding != 0) { - buffer->set_discard_padding(base::TimeDelta::FromMicroseconds( - discard_padding / 1000)); + buffer->set_discard_padding(std::make_pair( + base::TimeDelta(), + base::TimeDelta::FromMicroseconds(discard_padding / 1000))); } return track->AddBuffer(buffer); |