summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--media/base/audio_buffer.cc43
-rw-r--r--media/base/audio_buffer.h4
-rw-r--r--media/base/audio_buffer_unittest.cc119
-rw-r--r--media/base/audio_discard_helper.cc99
-rw-r--r--media/base/audio_discard_helper.h22
-rw-r--r--media/base/audio_discard_helper_unittest.cc150
-rw-r--r--media/base/decoder_buffer.cc3
-rw-r--r--media/base/decoder_buffer.h11
-rw-r--r--media/filters/ffmpeg_audio_decoder.cc3
-rw-r--r--media/filters/ffmpeg_demuxer.cc37
-rw-r--r--media/filters/opus_audio_decoder.cc3
-rw-r--r--media/filters/pipeline_integration_test.cc11
-rw-r--r--media/formats/webm/webm_cluster_parser.cc5
13 files changed, 460 insertions, 50 deletions
diff --git a/media/base/audio_buffer.cc b/media/base/audio_buffer.cc
index 08bb1e3..33d4ecb 100644
--- a/media/base/audio_buffer.cc
+++ b/media/base/audio_buffer.cc
@@ -266,4 +266,47 @@ void AudioBuffer::TrimEnd(int frames_to_trim) {
duration_ = CalculateDuration(adjusted_frame_count_, sample_rate_);
}
+void AudioBuffer::TrimRange(int start, int end) {
+ CHECK_GE(start, 0);
+ CHECK_LE(end, adjusted_frame_count_);
+
+ const int frames_to_trim = end - start;
+ CHECK_GE(frames_to_trim, 0);
+ CHECK_LE(frames_to_trim, adjusted_frame_count_);
+
+ const int bytes_per_channel = SampleFormatToBytesPerChannel(sample_format_);
+ const int frames_to_copy = adjusted_frame_count_ - end;
+ if (frames_to_copy > 0) {
+ switch (sample_format_) {
+ case kSampleFormatPlanarS16:
+ case kSampleFormatPlanarF32:
+ // Planar data must be shifted per channel.
+ for (int ch = 0; ch < channel_count_; ++ch) {
+ memmove(channel_data_[ch] + (trim_start_ + start) * bytes_per_channel,
+ channel_data_[ch] + (trim_start_ + end) * bytes_per_channel,
+ bytes_per_channel * frames_to_copy);
+ }
+ break;
+ case kSampleFormatU8:
+ case kSampleFormatS16:
+ case kSampleFormatS32:
+ case kSampleFormatF32: {
+ // Interleaved data can be shifted all at once.
+ const int frame_size = channel_count_ * bytes_per_channel;
+ memmove(channel_data_[0] + (trim_start_ + start) * frame_size,
+ channel_data_[0] + (trim_start_ + end) * frame_size,
+ frame_size * frames_to_copy);
+ break;
+ }
+ case kUnknownSampleFormat:
+ NOTREACHED() << "Invalid sample format!";
+ }
+ } else {
+ CHECK_EQ(frames_to_copy, 0);
+ }
+
+ // Trim the leftover data off the end of the buffer and update duration.
+ TrimEnd(frames_to_trim);
+}
+
} // namespace media
diff --git a/media/base/audio_buffer.h b/media/base/audio_buffer.h
index 4ccd3a8f..a07985c 100644
--- a/media/base/audio_buffer.h
+++ b/media/base/audio_buffer.h
@@ -83,6 +83,10 @@ class MEDIA_EXPORT AudioBuffer
// Duration is adjusted to reflect the fewer frames.
void TrimEnd(int frames_to_trim);
+ // Trim an AudioBuffer by removing |end - start| frames from [|start|, |end|).
+ // Even if |start| is zero, timestamp() is not adjusted, only duration().
+ void TrimRange(int start, int end);
+
// Return the number of channels.
int channel_count() const { return channel_count_; }
diff --git a/media/base/audio_buffer_unittest.cc b/media/base/audio_buffer_unittest.cc
index 55ff4ed..c0fbf6b 100644
--- a/media/base/audio_buffer_unittest.cc
+++ b/media/base/audio_buffer_unittest.cc
@@ -11,16 +11,123 @@ namespace media {
static const int kSampleRate = 48000;
-static void VerifyBus(AudioBus* bus, int frames, float start, float increment) {
+
+static void VerifyBusWithOffset(AudioBus* bus,
+ int offset,
+ int frames,
+ float start,
+ float increment) {
for (int ch = 0; ch < bus->channels(); ++ch) {
const float v = start + ch * bus->frames() * increment;
- for (int i = 0; i < frames; ++i) {
+ for (int i = offset; i < frames; ++i) {
ASSERT_FLOAT_EQ(v + i * increment, bus->channel(ch)[i]) << "i=" << i
<< ", ch=" << ch;
}
}
}
+static void VerifyBus(AudioBus* bus, int frames, float start, float increment) {
+ VerifyBusWithOffset(bus, 0, frames, start, increment);
+}
+
+static void TrimRangeTest(SampleFormat sample_format) {
+ const ChannelLayout channel_layout = CHANNEL_LAYOUT_4_0;
+ const int channels = ChannelLayoutToChannelCount(channel_layout);
+ const int frames = kSampleRate / 10;
+ const base::TimeDelta timestamp = base::TimeDelta();
+ const base::TimeDelta duration = base::TimeDelta::FromMilliseconds(100);
+ scoped_refptr<AudioBuffer> buffer = MakeAudioBuffer<float>(sample_format,
+ channel_layout,
+ channels,
+ kSampleRate,
+ 0,
+ 1,
+ frames,
+ timestamp);
+ EXPECT_EQ(frames, buffer->frame_count());
+ EXPECT_EQ(timestamp, buffer->timestamp());
+ EXPECT_EQ(duration, buffer->duration());
+
+ scoped_ptr<AudioBus> bus = AudioBus::Create(channels, frames);
+
+ // Verify all frames before trimming.
+ buffer->ReadFrames(frames, 0, 0, bus.get());
+ VerifyBus(bus.get(), frames, 0, 1);
+
+ // Trim 10ms of frames from the middle of the buffer.
+ int trim_start = frames / 2;
+ const int trim_length = kSampleRate / 100;
+ const base::TimeDelta trim_duration = base::TimeDelta::FromMilliseconds(10);
+ buffer->TrimRange(trim_start, trim_start + trim_length);
+ EXPECT_EQ(frames - trim_length, buffer->frame_count());
+ EXPECT_EQ(timestamp, buffer->timestamp());
+ EXPECT_EQ(duration - trim_duration, buffer->duration());
+ bus->Zero();
+ buffer->ReadFrames(buffer->frame_count(), 0, 0, bus.get());
+ VerifyBus(bus.get(), trim_start, 0, 1);
+ VerifyBusWithOffset(
+ bus.get(), trim_start, buffer->frame_count() - trim_start, 0, 1);
+
+ // Trim 10ms of frames from the start, which just adjusts the buffer's
+ // internal start offset.
+ buffer->TrimStart(trim_length);
+ trim_start -= trim_length;
+ EXPECT_EQ(frames - 2 * trim_length, buffer->frame_count());
+ EXPECT_EQ(timestamp + trim_duration, buffer->timestamp());
+ EXPECT_EQ(duration - 2 * trim_duration, buffer->duration());
+ bus->Zero();
+ buffer->ReadFrames(buffer->frame_count(), 0, 0, bus.get());
+ VerifyBus(bus.get(), trim_start, trim_length, 1);
+ VerifyBusWithOffset(
+ bus.get(), trim_start, buffer->frame_count() - trim_start, 0, 1);
+
+ // Trim 10ms of frames from the end, which just adjusts the buffer's frame
+ // count.
+ buffer->TrimEnd(trim_length);
+ EXPECT_EQ(frames - 3 * trim_length, buffer->frame_count());
+ EXPECT_EQ(timestamp + trim_duration, buffer->timestamp());
+ EXPECT_EQ(duration - 3 * trim_duration, buffer->duration());
+ bus->Zero();
+ buffer->ReadFrames(buffer->frame_count(), 0, 0, bus.get());
+ VerifyBus(bus.get(), trim_start, trim_length, 1);
+ VerifyBusWithOffset(
+ bus.get(), trim_start, buffer->frame_count() - trim_start, 0, 1);
+
+ // Trim another 10ms from the inner portion of the buffer.
+ buffer->TrimRange(trim_start, trim_start + trim_length);
+ EXPECT_EQ(frames - 4 * trim_length, buffer->frame_count());
+ EXPECT_EQ(timestamp + trim_duration, buffer->timestamp());
+ EXPECT_EQ(duration - 4 * trim_duration, buffer->duration());
+ bus->Zero();
+ buffer->ReadFrames(buffer->frame_count(), 0, 0, bus.get());
+ VerifyBus(bus.get(), trim_start, trim_length, 1);
+ VerifyBusWithOffset(
+ bus.get(), trim_start, buffer->frame_count() - trim_start, 0, 1);
+
+ // Trim off the end using TrimRange() to ensure end index is exclusive.
+ buffer->TrimRange(buffer->frame_count() - trim_length, buffer->frame_count());
+ EXPECT_EQ(frames - 5 * trim_length, buffer->frame_count());
+ EXPECT_EQ(timestamp + trim_duration, buffer->timestamp());
+ EXPECT_EQ(duration - 5 * trim_duration, buffer->duration());
+ bus->Zero();
+ buffer->ReadFrames(buffer->frame_count(), 0, 0, bus.get());
+ VerifyBus(bus.get(), trim_start, trim_length, 1);
+ VerifyBusWithOffset(
+ bus.get(), trim_start, buffer->frame_count() - trim_start, 0, 1);
+
+ // Trim off the start using TrimRange() to ensure start index is inclusive.
+ buffer->TrimRange(0, trim_length);
+ trim_start -= trim_length;
+ EXPECT_EQ(frames - 6 * trim_length, buffer->frame_count());
+ EXPECT_EQ(timestamp + trim_duration, buffer->timestamp());
+ EXPECT_EQ(duration - 6 * trim_duration, buffer->duration());
+ bus->Zero();
+ buffer->ReadFrames(buffer->frame_count(), 0, 0, bus.get());
+ VerifyBus(bus.get(), trim_start, 2 * trim_length, 1);
+ VerifyBusWithOffset(
+ bus.get(), trim_start, buffer->frame_count() - trim_start, 0, 1);
+}
+
TEST(AudioBufferTest, CopyFrom) {
const ChannelLayout kChannelLayout = CHANNEL_LAYOUT_MONO;
scoped_refptr<AudioBuffer> original_buffer =
@@ -312,4 +419,12 @@ TEST(AudioBufferTest, Trim) {
EXPECT_EQ(base::TimeDelta(), buffer->duration());
}
+TEST(AudioBufferTest, TrimRangePlanar) {
+ TrimRangeTest(kSampleFormatPlanarF32);
+}
+
+TEST(AudioBufferTest, TrimRangeInterleaved) {
+ TrimRangeTest(kSampleFormatF32);
+}
+
} // namespace media
diff --git a/media/base/audio_discard_helper.cc b/media/base/audio_discard_helper.cc
index d868382..f7279d7 100644
--- a/media/base/audio_discard_helper.cc
+++ b/media/base/audio_discard_helper.cc
@@ -9,7 +9,6 @@
#include "base/logging.h"
#include "media/base/audio_buffer.h"
#include "media/base/buffers.h"
-#include "media/base/decoder_buffer.h"
namespace media {
@@ -24,11 +23,13 @@ static void WarnOnNonMonotonicTimestamps(base::TimeDelta last_timestamp,
<< " diff " << diff.InMicroseconds() << " us";
}
-AudioDiscardHelper::AudioDiscardHelper(int sample_rate)
+AudioDiscardHelper::AudioDiscardHelper(int sample_rate, size_t decoder_delay)
: sample_rate_(sample_rate),
+ decoder_delay_(decoder_delay),
timestamp_helper_(sample_rate_),
discard_frames_(0),
- last_input_timestamp_(kNoTimestamp()) {
+ last_input_timestamp_(kNoTimestamp()),
+ delayed_discard_(false) {
DCHECK_GT(sample_rate_, 0);
}
@@ -44,6 +45,8 @@ void AudioDiscardHelper::Reset(size_t initial_discard) {
discard_frames_ = initial_discard;
last_input_timestamp_ = kNoTimestamp();
timestamp_helper_.SetBaseTimestamp(kNoTimestamp());
+ delayed_discard_ = false;
+ delayed_discard_padding_ = DecoderBuffer::DiscardPadding();
}
bool AudioDiscardHelper::ProcessBuffers(
@@ -59,15 +62,32 @@ bool AudioDiscardHelper::ProcessBuffers(
last_input_timestamp_ = encoded_buffer->timestamp();
// If this is the first buffer seen, setup the timestamp helper.
- if (!initialized()) {
+ const bool first_buffer = !initialized();
+ if (first_buffer) {
// Clamp the base timestamp to zero.
timestamp_helper_.SetBaseTimestamp(
std::max(base::TimeDelta(), encoded_buffer->timestamp()));
}
DCHECK(initialized());
- if (!decoded_buffer || !decoded_buffer->frame_count())
+ if (!decoded_buffer) {
+ // If there's a one buffer delay for decoding, we need to save it so it can
+ // be processed with the next decoder buffer.
+ if (first_buffer) {
+ delayed_discard_ = true;
+ delayed_discard_padding_ = encoded_buffer->discard_padding();
+ }
return false;
+ }
+
+ const size_t original_frame_count = decoded_buffer->frame_count();
+
+ // If there's a one buffer delay for decoding, pick up the last encoded
+ // buffer's discard padding for processing with the current decoded buffer.
+ DecoderBuffer::DiscardPadding current_discard_padding =
+ encoded_buffer->discard_padding();
+ if (delayed_discard_)
+ std::swap(current_discard_padding, delayed_discard_padding_);
if (discard_frames_ > 0) {
const size_t decoded_frames = decoded_buffer->frame_count();
@@ -75,20 +95,73 @@ bool AudioDiscardHelper::ProcessBuffers(
discard_frames_ -= frames_to_discard;
// If everything would be discarded, indicate a new buffer is required.
- if (frames_to_discard == decoded_frames)
+ if (frames_to_discard == decoded_frames) {
+ // For simplicity disallow cases where a buffer with discard padding is
+ // present. Doing so allows us to avoid complexity around tracking
+ // discards across buffers.
+ DCHECK(current_discard_padding.first == base::TimeDelta());
+ DCHECK(current_discard_padding.second == base::TimeDelta());
return false;
+ }
decoded_buffer->TrimStart(frames_to_discard);
}
- // TODO(dalecurtis): Applying the current buffer's discard padding doesn't
- // make sense in the Vorbis case because there is a delay of one buffer before
- // decoded buffers are returned. Fix and add support for more than just end
- // trimming. See http://crbug.com/360961.
- if (encoded_buffer->discard_padding() > base::TimeDelta()) {
+ // Handle front discard padding.
+ if (current_discard_padding.first > base::TimeDelta()) {
+ const size_t decoded_frames = decoded_buffer->frame_count();
+ const size_t start_frames_to_discard =
+ TimeDeltaToFrames(current_discard_padding.first);
+
+ // Regardless of the timestamp on the encoded buffer, the corresponding
+ // decoded output will appear |decoder_delay_| frames later.
+ size_t discard_start = decoder_delay_;
+ if (decoder_delay_ > 0) {
+ // If we have a |decoder_delay_| and have already discarded frames from
+ // this buffer, the |discard_start| must be adjusted by the number of
+ // frames already discarded.
+ const size_t frames_discarded_so_far =
+ original_frame_count - decoded_buffer->frame_count();
+ CHECK_LE(frames_discarded_so_far, decoder_delay_);
+ discard_start -= frames_discarded_so_far;
+ }
+
+    // For simplicity require the start of the discard to be within the current
+    // buffer. Doing so allows us to avoid complexity around tracking discards
+    // across buffers.
+ CHECK_LT(discard_start, decoded_frames);
+
+ const size_t frames_to_discard =
+ std::min(start_frames_to_discard, decoded_frames - discard_start);
+
+ // Carry over any frames which need to be discarded from the front of the
+ // next buffer.
+ DCHECK(!discard_frames_);
+ discard_frames_ = start_frames_to_discard - frames_to_discard;
+
+ // If everything would be discarded, indicate a new buffer is required.
+ if (frames_to_discard == decoded_frames) {
+ // The buffer should not have been marked with end discard if the front
+ // discard removes everything.
+ DCHECK(current_discard_padding.second == base::TimeDelta());
+ return false;
+ }
+
+ decoded_buffer->TrimRange(discard_start, discard_start + frames_to_discard);
+ } else {
+ DCHECK(current_discard_padding.first == base::TimeDelta());
+ }
+
+ // Handle end discard padding.
+ if (current_discard_padding.second > base::TimeDelta()) {
+ // Limit end discarding to when there is no |decoder_delay_|, otherwise it's
+ // non-trivial determining where to start discarding end frames.
+ CHECK(!decoder_delay_);
+
const size_t decoded_frames = decoded_buffer->frame_count();
const size_t end_frames_to_discard =
- TimeDeltaToFrames(encoded_buffer->discard_padding());
+ TimeDeltaToFrames(current_discard_padding.second);
+
if (end_frames_to_discard > decoded_frames) {
DLOG(ERROR) << "Encountered invalid discard padding value.";
return false;
@@ -100,7 +173,7 @@ bool AudioDiscardHelper::ProcessBuffers(
decoded_buffer->TrimEnd(end_frames_to_discard);
} else {
- DCHECK(encoded_buffer->discard_padding() == base::TimeDelta());
+ DCHECK(current_discard_padding.second == base::TimeDelta());
}
// Assign timestamp to the buffer.
diff --git a/media/base/audio_discard_helper.h b/media/base/audio_discard_helper.h
index 388cbd4..deeb45f 100644
--- a/media/base/audio_discard_helper.h
+++ b/media/base/audio_discard_helper.h
@@ -9,17 +9,31 @@
#include "base/time/time.h"
#include "media/base/audio_timestamp_helper.h"
#include "media/base/buffers.h"
+#include "media/base/decoder_buffer.h"
#include "media/base/media_export.h"
namespace media {
class AudioBuffer;
-class DecoderBuffer;
// Helper class for managing timestamps and discard events around decoding.
class MEDIA_EXPORT AudioDiscardHelper {
public:
- explicit AudioDiscardHelper(int sample_rate);
+ // |sample_rate| is the sample rate of decoded data which will be handed into
+ // the ProcessBuffers() call.
+ //
+ // |decoder_delay| is the number of frames a decoder will output before data
+ // corresponding to the first encoded buffer is output. Callers only need to
+ // specify this if the decoder inserts frames which have no corresponding
+ // encoded buffer.
+ //
+ // For example, most MP3 decoders will output 529 junk frames before the data
+ // corresponding to the first encoded buffer is output. These frames are not
+ // represented in the encoded data stream and instead are an artifact of how
+ // most MP3 decoders work. See http://lame.sourceforge.net/tech-FAQ.txt
+ //
+ // NOTE: End discard is only supported when there is no |decoder_delay|.
+ AudioDiscardHelper(int sample_rate, size_t decoder_delay);
~AudioDiscardHelper();
// Converts a TimeDelta to a frame count based on the constructed sample rate.
@@ -50,11 +64,15 @@ class MEDIA_EXPORT AudioDiscardHelper {
private:
const int sample_rate_;
+ const size_t decoder_delay_;
AudioTimestampHelper timestamp_helper_;
size_t discard_frames_;
base::TimeDelta last_input_timestamp_;
+ bool delayed_discard_;
+ DecoderBuffer::DiscardPadding delayed_discard_padding_;
+
DISALLOW_IMPLICIT_CONSTRUCTORS(AudioDiscardHelper);
};
diff --git a/media/base/audio_discard_helper_unittest.cc b/media/base/audio_discard_helper_unittest.cc
index e3f21c0..55d2b61 100644
--- a/media/base/audio_discard_helper_unittest.cc
+++ b/media/base/audio_discard_helper_unittest.cc
@@ -46,7 +46,7 @@ static float ExtractDecodedData(const scoped_refptr<AudioBuffer>& buffer,
}
TEST(AudioDiscardHelperTest, TimeDeltaToFrames) {
- AudioDiscardHelper discard_helper(kSampleRate);
+ AudioDiscardHelper discard_helper(kSampleRate, 0);
EXPECT_EQ(0u, discard_helper.TimeDeltaToFrames(base::TimeDelta()));
EXPECT_EQ(
@@ -70,7 +70,7 @@ TEST(AudioDiscardHelperTest, TimeDeltaToFrames) {
}
TEST(AudioDiscardHelperTest, BasicProcessBuffers) {
- AudioDiscardHelper discard_helper(kSampleRate);
+ AudioDiscardHelper discard_helper(kSampleRate, 0);
ASSERT_FALSE(discard_helper.initialized());
const base::TimeDelta kTimestamp = base::TimeDelta();
@@ -102,7 +102,7 @@ TEST(AudioDiscardHelperTest, BasicProcessBuffers) {
}
TEST(AudioDiscardHelperTest, NegativeTimestampClampsToZero) {
- AudioDiscardHelper discard_helper(kSampleRate);
+ AudioDiscardHelper discard_helper(kSampleRate, 0);
ASSERT_FALSE(discard_helper.initialized());
const base::TimeDelta kTimestamp = -base::TimeDelta::FromSeconds(1);
@@ -122,7 +122,7 @@ TEST(AudioDiscardHelperTest, NegativeTimestampClampsToZero) {
}
TEST(AudioDiscardHelperTest, ProcessBuffersWithInitialDiscard) {
- AudioDiscardHelper discard_helper(kSampleRate);
+ AudioDiscardHelper discard_helper(kSampleRate, 0);
ASSERT_FALSE(discard_helper.initialized());
const base::TimeDelta kTimestamp = base::TimeDelta();
@@ -148,7 +148,7 @@ TEST(AudioDiscardHelperTest, ProcessBuffersWithInitialDiscard) {
}
TEST(AudioDiscardHelperTest, ProcessBuffersWithLargeInitialDiscard) {
- AudioDiscardHelper discard_helper(kSampleRate);
+ AudioDiscardHelper discard_helper(kSampleRate, 0);
ASSERT_FALSE(discard_helper.initialized());
const base::TimeDelta kTimestamp = base::TimeDelta();
@@ -181,7 +181,7 @@ TEST(AudioDiscardHelperTest, ProcessBuffersWithLargeInitialDiscard) {
}
TEST(AudioDiscardHelperTest, AllowNonMonotonicTimestamps) {
- AudioDiscardHelper discard_helper(kSampleRate);
+ AudioDiscardHelper discard_helper(kSampleRate, 0);
ASSERT_FALSE(discard_helper.initialized());
const base::TimeDelta kTimestamp = base::TimeDelta();
@@ -206,8 +206,8 @@ TEST(AudioDiscardHelperTest, AllowNonMonotonicTimestamps) {
EXPECT_EQ(kTestFrames, decoded_buffer->frame_count());
}
-TEST(AudioDiscardHelperTest, DiscardPadding) {
- AudioDiscardHelper discard_helper(kSampleRate);
+TEST(AudioDiscardHelperTest, DiscardEndPadding) {
+ AudioDiscardHelper discard_helper(kSampleRate, 0);
ASSERT_FALSE(discard_helper.initialized());
const base::TimeDelta kTimestamp = base::TimeDelta();
@@ -219,18 +219,39 @@ TEST(AudioDiscardHelperTest, DiscardPadding) {
scoped_refptr<AudioBuffer> decoded_buffer = CreateDecodedBuffer(kTestFrames);
// Set a discard padding equivalent to half the buffer.
- encoded_buffer->set_discard_padding(kDuration / 2);
+ encoded_buffer->set_discard_padding(
+ std::make_pair(base::TimeDelta(), kDuration / 2));
ASSERT_TRUE(discard_helper.ProcessBuffers(encoded_buffer, decoded_buffer));
ASSERT_TRUE(discard_helper.initialized());
EXPECT_EQ(kTimestamp, decoded_buffer->timestamp());
EXPECT_EQ(kDuration / 2, decoded_buffer->duration());
EXPECT_EQ(kTestFrames / 2, decoded_buffer->frame_count());
- ASSERT_FLOAT_EQ(0, ExtractDecodedData(decoded_buffer, 0));
}
-TEST(AudioDiscardHelperTest, InitialDiscardAndDiscardPadding) {
- AudioDiscardHelper discard_helper(kSampleRate);
+TEST(AudioDiscardHelperTest, BadDiscardEndPadding) {
+ AudioDiscardHelper discard_helper(kSampleRate, 0);
+ ASSERT_FALSE(discard_helper.initialized());
+
+ const base::TimeDelta kTimestamp = base::TimeDelta();
+ const base::TimeDelta kDuration = base::TimeDelta::FromMilliseconds(10);
+ const int kTestFrames = discard_helper.TimeDeltaToFrames(kDuration);
+
+ scoped_refptr<DecoderBuffer> encoded_buffer =
+ CreateEncodedBuffer(kTimestamp, kDuration);
+ scoped_refptr<AudioBuffer> decoded_buffer = CreateDecodedBuffer(kTestFrames);
+
+ // Set a discard padding equivalent to double the buffer size.
+ encoded_buffer->set_discard_padding(
+ std::make_pair(base::TimeDelta(), kDuration * 2));
+
+ // Verify the end discard padding is rejected.
+ ASSERT_FALSE(discard_helper.ProcessBuffers(encoded_buffer, decoded_buffer));
+ ASSERT_TRUE(discard_helper.initialized());
+}
+
+TEST(AudioDiscardHelperTest, InitialDiscardAndDiscardEndPadding) {
+ AudioDiscardHelper discard_helper(kSampleRate, 0);
ASSERT_FALSE(discard_helper.initialized());
const base::TimeDelta kTimestamp = base::TimeDelta();
@@ -242,7 +263,8 @@ TEST(AudioDiscardHelperTest, InitialDiscardAndDiscardPadding) {
scoped_refptr<AudioBuffer> decoded_buffer = CreateDecodedBuffer(kTestFrames);
// Set a discard padding equivalent to a quarter of the buffer.
- encoded_buffer->set_discard_padding(kDuration / 4);
+ encoded_buffer->set_discard_padding(
+ std::make_pair(base::TimeDelta(), kDuration / 4));
// Set an initial discard of a quarter of the buffer.
const int kDiscardFrames = kTestFrames / 4;
@@ -257,4 +279,106 @@ TEST(AudioDiscardHelperTest, InitialDiscardAndDiscardPadding) {
ExtractDecodedData(decoded_buffer, 0));
}
+TEST(AudioDiscardHelperTest, InitialDiscardAndDiscardPadding) {
+ AudioDiscardHelper discard_helper(kSampleRate, 0);
+ ASSERT_FALSE(discard_helper.initialized());
+
+ const base::TimeDelta kTimestamp = base::TimeDelta();
+ const base::TimeDelta kDuration = base::TimeDelta::FromMilliseconds(10);
+ const int kTestFrames = discard_helper.TimeDeltaToFrames(kDuration);
+
+ scoped_refptr<DecoderBuffer> encoded_buffer =
+ CreateEncodedBuffer(kTimestamp, kDuration);
+ scoped_refptr<AudioBuffer> decoded_buffer = CreateDecodedBuffer(kTestFrames);
+
+ // Set all the discard values to be different to ensure each is properly used.
+ const int kDiscardFrames = kTestFrames / 4;
+ encoded_buffer->set_discard_padding(
+ std::make_pair(kDuration / 8, kDuration / 16));
+ discard_helper.Reset(kDiscardFrames);
+
+ ASSERT_TRUE(discard_helper.ProcessBuffers(encoded_buffer, decoded_buffer));
+ ASSERT_TRUE(discard_helper.initialized());
+ EXPECT_EQ(kTimestamp, decoded_buffer->timestamp());
+ EXPECT_EQ(kDuration - kDuration / 4 - kDuration / 8 - kDuration / 16,
+ decoded_buffer->duration());
+ EXPECT_EQ(kTestFrames - kTestFrames / 4 - kTestFrames / 8 - kTestFrames / 16,
+ decoded_buffer->frame_count());
+}
+
+TEST(AudioDiscardHelperTest, InitialDiscardAndDiscardPaddingAndCodecDelay) {
+ // Use a codec delay of 5ms.
+ const int kCodecDelay = kSampleRate / 100 / 2;
+ AudioDiscardHelper discard_helper(kSampleRate, kCodecDelay);
+ ASSERT_FALSE(discard_helper.initialized());
+ discard_helper.Reset(kCodecDelay);
+
+ const base::TimeDelta kTimestamp = base::TimeDelta();
+ const base::TimeDelta kDuration = base::TimeDelta::FromMilliseconds(10);
+ const int kTestFrames = discard_helper.TimeDeltaToFrames(kDuration);
+
+ scoped_refptr<DecoderBuffer> encoded_buffer =
+ CreateEncodedBuffer(kTimestamp, kDuration);
+ scoped_refptr<AudioBuffer> decoded_buffer = CreateDecodedBuffer(kTestFrames);
+
+ // Set a discard padding equivalent to half of the buffer.
+ encoded_buffer->set_discard_padding(
+ std::make_pair(kDuration / 2, base::TimeDelta()));
+
+ // All of the first buffer should be discarded.
+ ASSERT_FALSE(discard_helper.ProcessBuffers(encoded_buffer, decoded_buffer));
+ ASSERT_TRUE(discard_helper.initialized());
+
+ // Processing another buffer (with the same discard padding) should discard
+ // the back half of the buffer since kCodecDelay is half a buffer.
+ encoded_buffer->set_timestamp(kTimestamp + kDuration);
+ decoded_buffer = CreateDecodedBuffer(kTestFrames);
+ ASSERT_FLOAT_EQ(0.0f, ExtractDecodedData(decoded_buffer, 0));
+ ASSERT_NEAR(kCodecDelay * kDataStep,
+ ExtractDecodedData(decoded_buffer, kCodecDelay),
+ kDataStep * 1000);
+ ASSERT_TRUE(discard_helper.ProcessBuffers(encoded_buffer, decoded_buffer));
+ EXPECT_EQ(kTimestamp, decoded_buffer->timestamp());
+ EXPECT_EQ(kDuration / 2, decoded_buffer->duration());
+ EXPECT_EQ(kTestFrames / 2, decoded_buffer->frame_count());
+
+ // Verify it was actually the latter half of the buffer that was removed.
+ ASSERT_FLOAT_EQ(0.0f, ExtractDecodedData(decoded_buffer, 0));
+}
+
+TEST(AudioDiscardHelperTest, DelayedDiscardInitialDiscardAndDiscardPadding) {
+ AudioDiscardHelper discard_helper(kSampleRate, 0);
+ ASSERT_FALSE(discard_helper.initialized());
+
+ const base::TimeDelta kTimestamp = base::TimeDelta();
+ const base::TimeDelta kDuration = base::TimeDelta::FromMilliseconds(10);
+ const int kTestFrames = discard_helper.TimeDeltaToFrames(kDuration);
+
+ scoped_refptr<DecoderBuffer> encoded_buffer =
+ CreateEncodedBuffer(kTimestamp, kDuration);
+
+ // Set all the discard values to be different to ensure each is properly used.
+ const int kDiscardFrames = kTestFrames / 4;
+ encoded_buffer->set_discard_padding(
+ std::make_pair(kDuration / 8, kDuration / 16));
+ discard_helper.Reset(kDiscardFrames);
+
+ // Verify nothing is output for the first buffer, yet initialized is true.
+ ASSERT_FALSE(discard_helper.ProcessBuffers(encoded_buffer, NULL));
+ ASSERT_TRUE(discard_helper.initialized());
+
+ // Create an encoded buffer with no discard padding.
+ encoded_buffer = CreateEncodedBuffer(kTimestamp + kDuration, kDuration);
+ scoped_refptr<AudioBuffer> decoded_buffer = CreateDecodedBuffer(kTestFrames);
+
+ // Verify that when the decoded buffer is consumed, the discards from the
+ // previous encoded buffer are applied.
+ ASSERT_TRUE(discard_helper.ProcessBuffers(encoded_buffer, decoded_buffer));
+ EXPECT_EQ(kTimestamp, decoded_buffer->timestamp());
+ EXPECT_EQ(kDuration - kDuration / 4 - kDuration / 8 - kDuration / 16,
+ decoded_buffer->duration());
+ EXPECT_EQ(kTestFrames - kTestFrames / 4 - kTestFrames / 8 - kTestFrames / 16,
+ decoded_buffer->frame_count());
+}
+
} // namespace media
diff --git a/media/base/decoder_buffer.cc b/media/base/decoder_buffer.cc
index b6d7d26..2059817 100644
--- a/media/base/decoder_buffer.cc
+++ b/media/base/decoder_buffer.cc
@@ -83,7 +83,8 @@ std::string DecoderBuffer::AsHumanReadableString() {
<< " size: " << size_
<< " side_data_size: " << side_data_size_
<< " encrypted: " << (decrypt_config_ != NULL)
- << " discard_padding (ms): " << discard_padding_.InMilliseconds();
+ << " discard_padding (ms): (" << discard_padding_.first.InMilliseconds()
+ << ", " << discard_padding_.second.InMilliseconds() << ")";
return s.str();
}
diff --git a/media/base/decoder_buffer.h b/media/base/decoder_buffer.h
index 8edc539..4ff836d 100644
--- a/media/base/decoder_buffer.h
+++ b/media/base/decoder_buffer.h
@@ -6,6 +6,7 @@
#define MEDIA_BASE_DECODER_BUFFER_H_
#include <string>
+#include <utility>
#include "base/logging.h"
#include "base/memory/aligned_memory.h"
@@ -105,12 +106,16 @@ class MEDIA_EXPORT DecoderBuffer
return side_data_size_;
}
- base::TimeDelta discard_padding() const {
+  // A discard window indicates the amount of data which should be discarded
+  // from this buffer after decoding. The first value is the amount off the
+  // front and the second the amount off the back.
+ typedef std::pair<base::TimeDelta, base::TimeDelta> DiscardPadding;
+ const DiscardPadding& discard_padding() const {
DCHECK(!end_of_stream());
return discard_padding_;
}
- void set_discard_padding(const base::TimeDelta discard_padding) {
+ void set_discard_padding(const DiscardPadding& discard_padding) {
DCHECK(!end_of_stream());
discard_padding_ = discard_padding;
}
@@ -166,7 +171,7 @@ class MEDIA_EXPORT DecoderBuffer
int side_data_size_;
scoped_ptr<uint8, base::AlignedFreeDeleter> side_data_;
scoped_ptr<DecryptConfig> decrypt_config_;
- base::TimeDelta discard_padding_;
+ DiscardPadding discard_padding_;
base::TimeDelta splice_timestamp_;
// Constructor helper method for memory allocations.
diff --git a/media/filters/ffmpeg_audio_decoder.cc b/media/filters/ffmpeg_audio_decoder.cc
index fec5da52..5c9a29c 100644
--- a/media/filters/ffmpeg_audio_decoder.cc
+++ b/media/filters/ffmpeg_audio_decoder.cc
@@ -427,7 +427,8 @@ bool FFmpegAudioDecoder::ConfigureDecoder() {
// Success!
av_frame_.reset(av_frame_alloc());
- discard_helper_.reset(new AudioDiscardHelper(config_.samples_per_second()));
+ discard_helper_.reset(new AudioDiscardHelper(config_.samples_per_second(),
+ config_.codec_delay()));
av_sample_format_ = codec_context_->sample_fmt;
if (codec_context_->channels !=
diff --git a/media/filters/ffmpeg_demuxer.cc b/media/filters/ffmpeg_demuxer.cc
index 40a8c91..aa829d7 100644
--- a/media/filters/ffmpeg_demuxer.cc
+++ b/media/filters/ffmpeg_demuxer.cc
@@ -50,6 +50,11 @@ static base::Time ExtractTimelineOffset(AVFormatContext* format_context) {
return base::Time();
}
+static base::TimeDelta FramesToTimeDelta(int frames, double sample_rate) {
+ return base::TimeDelta::FromMicroseconds(
+ frames * base::Time::kMicrosecondsPerSecond / sample_rate);
+}
+
//
// FFmpegDemuxerStream
//
@@ -189,20 +194,28 @@ void FFmpegDemuxerStream::EnqueuePacket(ScopedAVPacket packet) {
}
int skip_samples_size = 0;
- uint8* skip_samples = av_packet_get_side_data(packet.get(),
- AV_PKT_DATA_SKIP_SAMPLES,
- &skip_samples_size);
+ const uint32* skip_samples_ptr =
+ reinterpret_cast<const uint32*>(av_packet_get_side_data(
+ packet.get(), AV_PKT_DATA_SKIP_SAMPLES, &skip_samples_size));
const int kSkipSamplesValidSize = 10;
- const int kSkipSamplesOffset = 4;
+ const int kSkipEndSamplesOffset = 1;
if (skip_samples_size >= kSkipSamplesValidSize) {
- int discard_padding_samples = base::ByteSwapToLE32(
- *(reinterpret_cast<const uint32*>(skip_samples +
- kSkipSamplesOffset)));
- // TODO(vigneshv): Change decoder buffer to use number of samples so that
- // this conversion can be avoided.
- buffer->set_discard_padding(base::TimeDelta::FromMicroseconds(
- discard_padding_samples * 1000000.0 /
- audio_decoder_config().samples_per_second()));
+ // Because FFmpeg rolls codec delay and skip samples into one we can only
+ // allow front discard padding on the first buffer. Otherwise the discard
+ // helper can't figure out which data to discard. See AudioDiscardHelper.
+ int discard_front_samples = base::ByteSwapToLE32(*skip_samples_ptr);
+ if (last_packet_timestamp_ != kNoTimestamp()) {
+ DLOG(ERROR) << "Skip samples are only allowed for the first packet.";
+ discard_front_samples = 0;
+ }
+
+ const int discard_end_samples =
+ base::ByteSwapToLE32(*(skip_samples_ptr + kSkipEndSamplesOffset));
+ const int samples_per_second =
+ audio_decoder_config().samples_per_second();
+ buffer->set_discard_padding(std::make_pair(
+ FramesToTimeDelta(discard_front_samples, samples_per_second),
+ FramesToTimeDelta(discard_end_samples, samples_per_second)));
}
if (decrypt_config)
diff --git a/media/filters/opus_audio_decoder.cc b/media/filters/opus_audio_decoder.cc
index bbdcb3f..27c58c6 100644
--- a/media/filters/opus_audio_decoder.cc
+++ b/media/filters/opus_audio_decoder.cc
@@ -418,7 +418,8 @@ bool OpusAudioDecoder::ConfigureDecoder() {
return false;
}
- discard_helper_.reset(new AudioDiscardHelper(config_.samples_per_second()));
+ discard_helper_.reset(
+ new AudioDiscardHelper(config_.samples_per_second(), 0));
start_input_timestamp_ = kNoTimestamp();
return true;
}
diff --git a/media/filters/pipeline_integration_test.cc b/media/filters/pipeline_integration_test.cc
index 96a4e34..7ff2f0f 100644
--- a/media/filters/pipeline_integration_test.cc
+++ b/media/filters/pipeline_integration_test.cc
@@ -855,6 +855,17 @@ TEST_P(PipelineIntegrationTest, MediaSource_ADTS_TimestampOffset) {
EXPECT_TRUE(WaitUntilOnEnded());
}
+TEST_F(PipelineIntegrationTest, BasicPlaybackHashed_MP3) {
+ ASSERT_TRUE(Start(GetTestDataFilePath("sfx.mp3"), PIPELINE_OK, kHashed));
+
+ Play();
+
+ ASSERT_TRUE(WaitUntilOnEnded());
+
+ // Verify codec delay and preroll are stripped.
+ EXPECT_EQ("3.05,2.87,3.00,3.32,3.58,4.08,", GetAudioHash());
+}
+
TEST_P(PipelineIntegrationTest, MediaSource_MP3) {
MockMediaSource source("sfx.mp3", kMP3, kAppendWholeFile, GetParam());
StartHashedPipelineWithMediaSource(&source);
diff --git a/media/formats/webm/webm_cluster_parser.cc b/media/formats/webm/webm_cluster_parser.cc
index 3816fdb..172eafa 100644
--- a/media/formats/webm/webm_cluster_parser.cc
+++ b/media/formats/webm/webm_cluster_parser.cc
@@ -416,8 +416,9 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
}
if (discard_padding != 0) {
- buffer->set_discard_padding(base::TimeDelta::FromMicroseconds(
- discard_padding / 1000));
+ buffer->set_discard_padding(std::make_pair(
+ base::TimeDelta(),
+ base::TimeDelta::FromMicroseconds(discard_padding / 1000)));
}
return track->AddBuffer(buffer);