author     vigneshv@chromium.org <vigneshv@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>    2013-09-26 04:08:56 +0000
committer  vigneshv@chromium.org <vigneshv@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>    2013-09-26 04:08:56 +0000
commit     abdfa9ac4a8633e4d5b52cc4d4ce0d135738f905
tree       7d2fb32ec4757d13e5863c1992aa6a17bbba94f4 /media
parent     7760e2270c347b65391bf9ac806184ace4b7a8fd
media: Implementing Opus Seek Pre Roll
Opus has the idea of a rolling keyframe: whenever there is a
discontinuity, a certain number of samples has to be thrown away
before the decoded data is valid. The number of samples to throw
away can be found in the Seek Pre Roll element of the WebM
container. This CL uses that value to discard those samples after
decoding whenever a seek happens, and adjusts the timestamps
accordingly (a rough sketch of the discard step follows the change
description below).
TEST=media unittests still pass
Review URL: https://codereview.chromium.org/23444076
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@225318 0039d316-1c4b-4281-b951-d872f2087c98
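For orientation, here is a rough, hypothetical C++ sketch of the discard-and-retimestamp idea described above. The names PreRollNanosecondsToFrames, PreRollTracker, OnSeek and TrimPreRoll are illustrative assumptions, not the actual Chromium code; the real bookkeeping lives in OpusAudioDecoder (frames_to_discard_, frame_delay_at_start_) in the diff below.

// Illustrative sketch only; simplified from the approach described above.
// All names here are hypothetical, not part of the Chromium media stack.
#include <algorithm>
#include <cmath>
#include <cstdint>

// Converts a pre-roll duration (the WebM SeekPreRoll element is a duration
// in nanoseconds) into a frame count at the stream's sample rate, rounding up.
static int PreRollNanosecondsToFrames(int64_t preroll_ns, int sample_rate) {
  return static_cast<int>(std::ceil((preroll_ns / 1e9) * sample_rate));
}

class PreRollTracker {
 public:
  explicit PreRollTracker(int seek_preroll_frames)
      : seek_preroll_frames_(seek_preroll_frames), frames_to_discard_(0) {}

  // After a seek (a decoding discontinuity), the next decoded frames are not
  // valid output until the pre-roll has been consumed.
  void OnSeek() { frames_to_discard_ = seek_preroll_frames_; }

  // Returns how many of |frames_decoded| frames are valid output for this
  // packet. The caller still advances its timestamp accounting by the full
  // |frames_decoded| so that later buffers keep correct timestamps.
  int TrimPreRoll(int frames_decoded) {
    const int dropped = std::min(frames_decoded, frames_to_discard_);
    frames_to_discard_ -= dropped;
    return frames_decoded - dropped;
  }

 private:
  const int seek_preroll_frames_;
  int frames_to_discard_;
};

In the actual change the equivalent drop is performed with AudioBuffer::TrimStart(), and the frame count is derived from the container value via TimeDeltaToAudioFrames().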
Diffstat (limited to 'media')
-rw-r--r--  media/filters/opus_audio_decoder.cc          | 258
-rw-r--r--  media/filters/opus_audio_decoder.h           |  15
-rw-r--r--  media/filters/pipeline_integration_test.cc   |  34
3 files changed, 192 insertions, 115 deletions
diff --git a/media/filters/opus_audio_decoder.cc b/media/filters/opus_audio_decoder.cc
index b3e903b..74ac302 100644
--- a/media/filters/opus_audio_decoder.cc
+++ b/media/filters/opus_audio_decoder.cc
@@ -41,6 +41,11 @@ static inline bool IsEndOfStream(int decoded_size,
   return decoded_size == 0 && input->end_of_stream();
 }
 
+static int TimeDeltaToAudioFrames(base::TimeDelta time_delta,
+                                  int frame_rate) {
+  return std::ceil(time_delta.InSecondsF() * frame_rate);
+}
+
 // The Opus specification is part of IETF RFC 6716:
 // http://tools.ietf.org/html/rfc6716
 
@@ -128,7 +133,7 @@ static void RemapOpusChannelLayout(const uint8* opus_mapping,
     channel_layout[channel] = opus_mapping[vorbis_layout_offset[channel]];
 }
 
-// Opus Header contents:
+// Opus Extra Data contents:
 // - "OpusHead" (64 bits)
 // - version number (8 bits)
 // - Channels C (8 bits)
@@ -152,37 +157,41 @@ static void RemapOpusChannelLayout(const uint8* opus_mapping,
 // - stream = byte-M
 
 // Default audio output channel layout. Used to initialize |stream_map| in
-// OpusHeader, and passed to opus_multistream_decoder_create() when the header
-// does not contain mapping information. The values are valid only for mono and
-// stereo output: Opus streams with more than 2 channels require a stream map.
+// OpusExtraData, and passed to opus_multistream_decoder_create() when the
+// extra data does not contain mapping information. The values are valid only
+// for mono and stereo output: Opus streams with more than 2 channels require a
+// stream map.
 static const int kMaxChannelsWithDefaultLayout = 2;
 static const uint8 kDefaultOpusChannelLayout[kMaxChannelsWithDefaultLayout] = {
     0, 1 };
 
-// Size of the Opus header excluding optional mapping information.
-static const int kOpusHeaderSize = 19;
+// Size of the Opus extra data excluding optional mapping information.
+static const int kOpusExtraDataSize = 19;
 
-// Offset to the channel count byte in the Opus header.
-static const int kOpusHeaderChannelsOffset = 9;
+// Offset to the channel count byte in the Opus extra data.
+static const int kOpusExtraDataChannelsOffset = 9;
 
-// Offset to the pre-skip value in the Opus header.
-static const int kOpusHeaderSkipSamplesOffset = 10;
+// Offset to the pre-skip value in the Opus extra data.
+static const int kOpusExtraDataSkipSamplesOffset = 10;
 
-// Offset to the channel mapping byte in the Opus header.
-static const int kOpusHeaderChannelMappingOffset = 18;
+// Offset to the channel mapping byte in the Opus extra data.
+static const int kOpusExtraDataChannelMappingOffset = 18;
 
-// Header contains a stream map. The mapping values are in extra data beyond
-// the always present |kOpusHeaderSize| bytes of data. The mapping data
+// Extra Data contains a stream map. The mapping values are in extra data beyond
+// the always present |kOpusExtraDataSize| bytes of data. The mapping data
 // contains stream count, coupling information, and per channel mapping values:
 // - Byte 0: Number of streams.
 // - Byte 1: Number coupled.
-// - Byte 2: Starting at byte 2 are |header->channels| uint8 mapping values.
-static const int kOpusHeaderNumStreamsOffset = kOpusHeaderSize;
-static const int kOpusHeaderNumCoupledOffset = kOpusHeaderNumStreamsOffset + 1;
-static const int kOpusHeaderStreamMapOffset = kOpusHeaderNumStreamsOffset + 2;
-
-struct OpusHeader {
-  OpusHeader()
+// - Byte 2: Starting at byte 2 are |extra_data->channels| uint8 mapping
+//   values.
+static const int kOpusExtraDataNumStreamsOffset = kOpusExtraDataSize;
+static const int kOpusExtraDataNumCoupledOffset =
+    kOpusExtraDataNumStreamsOffset + 1;
+static const int kOpusExtraDataStreamMapOffset =
+    kOpusExtraDataNumStreamsOffset + 2;
+
+struct OpusExtraData {
+  OpusExtraData()
       : channels(0),
         skip_samples(0),
         channel_mapping(0),
@@ -200,46 +209,54 @@ struct OpusHeader {
   uint8 stream_map[kMaxVorbisChannels];
 };
 
-// Returns true when able to successfully parse and store Opus header data in
-// data parsed in |header|. Based on opus header parsing code in libopusdec
-// from FFmpeg, and opus_header from Xiph's opus-tools project.
-static void ParseOpusHeader(const uint8* data, int data_size,
-                            const AudioDecoderConfig& config,
-                            OpusHeader* header) {
-  CHECK_GE(data_size, kOpusHeaderSize);
+// Returns true when able to successfully parse and store Opus extra data in
+// |extra_data|. Based on opus header parsing code in libopusdec from FFmpeg,
+// and opus_header from Xiph's opus-tools project.
+static bool ParseOpusExtraData(const uint8* data, int data_size,
+                               const AudioDecoderConfig& config,
+                               OpusExtraData* extra_data) {
+  if (data_size < kOpusExtraDataSize)
+    return false;
 
-  header->channels = *(data + kOpusHeaderChannelsOffset);
+  extra_data->channels = *(data + kOpusExtraDataChannelsOffset);
 
-  CHECK(header->channels > 0 && header->channels <= kMaxVorbisChannels)
-      << "invalid channel count in header: " << header->channels;
+  if (extra_data->channels <= 0 || extra_data->channels > kMaxVorbisChannels) {
+    DVLOG(0) << "invalid channel count in extra data: " << extra_data->channels;
+    return false;
+  }
 
-  header->skip_samples =
-      ReadLE16(data, data_size, kOpusHeaderSkipSamplesOffset);
+  extra_data->skip_samples =
+      ReadLE16(data, data_size, kOpusExtraDataSkipSamplesOffset);
 
-  header->channel_mapping = *(data + kOpusHeaderChannelMappingOffset);
+  extra_data->channel_mapping = *(data + kOpusExtraDataChannelMappingOffset);
 
-  if (!header->channel_mapping) {
-    CHECK_LE(header->channels, kMaxChannelsWithDefaultLayout)
-        << "Invalid header, missing stream map.";
+  if (!extra_data->channel_mapping) {
+    if (extra_data->channels > kMaxChannelsWithDefaultLayout) {
+      DVLOG(0) << "Invalid extra data, missing stream map.";
+      return false;
+    }
 
-    header->num_streams = 1;
-    header->num_coupled =
+    extra_data->num_streams = 1;
+    extra_data->num_coupled =
         (ChannelLayoutToChannelCount(config.channel_layout()) > 1) ?
             1 : 0;
-    return;
+    return true;
   }
 
-  CHECK_GE(data_size, kOpusHeaderStreamMapOffset + header->channels)
-      << "Invalid stream map; insufficient data for current channel count: "
-      << header->channels;
+  if (data_size < kOpusExtraDataStreamMapOffset + extra_data->channels) {
+    DVLOG(0) << "Invalid stream map; insufficient data for current channel "
+             << "count: " << extra_data->channels;
+    return false;
+  }
 
-  header->num_streams = *(data + kOpusHeaderNumStreamsOffset);
-  header->num_coupled = *(data + kOpusHeaderNumCoupledOffset);
+  extra_data->num_streams = *(data + kOpusExtraDataNumStreamsOffset);
+  extra_data->num_coupled = *(data + kOpusExtraDataNumCoupledOffset);
 
-  if (header->num_streams + header->num_coupled != header->channels)
-    LOG(WARNING) << "Inconsistent channel mapping.";
+  if (extra_data->num_streams + extra_data->num_coupled != extra_data->channels)
+    DVLOG(1) << "Inconsistent channel mapping.";
 
-  for (int i = 0; i < header->channels; ++i)
-    header->stream_map[i] = *(data + kOpusHeaderStreamMapOffset + i);
+  for (int i = 0; i < extra_data->channels; ++i)
+    extra_data->stream_map[i] = *(data + kOpusExtraDataStreamMapOffset + i);
+  return true;
 }
 
 OpusAudioDecoder::OpusAudioDecoder(
@@ -252,7 +269,8 @@ OpusAudioDecoder::OpusAudioDecoder(
       channel_layout_(CHANNEL_LAYOUT_NONE),
       samples_per_second_(0),
       last_input_timestamp_(kNoTimestamp()),
-      skip_samples_(0) {
+      frames_to_discard_(0),
+      frame_delay_at_start_(0) {
 }
 
 void OpusAudioDecoder::Initialize(
@@ -265,7 +283,7 @@ void OpusAudioDecoder::Initialize(
   if (demuxer_stream_) {
     // TODO(scherkus): initialization currently happens more than once in
    // PipelineIntegrationTest.BasicPlayback.
-    LOG(ERROR) << "Initialize has already been called.";
+    DVLOG(0) << "Initialize has already been called.";
     CHECK(false);
   }
 
@@ -376,8 +394,8 @@ void OpusAudioDecoder::BufferReady(
       input->timestamp() < last_input_timestamp_) {
     base::TimeDelta diff = input->timestamp() - last_input_timestamp_;
     DVLOG(1) << "Input timestamps are not monotonically increasing! "
" - << " ts " << input->timestamp().InMicroseconds() << " us" - << " diff " << diff.InMicroseconds() << " us"; + << " ts " << input->timestamp().InMicroseconds() << " us" + << " diff " << diff.InMicroseconds() << " us"; base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL); return; } @@ -405,29 +423,29 @@ bool OpusAudioDecoder::ConfigureDecoder() { const AudioDecoderConfig& config = demuxer_stream_->audio_decoder_config(); if (config.codec() != kCodecOpus) { - DLOG(ERROR) << "codec must be kCodecOpus."; + DVLOG(0) << "codec must be kCodecOpus."; return false; } const int channel_count = ChannelLayoutToChannelCount(config.channel_layout()); if (!config.IsValidConfig() || channel_count > kMaxVorbisChannels) { - DLOG(ERROR) << "Invalid or unsupported audio stream -" - << " codec: " << config.codec() - << " channel count: " << channel_count - << " channel layout: " << config.channel_layout() - << " bits per channel: " << config.bits_per_channel() - << " samples per second: " << config.samples_per_second(); + DVLOG(0) << "Invalid or unsupported audio stream -" + << " codec: " << config.codec() + << " channel count: " << channel_count + << " channel layout: " << config.channel_layout() + << " bits per channel: " << config.bits_per_channel() + << " samples per second: " << config.samples_per_second(); return false; } if (config.bits_per_channel() != kBitsPerChannel) { - DLOG(ERROR) << "16 bit samples required."; + DVLOG(0) << "16 bit samples required."; return false; } if (config.is_encrypted()) { - DLOG(ERROR) << "Encrypted audio stream not supported."; + DVLOG(0) << "Encrypted audio stream not supported."; return false; } @@ -452,27 +470,30 @@ bool OpusAudioDecoder::ConfigureDecoder() { if (!output_buffer_) output_buffer_.reset(new int16[kMaxOpusOutputPacketSizeSamples]); - // Parse the Opus header. - OpusHeader opus_header; - ParseOpusHeader(config.extra_data(), config.extra_data_size(), - config, - &opus_header); + // Parse the Opus Extra Data. + OpusExtraData opus_extra_data; + if (!ParseOpusExtraData(config.extra_data(), config.extra_data_size(), + config, + &opus_extra_data)) + return false; if (!config.codec_delay().InMicroseconds()) { // TODO(vigneshv): Replace this with return false once ffmpeg demuxer code - // starts populating the config correctly. - skip_samples_ = opus_header.skip_samples; + // starts populating the config correctly. Also, timestamp is not being + // offset in this case as this code path will go away in a subsequent CL. + frames_to_discard_ = opus_extra_data.skip_samples; } else { // Convert from seconds to samples. - skip_samples_ = std::ceil(config.codec_delay().InMicroseconds() * - config.samples_per_second() / 1000000.0); - if (skip_samples_ < 0) { + timestamp_offset_ = config.codec_delay(); + frame_delay_at_start_ = TimeDeltaToAudioFrames(config.codec_delay(), + config.samples_per_second()); + if (frame_delay_at_start_ < 0) { DVLOG(1) << "Invalid file. Incorrect value for codec delay."; return false; } - if (skip_samples_ != opus_header.skip_samples) { + if (frame_delay_at_start_ != opus_extra_data.skip_samples) { DVLOG(1) << "Invalid file. 
-               << "value in Opus header.";
+               << "value in Opus Extra Data.";
       return false;
     }
   }
 
@@ -483,7 +504,7 @@ bool OpusAudioDecoder::ConfigureDecoder() {
          kMaxChannelsWithDefaultLayout);
 
   if (channel_count > kMaxChannelsWithDefaultLayout) {
-    RemapOpusChannelLayout(opus_header.stream_map,
+    RemapOpusChannelLayout(opus_extra_data.stream_map,
                            channel_count,
                            channel_mapping);
   }
@@ -492,13 +513,13 @@
   int status = OPUS_INVALID_STATE;
   opus_decoder_ = opus_multistream_decoder_create(config.samples_per_second(),
                                                   channel_count,
-                                                  opus_header.num_streams,
-                                                  opus_header.num_coupled,
+                                                  opus_extra_data.num_streams,
+                                                  opus_extra_data.num_coupled,
                                                   channel_mapping,
                                                   &status);
   if (!opus_decoder_ || status != OPUS_OK) {
-    LOG(ERROR) << "opus_multistream_decoder_create failed status="
-               << opus_strerror(status);
+    DVLOG(0) << "opus_multistream_decoder_create failed status="
+             << opus_strerror(status);
     return false;
   }
 
@@ -520,30 +541,32 @@ void OpusAudioDecoder::CloseDecoder() {
 void OpusAudioDecoder::ResetTimestampState() {
   output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp());
   last_input_timestamp_ = kNoTimestamp();
-  skip_samples_ = 0;
+  frames_to_discard_ = TimeDeltaToAudioFrames(
+      demuxer_stream_->audio_decoder_config().seek_preroll(),
+      samples_per_second_);
 }
 
 bool OpusAudioDecoder::Decode(const scoped_refptr<DecoderBuffer>& input,
                               scoped_refptr<AudioBuffer>* output_buffer) {
-  int samples_decoded = opus_multistream_decode(opus_decoder_,
-                                                input->data(),
-                                                input->data_size(),
-                                                &output_buffer_[0],
-                                                kMaxOpusOutputPacketSizeSamples,
-                                                0);
-  if (samples_decoded < 0) {
-    LOG(ERROR) << "opus_multistream_decode failed for"
-               << " timestamp: " << input->timestamp().InMicroseconds()
-               << " us, duration: " << input->duration().InMicroseconds()
-               << " us, packet size: " << input->data_size() << " bytes with"
-               << " status: " << opus_strerror(samples_decoded);
+  int frames_decoded = opus_multistream_decode(opus_decoder_,
+                                               input->data(),
+                                               input->data_size(),
+                                               &output_buffer_[0],
+                                               kMaxOpusOutputPacketSizeSamples,
+                                               0);
+  if (frames_decoded < 0) {
+    DVLOG(0) << "opus_multistream_decode failed for"
+             << " timestamp: " << input->timestamp().InMicroseconds()
+             << " us, duration: " << input->duration().InMicroseconds()
+             << " us, packet size: " << input->data_size() << " bytes with"
+             << " status: " << opus_strerror(frames_decoded);
     return false;
   }
 
   uint8* decoded_audio_data = reinterpret_cast<uint8*>(&output_buffer_[0]);
-  int decoded_audio_size = samples_decoded *
+  int bytes_decoded = frames_decoded *
       demuxer_stream_->audio_decoder_config().bytes_per_frame();
-  DCHECK_LE(decoded_audio_size, kMaxOpusOutputPacketSizeBytes);
+  DCHECK_LE(bytes_decoded, kMaxOpusOutputPacketSizeBytes);
 
   if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() &&
       !input->end_of_stream()) {
@@ -551,42 +574,49 @@ bool OpusAudioDecoder::Decode(const scoped_refptr<DecoderBuffer>& input,
     output_timestamp_helper_->SetBaseTimestamp(input->timestamp());
   }
 
-  if (decoded_audio_size > 0) {
+  // Skip samples should be equal to codec delay when the file starts and when
+  // there is a seek to zero.
+  // TODO(vigneshv): This should be checked for start of stream rather than
+  // input timestamp of zero to accomodate streams that don't start at zero.
+  if (input->timestamp() == base::TimeDelta())
+    frames_to_discard_ = frame_delay_at_start_;
+
+  if (bytes_decoded > 0 && frames_decoded > frames_to_discard_) {
     // Copy the audio samples into an output buffer.
     uint8* data[] = { decoded_audio_data };
     *output_buffer = AudioBuffer::CopyFrom(
         kSampleFormatS16,
         ChannelLayoutToChannelCount(channel_layout_),
-        samples_decoded,
+        frames_decoded,
         data,
-        output_timestamp_helper_->GetTimestamp(),
-        output_timestamp_helper_->GetFrameDuration(samples_decoded));
-    output_timestamp_helper_->AddFrames(samples_decoded);
-    if (skip_samples_ > 0) {
-      int dropped_size = std::min(samples_decoded, skip_samples_);
-      output_buffer->get()->TrimStart(dropped_size);
-      skip_samples_ -= dropped_size;
-      samples_decoded -= dropped_size;
+        output_timestamp_helper_->GetTimestamp() - timestamp_offset_,
+        output_timestamp_helper_->GetFrameDuration(frames_decoded));
+    output_timestamp_helper_->AddFrames(frames_decoded);
+    if (frames_to_discard_ > 0) {
+      output_buffer->get()->TrimStart(frames_to_discard_);
+      frames_decoded -= frames_to_discard_;
+      frames_to_discard_ = 0;
     }
     if (input->discard_padding().InMicroseconds() > 0) {
-      int discard_padding = std::ceil(
-          input->discard_padding().InMicroseconds() *
-          samples_per_second_ / 1000000.0);
-      if (discard_padding < 0 || discard_padding > samples_decoded) {
+      int discard_padding = TimeDeltaToAudioFrames(input->discard_padding(),
+                                                   samples_per_second_);
+      if (discard_padding < 0 || discard_padding > frames_decoded) {
         DVLOG(1) << "Invalid file. Incorrect discard padding value.";
         return false;
       }
-      output_buffer->get()->TrimEnd(std::min(samples_decoded, discard_padding));
-      samples_decoded -= discard_padding;
+      output_buffer->get()->TrimEnd(discard_padding);
+      frames_decoded -= discard_padding;
     }
+  } else if (bytes_decoded > 0) {
+    frames_to_discard_ -= frames_decoded;
+    frames_decoded = 0;
   }
 
-  decoded_audio_size =
-      samples_decoded *
-      demuxer_stream_->audio_decoder_config().bytes_per_frame();
   // Decoding finished successfully, update statistics.
   PipelineStatistics statistics;
-  statistics.audio_bytes_decoded = decoded_audio_size;
+  statistics.audio_bytes_decoded =
+      frames_decoded *
+      demuxer_stream_->audio_decoder_config().bytes_per_frame();
   statistics_cb_.Run(statistics);
 
   return true;
diff --git a/media/filters/opus_audio_decoder.h b/media/filters/opus_audio_decoder.h
index 77e8434..50ba069 100644
--- a/media/filters/opus_audio_decoder.h
+++ b/media/filters/opus_audio_decoder.h
@@ -72,7 +72,20 @@ class MEDIA_EXPORT OpusAudioDecoder : public AudioDecoder {
   ReadCB read_cb_;
 
-  int skip_samples_;
+  // Number of frames to be discarded from the start of the packet. This value
+  // is respected for all packets except for the first one in the stream. For
+  // the first packet in the stream, |frame_delay_at_start_| is used. This is
+  // usually set to the SeekPreRoll value from the container whenever a seek
+  // happens.
+  int frames_to_discard_;
+
+  // Number of frames to be discarded at the start of the stream. This value
+  // is typically the CodecDelay value from the container.
+  int frame_delay_at_start_;
+
+  // Timestamp to be subtracted from all the frames. This is typically computed
+  // from the CodecDelay value in the container.
+  base::TimeDelta timestamp_offset_;
 
   // Buffer for output from libopus.
   scoped_ptr<int16[]> output_buffer_;
diff --git a/media/filters/pipeline_integration_test.cc b/media/filters/pipeline_integration_test.cc
index 0ea768d..92d8fc7 100644
--- a/media/filters/pipeline_integration_test.cc
+++ b/media/filters/pipeline_integration_test.cc
@@ -366,6 +366,11 @@ class PipelineIntegrationTest
     message_loop_.Run();
   }
 
+  void StartHashedPipelineWithMediaSource(MockMediaSource* source) {
+    hashing_enabled_ = true;
+    StartPipelineWithMediaSource(source);
+  }
+
   void StartPipelineWithEncryptedMedia(
       MockMediaSource* source,
       FakeEncryptedMedia* encrypted_media) {
@@ -561,6 +566,35 @@ TEST_F(PipelineIntegrationTest, BasicPlayback_MediaSource_Opus_WebM) {
   Stop();
 }
 
+TEST_F(PipelineIntegrationTest, MediaSource_Opus_Seeking_WebM) {
+  EXPECT_CALL(*this, OnSetOpaque(false)).Times(AnyNumber());
+  MockMediaSource source("bear-opus-end-trimming.webm", kOpusAudioOnlyWebM,
+                         kAppendWholeFile);
+  StartHashedPipelineWithMediaSource(&source);
+
+
+  EXPECT_EQ(1u, pipeline_->GetBufferedTimeRanges().size());
+  EXPECT_EQ(0, pipeline_->GetBufferedTimeRanges().start(0).InMilliseconds());
+  EXPECT_EQ(kOpusEndTrimmingWebMFileDurationMs,
+            pipeline_->GetBufferedTimeRanges().end(0).InMilliseconds());
+
+  base::TimeDelta start_seek_time = base::TimeDelta::FromMilliseconds(1000);
+  base::TimeDelta seek_time = base::TimeDelta::FromMilliseconds(2000);
+
+  Play();
+  ASSERT_TRUE(WaitUntilCurrentTimeIsAfter(start_seek_time));
+  source.Seek(seek_time, 0x1D5, 34017);
+  source.EndOfStream();
+  ASSERT_TRUE(Seek(seek_time));
+
+  ASSERT_TRUE(WaitUntilOnEnded());
+
+  EXPECT_EQ("0.76,0.20,-0.82,-0.58,-1.29,-0.29,", GetAudioHash());
+
+  source.Abort();
+  Stop();
+}
+
 TEST_F(PipelineIntegrationTest, MediaSource_ConfigChange_WebM) {
   MockMediaSource source("bear-320x240-16x9-aspect.webm", kWebM,
                          kAppendWholeFile);