// Copyright 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "media/cdm/ppapi/ffmpeg_cdm_audio_decoder.h" #include #include "base/logging.h" #include "media/base/audio_bus.h" #include "media/base/audio_timestamp_helper.h" #include "media/base/buffers.h" #include "media/base/data_buffer.h" #include "media/base/limits.h" // Include FFmpeg header files. extern "C" { // Temporarily disable possible loss of data warning. MSVC_PUSH_DISABLE_WARNING(4244); #include MSVC_POP_WARNING(); } // extern "C" namespace media { // Maximum number of channels with defined layout in src/media. static const int kMaxChannels = 8; static AVCodecID CdmAudioCodecToCodecID( cdm::AudioDecoderConfig::AudioCodec audio_codec) { switch (audio_codec) { case cdm::AudioDecoderConfig::kCodecVorbis: return AV_CODEC_ID_VORBIS; case cdm::AudioDecoderConfig::kCodecAac: return AV_CODEC_ID_AAC; case cdm::AudioDecoderConfig::kUnknownAudioCodec: default: NOTREACHED() << "Unsupported cdm::AudioCodec: " << audio_codec; return AV_CODEC_ID_NONE; } } static void CdmAudioDecoderConfigToAVCodecContext( const cdm::AudioDecoderConfig& config, AVCodecContext* codec_context) { codec_context->codec_type = AVMEDIA_TYPE_AUDIO; codec_context->codec_id = CdmAudioCodecToCodecID(config.codec); switch (config.bits_per_channel) { case 8: codec_context->sample_fmt = AV_SAMPLE_FMT_U8; break; case 16: codec_context->sample_fmt = AV_SAMPLE_FMT_S16; break; case 32: codec_context->sample_fmt = AV_SAMPLE_FMT_S32; break; default: DVLOG(1) << "CdmAudioDecoderConfigToAVCodecContext() Unsupported bits " "per channel: " << config.bits_per_channel; codec_context->sample_fmt = AV_SAMPLE_FMT_NONE; } codec_context->channels = config.channel_count; codec_context->sample_rate = config.samples_per_second; if (config.extra_data) { codec_context->extradata_size = config.extra_data_size; codec_context->extradata = reinterpret_cast( av_malloc(config.extra_data_size + FF_INPUT_BUFFER_PADDING_SIZE)); memcpy(codec_context->extradata, config.extra_data, config.extra_data_size); memset(codec_context->extradata + config.extra_data_size, '\0', FF_INPUT_BUFFER_PADDING_SIZE); } else { codec_context->extradata = NULL; codec_context->extradata_size = 0; } } FFmpegCdmAudioDecoder::FFmpegCdmAudioDecoder(cdm::Host* host) : is_initialized_(false), host_(host), codec_context_(NULL), av_frame_(NULL), bits_per_channel_(0), samples_per_second_(0), channels_(0), av_sample_format_(0), bytes_per_frame_(0), last_input_timestamp_(kNoTimestamp()), output_bytes_to_drop_(0) { } FFmpegCdmAudioDecoder::~FFmpegCdmAudioDecoder() { ReleaseFFmpegResources(); } bool FFmpegCdmAudioDecoder::Initialize(const cdm::AudioDecoderConfig& config) { DVLOG(1) << "Initialize()"; if (!IsValidConfig(config)) { LOG(ERROR) << "Initialize(): invalid audio decoder configuration."; return false; } if (is_initialized_) { LOG(ERROR) << "Initialize(): Already initialized."; return false; } // Initialize AVCodecContext structure. codec_context_ = avcodec_alloc_context3(NULL); CdmAudioDecoderConfigToAVCodecContext(config, codec_context_); // MP3 decodes to S16P which we don't support, tell it to use S16 instead. if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16; AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id); if (!codec || avcodec_open2(codec_context_, codec, NULL) < 0) { DLOG(ERROR) << "Could not initialize audio decoder: " << codec_context_->codec_id; return false; } // Ensure avcodec_open2() respected our format request. if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) { DLOG(ERROR) << "Unable to configure a supported sample format: " << codec_context_->sample_fmt; return false; } // Some codecs will only output float data, so we need to convert to integer // before returning the decoded buffer. if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP || codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) { // Preallocate the AudioBus for float conversions. We can treat interleaved // float data as a single planar channel since our output is expected in an // interleaved format anyways. int channels = codec_context_->channels; if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) channels = 1; converter_bus_ = AudioBus::CreateWrapper(channels); } // Success! av_frame_ = avcodec_alloc_frame(); bits_per_channel_ = config.bits_per_channel; samples_per_second_ = config.samples_per_second; bytes_per_frame_ = codec_context_->channels * bits_per_channel_ / 8; output_timestamp_helper_.reset( new AudioTimestampHelper(config.samples_per_second)); serialized_audio_frames_.reserve(bytes_per_frame_ * samples_per_second_); is_initialized_ = true; // Store initial values to guard against midstream configuration changes. channels_ = codec_context_->channels; av_sample_format_ = codec_context_->sample_fmt; return true; } void FFmpegCdmAudioDecoder::Deinitialize() { DVLOG(1) << "Deinitialize()"; ReleaseFFmpegResources(); is_initialized_ = false; ResetTimestampState(); } void FFmpegCdmAudioDecoder::Reset() { DVLOG(1) << "Reset()"; avcodec_flush_buffers(codec_context_); ResetTimestampState(); } // static bool FFmpegCdmAudioDecoder::IsValidConfig( const cdm::AudioDecoderConfig& config) { return config.codec != cdm::AudioDecoderConfig::kUnknownAudioCodec && config.channel_count > 0 && config.channel_count <= kMaxChannels && config.bits_per_channel > 0 && config.bits_per_channel <= limits::kMaxBitsPerSample && config.samples_per_second > 0 && config.samples_per_second <= limits::kMaxSampleRate; } cdm::Status FFmpegCdmAudioDecoder::DecodeBuffer( const uint8_t* compressed_buffer, int32_t compressed_buffer_size, int64_t input_timestamp, cdm::AudioFrames* decoded_frames) { DVLOG(1) << "DecodeBuffer()"; const bool is_end_of_stream = !compressed_buffer; base::TimeDelta timestamp = base::TimeDelta::FromMicroseconds(input_timestamp); bool is_vorbis = codec_context_->codec_id == AV_CODEC_ID_VORBIS; if (!is_end_of_stream) { if (last_input_timestamp_ == kNoTimestamp()) { if (is_vorbis && timestamp < base::TimeDelta()) { // Dropping frames for negative timestamps as outlined in section A.2 // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html int frames_to_drop = floor( 0.5 + -timestamp.InSecondsF() * samples_per_second_); output_bytes_to_drop_ = bytes_per_frame_ * frames_to_drop; } else { last_input_timestamp_ = timestamp; } } else if (timestamp != kNoTimestamp()) { if (timestamp < last_input_timestamp_) { base::TimeDelta diff = timestamp - last_input_timestamp_; DVLOG(1) << "Input timestamps are not monotonically increasing! " << " ts " << timestamp.InMicroseconds() << " us" << " diff " << diff.InMicroseconds() << " us"; return cdm::kDecodeError; } last_input_timestamp_ = timestamp; } } AVPacket packet; av_init_packet(&packet); packet.data = const_cast(compressed_buffer); packet.size = compressed_buffer_size; // Each audio packet may contain several frames, so we must call the decoder // until we've exhausted the packet. Regardless of the packet size we always // want to hand it to the decoder at least once, otherwise we would end up // skipping end of stream packets since they have a size of zero. do { // Reset frame to default values. avcodec_get_frame_defaults(av_frame_); int frame_decoded = 0; int result = avcodec_decode_audio4( codec_context_, av_frame_, &frame_decoded, &packet); if (result < 0) { DCHECK(!is_end_of_stream) << "End of stream buffer produced an error! " << "This is quite possibly a bug in the audio decoder not handling " << "end of stream AVPackets correctly."; DLOG(ERROR) << "Error decoding an audio frame with timestamp: " << timestamp.InMicroseconds() << " us, duration: " << timestamp.InMicroseconds() << " us, packet size: " << compressed_buffer_size << " bytes"; return cdm::kDecodeError; } // Update packet size and data pointer in case we need to call the decoder // with the remaining bytes from this packet. packet.size -= result; packet.data += result; if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() && !is_end_of_stream) { DCHECK(timestamp != kNoTimestamp()); if (output_bytes_to_drop_ > 0) { // Currently Vorbis is the only codec that causes us to drop samples. // If we have to drop samples it always means the timeline starts at 0. DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS); output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta()); } else { output_timestamp_helper_->SetBaseTimestamp(timestamp); } } int decoded_audio_size = 0; if (frame_decoded) { if (av_frame_->sample_rate != samples_per_second_ || av_frame_->channels != channels_ || av_frame_->format != av_sample_format_) { DLOG(ERROR) << "Unsupported midstream configuration change!" << " Sample Rate: " << av_frame_->sample_rate << " vs " << samples_per_second_ << ", Channels: " << av_frame_->channels << " vs " << channels_ << ", Sample Format: " << av_frame_->format << " vs " << av_sample_format_; return cdm::kDecodeError; } decoded_audio_size = av_samples_get_buffer_size( NULL, codec_context_->channels, av_frame_->nb_samples, codec_context_->sample_fmt, 1); // If we're decoding into float, adjust audio size. if (converter_bus_ && bits_per_channel_ / 8 != sizeof(float)) { DCHECK(codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT || codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP); decoded_audio_size *= static_cast(bits_per_channel_ / 8) / sizeof(float); } } int start_sample = 0; if (decoded_audio_size > 0 && output_bytes_to_drop_ > 0) { DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0) << "Decoder didn't output full frames"; int dropped_size = std::min(decoded_audio_size, output_bytes_to_drop_); start_sample = dropped_size / bytes_per_frame_; decoded_audio_size -= dropped_size; output_bytes_to_drop_ -= dropped_size; } scoped_refptr output; if (decoded_audio_size > 0) { DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0) << "Decoder didn't output full frames"; // Convert float data using an AudioBus. if (converter_bus_) { // Setup the AudioBus as a wrapper of the AVFrame data and then use // AudioBus::ToInterleaved() to convert the data as necessary. int skip_frames = start_sample; int total_frames = av_frame_->nb_samples; int frames_to_interleave = decoded_audio_size / bytes_per_frame_; if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) { DCHECK_EQ(converter_bus_->channels(), 1); total_frames *= codec_context_->channels; skip_frames *= codec_context_->channels; frames_to_interleave *= codec_context_->channels; } converter_bus_->set_frames(total_frames); for (int i = 0; i < converter_bus_->channels(); ++i) { converter_bus_->SetChannelData(i, reinterpret_cast( av_frame_->extended_data[i])); } output = new DataBuffer(decoded_audio_size); output->set_data_size(decoded_audio_size); DCHECK_EQ(frames_to_interleave, converter_bus_->frames() - skip_frames); converter_bus_->ToInterleavedPartial( skip_frames, frames_to_interleave, bits_per_channel_ / 8, output->writable_data()); } else { output = DataBuffer::CopyFrom( av_frame_->extended_data[0] + start_sample * bytes_per_frame_, decoded_audio_size); } base::TimeDelta output_timestamp = output_timestamp_helper_->GetTimestamp(); output_timestamp_helper_->AddFrames(decoded_audio_size / bytes_per_frame_); // Serialize the audio samples into |serialized_audio_frames_|. SerializeInt64(output_timestamp.InMicroseconds()); SerializeInt64(output->data_size()); serialized_audio_frames_.insert( serialized_audio_frames_.end(), output->data(), output->data() + output->data_size()); } } while (packet.size > 0); if (!serialized_audio_frames_.empty()) { decoded_frames->SetFrameBuffer( host_->Allocate(serialized_audio_frames_.size())); if (!decoded_frames->FrameBuffer()) { LOG(ERROR) << "DecodeBuffer() cdm::Host::Allocate failed."; return cdm::kDecodeError; } memcpy(decoded_frames->FrameBuffer()->Data(), &serialized_audio_frames_[0], serialized_audio_frames_.size()); decoded_frames->FrameBuffer()->SetSize(serialized_audio_frames_.size()); serialized_audio_frames_.clear(); return cdm::kSuccess; } return cdm::kNeedMoreData; } void FFmpegCdmAudioDecoder::ResetTimestampState() { output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp()); last_input_timestamp_ = kNoTimestamp(); output_bytes_to_drop_ = 0; } void FFmpegCdmAudioDecoder::ReleaseFFmpegResources() { DVLOG(1) << "ReleaseFFmpegResources()"; if (codec_context_) { av_free(codec_context_->extradata); avcodec_close(codec_context_); av_free(codec_context_); codec_context_ = NULL; } if (av_frame_) { av_free(av_frame_); av_frame_ = NULL; } } void FFmpegCdmAudioDecoder::SerializeInt64(int64 value) { int previous_size = serialized_audio_frames_.size(); serialized_audio_frames_.resize(previous_size + sizeof(value)); memcpy(&serialized_audio_frames_[0] + previous_size, &value, sizeof(value)); } } // namespace media