// Copyright 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "media/cdm/ppapi/ffmpeg_cdm_audio_decoder.h" #include <algorithm> #include "base/logging.h" #include "media/base/audio_bus.h" #include "media/base/audio_timestamp_helper.h" #include "media/base/buffers.h" #include "media/base/data_buffer.h" #include "media/base/limits.h" #include "media/ffmpeg/ffmpeg_common.h" // Include FFmpeg header files. extern "C" { // Temporarily disable possible loss of data warning. MSVC_PUSH_DISABLE_WARNING(4244); #include <libavcodec/avcodec.h> MSVC_POP_WARNING(); } // extern "C" namespace media { // Maximum number of channels with defined layout in src/media. static const int kMaxChannels = 8; static AVCodecID CdmAudioCodecToCodecID( cdm::AudioDecoderConfig::AudioCodec audio_codec) { switch (audio_codec) { case cdm::AudioDecoderConfig::kCodecVorbis: return AV_CODEC_ID_VORBIS; case cdm::AudioDecoderConfig::kCodecAac: return AV_CODEC_ID_AAC; case cdm::AudioDecoderConfig::kUnknownAudioCodec: default: NOTREACHED() << "Unsupported cdm::AudioCodec: " << audio_codec; return AV_CODEC_ID_NONE; } } static void CdmAudioDecoderConfigToAVCodecContext( const cdm::AudioDecoderConfig& config, AVCodecContext* codec_context) { codec_context->codec_type = AVMEDIA_TYPE_AUDIO; codec_context->codec_id = CdmAudioCodecToCodecID(config.codec); switch (config.bits_per_channel) { case 8: codec_context->sample_fmt = AV_SAMPLE_FMT_U8; break; case 16: codec_context->sample_fmt = AV_SAMPLE_FMT_S16; break; case 32: codec_context->sample_fmt = AV_SAMPLE_FMT_S32; break; default: DVLOG(1) << "CdmAudioDecoderConfigToAVCodecContext() Unsupported bits " "per channel: " << config.bits_per_channel; codec_context->sample_fmt = AV_SAMPLE_FMT_NONE; } codec_context->channels = config.channel_count; codec_context->sample_rate = config.samples_per_second; if (config.extra_data) { codec_context->extradata_size = config.extra_data_size; codec_context->extradata = reinterpret_cast<uint8_t*>( av_malloc(config.extra_data_size + FF_INPUT_BUFFER_PADDING_SIZE)); memcpy(codec_context->extradata, config.extra_data, config.extra_data_size); memset(codec_context->extradata + config.extra_data_size, '\0', FF_INPUT_BUFFER_PADDING_SIZE); } else { codec_context->extradata = NULL; codec_context->extradata_size = 0; } } static cdm::AudioFormat AVSampleFormatToCdmAudioFormat( AVSampleFormat sample_format) { switch (sample_format) { case AV_SAMPLE_FMT_U8: return cdm::kAudioFormatU8; case AV_SAMPLE_FMT_S16: return cdm::kAudioFormatS16; case AV_SAMPLE_FMT_S32: return cdm::kAudioFormatS32; case AV_SAMPLE_FMT_FLT: return cdm::kAudioFormatF32; case AV_SAMPLE_FMT_S16P: return cdm::kAudioFormatPlanarS16; case AV_SAMPLE_FMT_FLTP: return cdm::kAudioFormatPlanarF32; default: DVLOG(1) << "Unknown AVSampleFormat: " << sample_format; } return cdm::kUnknownAudioFormat; } static void CopySamples(cdm::AudioFormat cdm_format, int decoded_audio_size, const AVFrame& av_frame, uint8_t* output_buffer) { switch (cdm_format) { case cdm::kAudioFormatU8: case cdm::kAudioFormatS16: case cdm::kAudioFormatS32: case cdm::kAudioFormatF32: memcpy(output_buffer, av_frame.data[0], decoded_audio_size); break; case cdm::kAudioFormatPlanarS16: case cdm::kAudioFormatPlanarF32: { const int decoded_size_per_channel = decoded_audio_size / av_frame.channels; for (int i = 0; i < av_frame.channels; ++i) { memcpy(output_buffer, av_frame.extended_data[i], decoded_size_per_channel); output_buffer += decoded_size_per_channel; } break; } default: NOTREACHED() << "Unsupported CDM Audio Format!"; memset(output_buffer, 0, decoded_audio_size); } } FFmpegCdmAudioDecoder::FFmpegCdmAudioDecoder(CdmHost* host) : is_initialized_(false), host_(host), samples_per_second_(0), channels_(0), av_sample_format_(0), bytes_per_frame_(0), last_input_timestamp_(kNoTimestamp()), output_bytes_to_drop_(0) { } FFmpegCdmAudioDecoder::~FFmpegCdmAudioDecoder() { ReleaseFFmpegResources(); } bool FFmpegCdmAudioDecoder::Initialize(const cdm::AudioDecoderConfig& config) { DVLOG(1) << "Initialize()"; if (!IsValidConfig(config)) { LOG(ERROR) << "Initialize(): invalid audio decoder configuration."; return false; } if (is_initialized_) { LOG(ERROR) << "Initialize(): Already initialized."; return false; } // Initialize AVCodecContext structure. codec_context_.reset(avcodec_alloc_context3(NULL)); CdmAudioDecoderConfigToAVCodecContext(config, codec_context_.get()); // MP3 decodes to S16P which we don't support, tell it to use S16 instead. if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16; AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id); if (!codec || avcodec_open2(codec_context_.get(), codec, NULL) < 0) { DLOG(ERROR) << "Could not initialize audio decoder: " << codec_context_->codec_id; return false; } // Ensure avcodec_open2() respected our format request. if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) { DLOG(ERROR) << "Unable to configure a supported sample format: " << codec_context_->sample_fmt; return false; } // Success! av_frame_.reset(avcodec_alloc_frame()); samples_per_second_ = config.samples_per_second; bytes_per_frame_ = codec_context_->channels * config.bits_per_channel / 8; output_timestamp_helper_.reset( new AudioTimestampHelper(config.samples_per_second)); is_initialized_ = true; // Store initial values to guard against midstream configuration changes. channels_ = codec_context_->channels; av_sample_format_ = codec_context_->sample_fmt; return true; } void FFmpegCdmAudioDecoder::Deinitialize() { DVLOG(1) << "Deinitialize()"; ReleaseFFmpegResources(); is_initialized_ = false; ResetTimestampState(); } void FFmpegCdmAudioDecoder::Reset() { DVLOG(1) << "Reset()"; avcodec_flush_buffers(codec_context_.get()); ResetTimestampState(); } // static bool FFmpegCdmAudioDecoder::IsValidConfig( const cdm::AudioDecoderConfig& config) { return config.codec != cdm::AudioDecoderConfig::kUnknownAudioCodec && config.channel_count > 0 && config.channel_count <= kMaxChannels && config.bits_per_channel > 0 && config.bits_per_channel <= limits::kMaxBitsPerSample && config.samples_per_second > 0 && config.samples_per_second <= limits::kMaxSampleRate; } cdm::Status FFmpegCdmAudioDecoder::DecodeBuffer( const uint8_t* compressed_buffer, int32_t compressed_buffer_size, int64_t input_timestamp, cdm::AudioFrames* decoded_frames) { DVLOG(1) << "DecodeBuffer()"; const bool is_end_of_stream = !compressed_buffer; base::TimeDelta timestamp = base::TimeDelta::FromMicroseconds(input_timestamp); bool is_vorbis = codec_context_->codec_id == AV_CODEC_ID_VORBIS; if (!is_end_of_stream) { if (last_input_timestamp_ == kNoTimestamp()) { if (is_vorbis && timestamp < base::TimeDelta()) { // Dropping frames for negative timestamps as outlined in section A.2 // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html int frames_to_drop = floor( 0.5 + -timestamp.InSecondsF() * samples_per_second_); output_bytes_to_drop_ = bytes_per_frame_ * frames_to_drop; } else { last_input_timestamp_ = timestamp; } } else if (timestamp != kNoTimestamp()) { if (timestamp < last_input_timestamp_) { base::TimeDelta diff = timestamp - last_input_timestamp_; DVLOG(1) << "Input timestamps are not monotonically increasing! " << " ts " << timestamp.InMicroseconds() << " us" << " diff " << diff.InMicroseconds() << " us"; return cdm::kDecodeError; } last_input_timestamp_ = timestamp; } } AVPacket packet; av_init_packet(&packet); packet.data = const_cast<uint8_t*>(compressed_buffer); packet.size = compressed_buffer_size; // Tell the CDM what AudioFormat we're using. const cdm::AudioFormat cdm_format = AVSampleFormatToCdmAudioFormat( static_cast<AVSampleFormat>(av_sample_format_)); DCHECK_NE(cdm_format, cdm::kUnknownAudioFormat); decoded_frames->SetFormat(cdm_format); // Each audio packet may contain several frames, so we must call the decoder // until we've exhausted the packet. Regardless of the packet size we always // want to hand it to the decoder at least once, otherwise we would end up // skipping end of stream packets since they have a size of zero. do { // Reset frame to default values. avcodec_get_frame_defaults(av_frame_.get()); int frame_decoded = 0; int result = avcodec_decode_audio4( codec_context_.get(), av_frame_.get(), &frame_decoded, &packet); if (result < 0) { DCHECK(!is_end_of_stream) << "End of stream buffer produced an error! " << "This is quite possibly a bug in the audio decoder not handling " << "end of stream AVPackets correctly."; DLOG(ERROR) << "Error decoding an audio frame with timestamp: " << timestamp.InMicroseconds() << " us, duration: " << timestamp.InMicroseconds() << " us, packet size: " << compressed_buffer_size << " bytes"; return cdm::kDecodeError; } // Update packet size and data pointer in case we need to call the decoder // with the remaining bytes from this packet. packet.size -= result; packet.data += result; if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() && !is_end_of_stream) { DCHECK(timestamp != kNoTimestamp()); if (output_bytes_to_drop_ > 0) { // Currently Vorbis is the only codec that causes us to drop samples. // If we have to drop samples it always means the timeline starts at 0. DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS); output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta()); } else { output_timestamp_helper_->SetBaseTimestamp(timestamp); } } int decoded_audio_size = 0; if (frame_decoded) { if (av_frame_->sample_rate != samples_per_second_ || av_frame_->channels != channels_ || av_frame_->format != av_sample_format_) { DLOG(ERROR) << "Unsupported midstream configuration change!" << " Sample Rate: " << av_frame_->sample_rate << " vs " << samples_per_second_ << ", Channels: " << av_frame_->channels << " vs " << channels_ << ", Sample Format: " << av_frame_->format << " vs " << av_sample_format_; return cdm::kDecodeError; } decoded_audio_size = av_samples_get_buffer_size( NULL, codec_context_->channels, av_frame_->nb_samples, codec_context_->sample_fmt, 1); } if (decoded_audio_size > 0 && output_bytes_to_drop_ > 0) { DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0) << "Decoder didn't output full frames"; int dropped_size = std::min(decoded_audio_size, output_bytes_to_drop_); decoded_audio_size -= dropped_size; output_bytes_to_drop_ -= dropped_size; } if (decoded_audio_size > 0) { DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0) << "Decoder didn't output full frames"; base::TimeDelta output_timestamp = output_timestamp_helper_->GetTimestamp(); output_timestamp_helper_->AddFrames(decoded_audio_size / bytes_per_frame_); // If we've exhausted the packet in the first decode we can write directly // into the frame buffer instead of a multistep serialization approach. if (serialized_audio_frames_.empty() && !packet.size) { const uint32_t buffer_size = decoded_audio_size + sizeof(int64) * 2; decoded_frames->SetFrameBuffer(host_->Allocate(buffer_size)); if (!decoded_frames->FrameBuffer()) { LOG(ERROR) << "DecodeBuffer() CdmHost::Allocate failed."; return cdm::kDecodeError; } decoded_frames->FrameBuffer()->SetSize(buffer_size); uint8_t* output_buffer = decoded_frames->FrameBuffer()->Data(); const int64 timestamp = output_timestamp.InMicroseconds(); memcpy(output_buffer, ×tamp, sizeof(timestamp)); output_buffer += sizeof(timestamp); const int64 output_size = decoded_audio_size; memcpy(output_buffer, &output_size, sizeof(output_size)); output_buffer += sizeof(output_size); // Copy the samples and return success. CopySamples( cdm_format, decoded_audio_size, *av_frame_, output_buffer); return cdm::kSuccess; } // There are still more frames to decode, so we need to serialize them in // a secondary buffer since we don't know their sizes ahead of time (which // is required to allocate the FrameBuffer object). SerializeInt64(output_timestamp.InMicroseconds()); SerializeInt64(decoded_audio_size); const size_t previous_size = serialized_audio_frames_.size(); serialized_audio_frames_.resize(previous_size + decoded_audio_size); uint8_t* output_buffer = &serialized_audio_frames_[0] + previous_size; CopySamples( cdm_format, decoded_audio_size, *av_frame_, output_buffer); } } while (packet.size > 0); if (!serialized_audio_frames_.empty()) { decoded_frames->SetFrameBuffer( host_->Allocate(serialized_audio_frames_.size())); if (!decoded_frames->FrameBuffer()) { LOG(ERROR) << "DecodeBuffer() CdmHost::Allocate failed."; return cdm::kDecodeError; } memcpy(decoded_frames->FrameBuffer()->Data(), &serialized_audio_frames_[0], serialized_audio_frames_.size()); decoded_frames->FrameBuffer()->SetSize(serialized_audio_frames_.size()); serialized_audio_frames_.clear(); return cdm::kSuccess; } return cdm::kNeedMoreData; } void FFmpegCdmAudioDecoder::ResetTimestampState() { output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp()); last_input_timestamp_ = kNoTimestamp(); output_bytes_to_drop_ = 0; } void FFmpegCdmAudioDecoder::ReleaseFFmpegResources() { DVLOG(1) << "ReleaseFFmpegResources()"; codec_context_.reset(); av_frame_.reset(); } void FFmpegCdmAudioDecoder::SerializeInt64(int64 value) { const size_t previous_size = serialized_audio_frames_.size(); serialized_audio_frames_.resize(previous_size + sizeof(value)); memcpy(&serialized_audio_frames_[0] + previous_size, &value, sizeof(value)); } } // namespace media