summaryrefslogtreecommitdiffstats
path: root/webkit/media
diff options
context:
space:
mode:
authorxhwang@chromium.org <xhwang@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-01-15 06:25:03 +0000
committerxhwang@chromium.org <xhwang@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-01-15 06:25:03 +0000
commitca1d8f72d5a3ba5ab084ce43d8bd0d437363e725 (patch)
treeb36b1a1922062fc64d036eb7ff36b2ac99a27ad7 /webkit/media
parent408ba4f5dfaf7292da71d9fc906e562590934eb0 (diff)
downloadchromium_src-ca1d8f72d5a3ba5ab084ce43d8bd0d437363e725.zip
chromium_src-ca1d8f72d5a3ba5ab084ce43d8bd0d437363e725.tar.gz
chromium_src-ca1d8f72d5a3ba5ab084ce43d8bd0d437363e725.tar.bz2
Encrypted Media: Requiring the CDM to decode audio into S16 integers.
After the new FFmpeg roll, FFmpeg decodes into float instead of integers. Therefore, it may be unclear what format the CDMs are returning. In this CL, we limit the DecryptingAudioDecoder to support S16 only to work around this issue. All CDM implementations are expected to return S16. This is a short term fix for the audio corruption in CDMs. In the future, we need to be able to accept any format returned by the CDMs (see http://crbug.com/169105). BUG=168862 TEST=media_unittests pass; clearkeycdm audio works well. Review URL: https://chromiumcodereview.appspot.com/11778079 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@176805 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'webkit/media')
-rw-r--r--webkit/media/crypto/ppapi/ffmpeg_cdm_audio_decoder.cc139
-rw-r--r--webkit/media/crypto/ppapi/ffmpeg_cdm_audio_decoder.h19
-rw-r--r--webkit/media/webkit_media.gypi2
3 files changed, 116 insertions, 44 deletions
diff --git a/webkit/media/crypto/ppapi/ffmpeg_cdm_audio_decoder.cc b/webkit/media/crypto/ppapi/ffmpeg_cdm_audio_decoder.cc
index 8f76256..4173080 100644
--- a/webkit/media/crypto/ppapi/ffmpeg_cdm_audio_decoder.cc
+++ b/webkit/media/crypto/ppapi/ffmpeg_cdm_audio_decoder.cc
@@ -7,7 +7,10 @@
#include <algorithm>
#include "base/logging.h"
+#include "media/base/audio_bus.h"
+#include "media/base/audio_timestamp_helper.h"
#include "media/base/buffers.h"
+#include "media/base/data_buffer.h"
#include "media/base/limits.h"
#include "webkit/media/crypto/ppapi/cdm/content_decryption_module.h"
@@ -85,8 +88,6 @@ FFmpegCdmAudioDecoder::FFmpegCdmAudioDecoder(cdm::Allocator* allocator)
bits_per_channel_(0),
samples_per_second_(0),
bytes_per_frame_(0),
- output_timestamp_base_(media::kNoTimestamp()),
- total_frames_decoded_(0),
last_input_timestamp_(media::kNoTimestamp()),
output_bytes_to_drop_(0) {
}
@@ -112,22 +113,44 @@ bool FFmpegCdmAudioDecoder::Initialize(const cdm::AudioDecoderConfig& config) {
codec_context_ = avcodec_alloc_context3(NULL);
CdmAudioDecoderConfigToAVCodecContext(config, codec_context_);
+ // MP3 decodes to S16P which we don't support, tell it to use S16 instead.
+ if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P)
+ codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16;
+
AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
- if (!codec) {
- LOG(ERROR) << "Initialize(): avcodec_find_decoder failed.";
+ if (!codec || avcodec_open2(codec_context_, codec, NULL) < 0) {
+ DLOG(ERROR) << "Could not initialize audio decoder: "
+ << codec_context_->codec_id;
return false;
}
- int status;
- if ((status = avcodec_open2(codec_context_, codec, NULL)) < 0) {
- LOG(ERROR) << "Initialize(): avcodec_open2 failed: " << status;
+ // Ensure avcodec_open2() respected our format request.
+ if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) {
+ DLOG(ERROR) << "Unable to configure a supported sample format: "
+ << codec_context_->sample_fmt;
return false;
}
+ // Some codecs will only output float data, so we need to convert to integer
+ // before returning the decoded buffer.
+ if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP ||
+ codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) {
+ // Preallocate the AudioBus for float conversions. We can treat interleaved
+ // float data as a single planar channel since our output is expected in an
+ // interleaved format anyways.
+ int channels = codec_context_->channels;
+ if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT)
+ channels = 1;
+ converter_bus_ = media::AudioBus::CreateWrapper(channels);
+ }
+
+ // Success!
av_frame_ = avcodec_alloc_frame();
bits_per_channel_ = config.bits_per_channel;
samples_per_second_ = config.samples_per_second;
bytes_per_frame_ = codec_context_->channels * bits_per_channel_ / 8;
+ output_timestamp_helper_.reset(new media::AudioTimestampHelper(
+ bytes_per_frame_, config.samples_per_second));
serialized_audio_frames_.reserve(bytes_per_frame_ * samples_per_second_);
is_initialized_ = true;
@@ -138,13 +161,13 @@ void FFmpegCdmAudioDecoder::Deinitialize() {
DVLOG(1) << "Deinitialize()";
ReleaseFFmpegResources();
is_initialized_ = false;
- ResetAudioTimingData();
+ ResetTimestampState();
}
void FFmpegCdmAudioDecoder::Reset() {
DVLOG(1) << "Reset()";
avcodec_flush_buffers(codec_context_);
- ResetAudioTimingData();
+ ResetTimestampState();
}
// static
@@ -168,10 +191,11 @@ cdm::Status FFmpegCdmAudioDecoder::DecodeBuffer(
const bool is_end_of_stream = compressed_buffer_size == 0;
base::TimeDelta timestamp =
base::TimeDelta::FromMicroseconds(input_timestamp);
+
+ bool is_vorbis = codec_context_->codec_id == CODEC_ID_VORBIS;
if (!is_end_of_stream) {
if (last_input_timestamp_ == media::kNoTimestamp()) {
- if (codec_context_->codec_id == CODEC_ID_VORBIS &&
- timestamp < base::TimeDelta()) {
+ if (is_vorbis && timestamp < base::TimeDelta()) {
// Dropping frames for negative timestamps as outlined in section A.2
// in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
int frames_to_drop = floor(
@@ -230,17 +254,19 @@ cdm::Status FFmpegCdmAudioDecoder::DecodeBuffer(
packet.size -= result;
packet.data += result;
- if (output_timestamp_base_ == media::kNoTimestamp() && !is_end_of_stream) {
+ if (output_timestamp_helper_->base_timestamp() == media::kNoTimestamp() &&
+ !is_end_of_stream) {
DCHECK(timestamp != media::kNoTimestamp());
if (output_bytes_to_drop_ > 0) {
+ // Currently Vorbis is the only codec that causes us to drop samples.
// If we have to drop samples it always means the timeline starts at 0.
- output_timestamp_base_ = base::TimeDelta();
+ DCHECK_EQ(codec_context_->codec_id, CODEC_ID_VORBIS);
+ output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta());
} else {
- output_timestamp_base_ = timestamp;
+ output_timestamp_helper_->SetBaseTimestamp(timestamp);
}
}
- const uint8_t* decoded_audio_data = NULL;
int decoded_audio_size = 0;
if (frame_decoded) {
int output_sample_rate = av_frame_->sample_rate;
@@ -250,35 +276,76 @@ cdm::Status FFmpegCdmAudioDecoder::DecodeBuffer(
return cdm::kDecodeError;
}
- decoded_audio_data = av_frame_->data[0];
- decoded_audio_size =
- av_samples_get_buffer_size(NULL,
- codec_context_->channels,
- av_frame_->nb_samples,
- codec_context_->sample_fmt,
- 1);
+ decoded_audio_size = av_samples_get_buffer_size(
+ NULL, codec_context_->channels, av_frame_->nb_samples,
+ codec_context_->sample_fmt, 1);
+ // If we're decoding into float, adjust audio size.
+ if (converter_bus_ && bits_per_channel_ / 8 != sizeof(float)) {
+ DCHECK(codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT ||
+ codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP);
+ decoded_audio_size *=
+ static_cast<float>(bits_per_channel_ / 8) / sizeof(float);
+ }
}
+ int start_sample = 0;
if (decoded_audio_size > 0 && output_bytes_to_drop_ > 0) {
+ DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
+ << "Decoder didn't output full frames";
+
int dropped_size = std::min(decoded_audio_size, output_bytes_to_drop_);
- decoded_audio_data += dropped_size;
+ start_sample = dropped_size / bytes_per_frame_;
decoded_audio_size -= dropped_size;
output_bytes_to_drop_ -= dropped_size;
}
+ scoped_refptr<media::DataBuffer> output;
if (decoded_audio_size > 0) {
DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
<< "Decoder didn't output full frames";
- base::TimeDelta output_timestamp = GetNextOutputTimestamp();
- total_frames_decoded_ += decoded_audio_size / bytes_per_frame_;
+ // Convert float data using an AudioBus.
+ if (converter_bus_) {
+ // Setup the AudioBus as a wrapper of the AVFrame data and then use
+ // AudioBus::ToInterleaved() to convert the data as necessary.
+ int skip_frames = start_sample;
+ int total_frames = av_frame_->nb_samples - start_sample;
+ if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) {
+ DCHECK_EQ(converter_bus_->channels(), 1);
+ total_frames *= codec_context_->channels;
+ skip_frames *= codec_context_->channels;
+ }
+ converter_bus_->set_frames(total_frames);
+ DCHECK_EQ(decoded_audio_size,
+ converter_bus_->frames() * bytes_per_frame_);
+
+ for (int i = 0; i < converter_bus_->channels(); ++i) {
+ converter_bus_->SetChannelData(i, reinterpret_cast<float*>(
+ av_frame_->extended_data[i]) + skip_frames);
+ }
+
+ output = new media::DataBuffer(decoded_audio_size);
+ output->SetDataSize(decoded_audio_size);
+ converter_bus_->ToInterleaved(
+ converter_bus_->frames(), bits_per_channel_ / 8,
+ output->GetWritableData());
+ } else {
+ output = new media::DataBuffer(
+ av_frame_->extended_data[0] + start_sample * bytes_per_frame_,
+ decoded_audio_size);
+ }
+
+ base::TimeDelta output_timestamp =
+ output_timestamp_helper_->GetTimestamp();
+ output_timestamp_helper_->AddBytes(decoded_audio_size);
// Serialize the audio samples into |serialized_audio_frames_|.
SerializeInt64(output_timestamp.InMicroseconds());
- SerializeInt64(decoded_audio_size);
- serialized_audio_frames_.insert(serialized_audio_frames_.end(),
- decoded_audio_data,
- decoded_audio_data + decoded_audio_size);
+ SerializeInt64(output->GetDataSize());
+ serialized_audio_frames_.insert(
+ serialized_audio_frames_.end(),
+ output->GetData(),
+ output->GetData() + output->GetDataSize());
}
} while (packet.size > 0);
@@ -301,9 +368,8 @@ cdm::Status FFmpegCdmAudioDecoder::DecodeBuffer(
return cdm::kNeedMoreData;
}
-void FFmpegCdmAudioDecoder::ResetAudioTimingData() {
- output_timestamp_base_ = media::kNoTimestamp();
- total_frames_decoded_ = 0;
+void FFmpegCdmAudioDecoder::ResetTimestampState() {
+ output_timestamp_helper_->SetBaseTimestamp(media::kNoTimestamp());
last_input_timestamp_ = media::kNoTimestamp();
output_bytes_to_drop_ = 0;
}
@@ -323,15 +389,6 @@ void FFmpegCdmAudioDecoder::ReleaseFFmpegResources() {
}
}
-base::TimeDelta FFmpegCdmAudioDecoder::GetNextOutputTimestamp() const {
- DCHECK(output_timestamp_base_ != media::kNoTimestamp());
- const double total_frames_decoded = total_frames_decoded_;
- const double decoded_us = (total_frames_decoded / samples_per_second_) *
- base::Time::kMicrosecondsPerSecond;
- return output_timestamp_base_ +
- base::TimeDelta::FromMicroseconds(decoded_us);
-}
-
void FFmpegCdmAudioDecoder::SerializeInt64(int64 value) {
int previous_size = serialized_audio_frames_.size();
serialized_audio_frames_.resize(previous_size + sizeof(value));
diff --git a/webkit/media/crypto/ppapi/ffmpeg_cdm_audio_decoder.h b/webkit/media/crypto/ppapi/ffmpeg_cdm_audio_decoder.h
index 51efb6b..994f0be 100644
--- a/webkit/media/crypto/ppapi/ffmpeg_cdm_audio_decoder.h
+++ b/webkit/media/crypto/ppapi/ffmpeg_cdm_audio_decoder.h
@@ -8,6 +8,7 @@
#include <vector>
#include "base/basictypes.h"
+#include "base/memory/scoped_ptr.h"
#include "base/time.h"
#include "base/compiler_specific.h"
#include "webkit/media/crypto/ppapi/cdm/content_decryption_module.h"
@@ -15,8 +16,17 @@
struct AVCodecContext;
struct AVFrame;
+namespace media {
+class AudioBus;
+class AudioTimestampHelper;
+}
+
namespace webkit_media {
+// TODO(xhwang): This class is partially cloned from media::FFmpegAudioDecoder.
+// When media::FFmpegAudioDecoder is updated, it's a pain to keep this class
+// in sync with media::FFmpegAudioDecoder. We need a long term sustainable
+// solution for this. See http://crbug.com/169203
class FFmpegCdmAudioDecoder {
public:
explicit FFmpegCdmAudioDecoder(cdm::Allocator* allocator);
@@ -38,7 +48,7 @@ class FFmpegCdmAudioDecoder {
cdm::AudioFrames* decoded_frames);
private:
- void ResetAudioTimingData();
+ void ResetTimestampState();
void ReleaseFFmpegResources();
base::TimeDelta GetNextOutputTimestamp() const;
@@ -58,11 +68,14 @@ class FFmpegCdmAudioDecoder {
int samples_per_second_;
// Used for computing output timestamps.
+ scoped_ptr<media::AudioTimestampHelper> output_timestamp_helper_;
int bytes_per_frame_;
- base::TimeDelta output_timestamp_base_;
- int64_t total_frames_decoded_;
base::TimeDelta last_input_timestamp_;
+ // We may need to convert the audio data coming out of FFmpeg from planar
+ // float to integer.
+ scoped_ptr<media::AudioBus> converter_bus_;
+
// Number of output sample bytes to drop before generating output buffers.
// This is required for handling negative timestamps when decoding Vorbis
// audio, for example.
diff --git a/webkit/media/webkit_media.gypi b/webkit/media/webkit_media.gypi
index ade49ea..99e8470 100644
--- a/webkit/media/webkit_media.gypi
+++ b/webkit/media/webkit_media.gypi
@@ -174,6 +174,8 @@
'dependencies': [
'<(DEPTH)/base/base.gyp:base',
'<(DEPTH)/media/media.gyp:media',
+ # Include the following for media::AudioBus.
+ '<(DEPTH)/media/media.gyp:shared_memory_support',
],
'sources': [
'crypto/ppapi/cdm_video_decoder.cc',