diff options
Diffstat (limited to 'media/media_foundation')
-rw-r--r-- | media/media_foundation/README.chromium | 26 | ||||
-rw-r--r-- | media/media_foundation/file_reader_util.cc | 196 | ||||
-rw-r--r-- | media/media_foundation/file_reader_util.h | 68 | ||||
-rw-r--r-- | media/media_foundation/h264mft.cc | 633 | ||||
-rw-r--r-- | media/media_foundation/h264mft.h | 117 | ||||
-rw-r--r-- | media/media_foundation/main.cc | 299 |
6 files changed, 1339 insertions, 0 deletions
diff --git a/media/media_foundation/README.chromium b/media/media_foundation/README.chromium new file mode 100644 index 0000000..2847e97 --- /dev/null +++ b/media/media_foundation/README.chromium @@ -0,0 +1,26 @@ +This tool demonstrates the use of the Media Foundation H.264 decoder as a
+standalone Media Foundation Transform (MFT). The H.264 decoder takes sample
+objects (IMFSample) containing Annex B streams as input, and produces decoded
+NV12 video frames as output, contained in a buffer object (if DXVA is not
+enabled) or a Direct3D surface (if DXVA is enabled).
+
+This tool uses ffmpeg's parser and bitstream converter to read a file
+containing H.264 video and output packets containing Annex B streams, which are
+then fed into the H.264 decoder. This tool also demonstrates the use of the
+H.264 decoder as a state machine, and the steps taken in each state.
+
+Requirements: Windows 7
+
+Note1: This tool currently does decoding only. There is no visible output
+besides the log entries containing the state of the decoder at each
+input/output step.
+
+Note2: The decoded frame count is mysteriously off by one when DXVA is enabled.
+
+Note3: This tool requires the ffmpeg library to have the H.264 codec and Annex
+B bitstream filter. You might need to build your own, or grab one from
+http://ffmpeg.arrozcru.org/autobuilds/
+
+Note4: A single H264Mft instance handles exactly one H.264 video stream.
+Feeding more than one video stream into a single instance may result in
+undefined behavior.
diff --git a/media/media_foundation/file_reader_util.cc b/media/media_foundation/file_reader_util.cc new file mode 100644 index 0000000..d7c9944 --- /dev/null +++ b/media/media_foundation/file_reader_util.cc @@ -0,0 +1,196 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Borrowed from media/tools/omx_test/file_reader_util.cc. +// Added some functionalities related to timestamps on packets. + +#include "media/media_foundation/file_reader_util.h" + +#include <algorithm> + +#include "base/scoped_comptr_win.h" +#include "base/logging.h" +#include "media/ffmpeg/ffmpeg_common.h" +#include "media/filters/bitstream_converter.h" +#include "media/media_foundation/h264mft.h" + +namespace media { + +////////////////////////////////////////////////////////////////////////////// +// FFmpegFileReader +FFmpegFileReader::FFmpegFileReader(const std::string& filename) + : filename_(filename), + format_context_(NULL), + codec_context_(NULL), + target_stream_(-1), + converter_(NULL), + end_of_stream_(false) { +} + +FFmpegFileReader::~FFmpegFileReader() { + if (format_context_) + av_close_input_file(format_context_); +} + +bool FFmpegFileReader::Initialize() { + int result = av_open_input_file(&format_context_, filename_.c_str(), + NULL, 0, NULL); + if (result < 0) { + switch (result) { + case AVERROR_NOFMT: + LOG(ERROR) << "Error: File format not supported " + << filename_; + break; + default: + LOG(ERROR) << "Error: Could not open input for " + << filename_ << ": " << result; + break; + } + return false; + } + if (av_find_stream_info(format_context_) < 0) { + LOG(ERROR) << "can't use FFmpeg to parse stream info"; + return false; + } + + for (size_t i = 0; i < format_context_->nb_streams; ++i) { + codec_context_ = format_context_->streams[i]->codec; + + // Find the video stream. 
+ if (codec_context_->codec_type == CODEC_TYPE_VIDEO) { + target_stream_ = i; + break; + } + } + if (target_stream_ == -1) { + LOG(ERROR) << "no video in the stream"; + return false; + } + + // Initialize the bitstream filter if needed. + // TODO(hclam): find a better way to identify mp4 container. + if (codec_context_->codec_id == CODEC_ID_H264) { + converter_.reset(new media::FFmpegBitstreamConverter( + "h264_mp4toannexb", codec_context_)); + } else if (codec_context_->codec_id == CODEC_ID_MPEG4) { + converter_.reset(new media::FFmpegBitstreamConverter( + "mpeg4video_es", codec_context_)); + } else if (codec_context_->codec_id == CODEC_ID_WMV3) { + converter_.reset(new media::FFmpegBitstreamConverter( + "vc1_asftorcv", codec_context_)); + } else if (codec_context_->codec_id == CODEC_ID_VC1) { + converter_.reset(new media::FFmpegBitstreamConverter( + "vc1_asftoannexg", codec_context_)); + } + if (converter_.get() && !converter_->Initialize()) { + converter_.reset(); + LOG(ERROR) << "failed to initialize h264_mp4toannexb filter"; + return false; + } + return true; +} + +void FFmpegFileReader::Read(uint8** output, int* size) { + Read(output, size, NULL, NULL); +} + +void FFmpegFileReader::Read(uint8** output, int* size, int* duration, + int64* sample_time) { + if (!format_context_ || !codec_context_ || target_stream_ == -1) { + *size = 0; + *output = NULL; + return; + } + AVPacket packet; + bool found = false; + while (!found) { + int result = av_read_frame(format_context_, &packet); + if (result < 0) { + *output = NULL; + *size = 0; + end_of_stream_ = true; + return; + } + if (packet.stream_index == target_stream_) { + if (converter_.get() && !converter_->ConvertPacket(&packet)) { + LOG(ERROR) << "failed to convert AVPacket"; + } + *output = new uint8[packet.size]; + if (*output == NULL) { + LOG(ERROR) << "Failed to allocate buffer for annex b stream"; + *size = 0; + return; + } + *size = packet.size; + memcpy(*output, packet.data, packet.size); + if (duration) { + 
if (packet.duration == 0) { + LOG(WARNING) << "Packet duration not known"; + } + // This is in AVCodecContext::time_base units + *duration = packet.duration; + } + if (sample_time) { + if (packet.pts == AV_NOPTS_VALUE) { + LOG(ERROR) << "Packet presentation time not known"; + *sample_time = 0L; + } else { + // This is in AVCodecContext::time_base units + *sample_time = packet.pts; + } + } + found = true; + } + av_free_packet(&packet); + } +} + +bool FFmpegFileReader::GetFrameRate(int* num, int *denom) const { + if (!codec_context_) + return false; + *num = codec_context_->time_base.num; + *denom = codec_context_->time_base.den; + if (denom == 0) { + *num = 0; + return false; + } + return true; +} + +bool FFmpegFileReader::GetWidth(int* width) const { + if (!codec_context_) + return false; + *width = codec_context_->width; + return true; +} + +bool FFmpegFileReader::GetHeight(int* height) const { + if (!codec_context_) + return false; + *height = codec_context_->height; + return true; +} + +bool FFmpegFileReader::GetAspectRatio(int* num, int* denom) const { + if (!codec_context_) + return false; + AVRational aspect_ratio = codec_context_->sample_aspect_ratio; + if (aspect_ratio.num == 0 || aspect_ratio.den == 0) + return false; + *num = aspect_ratio.num; + *denom = aspect_ratio.den; + return true; +} + +int64 FFmpegFileReader::ConvertFFmpegTimeBaseTo100Ns( + int64 time_base_unit) const { + // FFmpeg units after time base conversion seems to be actually given in + // milliseconds (instead of seconds...) so we need to multiply it by a factor + // of 10,000 to convert it into units compatible with MF. 
+ CHECK(codec_context_) << "Codec context needs to be initialized"; + return time_base_unit * 10000 * codec_context_->time_base.num / + codec_context_->time_base.den; +} + +} // namespace media diff --git a/media/media_foundation/file_reader_util.h b/media/media_foundation/file_reader_util.h new file mode 100644 index 0000000..15cf643 --- /dev/null +++ b/media/media_foundation/file_reader_util.h @@ -0,0 +1,68 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. Use of this +// source code is governed by a BSD-style license that can be found in the +// LICENSE file. +// +// Borrowed from media/tools/omx_test/file_reader_util.h. +// Added some functionalities related to timestamps on packets and Media +// Foundation. + +#ifndef MEDIA_MEDIA_FOUNDATION_FILE_READER_UTIL_H_ +#define MEDIA_MEDIA_FOUNDATION_FILE_READER_UTIL_H_ + +#include <string> + +#include "base/basictypes.h" +#include "base/scoped_handle.h" +#include "base/scoped_ptr.h" + +struct AVCodecContext; +struct AVFormatContext; + +namespace media { + +class BitstreamConverter; + +// A class to help reading and parsing input file for use in omx_test. +class FileReader { + public: + virtual ~FileReader() {} + + // Initialize FileReader object, returns true if successful. + virtual bool Initialize() = 0; + + // Read the file into |output|, and output the number of bytes read to + // |size|. + virtual void Read(uint8** output, int* size) = 0; +}; + +class FFmpegFileReader : public FileReader { + public: + explicit FFmpegFileReader(const std::string& filename); + virtual ~FFmpegFileReader(); + virtual bool Initialize(); + virtual void Read(uint8** output, int* size); + + // Reads a video packet, converts it into Annex B stream, and allocates a + // buffer to |*output| and copies the contents into it. 
+ void Read(uint8** output, int* size, int* duration, int64* sample_time); + bool GetFrameRate(int* num, int* denom) const; + bool GetWidth(int* width) const; + bool GetHeight(int* height) const; + bool GetAspectRatio(int* num, int* denom) const; + int64 ConvertFFmpegTimeBaseTo100Ns(int64 time_base_unit) const; + bool end_of_stream() const { return end_of_stream_; } + + private: + std::string filename_; + AVFormatContext* format_context_; + AVCodecContext* codec_context_; + int target_stream_; + scoped_ptr<media::BitstreamConverter> converter_; + bool end_of_stream_; + + DISALLOW_COPY_AND_ASSIGN(FFmpegFileReader); +}; + +} // namespace media + +#endif // MEDIA_MEDIA_FOUNDATION_FILE_READER_UTIL_H_ diff --git a/media/media_foundation/h264mft.cc b/media/media_foundation/h264mft.cc new file mode 100644 index 0000000..bca600f --- /dev/null +++ b/media/media_foundation/h264mft.cc @@ -0,0 +1,633 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. Use of this +// source code is governed by a BSD-style license that can be found in the +// LICENSE file. + +#include "media/media_foundation/h264mft.h" + +#include <algorithm> +#include <string> + +#include <d3d9.h> +#include <evr.h> +#include <initguid.h> +#include <mfapi.h> +#include <mferror.h> +#include <mfidl.h> +#include <shlwapi.h> +#include <wmcodecdsp.h> + +#include "base/logging.h" +#include "base/scoped_comptr_win.h" +#include "media/base/video_frame.h" +#include "media/media_foundation/file_reader_util.h" + +#pragma comment(lib, "dxva2.lib") +#pragma comment(lib, "d3d9.lib") +#pragma comment(lib, "mfuuid.lib") +#pragma comment(lib, "evr.lib") +#pragma comment(lib, "mfplat.lib") + +namespace media { + +// Returns Media Foundation's H.264 decoder as an MFT, or NULL if not found +// (e.g. Not using Windows 7) +static IMFTransform* GetH264Decoder() { + // Use __uuidof() to avoid linking to a library just for the CLSID. 
+ IMFTransform* dec; + HRESULT hr = CoCreateInstance(__uuidof(CMSH264DecoderMFT), NULL, + CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&dec)); + if (FAILED(hr)) { + LOG(ERROR) << "CoCreateInstance failed " << std::hex << std::showbase << hr; + return NULL; + } + return dec; +} + +// Creates an empty Media Foundation sample with no buffers. +static IMFSample* CreateEmptySample() { + HRESULT hr; + ScopedComPtr<IMFSample> sample; + hr = MFCreateSample(sample.Receive()); + if (FAILED(hr)) { + LOG(ERROR) << "Unable to create an empty sample"; + return NULL; + } + return sample.Detach(); +} + +// Creates a Media Foundation sample with one buffer of length |buffer_length|. +static IMFSample* CreateEmptySampleWithBuffer(int buffer_length) { + CHECK_GT(buffer_length, 0); + ScopedComPtr<IMFSample> sample; + sample.Attach(CreateEmptySample()); + if (sample.get() == NULL) + return NULL; + ScopedComPtr<IMFMediaBuffer> buffer; + HRESULT hr; + hr = MFCreateMemoryBuffer(buffer_length, buffer.Receive()); + if (FAILED(hr)) { + LOG(ERROR) << "Unable to create an empty buffer"; + return NULL; + } + hr = sample->AddBuffer(buffer.get()); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to add empty buffer to sample"; + return NULL; + } + return sample.Detach(); +} + +// Creates a Media Foundation sample with one buffer containing a copy of the +// given Annex B stream data. +// If duration and sample_time are not known, provide 0. +// min_size specifies the minimum size of the buffer (might be required by +// the decoder for input). The times here should be given in 100ns units. 
+static IMFSample* CreateInputSample(uint8* stream, int size, + int64 timestamp, int64 duration, + int min_size) { + CHECK(stream != NULL); + CHECK_GT(size, 0); + ScopedComPtr<IMFSample> sample; + sample.Attach(CreateEmptySampleWithBuffer(std::max(min_size, size))); + if (sample.get() == NULL) { + LOG(ERROR) << "Failed to create empty buffer for input"; + return NULL; + } + HRESULT hr; + if (duration > 0) { + hr = sample->SetSampleDuration(duration); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to set sample duration"; + return NULL; + } + } + if (timestamp > 0) { + hr = sample->SetSampleTime(timestamp); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to set sample time"; + return NULL; + } + } + ScopedComPtr<IMFMediaBuffer> buffer; + hr = sample->GetBufferByIndex(0, buffer.Receive()); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to get buffer in sample"; + return NULL; + } + DWORD max_length, current_length; + uint8* destination; + hr = buffer->Lock(&destination, &max_length, &current_length); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to lock buffer"; + return NULL; + } + CHECK_EQ(static_cast<int>(current_length), 0); + CHECK_GE(static_cast<int>(max_length), size); + memcpy(destination, stream, size); + CHECK(SUCCEEDED(buffer->Unlock())); + hr = buffer->SetCurrentLength(size); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to set current length to " << size; + return NULL; + } + LOG(INFO) << __FUNCTION__ << " wrote " << size << " bytes into input sample"; + return sample.Detach(); +} + +// Public methods + +H264Mft::H264Mft(bool use_dxva) + : decoder_(NULL), + initialized_(false), + use_dxva_(use_dxva), + drain_message_sent_(false), + in_buffer_size_(0), + out_buffer_size_(0), + frames_read_(0), + frames_decoded_(0), + width_(0), + height_(0), + stride_(0) { +} + +H264Mft::~H264Mft() { +} + +bool H264Mft::Init(IDirect3DDeviceManager9* dev_manager, + int frame_rate_num, int frame_rate_denom, + int width, int height, + int aspect_num, int aspect_denom) { + if (initialized_) + 
return true; + if (!InitDecoder(dev_manager, frame_rate_num, frame_rate_denom, + width, height, aspect_num, aspect_denom)) + return false; + if (!GetStreamsInfoAndBufferReqs()) + return false; + if (!SendStartMessage()) + return false; + initialized_ = true; + return true; +} + +bool H264Mft::SendInput(uint8* data, int size, int64 timestamp, + int64 duration) { + CHECK(initialized_); + CHECK(data != NULL); + CHECK_GT(size, 0); + if (drain_message_sent_) { + LOG(ERROR) << "Drain message was already sent, but trying to send more " + "input to decoder"; + return false; + } + ScopedComPtr<IMFSample> sample; + sample.Attach(CreateInputSample(data, size, timestamp, duration, + in_buffer_size_)); + if (sample.get() == NULL) { + LOG(ERROR) << "Failed to convert input stream to sample"; + return false; + } + HRESULT hr = decoder_->ProcessInput(0, sample.get(), 0); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to ProcessInput, hr = " << std::hex << hr; + return false; + } + frames_read_++; + return true; +} + +static const char* const ProcessOutputStatusToCString(HRESULT hr) { + if (hr == MF_E_TRANSFORM_STREAM_CHANGE) + return "media stream change occurred, need to set output type"; + if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) + return "decoder needs more samples"; + else + return "unhandled error from ProcessOutput"; +} + +H264Mft::DecoderOutputState H264Mft::GetOutput( + scoped_refptr<VideoFrame>* decoded_frame) { + CHECK(initialized_); + CHECK(decoded_frame != NULL); + + ScopedComPtr<IMFSample> output_sample; + if (!use_dxva_) { + // If DXVA is enabled, the decoder will allocate the sample for us. 
+ output_sample.Attach(CreateEmptySampleWithBuffer(out_buffer_size_)); + if (output_sample.get() == NULL) { + LOG(ERROR) << "GetSample: failed to create empty output sample"; + return kNoMemory; + } + } + MFT_OUTPUT_DATA_BUFFER output_data_buffer; + output_data_buffer.dwStreamID = 0; + output_data_buffer.pSample = output_sample; + output_data_buffer.dwStatus = 0; + output_data_buffer.pEvents = NULL; + DWORD status; + HRESULT hr; + hr = decoder_->ProcessOutput(0, // No flags + 1, // # of out streams to pull from + &output_data_buffer, + &status); + + // TODO(imcheng): Handle the events, if any. (No event is returned most of + // the time.) + IMFCollection* events = output_data_buffer.pEvents; + if (events != NULL) { + LOG(INFO) << "Got events from ProcessOuput, but discarding"; + events->Release(); + } + if (FAILED(hr)) { + LOG(INFO) << "ProcessOutput failed with status " << std::hex << hr + << ", meaning..." << ProcessOutputStatusToCString(hr); + if (hr == MF_E_TRANSFORM_STREAM_CHANGE) { + if (!SetDecoderOutputMediaType(MFVideoFormat_NV12)) { + LOG(ERROR) << "Failed to reset output type"; + return kResetOutputStreamFailed; + } else { + LOG(INFO) << "Reset output type done"; + return kResetOutputStreamOk; + } + } else if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) { + // At this point we have either read everything from file or we can + // still feed the decoder input. If we have read everything then we + // should've sent a drain message to the MFT. If the drain message is + // sent but it doesn't give out anymore output then we know the decoder + // has processed everything. + if (drain_message_sent_) { + LOG(INFO) << "Drain message was already sent + no output => done"; + return kNoMoreOutput; + } else { + return kNeedMoreInput; + } + } else { + return kUnspecifiedError; + } + } else { + // A decoded sample was successfully obtained. 
+ LOG(INFO) << "Got a decoded sample from decoder"; + if (use_dxva_) { + // If dxva is enabled, we did not provide a sample to ProcessOutput, + // i.e. output_sample is NULL. + output_sample.Attach(output_data_buffer.pSample); + if (output_sample.get() == NULL) { + LOG(ERROR) << "Output sample using DXVA is NULL - ProcessOutput did " + << "not provide it!"; + return kOutputSampleError; + } + } + int64 timestamp, duration; + hr = output_sample->GetSampleTime(&timestamp); + hr = output_sample->GetSampleDuration(&duration); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to get sample duration or timestamp " + << std::hex << hr; + return kOutputSampleError; + } + + // The duration and timestamps are in 100-ns units, so divide by 10 + // to convert to microseconds. + timestamp /= 10; + duration /= 10; + + // Sanity checks for checking if there is really something in the sample. + DWORD buf_count; + hr = output_sample->GetBufferCount(&buf_count); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to get buff count, hr = " << std::hex << hr; + return kOutputSampleError; + } + if (buf_count == 0) { + LOG(ERROR) << "buf_count is 0, dropping sample"; + return kOutputSampleError; + } + ScopedComPtr<IMFMediaBuffer> out_buffer; + hr = output_sample->GetBufferByIndex(0, out_buffer.Receive()); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to get decoded output buffer"; + return kOutputSampleError; + } + + // To obtain the data, the caller should call the Lock() method instead + // of using the data field. + // In NV12, there are only 2 planes - the Y plane, and the interleaved UV + // plane. Both have the same strides. + uint8* null_data[2] = { NULL, NULL }; + int32 strides[2] = { stride_, stride_ }; + VideoFrame::CreateFrameExternal( + use_dxva_ ? 
VideoFrame::TYPE_DIRECT3DSURFACE : + VideoFrame::TYPE_MFBUFFER, + VideoFrame::NV12, + width_, + height_, + 2, + null_data, + strides, + base::TimeDelta::FromMicroseconds(timestamp), + base::TimeDelta::FromMicroseconds(duration), + out_buffer.Detach(), + decoded_frame); + CHECK(decoded_frame->get() != NULL); + frames_decoded_++; + return kOutputOk; + } +} + +bool H264Mft::SendDrainMessage() { + CHECK(initialized_); + if (drain_message_sent_) { + LOG(ERROR) << "Drain message was already sent before!"; + return false; + } + + // Send the drain message with no parameters. + HRESULT hr = decoder_->ProcessMessage(MFT_MESSAGE_COMMAND_DRAIN, NULL); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to send the drain message to decoder"; + return false; + } + drain_message_sent_ = true; + return true; +} + +// Private methods + +bool H264Mft::InitDecoder(IDirect3DDeviceManager9* dev_manager, + int frame_rate_num, int frame_rate_denom, + int width, int height, + int aspect_num, int aspect_denom) { + decoder_.Attach(GetH264Decoder()); + if (!decoder_.get()) + return false; + if (!CheckDecoderProperties()) + return false; + if (use_dxva_) { + if (!CheckDecoderDxvaSupport()) + return false; + if (!SetDecoderD3d9Manager(dev_manager)) + return false; + } + if (!SetDecoderMediaTypes(frame_rate_num, frame_rate_denom, + width, height, + aspect_num, aspect_denom)) { + return false; + } + return true; +} + +bool H264Mft::CheckDecoderProperties() { + DCHECK(decoder_.get()); + DWORD in_stream_count; + DWORD out_stream_count; + HRESULT hr; + hr = decoder_->GetStreamCount(&in_stream_count, &out_stream_count); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to get stream count"; + return false; + } else { + LOG(INFO) << "Input stream count: " << in_stream_count << ", " + << "Output stream count: " << out_stream_count; + bool mismatch = false; + if (in_stream_count != 1) { + LOG(ERROR) << "Input stream count mismatch!"; + mismatch = true; + } + if (out_stream_count != 1) { + LOG(ERROR) << "Output 
stream count mismatch!"; + mismatch = true; + } + return !mismatch; + } +} + +bool H264Mft::CheckDecoderDxvaSupport() { + HRESULT hr; + ScopedComPtr<IMFAttributes> attributes; + hr = decoder_->GetAttributes(attributes.Receive()); + if (FAILED(hr)) { + LOG(ERROR) << "Unlock: Failed to get attributes, hr = " + << std::hex << std::showbase << hr; + return false; + } + UINT32 dxva; + hr = attributes->GetUINT32(MF_SA_D3D_AWARE, &dxva); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to get DXVA attr, hr = " + << std::hex << std::showbase << hr + << "this might not be the right decoder."; + return false; + } + LOG(INFO) << "Support dxva? " << dxva; + if (!dxva) { + LOG(ERROR) << "Decoder does not support DXVA - this might not be the " + << "right decoder."; + return false; + } + return true; +} + +bool H264Mft::SetDecoderD3d9Manager(IDirect3DDeviceManager9* dev_manager) { + DCHECK(use_dxva_) << "SetDecoderD3d9Manager should only be called if DXVA is " + << "enabled"; + CHECK(dev_manager != NULL); + HRESULT hr; + hr = decoder_->ProcessMessage(MFT_MESSAGE_SET_D3D_MANAGER, + reinterpret_cast<ULONG_PTR>(dev_manager)); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to set D3D9 device to decoder"; + return false; + } + return true; +} + +bool H264Mft::SetDecoderMediaTypes(int frame_rate_num, int frame_rate_denom, + int width, int height, + int aspect_num, int aspect_denom) { + DCHECK(decoder_.get()); + if (!SetDecoderInputMediaType(frame_rate_num, frame_rate_denom, + width, height, + aspect_num, aspect_denom)) + return false; + if (!SetDecoderOutputMediaType(MFVideoFormat_NV12)) { + return false; + } + return true; +} + +bool H264Mft::SetDecoderInputMediaType(int frame_rate_num, int frame_rate_denom, + int width, int height, + int aspect_num, int aspect_denom) { + ScopedComPtr<IMFMediaType> media_type; + HRESULT hr; + hr = MFCreateMediaType(media_type.Receive()); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to create empty media type object"; + return NULL; + } + hr = 
media_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video); + if (FAILED(hr)) { + LOG(ERROR) << "SetGUID for major type failed"; + return NULL; + } + hr = media_type->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264); + if (FAILED(hr)) { + LOG(ERROR) << "SetGUID for subtype failed"; + return NULL; + } + + // Provide additional info to the decoder to avoid a format change during + // streaming. + if (frame_rate_num == 0 || frame_rate_denom == 0) { + hr = MFSetAttributeRatio(media_type.get(), MF_MT_FRAME_RATE, + frame_rate_num, frame_rate_denom); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to set frame rate"; + return NULL; + } + } + if (width == 0 || height == 0) { + hr = MFSetAttributeSize(media_type.get(), MF_MT_FRAME_SIZE, width, height); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to set frame size"; + return NULL; + } + } + + // TODO(imcheng): Not sure about this, but this is the recommended value by + // MSDN. + hr = media_type->SetUINT32(MF_MT_INTERLACE_MODE, + MFVideoInterlace_MixedInterlaceOrProgressive); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to set interlace mode"; + return NULL; + } + if (aspect_num == 0 || aspect_denom == 0) { + hr = MFSetAttributeRatio(media_type.get(), MF_MT_PIXEL_ASPECT_RATIO, + aspect_num, aspect_denom); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to get aspect ratio"; + return NULL; + } + } + hr = decoder_->SetInputType(0, media_type.get(), 0); // No flags + if (FAILED(hr)) { + LOG(ERROR) << "Failed to set decoder's input type"; + return false; + } + return true; +} + +bool H264Mft::SetDecoderOutputMediaType(const GUID subtype) { + DWORD i = 0; + IMFMediaType* out_media_type; + bool found = false; + while (SUCCEEDED(decoder_->GetOutputAvailableType(0, i, &out_media_type))) { + GUID out_subtype; + HRESULT hr; + hr = out_media_type->GetGUID(MF_MT_SUBTYPE, &out_subtype); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to GetGUID() on GetOutputAvailableType() " << i; + out_media_type->Release(); + continue; + } + if (out_subtype == subtype) { 
+ LOG(INFO) << "|subtype| is at index " + << i << " in GetOutputAvailableType()"; + hr = decoder_->SetOutputType(0, out_media_type, 0); // No flags + hr = MFGetAttributeSize(out_media_type, MF_MT_FRAME_SIZE, + reinterpret_cast<UINT32*>(&width_), + reinterpret_cast<UINT32*>(&height_)); + hr = MFGetStrideForBitmapInfoHeader( + MFVideoFormat_NV12.Data1, + width_, + reinterpret_cast<LONG*>(&stride_)); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to SetOutputType to |subtype| or obtain " + << "width/height/stride " << std::hex << hr; + } else { + found = true; + out_media_type->Release(); + break; + } + } + i++; + out_media_type->Release(); + } + if (!found) { + LOG(ERROR) << "NV12 was not found in GetOutputAvailableType()"; + return false; + } + return true; +} + +bool H264Mft::SendStartMessage() { + HRESULT hr; + hr = decoder_->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, NULL); + if (FAILED(hr)) { + LOG(ERROR) << "Process start message failed, hr = " + << std::hex << std::showbase << hr; + return false; + } else { + LOG(INFO) << "Sent a message to decoder to indicate start of stream"; + return true; + } +} + +// Prints out info about the input/output streams, gets the minimum buffer sizes +// for input and output samples. +// The MFT will not allocate buffer for neither input nor output, so we have +// to do it ourselves and make sure they're the correct size. +// Exception is when dxva is enabled, the decoder will allocate output. 
+bool H264Mft::GetStreamsInfoAndBufferReqs() { + DCHECK(decoder_.get()); + HRESULT hr; + MFT_INPUT_STREAM_INFO input_stream_info; + hr = decoder_->GetInputStreamInfo(0, &input_stream_info); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to get input stream info"; + return false; + } + LOG(INFO) << "Input stream info: "; + LOG(INFO) << "Max latency: " << input_stream_info.hnsMaxLatency; + + // There should be three flags, one for requiring a whole frame be in a + // single sample, one for requiring there be one buffer only in a single + // sample, and one that specifies a fixed sample size. (as in cbSize) + LOG(INFO) << "Flags: " + << std::hex << std::showbase << input_stream_info.dwFlags; + CHECK_EQ(static_cast<int>(input_stream_info.dwFlags), 0x7); + LOG(INFO) << "Min buffer size: " << input_stream_info.cbSize; + LOG(INFO) << "Max lookahead: " << input_stream_info.cbMaxLookahead; + LOG(INFO) << "Alignment: " << input_stream_info.cbAlignment; + if (input_stream_info.cbAlignment > 0) { + LOG(WARNING) << "Warning: Decoder requires input to be aligned"; + } + in_buffer_size_ = input_stream_info.cbSize; + + MFT_OUTPUT_STREAM_INFO output_stream_info; + hr = decoder_->GetOutputStreamInfo(0, &output_stream_info); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to get output stream info"; + return false; + } + LOG(INFO) << "Output stream info: "; + + // The flags here should be the same and mean the same thing, except when + // DXVA is enabled, there is an extra 0x100 flag meaning decoder will + // allocate its own sample. + LOG(INFO) << "Flags: " + << std::hex << std::showbase << output_stream_info.dwFlags; + CHECK_EQ(static_cast<int>(output_stream_info.dwFlags), + use_dxva_ ? 
0x107 : 0x7); + LOG(INFO) << "Min buffer size: " << output_stream_info.cbSize; + LOG(INFO) << "Alignment: " << output_stream_info.cbAlignment; + if (output_stream_info.cbAlignment > 0) { + LOG(WARNING) << "Warning: Decoder requires output to be aligned"; + } + out_buffer_size_ = output_stream_info.cbSize; + + return true; +} + +} // namespace media diff --git a/media/media_foundation/h264mft.h b/media/media_foundation/h264mft.h new file mode 100644 index 0000000..3e60a23 --- /dev/null +++ b/media/media_foundation/h264mft.h @@ -0,0 +1,117 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. Use of this +// source code is governed by a BSD-style license that can be found in the +// LICENSE file. +// +// Decodes H.264 Annex B streams using the Media Foundation H.264 decoder as +// a standalone Media Foundation Transform (MFT). +// Note: A single H264Mft instance is only for 1 H.264 video stream only. +// Inputting streams consisting of more than 1 video to a single instance +// may result in undefined behavior. + +#ifndef MEDIA_MEDIA_FOUNDATION_H264MFT_H_ +#define MEDIA_MEDIA_FOUNDATION_H264MFT_H_ + +#include <string> + +#include <mfidl.h> + +#include "base/basictypes.h" +#include "base/scoped_ptr.h" +#include "base/scoped_comptr_win.h" +#include "media/base/video_frame.h" + +struct IDirect3DDeviceManager9; +struct IMFSample; +struct IMFTransform; + +namespace media { + +// A decoder that takes samples of Annex B streams then outputs decoded frames. +class H264Mft { + public: + enum DecoderOutputState { + kOutputOk = 0, + kResetOutputStreamFailed, + kResetOutputStreamOk, + kNeedMoreInput, + kNoMoreOutput, + kUnspecifiedError, + kNoMemory, + kOutputSampleError + }; + explicit H264Mft(bool use_dxva); + ~H264Mft(); + + // Initializes the decoder. |dev_manager| is not required if the decoder does + // not use DXVA. + // If the other arguments are not known, leave them as 0. 
They can be + // provided to the decoder to try to avoid an initial output format change, + // but it is not necessary to have them. + bool Init(IDirect3DDeviceManager9* dev_manager, + int frame_rate_num, int frame_rate_denom, + int width, int height, + int aspect_num, int aspect_denom); + + // Sends an Annex B stream to the decoder. The times here should be given + // in 100ns units. This creates a IMFSample, copies the stream over to the + // sample, and sends the sample to the decoder. + // Returns: true if the sample was sent successfully. + bool SendInput(uint8* data, int size, int64 timestamp, int64 duration); + + // Tries to get an output sample from the decoder. + // Returns: status of the decoder, and if successful, a decoded sample. + DecoderOutputState GetOutput(scoped_refptr<VideoFrame>* decoded_frame); + + // Sends a drain message to the decoder to indicate no more input will be + // sent. SendInput() should not be called after calling this method. + // Returns: true if the drain message was sent successfully. 
+ bool SendDrainMessage(); + + bool initialized() const { return initialized_; } + bool use_dxva() const { return use_dxva_; } + bool drain_message_sent() const { return drain_message_sent_; } + int in_buffer_size() const { return in_buffer_size_; } + int out_buffer_size() const { return out_buffer_size_; } + int frames_read() const { return frames_read_; } + int frames_decoded() const { return frames_decoded_; } + int width() const { return width_; } + int height() const { return height_; } + + private: + bool InitDecoder(IDirect3DDeviceManager9* dev_manager, + int frame_rate_num, int frame_rate_denom, + int width, int height, + int aspect_num, int aspect_denom); + bool CheckDecoderProperties(); + bool CheckDecoderDxvaSupport(); + bool SetDecoderD3d9Manager(IDirect3DDeviceManager9* dev_manager); + bool SetDecoderMediaTypes(int frame_rate_num, int frame_rate_denom, + int width, int height, + int aspect_num, int aspect_denom); + bool SetDecoderInputMediaType(int frame_rate_num, int frame_rate_denom, + int width, int height, + int aspect_num, int aspect_denom); + bool SetDecoderOutputMediaType(const GUID subtype); + bool SendStartMessage(); + bool GetStreamsInfoAndBufferReqs(); + + ScopedComPtr<IMFTransform> decoder_; + bool initialized_; + bool use_dxva_; + bool drain_message_sent_; + + // Minimum input and output buffer sizes as required by the decoder. + int in_buffer_size_; + int out_buffer_size_; + int frames_read_; + int frames_decoded_; + int width_; + int height_; + int stride_; + + DISALLOW_COPY_AND_ASSIGN(H264Mft); +}; + +} // namespace media + +#endif // MEDIA_MF_H264MFT_H_ diff --git a/media/media_foundation/main.cc b/media/media_foundation/main.cc new file mode 100644 index 0000000..fbb1bdc2 --- /dev/null +++ b/media/media_foundation/main.cc @@ -0,0 +1,299 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. Use of this +// source code is governed by a BSD-style license that can be found in the +// LICENSE file. 
+// +// Demonstrates the use of H264Mft. + +#include <d3d9.h> +#include <dxva2api.h> +#include <mfapi.h> + +#include "base/command_line.h" +#include "base/file_path.h" +#include "base/logging.h" +#include "base/scoped_comptr_win.h" +#include "base/scoped_ptr.h" +#include "base/time.h" +#include "media/base/media.h" +#include "media/ffmpeg/ffmpeg_common.h" +#include "media/ffmpeg/file_protocol.h" +#include "media/media_foundation/file_reader_util.h" +#include "media/media_foundation/h264mft.h" + +using media::FFmpegFileReader; +using media::H264Mft; +using media::VideoFrame; + +namespace { + +void usage() { + static char* usage_msg = + "Usage: h264mft [--enable-dxva] --input-file=FILE\n" + "enable-dxva: Enables hardware accelerated decoding\n" + "To display this message: h264mft --help"; + fprintf(stderr, "%s\n", usage_msg); +} + +static bool InitFFmpeg() { + if (!media::InitializeMediaLibrary(FilePath())) + return false; + avcodec_init(); + av_register_all(); + av_register_protocol2(&kFFmpegFileProtocol, sizeof(kFFmpegFileProtocol)); + return true; +} + +bool InitComLibraries() { + HRESULT hr; + hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED | COINIT_DISABLE_OLE1DDE); + if (FAILED(hr)) { + LOG(ERROR) << "CoInit fail"; + return false; + } + hr = MFStartup(MF_VERSION, MFSTARTUP_FULL); + if (FAILED(hr)) { + LOG(ERROR) << "MFStartup fail"; + CoUninitialize(); + return false; + } + return true; +} + +void ShutdownComLibraries() { + HRESULT hr; + hr = MFShutdown(); + if (FAILED(hr)) { + LOG(WARNING) << "Warning: MF failed to shutdown"; + } + CoUninitialize(); +} + +IDirect3DDeviceManager9* CreateD3DDevManager(HWND video_window, + int width, + int height, + IDirect3D9** direct3d, + IDirect3DDevice9** device) { + CHECK(video_window != NULL); + CHECK(direct3d != NULL); + CHECK(device != NULL); + + ScopedComPtr<IDirect3DDeviceManager9> dev_manager; + ScopedComPtr<IDirect3D9> d3d; + d3d.Attach(Direct3DCreate9(D3D_SDK_VERSION)); + if (d3d == NULL) { + LOG(ERROR) << "Failed 
to create D3D9"; + return NULL; + } + D3DPRESENT_PARAMETERS present_params = {0}; + + present_params.BackBufferWidth = width; + present_params.BackBufferHeight = height; + present_params.BackBufferFormat = D3DFMT_UNKNOWN; + present_params.BackBufferCount = 1; + present_params.SwapEffect = D3DSWAPEFFECT_DISCARD; + present_params.hDeviceWindow = video_window; + present_params.Windowed = TRUE; + present_params.Flags = D3DPRESENTFLAG_VIDEO; + present_params.FullScreen_RefreshRateInHz = 0; + present_params.PresentationInterval = 0; + + ScopedComPtr<IDirect3DDevice9> temp_device; + + // D3DCREATE_HARDWARE_VERTEXPROCESSING specifies hardware vertex processing. + // (Is it even needed for just video decoding?) + HRESULT hr = d3d->CreateDevice(D3DADAPTER_DEFAULT, + D3DDEVTYPE_HAL, + video_window, + D3DCREATE_HARDWARE_VERTEXPROCESSING, + &present_params, + temp_device.Receive()); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to create D3D Device"; + return NULL; + } + UINT dev_manager_reset_token = 0; + hr = DXVA2CreateDirect3DDeviceManager9(&dev_manager_reset_token, + dev_manager.Receive()); + if (FAILED(hr)) { + LOG(ERROR) << "Couldn't create D3D Device manager"; + return NULL; + } + hr = dev_manager->ResetDevice(temp_device.get(), dev_manager_reset_token); + if (FAILED(hr)) { + LOG(ERROR) << "Failed to set device to device manager"; + return NULL; + } + *direct3d = d3d.Detach(); + *device = temp_device.Detach(); + return dev_manager.Detach(); +} + +// Example usage of how to get a decoded frame from the decoder. +bool GetDecodedSample(FFmpegFileReader* reader, H264Mft* decoder, + scoped_refptr<VideoFrame>* decoded_frame) { + // Keep feeding the MFT with inputs until it spits out an output. + for (;;) { + // First check if there is output. 
+ H264Mft::DecoderOutputState state = decoder->GetOutput(decoded_frame); + if (state == H264Mft::kOutputOk) { + LOG(INFO) << "Got an output from decoder"; + return true; + } else if (state == H264Mft::kResetOutputStreamFailed) { + LOG(ERROR) << "Reset output stream failed, quitting"; + return false; + } else if (state == H264Mft::kResetOutputStreamOk) { + LOG(INFO) << "Reset output stream, try to get output again"; + continue; + } else if (state == H264Mft::kNeedMoreInput) { + LOG(INFO) << "Need more input"; + uint8* input_stream_dummy; + int size; + int duration; + int64 timestamp; + reader->Read(&input_stream_dummy, &size, &duration, ×tamp); + scoped_array<uint8> input_stream(input_stream_dummy); + if (input_stream.get() == NULL) { + LOG(INFO) << "No more input, sending drain message to decoder"; + if (!decoder->SendDrainMessage()) { + LOG(ERROR) << "Failed to send drain message, quitting"; + return false; + } else { + continue; // Try reading the rest of the drained outputs. + } + } else { + // We read an input stream, we can feed it into the decoder. + if (!decoder->SendInput(input_stream.get(), size, + reader->ConvertFFmpegTimeBaseTo100Ns(timestamp), + reader->ConvertFFmpegTimeBaseTo100Ns(duration))) { + LOG(ERROR) << "Failed to send input, dropping frame..."; + } + continue; // Try reading the output after attempting to send an input. 
+ } + } else if (state == H264Mft::kNoMoreOutput) { + LOG(INFO) << "Decoder has processed everything, quitting"; + return false; + } else if (state == H264Mft::kUnspecifiedError) { + LOG(ERROR) << "Unknown error, quitting"; + return false; + } else if (state == H264Mft::kNoMemory) { + LOG(ERROR) << "Not enough memory for sample, quitting"; + return false; + } else if (state == H264Mft::kOutputSampleError) { + LOG(ERROR) << "Inconsistent sample, dropping..."; + continue; + } else { + NOTREACHED(); + } + } // for (;;) + NOTREACHED(); +} + +static void ReleaseOutputBuffer(VideoFrame* frame) { + if (frame->type() == VideoFrame::TYPE_MFBUFFER || + frame->type() == VideoFrame::TYPE_DIRECT3DSURFACE) { + static_cast<IMFMediaBuffer*>(frame->private_buffer())->Release(); + } else { + return; + } +} + +int Run(bool use_dxva, const std::string& input_file) { + scoped_ptr<FFmpegFileReader> reader(new FFmpegFileReader(input_file)); + if (reader.get() == NULL) { + LOG(ERROR) << "Failed to create reader"; + return -1; + } + if (!reader->Initialize()) { + LOG(ERROR) << "Failed to initialize reader"; + return -1; + } + int frame_rate_num = 0, frame_rate_denom = 0; + if (!reader->GetFrameRate(&frame_rate_num, &frame_rate_denom)) { + LOG(WARNING) << "Failed to get frame rate from reader"; + } + int width = 0, height = 0; + if (!reader->GetWidth(&width) || !reader->GetHeight(&height)) { + LOG(WARNING) << "Failed to get width/height from reader"; + } + int aspect_ratio_num = 0, aspect_ratio_denom = 0; + if (!reader->GetAspectRatio(&aspect_ratio_num, &aspect_ratio_denom)) { + LOG(WARNING) << "Failed to get aspect ratio from reader"; + } + ScopedComPtr<IDirect3D9> d3d9; + ScopedComPtr<IDirect3DDevice9> device; + ScopedComPtr<IDirect3DDeviceManager9> dev_manager; + if (use_dxva) { + dev_manager.Attach(CreateD3DDevManager(GetDesktopWindow(), + width, + height, + d3d9.Receive(), + device.Receive())); + if (dev_manager.get() == NULL) { + LOG(ERROR) << "Cannot create D3D9 manager"; + return 
-1; + } + } + scoped_ptr<H264Mft> mft(new H264Mft(use_dxva)); + if (mft.get() == NULL) { + LOG(ERROR) << "Failed to create MFT"; + return -1; + } + if (!mft->Init(dev_manager, frame_rate_num, frame_rate_denom, width, height, + aspect_ratio_num, aspect_ratio_denom)) { + LOG(ERROR) << "Failed to initialize mft"; + return -1; + } + base::TimeDelta decode_time; + while (true) { + // Do nothing with the sample except to let it go out of scope + scoped_refptr<VideoFrame> decoded_frame; + base::Time decode_start(base::Time::Now()); + if (!GetDecodedSample(reader.get(), mft.get(), &decoded_frame)) + break; + decode_time += base::Time::Now() - decode_start; + ReleaseOutputBuffer(decoded_frame.get()); + } + printf("All done, frames read: %d, frames decoded: %d\n", + mft->frames_read(), mft->frames_decoded()); + printf("Took %lldms\n", decode_time.InMilliseconds()); + return 0; +} + +} // namespace + +int main(int argc, char** argv) { + CommandLine::Init(argc, argv); + if (argc == 1) { + fprintf(stderr, "Not enough arguments\n"); + usage(); + return -1; + } + + const CommandLine& cmd_line = *CommandLine::ForCurrentProcess(); + if (cmd_line.HasSwitch("help")) { + usage(); + return -1; + } + bool use_dxva = cmd_line.HasSwitch("enable-dxva"); + std::string input_file = cmd_line.GetSwitchValueASCII("input-file"); + if (input_file.empty()) { + fprintf(stderr, "No input file provided\n"); + usage(); + return -1; + } + printf("enable-dxva: %d\n", use_dxva); + printf("input-file: %s\n", input_file.c_str()); + + if (!InitFFmpeg()) { + LOG(ERROR) << "InitFFMpeg() failed"; + return -1; + } + if (!InitComLibraries()) { + LOG(ERROR) << "InitComLibraries() failed"; + return -1; + } + int ret = Run(use_dxva, input_file); + ShutdownComLibraries(); + printf("Done\n"); + return ret; +} |