// Copyright (c) 2010 The Chromium Authors. All rights reserved. Use of this // source code is governed by a BSD-style license that can be found in the // LICENSE file. #include "media/media_foundation/h264mft.h" #include #include #include #include #include #include #include #include #include #include #include "base/logging.h" #include "base/scoped_comptr_win.h" #include "media/base/video_frame.h" #include "media/media_foundation/file_reader_util.h" #pragma comment(lib, "dxva2.lib") #pragma comment(lib, "d3d9.lib") #pragma comment(lib, "mfuuid.lib") #pragma comment(lib, "evr.lib") #pragma comment(lib, "mfplat.lib") namespace media { // Returns Media Foundation's H.264 decoder as an MFT, or NULL if not found // (e.g. Not using Windows 7) static IMFTransform* GetH264Decoder() { // Use __uuidof() to avoid linking to a library just for the CLSID. IMFTransform* dec; HRESULT hr = CoCreateInstance(__uuidof(CMSH264DecoderMFT), NULL, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&dec)); if (FAILED(hr)) { LOG(ERROR) << "CoCreateInstance failed " << std::hex << std::showbase << hr; return NULL; } return dec; } // Creates an empty Media Foundation sample with no buffers. static IMFSample* CreateEmptySample() { HRESULT hr; ScopedComPtr sample; hr = MFCreateSample(sample.Receive()); if (FAILED(hr)) { LOG(ERROR) << "Unable to create an empty sample"; return NULL; } return sample.Detach(); } // Creates a Media Foundation sample with one buffer of length |buffer_length|. static IMFSample* CreateEmptySampleWithBuffer(int buffer_length) { CHECK_GT(buffer_length, 0); ScopedComPtr sample; sample.Attach(CreateEmptySample()); if (sample.get() == NULL) return NULL; ScopedComPtr buffer; HRESULT hr; hr = MFCreateMemoryBuffer(buffer_length, buffer.Receive()); if (FAILED(hr)) { LOG(ERROR) << "Unable to create an empty buffer"; return NULL; } hr = sample->AddBuffer(buffer.get()); if (FAILED(hr)) { LOG(ERROR) << "Failed to add empty buffer to sample"; return NULL; } return sample.Detach(); } // Creates a Media Foundation sample with one buffer containing a copy of the // given Annex B stream data. // If duration and sample_time are not known, provide 0. // min_size specifies the minimum size of the buffer (might be required by // the decoder for input). The times here should be given in 100ns units. static IMFSample* CreateInputSample(uint8* stream, int size, int64 timestamp, int64 duration, int min_size) { CHECK(stream != NULL); CHECK_GT(size, 0); ScopedComPtr sample; sample.Attach(CreateEmptySampleWithBuffer(std::max(min_size, size))); if (sample.get() == NULL) { LOG(ERROR) << "Failed to create empty buffer for input"; return NULL; } HRESULT hr; if (duration > 0) { hr = sample->SetSampleDuration(duration); if (FAILED(hr)) { LOG(ERROR) << "Failed to set sample duration"; return NULL; } } if (timestamp > 0) { hr = sample->SetSampleTime(timestamp); if (FAILED(hr)) { LOG(ERROR) << "Failed to set sample time"; return NULL; } } ScopedComPtr buffer; hr = sample->GetBufferByIndex(0, buffer.Receive()); if (FAILED(hr)) { LOG(ERROR) << "Failed to get buffer in sample"; return NULL; } DWORD max_length, current_length; uint8* destination; hr = buffer->Lock(&destination, &max_length, ¤t_length); if (FAILED(hr)) { LOG(ERROR) << "Failed to lock buffer"; return NULL; } CHECK_EQ(static_cast(current_length), 0); CHECK_GE(static_cast(max_length), size); memcpy(destination, stream, size); CHECK(SUCCEEDED(buffer->Unlock())); hr = buffer->SetCurrentLength(size); if (FAILED(hr)) { LOG(ERROR) << "Failed to set current length to " << size; return NULL; } LOG(INFO) << __FUNCTION__ << " wrote " << size << " bytes into input sample"; return sample.Detach(); } // Public methods H264Mft::H264Mft(bool use_dxva) : decoder_(NULL), initialized_(false), use_dxva_(use_dxva), drain_message_sent_(false), in_buffer_size_(0), out_buffer_size_(0), frames_read_(0), frames_decoded_(0), width_(0), height_(0), stride_(0) { } H264Mft::~H264Mft() { } bool H264Mft::Init(IDirect3DDeviceManager9* dev_manager, int frame_rate_num, int frame_rate_denom, int width, int height, int aspect_num, int aspect_denom) { if (initialized_) return true; if (!InitDecoder(dev_manager, frame_rate_num, frame_rate_denom, width, height, aspect_num, aspect_denom)) return false; if (!GetStreamsInfoAndBufferReqs()) return false; if (!SendStartMessage()) return false; initialized_ = true; return true; } bool H264Mft::SendInput(uint8* data, int size, int64 timestamp, int64 duration) { CHECK(initialized_); CHECK(data != NULL); CHECK_GT(size, 0); if (drain_message_sent_) { LOG(ERROR) << "Drain message was already sent, but trying to send more " "input to decoder"; return false; } ScopedComPtr sample; sample.Attach(CreateInputSample(data, size, timestamp, duration, in_buffer_size_)); if (sample.get() == NULL) { LOG(ERROR) << "Failed to convert input stream to sample"; return false; } HRESULT hr = decoder_->ProcessInput(0, sample.get(), 0); if (FAILED(hr)) { LOG(ERROR) << "Failed to ProcessInput, hr = " << std::hex << hr; return false; } frames_read_++; return true; } static const char* const ProcessOutputStatusToCString(HRESULT hr) { if (hr == MF_E_TRANSFORM_STREAM_CHANGE) return "media stream change occurred, need to set output type"; if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) return "decoder needs more samples"; else return "unhandled error from ProcessOutput"; } H264Mft::DecoderOutputState H264Mft::GetOutput( scoped_refptr* decoded_frame) { CHECK(initialized_); CHECK(decoded_frame != NULL); ScopedComPtr output_sample; if (!use_dxva_) { // If DXVA is enabled, the decoder will allocate the sample for us. output_sample.Attach(CreateEmptySampleWithBuffer(out_buffer_size_)); if (output_sample.get() == NULL) { LOG(ERROR) << "GetSample: failed to create empty output sample"; return kNoMemory; } } MFT_OUTPUT_DATA_BUFFER output_data_buffer; output_data_buffer.dwStreamID = 0; output_data_buffer.pSample = output_sample; output_data_buffer.dwStatus = 0; output_data_buffer.pEvents = NULL; DWORD status; HRESULT hr; hr = decoder_->ProcessOutput(0, // No flags 1, // # of out streams to pull from &output_data_buffer, &status); // TODO(imcheng): Handle the events, if any. (No event is returned most of // the time.) IMFCollection* events = output_data_buffer.pEvents; if (events != NULL) { LOG(INFO) << "Got events from ProcessOuput, but discarding"; events->Release(); } if (FAILED(hr)) { LOG(INFO) << "ProcessOutput failed with status " << std::hex << hr << ", meaning..." << ProcessOutputStatusToCString(hr); if (hr == MF_E_TRANSFORM_STREAM_CHANGE) { if (!SetDecoderOutputMediaType(MFVideoFormat_NV12)) { LOG(ERROR) << "Failed to reset output type"; return kResetOutputStreamFailed; } else { LOG(INFO) << "Reset output type done"; return kResetOutputStreamOk; } } else if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) { // At this point we have either read everything from file or we can // still feed the decoder input. If we have read everything then we // should've sent a drain message to the MFT. If the drain message is // sent but it doesn't give out anymore output then we know the decoder // has processed everything. if (drain_message_sent_) { LOG(INFO) << "Drain message was already sent + no output => done"; return kNoMoreOutput; } else { return kNeedMoreInput; } } else { return kUnspecifiedError; } } else { // A decoded sample was successfully obtained. LOG(INFO) << "Got a decoded sample from decoder"; if (use_dxva_) { // If dxva is enabled, we did not provide a sample to ProcessOutput, // i.e. output_sample is NULL. output_sample.Attach(output_data_buffer.pSample); if (output_sample.get() == NULL) { LOG(ERROR) << "Output sample using DXVA is NULL - ProcessOutput did " << "not provide it!"; return kOutputSampleError; } } int64 timestamp, duration; hr = output_sample->GetSampleTime(×tamp); hr = output_sample->GetSampleDuration(&duration); if (FAILED(hr)) { LOG(ERROR) << "Failed to get sample duration or timestamp " << std::hex << hr; return kOutputSampleError; } // The duration and timestamps are in 100-ns units, so divide by 10 // to convert to microseconds. timestamp /= 10; duration /= 10; // Sanity checks for checking if there is really something in the sample. DWORD buf_count; hr = output_sample->GetBufferCount(&buf_count); if (FAILED(hr)) { LOG(ERROR) << "Failed to get buff count, hr = " << std::hex << hr; return kOutputSampleError; } if (buf_count == 0) { LOG(ERROR) << "buf_count is 0, dropping sample"; return kOutputSampleError; } ScopedComPtr out_buffer; hr = output_sample->GetBufferByIndex(0, out_buffer.Receive()); if (FAILED(hr)) { LOG(ERROR) << "Failed to get decoded output buffer"; return kOutputSampleError; } // To obtain the data, the caller should call the Lock() method instead // of using the data field. // In NV12, there are only 2 planes - the Y plane, and the interleaved UV // plane. Both have the same strides. uint8* null_data[2] = { NULL, NULL }; int32 strides[2] = { stride_, stride_ }; VideoFrame::CreateFrameExternal( use_dxva_ ? VideoFrame::TYPE_DIRECT3DSURFACE : VideoFrame::TYPE_MFBUFFER, VideoFrame::NV12, width_, height_, 2, null_data, strides, base::TimeDelta::FromMicroseconds(timestamp), base::TimeDelta::FromMicroseconds(duration), out_buffer.Detach(), decoded_frame); CHECK(decoded_frame->get() != NULL); frames_decoded_++; return kOutputOk; } } bool H264Mft::SendDrainMessage() { CHECK(initialized_); if (drain_message_sent_) { LOG(ERROR) << "Drain message was already sent before!"; return false; } // Send the drain message with no parameters. HRESULT hr = decoder_->ProcessMessage(MFT_MESSAGE_COMMAND_DRAIN, NULL); if (FAILED(hr)) { LOG(ERROR) << "Failed to send the drain message to decoder"; return false; } drain_message_sent_ = true; return true; } // Private methods bool H264Mft::InitDecoder(IDirect3DDeviceManager9* dev_manager, int frame_rate_num, int frame_rate_denom, int width, int height, int aspect_num, int aspect_denom) { decoder_.Attach(GetH264Decoder()); if (!decoder_.get()) return false; if (!CheckDecoderProperties()) return false; if (use_dxva_) { if (!CheckDecoderDxvaSupport()) return false; if (!SetDecoderD3d9Manager(dev_manager)) return false; } if (!SetDecoderMediaTypes(frame_rate_num, frame_rate_denom, width, height, aspect_num, aspect_denom)) { return false; } return true; } bool H264Mft::CheckDecoderProperties() { DCHECK(decoder_.get()); DWORD in_stream_count; DWORD out_stream_count; HRESULT hr; hr = decoder_->GetStreamCount(&in_stream_count, &out_stream_count); if (FAILED(hr)) { LOG(ERROR) << "Failed to get stream count"; return false; } else { LOG(INFO) << "Input stream count: " << in_stream_count << ", " << "Output stream count: " << out_stream_count; bool mismatch = false; if (in_stream_count != 1) { LOG(ERROR) << "Input stream count mismatch!"; mismatch = true; } if (out_stream_count != 1) { LOG(ERROR) << "Output stream count mismatch!"; mismatch = true; } return !mismatch; } } bool H264Mft::CheckDecoderDxvaSupport() { HRESULT hr; ScopedComPtr attributes; hr = decoder_->GetAttributes(attributes.Receive()); if (FAILED(hr)) { LOG(ERROR) << "Unlock: Failed to get attributes, hr = " << std::hex << std::showbase << hr; return false; } UINT32 dxva; hr = attributes->GetUINT32(MF_SA_D3D_AWARE, &dxva); if (FAILED(hr)) { LOG(ERROR) << "Failed to get DXVA attr, hr = " << std::hex << std::showbase << hr << "this might not be the right decoder."; return false; } LOG(INFO) << "Support dxva? " << dxva; if (!dxva) { LOG(ERROR) << "Decoder does not support DXVA - this might not be the " << "right decoder."; return false; } return true; } bool H264Mft::SetDecoderD3d9Manager(IDirect3DDeviceManager9* dev_manager) { DCHECK(use_dxva_) << "SetDecoderD3d9Manager should only be called if DXVA is " << "enabled"; CHECK(dev_manager != NULL); HRESULT hr; hr = decoder_->ProcessMessage(MFT_MESSAGE_SET_D3D_MANAGER, reinterpret_cast(dev_manager)); if (FAILED(hr)) { LOG(ERROR) << "Failed to set D3D9 device to decoder"; return false; } return true; } bool H264Mft::SetDecoderMediaTypes(int frame_rate_num, int frame_rate_denom, int width, int height, int aspect_num, int aspect_denom) { DCHECK(decoder_.get()); if (!SetDecoderInputMediaType(frame_rate_num, frame_rate_denom, width, height, aspect_num, aspect_denom)) return false; if (!SetDecoderOutputMediaType(MFVideoFormat_NV12)) { return false; } return true; } bool H264Mft::SetDecoderInputMediaType(int frame_rate_num, int frame_rate_denom, int width, int height, int aspect_num, int aspect_denom) { ScopedComPtr media_type; HRESULT hr; hr = MFCreateMediaType(media_type.Receive()); if (FAILED(hr)) { LOG(ERROR) << "Failed to create empty media type object"; return NULL; } hr = media_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video); if (FAILED(hr)) { LOG(ERROR) << "SetGUID for major type failed"; return NULL; } hr = media_type->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264); if (FAILED(hr)) { LOG(ERROR) << "SetGUID for subtype failed"; return NULL; } // Provide additional info to the decoder to avoid a format change during // streaming. if (frame_rate_num == 0 || frame_rate_denom == 0) { hr = MFSetAttributeRatio(media_type.get(), MF_MT_FRAME_RATE, frame_rate_num, frame_rate_denom); if (FAILED(hr)) { LOG(ERROR) << "Failed to set frame rate"; return NULL; } } if (width == 0 || height == 0) { hr = MFSetAttributeSize(media_type.get(), MF_MT_FRAME_SIZE, width, height); if (FAILED(hr)) { LOG(ERROR) << "Failed to set frame size"; return NULL; } } // TODO(imcheng): Not sure about this, but this is the recommended value by // MSDN. hr = media_type->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_MixedInterlaceOrProgressive); if (FAILED(hr)) { LOG(ERROR) << "Failed to set interlace mode"; return NULL; } if (aspect_num == 0 || aspect_denom == 0) { hr = MFSetAttributeRatio(media_type.get(), MF_MT_PIXEL_ASPECT_RATIO, aspect_num, aspect_denom); if (FAILED(hr)) { LOG(ERROR) << "Failed to get aspect ratio"; return NULL; } } hr = decoder_->SetInputType(0, media_type.get(), 0); // No flags if (FAILED(hr)) { LOG(ERROR) << "Failed to set decoder's input type"; return false; } return true; } bool H264Mft::SetDecoderOutputMediaType(const GUID subtype) { DWORD i = 0; IMFMediaType* out_media_type; bool found = false; while (SUCCEEDED(decoder_->GetOutputAvailableType(0, i, &out_media_type))) { GUID out_subtype; HRESULT hr; hr = out_media_type->GetGUID(MF_MT_SUBTYPE, &out_subtype); if (FAILED(hr)) { LOG(ERROR) << "Failed to GetGUID() on GetOutputAvailableType() " << i; out_media_type->Release(); continue; } if (out_subtype == subtype) { LOG(INFO) << "|subtype| is at index " << i << " in GetOutputAvailableType()"; hr = decoder_->SetOutputType(0, out_media_type, 0); // No flags hr = MFGetAttributeSize(out_media_type, MF_MT_FRAME_SIZE, reinterpret_cast(&width_), reinterpret_cast(&height_)); hr = MFGetStrideForBitmapInfoHeader( MFVideoFormat_NV12.Data1, width_, reinterpret_cast(&stride_)); if (FAILED(hr)) { LOG(ERROR) << "Failed to SetOutputType to |subtype| or obtain " << "width/height/stride " << std::hex << hr; } else { found = true; out_media_type->Release(); break; } } i++; out_media_type->Release(); } if (!found) { LOG(ERROR) << "NV12 was not found in GetOutputAvailableType()"; return false; } return true; } bool H264Mft::SendStartMessage() { HRESULT hr; hr = decoder_->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, NULL); if (FAILED(hr)) { LOG(ERROR) << "Process start message failed, hr = " << std::hex << std::showbase << hr; return false; } else { LOG(INFO) << "Sent a message to decoder to indicate start of stream"; return true; } } // Prints out info about the input/output streams, gets the minimum buffer sizes // for input and output samples. // The MFT will not allocate buffer for neither input nor output, so we have // to do it ourselves and make sure they're the correct size. // Exception is when dxva is enabled, the decoder will allocate output. bool H264Mft::GetStreamsInfoAndBufferReqs() { DCHECK(decoder_.get()); HRESULT hr; MFT_INPUT_STREAM_INFO input_stream_info; hr = decoder_->GetInputStreamInfo(0, &input_stream_info); if (FAILED(hr)) { LOG(ERROR) << "Failed to get input stream info"; return false; } LOG(INFO) << "Input stream info: "; LOG(INFO) << "Max latency: " << input_stream_info.hnsMaxLatency; // There should be three flags, one for requiring a whole frame be in a // single sample, one for requiring there be one buffer only in a single // sample, and one that specifies a fixed sample size. (as in cbSize) LOG(INFO) << "Flags: " << std::hex << std::showbase << input_stream_info.dwFlags; CHECK_EQ(static_cast(input_stream_info.dwFlags), 0x7); LOG(INFO) << "Min buffer size: " << input_stream_info.cbSize; LOG(INFO) << "Max lookahead: " << input_stream_info.cbMaxLookahead; LOG(INFO) << "Alignment: " << input_stream_info.cbAlignment; if (input_stream_info.cbAlignment > 0) { LOG(WARNING) << "Warning: Decoder requires input to be aligned"; } in_buffer_size_ = input_stream_info.cbSize; MFT_OUTPUT_STREAM_INFO output_stream_info; hr = decoder_->GetOutputStreamInfo(0, &output_stream_info); if (FAILED(hr)) { LOG(ERROR) << "Failed to get output stream info"; return false; } LOG(INFO) << "Output stream info: "; // The flags here should be the same and mean the same thing, except when // DXVA is enabled, there is an extra 0x100 flag meaning decoder will // allocate its own sample. LOG(INFO) << "Flags: " << std::hex << std::showbase << output_stream_info.dwFlags; CHECK_EQ(static_cast(output_stream_info.dwFlags), use_dxva_ ? 0x107 : 0x7); LOG(INFO) << "Min buffer size: " << output_stream_info.cbSize; LOG(INFO) << "Alignment: " << output_stream_info.cbAlignment; if (output_stream_info.cbAlignment > 0) { LOG(WARNING) << "Warning: Decoder requires output to be aligned"; } out_buffer_size_ = output_stream_info.cbSize; return true; } } // namespace media