// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "media/video/mft_h264_decode_engine.h" #include #include #include #include // Placed after mfapi.h to avoid linking strmiids.lib for MR_BUFFER_SERVICE. #include #include #include #include "base/time.h" #include "base/message_loop.h" #include "media/base/limits.h" #include "media/base/pipeline.h" #include "media/video/video_decode_context.h" #pragma comment(lib, "dxva2.lib") #pragma comment(lib, "d3d9.lib") #pragma comment(lib, "mf.lib") #pragma comment(lib, "mfplat.lib") using base::TimeDelta; namespace media { // Creates an empty Media Foundation sample with no buffers. static IMFSample* CreateEmptySample() { HRESULT hr; base::win::ScopedComPtr sample; hr = MFCreateSample(sample.Receive()); if (FAILED(hr)) { LOG(ERROR) << "Unable to create an empty sample"; return NULL; } return sample.Detach(); } // Creates a Media Foundation sample with one buffer of length |buffer_length| // on a |align|-byte boundary. Alignment must be a perfect power of 2 or 0. // If |align| is 0, then no alignment is specified. static IMFSample* CreateEmptySampleWithBuffer(int buffer_length, int align) { CHECK_GT(buffer_length, 0); base::win::ScopedComPtr sample; sample.Attach(CreateEmptySample()); if (!sample.get()) return NULL; base::win::ScopedComPtr buffer; HRESULT hr; if (align == 0) { // Note that MFCreateMemoryBuffer is same as MFCreateAlignedMemoryBuffer // with the align argument being 0. hr = MFCreateMemoryBuffer(buffer_length, buffer.Receive()); } else { hr = MFCreateAlignedMemoryBuffer(buffer_length, align - 1, buffer.Receive()); } if (FAILED(hr)) { LOG(ERROR) << "Unable to create an empty buffer"; return NULL; } hr = sample->AddBuffer(buffer.get()); if (FAILED(hr)) { LOG(ERROR) << "Failed to add empty buffer to sample"; return NULL; } return sample.Detach(); } // Creates a Media Foundation sample with one buffer containing a copy of the // given Annex B stream data. // If duration and sample time are not known, provide 0. // |min_size| specifies the minimum size of the buffer (might be required by // the decoder for input). The times here should be given in 100ns units. // |alignment| specifies the buffer in the sample to be aligned. If no // alignment is required, provide 0 or 1. static IMFSample* CreateInputSample(const uint8* stream, int size, int64 timestamp, int64 duration, int min_size, int alignment) { CHECK(stream); CHECK_GT(size, 0); base::win::ScopedComPtr sample; sample.Attach(CreateEmptySampleWithBuffer(std::max(min_size, size), alignment)); if (!sample.get()) { LOG(ERROR) << "Failed to create empty buffer for input"; return NULL; } HRESULT hr; if (duration > 0) { hr = sample->SetSampleDuration(duration); if (FAILED(hr)) { LOG(ERROR) << "Failed to set sample duration"; return NULL; } } if (timestamp > 0) { hr = sample->SetSampleTime(timestamp); if (FAILED(hr)) { LOG(ERROR) << "Failed to set sample time"; return NULL; } } base::win::ScopedComPtr buffer; hr = sample->GetBufferByIndex(0, buffer.Receive()); if (FAILED(hr)) { LOG(ERROR) << "Failed to get buffer in sample"; return NULL; } DWORD max_length, current_length; uint8* destination; hr = buffer->Lock(&destination, &max_length, ¤t_length); if (FAILED(hr)) { LOG(ERROR) << "Failed to lock buffer"; return NULL; } CHECK_EQ(current_length, 0u); CHECK_GE(static_cast(max_length), size); memcpy(destination, stream, size); CHECK(SUCCEEDED(buffer->Unlock())); hr = buffer->SetCurrentLength(size); if (FAILED(hr)) { LOG(ERROR) << "Failed to set current length to " << size; return NULL; } VLOG(1) << __FUNCTION__ << " wrote " << size << " bytes into input sample"; return sample.Detach(); } const GUID ConvertVideoFrameFormatToGuid(VideoFrame::Format format) { switch (format) { case VideoFrame::NV12: return MFVideoFormat_NV12; case VideoFrame::YV12: return MFVideoFormat_YV12; default: NOTREACHED() << "Unsupported VideoFrame format"; return GUID_NULL; } NOTREACHED(); return GUID_NULL; } // public methods MftH264DecodeEngine::MftH264DecodeEngine(bool use_dxva) : use_dxva_(use_dxva), state_(kUninitialized), width_(0), height_(0), event_handler_(NULL), context_(NULL) { memset(&input_stream_info_, 0, sizeof(input_stream_info_)); memset(&output_stream_info_, 0, sizeof(output_stream_info_)); memset(&info_, 0, sizeof(info_)); } MftH264DecodeEngine::~MftH264DecodeEngine() { } void MftH264DecodeEngine::Initialize( MessageLoop* message_loop, VideoDecodeEngine::EventHandler* event_handler, VideoDecodeContext* context, const VideoDecoderConfig& config) { DCHECK(!use_dxva_ || context); if (state_ != kUninitialized) { LOG(ERROR) << "Initialize: invalid state"; return; } if (!message_loop || !event_handler) { LOG(ERROR) << "MftH264DecodeEngine::Initialize: parameters cannot be NULL"; return; } context_ = context; event_handler_ = event_handler; info_.provides_buffers = true; if (use_dxva_) { info_.stream_info.surface_format = VideoFrame::NV12; // TODO(hclam): Need to correct this since this is not really GL texture. // We should just remove surface_type from stream_info. info_.stream_info.surface_type = VideoFrame::TYPE_GL_TEXTURE; } else { info_.stream_info.surface_format = VideoFrame::YV12; info_.stream_info.surface_type = VideoFrame::TYPE_SYSTEM_MEMORY; } // codec_info.stream_info_.surface_width_/height_ are initialized // in InitInternal(). info_.success = InitInternal(); if (info_.success) { state_ = kNormal; AllocFramesFromContext(); } else { LOG(ERROR) << "MftH264DecodeEngine::Initialize failed"; event_handler_->OnInitializeComplete(info_); } } void MftH264DecodeEngine::Uninitialize() { if (state_ == kUninitialized) { LOG(ERROR) << "Uninitialize: invalid state"; return; } // TODO(hclam): Call ShutdownComLibraries only after MFT is released. decode_engine_.Release(); ShutdownComLibraries(); state_ = kUninitialized; event_handler_->OnUninitializeComplete(); } void MftH264DecodeEngine::Flush() { if (state_ != kNormal) { LOG(ERROR) << "Flush: invalid state"; return; } state_ = kFlushing; if (!SendMFTMessage(MFT_MESSAGE_COMMAND_FLUSH)) { LOG(WARNING) << "MftH264DecodeEngine::Flush failed to send message"; } state_ = kNormal; event_handler_->OnFlushComplete(); } void MftH264DecodeEngine::Seek() { if (state_ != kNormal) { LOG(ERROR) << "Seek: invalid state"; return; } // TODO(hclam): Seriously the logic in VideoRendererBase is flawed that we // have to perform the following hack to get playback going. PipelineStatistics statistics; for (size_t i = 0; i < Limits::kMaxVideoFrames; ++i) { event_handler_->ConsumeVideoFrame(output_frames_[0], statistics); } // Seek not implemented. event_handler_->OnSeekComplete(); } void MftH264DecodeEngine::ConsumeVideoSample(scoped_refptr buffer) { if (state_ == kUninitialized) { LOG(ERROR) << "ConsumeVideoSample: invalid state"; } base::win::ScopedComPtr sample; PipelineStatistics statistics; if (!buffer->IsEndOfStream()) { sample.Attach( CreateInputSample(buffer->GetData(), buffer->GetDataSize(), buffer->GetTimestamp().InMicroseconds() * 10, buffer->GetDuration().InMicroseconds() * 10, input_stream_info_.cbSize, input_stream_info_.cbAlignment)); if (!sample.get()) { LOG(ERROR) << "Failed to create an input sample"; } else { if (FAILED(decode_engine_->ProcessInput(0, sample.get(), 0))) { event_handler_->OnError(); } } statistics.video_bytes_decoded = buffer->GetDataSize(); } else { if (state_ != MftH264DecodeEngine::kEosDrain) { // End of stream, send drain messages. if (!SendMFTMessage(MFT_MESSAGE_NOTIFY_END_OF_STREAM) || !SendMFTMessage(MFT_MESSAGE_COMMAND_DRAIN)) { LOG(ERROR) << "Failed to send EOS / drain messages to MFT"; event_handler_->OnError(); } else { state_ = MftH264DecodeEngine::kEosDrain; } } } DoDecode(statistics); } void MftH264DecodeEngine::ProduceVideoFrame(scoped_refptr frame) { if (state_ == kUninitialized) { LOG(ERROR) << "ProduceVideoFrame: invalid state"; return; } event_handler_->ProduceVideoSample(NULL); } // private methods // static bool MftH264DecodeEngine::StartupComLibraries() { HRESULT hr; hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED | COINIT_DISABLE_OLE1DDE); if (FAILED(hr)) { LOG(ERROR) << "CoInit fail"; return false; } hr = MFStartup(MF_VERSION, MFSTARTUP_FULL); if (FAILED(hr)) { LOG(ERROR) << "MFStartup fail"; CoUninitialize(); return false; } return true; } // static void MftH264DecodeEngine::ShutdownComLibraries() { HRESULT hr; hr = MFShutdown(); if (FAILED(hr)) { LOG(WARNING) << "Warning: MF failed to shutdown"; } CoUninitialize(); } bool MftH264DecodeEngine::EnableDxva() { IDirect3DDevice9* device = static_cast( context_->GetDevice()); base::win::ScopedComPtr device_manager; UINT dev_manager_reset_token = 0; HRESULT hr = DXVA2CreateDirect3DDeviceManager9(&dev_manager_reset_token, device_manager.Receive()); if (FAILED(hr)) { LOG(ERROR) << "Couldn't create D3D Device manager"; return false; } hr = device_manager->ResetDevice(device, dev_manager_reset_token); if (FAILED(hr)) { LOG(ERROR) << "Failed to reset device"; return false; } hr = decode_engine_->ProcessMessage( MFT_MESSAGE_SET_D3D_MANAGER, reinterpret_cast(device_manager.get())); if (FAILED(hr)) { LOG(ERROR) << "Failed to set D3D9 device manager to decoder " << std::hex << hr; return false; } return true; } bool MftH264DecodeEngine::InitInternal() { if (!StartupComLibraries()) return false; if (!InitDecodeEngine()) return false; if (!GetStreamsInfoAndBufferReqs()) return false; return SendMFTMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING); } bool MftH264DecodeEngine::InitDecodeEngine() { // TODO(jiesun): use MFEnum to get decoder CLSID. HRESULT hr = CoCreateInstance(__uuidof(CMSH264DecoderMFT), NULL, CLSCTX_INPROC_SERVER, __uuidof(IMFTransform), reinterpret_cast( decode_engine_.Receive())); if (FAILED(hr) || !decode_engine_.get()) { LOG(ERROR) << "CoCreateInstance failed " << std::hex << std::showbase << hr; return false; } if (!CheckDecodeEngineDxvaSupport()) return false; if (use_dxva_ && !EnableDxva()) return false; return SetDecodeEngineMediaTypes(); } void MftH264DecodeEngine::AllocFramesFromContext() { if (!use_dxva_) return; // TODO(imcheng): Pass in an actual task. (From EventHandler?) context_->ReleaseAllVideoFrames(); output_frames_.clear(); context_->AllocateVideoFrames( 1, info_.stream_info.surface_width, info_.stream_info.surface_height, VideoFrame::RGBA, &output_frames_, NewRunnableMethod(this, &MftH264DecodeEngine::OnAllocFramesDone)); } void MftH264DecodeEngine::OnAllocFramesDone() { event_handler_->OnInitializeComplete(info_); } bool MftH264DecodeEngine::CheckDecodeEngineDxvaSupport() { base::win::ScopedComPtr attributes; HRESULT hr = decode_engine_->GetAttributes(attributes.Receive()); if (FAILED(hr)) { LOG(ERROR) << "Unlock: Failed to get attributes, hr = " << std::hex << std::showbase << hr; return false; } UINT32 dxva; hr = attributes->GetUINT32(MF_SA_D3D_AWARE, &dxva); if (FAILED(hr) || !dxva) { LOG(ERROR) << "Failed to get DXVA attr or decoder is not DXVA-aware, hr = " << std::hex << std::showbase << hr << " this might not be the right decoder."; return false; } return true; } bool MftH264DecodeEngine::SetDecodeEngineMediaTypes() { if (!SetDecodeEngineInputMediaType()) return false; return SetDecodeEngineOutputMediaType( ConvertVideoFrameFormatToGuid(info_.stream_info.surface_format)); } bool MftH264DecodeEngine::SetDecodeEngineInputMediaType() { base::win::ScopedComPtr media_type; HRESULT hr = MFCreateMediaType(media_type.Receive()); if (FAILED(hr)) { LOG(ERROR) << "Failed to create empty media type object"; return false; } hr = media_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video); if (FAILED(hr)) { LOG(ERROR) << "SetGUID for major type failed"; return false; } hr = media_type->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264); if (FAILED(hr)) { LOG(ERROR) << "SetGUID for subtype failed"; return false; } hr = decode_engine_->SetInputType(0, media_type.get(), 0); // No flags if (FAILED(hr)) { LOG(ERROR) << "Failed to set decoder's input type"; return false; } return true; } bool MftH264DecodeEngine::SetDecodeEngineOutputMediaType(const GUID subtype) { DWORD i = 0; IMFMediaType* out_media_type; bool found = false; while (SUCCEEDED(decode_engine_->GetOutputAvailableType(0, i, &out_media_type))) { GUID out_subtype; HRESULT hr = out_media_type->GetGUID(MF_MT_SUBTYPE, &out_subtype); if (FAILED(hr)) { LOG(ERROR) << "Failed to GetGUID() on GetOutputAvailableType() " << i; out_media_type->Release(); continue; } if (out_subtype == subtype) { hr = decode_engine_->SetOutputType(0, out_media_type, 0); // No flags hr = MFGetAttributeSize(out_media_type, MF_MT_FRAME_SIZE, reinterpret_cast(&info_.stream_info.surface_width), reinterpret_cast(&info_.stream_info.surface_height)); width_ = info_.stream_info.surface_width; height_ = info_.stream_info.surface_height; if (FAILED(hr)) { LOG(ERROR) << "Failed to SetOutputType to |subtype| or obtain " << "width/height " << std::hex << hr; } else { out_media_type->Release(); return true; } } i++; out_media_type->Release(); } return false; } bool MftH264DecodeEngine::SendMFTMessage(MFT_MESSAGE_TYPE msg) { HRESULT hr = decode_engine_->ProcessMessage(msg, NULL); return SUCCEEDED(hr); } // Prints out info about the input/output streams, gets the minimum buffer sizes // for input and output samples. // The MFT will not allocate buffer for neither input nor output, so we have // to do it ourselves and make sure they're the correct size. // Exception is when dxva is enabled, the decoder will allocate output. bool MftH264DecodeEngine::GetStreamsInfoAndBufferReqs() { HRESULT hr = decode_engine_->GetInputStreamInfo(0, &input_stream_info_); if (FAILED(hr)) { LOG(ERROR) << "Failed to get input stream info"; return false; } VLOG(1) << "Input stream info:" << "\nMax latency: " << input_stream_info_.hnsMaxLatency << "\nFlags: " << std::hex << std::showbase << input_stream_info_.dwFlags << "\nMin buffer size: " << input_stream_info_.cbSize << "\nMax lookahead: " << input_stream_info_.cbMaxLookahead << "\nAlignment: " << input_stream_info_.cbAlignment; // There should be three flags, one for requiring a whole frame be in a // single sample, one for requiring there be one buffer only in a single // sample, and one that specifies a fixed sample size. (as in cbSize) CHECK_EQ(input_stream_info_.dwFlags, 0x7u); hr = decode_engine_->GetOutputStreamInfo(0, &output_stream_info_); if (FAILED(hr)) { LOG(ERROR) << "Failed to get output stream info"; return false; } VLOG(1) << "Output stream info:" << "\nFlags: " << std::hex << std::showbase << output_stream_info_.dwFlags << "\nMin buffer size: " << output_stream_info_.cbSize << "\nAlignment: " << output_stream_info_.cbAlignment; // The flags here should be the same and mean the same thing, except when // DXVA is enabled, there is an extra 0x100 flag meaning decoder will // allocate its own sample. CHECK_EQ(output_stream_info_.dwFlags, use_dxva_ ? 0x107u : 0x7u); return true; } bool MftH264DecodeEngine::DoDecode(const PipelineStatistics& statistics) { if (state_ != kNormal && state_ != kEosDrain) { LOG(ERROR) << "DoDecode: not in normal or drain state"; return false; } scoped_refptr frame; base::win::ScopedComPtr output_sample; if (!use_dxva_) { output_sample.Attach( CreateEmptySampleWithBuffer(output_stream_info_.cbSize, output_stream_info_.cbAlignment)); if (!output_sample.get()) { LOG(ERROR) << "GetSample: failed to create empty output sample"; event_handler_->OnError(); return false; } } MFT_OUTPUT_DATA_BUFFER output_data_buffer; memset(&output_data_buffer, 0, sizeof(output_data_buffer)); output_data_buffer.dwStreamID = 0; output_data_buffer.pSample = output_sample; DWORD status; HRESULT hr = decode_engine_->ProcessOutput(0, // No flags 1, // # of out streams to pull &output_data_buffer, &status); IMFCollection* events = output_data_buffer.pEvents; if (events != NULL) { VLOG(1) << "Got events from ProcessOuput, but discarding"; events->Release(); } if (FAILED(hr)) { if (hr == MF_E_TRANSFORM_STREAM_CHANGE) { hr = SetDecodeEngineOutputMediaType( ConvertVideoFrameFormatToGuid(info_.stream_info.surface_format)); if (SUCCEEDED(hr)) { // TODO(hclam): Need to fix this case. This happens when we have a // format change. We have to resume decoding only after we have // allocated a new set of video frames. // AllocFramesFromContext(); // event_handler_->OnFormatChange(info_.stream_info); event_handler_->ProduceVideoSample(NULL); return true; } event_handler_->OnError(); return false; } if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) { if (state_ == kEosDrain) { // No more output from the decoder. Notify EOS and stop playback. scoped_refptr frame; VideoFrame::CreateEmptyFrame(&frame); event_handler_->ConsumeVideoFrame(frame, statistics); state_ = MftH264DecodeEngine::kStopped; return false; } event_handler_->ProduceVideoSample(NULL); return true; } LOG(ERROR) << "Unhandled error in DoDecode()"; state_ = MftH264DecodeEngine::kStopped; event_handler_->OnError(); return false; } // We succeeded in getting an output sample. if (use_dxva_) { // For DXVA we didn't provide the sample, i.e. output_sample was NULL. output_sample.Attach(output_data_buffer.pSample); } if (!output_sample.get()) { LOG(ERROR) << "ProcessOutput succeeded, but did not get a sample back"; event_handler_->OnError(); return true; } int64 timestamp = 0, duration = 0; if (FAILED(output_sample->GetSampleTime(×tamp)) || FAILED(output_sample->GetSampleDuration(&duration))) { LOG(WARNING) << "Failed to get timestamp/duration from output"; } // The duration and timestamps are in 100-ns units, so divide by 10 // to convert to microseconds. timestamp /= 10; duration /= 10; // Sanity checks for checking if there is really something in the sample. DWORD buf_count; hr = output_sample->GetBufferCount(&buf_count); if (FAILED(hr) || buf_count != 1) { LOG(ERROR) << "Failed to get buffer count, or buffer count mismatch"; return true; } base::win::ScopedComPtr output_buffer; hr = output_sample->GetBufferByIndex(0, output_buffer.Receive()); if (FAILED(hr)) { LOG(ERROR) << "Failed to get buffer from sample"; return true; } if (use_dxva_) { base::win::ScopedComPtr surface; hr = MFGetService(output_buffer, MR_BUFFER_SERVICE, IID_PPV_ARGS(surface.Receive())); if (FAILED(hr)) { LOG(ERROR) << "Failed to get surface from buffer"; return true; } // Since we only allocated 1 frame from context. // TODO(imcheng): Detect error. output_frames_[0]->SetTimestamp(TimeDelta::FromMicroseconds(timestamp)); output_frames_[0]->SetDuration(TimeDelta::FromMicroseconds(duration)); context_->ConvertToVideoFrame( surface.get(), output_frames_[0], NewRunnableMethod(this, &MftH264DecodeEngine::OnUploadVideoFrameDone, surface, output_frames_[0], statistics)); return true; } // TODO(hclam): Remove this branch. // Not DXVA. VideoFrame::CreateFrame(info_.stream_info.surface_format, info_.stream_info.surface_width, info_.stream_info.surface_height, TimeDelta::FromMicroseconds(timestamp), TimeDelta::FromMicroseconds(duration), &frame); if (!frame.get()) { LOG(ERROR) << "Failed to allocate video frame for yuv plane"; event_handler_->OnError(); return true; } uint8* src_y; DWORD max_length, current_length; hr = output_buffer->Lock(&src_y, &max_length, ¤t_length); if (FAILED(hr)) return true; uint8* dst_y = static_cast(frame->data(VideoFrame::kYPlane)); memcpy(dst_y, src_y, current_length); CHECK(SUCCEEDED(output_buffer->Unlock())); event_handler_->ConsumeVideoFrame(frame, statistics); return true; } void MftH264DecodeEngine::OnUploadVideoFrameDone( base::win::ScopedComPtr surface, scoped_refptr frame, PipelineStatistics statistics) { // After this method is exited the reference to surface is released. event_handler_->ConsumeVideoFrame(frame, statistics); } } // namespace media DISABLE_RUNNABLE_METHOD_REFCOUNT(media::MftH264DecodeEngine);