From ff6af38fee3252568d103648cf517adb5e4614c6 Mon Sep 17 00:00:00 2001 From: "nick@chromium.org" Date: Wed, 20 Mar 2013 00:27:01 +0000 Subject: Fix performance issue affecting ATI/AMD GPUs. This change preserves both the Lock+memcpy path and the GetRenderTargetData path by moving both into AcceleratedSurfaceTransformer. And for now, use GetRenderTargetData for readbacks everywhere. Lock+memcpy is particularly slow on ATI/AMD parts, and a quick survey showed that GetRenderTargetData is usually as fast or faster. There are some Intel integrated systems where Lock+memcpy is faster, and this will regress readback perf by 20-50% on those systems, but it will improve performance by 10000% on some Radeon systems. In a future change I hope to introduce logic to pick between the two impls adaptively. BUG=168532 Review URL: https://chromiumcodereview.appspot.com/12659007 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@189173 0039d316-1c4b-4281-b951-d872f2087c98 --- ui/surface/accelerated_surface_win.cc | 83 ++++++++++------------------------- 1 file changed, 24 insertions(+), 59 deletions(-) (limited to 'ui/surface/accelerated_surface_win.cc') diff --git a/ui/surface/accelerated_surface_win.cc b/ui/surface/accelerated_surface_win.cc index 052ed54..3d88f70 100644 --- a/ui/surface/accelerated_surface_win.cc +++ b/ui/surface/accelerated_surface_win.cc @@ -55,29 +55,15 @@ bool DoAllShowPresentWithGDI() { switches::kDoAllShowPresentWithGDI); } -// Lock a D3D surface, and invoke a VideoFrame copier on the result. 
-bool LockAndCopyPlane(IDirect3DSurface9* src_surface, - media::VideoFrame* dst_frame, - size_t plane_id) { - gfx::Size src_size = d3d_utils::GetSize(src_surface); - - D3DLOCKED_RECT locked_rect; - { - TRACE_EVENT0("gpu", "LockRect"); - HRESULT hr = src_surface->LockRect(&locked_rect, NULL, - D3DLOCK_READONLY | D3DLOCK_NOSYSLOCK); - if (FAILED(hr)) - return false; - } - - { - TRACE_EVENT0("gpu", "memcpy"); - uint8* src = reinterpret_cast(locked_rect.pBits); - int src_stride = locked_rect.Pitch; - media::CopyPlane(plane_id, src, src_stride, src_size.height(), dst_frame); - } - src_surface->UnlockRect(); - return true; +// Use a SurfaceReader to copy into one plane of the VideoFrame. +bool CopyPlane(AcceleratedSurfaceTransformer* gpu_ops, + IDirect3DSurface9* src_surface, + media::VideoFrame* dst_frame, + size_t plane_id) { + int width_in_bytes = dst_frame->row_bytes(plane_id); + return gpu_ops->ReadFast(src_surface, dst_frame->data(plane_id), + width_in_bytes, dst_frame->rows(plane_id), + dst_frame->row_bytes(plane_id)); } } // namespace @@ -477,39 +463,18 @@ bool AcceleratedPresenter::DoCopyToARGB(const gfx::Rect& requested_src_subrect, } } - D3DLOCKED_RECT locked_rect; - - // Empirical evidence seems to suggest that LockRect and memcpy are faster - // than would be GetRenderTargetData to an offscreen surface wrapping *buf. 
- { - TRACE_EVENT0("gpu", "LockRect"); - hr = final_surface->LockRect(&locked_rect, NULL, - D3DLOCK_READONLY | D3DLOCK_NOSYSLOCK); - if (FAILED(hr)) { - LOG(ERROR) << "Failed to lock surface"; - return false; - } - } - - { - TRACE_EVENT0("gpu", "memcpy"); - - bitmap->setConfig(SkBitmap::kARGB_8888_Config, - dst_size.width(), dst_size.height(), - locked_rect.Pitch); - if (!bitmap->allocPixels()) { - final_surface->UnlockRect(); - return false; - } - bitmap->setIsOpaque(true); - - memcpy(reinterpret_cast(bitmap->getPixels()), - reinterpret_cast(locked_rect.pBits), - locked_rect.Pitch * dst_size.height()); - } - final_surface->UnlockRect(); - - return true; + bitmap->setConfig(SkBitmap::kARGB_8888_Config, + dst_size.width(), dst_size.height()); + if (!bitmap->allocPixels()) + return false; + bitmap->setIsOpaque(true); + + // Copy |final_surface| to |bitmap|. This is always a synchronous operation. + return gpu_ops->ReadFast(final_surface, + reinterpret_cast(bitmap->getPixels()), + bitmap->width() * bitmap->bytesPerPixel(), + bitmap->height(), + static_cast(bitmap->rowBytes())); } bool AcceleratedPresenter::DoCopyToYUV( @@ -590,11 +555,11 @@ bool AcceleratedPresenter::DoCopyToYUV( } } - if (!LockAndCopyPlane(y, frame, media::VideoFrame::kYPlane)) + if (!CopyPlane(gpu_ops, y, frame, media::VideoFrame::kYPlane)) return false; - if (!LockAndCopyPlane(u, frame, media::VideoFrame::kUPlane)) + if (!CopyPlane(gpu_ops, u, frame, media::VideoFrame::kUPlane)) return false; - if (!LockAndCopyPlane(v, frame, media::VideoFrame::kVPlane)) + if (!CopyPlane(gpu_ops, v, frame, media::VideoFrame::kVPlane)) return false; return true; } -- cgit v1.1