diff options
author | nick@chromium.org <nick@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-03-20 00:27:01 +0000 |
---|---|---|
committer | nick@chromium.org <nick@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-03-20 00:27:01 +0000 |
commit | ff6af38fee3252568d103648cf517adb5e4614c6 (patch) | |
tree | e4fae0991f0faa8ec1e93614a6f2895fc265d1d9 /ui/surface/accelerated_surface_win.cc | |
parent | 6468898246c51a66808d05e8f4a95d7330204337 (diff) | |
download | chromium_src-ff6af38fee3252568d103648cf517adb5e4614c6.zip chromium_src-ff6af38fee3252568d103648cf517adb5e4614c6.tar.gz chromium_src-ff6af38fee3252568d103648cf517adb5e4614c6.tar.bz2 |
Fix performance issue affecting ATI/AMD GPUs.
This change preserves both the Lock+memcpy path and the GetRenderTargetData path by
moving both into AcceleratedSurfaceTransformer. For now, it uses
GetRenderTargetData for readbacks everywhere. Lock+memcpy is particularly slow
on ATI/AMD parts, and a quick survey showed that GetRenderTargetData is usually
as fast or faster. There are some Intel integrated systems where Lock+memcpy is
faster, and this will regress readback perf by 20-50% on those systems, but it
will improve performance by 10000% on some Radeon systems.
In a future change I hope to introduce logic to pick between the two impls
adaptively.
BUG=168532
Review URL: https://chromiumcodereview.appspot.com/12659007
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@189173 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'ui/surface/accelerated_surface_win.cc')
-rw-r--r-- | ui/surface/accelerated_surface_win.cc | 83 |
1 files changed, 24 insertions, 59 deletions
diff --git a/ui/surface/accelerated_surface_win.cc b/ui/surface/accelerated_surface_win.cc index 052ed54..3d88f70 100644 --- a/ui/surface/accelerated_surface_win.cc +++ b/ui/surface/accelerated_surface_win.cc @@ -55,29 +55,15 @@ bool DoAllShowPresentWithGDI() { switches::kDoAllShowPresentWithGDI); } -// Lock a D3D surface, and invoke a VideoFrame copier on the result. -bool LockAndCopyPlane(IDirect3DSurface9* src_surface, - media::VideoFrame* dst_frame, - size_t plane_id) { - gfx::Size src_size = d3d_utils::GetSize(src_surface); - - D3DLOCKED_RECT locked_rect; - { - TRACE_EVENT0("gpu", "LockRect"); - HRESULT hr = src_surface->LockRect(&locked_rect, NULL, - D3DLOCK_READONLY | D3DLOCK_NOSYSLOCK); - if (FAILED(hr)) - return false; - } - - { - TRACE_EVENT0("gpu", "memcpy"); - uint8* src = reinterpret_cast<uint8*>(locked_rect.pBits); - int src_stride = locked_rect.Pitch; - media::CopyPlane(plane_id, src, src_stride, src_size.height(), dst_frame); - } - src_surface->UnlockRect(); - return true; +// Use a SurfaceReader to copy into one plane of the VideoFrame. +bool CopyPlane(AcceleratedSurfaceTransformer* gpu_ops, + IDirect3DSurface9* src_surface, + media::VideoFrame* dst_frame, + size_t plane_id) { + int width_in_bytes = dst_frame->row_bytes(plane_id); + return gpu_ops->ReadFast(src_surface, dst_frame->data(plane_id), + width_in_bytes, dst_frame->rows(plane_id), + dst_frame->row_bytes(plane_id)); } } // namespace @@ -477,39 +463,18 @@ bool AcceleratedPresenter::DoCopyToARGB(const gfx::Rect& requested_src_subrect, } } - D3DLOCKED_RECT locked_rect; - - // Empirical evidence seems to suggest that LockRect and memcpy are faster - // than would be GetRenderTargetData to an offscreen surface wrapping *buf. 
- { - TRACE_EVENT0("gpu", "LockRect"); - hr = final_surface->LockRect(&locked_rect, NULL, - D3DLOCK_READONLY | D3DLOCK_NOSYSLOCK); - if (FAILED(hr)) { - LOG(ERROR) << "Failed to lock surface"; - return false; - } - } - - { - TRACE_EVENT0("gpu", "memcpy"); - - bitmap->setConfig(SkBitmap::kARGB_8888_Config, - dst_size.width(), dst_size.height(), - locked_rect.Pitch); - if (!bitmap->allocPixels()) { - final_surface->UnlockRect(); - return false; - } - bitmap->setIsOpaque(true); - - memcpy(reinterpret_cast<int8*>(bitmap->getPixels()), - reinterpret_cast<int8*>(locked_rect.pBits), - locked_rect.Pitch * dst_size.height()); - } - final_surface->UnlockRect(); - - return true; + bitmap->setConfig(SkBitmap::kARGB_8888_Config, + dst_size.width(), dst_size.height()); + if (!bitmap->allocPixels()) + return false; + bitmap->setIsOpaque(true); + + // Copy |final_surface| to |bitmap|. This is always a synchronous operation. + return gpu_ops->ReadFast(final_surface, + reinterpret_cast<uint8*>(bitmap->getPixels()), + bitmap->width() * bitmap->bytesPerPixel(), + bitmap->height(), + static_cast<int>(bitmap->rowBytes())); } bool AcceleratedPresenter::DoCopyToYUV( @@ -590,11 +555,11 @@ bool AcceleratedPresenter::DoCopyToYUV( } } - if (!LockAndCopyPlane(y, frame, media::VideoFrame::kYPlane)) + if (!CopyPlane(gpu_ops, y, frame, media::VideoFrame::kYPlane)) return false; - if (!LockAndCopyPlane(u, frame, media::VideoFrame::kUPlane)) + if (!CopyPlane(gpu_ops, u, frame, media::VideoFrame::kUPlane)) return false; - if (!LockAndCopyPlane(v, frame, media::VideoFrame::kVPlane)) + if (!CopyPlane(gpu_ops, v, frame, media::VideoFrame::kVPlane)) return false; return true; } |