summaryrefslogtreecommitdiffstats
path: root/ui/surface/accelerated_surface_win.cc
diff options
context:
space:
mode:
author nick@chromium.org <nick@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-03-20 00:27:01 +0000
committer nick@chromium.org <nick@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-03-20 00:27:01 +0000
commit ff6af38fee3252568d103648cf517adb5e4614c6 (patch)
tree e4fae0991f0faa8ec1e93614a6f2895fc265d1d9 /ui/surface/accelerated_surface_win.cc
parent 6468898246c51a66808d05e8f4a95d7330204337 (diff)
download chromium_src-ff6af38fee3252568d103648cf517adb5e4614c6.zip
chromium_src-ff6af38fee3252568d103648cf517adb5e4614c6.tar.gz
chromium_src-ff6af38fee3252568d103648cf517adb5e4614c6.tar.bz2
Fix performance issue affecting ATI/AMD GPUs.
This change preserves both the Lock+memcpy path and the GetRenderTargetData path by moving both into AcceleratedSurfaceTransformer. For now, GetRenderTargetData is used for readbacks everywhere. Lock+memcpy is particularly slow on ATI/AMD parts, and a quick survey showed that GetRenderTargetData is usually as fast or faster. There are some Intel integrated systems where Lock+memcpy is faster, and this will regress readback perf by 20-50% on those systems, but it will improve performance by 10000% on some Radeon systems. In a future change I hope to introduce logic to pick between the two implementations adaptively. BUG=168532 Review URL: https://chromiumcodereview.appspot.com/12659007 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@189173 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'ui/surface/accelerated_surface_win.cc')
-rw-r--r-- ui/surface/accelerated_surface_win.cc 83
1 files changed, 24 insertions, 59 deletions
diff --git a/ui/surface/accelerated_surface_win.cc b/ui/surface/accelerated_surface_win.cc
index 052ed54..3d88f70 100644
--- a/ui/surface/accelerated_surface_win.cc
+++ b/ui/surface/accelerated_surface_win.cc
@@ -55,29 +55,15 @@ bool DoAllShowPresentWithGDI() {
switches::kDoAllShowPresentWithGDI);
}
-// Lock a D3D surface, and invoke a VideoFrame copier on the result.
-bool LockAndCopyPlane(IDirect3DSurface9* src_surface,
- media::VideoFrame* dst_frame,
- size_t plane_id) {
- gfx::Size src_size = d3d_utils::GetSize(src_surface);
-
- D3DLOCKED_RECT locked_rect;
- {
- TRACE_EVENT0("gpu", "LockRect");
- HRESULT hr = src_surface->LockRect(&locked_rect, NULL,
- D3DLOCK_READONLY | D3DLOCK_NOSYSLOCK);
- if (FAILED(hr))
- return false;
- }
-
- {
- TRACE_EVENT0("gpu", "memcpy");
- uint8* src = reinterpret_cast<uint8*>(locked_rect.pBits);
- int src_stride = locked_rect.Pitch;
- media::CopyPlane(plane_id, src, src_stride, src_size.height(), dst_frame);
- }
- src_surface->UnlockRect();
- return true;
+// Use a SurfaceReader to copy into one plane of the VideoFrame.
+bool CopyPlane(AcceleratedSurfaceTransformer* gpu_ops,
+ IDirect3DSurface9* src_surface,
+ media::VideoFrame* dst_frame,
+ size_t plane_id) {
+ int width_in_bytes = dst_frame->row_bytes(plane_id);
+ return gpu_ops->ReadFast(src_surface, dst_frame->data(plane_id),
+ width_in_bytes, dst_frame->rows(plane_id),
+ dst_frame->row_bytes(plane_id));
}
} // namespace
@@ -477,39 +463,18 @@ bool AcceleratedPresenter::DoCopyToARGB(const gfx::Rect& requested_src_subrect,
}
}
- D3DLOCKED_RECT locked_rect;
-
- // Empirical evidence seems to suggest that LockRect and memcpy are faster
- // than would be GetRenderTargetData to an offscreen surface wrapping *buf.
- {
- TRACE_EVENT0("gpu", "LockRect");
- hr = final_surface->LockRect(&locked_rect, NULL,
- D3DLOCK_READONLY | D3DLOCK_NOSYSLOCK);
- if (FAILED(hr)) {
- LOG(ERROR) << "Failed to lock surface";
- return false;
- }
- }
-
- {
- TRACE_EVENT0("gpu", "memcpy");
-
- bitmap->setConfig(SkBitmap::kARGB_8888_Config,
- dst_size.width(), dst_size.height(),
- locked_rect.Pitch);
- if (!bitmap->allocPixels()) {
- final_surface->UnlockRect();
- return false;
- }
- bitmap->setIsOpaque(true);
-
- memcpy(reinterpret_cast<int8*>(bitmap->getPixels()),
- reinterpret_cast<int8*>(locked_rect.pBits),
- locked_rect.Pitch * dst_size.height());
- }
- final_surface->UnlockRect();
-
- return true;
+ bitmap->setConfig(SkBitmap::kARGB_8888_Config,
+ dst_size.width(), dst_size.height());
+ if (!bitmap->allocPixels())
+ return false;
+ bitmap->setIsOpaque(true);
+
+ // Copy |final_surface| to |bitmap|. This is always a synchronous operation.
+ return gpu_ops->ReadFast(final_surface,
+ reinterpret_cast<uint8*>(bitmap->getPixels()),
+ bitmap->width() * bitmap->bytesPerPixel(),
+ bitmap->height(),
+ static_cast<int>(bitmap->rowBytes()));
}
bool AcceleratedPresenter::DoCopyToYUV(
@@ -590,11 +555,11 @@ bool AcceleratedPresenter::DoCopyToYUV(
}
}
- if (!LockAndCopyPlane(y, frame, media::VideoFrame::kYPlane))
+ if (!CopyPlane(gpu_ops, y, frame, media::VideoFrame::kYPlane))
return false;
- if (!LockAndCopyPlane(u, frame, media::VideoFrame::kUPlane))
+ if (!CopyPlane(gpu_ops, u, frame, media::VideoFrame::kUPlane))
return false;
- if (!LockAndCopyPlane(v, frame, media::VideoFrame::kVPlane))
+ if (!CopyPlane(gpu_ops, v, frame, media::VideoFrame::kVPlane))
return false;
return true;
}