diff options
author | hclam@chromium.org <hclam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-08-01 19:59:42 +0000 |
---|---|---|
committer | hclam@chromium.org <hclam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-08-01 19:59:42 +0000 |
commit | 2cc0ec84815e5167b7ecbdae94a47033e95422e6 (patch) | |
tree | e4a66d4b83294f108acde9878cb5b2e6ec60ca7b /remoting | |
parent | a4ff9eaea74e058144eda4164fe4694e6e1b9c21 (diff) | |
download | chromium_src-2cc0ec84815e5167b7ecbdae94a47033e95422e6.zip chromium_src-2cc0ec84815e5167b7ecbdae94a47033e95422e6.tar.gz chromium_src-2cc0ec84815e5167b7ecbdae94a47033e95422e6.tar.bz2 |
Optimize mac capturer to copy memory only for dirty rects.
This optimization shaves a lot of memory copies when there are small changes on
the screen. On the computer I tested, the capturer now runs in 9ms where before
it took 20ms. This, however, increases capture time when more than half of the
screen is changing or the dirty rects overlap.
There are several further optimizations that can be done:
1. Merge dirty rects so that they don't overlap.
2. Subtract current dirty rects from last dirty rects.
If the above two are implemented, we can then shave copying in OpenGL. This
can be done by grouping rectangles and downloading them in bulk, which can bring
capture time to less than 5ms.
Overall, this patch shaves 11ms, about 20% of latency, on the mac host I use.
BUG=91201
TEST=Mac capturer runs faster
Review URL: http://codereview.chromium.org/7540024
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@94964 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'remoting')
-rw-r--r-- | remoting/base/util.cc | 23 | ||||
-rw-r--r-- | remoting/base/util.h | 8 | ||||
-rw-r--r-- | remoting/host/capturer_mac.cc | 63 |
3 files changed, 81 insertions, 13 deletions
diff --git a/remoting/base/util.cc b/remoting/base/util.cc index 788c602..2e7d7df 100644 --- a/remoting/base/util.cc +++ b/remoting/base/util.cc @@ -146,4 +146,27 @@ gfx::Rect ScaleRect(const gfx::Rect& rect, return scaled_rect; } +void CopyRect(const uint8* src_plane, + int src_plane_stride, + uint8* dest_plane, + int dest_plane_stride, + int bytes_per_pixel, + const gfx::Rect& rect) { + // Get the address of the starting point. + const int src_y_offset = src_plane_stride * rect.y(); + const int dest_y_offset = dest_plane_stride * rect.y(); + const int x_offset = bytes_per_pixel * rect.x(); + src_plane += src_y_offset + x_offset; + dest_plane += dest_y_offset + x_offset; + + // Copy pixels in the rectangle line by line. + const int bytes_per_line = bytes_per_pixel * rect.width(); + const int height = rect.height(); + for (int i = 0 ; i < height; ++i) { + memcpy(dest_plane, src_plane, bytes_per_line); + src_plane += src_plane_stride; + dest_plane += dest_plane_stride; + } +} + } // namespace remoting diff --git a/remoting/base/util.h b/remoting/base/util.h index 920bb89..bd0f95c 100644 --- a/remoting/base/util.h +++ b/remoting/base/util.h @@ -56,6 +56,14 @@ gfx::Rect ScaleRect(const gfx::Rect& rect, double horizontal_ratio, double vertical_ratio); +// Copy pixels in the rectangle from source to destination. 
+void CopyRect(const uint8* src_plane, + int src_plane_stride, + uint8* dest_plane, + int dest_plane_stride, + int bytes_per_pixel, + const gfx::Rect& rect); + } // namespace remoting #endif // REMOTING_BASE_UTIL_H_ diff --git a/remoting/host/capturer_mac.cc b/remoting/host/capturer_mac.cc index cf1cfe7..0e0b90ee 100644 --- a/remoting/host/capturer_mac.cc +++ b/remoting/host/capturer_mac.cc @@ -13,6 +13,7 @@ #include "base/logging.h" #include "base/mac/mac_util.h" #include "base/memory/scoped_ptr.h" +#include "remoting/base/util.h" #include "remoting/host/capturer_helper.h" namespace remoting { @@ -141,7 +142,7 @@ class CapturerMac : public Capturer { virtual const gfx::Size& size_most_recent() const OVERRIDE; private: - void GlBlitFast(const VideoFrameBuffer& buffer); + void GlBlitFast(const VideoFrameBuffer& buffer, const InvalidRects& rects); void GlBlitSlow(const VideoFrameBuffer& buffer); void CgBlit(const VideoFrameBuffer& buffer, const InvalidRects& rects); void CaptureRects(const InvalidRects& rects, @@ -179,6 +180,9 @@ class CapturerMac : public Capturer { // a particular screen resolution. uint8* last_buffer_; + // Contains a list of invalid rectangles in the last capture. + InvalidRects last_invalid_rects_; + // Format of pixels returned in buffer. media::VideoFrame::Format pixel_format_; @@ -303,7 +307,7 @@ void CapturerMac::CaptureInvalidRects(CaptureCompletedCallback* callback) { bool flip = true; // GL capturers need flipping. if (cgl_context_) { if (pixel_buffer_object_.get() != 0) { - GlBlitFast(current_buffer); + GlBlitFast(current_buffer, rects); } else { // See comment in scoped_pixel_buffer_object::Init about why the slow // path is always used on 10.5. 
@@ -335,7 +339,33 @@ void CapturerMac::CaptureInvalidRects(CaptureCompletedCallback* callback) { delete callback; } -void CapturerMac::GlBlitFast(const VideoFrameBuffer& buffer) { +void CapturerMac::GlBlitFast(const VideoFrameBuffer& buffer, + const InvalidRects& rects) { + if (last_buffer_) { + // We are doing double buffer for the capture data so we just need to copy + // invalid rects in the last capture in the current buffer. + // TODO(hclam): |last_invalid_rects_| and |rects| can overlap and this + // causes extra copies on the overlapped region. Subtract |rects| from + // |last_invalid_rects_| to do a minimal amount of copy when we have proper + // region algorithms implemented. + + // Since the image obtained from OpenGL is upside-down, need to do some + // magic here to copy the correct rectangle. + const int y_offset = (buffer.size().height() - 1) * buffer.bytes_per_row(); + for (InvalidRects::iterator i = last_invalid_rects_.begin(); + i != last_invalid_rects_.end(); + ++i) { + CopyRect(last_buffer_ + y_offset, + -buffer.bytes_per_row(), + buffer.ptr() + y_offset, + -buffer.bytes_per_row(), + 4, // Bytes for pixel for RGBA. + *i); + } + } + last_buffer_ = buffer.ptr(); + last_invalid_rects_ = rects; + CGLContextObj CGL_MACRO_CONTEXT = cgl_context_; glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, pixel_buffer_object_.get()); glReadPixels(0, 0, buffer.size().width(), buffer.size().height(), @@ -347,7 +377,17 @@ void CapturerMac::GlBlitFast(const VideoFrameBuffer& buffer) { // release it. pixel_buffer_object_.Release(); } else { - memcpy(buffer.ptr(), ptr, buffer.size().height() * buffer.bytes_per_row()); + // Copy only from the dirty rects. Since the image obtained from OpenGL is + // upside-down we need to do some magic here to copy the correct rectangle. 
+ const int y_offset = (buffer.size().height() - 1) * buffer.bytes_per_row(); + for (InvalidRects::iterator i = rects.begin(); i != rects.end(); ++i) { + CopyRect(ptr + y_offset, + -buffer.bytes_per_row(), + buffer.ptr() + y_offset, + -buffer.bytes_per_row(), + 4, // Bytes for pixel for RGBA. + *i); + } } if (!glUnmapBufferARB(GL_PIXEL_PACK_BUFFER_ARB)) { // If glUnmapBuffer returns false, then the contents of the data store are @@ -386,15 +426,12 @@ void CapturerMac::CgBlit(const VideoFrameBuffer& buffer, int src_bytes_per_row = CGDisplayBytesPerRow(main_display); int src_bytes_per_pixel = CGDisplayBitsPerPixel(main_display) / 8; for (InvalidRects::iterator i = rects.begin(); i != rects.end(); ++i) { - int src_row_offset = i->x() * src_bytes_per_pixel; - int dst_row_offset = i->x() * sizeof(uint32_t); - int rect_width_in_bytes = i->width() * src_bytes_per_pixel; - int ymax = i->height() + i->y(); - for (int y = i->y(); y < ymax; ++y) { - memcpy(buffer.ptr() + y * buffer.bytes_per_row() + dst_row_offset, - display_base_address + y * src_bytes_per_row + src_row_offset, - rect_width_in_bytes); - } + CopyRect(display_base_address, + src_bytes_per_row, + buffer.ptr(), + buffer.bytes_per_row(), + src_bytes_per_pixel, + *i); } } |