summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorhclam@chromium.org <hclam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-08-01 19:59:42 +0000
committerhclam@chromium.org <hclam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-08-01 19:59:42 +0000
commit2cc0ec84815e5167b7ecbdae94a47033e95422e6 (patch)
treee4a66d4b83294f108acde9878cb5b2e6ec60ca7b
parenta4ff9eaea74e058144eda4164fe4694e6e1b9c21 (diff)
downloadchromium_src-2cc0ec84815e5167b7ecbdae94a47033e95422e6.zip
chromium_src-2cc0ec84815e5167b7ecbdae94a47033e95422e6.tar.gz
chromium_src-2cc0ec84815e5167b7ecbdae94a47033e95422e6.tar.bz2
Optimize Mac capturer to copy memory only for dirty rects.
This optimization saves a lot of memory copies when there are only small changes on the screen. On the computer I tested, the capturer now runs in 9ms where before it took 20ms. However, this increases capture time when more than half of the screen is changing or the dirty rects overlap. Several further optimizations can be done: 1. Merge dirty rects so that they don't overlap. 2. Subtract current dirty rects from last dirty rects. If the above two are implemented we can then reduce copying in OpenGL. This can be done by grouping rectangles and downloading them in bulk, which can bring capture time below 5ms. Overall this patch shaves 11ms, about 20% of latency, on the Mac host I use. BUG=91201 TEST=Mac capturer runs faster Review URL: http://codereview.chromium.org/7540024 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@94964 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r--remoting/base/util.cc23
-rw-r--r--remoting/base/util.h8
-rw-r--r--remoting/host/capturer_mac.cc63
3 files changed, 81 insertions, 13 deletions
diff --git a/remoting/base/util.cc b/remoting/base/util.cc
index 788c602..2e7d7df 100644
--- a/remoting/base/util.cc
+++ b/remoting/base/util.cc
@@ -146,4 +146,27 @@ gfx::Rect ScaleRect(const gfx::Rect& rect,
return scaled_rect;
}
+void CopyRect(const uint8* src_plane,
+ int src_plane_stride,
+ uint8* dest_plane,
+ int dest_plane_stride,
+ int bytes_per_pixel,
+ const gfx::Rect& rect) {
+ // Offset both plane pointers to the first pixel of |rect|; each plane uses its own stride for the row offset.
+ const int src_y_offset = src_plane_stride * rect.y();
+ const int dest_y_offset = dest_plane_stride * rect.y();
+ const int x_offset = bytes_per_pixel * rect.x();
+ src_plane += src_y_offset + x_offset;
+ dest_plane += dest_y_offset + x_offset;
+
+ // Copy |rect.height()| rows of |bytes_per_line| bytes each, stepping each pointer by its own stride.
+ const int bytes_per_line = bytes_per_pixel * rect.width();
+ const int height = rect.height();
+ for (int i = 0 ; i < height; ++i) {
+ memcpy(dest_plane, src_plane, bytes_per_line);
+ src_plane += src_plane_stride;
+ dest_plane += dest_plane_stride;
+ }
+}
+
} // namespace remoting
diff --git a/remoting/base/util.h b/remoting/base/util.h
index 920bb89..bd0f95c 100644
--- a/remoting/base/util.h
+++ b/remoting/base/util.h
@@ -56,6 +56,14 @@ gfx::Rect ScaleRect(const gfx::Rect& rect,
double horizontal_ratio,
double vertical_ratio);
+// Copies the pixels inside |rect| from |src_plane| to |dest_plane|; |rect| is applied at the same x/y position in both planes, and strides are byte offsets between rows.
+void CopyRect(const uint8* src_plane,
+ int src_plane_stride,
+ uint8* dest_plane,
+ int dest_plane_stride,
+ int bytes_per_pixel,
+ const gfx::Rect& rect);
+
} // namespace remoting
#endif // REMOTING_BASE_UTIL_H_
diff --git a/remoting/host/capturer_mac.cc b/remoting/host/capturer_mac.cc
index cf1cfe7..0e0b90ee 100644
--- a/remoting/host/capturer_mac.cc
+++ b/remoting/host/capturer_mac.cc
@@ -13,6 +13,7 @@
#include "base/logging.h"
#include "base/mac/mac_util.h"
#include "base/memory/scoped_ptr.h"
+#include "remoting/base/util.h"
#include "remoting/host/capturer_helper.h"
namespace remoting {
@@ -141,7 +142,7 @@ class CapturerMac : public Capturer {
virtual const gfx::Size& size_most_recent() const OVERRIDE;
private:
- void GlBlitFast(const VideoFrameBuffer& buffer);
+ void GlBlitFast(const VideoFrameBuffer& buffer, const InvalidRects& rects);
void GlBlitSlow(const VideoFrameBuffer& buffer);
void CgBlit(const VideoFrameBuffer& buffer, const InvalidRects& rects);
void CaptureRects(const InvalidRects& rects,
@@ -179,6 +180,9 @@ class CapturerMac : public Capturer {
// a particular screen resolution.
uint8* last_buffer_;
+ // Contains a list of invalid rectangles in the last capture.
+ InvalidRects last_invalid_rects_;
+
// Format of pixels returned in buffer.
media::VideoFrame::Format pixel_format_;
@@ -303,7 +307,7 @@ void CapturerMac::CaptureInvalidRects(CaptureCompletedCallback* callback) {
bool flip = true; // GL capturers need flipping.
if (cgl_context_) {
if (pixel_buffer_object_.get() != 0) {
- GlBlitFast(current_buffer);
+ GlBlitFast(current_buffer, rects);
} else {
// See comment in scoped_pixel_buffer_object::Init about why the slow
// path is always used on 10.5.
@@ -335,7 +339,33 @@ void CapturerMac::CaptureInvalidRects(CaptureCompletedCallback* callback) {
delete callback;
}
-void CapturerMac::GlBlitFast(const VideoFrameBuffer& buffer) {
+void CapturerMac::GlBlitFast(const VideoFrameBuffer& buffer,
+ const InvalidRects& rects) {
+ if (last_buffer_) {
+ // We are doing double buffer for the capture data so we just need to copy
+ // invalid rects in the last capture in the current buffer.
+ // TODO(hclam): |last_invalid_rects_| and |rects| can overlap and this
+ // causes extra copies on the overlapped region. Subtract |rects| from
+ // |last_invalid_rects_| to do a minimal amount of copy when we have proper
+ // region algorithms implemented.
+
+ // Since the image obtained from OpenGL is upside-down, need to do some
+ // magic here to copy the correct rectangle.
+ const int y_offset = (buffer.size().height() - 1) * buffer.bytes_per_row();
+ for (InvalidRects::iterator i = last_invalid_rects_.begin();
+ i != last_invalid_rects_.end();
+ ++i) {
+ CopyRect(last_buffer_ + y_offset,
+ -buffer.bytes_per_row(),
+ buffer.ptr() + y_offset,
+ -buffer.bytes_per_row(),
+ 4, // Bytes for pixel for RGBA.
+ *i);
+ }
+ }
+ last_buffer_ = buffer.ptr();
+ last_invalid_rects_ = rects;
+
CGLContextObj CGL_MACRO_CONTEXT = cgl_context_;
glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, pixel_buffer_object_.get());
glReadPixels(0, 0, buffer.size().width(), buffer.size().height(),
@@ -347,7 +377,17 @@ void CapturerMac::GlBlitFast(const VideoFrameBuffer& buffer) {
// release it.
pixel_buffer_object_.Release();
} else {
- memcpy(buffer.ptr(), ptr, buffer.size().height() * buffer.bytes_per_row());
+ // Copy only from the dirty rects. Since the image obtained from OpenGL is
+ // upside-down we need to do some magic here to copy the correct rectangle.
+ const int y_offset = (buffer.size().height() - 1) * buffer.bytes_per_row();
+ for (InvalidRects::iterator i = rects.begin(); i != rects.end(); ++i) {
+ CopyRect(ptr + y_offset,
+ -buffer.bytes_per_row(),
+ buffer.ptr() + y_offset,
+ -buffer.bytes_per_row(),
+ 4, // Bytes for pixel for RGBA.
+ *i);
+ }
}
if (!glUnmapBufferARB(GL_PIXEL_PACK_BUFFER_ARB)) {
// If glUnmapBuffer returns false, then the contents of the data store are
@@ -386,15 +426,12 @@ void CapturerMac::CgBlit(const VideoFrameBuffer& buffer,
int src_bytes_per_row = CGDisplayBytesPerRow(main_display);
int src_bytes_per_pixel = CGDisplayBitsPerPixel(main_display) / 8;
for (InvalidRects::iterator i = rects.begin(); i != rects.end(); ++i) {
- int src_row_offset = i->x() * src_bytes_per_pixel;
- int dst_row_offset = i->x() * sizeof(uint32_t);
- int rect_width_in_bytes = i->width() * src_bytes_per_pixel;
- int ymax = i->height() + i->y();
- for (int y = i->y(); y < ymax; ++y) {
- memcpy(buffer.ptr() + y * buffer.bytes_per_row() + dst_row_offset,
- display_base_address + y * src_bytes_per_row + src_row_offset,
- rect_width_in_bytes);
- }
+ CopyRect(display_base_address,
+ src_bytes_per_row,
+ buffer.ptr(),
+ buffer.bytes_per_row(),
+ src_bytes_per_pixel,
+ *i);
}
}