From fe8d73c066f9fd5506509fbc95d53f33872e13ab Mon Sep 17 00:00:00 2001 From: "reveman@chromium.org" Date: Sat, 16 Feb 2013 22:37:32 +0000 Subject: Re-land: Mark async texture uploads as completed from the upload thread. This reduces the latency between when an upload completes and when the client is notified. BUG=173802 NOTRY=True Review URL: https://chromiumcodereview.appspot.com/12213073 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@183010 0039d316-1c4b-4281-b951-d872f2087c98 --- content/common/gpu/gpu_command_buffer_stub.cc | 3 + .../service/async_pixel_transfer_delegate_mock.h | 5 +- gpu/command_buffer/service/common_decoder.cc | 1 + gpu/command_buffer/service/common_decoder.h | 1 + gpu/command_buffer/service/gles2_cmd_decoder.cc | 5 + gpu/command_buffer/service/query_manager.cc | 104 ++++++++++++++++----- gpu/command_buffer/service/query_manager.h | 32 ++++++- ui/gl/async_pixel_transfer_delegate.h | 6 +- ui/gl/async_pixel_transfer_delegate_android.cc | 53 +++++++---- ui/gl/async_pixel_transfer_delegate_stub.cc | 5 +- ui/gl/async_pixel_transfer_delegate_stub.h | 4 +- 11 files changed, 170 insertions(+), 49 deletions(-) diff --git a/content/common/gpu/gpu_command_buffer_stub.cc b/content/common/gpu/gpu_command_buffer_stub.cc index de8d196..c0cf215 100644 --- a/content/common/gpu/gpu_command_buffer_stub.cc +++ b/content/common/gpu/gpu_command_buffer_stub.cc @@ -299,6 +299,9 @@ void GpuCommandBufferStub::Destroy() { while (!sync_points_.empty()) OnRetireSyncPoint(sync_points_.front()); + if (decoder_.get()) + decoder_->set_engine(NULL); + // The scheduler has raw references to the decoder and the command buffer so // destroy it before those. scheduler_.reset(); diff --git a/gpu/command_buffer/service/async_pixel_transfer_delegate_mock.h b/gpu/command_buffer/service/async_pixel_transfer_delegate_mock.h index 4f44f7a..b716e70 100644 --- a/gpu/command_buffer/service/async_pixel_transfer_delegate_mock.h +++ b/gpu/command_buffer/service/async_pixel_transfer_delegate_mock.h @@ -32,8 +32,9 @@ class MockAsyncPixelTransferDelegate : public gfx::AsyncPixelTransferDelegate { // Implement AsyncPixelTransferDelegate. MOCK_METHOD1(CreateRawPixelTransferState, gfx::AsyncPixelTransferState*(GLuint service_id)); - MOCK_METHOD1(AsyncNotifyCompletion, - void(const base::Closure& task)); + MOCK_METHOD2(AsyncNotifyCompletion, + void(const AsyncMemoryParams& mem_params, + const CompletionCallback& callback)); MOCK_METHOD3(AsyncTexImage2D, void(gfx::AsyncPixelTransferState*, const AsyncTexImage2DParams& tex_params, diff --git a/gpu/command_buffer/service/common_decoder.cc b/gpu/command_buffer/service/common_decoder.cc index fa97c1c..e0a6a31 100644 --- a/gpu/command_buffer/service/common_decoder.cc +++ b/gpu/command_buffer/service/common_decoder.cc @@ -63,6 +63,7 @@ CommonDecoder::~CommonDecoder() {} void* CommonDecoder::GetAddressAndCheckSize(unsigned int shm_id, unsigned int offset, unsigned int size) { + CHECK(engine_); Buffer buffer = engine_->GetSharedMemoryBuffer(shm_id); if (!buffer.ptr) return NULL; diff --git a/gpu/command_buffer/service/common_decoder.h b/gpu/command_buffer/service/common_decoder.h index 2848360..2465934 100644 --- a/gpu/command_buffer/service/common_decoder.h +++ b/gpu/command_buffer/service/common_decoder.h @@ -102,6 +102,7 @@ class GPU_EXPORT CommonDecoder : NON_EXPORTED_BASE(public AsyncAPIInterface) { void set_engine(CommandBufferEngine* engine) { engine_ = engine; } + CommandBufferEngine* engine() const { return engine_; } // Creates a bucket. If the bucket already exists returns that bucket. Bucket* CreateBucket(uint32 bucket_id); diff --git a/gpu/command_buffer/service/gles2_cmd_decoder.cc b/gpu/command_buffer/service/gles2_cmd_decoder.cc index 7f14ef2..0f34765 100644 --- a/gpu/command_buffer/service/gles2_cmd_decoder.cc +++ b/gpu/command_buffer/service/gles2_cmd_decoder.cc @@ -2725,6 +2725,9 @@ bool GLES2DecoderImpl::MakeCurrent() { return false; } + if (engine() && query_manager_.get()) + query_manager_->ProcessPendingTransferQueries(); + // TODO(epenner): Is there a better place to do this? Transfers // can complete any time we yield the main thread. So we *must* // process transfers after any such yield, before resuming. @@ -9126,6 +9129,8 @@ error::Error GLES2DecoderImpl::HandleEndQueryEXT( return error::kOutOfBounds; } + query_manager_->ProcessPendingTransferQueries(); + state_.current_query = NULL; return error::kNoError; } diff --git a/gpu/command_buffer/service/query_manager.cc b/gpu/command_buffer/service/query_manager.cc index 51055e2..cc4de7a 100644 --- a/gpu/command_buffer/service/query_manager.cc +++ b/gpu/command_buffer/service/query_manager.cc @@ -3,13 +3,15 @@ // found in the LICENSE file. #include "gpu/command_buffer/service/query_manager.h" + #include "base/atomicops.h" #include "base/bind.h" #include "base/logging.h" +#include "base/shared_memory.h" #include "base/time.h" #include "gpu/command_buffer/common/gles2_cmd_format.h" -#include "gpu/command_buffer/service/gles2_cmd_decoder.h" #include "gpu/command_buffer/service/feature_info.h" +#include "gpu/command_buffer/service/gles2_cmd_decoder.h" #include "ui/gl/async_pixel_transfer_delegate.h" namespace gpu { @@ -176,10 +178,20 @@ class AsyncPixelTransfersCompletedQuery virtual bool Process() OVERRIDE; virtual void Destroy(bool have_context) OVERRIDE; - void MarkAsCompletedCallback() { MarkAsCompleted(1); } - protected: virtual ~AsyncPixelTransfersCompletedQuery(); + + static void MarkAsCompletedThreadSafe( + uint32 submit_count, const gfx::AsyncMemoryParams& mem_params) { + DCHECK(mem_params.shared_memory); + DCHECK(mem_params.shared_memory->memory()); + void *data = static_cast(mem_params.shared_memory->memory()) + + mem_params.shm_data_offset; + QuerySync* sync = static_cast(data); + + // No need for a MemoryBarrier here as sync->result is not written. + sync->process_count = submit_count; + } }; AsyncPixelTransfersCompletedQuery::AsyncPixelTransfersCompletedQuery( @@ -192,28 +204,41 @@ bool AsyncPixelTransfersCompletedQuery::Begin() { } bool AsyncPixelTransfersCompletedQuery::End(uint32 submit_count) { - MarkAsPending(submit_count); - - // This will call MarkAsCompleted(1) as a reply to a task on - // the async upload thread, such that it occurs after all previous - // async transfers have completed. + gfx::AsyncMemoryParams mem_params; + // Get the real shared memory since it might need to be duped to prevent + // use-after-free of the memory. + Buffer buffer = manager()->decoder()->GetSharedMemoryBuffer(shm_id()); + if (!buffer.shared_memory) + return false; + mem_params.shared_memory = buffer.shared_memory; + mem_params.shm_size = buffer.size; + mem_params.shm_data_offset = shm_offset(); + mem_params.shm_data_size = sizeof(QuerySync); + + // Ask AsyncPixelTransferDelegate to run completion callback after all + // previous async transfers are done. No guarantee that callback is run + // on the current thread. manager()->decoder()->GetAsyncPixelTransferDelegate()->AsyncNotifyCompletion( - base::Bind( - &AsyncPixelTransfersCompletedQuery::MarkAsCompletedCallback, - AsWeakPtr())); - - // TODO(epenner): The async task occurs outside the normal - // flow, via a callback on this thread. Is there anything - // missing or wrong with that? + mem_params, + base::Bind(AsyncPixelTransfersCompletedQuery::MarkAsCompletedThreadSafe, + submit_count)); - // TODO(epenner): Could we possibly trigger the completion on - // the upload thread by writing to the query shared memory - // directly? - return true; + return AddToPendingTransferQueue(submit_count); } bool AsyncPixelTransfersCompletedQuery::Process() { - NOTREACHED(); + QuerySync* sync = manager()->decoder()->GetSharedMemoryAs( + shm_id(), shm_offset(), sizeof(*sync)); + if (!sync) + return false; + + // Check if completion callback has been run. sync->process_count atomicity + // is guaranteed as this is already used to notify client of a completed + // query. + if (sync->process_count != submit_count()) + return true; + + UnmarkAsPending(); return true; } @@ -435,7 +460,7 @@ bool QueryManager::ProcessPendingQueries() { return false; } if (query->pending()) { - return true; + break; } pending_queries_.pop_front(); } @@ -447,6 +472,25 @@ bool QueryManager::HavePendingQueries() { return !pending_queries_.empty(); } +bool QueryManager::ProcessPendingTransferQueries() { + while (!pending_transfer_queries_.empty()) { + Query* query = pending_transfer_queries_.front().get(); + if (!query->Process()) { + return false; + } + if (query->pending()) { + break; + } + pending_transfer_queries_.pop_front(); + } + + return true; +} + +bool QueryManager::HavePendingTransferQueries() { + return !pending_transfer_queries_.empty(); +} + bool QueryManager::AddPendingQuery(Query* query, uint32 submit_count) { DCHECK(query); DCHECK(!query->IsDeleted()); @@ -458,6 +502,17 @@ bool QueryManager::AddPendingQuery(Query* query, uint32 submit_count) { return true; } +bool QueryManager::AddPendingTransferQuery(Query* query, uint32 submit_count) { + DCHECK(query); + DCHECK(!query->IsDeleted()); + if (!RemovePendingQuery(query)) { + return false; + } + query->MarkAsPending(submit_count); + pending_transfer_queries_.push_back(query); + return true; +} + bool QueryManager::RemovePendingQuery(Query* query) { DCHECK(query); if (query->pending()) { @@ -471,6 +526,13 @@ bool QueryManager::RemovePendingQuery(Query* query) { break; } } + for (QueryQueue::iterator it = pending_transfer_queries_.begin(); + it != pending_transfer_queries_.end(); ++it) { + if (it->get() == query) { + pending_transfer_queries_.erase(it); + break; + } + } if (!query->MarkAsCompleted(0)) { return false; } diff --git a/gpu/command_buffer/service/query_manager.h b/gpu/command_buffer/service/query_manager.h index 322c03d..bf083aa 100644 --- a/gpu/command_buffer/service/query_manager.h +++ b/gpu/command_buffer/service/query_manager.h @@ -89,11 +89,21 @@ class GPU_EXPORT QueryManager { submit_count_ = submit_count; } + void UnmarkAsPending() { + DCHECK(pending_); + pending_ = false; + } + // Returns false if shared memory for sync is invalid. bool AddToPendingQueue(uint32 submit_count) { return manager_->AddPendingQuery(this, submit_count); } + // Returns false if shared memory for sync is invalid. + bool AddToPendingTransferQueue(uint32 submit_count) { + return manager_->AddPendingTransferQuery(this, submit_count); + } + void BeginQueryHelper(GLenum target, GLuint id) { manager_->BeginQueryHelper(target, id); } @@ -102,15 +112,15 @@ class GPU_EXPORT QueryManager { manager_->EndQueryHelper(target); } + uint32 submit_count() const { + return submit_count_; + } + private: friend class QueryManager; friend class QueryManagerTest; friend class base::RefCounted; - uint32 submit_count() const { - return submit_count_; - } - // The manager that owns this Query. QueryManager* manager_; @@ -162,6 +172,13 @@ class GPU_EXPORT QueryManager { // True if there are pending queries. bool HavePendingQueries(); + // Processes pending transfer queries. Returns false if any queries are + // pointing to invalid shared memory. + bool ProcessPendingTransferQueries(); + + // True if there are pending transfer queries. + bool HavePendingTransferQueries(); + GLES2Decoder* decoder() const { return decoder_; } @@ -179,6 +196,10 @@ class GPU_EXPORT QueryManager { // Returns false if any query is pointing to invalid shared memory. bool AddPendingQuery(Query* query, uint32 submit_count); + // Adds to queue of transfer queries waiting for completion. + // Returns false if any query is pointing to invalid shared memory. + bool AddPendingTransferQuery(Query* query, uint32 submit_count); + // Removes a query from the queue of pending queries. // Returns false if any query is pointing to invalid shared memory. bool RemovePendingQuery(Query* query); @@ -205,6 +226,9 @@ class GPU_EXPORT QueryManager { typedef std::deque QueryQueue; QueryQueue pending_queries_; + // Async pixel transfer queries waiting for completion. + QueryQueue pending_transfer_queries_; + DISALLOW_COPY_AND_ASSIGN(QueryManager); }; diff --git a/ui/gl/async_pixel_transfer_delegate.h b/ui/gl/async_pixel_transfer_delegate.h index d07a77d..da45722 100644 --- a/ui/gl/async_pixel_transfer_delegate.h +++ b/ui/gl/async_pixel_transfer_delegate.h @@ -78,6 +78,8 @@ class GL_EXPORT AsyncPixelTransferState : class GL_EXPORT AsyncPixelTransferDelegate { public: + typedef base::Callback CompletionCallback; + static scoped_ptr Create(gfx::GLContext* context); virtual ~AsyncPixelTransferDelegate() {} @@ -89,8 +91,10 @@ class GL_EXPORT AsyncPixelTransferDelegate { return make_scoped_ptr(CreateRawPixelTransferState(texture_id)); } + // There's no guarantee that callback will run on the caller thread. virtual void AsyncNotifyCompletion( - const base::Closure& notify_task) = 0; + const AsyncMemoryParams& mem_params, + const CompletionCallback& callback) = 0; virtual void AsyncTexImage2D( AsyncPixelTransferState* state, diff --git a/ui/gl/async_pixel_transfer_delegate_android.cc b/ui/gl/async_pixel_transfer_delegate_android.cc index cd934a5..98ac32e 100644 --- a/ui/gl/async_pixel_transfer_delegate_android.cc +++ b/ui/gl/async_pixel_transfer_delegate_android.cc @@ -148,8 +148,7 @@ SafeSharedMemoryPool* safe_shared_memory_pool() { // Class which holds async pixel transfers state (EGLImage). // The EGLImage is accessed by either thread, but everything // else accessed only on the main thread. -class TransferStateInternal - : public base::RefCountedThreadSafe { +class TransferStateInternal : public base::RefCounted { public: explicit TransferStateInternal(GLuint texture_id, bool wait_for_uploads, @@ -239,7 +238,7 @@ class TransferStateInternal } protected: - friend class base::RefCountedThreadSafe; + friend class base::RefCounted; friend class AsyncPixelTransferDelegateAndroid; static void DeleteTexture(GLuint id) { @@ -318,7 +317,8 @@ class AsyncPixelTransferDelegateAndroid // implement AsyncPixelTransferDelegate: virtual void AsyncNotifyCompletion( - const base::Closure& task) OVERRIDE; + const AsyncMemoryParams& mem_params, + const CompletionCallback& callback) OVERRIDE; virtual void AsyncTexImage2D( AsyncPixelTransferState* state, const AsyncTexImage2DParams& tex_params, @@ -338,6 +338,10 @@ class AsyncPixelTransferDelegateAndroid void AsyncTexImage2DCompleted(scoped_refptr state); void AsyncTexSubImage2DCompleted(scoped_refptr state); + static void PerformNotifyCompletion( + AsyncMemoryParams mem_params, + ScopedSafeSharedMemory* safe_shared_memory, + const CompletionCallback& callback); static void PerformAsyncTexImage2D( TransferStateInternal* state, AsyncTexImage2DParams tex_params, @@ -406,7 +410,7 @@ AsyncPixelTransferState* CreateRawPixelTransferState(GLuint texture_id) { // We can't wait on uploads on imagination (it can take 200ms+). - // In practice, they are complete when the CPU glSubTexImage2D completes. + // In practice, they are complete when the CPU glTexSubImage2D completes. bool wait_for_uploads = !is_imagination_; // Qualcomm has a race when using image_preserved=FALSE, @@ -422,19 +426,23 @@ AsyncPixelTransferState* use_image_preserved)); } -namespace { -// Dummy function to measure completion on -// the upload thread. -void NoOp() {} -} // namespace - void AsyncPixelTransferDelegateAndroid::AsyncNotifyCompletion( - const base::Closure& task) { - // Post a no-op task to the upload thread followed - // by a reply to the callback. The reply will then occur after - // all async transfers are complete. - transfer_message_loop_proxy()->PostTaskAndReply(FROM_HERE, - base::Bind(&NoOp), task); + const AsyncMemoryParams& mem_params, + const CompletionCallback& callback) { + DCHECK(mem_params.shared_memory); + DCHECK_LE(mem_params.shm_data_offset + mem_params.shm_data_size, + mem_params.shm_size); + // Post a PerformNotifyCompletion task to the upload thread. This task + // will run after all async transfers are complete. + transfer_message_loop_proxy()->PostTask( + FROM_HERE, + base::Bind(&AsyncPixelTransferDelegateAndroid::PerformNotifyCompletion, + mem_params, + base::Owned( + new ScopedSafeSharedMemory(safe_shared_memory_pool(), + mem_params.shared_memory, + mem_params.shm_size)), + callback)); } void AsyncPixelTransferDelegateAndroid::AsyncTexImage2D( @@ -558,6 +566,16 @@ void SetGlParametersForEglImageTexture() { } } // namespace +void AsyncPixelTransferDelegateAndroid::PerformNotifyCompletion( + AsyncMemoryParams mem_params, + ScopedSafeSharedMemory* safe_shared_memory, + const CompletionCallback& callback) { + TRACE_EVENT0("gpu", "PerformNotifyCompletion"); + gfx::AsyncMemoryParams safe_mem_params = mem_params; + safe_mem_params.shared_memory = safe_shared_memory->shared_memory(); + callback.Run(safe_mem_params); +} + void AsyncPixelTransferDelegateAndroid::PerformAsyncTexImage2D( TransferStateInternal* state, AsyncTexImage2DParams tex_params, @@ -642,7 +660,6 @@ void AsyncPixelTransferDelegateAndroid::PerformAsyncTexSubImage2D( state->last_transfer_time_ = base::TimeTicks::HighResNow() - begin_time; } - namespace { bool IsPowerOfTwo (unsigned int x) { return ((x != 0) && !(x & (x - 1))); diff --git a/ui/gl/async_pixel_transfer_delegate_stub.cc b/ui/gl/async_pixel_transfer_delegate_stub.cc index 86f0687..b4f3639 100644 --- a/ui/gl/async_pixel_transfer_delegate_stub.cc +++ b/ui/gl/async_pixel_transfer_delegate_stub.cc @@ -77,8 +77,9 @@ AsyncPixelTransferState* } void AsyncPixelTransferDelegateStub::AsyncNotifyCompletion( - const base::Closure& task) { - task.Run(); + const AsyncMemoryParams& mem_params, + const CompletionCallback& callback) { + callback.Run(mem_params); } void AsyncPixelTransferDelegateStub::AsyncTexImage2D( diff --git a/ui/gl/async_pixel_transfer_delegate_stub.h b/ui/gl/async_pixel_transfer_delegate_stub.h index dc24f8d..cf95a98 100644 --- a/ui/gl/async_pixel_transfer_delegate_stub.h +++ b/ui/gl/async_pixel_transfer_delegate_stub.h @@ -36,7 +36,9 @@ class AsyncPixelTransferDelegateStub : public AsyncPixelTransferDelegate { virtual ~AsyncPixelTransferDelegateStub(); // implement AsyncPixelTransferDelegate: - virtual void AsyncNotifyCompletion(const base::Closure& task) OVERRIDE; + virtual void AsyncNotifyCompletion( + const AsyncMemoryParams& mem_params, + const CompletionCallback& callback) OVERRIDE; virtual void AsyncTexImage2D( AsyncPixelTransferState* transfer_state, const AsyncTexImage2DParams& tex_params, -- cgit v1.1