diff options
author | zea@chromium.org <zea@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-02-13 23:36:26 +0000 |
---|---|---|
committer | zea@chromium.org <zea@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-02-13 23:36:26 +0000 |
commit | e264f2501c94d82e4e28333fe29c592706714cb4 (patch) | |
tree | a226f649568151f1cd2ffe3c8c7dcea95c5b6013 /google_apis | |
parent | f5f3905ce24a285d727a035178b4a7af50793f2c (diff) | |
download | chromium_src-e264f2501c94d82e4e28333fe29c592706714cb4.zip chromium_src-e264f2501c94d82e4e28333fe29c592706714cb4.tar.gz chromium_src-e264f2501c94d82e4e28333fe29c592706714cb4.tar.bz2 |
[GCM] Track connection failures properly
This patch consolidates the reset logic and adds a reason for the reset,
for use in histograms. Additionally, connection uptime and overall success
rate are now tracked as well.
BUG=284553
Review URL: https://codereview.chromium.org/160703002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@251186 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'google_apis')
-rw-r--r-- | google_apis/gcm/engine/connection_factory.h | 14 | ||||
-rw-r--r-- | google_apis/gcm/engine/connection_factory_impl.cc | 97 | ||||
-rw-r--r-- | google_apis/gcm/engine/connection_factory_impl.h | 18 | ||||
-rw-r--r-- | google_apis/gcm/engine/connection_factory_impl_unittest.cc | 10 | ||||
-rw-r--r-- | google_apis/gcm/engine/fake_connection_factory.cc | 12 | ||||
-rw-r--r-- | google_apis/gcm/engine/fake_connection_factory.h | 15 | ||||
-rw-r--r-- | google_apis/gcm/engine/fake_connection_handler.cc | 1 | ||||
-rw-r--r-- | google_apis/gcm/engine/mcs_client.cc | 9 | ||||
-rw-r--r-- | google_apis/gcm/engine/mcs_client_unittest.cc | 5 |
9 files changed, 135 insertions, 46 deletions
diff --git a/google_apis/gcm/engine/connection_factory.h b/google_apis/gcm/engine/connection_factory.h index 962a79a..298bf5e 100644 --- a/google_apis/gcm/engine/connection_factory.h +++ b/google_apis/gcm/engine/connection_factory.h @@ -23,6 +23,18 @@ class GCM_EXPORT ConnectionFactory { typedef base::Callback<void(mcs_proto::LoginRequest* login_request)> BuildLoginRequestCallback; + // Reasons for triggering a connection reset. Note that these enums are + // consumed by a histogram, so ordering should not be modified. + enum ConnectionResetReason { + LOGIN_FAILURE, // Login response included an error. + CLOSE_COMMAND, // Received a close command. + HEARTBEAT_FAILURE, // Heartbeat was not acknowledged in time. + SOCKET_FAILURE, // net::Socket error. + // Count of total number of connection reset reasons. All new reset reasons + // should be added above this line. + CONNECTION_RESET_COUNT, + }; + ConnectionFactory(); virtual ~ConnectionFactory(); @@ -62,7 +74,7 @@ class GCM_EXPORT ConnectionFactory { // event forced a reset (e.g. server sends a close connection response). // If the last connection was made within kConnectionResetWindowSecs, the old // backoff is restored, else a new backoff kicks off. - virtual void SignalConnectionReset() = 0; + virtual void SignalConnectionReset(ConnectionResetReason reason) = 0; }; } // namespace gcm diff --git a/google_apis/gcm/engine/connection_factory_impl.cc b/google_apis/gcm/engine/connection_factory_impl.cc index fcab2c2..3c2e412 100644 --- a/google_apis/gcm/engine/connection_factory_impl.cc +++ b/google_apis/gcm/engine/connection_factory_impl.cc @@ -29,6 +29,15 @@ const int kReadTimeoutMs = 30000; // 30 seconds. // as if it was transient). const int kConnectionResetWindowSecs = 10; // 10 seconds. +// Decides whether the last login was within kConnectionResetWindowSecs of now +// or not. +bool ShouldRestorePreviousBackoff(const base::TimeTicks& login_time, + const base::TimeTicks& now_ticks) { + return !login_time.is_null() && + now_ticks - login_time <= + base::TimeDelta::FromSeconds(kConnectionResetWindowSecs); +} + } // namespace ConnectionFactoryImpl::ConnectionFactoryImpl( @@ -41,6 +50,7 @@ ConnectionFactoryImpl::ConnectionFactoryImpl( network_session_(network_session), net_log_(net_log), connecting_(false), + logging_in_(false), weak_ptr_factory_(this) { } @@ -98,21 +108,52 @@ bool ConnectionFactoryImpl::IsEndpointReachable() const { !connecting_; } -void ConnectionFactoryImpl::SignalConnectionReset() { +void ConnectionFactoryImpl::SignalConnectionReset( + ConnectionResetReason reason) { + // A failure can trigger multiple resets, so no need to do anything if a + // connection is already in progress. if (connecting_) - return; // Already attempting to reconnect. + return; + + UMA_HISTOGRAM_ENUMERATION("GCM.ConnectionResetReason", + reason, + CONNECTION_RESET_COUNT); + if (!last_login_time_.is_null()) { + UMA_HISTOGRAM_CUSTOM_TIMES("GCM.ConnectionUpTime", + NowTicks() - last_login_time_, + base::TimeDelta::FromSeconds(1), + base::TimeDelta::FromHours(24), + 50); + // |last_login_time_| will be reset below, before attempting the new + // connection. + } if (connection_handler_) connection_handler_->Reset(); - if (!backoff_reset_time_.is_null() && - NowTicks() - backoff_reset_time_ <= - base::TimeDelta::FromSeconds(kConnectionResetWindowSecs)) { + if (socket_handle_.socket() && socket_handle_.socket()->IsConnected()) + socket_handle_.socket()->Disconnect(); + socket_handle_.Reset(); + + if (logging_in_) { + // Failures prior to login completion just reuse the existing backoff entry. + logging_in_ = false; + backoff_entry_->InformOfRequest(false); + } else if (reason == LOGIN_FAILURE || + ShouldRestorePreviousBackoff(last_login_time_, NowTicks())) { + // Failures due to login, or within the reset window of a login, restore + // the backoff entry that was saved off at login completion time. backoff_entry_.swap(previous_backoff_); backoff_entry_->InformOfRequest(false); + } else { + // We shouldn't be in backoff in thise case. + DCHECK(backoff_entry_->CanDiscard()); } - backoff_reset_time_ = base::TimeTicks(); - previous_backoff_->Reset(); + + // At this point the last login time has been consumed or deemed irrelevant, + // reset it. + last_login_time_ = base::TimeTicks(); + Connect(); } @@ -143,10 +184,8 @@ void ConnectionFactoryImpl::OnIPAddressChanged() { } void ConnectionFactoryImpl::ConnectImpl() { - DCHECK(!connection_handler_->CanSendMessage()); - if (socket_handle_.socket() && socket_handle_.socket()->IsConnected()) - socket_handle_.socket()->Disconnect(); - socket_handle_.Reset(); + DCHECK(connecting_); + DCHECK(!socket_handle_.socket()); // TODO(zea): resolve proxies. net::ProxyInfo proxy_info; @@ -190,6 +229,8 @@ base::TimeTicks ConnectionFactoryImpl::NowTicks() { } void ConnectionFactoryImpl::OnConnectDone(int result) { + UMA_HISTOGRAM_BOOLEAN("GCM.ConnectionSuccessRate", (result == net::OK)); + if (result != net::OK) { LOG(ERROR) << "Failed to connect to MCS endpoint with error " << result; backoff_entry_->InformOfRequest(false); @@ -198,31 +239,29 @@ void ConnectionFactoryImpl::OnConnectDone(int result) { return; } - DVLOG(1) << "MCS endpoint socket connection success, starting handshake."; + connecting_ = false; + logging_in_ = true; + DVLOG(1) << "MCS endpoint socket connection success, starting login."; InitHandler(); } void ConnectionFactoryImpl::ConnectionHandlerCallback(int result) { - if (result == net::OK) { - // Handshake succeeded, reset the backoff. - connecting_ = false; - backoff_reset_time_ = NowTicks(); - previous_backoff_.swap(backoff_entry_); - backoff_entry_->Reset(); + DCHECK(!connecting_); + if (result != net::OK) { + // TODO(zea): Consider how to handle errors that may require some sort of + // user intervention (login page, etc.). + UMA_HISTOGRAM_SPARSE_SLOWLY("GCM.ConnectionDisconnectErrorCode", result); + SignalConnectionReset(SOCKET_FAILURE); return; } - if (!connecting_) - UMA_HISTOGRAM_SPARSE_SLOWLY("GCM.ConnectionDisconnectErrorCode", result); - - if (connection_handler_) - connection_handler_->Reset(); - - // TODO(zea): Consider how to handle errors that may require some sort of - // user intervention (login page, etc.). - LOG(ERROR) << "Connection reset with error " << result; - backoff_entry_->InformOfRequest(false); - Connect(); + // Handshake complete, reset backoff. If the login failed with an error, + // the client should invoke SignalConnectionReset(LOGIN_FAILURE), which will + // restore the previous backoff. + last_login_time_ = NowTicks(); + previous_backoff_.swap(backoff_entry_); + backoff_entry_->Reset(); + logging_in_ = false; } } // namespace gcm diff --git a/google_apis/gcm/engine/connection_factory_impl.h b/google_apis/gcm/engine/connection_factory_impl.h index 3d79a78..7326d79 100644 --- a/google_apis/gcm/engine/connection_factory_impl.h +++ b/google_apis/gcm/engine/connection_factory_impl.h @@ -8,6 +8,7 @@ #include "google_apis/gcm/engine/connection_factory.h" #include "base/memory/weak_ptr.h" +#include "base/time/time.h" #include "google_apis/gcm/protocol/mcs.pb.h" #include "net/base/backoff_entry.h" #include "net/base/network_change_notifier.h" @@ -44,7 +45,7 @@ class GCM_EXPORT ConnectionFactoryImpl : virtual void Connect() OVERRIDE; virtual bool IsEndpointReachable() const OVERRIDE; virtual base::TimeTicks NextRetryAttempt() const OVERRIDE; - virtual void SignalConnectionReset() OVERRIDE; + virtual void SignalConnectionReset(ConnectionResetReason reason) OVERRIDE; // NetworkChangeNotifier observer implementations. virtual void OnConnectionTypeChanged( @@ -91,17 +92,26 @@ class GCM_EXPORT ConnectionFactoryImpl : net::NetLog* const net_log_; // The handle to the socket for the current connection, if one exists. net::ClientSocketHandle socket_handle_; - // Connection attempt backoff policy. + // Current backoff entry. scoped_ptr<net::BackoffEntry> backoff_entry_; - // Backoff policy from previous backoff attempt. + // Backoff entry from previous connection attempt. Updated on each login + // completion. scoped_ptr<net::BackoffEntry> previous_backoff_; - base::TimeTicks backoff_reset_time_; // Whether a connection attempt is currently in progress or we're in backoff // waiting until the next connection attempt. |!connecting_| denotes // steady state with an active connection. bool connecting_; + // Whether login successfully completed after the connection was established. + // If a connection reset happens while attempting to log in, the current + // backoff entry is reused (after incrementing with a new failure). + bool logging_in_; + + // The time of the last login completion. Used for calculating whether to + // restore a previous backoff entry and for measuring uptime. + base::TimeTicks last_login_time_; + // The current connection handler, if one exists. scoped_ptr<ConnectionHandlerImpl> connection_handler_; diff --git a/google_apis/gcm/engine/connection_factory_impl_unittest.cc b/google_apis/gcm/engine/connection_factory_impl_unittest.cc index 93a76dd..82c3f18 100644 --- a/google_apis/gcm/engine/connection_factory_impl_unittest.cc +++ b/google_apis/gcm/engine/connection_factory_impl_unittest.cc @@ -351,7 +351,7 @@ TEST_F(ConnectionFactoryImplTest, FailViaSignalReset) { factory()->Connect(); EXPECT_TRUE(factory()->NextRetryAttempt().is_null()); - factory()->SignalConnectionReset(); + factory()->SignalConnectionReset(ConnectionFactory::SOCKET_FAILURE); EXPECT_FALSE(factory()->NextRetryAttempt().is_null()); EXPECT_FALSE(factory()->GetConnectionHandler()->CanSendMessage()); } @@ -365,13 +365,13 @@ TEST_F(ConnectionFactoryImplTest, IgnoreResetWhileConnecting) { factory()->Connect(); EXPECT_TRUE(factory()->NextRetryAttempt().is_null()); - factory()->SignalConnectionReset(); + factory()->SignalConnectionReset(ConnectionFactory::SOCKET_FAILURE); base::TimeTicks retry_time = factory()->NextRetryAttempt(); EXPECT_FALSE(retry_time.is_null()); const int kNumAttempts = 5; for (int i = 0; i < kNumAttempts; ++i) - factory()->SignalConnectionReset(); + factory()->SignalConnectionReset(ConnectionFactory::SOCKET_FAILURE); EXPECT_EQ(retry_time, factory()->NextRetryAttempt()); } @@ -395,7 +395,7 @@ TEST_F(ConnectionFactoryImplTest, SignalResetRestoresBackoff) { WaitForConnections(); EXPECT_TRUE(factory()->NextRetryAttempt().is_null()); - factory()->SignalConnectionReset(); + factory()->SignalConnectionReset(ConnectionFactory::SOCKET_FAILURE); EXPECT_FALSE(factory()->GetConnectionHandler()->CanSendMessage()); EXPECT_NE(retry_time, factory()->NextRetryAttempt()); retry_time = factory()->NextRetryAttempt(); @@ -410,7 +410,7 @@ TEST_F(ConnectionFactoryImplTest, SignalResetRestoresBackoff) { WaitForConnections(); EXPECT_TRUE(factory()->NextRetryAttempt().is_null()); - factory()->SignalConnectionReset(); + factory()->SignalConnectionReset(ConnectionFactory::SOCKET_FAILURE); EXPECT_NE(retry_time, factory()->NextRetryAttempt()); retry_time = factory()->NextRetryAttempt(); EXPECT_FALSE(retry_time.is_null()); diff --git a/google_apis/gcm/engine/fake_connection_factory.cc b/google_apis/gcm/engine/fake_connection_factory.cc index 084ff66..88bbc51 100644 --- a/google_apis/gcm/engine/fake_connection_factory.cc +++ b/google_apis/gcm/engine/fake_connection_factory.cc @@ -10,7 +10,9 @@ namespace gcm { -FakeConnectionFactory::FakeConnectionFactory() { +FakeConnectionFactory::FakeConnectionFactory() + : reconnect_pending_(false), + delay_reconnect_(false) { } FakeConnectionFactory::~FakeConnectionFactory() { @@ -43,8 +45,12 @@ base::TimeTicks FakeConnectionFactory::NextRetryAttempt() const { return base::TimeTicks(); } -void FakeConnectionFactory::SignalConnectionReset() { - Connect(); +void FakeConnectionFactory::SignalConnectionReset( + ConnectionResetReason reason) { + if (!delay_reconnect_) + Connect(); + else + reconnect_pending_ = true; } } // namespace gcm diff --git a/google_apis/gcm/engine/fake_connection_factory.h b/google_apis/gcm/engine/fake_connection_factory.h index bd29310..393e591 100644 --- a/google_apis/gcm/engine/fake_connection_factory.h +++ b/google_apis/gcm/engine/fake_connection_factory.h @@ -28,13 +28,26 @@ class FakeConnectionFactory : public ConnectionFactory { virtual void Connect() OVERRIDE; virtual bool IsEndpointReachable() const OVERRIDE; virtual base::TimeTicks NextRetryAttempt() const OVERRIDE; - virtual void SignalConnectionReset() OVERRIDE; + virtual void SignalConnectionReset(ConnectionResetReason reason) OVERRIDE; + + // Whether a connection reset has been triggered and is yet to run. + bool reconnect_pending() const { return reconnect_pending_; } + + // Whether connection resets should be handled immediately or delayed until + // release. + void set_delay_reconnect(bool should_delay) { + delay_reconnect_ = should_delay; + } private: scoped_ptr<FakeConnectionHandler> connection_handler_; BuildLoginRequestCallback request_builder_; + // Logic for handling connection resets. + bool reconnect_pending_; + bool delay_reconnect_; + DISALLOW_COPY_AND_ASSIGN(FakeConnectionFactory); }; diff --git a/google_apis/gcm/engine/fake_connection_handler.cc b/google_apis/gcm/engine/fake_connection_handler.cc index acbd1d9..3e5d5ac 100644 --- a/google_apis/gcm/engine/fake_connection_handler.cc +++ b/google_apis/gcm/engine/fake_connection_handler.cc @@ -40,6 +40,7 @@ FakeConnectionHandler::~FakeConnectionHandler() { void FakeConnectionHandler::Init(const mcs_proto::LoginRequest& login_request, net::StreamSocket* socket) { + ASSERT_GE(expected_outgoing_messages_.size(), 1U); EXPECT_EQ(expected_outgoing_messages_.front().SerializeAsString(), login_request.SerializeAsString()); expected_outgoing_messages_.pop_front(); diff --git a/google_apis/gcm/engine/mcs_client.cc b/google_apis/gcm/engine/mcs_client.cc index 2f4edf1..e81a829 100644 --- a/google_apis/gcm/engine/mcs_client.cc +++ b/google_apis/gcm/engine/mcs_client.cc @@ -606,6 +606,9 @@ void MCSClient::HandlePacketFromWire( state_ = UNINITIALIZED; DVLOG(1) << " Error code: " << login_response->error().code(); DVLOG(1) << " Error message: " << login_response->error().message(); + LOG(ERROR) << "Failed to log in to GCM, resetting connection."; + connection_factory_->SignalConnectionReset( + ConnectionFactory::LOGIN_FAILURE); mcs_error_callback_.Run(); return; } @@ -656,7 +659,8 @@ void MCSClient::HandlePacketFromWire( case kCloseTag: LOG(ERROR) << "Received close command, resetting connection."; state_ = LOADED; - connection_factory_->SignalConnectionReset(); + connection_factory_->SignalConnectionReset( + ConnectionFactory::CLOSE_COMMAND); return; case kIqStanzaTag: { DCHECK_GE(stream_id_in_, 1U); @@ -805,7 +809,8 @@ MCSClient::PersistentId MCSClient::GetNextPersistentId() { } void MCSClient::OnConnectionResetByHeartbeat() { - connection_factory_->SignalConnectionReset(); + connection_factory_->SignalConnectionReset( + ConnectionFactory::HEARTBEAT_FAILURE); } void MCSClient::NotifyMessageSendStatus( diff --git a/google_apis/gcm/engine/mcs_client_unittest.cc b/google_apis/gcm/engine/mcs_client_unittest.cc index 085f136..2b95254 100644 --- a/google_apis/gcm/engine/mcs_client_unittest.cc +++ b/google_apis/gcm/engine/mcs_client_unittest.cc @@ -274,15 +274,18 @@ TEST_F(MCSClientTest, LoginSuccess) { EXPECT_EQ(kLoginResponseTag, received_message()->tag()); } -// Encounter a server error during the login attempt. +// Encounter a server error during the login attempt. Should trigger a +// reconnect. TEST_F(MCSClientTest, FailLogin) { BuildMCSClient(); InitializeClient(); GetFakeHandler()->set_fail_login(true); + connection_factory()->set_delay_reconnect(true); LoginClient(std::vector<std::string>()); EXPECT_FALSE(connection_factory()->IsEndpointReachable()); EXPECT_FALSE(init_success()); EXPECT_FALSE(received_message()); + EXPECT_TRUE(connection_factory()->reconnect_pending()); } // Send a message without RMQ support. |