summaryrefslogtreecommitdiffstats
path: root/google_apis
diff options
context:
space:
mode:
authorzea@chromium.org <zea@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2014-02-13 23:36:26 +0000
committerzea@chromium.org <zea@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2014-02-13 23:36:26 +0000
commite264f2501c94d82e4e28333fe29c592706714cb4 (patch)
treea226f649568151f1cd2ffe3c8c7dcea95c5b6013 /google_apis
parentf5f3905ce24a285d727a035178b4a7af50793f2c (diff)
downloadchromium_src-e264f2501c94d82e4e28333fe29c592706714cb4.zip
chromium_src-e264f2501c94d82e4e28333fe29c592706714cb4.tar.gz
chromium_src-e264f2501c94d82e4e28333fe29c592706714cb4.tar.bz2
[GCM] Track connection failures properly
This patch consolidates the reset logic and adds a reason for the reset, for use in histograms. Additionally, connection uptime and overall success rate are now tracked as well. BUG=284553 Review URL: https://codereview.chromium.org/160703002 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@251186 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'google_apis')
-rw-r--r--google_apis/gcm/engine/connection_factory.h14
-rw-r--r--google_apis/gcm/engine/connection_factory_impl.cc97
-rw-r--r--google_apis/gcm/engine/connection_factory_impl.h18
-rw-r--r--google_apis/gcm/engine/connection_factory_impl_unittest.cc10
-rw-r--r--google_apis/gcm/engine/fake_connection_factory.cc12
-rw-r--r--google_apis/gcm/engine/fake_connection_factory.h15
-rw-r--r--google_apis/gcm/engine/fake_connection_handler.cc1
-rw-r--r--google_apis/gcm/engine/mcs_client.cc9
-rw-r--r--google_apis/gcm/engine/mcs_client_unittest.cc5
9 files changed, 135 insertions, 46 deletions
diff --git a/google_apis/gcm/engine/connection_factory.h b/google_apis/gcm/engine/connection_factory.h
index 962a79a..298bf5e 100644
--- a/google_apis/gcm/engine/connection_factory.h
+++ b/google_apis/gcm/engine/connection_factory.h
@@ -23,6 +23,18 @@ class GCM_EXPORT ConnectionFactory {
typedef base::Callback<void(mcs_proto::LoginRequest* login_request)>
BuildLoginRequestCallback;
+ // Reasons for triggering a connection reset. Note that these enums are
+ // consumed by a histogram, so ordering should not be modified.
+ enum ConnectionResetReason {
+ LOGIN_FAILURE, // Login response included an error.
+ CLOSE_COMMAND, // Received a close command.
+ HEARTBEAT_FAILURE, // Heartbeat was not acknowledged in time.
+ SOCKET_FAILURE, // net::Socket error.
+ // Count of total number of connection reset reasons. All new reset reasons
+ // should be added above this line.
+ CONNECTION_RESET_COUNT,
+ };
+
ConnectionFactory();
virtual ~ConnectionFactory();
@@ -62,7 +74,7 @@ class GCM_EXPORT ConnectionFactory {
// event forced a reset (e.g. server sends a close connection response).
// If the last connection was made within kConnectionResetWindowSecs, the old
// backoff is restored, else a new backoff kicks off.
- virtual void SignalConnectionReset() = 0;
+ virtual void SignalConnectionReset(ConnectionResetReason reason) = 0;
};
} // namespace gcm
diff --git a/google_apis/gcm/engine/connection_factory_impl.cc b/google_apis/gcm/engine/connection_factory_impl.cc
index fcab2c2..3c2e412 100644
--- a/google_apis/gcm/engine/connection_factory_impl.cc
+++ b/google_apis/gcm/engine/connection_factory_impl.cc
@@ -29,6 +29,15 @@ const int kReadTimeoutMs = 30000; // 30 seconds.
// as if it was transient).
const int kConnectionResetWindowSecs = 10; // 10 seconds.
+// Decides whether the last login was within kConnectionResetWindowSecs of now
+// or not.
+bool ShouldRestorePreviousBackoff(const base::TimeTicks& login_time,
+ const base::TimeTicks& now_ticks) {
+ return !login_time.is_null() &&
+ now_ticks - login_time <=
+ base::TimeDelta::FromSeconds(kConnectionResetWindowSecs);
+}
+
} // namespace
ConnectionFactoryImpl::ConnectionFactoryImpl(
@@ -41,6 +50,7 @@ ConnectionFactoryImpl::ConnectionFactoryImpl(
network_session_(network_session),
net_log_(net_log),
connecting_(false),
+ logging_in_(false),
weak_ptr_factory_(this) {
}
@@ -98,21 +108,52 @@ bool ConnectionFactoryImpl::IsEndpointReachable() const {
!connecting_;
}
-void ConnectionFactoryImpl::SignalConnectionReset() {
+void ConnectionFactoryImpl::SignalConnectionReset(
+ ConnectionResetReason reason) {
+ // A failure can trigger multiple resets, so no need to do anything if a
+ // connection is already in progress.
if (connecting_)
- return; // Already attempting to reconnect.
+ return;
+
+ UMA_HISTOGRAM_ENUMERATION("GCM.ConnectionResetReason",
+ reason,
+ CONNECTION_RESET_COUNT);
+ if (!last_login_time_.is_null()) {
+ UMA_HISTOGRAM_CUSTOM_TIMES("GCM.ConnectionUpTime",
+ NowTicks() - last_login_time_,
+ base::TimeDelta::FromSeconds(1),
+ base::TimeDelta::FromHours(24),
+ 50);
+ // |last_login_time_| will be reset below, before attempting the new
+ // connection.
+ }
if (connection_handler_)
connection_handler_->Reset();
- if (!backoff_reset_time_.is_null() &&
- NowTicks() - backoff_reset_time_ <=
- base::TimeDelta::FromSeconds(kConnectionResetWindowSecs)) {
+ if (socket_handle_.socket() && socket_handle_.socket()->IsConnected())
+ socket_handle_.socket()->Disconnect();
+ socket_handle_.Reset();
+
+ if (logging_in_) {
+ // Failures prior to login completion just reuse the existing backoff entry.
+ logging_in_ = false;
+ backoff_entry_->InformOfRequest(false);
+ } else if (reason == LOGIN_FAILURE ||
+ ShouldRestorePreviousBackoff(last_login_time_, NowTicks())) {
+ // Failures due to login, or within the reset window of a login, restore
+ // the backoff entry that was saved off at login completion time.
backoff_entry_.swap(previous_backoff_);
backoff_entry_->InformOfRequest(false);
+ } else {
+ // We shouldn't be in backoff in thise case.
+ DCHECK(backoff_entry_->CanDiscard());
}
- backoff_reset_time_ = base::TimeTicks();
- previous_backoff_->Reset();
+
+ // At this point the last login time has been consumed or deemed irrelevant,
+ // reset it.
+ last_login_time_ = base::TimeTicks();
+
Connect();
}
@@ -143,10 +184,8 @@ void ConnectionFactoryImpl::OnIPAddressChanged() {
}
void ConnectionFactoryImpl::ConnectImpl() {
- DCHECK(!connection_handler_->CanSendMessage());
- if (socket_handle_.socket() && socket_handle_.socket()->IsConnected())
- socket_handle_.socket()->Disconnect();
- socket_handle_.Reset();
+ DCHECK(connecting_);
+ DCHECK(!socket_handle_.socket());
// TODO(zea): resolve proxies.
net::ProxyInfo proxy_info;
@@ -190,6 +229,8 @@ base::TimeTicks ConnectionFactoryImpl::NowTicks() {
}
void ConnectionFactoryImpl::OnConnectDone(int result) {
+ UMA_HISTOGRAM_BOOLEAN("GCM.ConnectionSuccessRate", (result == net::OK));
+
if (result != net::OK) {
LOG(ERROR) << "Failed to connect to MCS endpoint with error " << result;
backoff_entry_->InformOfRequest(false);
@@ -198,31 +239,29 @@ void ConnectionFactoryImpl::OnConnectDone(int result) {
return;
}
- DVLOG(1) << "MCS endpoint socket connection success, starting handshake.";
+ connecting_ = false;
+ logging_in_ = true;
+ DVLOG(1) << "MCS endpoint socket connection success, starting login.";
InitHandler();
}
void ConnectionFactoryImpl::ConnectionHandlerCallback(int result) {
- if (result == net::OK) {
- // Handshake succeeded, reset the backoff.
- connecting_ = false;
- backoff_reset_time_ = NowTicks();
- previous_backoff_.swap(backoff_entry_);
- backoff_entry_->Reset();
+ DCHECK(!connecting_);
+ if (result != net::OK) {
+ // TODO(zea): Consider how to handle errors that may require some sort of
+ // user intervention (login page, etc.).
+ UMA_HISTOGRAM_SPARSE_SLOWLY("GCM.ConnectionDisconnectErrorCode", result);
+ SignalConnectionReset(SOCKET_FAILURE);
return;
}
- if (!connecting_)
- UMA_HISTOGRAM_SPARSE_SLOWLY("GCM.ConnectionDisconnectErrorCode", result);
-
- if (connection_handler_)
- connection_handler_->Reset();
-
- // TODO(zea): Consider how to handle errors that may require some sort of
- // user intervention (login page, etc.).
- LOG(ERROR) << "Connection reset with error " << result;
- backoff_entry_->InformOfRequest(false);
- Connect();
+ // Handshake complete, reset backoff. If the login failed with an error,
+ // the client should invoke SignalConnectionReset(LOGIN_FAILURE), which will
+ // restore the previous backoff.
+ last_login_time_ = NowTicks();
+ previous_backoff_.swap(backoff_entry_);
+ backoff_entry_->Reset();
+ logging_in_ = false;
}
} // namespace gcm
diff --git a/google_apis/gcm/engine/connection_factory_impl.h b/google_apis/gcm/engine/connection_factory_impl.h
index 3d79a78..7326d79 100644
--- a/google_apis/gcm/engine/connection_factory_impl.h
+++ b/google_apis/gcm/engine/connection_factory_impl.h
@@ -8,6 +8,7 @@
#include "google_apis/gcm/engine/connection_factory.h"
#include "base/memory/weak_ptr.h"
+#include "base/time/time.h"
#include "google_apis/gcm/protocol/mcs.pb.h"
#include "net/base/backoff_entry.h"
#include "net/base/network_change_notifier.h"
@@ -44,7 +45,7 @@ class GCM_EXPORT ConnectionFactoryImpl :
virtual void Connect() OVERRIDE;
virtual bool IsEndpointReachable() const OVERRIDE;
virtual base::TimeTicks NextRetryAttempt() const OVERRIDE;
- virtual void SignalConnectionReset() OVERRIDE;
+ virtual void SignalConnectionReset(ConnectionResetReason reason) OVERRIDE;
// NetworkChangeNotifier observer implementations.
virtual void OnConnectionTypeChanged(
@@ -91,17 +92,26 @@ class GCM_EXPORT ConnectionFactoryImpl :
net::NetLog* const net_log_;
// The handle to the socket for the current connection, if one exists.
net::ClientSocketHandle socket_handle_;
- // Connection attempt backoff policy.
+ // Current backoff entry.
scoped_ptr<net::BackoffEntry> backoff_entry_;
- // Backoff policy from previous backoff attempt.
+ // Backoff entry from previous connection attempt. Updated on each login
+ // completion.
scoped_ptr<net::BackoffEntry> previous_backoff_;
- base::TimeTicks backoff_reset_time_;
// Whether a connection attempt is currently in progress or we're in backoff
// waiting until the next connection attempt. |!connecting_| denotes
// steady state with an active connection.
bool connecting_;
+ // Whether login successfully completed after the connection was established.
+ // If a connection reset happens while attempting to log in, the current
+ // backoff entry is reused (after incrementing with a new failure).
+ bool logging_in_;
+
+ // The time of the last login completion. Used for calculating whether to
+ // restore a previous backoff entry and for measuring uptime.
+ base::TimeTicks last_login_time_;
+
// The current connection handler, if one exists.
scoped_ptr<ConnectionHandlerImpl> connection_handler_;
diff --git a/google_apis/gcm/engine/connection_factory_impl_unittest.cc b/google_apis/gcm/engine/connection_factory_impl_unittest.cc
index 93a76dd..82c3f18 100644
--- a/google_apis/gcm/engine/connection_factory_impl_unittest.cc
+++ b/google_apis/gcm/engine/connection_factory_impl_unittest.cc
@@ -351,7 +351,7 @@ TEST_F(ConnectionFactoryImplTest, FailViaSignalReset) {
factory()->Connect();
EXPECT_TRUE(factory()->NextRetryAttempt().is_null());
- factory()->SignalConnectionReset();
+ factory()->SignalConnectionReset(ConnectionFactory::SOCKET_FAILURE);
EXPECT_FALSE(factory()->NextRetryAttempt().is_null());
EXPECT_FALSE(factory()->GetConnectionHandler()->CanSendMessage());
}
@@ -365,13 +365,13 @@ TEST_F(ConnectionFactoryImplTest, IgnoreResetWhileConnecting) {
factory()->Connect();
EXPECT_TRUE(factory()->NextRetryAttempt().is_null());
- factory()->SignalConnectionReset();
+ factory()->SignalConnectionReset(ConnectionFactory::SOCKET_FAILURE);
base::TimeTicks retry_time = factory()->NextRetryAttempt();
EXPECT_FALSE(retry_time.is_null());
const int kNumAttempts = 5;
for (int i = 0; i < kNumAttempts; ++i)
- factory()->SignalConnectionReset();
+ factory()->SignalConnectionReset(ConnectionFactory::SOCKET_FAILURE);
EXPECT_EQ(retry_time, factory()->NextRetryAttempt());
}
@@ -395,7 +395,7 @@ TEST_F(ConnectionFactoryImplTest, SignalResetRestoresBackoff) {
WaitForConnections();
EXPECT_TRUE(factory()->NextRetryAttempt().is_null());
- factory()->SignalConnectionReset();
+ factory()->SignalConnectionReset(ConnectionFactory::SOCKET_FAILURE);
EXPECT_FALSE(factory()->GetConnectionHandler()->CanSendMessage());
EXPECT_NE(retry_time, factory()->NextRetryAttempt());
retry_time = factory()->NextRetryAttempt();
@@ -410,7 +410,7 @@ TEST_F(ConnectionFactoryImplTest, SignalResetRestoresBackoff) {
WaitForConnections();
EXPECT_TRUE(factory()->NextRetryAttempt().is_null());
- factory()->SignalConnectionReset();
+ factory()->SignalConnectionReset(ConnectionFactory::SOCKET_FAILURE);
EXPECT_NE(retry_time, factory()->NextRetryAttempt());
retry_time = factory()->NextRetryAttempt();
EXPECT_FALSE(retry_time.is_null());
diff --git a/google_apis/gcm/engine/fake_connection_factory.cc b/google_apis/gcm/engine/fake_connection_factory.cc
index 084ff66..88bbc51 100644
--- a/google_apis/gcm/engine/fake_connection_factory.cc
+++ b/google_apis/gcm/engine/fake_connection_factory.cc
@@ -10,7 +10,9 @@
namespace gcm {
-FakeConnectionFactory::FakeConnectionFactory() {
+FakeConnectionFactory::FakeConnectionFactory()
+ : reconnect_pending_(false),
+ delay_reconnect_(false) {
}
FakeConnectionFactory::~FakeConnectionFactory() {
@@ -43,8 +45,12 @@ base::TimeTicks FakeConnectionFactory::NextRetryAttempt() const {
return base::TimeTicks();
}
-void FakeConnectionFactory::SignalConnectionReset() {
- Connect();
+void FakeConnectionFactory::SignalConnectionReset(
+ ConnectionResetReason reason) {
+ if (!delay_reconnect_)
+ Connect();
+ else
+ reconnect_pending_ = true;
}
} // namespace gcm
diff --git a/google_apis/gcm/engine/fake_connection_factory.h b/google_apis/gcm/engine/fake_connection_factory.h
index bd29310..393e591 100644
--- a/google_apis/gcm/engine/fake_connection_factory.h
+++ b/google_apis/gcm/engine/fake_connection_factory.h
@@ -28,13 +28,26 @@ class FakeConnectionFactory : public ConnectionFactory {
virtual void Connect() OVERRIDE;
virtual bool IsEndpointReachable() const OVERRIDE;
virtual base::TimeTicks NextRetryAttempt() const OVERRIDE;
- virtual void SignalConnectionReset() OVERRIDE;
+ virtual void SignalConnectionReset(ConnectionResetReason reason) OVERRIDE;
+
+ // Whether a connection reset has been triggered and is yet to run.
+ bool reconnect_pending() const { return reconnect_pending_; }
+
+ // Whether connection resets should be handled immediately or delayed until
+ // release.
+ void set_delay_reconnect(bool should_delay) {
+ delay_reconnect_ = should_delay;
+ }
private:
scoped_ptr<FakeConnectionHandler> connection_handler_;
BuildLoginRequestCallback request_builder_;
+ // Logic for handling connection resets.
+ bool reconnect_pending_;
+ bool delay_reconnect_;
+
DISALLOW_COPY_AND_ASSIGN(FakeConnectionFactory);
};
diff --git a/google_apis/gcm/engine/fake_connection_handler.cc b/google_apis/gcm/engine/fake_connection_handler.cc
index acbd1d9..3e5d5ac 100644
--- a/google_apis/gcm/engine/fake_connection_handler.cc
+++ b/google_apis/gcm/engine/fake_connection_handler.cc
@@ -40,6 +40,7 @@ FakeConnectionHandler::~FakeConnectionHandler() {
void FakeConnectionHandler::Init(const mcs_proto::LoginRequest& login_request,
net::StreamSocket* socket) {
+ ASSERT_GE(expected_outgoing_messages_.size(), 1U);
EXPECT_EQ(expected_outgoing_messages_.front().SerializeAsString(),
login_request.SerializeAsString());
expected_outgoing_messages_.pop_front();
diff --git a/google_apis/gcm/engine/mcs_client.cc b/google_apis/gcm/engine/mcs_client.cc
index 2f4edf1..e81a829 100644
--- a/google_apis/gcm/engine/mcs_client.cc
+++ b/google_apis/gcm/engine/mcs_client.cc
@@ -606,6 +606,9 @@ void MCSClient::HandlePacketFromWire(
state_ = UNINITIALIZED;
DVLOG(1) << " Error code: " << login_response->error().code();
DVLOG(1) << " Error message: " << login_response->error().message();
+ LOG(ERROR) << "Failed to log in to GCM, resetting connection.";
+ connection_factory_->SignalConnectionReset(
+ ConnectionFactory::LOGIN_FAILURE);
mcs_error_callback_.Run();
return;
}
@@ -656,7 +659,8 @@ void MCSClient::HandlePacketFromWire(
case kCloseTag:
LOG(ERROR) << "Received close command, resetting connection.";
state_ = LOADED;
- connection_factory_->SignalConnectionReset();
+ connection_factory_->SignalConnectionReset(
+ ConnectionFactory::CLOSE_COMMAND);
return;
case kIqStanzaTag: {
DCHECK_GE(stream_id_in_, 1U);
@@ -805,7 +809,8 @@ MCSClient::PersistentId MCSClient::GetNextPersistentId() {
}
void MCSClient::OnConnectionResetByHeartbeat() {
- connection_factory_->SignalConnectionReset();
+ connection_factory_->SignalConnectionReset(
+ ConnectionFactory::HEARTBEAT_FAILURE);
}
void MCSClient::NotifyMessageSendStatus(
diff --git a/google_apis/gcm/engine/mcs_client_unittest.cc b/google_apis/gcm/engine/mcs_client_unittest.cc
index 085f136..2b95254 100644
--- a/google_apis/gcm/engine/mcs_client_unittest.cc
+++ b/google_apis/gcm/engine/mcs_client_unittest.cc
@@ -274,15 +274,18 @@ TEST_F(MCSClientTest, LoginSuccess) {
EXPECT_EQ(kLoginResponseTag, received_message()->tag());
}
-// Encounter a server error during the login attempt.
+// Encounter a server error during the login attempt. Should trigger a
+// reconnect.
TEST_F(MCSClientTest, FailLogin) {
BuildMCSClient();
InitializeClient();
GetFakeHandler()->set_fail_login(true);
+ connection_factory()->set_delay_reconnect(true);
LoginClient(std::vector<std::string>());
EXPECT_FALSE(connection_factory()->IsEndpointReachable());
EXPECT_FALSE(init_success());
EXPECT_FALSE(received_message());
+ EXPECT_TRUE(connection_factory()->reconnect_pending());
}
// Send a message without RMQ support.