summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorwillchan@chromium.org <willchan@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-05-07 23:45:30 +0000
committerwillchan@chromium.org <willchan@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-05-07 23:45:30 +0000
commit7da01767d03ed6e6c5e8cfafc00b426dc77c4da5 (patch)
tree0d5af0ad821f6021d75cd1636cf1f21887d7fb54
parent9e64440a70e0a18ff5b6edcbf10652feb2954b39 (diff)
downloadchromium_src-7da01767d03ed6e6c5e8cfafc00b426dc77c4da5.zip
chromium_src-7da01767d03ed6e6c5e8cfafc00b426dc77c4da5.tar.gz
chromium_src-7da01767d03ed6e6c5e8cfafc00b426dc77c4da5.tar.bz2
Fix IO thread hang on releasing a socket.
The problem was that we assumed ProcessPendingRequest() would always make progress once the group's count of releasing sockets went down to zero. However, in OnAvailableSocketSlot(), since the top stalled group might still have a releasing socket, it won't necessarily make progress. The algorithmic solution is to simply never do any socket slot processing in DoReleaseSocket() if there are any releasing sockets. This requires us to search for any stalled groups after the number of releasing sockets gets back down to 0, so it requires a full group scan each time num_releasing_sockets goes back to 0. There is a performance hit here, but I think a linear search through ~256 groups in the worst case is ok. TODO(willchan): evaluate the perf hit and consider adding a secondary data structure to improve the stalled group search. BUG=42267 Review URL: http://codereview.chromium.org/2013009 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@46757 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r--net/socket/client_socket_pool_base.cc56
-rw-r--r--net/socket/client_socket_pool_base.h3
-rw-r--r--net/socket/client_socket_pool_base_unittest.cc64
-rw-r--r--net/socket/tcp_client_socket_pool_unittest.cc7
4 files changed, 104 insertions, 26 deletions
diff --git a/net/socket/client_socket_pool_base.cc b/net/socket/client_socket_pool_base.cc
index 9fe07d3..89df54f 100644
--- a/net/socket/client_socket_pool_base.cc
+++ b/net/socket/client_socket_pool_base.cc
@@ -117,6 +117,7 @@ ClientSocketPoolBaseHelper::ClientSocketPoolBaseHelper(
: idle_socket_count_(0),
connecting_socket_count_(0),
handed_out_socket_count_(0),
+ num_releasing_sockets_(0),
max_sockets_(max_sockets),
max_sockets_per_group_(max_sockets_per_group),
unused_idle_socket_timeout_(unused_idle_socket_timeout),
@@ -358,6 +359,7 @@ void ClientSocketPoolBaseHelper::ReleaseSocket(const std::string& group_name,
ClientSocket* socket) {
Group& group = group_map_[group_name];
group.num_releasing_sockets++;
+ num_releasing_sockets_++;
DCHECK_LE(group.num_releasing_sockets, group.active_socket_count);
// Run this asynchronously to allow the caller to finish before we let
// another to begin doing work. This also avoids nasty recursion issues.
@@ -482,6 +484,9 @@ void ClientSocketPoolBaseHelper::DoReleaseSocket(const std::string& group_name,
CHECK_GT(group.active_socket_count, 0);
group.active_socket_count--;
+ CHECK_GT(num_releasing_sockets_, 0);
+ num_releasing_sockets_--;
+
const bool can_reuse = socket->IsConnectedAndIdle();
if (can_reuse) {
AddIdleSocket(socket, true /* used socket */, &group);
@@ -489,36 +494,35 @@ void ClientSocketPoolBaseHelper::DoReleaseSocket(const std::string& group_name,
delete socket;
}
- OnAvailableSocketSlot(group_name, &group);
-
// If there are no more releasing sockets, then we might have to process
// multiple available socket slots, since we stalled their processing until
- // all sockets have been released.
- i = group_map_.find(group_name);
- if (i == group_map_.end() || i->second.num_releasing_sockets > 0)
- return;
-
- while (true) {
- // We can't activate more sockets since we're already at our global limit.
- if (ReachedMaxSocketsLimit())
- return;
-
- // |group| might now be deleted.
- i = group_map_.find(group_name);
- if (i == group_map_.end())
- return;
-
- group = i->second;
-
- // If we already have enough ConnectJobs to satisfy the pending requests,
- // don't bother starting up more.
- if (group.pending_requests.size() <= group.jobs.size())
- return;
+ // all sockets have been released. Note that ProcessPendingRequest() will
+ // invoke user callbacks, so |num_releasing_sockets_| may change.
+ //
+ // This code has been known to infinite loop. Set a counter and CHECK to make
+ // sure it doesn't get ridiculously high.
+
+ int iterations = 0;
+ while (num_releasing_sockets_ == 0) {
+ CHECK_LT(iterations, 100000) << "Probably stuck in an infinite loop.";
+ std::string top_group_name;
+ Group* top_group = NULL;
+ int stalled_group_count = FindTopStalledGroup(&top_group, &top_group_name);
+ if (stalled_group_count >= 1) {
+ if (ReachedMaxSocketsLimit()) {
+ // We can't activate more sockets since we're already at our global
+ // limit.
+ may_have_stalled_group_ = true;
+ return;
+ }
- if (!group.HasAvailableSocketSlot(max_sockets_per_group_))
+ ProcessPendingRequest(top_group_name, top_group);
+ } else {
+ may_have_stalled_group_ = false;
return;
+ }
- OnAvailableSocketSlot(group_name, &group);
+ iterations++;
}
}
@@ -640,7 +644,7 @@ void ClientSocketPoolBaseHelper::OnAvailableSocketSlot(
std::string top_group_name;
Group* top_group = NULL;
int stalled_group_count = FindTopStalledGroup(&top_group, &top_group_name);
- if (stalled_group_count <= 1)
+ if (stalled_group_count <= 1 && top_group->num_releasing_sockets == 0)
may_have_stalled_group_ = false;
if (stalled_group_count >= 1)
ProcessPendingRequest(top_group_name, top_group);
diff --git a/net/socket/client_socket_pool_base.h b/net/socket/client_socket_pool_base.h
index 91289d8..89bdb78 100644
--- a/net/socket/client_socket_pool_base.h
+++ b/net/socket/client_socket_pool_base.h
@@ -408,6 +408,9 @@ class ClientSocketPoolBaseHelper
// Number of connected sockets we handed out across all groups.
int handed_out_socket_count_;
+ // Number of sockets being released.
+ int num_releasing_sockets_;
+
// The maximum total number of sockets. See ReachedMaxSocketsLimit.
const int max_sockets_;
diff --git a/net/socket/client_socket_pool_base_unittest.cc b/net/socket/client_socket_pool_base_unittest.cc
index 3e20eb9b..7e26ec6 100644
--- a/net/socket/client_socket_pool_base_unittest.cc
+++ b/net/socket/client_socket_pool_base_unittest.cc
@@ -1455,6 +1455,70 @@ TEST_F(ClientSocketPoolBaseTest, MultipleReleasingDisconnectedSockets) {
EXPECT_FALSE(req4.handle()->is_reused());
}
+// Regression test for http://crbug.com/42267.
+// When DoReleaseSocket() is processed for one socket, it is blocked because the
+// other stalled groups all have releasing sockets, so no progress can be made.
+TEST_F(ClientSocketPoolBaseTest, SocketLimitReleasingSockets) {
+ CreatePoolWithIdleTimeouts(
+ 4 /* socket limit */, 4 /* socket limit per group */,
+ base::TimeDelta(), // Time out unused sockets immediately.
+ base::TimeDelta::FromDays(1)); // Don't time out used sockets.
+
+ connect_job_factory_->set_job_type(TestConnectJob::kMockJob);
+
+ // Max out the socket limit with 2 per group.
+
+ scoped_ptr<TestSocketRequest> req_a[4];
+ scoped_ptr<TestSocketRequest> req_b[4];
+
+ for (int i = 0; i < 2; ++i) {
+ req_a[i].reset(new TestSocketRequest(&request_order_, &completion_count_));
+ req_b[i].reset(new TestSocketRequest(&request_order_, &completion_count_));
+ EXPECT_EQ(OK,
+ InitHandle(req_a[i]->handle(), "a", LOWEST, req_a[i].get(), pool_,
+ BoundNetLog()));
+ EXPECT_EQ(OK,
+ InitHandle(req_b[i]->handle(), "b", LOWEST, req_b[i].get(), pool_,
+ BoundNetLog()));
+ }
+
+ // Make 4 pending requests, 2 per group.
+
+ for (int i = 2; i < 4; ++i) {
+ req_a[i].reset(new TestSocketRequest(&request_order_, &completion_count_));
+ req_b[i].reset(new TestSocketRequest(&request_order_, &completion_count_));
+ EXPECT_EQ(ERR_IO_PENDING,
+ InitHandle(req_a[i]->handle(), "a", LOWEST, req_a[i].get(), pool_,
+ BoundNetLog()));
+ EXPECT_EQ(ERR_IO_PENDING,
+ InitHandle(req_b[i]->handle(), "b", LOWEST, req_b[i].get(), pool_,
+ BoundNetLog()));
+ }
+
+ // Release b's socket first. The order is important, because in
+ // DoReleaseSocket(), we'll process b's released socket, and since both b and
+ // a are stalled, but 'a' is lower lexicographically, we'll process group 'a'
+ // first, which has a releasing socket, so it refuses to start up another
+ // ConnectJob. So, we used to infinite loop on this.
+ req_b[0]->handle()->socket()->Disconnect();
+ req_b[0]->handle()->Reset();
+ req_a[0]->handle()->socket()->Disconnect();
+ req_a[0]->handle()->Reset();
+
+ // Used to get stuck here.
+ MessageLoop::current()->RunAllPending();
+
+ req_b[1]->handle()->socket()->Disconnect();
+ req_b[1]->handle()->Reset();
+ req_a[1]->handle()->socket()->Disconnect();
+ req_a[1]->handle()->Reset();
+
+ for (int i = 2; i < 4; ++i) {
+ EXPECT_EQ(OK, req_b[i]->WaitForResult());
+ EXPECT_EQ(OK, req_a[i]->WaitForResult());
+ }
+}
+
TEST_F(ClientSocketPoolBaseTest,
ReleasingDisconnectedSocketsMaintainsPriorityOrder) {
CreatePool(kDefaultMaxSockets, kDefaultMaxSocketsPerGroup);
diff --git a/net/socket/tcp_client_socket_pool_unittest.cc b/net/socket/tcp_client_socket_pool_unittest.cc
index 6df566ba..cba12b5 100644
--- a/net/socket/tcp_client_socket_pool_unittest.cc
+++ b/net/socket/tcp_client_socket_pool_unittest.cc
@@ -731,6 +731,13 @@ TEST_F(TCPClientSocketPoolTest, BackupSocketConnect) {
// One socket is stalled, the other is active.
EXPECT_EQ(0, pool_->IdleSocketCount());
handle.Reset();
+
+ pool_ = new TCPClientSocketPool(kMaxSockets,
+ kMaxSocketsPerGroup,
+ "TCPUnitTest",
+ host_resolver_,
+ &client_socket_factory_,
+ NULL);
}
}