// Copyright 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "net/socket/tcp_socket.h" #include #include #include #include #include #include #include "base/callback_helpers.h" #include "base/logging.h" #include "base/metrics/histogram.h" #include "base/metrics/stats_counters.h" #include "base/posix/eintr_wrapper.h" #include "build/build_config.h" #include "net/base/address_list.h" #include "net/base/connection_type_histograms.h" #include "net/base/io_buffer.h" #include "net/base/ip_endpoint.h" #include "net/base/net_errors.h" #include "net/base/net_util.h" #include "net/base/network_change_notifier.h" #include "net/socket/socket_net_log_params.h" // If we don't have a definition for TCPI_OPT_SYN_DATA, create one. #ifndef TCPI_OPT_SYN_DATA #define TCPI_OPT_SYN_DATA 32 #endif namespace net { namespace { const int kTCPKeepAliveSeconds = 45; // SetTCPNoDelay turns on/off buffering in the kernel. By default, TCP sockets // will wait up to 200ms for more data to complete a packet before transmitting. // After calling this function, the kernel will not wait. See TCP_NODELAY in // `man 7 tcp`. bool SetTCPNoDelay(int fd, bool no_delay) { int on = no_delay ? 1 : 0; int error = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); return error == 0; } // SetTCPKeepAlive sets SO_KEEPALIVE. bool SetTCPKeepAlive(int fd, bool enable, int delay) { int on = enable ? 1 : 0; if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on))) { PLOG(ERROR) << "Failed to set SO_KEEPALIVE on fd: " << fd; return false; } #if defined(OS_LINUX) || defined(OS_ANDROID) // Set seconds until first TCP keep alive. if (setsockopt(fd, SOL_TCP, TCP_KEEPIDLE, &delay, sizeof(delay))) { PLOG(ERROR) << "Failed to set TCP_KEEPIDLE on fd: " << fd; return false; } // Set seconds between TCP keep alives. if (setsockopt(fd, SOL_TCP, TCP_KEEPINTVL, &delay, sizeof(delay))) { PLOG(ERROR) << "Failed to set TCP_KEEPINTVL on fd: " << fd; return false; } #endif return true; } int MapConnectError(int os_error) { switch (os_error) { case EACCES: return ERR_NETWORK_ACCESS_DENIED; case ETIMEDOUT: return ERR_CONNECTION_TIMED_OUT; default: { int net_error = MapSystemError(os_error); if (net_error == ERR_FAILED) return ERR_CONNECTION_FAILED; // More specific than ERR_FAILED. // Give a more specific error when the user is offline. if (net_error == ERR_ADDRESS_UNREACHABLE && NetworkChangeNotifier::IsOffline()) { return ERR_INTERNET_DISCONNECTED; } return net_error; } } } } // namespace //----------------------------------------------------------------------------- TCPSocketLibevent::Watcher::Watcher( const base::Closure& read_ready_callback, const base::Closure& write_ready_callback) : read_ready_callback_(read_ready_callback), write_ready_callback_(write_ready_callback) { } TCPSocketLibevent::Watcher::~Watcher() { } void TCPSocketLibevent::Watcher::OnFileCanReadWithoutBlocking(int /* fd */) { if (!read_ready_callback_.is_null()) read_ready_callback_.Run(); else NOTREACHED(); } void TCPSocketLibevent::Watcher::OnFileCanWriteWithoutBlocking(int /* fd */) { if (!write_ready_callback_.is_null()) write_ready_callback_.Run(); else NOTREACHED(); } TCPSocketLibevent::TCPSocketLibevent(NetLog* net_log, const NetLog::Source& source) : socket_(kInvalidSocket), accept_watcher_(base::Bind(&TCPSocketLibevent::DidCompleteAccept, base::Unretained(this)), base::Closure()), accept_socket_(NULL), accept_address_(NULL), read_watcher_(base::Bind(&TCPSocketLibevent::DidCompleteRead, base::Unretained(this)), base::Closure()), write_watcher_(base::Closure(), base::Bind(&TCPSocketLibevent::DidCompleteConnectOrWrite, base::Unretained(this))), read_buf_len_(0), write_buf_len_(0), use_tcp_fastopen_(IsTCPFastOpenEnabled()), tcp_fastopen_connected_(false), fast_open_status_(FAST_OPEN_STATUS_UNKNOWN), waiting_connect_(false), connect_os_error_(0), logging_multiple_connect_attempts_(false), net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)) { net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE, source.ToEventParametersCallback()); } TCPSocketLibevent::~TCPSocketLibevent() { net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE); if (tcp_fastopen_connected_) { UMA_HISTOGRAM_ENUMERATION("Net.TcpFastOpenSocketConnection", fast_open_status_, FAST_OPEN_MAX_VALUE); } Close(); } int TCPSocketLibevent::Open(AddressFamily family) { DCHECK(CalledOnValidThread()); DCHECK_EQ(socket_, kInvalidSocket); socket_ = CreatePlatformSocket(ConvertAddressFamily(family), SOCK_STREAM, IPPROTO_TCP); if (socket_ < 0) { PLOG(ERROR) << "CreatePlatformSocket() returned an error"; return MapSystemError(errno); } if (SetNonBlocking(socket_)) { int result = MapSystemError(errno); Close(); return result; } return OK; } int TCPSocketLibevent::AdoptConnectedSocket(int socket, const IPEndPoint& peer_address) { DCHECK(CalledOnValidThread()); DCHECK_EQ(socket_, kInvalidSocket); socket_ = socket; if (SetNonBlocking(socket_)) { int result = MapSystemError(errno); Close(); return result; } peer_address_.reset(new IPEndPoint(peer_address)); return OK; } int TCPSocketLibevent::Bind(const IPEndPoint& address) { DCHECK(CalledOnValidThread()); DCHECK_NE(socket_, kInvalidSocket); SockaddrStorage storage; if (!address.ToSockAddr(storage.addr, &storage.addr_len)) return ERR_ADDRESS_INVALID; int result = bind(socket_, storage.addr, storage.addr_len); if (result < 0) { PLOG(ERROR) << "bind() returned an error"; return MapSystemError(errno); } return OK; } int TCPSocketLibevent::Listen(int backlog) { DCHECK(CalledOnValidThread()); DCHECK_GT(backlog, 0); DCHECK_NE(socket_, kInvalidSocket); int result = listen(socket_, backlog); if (result < 0) { PLOG(ERROR) << "listen() returned an error"; return MapSystemError(errno); } return OK; } int TCPSocketLibevent::Accept(scoped_ptr* socket, IPEndPoint* address, const CompletionCallback& callback) { DCHECK(CalledOnValidThread()); DCHECK(socket); DCHECK(address); DCHECK(!callback.is_null()); DCHECK(accept_callback_.is_null()); net_log_.BeginEvent(NetLog::TYPE_TCP_ACCEPT); int result = AcceptInternal(socket, address); if (result == ERR_IO_PENDING) { if (!base::MessageLoopForIO::current()->WatchFileDescriptor( socket_, true, base::MessageLoopForIO::WATCH_READ, &accept_socket_watcher_, &accept_watcher_)) { PLOG(ERROR) << "WatchFileDescriptor failed on read"; return MapSystemError(errno); } accept_socket_ = socket; accept_address_ = address; accept_callback_ = callback; } return result; } int TCPSocketLibevent::Connect(const IPEndPoint& address, const CompletionCallback& callback) { DCHECK(CalledOnValidThread()); DCHECK_NE(socket_, kInvalidSocket); DCHECK(!waiting_connect_); // |peer_address_| will be non-NULL if Connect() has been called. Unless // Close() is called to reset the internal state, a second call to Connect() // is not allowed. // Please note that we don't allow a second Connect() even if the previous // Connect() has failed. Connecting the same |socket_| again after a // connection attempt failed results in unspecified behavior according to // POSIX. DCHECK(!peer_address_); if (!logging_multiple_connect_attempts_) LogConnectBegin(AddressList(address)); peer_address_.reset(new IPEndPoint(address)); int rv = DoConnect(); if (rv == ERR_IO_PENDING) { // Synchronous operation not supported. DCHECK(!callback.is_null()); write_callback_ = callback; waiting_connect_ = true; } else { DoConnectComplete(rv); } return rv; } bool TCPSocketLibevent::IsConnected() const { DCHECK(CalledOnValidThread()); if (socket_ == kInvalidSocket || waiting_connect_) return false; if (use_tcp_fastopen_ && !tcp_fastopen_connected_ && peer_address_) { // With TCP FastOpen, we pretend that the socket is connected. // This allows GetPeerAddress() to return peer_address_. return true; } // Check if connection is alive. char c; int rv = HANDLE_EINTR(recv(socket_, &c, 1, MSG_PEEK)); if (rv == 0) return false; if (rv == -1 && errno != EAGAIN && errno != EWOULDBLOCK) return false; return true; } bool TCPSocketLibevent::IsConnectedAndIdle() const { DCHECK(CalledOnValidThread()); if (socket_ == kInvalidSocket || waiting_connect_) return false; // TODO(wtc): should we also handle the TCP FastOpen case here, // as we do in IsConnected()? // Check if connection is alive and we haven't received any data // unexpectedly. char c; int rv = HANDLE_EINTR(recv(socket_, &c, 1, MSG_PEEK)); if (rv >= 0) return false; if (errno != EAGAIN && errno != EWOULDBLOCK) return false; return true; } int TCPSocketLibevent::Read(IOBuffer* buf, int buf_len, const CompletionCallback& callback) { DCHECK(CalledOnValidThread()); DCHECK_NE(kInvalidSocket, socket_); DCHECK(!waiting_connect_); DCHECK(read_callback_.is_null()); // Synchronous operation not supported DCHECK(!callback.is_null()); DCHECK_GT(buf_len, 0); int nread = HANDLE_EINTR(read(socket_, buf->data(), buf_len)); if (nread >= 0) { base::StatsCounter read_bytes("tcp.read_bytes"); read_bytes.Add(nread); net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED, nread, buf->data()); RecordFastOpenStatus(); return nread; } if (errno != EAGAIN && errno != EWOULDBLOCK) { int net_error = MapSystemError(errno); net_log_.AddEvent(NetLog::TYPE_SOCKET_READ_ERROR, CreateNetLogSocketErrorCallback(net_error, errno)); return net_error; } if (!base::MessageLoopForIO::current()->WatchFileDescriptor( socket_, true, base::MessageLoopForIO::WATCH_READ, &read_socket_watcher_, &read_watcher_)) { DVLOG(1) << "WatchFileDescriptor failed on read, errno " << errno; return MapSystemError(errno); } read_buf_ = buf; read_buf_len_ = buf_len; read_callback_ = callback; return ERR_IO_PENDING; } int TCPSocketLibevent::Write(IOBuffer* buf, int buf_len, const CompletionCallback& callback) { DCHECK(CalledOnValidThread()); DCHECK_NE(kInvalidSocket, socket_); DCHECK(!waiting_connect_); DCHECK(write_callback_.is_null()); // Synchronous operation not supported DCHECK(!callback.is_null()); DCHECK_GT(buf_len, 0); int nwrite = InternalWrite(buf, buf_len); if (nwrite >= 0) { base::StatsCounter write_bytes("tcp.write_bytes"); write_bytes.Add(nwrite); net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, nwrite, buf->data()); return nwrite; } if (errno != EAGAIN && errno != EWOULDBLOCK) { int net_error = MapSystemError(errno); net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR, CreateNetLogSocketErrorCallback(net_error, errno)); return net_error; } if (!base::MessageLoopForIO::current()->WatchFileDescriptor( socket_, true, base::MessageLoopForIO::WATCH_WRITE, &write_socket_watcher_, &write_watcher_)) { DVLOG(1) << "WatchFileDescriptor failed on write, errno " << errno; return MapSystemError(errno); } write_buf_ = buf; write_buf_len_ = buf_len; write_callback_ = callback; return ERR_IO_PENDING; } int TCPSocketLibevent::GetLocalAddress(IPEndPoint* address) const { DCHECK(CalledOnValidThread()); DCHECK(address); SockaddrStorage storage; if (getsockname(socket_, storage.addr, &storage.addr_len) < 0) return MapSystemError(errno); if (!address->FromSockAddr(storage.addr, storage.addr_len)) return ERR_ADDRESS_INVALID; return OK; } int TCPSocketLibevent::GetPeerAddress(IPEndPoint* address) const { DCHECK(CalledOnValidThread()); DCHECK(address); if (!IsConnected()) return ERR_SOCKET_NOT_CONNECTED; *address = *peer_address_; return OK; } int TCPSocketLibevent::SetDefaultOptionsForServer() { DCHECK(CalledOnValidThread()); return SetAddressReuse(true); } void TCPSocketLibevent::SetDefaultOptionsForClient() { DCHECK(CalledOnValidThread()); // This mirrors the behaviour on Windows. See the comment in // tcp_socket_win.cc after searching for "NODELAY". SetTCPNoDelay(socket_, true); // If SetTCPNoDelay fails, we don't care. SetTCPKeepAlive(socket_, true, kTCPKeepAliveSeconds); } int TCPSocketLibevent::SetAddressReuse(bool allow) { DCHECK(CalledOnValidThread()); // SO_REUSEADDR is useful for server sockets to bind to a recently unbound // port. When a socket is closed, the end point changes its state to TIME_WAIT // and wait for 2 MSL (maximum segment lifetime) to ensure the remote peer // acknowledges its closure. For server sockets, it is usually safe to // bind to a TIME_WAIT end point immediately, which is a widely adopted // behavior. // // Note that on *nix, SO_REUSEADDR does not enable the TCP socket to bind to // an end point that is already bound by another socket. To do that one must // set SO_REUSEPORT instead. This option is not provided on Linux prior // to 3.9. // // SO_REUSEPORT is provided in MacOS X and iOS. int boolean_value = allow ? 1 : 0; int rv = setsockopt(socket_, SOL_SOCKET, SO_REUSEADDR, &boolean_value, sizeof(boolean_value)); if (rv < 0) return MapSystemError(errno); return OK; } bool TCPSocketLibevent::SetReceiveBufferSize(int32 size) { DCHECK(CalledOnValidThread()); int rv = setsockopt(socket_, SOL_SOCKET, SO_RCVBUF, reinterpret_cast(&size), sizeof(size)); DCHECK(!rv) << "Could not set socket receive buffer size: " << errno; return rv == 0; } bool TCPSocketLibevent::SetSendBufferSize(int32 size) { DCHECK(CalledOnValidThread()); int rv = setsockopt(socket_, SOL_SOCKET, SO_SNDBUF, reinterpret_cast(&size), sizeof(size)); DCHECK(!rv) << "Could not set socket send buffer size: " << errno; return rv == 0; } bool TCPSocketLibevent::SetKeepAlive(bool enable, int delay) { DCHECK(CalledOnValidThread()); return SetTCPKeepAlive(socket_, enable, delay); } bool TCPSocketLibevent::SetNoDelay(bool no_delay) { DCHECK(CalledOnValidThread()); return SetTCPNoDelay(socket_, no_delay); } void TCPSocketLibevent::Close() { DCHECK(CalledOnValidThread()); bool ok = accept_socket_watcher_.StopWatchingFileDescriptor(); DCHECK(ok); ok = read_socket_watcher_.StopWatchingFileDescriptor(); DCHECK(ok); ok = write_socket_watcher_.StopWatchingFileDescriptor(); DCHECK(ok); if (socket_ != kInvalidSocket) { if (HANDLE_EINTR(close(socket_)) < 0) PLOG(ERROR) << "close"; socket_ = kInvalidSocket; } if (!accept_callback_.is_null()) { accept_socket_ = NULL; accept_address_ = NULL; accept_callback_.Reset(); } if (!read_callback_.is_null()) { read_buf_ = NULL; read_buf_len_ = 0; read_callback_.Reset(); } if (!write_callback_.is_null()) { write_buf_ = NULL; write_buf_len_ = 0; write_callback_.Reset(); } tcp_fastopen_connected_ = false; fast_open_status_ = FAST_OPEN_STATUS_UNKNOWN; waiting_connect_ = false; peer_address_.reset(); connect_os_error_ = 0; } bool TCPSocketLibevent::UsingTCPFastOpen() const { return use_tcp_fastopen_; } void TCPSocketLibevent::StartLoggingMultipleConnectAttempts( const AddressList& addresses) { if (!logging_multiple_connect_attempts_) { logging_multiple_connect_attempts_ = true; LogConnectBegin(addresses); } else { NOTREACHED(); } } void TCPSocketLibevent::EndLoggingMultipleConnectAttempts(int net_error) { if (logging_multiple_connect_attempts_) { LogConnectEnd(net_error); logging_multiple_connect_attempts_ = false; } else { NOTREACHED(); } } int TCPSocketLibevent::AcceptInternal(scoped_ptr* socket, IPEndPoint* address) { SockaddrStorage storage; int new_socket = HANDLE_EINTR(accept(socket_, storage.addr, &storage.addr_len)); if (new_socket < 0) { int net_error = MapSystemError(errno); if (net_error != ERR_IO_PENDING) net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, net_error); return net_error; } IPEndPoint ip_end_point; if (!ip_end_point.FromSockAddr(storage.addr, storage.addr_len)) { NOTREACHED(); if (HANDLE_EINTR(close(new_socket)) < 0) PLOG(ERROR) << "close"; net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, ERR_ADDRESS_INVALID); return ERR_ADDRESS_INVALID; } scoped_ptr tcp_socket(new TCPSocketLibevent( net_log_.net_log(), net_log_.source())); int adopt_result = tcp_socket->AdoptConnectedSocket(new_socket, ip_end_point); if (adopt_result != OK) { net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, adopt_result); return adopt_result; } *socket = tcp_socket.Pass(); *address = ip_end_point; net_log_.EndEvent(NetLog::TYPE_TCP_ACCEPT, CreateNetLogIPEndPointCallback(&ip_end_point)); return OK; } int TCPSocketLibevent::DoConnect() { DCHECK_EQ(0, connect_os_error_); net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT, CreateNetLogIPEndPointCallback(peer_address_.get())); // Connect the socket. if (!use_tcp_fastopen_) { SockaddrStorage storage; if (!peer_address_->ToSockAddr(storage.addr, &storage.addr_len)) return ERR_INVALID_ARGUMENT; if (!HANDLE_EINTR(connect(socket_, storage.addr, storage.addr_len))) { // Connected without waiting! return OK; } } else { // With TCP FastOpen, we pretend that the socket is connected. DCHECK(!tcp_fastopen_connected_); return OK; } // Check if the connect() failed synchronously. connect_os_error_ = errno; if (connect_os_error_ != EINPROGRESS) return MapConnectError(connect_os_error_); // Otherwise the connect() is going to complete asynchronously, so watch // for its completion. if (!base::MessageLoopForIO::current()->WatchFileDescriptor( socket_, true, base::MessageLoopForIO::WATCH_WRITE, &write_socket_watcher_, &write_watcher_)) { connect_os_error_ = errno; DVLOG(1) << "WatchFileDescriptor failed: " << connect_os_error_; return MapSystemError(connect_os_error_); } return ERR_IO_PENDING; } void TCPSocketLibevent::DoConnectComplete(int result) { // Log the end of this attempt (and any OS error it threw). int os_error = connect_os_error_; connect_os_error_ = 0; if (result != OK) { net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT, NetLog::IntegerCallback("os_error", os_error)); } else { net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT); } if (!logging_multiple_connect_attempts_) LogConnectEnd(result); } void TCPSocketLibevent::LogConnectBegin(const AddressList& addresses) { base::StatsCounter connects("tcp.connect"); connects.Increment(); net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT, addresses.CreateNetLogCallback()); } void TCPSocketLibevent::LogConnectEnd(int net_error) { if (net_error == OK) UpdateConnectionTypeHistograms(CONNECTION_ANY); if (net_error != OK) { net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, net_error); return; } SockaddrStorage storage; int rv = getsockname(socket_, storage.addr, &storage.addr_len); if (rv != 0) { PLOG(ERROR) << "getsockname() [rv: " << rv << "] error: "; NOTREACHED(); net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, rv); return; } net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT, CreateNetLogSourceAddressCallback(storage.addr, storage.addr_len)); } void TCPSocketLibevent::DidCompleteRead() { RecordFastOpenStatus(); if (read_callback_.is_null()) return; int bytes_transferred; bytes_transferred = HANDLE_EINTR(read(socket_, read_buf_->data(), read_buf_len_)); int result; if (bytes_transferred >= 0) { result = bytes_transferred; base::StatsCounter read_bytes("tcp.read_bytes"); read_bytes.Add(bytes_transferred); net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED, result, read_buf_->data()); } else { result = MapSystemError(errno); if (result != ERR_IO_PENDING) { net_log_.AddEvent(NetLog::TYPE_SOCKET_READ_ERROR, CreateNetLogSocketErrorCallback(result, errno)); } } if (result != ERR_IO_PENDING) { read_buf_ = NULL; read_buf_len_ = 0; bool ok = read_socket_watcher_.StopWatchingFileDescriptor(); DCHECK(ok); base::ResetAndReturn(&read_callback_).Run(result); } } void TCPSocketLibevent::DidCompleteWrite() { if (write_callback_.is_null()) return; int bytes_transferred; bytes_transferred = HANDLE_EINTR(write(socket_, write_buf_->data(), write_buf_len_)); int result; if (bytes_transferred >= 0) { result = bytes_transferred; base::StatsCounter write_bytes("tcp.write_bytes"); write_bytes.Add(bytes_transferred); net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, result, write_buf_->data()); } else { result = MapSystemError(errno); if (result != ERR_IO_PENDING) { net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR, CreateNetLogSocketErrorCallback(result, errno)); } } if (result != ERR_IO_PENDING) { write_buf_ = NULL; write_buf_len_ = 0; write_socket_watcher_.StopWatchingFileDescriptor(); base::ResetAndReturn(&write_callback_).Run(result); } } void TCPSocketLibevent::DidCompleteConnect() { DCHECK(waiting_connect_); // Get the error that connect() completed with. int os_error = 0; socklen_t len = sizeof(os_error); if (getsockopt(socket_, SOL_SOCKET, SO_ERROR, &os_error, &len) < 0) os_error = errno; int result = MapConnectError(os_error); connect_os_error_ = os_error; if (result != ERR_IO_PENDING) { DoConnectComplete(result); waiting_connect_ = false; write_socket_watcher_.StopWatchingFileDescriptor(); base::ResetAndReturn(&write_callback_).Run(result); } } void TCPSocketLibevent::DidCompleteConnectOrWrite() { if (waiting_connect_) DidCompleteConnect(); else DidCompleteWrite(); } void TCPSocketLibevent::DidCompleteAccept() { DCHECK(CalledOnValidThread()); int result = AcceptInternal(accept_socket_, accept_address_); if (result != ERR_IO_PENDING) { accept_socket_ = NULL; accept_address_ = NULL; bool ok = accept_socket_watcher_.StopWatchingFileDescriptor(); DCHECK(ok); CompletionCallback callback = accept_callback_; accept_callback_.Reset(); callback.Run(result); } } int TCPSocketLibevent::InternalWrite(IOBuffer* buf, int buf_len) { int nwrite; if (use_tcp_fastopen_ && !tcp_fastopen_connected_) { SockaddrStorage storage; if (!peer_address_->ToSockAddr(storage.addr, &storage.addr_len)) { errno = EINVAL; return -1; } int flags = 0x20000000; // Magic flag to enable TCP_FASTOPEN. #if defined(OS_LINUX) // sendto() will fail with EPIPE when the system doesn't support TCP Fast // Open. Theoretically that shouldn't happen since the caller should check // for system support on startup, but users may dynamically disable TCP Fast // Open via sysctl. flags |= MSG_NOSIGNAL; #endif // defined(OS_LINUX) nwrite = HANDLE_EINTR(sendto(socket_, buf->data(), buf_len, flags, storage.addr, storage.addr_len)); tcp_fastopen_connected_ = true; if (nwrite < 0) { DCHECK_NE(EPIPE, errno); // If errno == EINPROGRESS, that means the kernel didn't have a cookie // and would block. The kernel is internally doing a connect() though. // Remap EINPROGRESS to EAGAIN so we treat this the same as our other // asynchronous cases. Note that the user buffer has not been copied to // kernel space. if (errno == EINPROGRESS) { errno = EAGAIN; fast_open_status_ = FAST_OPEN_SLOW_CONNECT_RETURN; } else { fast_open_status_ = FAST_OPEN_ERROR; } } else { fast_open_status_ = FAST_OPEN_FAST_CONNECT_RETURN; } } else { nwrite = HANDLE_EINTR(write(socket_, buf->data(), buf_len)); } return nwrite; } void TCPSocketLibevent::RecordFastOpenStatus() { if (use_tcp_fastopen_ && (fast_open_status_ == FAST_OPEN_FAST_CONNECT_RETURN || fast_open_status_ == FAST_OPEN_SLOW_CONNECT_RETURN)) { DCHECK_NE(FAST_OPEN_STATUS_UNKNOWN, fast_open_status_); bool getsockopt_success(false); bool server_acked_data(false); #if defined(TCP_INFO) // Probe to see the if the socket used TCP Fast Open. tcp_info info; socklen_t info_len = sizeof(tcp_info); getsockopt_success = getsockopt(socket_, IPPROTO_TCP, TCP_INFO, &info, &info_len) == 0 && info_len == sizeof(tcp_info); server_acked_data = getsockopt_success && (info.tcpi_options & TCPI_OPT_SYN_DATA); #endif if (getsockopt_success) { if (fast_open_status_ == FAST_OPEN_FAST_CONNECT_RETURN) { fast_open_status_ = (server_acked_data ? FAST_OPEN_SYN_DATA_ACK : FAST_OPEN_SYN_DATA_NACK); } else { fast_open_status_ = (server_acked_data ? FAST_OPEN_NO_SYN_DATA_ACK : FAST_OPEN_NO_SYN_DATA_NACK); } } else { fast_open_status_ = (fast_open_status_ == FAST_OPEN_FAST_CONNECT_RETURN ? FAST_OPEN_SYN_DATA_FAILED : FAST_OPEN_NO_SYN_DATA_FAILED); } } } } // namespace net