// net/socket/tcp_socket_win.cc - chromium/src.git - Git at Google

 // Copyright 2013 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "net/socket/tcp_socket_win.h"

 #include <mstcpip.h>

 #include "base/callback_helpers.h"
 #include "base/logging.h"
 #include "base/metrics/stats_counters.h"
 #include "base/win/windows_version.h"
 #include "net/base/address_list.h"
 #include "net/base/connection_type_histograms.h"
 #include "net/base/io_buffer.h"
 #include "net/base/ip_endpoint.h"
 #include "net/base/net_errors.h"
 #include "net/base/net_util.h"
 #include "net/base/network_change_notifier.h"
 #include "net/base/winsock_init.h"
 #include "net/base/winsock_util.h"
 #include "net/socket/socket_descriptor.h"
 #include "net/socket/socket_net_log_params.h"

 namespace net {

 namespace {

 // Keep-alive delay, in seconds, that SetDefaultOptionsForClient() passes to
 // SetTCPKeepAlive().
 const int kTCPKeepAliveSeconds = 45;

 // Applies |size| as the SO_RCVBUF option of |socket|. Returns true when
 // setsockopt() succeeds; a failure also trips a DCHECK.
 bool SetSocketReceiveBufferSize(SOCKET socket, int32 size) {
   const char* option_value = reinterpret_cast<const char*>(&size);
   const int result =
       setsockopt(socket, SOL_SOCKET, SO_RCVBUF, option_value, sizeof(size));
   DCHECK(!result) << "Could not set socket receive buffer size: "
                   << GetLastError();
   return !result;
 }

 // Applies |size| as the SO_SNDBUF option of |socket|. Returns true when
 // setsockopt() succeeds; a failure also trips a DCHECK.
 bool SetSocketSendBufferSize(SOCKET socket, int32 size) {
   const char* option_value = reinterpret_cast<const char*>(&size);
   const int result =
       setsockopt(socket, SOL_SOCKET, SO_SNDBUF, option_value, sizeof(size));
   DCHECK(!result) << "Could not set socket send buffer size: "
                   << GetLastError();
   return !result;
 }

 // Disable Nagle.
 // The Nagle implementation on windows is governed by RFC 896.  The idea
 // behind Nagle is to reduce small packets on the network.  When Nagle is
 // enabled, if a partial packet has been sent, the TCP stack will disallow
 // further *partial* packets until an ACK has been received from the other
 // side.  Good applications should always strive to send as much data as
 // possible and avoid partial-packet sends.  However, in most real world
 // applications, there are edge cases where this does not happen, and two
 // partial packets may be sent back to back.  For a browser, it is NEVER
 // a benefit to delay for an RTT before the second packet is sent.
 //
 // As a practical example in Chromium today, consider the case of a small
 // POST.  I have verified this:
 //     Client writes 649 bytes of header  (partial packet #1)
 //     Client writes 50 bytes of POST data (partial packet #2)
 // In the above example, with Nagle, a RTT delay is inserted between these
 // two sends due to nagle.  RTTs can easily be 100ms or more.  The best
 // fix is to make sure that for POSTing data, we write as much data as
 // possible and minimize partial packets.  We will fix that.  But disabling
 // Nagle also ensures we don't run into this delay in other edge cases.
 // See also:
 //    http://technet.microsoft.com/en-us/library/bb726981.aspx
 bool DisableNagle(SOCKET socket, bool disable) {
   // TCP_NODELAY is set to TRUE when |disable| is true and to FALSE otherwise,
   // so the same helper can also turn Nagle back on.
   BOOL no_delay = FALSE;
   if (disable)
     no_delay = TRUE;

   const int result =
       setsockopt(socket, IPPROTO_TCP, TCP_NODELAY,
                  reinterpret_cast<const char*>(&no_delay), sizeof(no_delay));
   DCHECK(!result) << "Could not disable nagle";
   return !result;
 }

 // Turns TCP keep-alive on or off for |socket|. Keep-alive is used to prevent
 // NAT routers from timing out TCP connections; see http://crbug.com/27400
 // for details.
 //
 // |delay_secs| is given in seconds. It is scaled by 1000 (to milliseconds)
 // and used for both the time before the first keep-alive packet and the
 // interval between subsequent ones. Returns true if WSAIoctl() succeeded.
 bool SetTCPKeepAlive(SOCKET socket, BOOL enable, int delay_secs) {
   int delay_ms = delay_secs * 1000;
   struct tcp_keepalive settings = {
     enable ? 1 : 0,  // Keep-alive on/off switch.
     delay_ms,  // Milliseconds before the first keep-alive packet is sent.
     delay_ms,  // Milliseconds between successive keep-alive packets.
   };
   DWORD bytes_returned = 0xABAB;
   const int result = WSAIoctl(socket, SIO_KEEPALIVE_VALS, &settings,
                               sizeof(settings), NULL, 0,
                               &bytes_returned, NULL, NULL);
   DCHECK(!result) << "Could not enable TCP Keep-Alive for socket: " << socket
                   << " [error: " << WSAGetLastError() << "].";

   // The outcome is only reported; callers are free to disregard a failure.
   return !result;
 }

 // Translates the OS error of a failed connect() into a net error code,
 // preferring connect-specific codes over the generic MapSystemError() result.
 int MapConnectError(int os_error) {
   // connect fails with WSAEACCES when Windows Firewall blocks the
   // connection.
   if (os_error == WSAEACCES)
     return ERR_NETWORK_ACCESS_DENIED;
   if (os_error == WSAETIMEDOUT)
     return ERR_CONNECTION_TIMED_OUT;

   const int mapped = MapSystemError(os_error);

   // ERR_CONNECTION_FAILED is more specific than ERR_FAILED.
   if (mapped == ERR_FAILED)
     return ERR_CONNECTION_FAILED;

   // Give a more specific error when the user is offline.
   const bool unreachable = (mapped == ERR_ADDRESS_UNREACHABLE);
   if (unreachable && NetworkChangeNotifier::IsOffline())
     return ERR_INTERNET_DISCONNECTED;

   return mapped;
 }

 }  // namespace

 //-----------------------------------------------------------------------------

 // This class encapsulates all the state that has to be preserved as long as
 // there is a network IO operation in progress. If the owner TCPSocketWin is
 // destroyed while an operation is in progress, the Core is detached and it
 // lives until the operation completes and the OS doesn't reference any resource
 // declared on this class anymore.
 //
 // The class is reference counted. WatchForRead() and WatchForWrite() each take
 // an extra reference that the matching delegate's OnObjectSignaled() releases.
 class TCPSocketWin::Core : public base::RefCounted<Core> {
  public:
   // |socket| is the owning TCPSocketWin; it is forgotten again by Detach().
   explicit Core(TCPSocketWin* socket);

   // Start watching for the end of a read or write operation.
   void WatchForRead();
   void WatchForWrite();

   // The TCPSocketWin is going away. After this call the delegates no longer
   // forward signals to the socket.
   void Detach() { socket_ = NULL; }

   // The separate OVERLAPPED variables for asynchronous operation.
   // |read_overlapped_| is used for both Connect() and Read().
   // |write_overlapped_| is only used for Write().
   OVERLAPPED read_overlapped_;
   OVERLAPPED write_overlapped_;

   // The buffers used in Read() and Write(), together with their lengths.
   scoped_refptr<IOBuffer> read_iobuffer_;
   scoped_refptr<IOBuffer> write_iobuffer_;
   int read_buffer_length_;
   int write_buffer_length_;

   // Set by DoRead() once it has selected FD_READ | FD_CLOSE on the read event,
   // so that the selection is only made once.
   bool non_blocking_reads_initialized_;

  private:
   // RefCounted needs access to the private destructor.
   friend class base::RefCounted<Core>;

   // Receives the signal of the read event and forwards it to the socket as
   // either a connect completion or a read notification.
   class ReadDelegate : public base::win::ObjectWatcher::Delegate {
    public:
     explicit ReadDelegate(Core* core) : core_(core) {}
     virtual ~ReadDelegate() {}

     // base::ObjectWatcher::Delegate methods:
     virtual void OnObjectSignaled(HANDLE object);

    private:
     // The Core this delegate is a member of.
     Core* const core_;
   };

   // Receives the signal of the write event and forwards it to the socket as a
   // write completion.
   class WriteDelegate : public base::win::ObjectWatcher::Delegate {
    public:
     explicit WriteDelegate(Core* core) : core_(core) {}
     virtual ~WriteDelegate() {}

     // base::ObjectWatcher::Delegate methods:
     virtual void OnObjectSignaled(HANDLE object);

    private:
     // The Core this delegate is a member of.
     Core* const core_;
   };

   // Private: instances are destroyed through the reference count only.
   ~Core();

   // The socket that created this object. NULL once Detach() has been called.
   TCPSocketWin* socket_;

   // |reader_| handles the signals from |read_watcher_|.
   ReadDelegate reader_;
   // |writer_| handles the signals from |write_watcher_|.
   WriteDelegate writer_;

   // |read_watcher_| watches for events from Connect() and Read().
   base::win::ObjectWatcher read_watcher_;
   // |write_watcher_| watches for events from Write().
   base::win::ObjectWatcher write_watcher_;

   DISALLOW_COPY_AND_ASSIGN(Core);
 };

 // Creates the per-socket IO state: both OVERLAPPED structures are zeroed and
 // each is given its own freshly created event.
 TCPSocketWin::Core::Core(TCPSocketWin* socket)
     : read_buffer_length_(0),
       write_buffer_length_(0),
       non_blocking_reads_initialized_(false),
       socket_(socket),
       reader_(this),
       writer_(this) {
   // Read side (also used for connect notifications).
   memset(&read_overlapped_, 0, sizeof(read_overlapped_));
   read_overlapped_.hEvent = WSACreateEvent();

   // Write side.
   memset(&write_overlapped_, 0, sizeof(write_overlapped_));
   write_overlapped_.hEvent = WSACreateEvent();
 }

 // Stops both watchers, then closes each event and overwrites its OVERLAPPED
 // structure with a recognizable fill pattern.
 TCPSocketWin::Core::~Core() {
   // The message loop must not be watching this object anymore.
   read_watcher_.StopWatching();
   write_watcher_.StopWatching();

   OVERLAPPED* const overlapped[] = { &read_overlapped_, &write_overlapped_ };
   for (size_t i = 0; i < sizeof(overlapped) / sizeof(overlapped[0]); ++i) {
     WSACloseEvent(overlapped[i]->hEvent);
     memset(overlapped[i], 0xaf, sizeof(*overlapped[i]));
   }
 }

 void TCPSocketWin::Core::WatchForRead() {
   // We grab an extra reference because there is an IO operation in progress.
   // Balanced in ReadDelegate::OnObjectSignaled().
   AddRef();
   read_watcher_.StartWatching(read_overlapped_.hEvent, &reader_);
 }

 void TCPSocketWin::Core::WatchForWrite() {
   // We grab an extra reference because there is an IO operation in progress.
   // Balanced in WriteDelegate::OnObjectSignaled().
   AddRef();
   write_watcher_.StartWatching(write_overlapped_.hEvent, &writer_);
 }

 // Called when the read event is signaled. Forwards the notification to the
 // owning socket, if the Core is still attached to one, and then drops the
 // reference taken in Core::WatchForRead().
 void TCPSocketWin::Core::ReadDelegate::OnObjectSignaled(HANDLE object) {
   DCHECK_EQ(object, core_->read_overlapped_.hEvent);

   TCPSocketWin* const owner = core_->socket_;
   if (owner != NULL) {
     // The read event doubles as the connect notification.
     if (owner->waiting_connect_) {
       owner->DidCompleteConnect();
     } else {
       owner->DidSignalRead();
     }
   }

   core_->Release();
 }

 // Called when the write event is signaled. Forwards the completion to the
 // owning socket, if the Core is still attached to one, and then drops the
 // reference taken in Core::WatchForWrite().
 void TCPSocketWin::Core::WriteDelegate::OnObjectSignaled(HANDLE object) {
   DCHECK_EQ(object, core_->write_overlapped_.hEvent);

   TCPSocketWin* const owner = core_->socket_;
   if (owner != NULL) {
     owner->DidCompleteWrite();
   }

   core_->Release();
 }

 //-----------------------------------------------------------------------------

 // Constructs a socket object that does not own an OS socket yet: |socket_|
 // starts as INVALID_SOCKET and |accept_event_| as WSA_INVALID_EVENT.
 // Begins the TYPE_SOCKET_ALIVE net log event (ended by the destructor), with
 // |source| supplying the event parameters, and calls EnsureWinsockInit().
 TCPSocketWin::TCPSocketWin(net::NetLog* net_log,
                            const net::NetLog::Source& source)
     : socket_(INVALID_SOCKET),
       accept_event_(WSA_INVALID_EVENT),
       accept_socket_(NULL),
       accept_address_(NULL),
       waiting_connect_(false),
       waiting_read_(false),
       waiting_write_(false),
       connect_os_error_(0),
       logging_multiple_connect_attempts_(false),
       net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)) {
   net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE,
                       source.ToEventParametersCallback());
   EnsureWinsockInit();
 }

 // Closes the socket (a no-op for the OS socket if it is not open) and ends
 // the TYPE_SOCKET_ALIVE net log event begun by the constructor.
 TCPSocketWin::~TCPSocketWin() {
   this->Close();

   net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE);
 }

 // Creates a TCP stream socket for |family| and switches it to non-blocking
 // mode. Returns OK on success or a mapped net error; if the socket cannot be
 // made non-blocking it is closed again.
 int TCPSocketWin::Open(AddressFamily family) {
   DCHECK(CalledOnValidThread());
   DCHECK_EQ(socket_, INVALID_SOCKET);

   socket_ = CreatePlatformSocket(ConvertAddressFamily(family), SOCK_STREAM,
                                  IPPROTO_TCP);
   if (socket_ == INVALID_SOCKET) {
     PLOG(ERROR) << "CreatePlatformSocket() returned an error";
     return MapSystemError(WSAGetLastError());
   }

   if (!SetNonBlocking(socket_))
     return OK;

   // Read the error before Close() has a chance to disturb it.
   const int net_error = MapSystemError(WSAGetLastError());
   Close();
   return net_error;
 }

 // Takes over the already-connected |socket|, whose remote end is
 // |peer_address|. The socket is made non-blocking; if that fails it is closed
 // and the mapped error is returned. On success a Core is created for it.
 int TCPSocketWin::AdoptConnectedSocket(SOCKET socket,
                                        const IPEndPoint& peer_address) {
   DCHECK(CalledOnValidThread());
   DCHECK_EQ(socket_, INVALID_SOCKET);
   DCHECK(!core_);

   socket_ = socket;

   if (SetNonBlocking(socket_) != 0) {
     // Read the error before Close() has a chance to disturb it.
     const int net_error = MapSystemError(WSAGetLastError());
     Close();
     return net_error;
   }

   core_ = new Core(this);
   peer_address_.reset(new IPEndPoint(peer_address));
   return OK;
 }

 // Binds the socket to |address|. Returns ERR_ADDRESS_INVALID if |address|
 // cannot be converted to a sockaddr, a mapped net error if bind() fails, and
 // OK otherwise.
 int TCPSocketWin::Bind(const IPEndPoint& address) {
   DCHECK(CalledOnValidThread());
   DCHECK_NE(socket_, INVALID_SOCKET);

   SockaddrStorage storage;
   const bool converted = address.ToSockAddr(storage.addr, &storage.addr_len);
   if (!converted)
     return ERR_ADDRESS_INVALID;

   if (bind(socket_, storage.addr, storage.addr_len) >= 0)
     return OK;

   PLOG(ERROR) << "bind() returned an error";
   return MapSystemError(WSAGetLastError());
 }

 // Creates the event used to watch for incoming connections and puts the
 // socket into the listening state with the given |backlog|. Returns OK or a
 // mapped net error.
 int TCPSocketWin::Listen(int backlog) {
   DCHECK(CalledOnValidThread());
   DCHECK_GT(backlog, 0);
   DCHECK_NE(socket_, INVALID_SOCKET);
   DCHECK_EQ(accept_event_, WSA_INVALID_EVENT);

   accept_event_ = WSACreateEvent();
   if (accept_event_ == WSA_INVALID_EVENT) {
     PLOG(ERROR) << "WSACreateEvent()";
     return MapSystemError(WSAGetLastError());
   }

   if (listen(socket_, backlog) < 0) {
     PLOG(ERROR) << "listen() returned an error";
     return MapSystemError(WSAGetLastError());
   }

   return OK;
 }

 // Tries to accept a connection right away. If none is pending, the output
 // pointers and |callback| are remembered, FD_ACCEPT is selected on the accept
 // event and ERR_IO_PENDING is returned; the accept is then finished from
 // OnObjectSignaled(). Any other result is returned directly.
 int TCPSocketWin::Accept(scoped_ptr<TCPSocketWin>* socket,
                          IPEndPoint* address,
                          const CompletionCallback& callback) {
   DCHECK(CalledOnValidThread());
   DCHECK(socket);
   DCHECK(address);
   DCHECK(!callback.is_null());
   DCHECK(accept_callback_.is_null());

   net_log_.BeginEvent(NetLog::TYPE_TCP_ACCEPT);

   const int result = AcceptInternal(socket, address);
   if (result != ERR_IO_PENDING)
     return result;

   // Nothing to accept yet: start watching for the next connection.
   WSAEventSelect(socket_, accept_event_, FD_ACCEPT);
   accept_watcher_.StartWatching(accept_event_, this);

   accept_socket_ = socket;
   accept_address_ = address;
   accept_callback_ = callback;
   return result;
 }

 // Starts connecting to |address|. Returns the result of DoConnect(); when
 // that is ERR_IO_PENDING, |callback| is stored and run once the attempt has
 // finished. Any other result is final and is logged immediately.
 int TCPSocketWin::Connect(const IPEndPoint& address,
                           const CompletionCallback& callback) {
   DCHECK(CalledOnValidThread());
   DCHECK_NE(socket_, INVALID_SOCKET);
   DCHECK(!waiting_connect_);

   // Once Connect() has been called, |peer_address_| and |core_| are non-NULL
   // until Close() resets the internal state, and calling Connect() again in
   // between is not allowed. This holds even when the earlier attempt has
   // completed and failed: Windows would permit reconnecting the same
   // |socket_|, but POSIX leaves that unspecified, so this class behaves the
   // same way as TCPSocketLibevent.
   DCHECK(!peer_address_ && !core_);

   if (!logging_multiple_connect_attempts_)
     LogConnectBegin(AddressList(address));

   peer_address_.reset(new IPEndPoint(address));

   const int rv = DoConnect();
   if (rv != ERR_IO_PENDING) {
     DoConnectComplete(rv);
     return rv;
   }

   // Synchronous operation not supported.
   DCHECK(!callback.is_null());
   read_callback_ = callback;
   waiting_connect_ = true;
   return rv;
 }

 bool TCPSocketWin::IsConnected() const {
   DCHECK(CalledOnValidThread());

   if (socket_ == INVALID_SOCKET || waiting_connect_)
     return false;

   if (waiting_read_)
     return true;

   // Check if connection is alive.
   char c;
   int rv = recv(socket_, &c, 1, MSG_PEEK);
   if (rv == 0)
     return false;
   if (rv == SOCKET_ERROR && WSAGetLastError() != WSAEWOULDBLOCK)
     return false;

   return true;
 }

 bool TCPSocketWin::IsConnectedAndIdle() const {
   DCHECK(CalledOnValidThread());

   if (socket_ == INVALID_SOCKET || waiting_connect_)
     return false;

   if (waiting_read_)
     return true;

   // Check if connection is alive and we haven't received any data
   // unexpectedly.
   char c;
   int rv = recv(socket_, &c, 1, MSG_PEEK);
   if (rv >= 0)
     return false;
   if (WSAGetLastError() != WSAEWOULDBLOCK)
     return false;

   return true;
 }

 // Public read entry point: checks that no other read is outstanding and
 // hands the request to DoRead(), whose result is returned unchanged.
 int TCPSocketWin::Read(IOBuffer* buf,
                        int buf_len,
                        const CompletionCallback& callback) {
   DCHECK(CalledOnValidThread());
   DCHECK_NE(socket_, INVALID_SOCKET);
   DCHECK(!waiting_read_);
   DCHECK(read_callback_.is_null());
   DCHECK(!core_->read_iobuffer_);

   const int result = DoRead(buf, buf_len, callback);
   return result;
 }

 // Sends up to |buf_len| bytes from |buf| with an overlapped WSASend().
 //
 // Returns the number of bytes written if the send completed right away, a
 // net error code if it failed right away, or ERR_IO_PENDING if it is still in
 // flight. In the pending case |buf| and |callback| are retained and the
 // write event is watched; |callback| is then run from DidCompleteWrite().
 int TCPSocketWin::Write(IOBuffer* buf,
                         int buf_len,
                         const CompletionCallback& callback) {
   DCHECK(CalledOnValidThread());
   DCHECK_NE(socket_, INVALID_SOCKET);
   DCHECK(!waiting_write_);
   DCHECK(write_callback_.is_null());
   DCHECK_GT(buf_len, 0);
   DCHECK(!core_->write_iobuffer_);

   base::StatsCounter writes("tcp.writes");
   writes.Increment();

   WSABUF write_buffer;
   write_buffer.len = buf_len;
   write_buffer.buf = buf->data();

   // TODO(wtc): Remove the assertion after enough testing.
   AssertEventNotSignaled(core_->write_overlapped_.hEvent);
   DWORD num;
   int rv = WSASend(socket_, &write_buffer, 1, &num, 0,
                    &core_->write_overlapped_, NULL);
   if (rv == 0) {
     // WSASend() reported immediate success. The result is only consumed here
     // if the event was signaled (and has now been reset); otherwise control
     // falls through to the pending path below.
     if (ResetEventIfSignaled(core_->write_overlapped_.hEvent)) {
       rv = static_cast<int>(num);
       if (rv > buf_len || rv < 0) {
         // It seems that some winsock interceptors report that more was written
         // than was available. Treat this as an error.  http://crbug.com/27870
         LOG(ERROR) << "Detected broken LSP: Asked to write " << buf_len
                    << " bytes, but " << rv << " bytes reported.";
         return ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
       }
       base::StatsCounter write_bytes("tcp.write_bytes");
       write_bytes.Add(rv);
       net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, rv,
                                     buf->data());
       return rv;
     }
   } else {
     // Anything other than WSA_IO_PENDING is a real failure.
     int os_error = WSAGetLastError();
     if (os_error != WSA_IO_PENDING) {
       int net_error = MapSystemError(os_error);
       net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
                         CreateNetLogSocketErrorCallback(net_error, os_error));
       return net_error;
     }
   }
   // The write is in flight: remember the request and wait for the write event.
   waiting_write_ = true;
   write_callback_ = callback;
   core_->write_iobuffer_ = buf;
   core_->write_buffer_length_ = buf_len;
   core_->WatchForWrite();
   return ERR_IO_PENDING;
 }

 // Stores the local end point of the socket in |address|. Returns a mapped
 // net error if getsockname() fails, ERR_ADDRESS_INVALID if its result cannot
 // be converted to an IPEndPoint, and OK otherwise.
 int TCPSocketWin::GetLocalAddress(IPEndPoint* address) const {
   DCHECK(CalledOnValidThread());
   DCHECK(address);

   SockaddrStorage storage;
   if (getsockname(socket_, storage.addr, &storage.addr_len) != 0)
     return MapSystemError(WSAGetLastError());

   const bool converted =
       address->FromSockAddr(storage.addr, storage.addr_len);
   if (!converted)
     return ERR_ADDRESS_INVALID;

   return OK;
 }

 // Copies the remembered peer address into |address| while the socket is
 // connected; returns ERR_SOCKET_NOT_CONNECTED otherwise.
 int TCPSocketWin::GetPeerAddress(IPEndPoint* address) const {
   DCHECK(CalledOnValidThread());
   DCHECK(address);

   if (IsConnected()) {
     *address = *peer_address_;
     return OK;
   }
   return ERR_SOCKET_NOT_CONNECTED;
 }

 // The only default applied to server sockets is exclusive address use; the
 // result of SetExclusiveAddrUse() is returned unchanged.
 int TCPSocketWin::SetDefaultOptionsForServer() {
   const int result = SetExclusiveAddrUse();
   return result;
 }

 // Applies the default options for client sockets: larger socket buffers on
 // pre-Vista systems, Nagle disabled and TCP keep-alive enabled. The results
 // of the individual calls are not checked.
 void TCPSocketWin::SetDefaultOptionsForClient() {
   // On WinXP the default socket buffers are small; performance testing showed
   // a substantial benefit from raising them from 8KB to 64KB.
   // See also:
   //    http://support.microsoft.com/kb/823764/EN-US
   // On Vista, setting these sizes manually turns off the receive window
   // auto-tuning feature:
   //    http://blogs.msdn.com/wndp/archive/2006/05/05/Winhec-blog-tcpip-2.aspx
   // Vista's auto-tuning is better than any static value we could set, so the
   // sizes are only changed on pre-Vista machines.
   const bool pre_vista = base::win::GetVersion() < base::win::VERSION_VISTA;
   if (pre_vista) {
     const int32 kSocketBufferSize = 64 * 1024;
     SetSocketReceiveBufferSize(socket_, kSocketBufferSize);
     SetSocketSendBufferSize(socket_, kSocketBufferSize);
   }

   DisableNagle(socket_, true);
   SetTCPKeepAlive(socket_, true, kTCPKeepAliveSeconds);
 }

 int TCPSocketWin::SetExclusiveAddrUse() {
   // On Windows, a bound end point can be hijacked by another process by
   // setting SO_REUSEADDR. Therefore a Windows-only option SO_EXCLUSIVEADDRUSE
   // was introduced in Windows NT 4.0 SP4. If the socket that is bound to the
   // end point has SO_EXCLUSIVEADDRUSE enabled, it is not possible for another
   // socket to forcibly bind to the end point until the end point is unbound.
   // It is recommend that all server applications must use SO_EXCLUSIVEADDRUSE.
   // MSDN: http://goo.gl/M6fjQ.
   //
   // Unlike on *nix, on Windows a TCP server socket can always bind to an end
   // point in TIME_WAIT state without setting SO_REUSEADDR, therefore it is not
   // needed here.
   //
   // SO_EXCLUSIVEADDRUSE will prevent a TCP client socket from binding to an end
   // point in TIME_WAIT status. It does not have this effect for a TCP server
   // socket.

   BOOL true_value = 1;
   int rv = setsockopt(socket_, SOL_SOCKET, SO_EXCLUSIVEADDRUSE,
                       reinterpret_cast<const char*>(&true_value),
                       sizeof(true_value));
   if (rv < 0)
     return MapSystemError(errno);
   return OK;
 }

 // Sets the receive buffer size of |socket_| to |size|; returns whether the
 // option could be set.
 bool TCPSocketWin::SetReceiveBufferSize(int32 size) {
   DCHECK(CalledOnValidThread());

   const bool succeeded = SetSocketReceiveBufferSize(socket_, size);
   return succeeded;
 }

 // Sets the send buffer size of |socket_| to |size|; returns whether the
 // option could be set.
 bool TCPSocketWin::SetSendBufferSize(int32 size) {
   DCHECK(CalledOnValidThread());

   const bool succeeded = SetSocketSendBufferSize(socket_, size);
   return succeeded;
 }

 // Turns TCP keep-alive on |socket_| on or off, with |delay| given in
 // seconds; returns whether the setting could be applied.
 bool TCPSocketWin::SetKeepAlive(bool enable, int delay) {
   const bool succeeded = SetTCPKeepAlive(socket_, enable, delay);
   return succeeded;
 }

 // Disables Nagle on |socket_| when |no_delay| is true and re-enables it
 // otherwise; returns whether the option could be set.
 bool TCPSocketWin::SetNoDelay(bool no_delay) {
   const bool succeeded = DisableNagle(socket_, no_delay);
   return succeeded;
 }

 // Closes the OS socket, abandons any pending accept, connect, read or write
 // without running its callback, and returns the object to the state it had
 // before Open()/AdoptConnectedSocket(). Calling it on a socket that is
 // already closed is harmless.
 void TCPSocketWin::Close() {
   DCHECK(CalledOnValidThread());

   if (socket_ != INVALID_SOCKET) {
     // Note: don't use CancelIo to cancel pending IO because it doesn't work
     // when there is a Winsock layered service provider.

     // In most socket implementations, closing a socket results in a graceful
     // connection shutdown, but in Winsock we have to call shutdown explicitly.
     // See the MSDN page "Graceful Shutdown, Linger Options, and Socket Closure"
     // at http://msdn.microsoft.com/en-us/library/ms738547.aspx
     shutdown(socket_, SD_SEND);

     // This cancels any pending IO.
     if (closesocket(socket_) < 0)
       PLOG(ERROR) << "closesocket";
     socket_ = INVALID_SOCKET;
   }

   // Abandon a pending accept first. The watcher has to stop watching
   // |accept_event_| before that handle is closed below, so that it is never
   // left waiting on a closed handle.
   if (!accept_callback_.is_null()) {
     accept_watcher_.StopWatching();
     accept_socket_ = NULL;
     accept_address_ = NULL;
     accept_callback_.Reset();
   }

   if (accept_event_ != WSA_INVALID_EVENT) {
     WSACloseEvent(accept_event_);
     accept_event_ = WSA_INVALID_EVENT;
   }

   if (core_) {
     if (waiting_connect_) {
       // We closed the socket, so this notification will never come.
       // From MSDN's WSAEventSelect documentation:
       // "Closing a socket with closesocket also cancels the association and
       // selection of network events specified in WSAEventSelect for the
       // socket".
       // Drop the reference that WatchForRead() took for the connect attempt.
       core_->Release();
     }
     // The Core may outlive this object while IO is outstanding; make sure it
     // no longer calls back into us.
     core_->Detach();
     core_ = NULL;
   }

   waiting_connect_ = false;
   waiting_read_ = false;
   waiting_write_ = false;

   read_callback_.Reset();
   write_callback_.Reset();
   peer_address_.reset();
   connect_os_error_ = 0;
 }

 // TCP Fast Open is not supported on Windows, so this always reports false.
 bool TCPSocketWin::UsingTCPFastOpen() const {
   const bool fast_open_in_use = false;
   return fast_open_in_use;
 }

 // Switches to logging one connect event that spans several attempts, and
 // logs its beginning for |addresses|. Must not be called while that mode is
 // already active.
 void TCPSocketWin::StartLoggingMultipleConnectAttempts(
     const AddressList& addresses) {
   if (logging_multiple_connect_attempts_) {
     NOTREACHED();
     return;
   }

   logging_multiple_connect_attempts_ = true;
   LogConnectBegin(addresses);
 }

 // Logs the end of the connect event started by
 // StartLoggingMultipleConnectAttempts() with |net_error| and leaves that
 // mode. Must only be called while the mode is active.
 void TCPSocketWin::EndLoggingMultipleConnectAttempts(int net_error) {
   if (!logging_multiple_connect_attempts_) {
     NOTREACHED();
     return;
   }

   LogConnectEnd(net_error);
   logging_multiple_connect_attempts_ = false;
 }

 // Attempts to accept one connection without blocking.
 //
 // On success stores the new socket in |*socket| and its remote end point in
 // |*address|, ends the TYPE_TCP_ACCEPT net log event and returns OK.
 // Returns ERR_IO_PENDING, leaving the net log event open, when accept() has
 // nothing to hand out yet. Any other failure ends the net log event with the
 // error and returns it.
 int TCPSocketWin::AcceptInternal(scoped_ptr<TCPSocketWin>* socket,
                                  IPEndPoint* address) {
   SockaddrStorage storage;
   // accept() returns a SOCKET, which is an unsigned handle type; keep it in
   // that type and compare against INVALID_SOCKET instead of narrowing it to
   // int and testing the sign.
   SOCKET new_socket = accept(socket_, storage.addr, &storage.addr_len);
   if (new_socket == INVALID_SOCKET) {
     int net_error = MapSystemError(WSAGetLastError());
     if (net_error != ERR_IO_PENDING)
       net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, net_error);
     return net_error;
   }

   IPEndPoint ip_end_point;
   if (!ip_end_point.FromSockAddr(storage.addr, storage.addr_len)) {
     NOTREACHED();
     // Nobody owns |new_socket| yet, so it has to be closed here.
     if (closesocket(new_socket) < 0)
       PLOG(ERROR) << "closesocket";
     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, ERR_FAILED);
     return ERR_FAILED;
   }
   scoped_ptr<TCPSocketWin> tcp_socket(new TCPSocketWin(
       net_log_.net_log(), net_log_.source()));
   // AdoptConnectedSocket() closes the socket itself if it fails.
   int adopt_result = tcp_socket->AdoptConnectedSocket(new_socket, ip_end_point);
   if (adopt_result != OK) {
     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, adopt_result);
     return adopt_result;
   }
   *socket = tcp_socket.Pass();
   *address = ip_end_point;
   net_log_.EndEvent(NetLog::TYPE_TCP_ACCEPT,
                     CreateNetLogIPEndPointCallback(&ip_end_point));
   return OK;
 }

 // Called by |accept_watcher_| when |accept_event_| is signaled while an
 // Accept() is pending. Completes the pending accept if a connection can be
 // accepted (or accepting fails for good); otherwise re-arms the watcher so
 // the pending Accept() keeps waiting for the next connection.
 void TCPSocketWin::OnObjectSignaled(HANDLE object) {
   WSANETWORKEVENTS ev;
   if (WSAEnumNetworkEvents(socket_, accept_event_, &ev) == SOCKET_ERROR) {
     PLOG(ERROR) << "WSAEnumNetworkEvents()";
     return;
   }

   if (ev.lNetworkEvents & FD_ACCEPT) {
     int result = AcceptInternal(accept_socket_, accept_address_);
     if (result != ERR_IO_PENDING) {
       accept_socket_ = NULL;
       accept_address_ = NULL;
       base::ResetAndReturn(&accept_callback_).Run(result);
       // Return right away: nothing below may run once the callback has.
       return;
     }
     // FD_ACCEPT was reported but there was nothing left to accept. The watch
     // that delivered this call has fired, so it is started again below, the
     // same way Accept() starts it; otherwise the pending Accept() would never
     // be completed.
   } else {
     // This happens when a client opens a connection and closes it before we
     // have a chance to accept it.
     DCHECK(ev.lNetworkEvents == 0);
   }

   // Start watching the next FD_ACCEPT event.
   WSAEventSelect(socket_, accept_event_, FD_ACCEPT);
   accept_watcher_.StartWatching(accept_event_, this);
 }

 // Starts one connection attempt to |peer_address_|.
 //
 // Begins the TYPE_TCP_CONNECT_ATTEMPT net log event, creates |core_| and
 // issues a non-blocking connect(). Returns ERR_IO_PENDING when the outcome
 // will be delivered through the read event (see DidCompleteConnect()), or a
 // final result otherwise. On an immediate failure the OS error is kept in
 // |connect_os_error_| for DoConnectComplete().
 int TCPSocketWin::DoConnect() {
   DCHECK_EQ(connect_os_error_, 0);
   DCHECK(!core_);

   net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
                       CreateNetLogIPEndPointCallback(peer_address_.get()));

   core_ = new Core(this);
   // WSAEventSelect sets the socket to non-blocking mode as a side effect.
   // Our connect() and recv() calls require that the socket be non-blocking.
   WSAEventSelect(socket_, core_->read_overlapped_.hEvent, FD_CONNECT);

   SockaddrStorage storage;
   if (!peer_address_->ToSockAddr(storage.addr, &storage.addr_len))
     return ERR_INVALID_ARGUMENT;
   if (!connect(socket_, storage.addr, storage.addr_len)) {
     // Connected without waiting!
     //
     // The MSDN page for connect says:
     //   With a nonblocking socket, the connection attempt cannot be completed
     //   immediately. In this case, connect will return SOCKET_ERROR, and
     //   WSAGetLastError will return WSAEWOULDBLOCK.
     // which implies that for a nonblocking socket, connect never returns 0.
     // It's not documented whether the event object will be signaled or not
     // if connect does return 0.  So the code below is essentially dead code
     // and we don't know if it's correct.
     NOTREACHED();

     if (ResetEventIfSignaled(core_->read_overlapped_.hEvent))
       return OK;
   } else {
     // WSAEWOULDBLOCK is the expected "in progress" answer; anything else is
     // an immediate failure.
     int os_error = WSAGetLastError();
     if (os_error != WSAEWOULDBLOCK) {
       LOG(ERROR) << "connect failed: " << os_error;
       connect_os_error_ = os_error;
       int rv = MapConnectError(os_error);
       CHECK_NE(ERR_IO_PENDING, rv);
       return rv;
     }
   }

   // The attempt is in progress: wait for FD_CONNECT on the read event.
   core_->WatchForRead();
   return ERR_IO_PENDING;
 }

 // Ends the net log event of the current connect attempt. A failed attempt is
 // logged together with the OS error saved in |connect_os_error_|, which is
 // cleared here. Unless several attempts are being logged as one connect, the
 // overall connect event is ended as well.
 void TCPSocketWin::DoConnectComplete(int result) {
   const int saved_os_error = connect_os_error_;
   connect_os_error_ = 0;

   if (result == OK) {
     net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT);
   } else {
     net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
                       NetLog::IntegerCallback("os_error", saved_os_error));
   }

   if (logging_multiple_connect_attempts_)
     return;
   LogConnectEnd(result);
 }

 void TCPSocketWin::LogConnectBegin(const AddressList& addresses) {
   base::StatsCounter connects("tcp.connect");
   connects.Increment();

   net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT,
                       addresses.CreateNetLogCallback());
 }

 void TCPSocketWin::LogConnectEnd(int net_error) {
   if (net_error == OK)
     UpdateConnectionTypeHistograms(CONNECTION_ANY);

   if (net_error != OK) {
     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, net_error);
     return;
   }

   struct sockaddr_storage source_address;
   socklen_t addrlen = sizeof(source_address);
   int rv = getsockname(
       socket_, reinterpret_cast<struct sockaddr*>(&source_address), &addrlen);
   if (rv != 0) {
     LOG(ERROR) << "getsockname() [rv: " << rv
                << "] error: " << WSAGetLastError();
     NOTREACHED();
     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, rv);
     return;
   }

   net_log_.EndEvent(
       NetLog::TYPE_TCP_CONNECT,
       CreateNetLogSourceAddressCallback(
           reinterpret_cast<const struct sockaddr*>(&source_address),
           sizeof(source_address)));
 }

 // Performs a non-blocking recv() into |buf|.
 //
 // Returns the number of bytes received (0 included) if recv() succeeded, a
 // net error code if it failed for good, or ERR_IO_PENDING if no data was
 // available. In the pending case the request is remembered and the read
 // event is watched; the read is retried from DidSignalRead().
 int TCPSocketWin::DoRead(IOBuffer* buf, int buf_len,
                          const CompletionCallback& callback) {
   // The first read on this Core selects the read and close notifications.
   if (!core_->non_blocking_reads_initialized_) {
     WSAEventSelect(socket_, core_->read_overlapped_.hEvent,
                    FD_READ | FD_CLOSE);
     core_->non_blocking_reads_initialized_ = true;
   }

   const int received = recv(socket_, buf->data(), buf_len, 0);
   if (received != SOCKET_ERROR) {
     base::StatsCounter read_bytes("tcp.read_bytes");
     if (received > 0)
       read_bytes.Add(received);
     net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED,
                                   received, buf->data());
     return received;
   }

   const int os_error = WSAGetLastError();
   if (os_error != WSAEWOULDBLOCK) {
     const int net_error = MapSystemError(os_error);
     net_log_.AddEvent(
         NetLog::TYPE_SOCKET_READ_ERROR,
         CreateNetLogSocketErrorCallback(net_error, os_error));
     return net_error;
   }

   // No data yet: remember the request and wait for the read event.
   waiting_read_ = true;
   read_callback_ = callback;
   core_->read_iobuffer_ = buf;
   core_->read_buffer_length_ = buf_len;
   core_->WatchForRead();
   return ERR_IO_PENDING;
 }

 // Called when the read event is signaled while a connect is pending. Works
 // out the result of the attempt from the reported network events, logs it
 // and runs the stored connect callback with it.
 void TCPSocketWin::DidCompleteConnect() {
   DCHECK(waiting_connect_);
   DCHECK(!read_callback_.is_null());

   int os_error = 0;
   // Kept only if neither an enumeration error nor FD_CONNECT is reported.
   int result = ERR_UNEXPECTED;

   WSANETWORKEVENTS events;
   const int enum_rv = WSAEnumNetworkEvents(
       socket_, core_->read_overlapped_.hEvent, &events);
   if (enum_rv == SOCKET_ERROR) {
     NOTREACHED();
     os_error = WSAGetLastError();
     result = MapSystemError(os_error);
   } else if (events.lNetworkEvents & FD_CONNECT) {
     os_error = events.iErrorCode[FD_CONNECT_BIT];
     result = MapConnectError(os_error);
   } else {
     NOTREACHED();
   }

   connect_os_error_ = os_error;
   DoConnectComplete(result);
   waiting_connect_ = false;

   DCHECK_NE(result, ERR_IO_PENDING);
   base::ResetAndReturn(&read_callback_).Run(result);
 }

 void TCPSocketWin::DidCompleteWrite() {
   DCHECK(waiting_write_);
   DCHECK(!write_callback_.is_null());

   DWORD num_bytes, flags;
   BOOL ok = WSAGetOverlappedResult(socket_, &core_->write_overlapped_,
                                    &num_bytes, FALSE, &flags);
   WSAResetEvent(core_->write_overlapped_.hEvent);
   waiting_write_ = false;
   int rv;
   if (!ok) {
     int os_error = WSAGetLastError();
     rv = MapSystemError(os_error);
     net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
                       CreateNetLogSocketErrorCallback(rv, os_error));
   } else {
     rv = static_cast<int>(num_bytes);
     if (rv > core_->write_buffer_length_ || rv < 0) {
       // It seems that some winsock interceptors report that more was written
       // than was available. Treat this as an error.  http://crbug.com/27870
       LOG(ERROR) << "Detected broken LSP: Asked to write "
                  << core_->write_buffer_length_ << " bytes, but " << rv
                  << " bytes reported.";
       rv = ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
     } else {
       base::StatsCounter write_bytes("tcp.write_bytes");
       write_bytes.Add(num_bytes);
       net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, num_bytes,
                                     core_->write_iobuffer_->data());
     }
   }

   core_->write_iobuffer_ = NULL;

   DCHECK_NE(rv, ERR_IO_PENDING);
   base::ResetAndReturn(&write_callback_).Run(rv);
 }

 // Called when the read event is signaled while a read is pending.
 //
 // If a network event is reported, the read is retried with the buffer and
 // callback saved by DoRead(); unless it is pending again, the read state is
 // cleared and the stored callback is run with the result. If no event is
 // reported, the read event is simply watched again.
 void TCPSocketWin::DidSignalRead() {
   DCHECK(waiting_read_);
   DCHECK(!read_callback_.is_null());

   int os_error = 0;
   WSANETWORKEVENTS network_events;
   int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent,
                                 &network_events);
   if (rv == SOCKET_ERROR) {
     // Enumerating the events failed; report that error to the caller.
     os_error = WSAGetLastError();
     rv = MapSystemError(os_error);
   } else if (network_events.lNetworkEvents) {
     DCHECK_EQ(network_events.lNetworkEvents & ~(FD_READ | FD_CLOSE), 0);
     // If network_events.lNetworkEvents is FD_CLOSE and
     // network_events.iErrorCode[FD_CLOSE_BIT] is 0, it is a graceful
     // connection closure. It is tempting to directly set rv to 0 in
     // this case, but the MSDN pages for WSAEventSelect and
     // WSAAsyncSelect recommend we still call DoRead():
     //   FD_CLOSE should only be posted after all data is read from a
     //   socket, but an application should check for remaining data upon
     //   receipt of FD_CLOSE to avoid any possibility of losing data.
     //
     // If network_events.iErrorCode[FD_READ_BIT] or
     // network_events.iErrorCode[FD_CLOSE_BIT] is nonzero, still call
     // DoRead() because recv() reports a more accurate error code
     // (WSAECONNRESET vs. WSAECONNABORTED) when the connection was
     // reset.
     rv = DoRead(core_->read_iobuffer_, core_->read_buffer_length_,
                 read_callback_);
     // DoRead() has set up the pending state and the watch again.
     if (rv == ERR_IO_PENDING)
       return;
   } else {
     // This may happen because Read() may succeed synchronously and
     // consume all the received data without resetting the event object.
     core_->WatchForRead();
     return;
   }

   // The read has finished: clear the pending state before running the
   // callback.
   waiting_read_ = false;
   core_->read_iobuffer_ = NULL;
   core_->read_buffer_length_ = 0;

   DCHECK_NE(rv, ERR_IO_PENDING);
   base::ResetAndReturn(&read_callback_).Run(rv);
 }

 }  // namespace net