[newlib-cygwin] Cygwin: Implement GSO/GRO support

Corinna Vinschen corinna@sourceware.org
Fri May 13 10:15:52 GMT 2022


https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=7c804d160dab276f859b688f2d235a3ccb94c78a

commit 7c804d160dab276f859b688f2d235a3ccb94c78a
Author: Corinna Vinschen <corinna@vinschen.de>
Date:   Thu May 5 21:56:57 2022 +0200

    Cygwin: Implement GSO/GRO support
    
    - getsockopt (SOL_UDP, UDP_SEGMENT)
    - setsockopt (SOL_UDP, UDP_SEGMENT)
    - getsockopt (SOL_UDP, UDP_GRO)
    - setsockopt (SOL_UDP, UDP_GRO)
    - sendmsg with SOL_UDP/UDP_SEGMENT control message
    - recvmsg, convert Winsock UDP_COALESCED_INFO (DWORD) control message to
      Linux compatible SOL_UDP/UDP_GRO (uint16_t)

Diff:
---
 winsup/cygwin/fhandler_socket_inet.cc | 208 +++++++++++++++++++++++++++++++---
 winsup/cygwin/include/netinet/udp.h   |   4 +
 winsup/cygwin/release/3.4.0           |   2 +
 winsup/doc/new-features.xml           |   4 +
 4 files changed, 205 insertions(+), 13 deletions(-)

diff --git a/winsup/cygwin/fhandler_socket_inet.cc b/winsup/cygwin/fhandler_socket_inet.cc
index 72103d20d..f738ce4d4 100644
--- a/winsup/cygwin/fhandler_socket_inet.cc
+++ b/winsup/cygwin/fhandler_socket_inet.cc
@@ -25,6 +25,7 @@
 #include <w32api/mswsock.h>
 #include <w32api/mstcpip.h>
 #include <netinet/tcp.h>
+#include <netinet/udp.h>
 #include <unistd.h>
 #include <asm/byteorder.h>
 #include <sys/socket.h>
@@ -38,6 +39,7 @@
 #include "cygheap.h"
 #include "shared_info.h"
 #include "wininfo.h"
+#include "tls_pbuf.h"
 
 #define ASYNC_MASK (FD_READ|FD_WRITE|FD_OOB|FD_ACCEPT|FD_CONNECT)
 #define EVENT_MASK (FD_READ|FD_WRITE|FD_OOB|FD_ACCEPT|FD_CONNECT|FD_CLOSE)
@@ -1335,6 +1337,31 @@ fhandler_socket_wsock::recvmsg (struct msghdr *msg, int flags)
       msg->msg_controllen = wsamsg.Control.len;
       if (!CYGWIN_VERSION_CHECK_FOR_USING_ANCIENT_MSGHDR)
 	msg->msg_flags = wsamsg.dwFlags;
+      /* If a UDP_GRO control message is present, convert gso_size from
+	 Windows DWORD to Linux-compatible uint16_t.  We don't have to change the
+	 msg_control block layout for that, assuming applications do as they
+	 have been told and only use CMSG_FIRSTHDR/CMSG_NXTHDR/CMSG_DATA to
+	 access control messages. The cmsghdr alignment saves our ass here! */
+      if (msg->msg_controllen && get_socket_type () == SOCK_DGRAM
+	  && (get_addr_family () == AF_INET || get_addr_family () == AF_INET6))
+	{
+	  struct cmsghdr *cmsg;
+
+	  for (cmsg = CMSG_FIRSTHDR (msg);
+	       cmsg;
+	       cmsg = CMSG_NXTHDR (msg, cmsg))
+	    {
+	      if (cmsg->cmsg_level == SOL_UDP
+		  && cmsg->cmsg_type == UDP_GRO)
+		{
+		  PDWORD gso_size_win = (PDWORD) CMSG_DATA(cmsg);
+		  uint16_t *gso_size_cyg = (uint16_t *) CMSG_DATA(cmsg);
+		  uint16_t gso_size = (uint16_t) *gso_size_win;
+		  *gso_size_cyg = gso_size;
+		  break;
+		}
+	    }
+	}
     }
   return ret;
 }
@@ -1540,16 +1567,102 @@ fhandler_socket_inet::sendto (const void *in_ptr, size_t len, int flags,
 }
 
 ssize_t
-fhandler_socket_inet::sendmsg (const struct msghdr *msg, int flags)
+fhandler_socket_inet::sendmsg (const struct msghdr *in_msg, int flags)
 {
   struct sockaddr_storage sst;
   int len = 0;
+  DWORD old_gso_size = MAXDWORD;
+  ssize_t ret;
+
+  /* Copy incoming msghdr into a local copy.  We only access this from
+     here on, making sure not to manipulate user space data. */
+  struct msghdr local_msg = *in_msg;
+  struct msghdr *msg = &local_msg;
 
   if (msg->msg_name
       && get_inet_addr_inet ((struct sockaddr *) msg->msg_name,
 			     msg->msg_namelen, &sst, &len) == SOCKET_ERROR)
     return SOCKET_ERROR;
 
+  /* Check for our optmem_max value */
+  if (msg->msg_controllen > NT_MAX_PATH)
+    {
+      set_errno (ENOBUFS);
+      return SOCKET_ERROR;
+    }
+
+  /* WSASendMsg is supported only for datagram and raw sockets. */
+  if (get_socket_type () != SOCK_DGRAM && get_socket_type () != SOCK_RAW)
+    msg->msg_controllen = 0;
+
+  /* If we actually have control data, copy it to local storage.  Control
+     messages only handled by us have to be dropped from the msg_control
+     block, and we don't want to change user space data. */
+  tmp_pathbuf tp;
+  if (msg->msg_controllen)
+    {
+      void *local_cmsg = tp.c_get ();
+      memcpy (local_cmsg, msg->msg_control, msg->msg_controllen);
+      msg->msg_control = local_cmsg;
+    }
+
+  /* Check for control messages we handle inside Cygwin. Right now this
+     only affects UDP sockets, so check here early. */
+  if (msg->msg_controllen && get_socket_type () == SOCK_DGRAM)
+    {
+      struct cmsghdr *cmsg;
+      bool dropped = false;
+
+      for (cmsg = CMSG_FIRSTHDR (msg);
+	   cmsg;
+	   cmsg = dropped ? cmsg : CMSG_NXTHDR (msg, cmsg))
+	{
+	  dropped = false;
+	  /* cmsg within bounds? */
+	  if (cmsg->cmsg_len < sizeof (struct cmsghdr)
+	      || cmsg->cmsg_len > (size_t) msg->msg_controllen
+				  - ((uintptr_t) cmsg
+				     - (uintptr_t) msg->msg_control))
+	    {
+	      set_errno (EINVAL);
+	      return SOCKET_ERROR;
+	    }
+	  /* UDP_SEGMENT? Override gso_size for this single sendmsg. */
+	  if (cmsg->cmsg_level == SOL_UDP && cmsg->cmsg_type == UDP_SEGMENT)
+	    {
+	      /* 16 bit unsigned, as on Linux */
+	      DWORD gso_size = *(uint16_t *) CMSG_DATA(cmsg);
+	      int size = sizeof old_gso_size;
+	      /* Save the old gso_size and set the requested one. */
+	      if (::getsockopt (get_socket (), IPPROTO_UDP, UDP_SEGMENT,
+				(char *) &old_gso_size, &size) == SOCKET_ERROR
+		  || ::setsockopt (get_socket (), IPPROTO_UDP, UDP_SEGMENT,
+				(char *) &gso_size, sizeof gso_size)
+		     == SOCKET_ERROR)
+		{
+		  set_winsock_errno ();
+		  return SOCKET_ERROR;
+		}
+	      /* Drop message from msgbuf, Windows doesn't know it. */
+	      size_t cmsg_size = CMSG_ALIGN (cmsg->cmsg_len);
+	      struct cmsghdr *cmsg_next = CMSG_NXTHDR (msg, cmsg);
+	      if (cmsg_next)
+		memmove (cmsg, cmsg_next, (char *) msg->msg_control
+					  + msg->msg_controllen
+					  - (char *) cmsg_next);
+	      msg->msg_controllen -= cmsg_size;
+	      dropped = true;
+	      /* Avoid infinite loop */
+	      if (msg->msg_controllen <= 0)
+		{
+		  cmsg = NULL;
+		  msg->msg_controllen = 0;
+		}
+	    }
+	}
+    }
+
+  /* Copy over msg_iov into an equivalent WSABUF array. */
   WSABUF wsabuf[msg->msg_iovlen];
   WSABUF *wsaptr = wsabuf;
   const struct iovec *iovptr = msg->msg_iov;
@@ -1558,15 +1671,18 @@ fhandler_socket_inet::sendmsg (const struct msghdr *msg, int flags)
       wsaptr->len = iovptr->iov_len;
       (wsaptr++)->buf = (char *) (iovptr++)->iov_base;
     }
-  /* Disappointing but true:  Even if WSASendMsg is supported, it's only
-     supported for datagram and raw sockets. */
-  DWORD controllen = (DWORD) ((get_socket_type () == SOCK_STREAM)
-			      ? 0 : msg->msg_controllen);
+
+  /* Finally copy over to a WSAMSG and call send_internal with that. */
   WSAMSG wsamsg = { msg->msg_name ? (struct sockaddr *) &sst : NULL, len,
 		    wsabuf, (DWORD) msg->msg_iovlen,
-		    { controllen, (char *) msg->msg_control },
+		    { (DWORD) msg->msg_controllen,
+		      msg->msg_controllen ? (char *) msg->msg_control : NULL },
 		    0 };
-  return send_internal (&wsamsg, flags);
+  ret = send_internal (&wsamsg, flags);
+  if (old_gso_size != MAXDWORD)
+    ::setsockopt (get_socket (), IPPROTO_UDP, UDP_SEGMENT,
+		  (char *) &old_gso_size, sizeof old_gso_size);
+  return ret;
 }
 
 ssize_t
@@ -1681,7 +1797,7 @@ fhandler_socket_inet::setsockopt (int level, int optname, const void *optval,
 {
   bool ignore = false;
   int ret = -1;
-  unsigned int timeout;
+  unsigned int winsock_val;
 
   /* Preprocessing setsockopt.  Set ignore to true if setsockopt call should
      get skipped entirely. */
@@ -1774,7 +1890,6 @@ fhandler_socket_inet::setsockopt (int level, int optname, const void *optval,
       break;
 
     case IPPROTO_IPV6:
-      {
       switch (optname)
 	{
 	case IPV6_TCLASS:
@@ -1785,8 +1900,6 @@ fhandler_socket_inet::setsockopt (int level, int optname, const void *optval,
 	default:
 	  break;
 	}
-      }
-    default:
       break;
 
     case IPPROTO_TCP:
@@ -1851,9 +1964,9 @@ fhandler_socket_inet::setsockopt (int level, int optname, const void *optval,
 	    {
 	      /* convert msecs to secs.  Values < 1000 ms are converted to
 		 0 secs, just as in WinSock. */
-	      timeout = *(unsigned int *) optval / MSPERSEC;
+	      winsock_val = *(unsigned int *) optval / MSPERSEC;
 	      optname = TCP_MAXRT;
-	      optval = (const void *) &timeout;
+	      optval = (const void *) &winsock_val;
 	    }
 	  break;
 
@@ -1918,6 +2031,49 @@ fhandler_socket_inet::setsockopt (int level, int optname, const void *optval,
 	  break;
 	}
       break;
+
+    case IPPROTO_UDP:
+      /* Check for dgram socket early on, so we don't have to do this for
+	 every option.  Also, WinSock would return EINVAL instead. */
+      if (type != SOCK_DGRAM)
+	{
+	  set_errno (EOPNOTSUPP);
+	  return -1;
+	}
+      if (optlen < (socklen_t) sizeof (int))
+	{
+	  set_errno (EINVAL);
+	  return ret;
+	}
+      switch (optname)
+	{
+	case UDP_SEGMENT:
+	  if (*(int *) optval < 0 || *(int *) optval > USHRT_MAX)
+	    {
+	      set_errno (EINVAL);
+	      return -1;
+	    }
+	  break;
+
+	case UDP_GRO:
+	  /* In contrast to Windows' UDP_RECV_MAX_COALESCED_SIZE option,
+	     Linux' UDP_GRO option is just a bool. The max. packet size
+	     is dynamically evaluated from the MRU.  There's no easy,
+	     reliable way to get the MRU. We assume that this is what Windows
+	     will do internally anyway and, given UDP_RECV_MAX_COALESCED_SIZE
+	     defines a *maximum* size for aggregated packets, we just choose
+	     the maximum sensible value.  FIXME? IP_MTU_DISCOVER / IP_MTU */
+	  winsock_val = *(int *) optval ? USHRT_MAX : 0;
+	  optval = &winsock_val;
+	  break;
+
+	default:
+	  break;
+	}
+      break;
+
+    default:
+      break;
     }
 
   /* Call Winsock setsockopt (or not) */
@@ -2118,6 +2274,16 @@ fhandler_socket_inet::getsockopt (int level, int optname, const void *optval,
 	}
       break;
 
+    case IPPROTO_UDP:
+      /* Check for dgram socket early on, so we don't have to do this for
+	 every option.  Also, WinSock would return EINVAL instead. */
+      if (type != SOCK_DGRAM)
+	{
+	  set_errno (EOPNOTSUPP);
+	  return -1;
+	}
+      break;
+
     default:
       break;
     }
@@ -2155,6 +2321,7 @@ fhandler_socket_inet::getsockopt (int level, int optname, const void *optval,
 	  break;
 	}
       break;
+
     case IPPROTO_TCP:
       switch (optname)
 	{
@@ -2174,6 +2341,21 @@ fhandler_socket_inet::getsockopt (int level, int optname, const void *optval,
 	default:
 	  break;
 	}
+      break;
+
+    case IPPROTO_UDP:
+      switch (optname)
+	{
+	case UDP_GRO:
+	  /* Convert to bool option */
+	  *(unsigned int *) optval = *(unsigned int *) optval ? 1 : 0;
+	  break;
+
+	default:
+	  break;
+	}
+      break;
+
     default:
       break;
     }
diff --git a/winsup/cygwin/include/netinet/udp.h b/winsup/cygwin/include/netinet/udp.h
index 6d8646e80..fbe8cc92c 100644
--- a/winsup/cygwin/include/netinet/udp.h
+++ b/winsup/cygwin/include/netinet/udp.h
@@ -33,6 +33,10 @@
 #ifndef _NETINET_UDP_H
 #define _NETINET_UDP_H
 
+#define UDP_SEGMENT	 2	/* WinSock UDP_SEND_MSG_SIZE */
+#define UDP_GRO		 3	/* WinSock UDP_RECV_MAX_COALESCED_SIZE,
+				   also == UDP_COALESCED_INFO */
+
 /*
  * Udp protocol header.
  * Per RFC 768, September, 1981.
diff --git a/winsup/cygwin/release/3.4.0 b/winsup/cygwin/release/3.4.0
index 5bcdb0580..a12905a46 100644
--- a/winsup/cygwin/release/3.4.0
+++ b/winsup/cygwin/release/3.4.0
@@ -7,6 +7,8 @@ What's new:
 
 - Add code to handle signal masks in /proc/<PID>/status.
 
+- Handle UDP_SEGMENT and UDP_GRO socket options.
+
 
 What changed:
 -------------
diff --git a/winsup/doc/new-features.xml b/winsup/doc/new-features.xml
index 4a626c716..c696a972a 100644
--- a/winsup/doc/new-features.xml
+++ b/winsup/doc/new-features.xml
@@ -25,6 +25,10 @@ The CYGWIN=pipe_byte option is now set by default, so that pipes are
 opened in byte mode rather than message mode.
 </para></listitem>
 
+<listitem><para>
+Handle UDP_SEGMENT and UDP_GRO socket options.
+</para></listitem>
+
 </itemizedlist>
 
 </sect2>


More information about the Cygwin-cvs mailing list