Index: lib/libc/sys/getsockopt.2 =================================================================== --- lib/libc/sys/getsockopt.2 +++ lib/libc/sys/getsockopt.2 @@ -28,7 +28,7 @@ .\" @(#)getsockopt.2 8.4 (Berkeley) 5/2/95 .\" $FreeBSD$ .\" -.Dd April 5, 2013 +.Dd October 11, 2016 .Dt GETSOCKOPT 2 .Os .Sh NAME @@ -187,6 +187,7 @@ .It Dv SO_LISTENQLEN Ta "get complete queue length of the socket (get only)" .It Dv SO_LISTENINCQLEN Ta "get incomplete queue length of the socket (get only)" .It Dv SO_USER_COOKIE Ta "set the 'so_user_cookie' value for the socket (uint32_t, set only)" +.It Dv SO_MAX_PACING_RATE Ta "set the maximum transmit rate in bytes per second for the socket" .El .Pp .Dv SO_DEBUG @@ -496,6 +497,11 @@ returns the number of unaccepted complete connections. .Dv SO_LISTENINCQLEN returns the number of unaccepted incomplete connections. +.Pp +.Dv SO_MAX_PACING_RATE +instructs the socket and underlying network adapter layers that the +transmit rate should be limited to the given unsigned 32-bit value in +bytes per second. .Sh RETURN VALUES .Rv -std .Sh ERRORS Index: sbin/ifconfig/ifconfig.8 =================================================================== --- sbin/ifconfig/ifconfig.8 +++ sbin/ifconfig/ifconfig.8 @@ -28,7 +28,7 @@ .\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94 .\" $FreeBSD$ .\" -.Dd September 17, 2016 +.Dd October 11, 2016 .Dt IFCONFIG 8 .Os .Sh NAME @@ -460,6 +460,8 @@ and 802.11g .Pq Cm 11g operating modes. +.It Cm txrtlmt +Set if the driver supports TX rate limiting. .It Cm inst Ar minst , Cm instance Ar minst Set the media instance to .Ar minst . 
Index: sbin/ifconfig/ifconfig.c =================================================================== --- sbin/ifconfig/ifconfig.c +++ sbin/ifconfig/ifconfig.c @@ -1145,7 +1145,7 @@ "\020\1RXCSUM\2TXCSUM\3NETCONS\4VLAN_MTU\5VLAN_HWTAGGING\6JUMBO_MTU\7POLLING" \ "\10VLAN_HWCSUM\11TSO4\12TSO6\13LRO\14WOL_UCAST\15WOL_MCAST\16WOL_MAGIC" \ "\17TOE4\20TOE6\21VLAN_HWFILTER\23VLAN_HWTSO\24LINKSTATE\25NETMAP" \ -"\26RXCSUM_IPV6\27TXCSUM_IPV6" +"\26RXCSUM_IPV6\27TXCSUM_IPV6\31TXRTLMT" /* * Print the status of the interface. If an address family was @@ -1453,6 +1453,8 @@ DEF_CMD("-wol_mcast", -IFCAP_WOL_MCAST, setifcap), DEF_CMD("wol_magic", IFCAP_WOL_MAGIC, setifcap), DEF_CMD("-wol_magic", -IFCAP_WOL_MAGIC, setifcap), + DEF_CMD("txrtlmt", IFCAP_TXRTLMT, setifcap), + DEF_CMD("-txrtlmt", -IFCAP_TXRTLMT, setifcap), DEF_CMD("normal", -IFF_LINK0, setifflags), DEF_CMD("compress", IFF_LINK0, setifflags), DEF_CMD("noicmp", IFF_LINK1, setifflags), Index: sys/conf/NOTES =================================================================== --- sys/conf/NOTES +++ sys/conf/NOTES @@ -616,6 +616,8 @@ options INET #Internet communications protocols options INET6 #IPv6 communications protocols +options RATELIMIT # TX rate limiting support + options ROUTETABLES=2 # allocated fibs up to 65536. default is 1. # but that would be a bad idea as they are large. 
Index: sys/conf/config.mk =================================================================== --- sys/conf/config.mk +++ sys/conf/config.mk @@ -19,6 +19,10 @@ opt_inet6.h: @echo "#define INET6 1" > ${.TARGET} .endif +.if ${MK_RATELIMIT} != "no" +opt_ratelimit.h: + @echo "#define RATELIMIT 1" > ${.TARGET} +.endif .if ${MK_EISA} != "no" opt_eisa.h: @echo "#define DEV_EISA 1" > ${.TARGET} Index: sys/conf/kern.opts.mk =================================================================== --- sys/conf/kern.opts.mk +++ sys/conf/kern.opts.mk @@ -47,7 +47,8 @@ EISA \ EXTRA_TCP_STACKS \ NAND \ - OFED + OFED \ + RATELIMIT # Some options are totally broken on some architectures. We disable # them. If you need to enable them on an experimental basis, you Index: sys/conf/options =================================================================== --- sys/conf/options +++ sys/conf/options @@ -409,6 +409,7 @@ BOOTP_WIRED_TO opt_bootp.h DEVICE_POLLING DUMMYNET opt_ipdn.h +RATELIMIT opt_ratelimit.h INET opt_inet.h INET6 opt_inet6.h IPDIVERT Index: sys/kern/uipc_socket.c =================================================================== --- sys/kern/uipc_socket.c +++ sys/kern/uipc_socket.c @@ -105,6 +105,7 @@ #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_ratelimit.h" #include "opt_compat.h" #include @@ -2683,6 +2684,18 @@ #endif break; + case SO_MAX_PACING_RATE: +#ifdef RATELIMIT + error = sooptcopyin(sopt, &val32, sizeof(val32), + sizeof(val32)); + if (error) + goto bad; + so->so_max_pacing_rate = val32; +#else + error = EOPNOTSUPP; +#endif + break; + default: if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0) error = hhook_run_socket(so, sopt, @@ -2738,6 +2751,9 @@ #ifdef MAC struct mac extmac; #endif +#ifdef RATELIMIT + uint32_t val32; +#endif CURVNET_SET(so->so_vnet); error = 0; @@ -2870,6 +2886,15 @@ optval = so->so_incqlen; goto integer; + case SO_MAX_PACING_RATE: +#ifdef RATELIMIT + val32 = so->so_max_pacing_rate; + error = sooptcopyout(sopt, &val32, 
sizeof(val32)); +#else + error = EOPNOTSUPP; +#endif + break; + default: if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0) error = hhook_run_socket(so, sopt, Index: sys/net/if.h =================================================================== --- sys/net/if.h +++ sys/net/if.h @@ -239,6 +239,7 @@ #define IFCAP_RXCSUM_IPV6 0x200000 /* can offload checksum on IPv6 RX */ #define IFCAP_TXCSUM_IPV6 0x400000 /* can offload checksum on IPv6 TX */ #define IFCAP_HWSTATS 0x800000 /* manages counters internally */ +#define IFCAP_TXRTLMT 0x1000000 /* hardware supports TX rate limiting */ #define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6) Index: sys/net/if_dead.c =================================================================== --- sys/net/if_dead.c +++ sys/net/if_dead.c @@ -100,6 +100,26 @@ return (0); } +static int +ifdead_snd_tag_alloc(struct ifnet *ifp, unsigned type, + union if_snd_tag_params *params, struct m_snd_tag **ppmt) +{ + return (EOPNOTSUPP); +} + +static int +ifdead_snd_tag_modify(struct ifnet *ifp, unsigned type, + union if_snd_tag_params *params, struct m_snd_tag *pmt) +{ + return (EOPNOTSUPP); +} + +static void +ifdead_snd_tag_free(struct ifnet *ifp, unsigned type, + struct m_snd_tag *pmt) +{ +} + void if_dead(struct ifnet *ifp) { @@ -112,4 +132,7 @@ ifp->if_qflush = ifdead_qflush; ifp->if_transmit = ifdead_transmit; ifp->if_get_counter = ifdead_get_counter; + ifp->if_snd_tag_alloc = ifdead_snd_tag_alloc; + ifp->if_snd_tag_modify = ifdead_snd_tag_modify; + ifp->if_snd_tag_free = ifdead_snd_tag_free; } Index: sys/net/if_var.h =================================================================== --- sys/net/if_var.h +++ sys/net/if_var.h @@ -175,6 +175,32 @@ #define IFENCAP_FLAG_BROADCAST 0x02 /* Destination is broadcast */ +/* + * Network interface send tag support. The storage of "struct + * m_snd_tag" comes from the network driver and it is free to allocate + * as much additional space as it wants for its own use. 
+ */ +struct m_snd_tag; + +#define IF_SND_TAG_TYPE_RATE_LIMIT 0 +#define IF_SND_TAG_TYPE_MAX 1 + +struct if_rate_limit_snd_tag_params { + uint64_t max_rate; /* in bytes/s */ + uint32_t flowid; /* mbuf hash value */ + uint32_t flowtype; /* mbuf hash type */ +}; + +union if_snd_tag_params { + struct if_rate_limit_snd_tag_params rate_limit; +}; + +typedef int (if_snd_tag_alloc_t)(struct ifnet *, unsigned type, + union if_snd_tag_params *, struct m_snd_tag **); +typedef int (if_snd_tag_modify_t)(struct ifnet *, unsigned type, + union if_snd_tag_params *, struct m_snd_tag *); +typedef void (if_snd_tag_free_t)(struct ifnet *, unsigned type, + struct m_snd_tag *); /* * Structure defining a network interface. @@ -304,11 +330,18 @@ u_int if_hw_tsomaxsegsize; /* TSO maximum segment size in bytes */ /* + * Network adapter send tag support: + */ + if_snd_tag_alloc_t *if_snd_tag_alloc; + if_snd_tag_modify_t *if_snd_tag_modify; + if_snd_tag_free_t *if_snd_tag_free; + + /* * Spare fields to be added before branching a stable branch, so * that structure can be enhanced without changing the kernel * binary interface. */ - void *if_pspare[4]; /* packet pacing / general use */ + void *if_pspare[1]; /* packet pacing / general use */ int if_ispare[4]; /* packet pacing / general use */ }; Index: sys/netinet/in_pcb.h =================================================================== --- sys/netinet/in_pcb.h +++ sys/netinet/in_pcb.h @@ -181,6 +181,7 @@ * read-lock usage during modification, this model can be applied to other * protocols (especially SCTP). 
*/ +struct m_snd_tag; struct inpcb { LIST_ENTRY(inpcb) inp_hash; /* (h/i) hash list */ LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */ @@ -202,11 +203,11 @@ u_char inp_ip_minttl; /* (i) minimum TTL or drop */ uint32_t inp_flowid; /* (x) flow id / queue id */ u_int inp_refcount; /* (i) refcount */ - void *inp_pspare[5]; /* (x) packet pacing / general use */ + struct m_snd_tag *inp_snd_tag; /* (i) send tag for outgoing mbufs */ + void *inp_pspare[4]; /* (x) general use */ uint32_t inp_flowtype; /* (x) M_HASHTYPE value */ uint32_t inp_rss_listen_bucket; /* (x) overridden RSS listen bucket */ - u_int inp_ispare[4]; /* (x) packet pacing / user cookie / - * general use */ + u_int inp_ispare[4]; /* (x) user cookie / general use */ /* Local and foreign ports, local and foreign addr. */ struct in_conninfo inp_inc; /* (i) list for PCB's local port */ @@ -616,6 +617,7 @@ #define INP_RSS_BUCKET_SET 0x00000080 /* IP_RSS_LISTEN_BUCKET is set */ #define INP_RECVFLOWID 0x00000100 /* populate recv datagram with flow info */ #define INP_RECVRSSBUCKETID 0x00000200 /* populate recv datagram with bucket id */ +#define INP_RATE_LIMIT_CHANGED 0x00000400 /* rate limit needs attention */ /* * Flags passed to in_pcblookup*() functions. 
@@ -736,6 +738,10 @@ struct sockaddr * in_sockaddr(in_port_t port, struct in_addr *addr); void in_pcbsosetlabel(struct socket *so); +#ifdef RATELIMIT +void in_pcboutput_txrtlmt(struct inpcb *, struct ifnet *, struct mbuf *); +int in_pcboutput_eagain(struct inpcb *, struct mbuf *); +#endif #endif /* _KERNEL */ #endif /* !_NETINET_IN_PCB_H_ */ Index: sys/netinet/in_pcb.c =================================================================== --- sys/netinet/in_pcb.c +++ sys/netinet/in_pcb.c @@ -42,6 +42,7 @@ #include "opt_ipsec.h" #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_ratelimit.h" #include "opt_pcbgroup.h" #include "opt_rss.h" @@ -57,6 +58,7 @@ #include #include #include +#include #include #include #include @@ -136,6 +138,9 @@ #define V_ipport_tcplastcount VNET(ipport_tcplastcount) static void in_pcbremlists(struct inpcb *inp); +#ifdef RATELIMIT +static void in_pcbdetach_txrtlmt(struct inpcb *inp); +#endif #ifdef INET static struct inpcb *in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport_arg, @@ -1140,6 +1145,10 @@ KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__)); +#ifdef RATELIMIT + if (inp->inp_snd_tag != NULL) + in_pcbdetach_txrtlmt(inp); +#endif inp->inp_socket->so_pcb = NULL; inp->inp_socket = NULL; } @@ -2677,3 +2686,231 @@ db_print_inpcb(inp, "inpcb", 0); } #endif /* DDB */ + +#ifdef RATELIMIT +/* + * Modify existing TX rate limit on inp_snd_tag and update inpcb info: + */ +static int +in_pcbmodify_txrtlmt(struct inpcb *inp, uint32_t max_pacing_rate) +{ + union if_snd_tag_params params = { + .rate_limit.max_rate = max_pacing_rate, + .rate_limit.flowid = inp->inp_flowid, + .rate_limit.flowtype = inp->inp_flowtype, + }; + struct m_snd_tag *mst; + struct ifnet *ifp; + int error; + + INP_WLOCK_ASSERT(inp); + + mst = inp->inp_snd_tag; + + KASSERT(mst != NULL, + ("%s: inp->inp_snd_tag == NULL", __FUNCTION__)); + + ifp = mst->ifp; + + KASSERT(ifp != NULL, + ("%s: mst->ifp == NULL", 
__FUNCTION__)); + + if (ifp->if_snd_tag_modify == NULL) { + error = EOPNOTSUPP; + } else { + error = ifp->if_snd_tag_modify(ifp, + IF_SND_TAG_TYPE_RATE_LIMIT, &params, mst); + } + return (error); +} + +/* + * Create a TX rate limit on ifp and attach it to inpcb: + */ +static int +in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp, + uint32_t max_pacing_rate) +{ + union if_snd_tag_params params = { + .rate_limit.max_rate = max_pacing_rate, + .rate_limit.flowid = inp->inp_flowid, + .rate_limit.flowtype = inp->inp_flowtype, + }; + int error; + + INP_WLOCK_ASSERT(inp); + + KASSERT(inp->inp_snd_tag == NULL, + ("%s: inp_snd_tag != NULL", __FUNCTION__)); + + if_ref(ifp); + if (ifp->if_snd_tag_alloc == NULL) { + error = EOPNOTSUPP; + } else { + error = ifp->if_snd_tag_alloc(ifp, + IF_SND_TAG_TYPE_RATE_LIMIT, &params, &inp->inp_snd_tag); + } + if (error != 0) + if_rele(ifp); + return (error); +} + +/* + * Free the TX rate limit send tag on its owning interface and + * detach it from the inpcb: + */ +static void +in_pcbdetach_txrtlmt(struct inpcb *inp) +{ + struct m_snd_tag *mst; + struct ifnet *ifp; + + INP_WLOCK_ASSERT(inp); + + mst = inp->inp_snd_tag; + inp->inp_snd_tag = NULL; + + KASSERT(mst != NULL, + ("%s: inp->inp_snd_tag == NULL", __FUNCTION__)); + + ifp = mst->ifp; + + /* + * If the device was detached while we still had reference(s) + * on the ifp, we assume if_snd_tag_free() was replaced with + * stubs. + */ + ifp->if_snd_tag_free(ifp, IF_SND_TAG_TYPE_RATE_LIMIT, mst); + + /* release reference count on network interface */ + if_rele(ifp); +} + +/* + * Modify the TX rate limit to match what the network driver expects. 
+ */ +void +in_pcboutput_txrtlmt(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb) +{ + struct socket *socket; + uint32_t max_pacing_rate; + bool wlocked; + int error; + + if (inp == NULL) + return; + + socket = inp->inp_socket; + if (socket == NULL) + return; + + /* + * NOTE: The so_max_pacing_rate value is read unlocked, + * because atomic updates are not required since the variable + * is checked at every mbuf we send. It is assumed that the + * variable read itself will be atomic. + */ + max_pacing_rate = socket->so_max_pacing_rate; + + if (max_pacing_rate == 0 && inp->inp_snd_tag == NULL) + return; + + wlocked = INP_WLOCKED(inp); + + if (!wlocked) { + /* + * NOTE: If the write locking fails, we need to bail + * out and use the non-ratelimited ring for the + * transmit until there is a new chance to get the + * write lock. + */ + if (!INP_TRY_UPGRADE(inp)) + return; + } + + /* + * NOTE: When attaching to a network interface a reference is + * made to ensure the network interface doesn't go away until + * all ratelimit connections are gone. The network interface + * pointers compared below represent valid network interfaces, + * except when comparing towards NULL. 
+ */ + if (!(ifp->if_capenable & IFCAP_TXRTLMT)) { + if (inp->inp_snd_tag != NULL) + in_pcbdetach_txrtlmt(inp); + error = 0; + } else if (inp->inp_snd_tag == NULL) { + /* + * In order to utilize packet pacing with RSS, we need + * to wait until there is a valid RSS hash before we + * can proceed: + */ + if (inp->inp_flowtype == M_HASHTYPE_NONE) { + if (M_HASHTYPE_GET(mb) == M_HASHTYPE_NONE) { + if (!wlocked) + INP_DOWNGRADE(inp); + return; + } + /* typically UDP ends up here */ + inp->inp_flowid = mb->m_pkthdr.flowid; + inp->inp_flowtype = M_HASHTYPE_GET(mb); + } + error = in_pcbattach_txrtlmt(inp, ifp, max_pacing_rate); + } else { + error = in_pcbmodify_txrtlmt(inp, max_pacing_rate); + } + if (error == 0 || error == EOPNOTSUPP) + inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED; + if (!wlocked) + INP_DOWNGRADE(inp); +} + +/* + * Track route changes for TX rate limiting. + */ +int +in_pcboutput_eagain(struct inpcb *inp, struct mbuf *mb) +{ + struct socket *socket; + bool wlocked; + + if (inp == NULL) + goto failure; + + socket = inp->inp_socket; + if (socket == NULL) + goto failure; + + if (inp->inp_snd_tag == NULL) + goto failure; + + wlocked = INP_WLOCKED(inp); + + if (!wlocked) { + /* + * NOTE: If the write locking fails, the send tag + * cannot be detached here; take the failure path, + * which frees the mbuf and returns ENOBUFS instead + * of asking the caller to retry. 
+ */ + if (!INP_TRY_UPGRADE(inp)) + goto failure; + } + + /* detach rate limiting */ + in_pcbdetach_txrtlmt(inp); + + /* make sure new mbuf send tag allocation is made */ + inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED; + + if (!wlocked) + INP_DOWNGRADE(inp); + return (EAGAIN); + +failure: + m_freem(mb); + + /* prevent TCP stack from sending more data */ + return (ENOBUFS); +} +#endif /* RATELIMIT */ Index: sys/netinet/ip_output.c =================================================================== --- sys/netinet/ip_output.c +++ sys/netinet/ip_output.c @@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" +#include "opt_ratelimit.h" #include "opt_ipsec.h" #include "opt_mbuf_stress_test.h" #include "opt_mpath.h" @@ -661,8 +662,23 @@ */ m_clrprotoflags(m); IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL); +#ifdef RATELIMIT +retry_if_output_0: + if (inp != NULL && (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)) + in_pcboutput_txrtlmt(inp, ifp, m); + /* stamp send tag on mbuf (NULL when there is no inpcb) */ + m->m_pkthdr.snd_tag[0] = (inp != NULL) ? inp->inp_snd_tag : NULL; +#endif error = (*ifp->if_output)(ifp, m, (const struct sockaddr *)gw, ro); +#ifdef RATELIMIT + if (error == EAGAIN) { + /* route changed, mbuf was not freed */ + error = in_pcboutput_eagain(inp, m); + if (error == EAGAIN) + goto retry_if_output_0; + } +#endif goto done; } @@ -697,8 +713,23 @@ m_clrprotoflags(m); IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL); +#ifdef RATELIMIT +retry_if_output_1: + if (inp != NULL && (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)) + in_pcboutput_txrtlmt(inp, ifp, m); + /* stamp send tag on mbuf (NULL when there is no inpcb) */ + m->m_pkthdr.snd_tag[0] = (inp != NULL) ? 
inp->inp_snd_tag : NULL; +#endif error = (*ifp->if_output)(ifp, m, (const struct sockaddr *)gw, ro); +#ifdef RATELIMIT + if (error == EAGAIN) { + /* route changed, mbuf was not freed */ + error = in_pcboutput_eagain(inp, m); + if (error == EAGAIN) + goto retry_if_output_1; + } +#endif } else m_freem(m); } @@ -973,6 +1004,16 @@ INP_WUNLOCK(inp); error = 0; break; + case SO_MAX_PACING_RATE: +#ifdef RATELIMIT + INP_WLOCK(inp); + inp->inp_flags2 
|= INP_RATE_LIMIT_CHANGED; + INP_WUNLOCK(inp); + error = 0; +#else + error = EOPNOTSUPP; +#endif + break; default: break; } Index: sys/netinet6/ip6_output.c =================================================================== --- sys/netinet6/ip6_output.c +++ sys/netinet6/ip6_output.c @@ -65,6 +65,7 @@ #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_ratelimit.h" #include "opt_ipsec.h" #include "opt_sctp.h" #include "opt_route.h" @@ -954,8 +955,23 @@ m->m_pkthdr.len); ifa_free(&ia6->ia_ifa); } +#ifdef RATELIMIT +retry_if_output_0: + if (inp != NULL && (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)) + in_pcboutput_txrtlmt(inp, ifp, m); + /* stamp send tag on mbuf (NULL when there is no inpcb) */ + m->m_pkthdr.snd_tag[0] = (inp != NULL) ? inp->inp_snd_tag : NULL; +#endif error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro); +#ifdef RATELIMIT + if (error == EAGAIN) { + /* route changed, mbuf was not freed */ + error = in_pcboutput_eagain(inp, m); + if (error == EAGAIN) + goto retry_if_output_0; + } +#endif goto done; } @@ -1054,8 +1070,23 @@ counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } +#ifdef RATELIMIT +retry_if_output_1: + if (inp != NULL && (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)) + in_pcboutput_txrtlmt(inp, ifp, m); + /* stamp send tag on mbuf (NULL when there is no inpcb) */ + m->m_pkthdr.snd_tag[0] = (inp != NULL) ? 
inp->inp_snd_tag : NULL; +#endif error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro); +#ifdef RATELIMIT + if (error == EAGAIN) { + /* route changed, mbuf was not freed */ + error = in_pcboutput_eagain(inp, m); + if (error == EAGAIN) + goto retry_if_output_1; + } +#endif } else m_freem(m); } @@ -1441,6 +1472,16 @@ INP_WUNLOCK(in6p); error = 0; break; + case SO_MAX_PACING_RATE: +#ifdef RATELIMIT + INP_WLOCK(in6p); + in6p->inp_flags2 |= INP_RATE_LIMIT_CHANGED; + INP_WUNLOCK(in6p); + error = 0; +#else + error = EOPNOTSUPP; +#endif + break; default: break; } Index: sys/sys/mbuf.h =================================================================== --- sys/sys/mbuf.h +++ sys/sys/mbuf.h @@ -130,6 +130,14 @@ }; /* + * Static network interface owned tag. 
+ * Allocated through ifp->if_snd_tag_alloc(). + */ +struct m_snd_tag { + struct ifnet *ifp; /* network interface tag belongs to */ +}; + +/* * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set. * Size ILP32: 48 * LP64: 56 @@ -137,6 +145,7 @@ * they are correct. */ struct pkthdr { + struct m_snd_tag *snd_tag[0]; /* send tag, if any */ struct ifnet *rcvif; /* rcv interface */ SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ int32_t len; /* total packet length */ Index: sys/sys/socket.h =================================================================== --- sys/sys/socket.h +++ sys/sys/socket.h @@ -158,6 +158,7 @@ #define SO_USER_COOKIE 0x1015 /* user cookie (dummynet etc.) */ #define SO_PROTOCOL 0x1016 /* get socket protocol (Linux name) */ #define SO_PROTOTYPE SO_PROTOCOL /* alias for SO_PROTOCOL (SunOS name) */ +#define SO_MAX_PACING_RATE 0x1017 /* set max TX pacing rate per socket */ #endif /* Index: sys/sys/socketvar.h =================================================================== --- sys/sys/socketvar.h +++ sys/sys/socketvar.h @@ -127,8 +127,9 @@ int so_fibnum; /* routing domain for this socket */ uint32_t so_user_cookie; - void *so_pspare[2]; /* packet pacing / general use */ - int so_ispare[2]; /* packet pacing / general use */ + void *so_pspare[2]; /* general use */ + uint32_t so_max_pacing_rate; /* (f) TX rate limit in bytes/s */ + int so_ispare[1]; /* general use */ }; /*