diff --git a/share/man/man9/ifnet.9 b/share/man/man9/ifnet.9
--- a/share/man/man9/ifnet.9
+++ b/share/man/man9/ifnet.9
@@ -1335,6 +1335,75 @@
 .Va if_data.ifi_hwassist
 appropriately.
 .Pp
+.It Dv SIOCGIFCAPNV
+.Xr nv 9
+version of the
+.Dv SIOCGIFCAP
+ioctl.
+Caller must provide the pointer to
+.Vt struct ifreq_cap_nv
+as
+.Fa data ,
+where the member
+.Dv buffer
+points to a buffer of
+.Dv length
+bytes.
+The serialized nvlist with description of the device capabilities
+is written to the buffer.
+If the buffer is too short, then the structure is updated with
+.Dv buffer
+member set to
+.Dv NULL ,
+.Dv length
+updated to the minimal required length,
+and error
+.Er EFBIG
+is returned.
+.Pp
+Elements of the returned nvlist for simple capabilities are boolean,
+identified by names.
+Presence of the boolean element means that the corresponding capability is
+supported by the interface.
+Element's value describes the current configured state:
+.Dv true
+means that the capability is enabled, and
+.Dv false
+that it is disabled.
+.Pp
+The driver indicates support for both
+.Dv SIOCGIFCAPNV
+and
+.Dv SIOCSIFCAPNV
+requests by setting the non-modifiable
+.Dv IFCAP_NV
+capability bit in
+.Dv if_capabilities .
+.Pp
+.It Dv SIOCSIFCAPNV
+.Xr nv 9
+version of the
+.Dv SIOCSIFCAP
+ioctl.
+Caller must provide the pointer to
+.Vt struct ifreq_cap_nv
+as
+.Fa data ,
+where the member
+.Dv buffer
+points to a packed nvlist of
+.Dv length
+bytes.
+Each element of the nvlist describes a requested update of one capability,
+identified by the element name.
+For simple capabilities, the element must be boolean.
+Its
+.Dv true
+value means that the caller asks to enable the capability, and
+.Dv false
+value to disable it.
+Only capabilities listed in the nvlist are affected by the call.
+.Pp
 .It Dv SIOCSIFFIB
 Sets interface FIB.
 Caller must have appropriate privilege.
diff --git a/sys/net/if.h b/sys/net/if.h
--- a/sys/net/if.h
+++ b/sys/net/if.h
@@ -236,7 +236,7 @@
 #define	IFCAP_TOE4		0x04000 /* interface can offload TCP */
 #define	IFCAP_TOE6		0x08000 /* interface can offload TCP6 */
 #define	IFCAP_VLAN_HWFILTER	0x10000 /* interface hw can filter vlan tag */
-/* available			0x20000 */
+#define	IFCAP_NV		0x20000 /* can do SIOCGIFCAPNV/SIOCSIFCAPNV */
 #define	IFCAP_VLAN_HWTSO	0x40000 /* can do IFCAP_TSO on VLANs */
 #define	IFCAP_LINKSTATE		0x80000 /* the runtime link state is dynamic */
 #define	IFCAP_NETMAP		0x100000 /* netmap mode supported/enabled */
@@ -260,7 +260,41 @@
 #define	IFCAP_TOE	(IFCAP_TOE4 | IFCAP_TOE6)
 #define	IFCAP_TXTLS	(IFCAP_TXTLS4 | IFCAP_TXTLS6)
 
-#define	IFCAP_CANTCHANGE	(IFCAP_NETMAP)
+#define	IFCAP_CANTCHANGE	(IFCAP_NETMAP | IFCAP_NV)
+#define	IFCAP_ALLCAPS		0xffffffff /* mask with every bit set */
+
+/* Names of the boolean nvlist elements that stand for each IFCAP_* bit. */
+#define	IFCAP_RXCSUM_NAME	"RXCSUM"
+#define	IFCAP_TXCSUM_NAME	"TXCSUM"
+#define	IFCAP_NETCONS_NAME	"NETCONS"
+#define	IFCAP_VLAN_MTU_NAME	"VLAN_MTU"
+#define	IFCAP_VLAN_HWTAGGING_NAME "VLAN_HWTAGGING"
+#define	IFCAP_JUMBO_MTU_NAME	"JUMBO_MTU"
+#define	IFCAP_POLLING_NAME	"POLLING"
+#define	IFCAP_VLAN_HWCSUM_NAME	"VLAN_HWCSUM"
+#define	IFCAP_TSO4_NAME		"TSO4"
+#define	IFCAP_TSO6_NAME		"TSO6"
+#define	IFCAP_LRO_NAME		"LRO"
+#define	IFCAP_WOL_UCAST_NAME	"WOL_UCAST"
+#define	IFCAP_WOL_MCAST_NAME	"WOL_MCAST"
+#define	IFCAP_WOL_MAGIC_NAME	"WOL_MAGIC"
+#define	IFCAP_TOE4_NAME		"TOE4"
+#define	IFCAP_TOE6_NAME		"TOE6"
+#define	IFCAP_VLAN_HWFILTER_NAME "VLAN_HWFILTER"
+#define	IFCAP_VLAN_HWTSO_NAME	"VLAN_HWTSO"
+#define	IFCAP_LINKSTATE_NAME	"LINKSTATE"
+#define	IFCAP_NETMAP_NAME	"NETMAP"
+#define	IFCAP_RXCSUM_IPV6_NAME	"RXCSUM_IPV6"
+#define	IFCAP_TXCSUM_IPV6_NAME	"TXCSUM_IPV6"
+#define	IFCAP_HWSTATS_NAME	"HWSTATS"
+#define	IFCAP_TXRTLMT_NAME	"TXRTLMT"
+#define	IFCAP_HWRXTSTMP_NAME	"HWRXTSTMP"
+#define	IFCAP_MEXTPG_NAME	"MEXTPG"
+#define	IFCAP_TXTLS4_NAME	"TXTLS4"
+#define	IFCAP_TXTLS6_NAME	"TXTLS6"
+#define	IFCAP_VXLAN_HWCSUM_NAME	"VXLAN_HWCSUM"
+#define	IFCAP_VXLAN_HWTSO_NAME	"VXLAN_HWTSO"
+#define	IFCAP_TXTLS_RTLMT_NAME	"TXTLS_RTLMT"
 
 #define	IFQ_MAXLEN	50
 #define	IFNET_SLOWHZ	1 /* granularity is 1 second */
@@ -387,6 +421,18 @@
 	void	*buffer;
 };
 
+/*
+ * Argument of SIOCGIFCAPNV/SIOCSIFCAPNV: a user buffer that holds a
+ * packed nvlist, and the size of that buffer in bytes.
+ */
+struct ifreq_cap_nv {
+	size_t	length;
+	void	*buffer;
+};
+
+/* Largest buffer length that SIOCSIFCAPNV accepts from the caller. */
+#define	IFR_CAP_NV_MAXBUFSIZE	(2 * 1024 * 1024)
+
 /*
  * Interface request structure used for socket
  * ioctl's. All interface ioctl's must have parameter
@@ -411,6 +457,7 @@
 		int	ifru_cap[2];
 		u_int	ifru_fib;
 		u_char	ifru_vlan_pcp;
+		struct ifreq_cap_nv ifru_cap_nv;
 	} ifr_ifru;
 #define	ifr_addr	ifr_ifru.ifru_addr /* address */
 #define	ifr_dstaddr	ifr_ifru.ifru_dstaddr /* other end of p-to-p link */
@@ -434,6 +481,7 @@
 #define	ifr_fib		ifr_ifru.ifru_fib /* interface fib */
 #define	ifr_vlan_pcp	ifr_ifru.ifru_vlan_pcp /* VLAN priority */
 #define	ifr_lan_pcp	ifr_ifru.ifru_vlan_pcp /* VLAN priority */
+#define	ifr_cap_nv	ifr_ifru.ifru_cap_nv /* nv-based cap interface */
 };
 
 #define	_SIZEOF_ADDR_IFREQ(ifr) \
@@ -605,6 +653,11 @@
 
 extern struct sx ifnet_detach_sxlock;
 
+/* Conversions between IFCAP_* bit masks and named nvlist booleans. */
+struct nvlist;
+int if_capnv_to_capint(const struct nvlist *nv, bool all);
+void if_capint_to_capnv(struct nvlist *nv, int ifr_cap, int ifr_req);
+
 #endif
 
 #ifndef _KERNEL
diff --git a/sys/net/if.c b/sys/net/if.c
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -56,6 +56,7 @@
 #include
 #include
 #include
+#include <sys/nv.h>
 #include
 #include
 #include
@@ -2461,6 +2462,91 @@
 	return (ifrup->ifr.ifr_ifru.ifru_data);
 }
 
+/*
+ * Table mapping each IFCAP_* bit to the name of the boolean nvlist
+ * element that represents it in the SIOC[GS]IFCAPNV interface.
+ */
+struct ifcap_nv_bit_name {
+	int cap_bit;
+	const char *cap_name;
+};
+#define CAPNV(x) {.cap_bit = IFCAP_##x, \
+	.cap_name = __CONCAT(IFCAP_, __CONCAT(x, _NAME)) }
+const struct ifcap_nv_bit_name ifcap_nv_bit_names[] = {
+	CAPNV(RXCSUM),
+	CAPNV(TXCSUM),
+	CAPNV(NETCONS),
+	CAPNV(VLAN_MTU),
+	CAPNV(VLAN_HWTAGGING),
+	CAPNV(JUMBO_MTU),
+	CAPNV(POLLING),
+	CAPNV(VLAN_HWCSUM),
+	CAPNV(TSO4),
+	CAPNV(TSO6),
+	CAPNV(LRO),
+	CAPNV(WOL_UCAST),
+	CAPNV(WOL_MCAST),
+	CAPNV(WOL_MAGIC),
+	CAPNV(TOE4),
+	CAPNV(TOE6),
+	CAPNV(VLAN_HWFILTER),
+	CAPNV(VLAN_HWTSO),
+	CAPNV(LINKSTATE),
+	CAPNV(NETMAP),
+	CAPNV(RXCSUM_IPV6),
+	CAPNV(TXCSUM_IPV6),
+	CAPNV(HWSTATS),
+	CAPNV(TXRTLMT),
+	CAPNV(HWRXTSTMP),
+	CAPNV(MEXTPG),
+	CAPNV(TXTLS4),
+	CAPNV(TXTLS6),
+	CAPNV(VXLAN_HWCSUM),
+	CAPNV(VXLAN_HWTSO),
+	CAPNV(TXTLS_RTLMT),
+};
+#undef CAPNV
+
+/*
+ * Convert a capabilities nvlist into an IFCAP_* bit mask.  With 'all' set,
+ * every known boolean present in 'nv' contributes its bit; otherwise only
+ * the booleans whose value is true do.
+ */
+int
+if_capnv_to_capint(const nvlist_t *nv, bool all)
+{
+	const struct ifcap_nv_bit_name *nn;
+	int i, res;
+
+	res = 0;
+	for (i = 0; i < nitems(ifcap_nv_bit_names); i++) {
+		nn = &ifcap_nv_bit_names[i];
+		if (nvlist_exists_bool(nv, nn->cap_name) && (all ||
+		    nvlist_get_bool(nv, nn->cap_name)))
+			res |= nn->cap_bit;
+	}
+	return (res);
+}
+
+/*
+ * Add to 'nv' one boolean per known capability bit set in 'ifr_cap'; the
+ * value of each boolean tells whether the same bit is set in 'ifr_req'.
+ */
+void
+if_capint_to_capnv(nvlist_t *nv, int ifr_cap, int ifr_req)
+{
+	const struct ifcap_nv_bit_name *nn;
+	int i;
+
+	for (i = 0; i < nitems(ifcap_nv_bit_names); i++) {
+		nn = &ifcap_nv_bit_names[i];
+		if ((nn->cap_bit & ifr_cap) != 0) {
+			nvlist_add_bool(nv, nn->cap_name,
+			    (nn->cap_bit & ifr_req) != 0);
+		}
+	}
+}
+
 /*
  * Hardware specific interface ioctls.
  */
@@ -2471,12 +2557,14 @@
 	int error = 0, do_ifup = 0;
 	int new_flags, temp_flags;
 	size_t namelen, onamelen;
-	size_t descrlen;
+	size_t descrlen, nvbuflen;
 	char *descrbuf, *odescrbuf;
 	char new_name[IFNAMSIZ];
 	char old_name[IFNAMSIZ], strbuf[IFNAMSIZ + 8];
 	struct ifaddr *ifa;
 	struct sockaddr_dl *sdl;
+	void *buf;
+	nvlist_t *nvcap;
 
 	ifr = (struct ifreq *)data;
 	switch (cmd) {
@@ -2491,8 +2579,52 @@
 		break;
 
 	case SIOCGIFCAP:
-		ifr->ifr_reqcap = ifp->if_capabilities;
-		ifr->ifr_curcap = ifp->if_capenable;
+		if ((ifp->if_capabilities & IFCAP_NV) == 0) {
+			ifr->ifr_reqcap = ifp->if_capabilities;
+			ifr->ifr_curcap = ifp->if_capenable;
+			break;
+		}
+		/* NV-aware driver: derive the legacy masks from its nvlist. */
+		nvcap = nvlist_create(0);
+		error = (*ifp->if_ioctl)(ifp, SIOCGIFCAPNV,
+		    __DECONST(caddr_t, nvcap));
+		if (error == 0) {
+			ifr->ifr_reqcap = if_capnv_to_capint(nvcap, true);
+			ifr->ifr_curcap = if_capnv_to_capint(nvcap, false);
+		}
+		nvlist_destroy(nvcap);
+		break;
+
+	case SIOCGIFCAPNV:
+		/* Only drivers that advertise IFCAP_NV can answer this. */
+		if ((ifp->if_capabilities & IFCAP_NV) == 0) {
+			error = EINVAL;
+			break;
+		}
+		buf = NULL;
+		nvcap = nvlist_create(0);
+		for (;;) {
+			error = (*ifp->if_ioctl)(ifp, SIOCGIFCAPNV,
+			    __DECONST(caddr_t, nvcap));
+			if (error != 0)
+				break;
+			buf = nvlist_pack(nvcap, &nvbuflen);
+			if (buf == NULL) {
+				error = EDOOFUS;
+				break;
+			}
+			if (nvbuflen > ifr->ifr_cap_nv.length) {
+				/* Record the required size in the request. */
+				ifr->ifr_cap_nv.length = nvbuflen;
+				ifr->ifr_cap_nv.buffer = NULL;
+				error = EFBIG;
+				break;
+			}
+			error = copyout(buf, ifr->ifr_cap_nv.buffer, nvbuflen);
+			break;
+		}
+		free(buf, M_NVLIST);
+		nvlist_destroy(nvcap);
 		break;
 
 	case SIOCGIFDATA:
@@ -2633,13 +2765,60 @@
 
 	case SIOCSIFCAP:
 		error = priv_check(td, PRIV_NET_SETIFCAP);
-		if (error)
+		if (error != 0)
+			return (error);
+		if (ifp->if_ioctl == NULL)
+			return (EOPNOTSUPP);
+		if ((ifp->if_capabilities & IFCAP_NV) == 0) {
+			/* Legacy driver: hand over the integer request. */
+			if (ifr->ifr_reqcap & ~ifp->if_capabilities) {
+				error = EINVAL;
+				break;
+			}
+			error = (*ifp->if_ioctl)(ifp, cmd, data);
+		} else {
+			/* NV-aware driver: translate the request to nvlist. */
+			nvcap = nvlist_create(0);
+			if_capint_to_capnv(nvcap, IFCAP_ALLCAPS,
+			    ifr->ifr_reqcap);
+			error = (*ifp->if_ioctl)(ifp, SIOCSIFCAPNV,
+			    __DECONST(caddr_t, nvcap));
+			nvlist_destroy(nvcap);
+		}
+		if (error == 0)
+			getmicrotime(&ifp->if_lastchange);
+		break;
+
+	case SIOCSIFCAPNV:
+		error = priv_check(td, PRIV_NET_SETIFCAP);
+		if (error != 0)
 			return (error);
 		if (ifp->if_ioctl == NULL)
 			return (EOPNOTSUPP);
-		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
+		if ((ifp->if_capabilities & IFCAP_NV) == 0)
 			return (EINVAL);
-		error = (*ifp->if_ioctl)(ifp, cmd, data);
+		/* Bound the allocation sized by the user-supplied length. */
+		if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE)
+			return (EINVAL);
+		nvcap = NULL;
+		buf = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK);
+		for (;;) {
+			error = copyin(ifr->ifr_cap_nv.buffer, buf,
+			    ifr->ifr_cap_nv.length);
+			if (error != 0)
+				break;
+			nvcap = nvlist_unpack(buf, ifr->ifr_cap_nv.length, 0);
+			if (nvcap == NULL) {
+				error = EINVAL;
+				break;
+			}
+			error = (*ifp->if_ioctl)(ifp, cmd,
+			    __DECONST(caddr_t, nvcap));
+			break;
+		}
+		if (nvcap != NULL)
+			nvlist_destroy(nvcap);
+		free(buf, M_TEMP);
 		if (error == 0)
 			getmicrotime(&ifp->if_lastchange);
 		break;
diff --git a/sys/sys/sockio.h b/sys/sys/sockio.h
--- a/sys/sys/sockio.h
+++ b/sys/sys/sockio.h
@@ -147,4 +147,7 @@
 
 #define	SIOCGIFDOWNREASON	_IOWR('i', 154, struct ifdownreason)
 
+#define	SIOCSIFCAPNV	_IOW('i', 155, struct ifreq) /* set IF features */
+#define	SIOCGIFCAPNV	_IOWR('i', 156, struct ifreq) /* get IF features */
+
 #endif /* !_SYS_SOCKIO_H_ */