Index: head/sys/dev/netmap/if_ptnet.c
===================================================================
--- head/sys/dev/netmap/if_ptnet.c
+++ head/sys/dev/netmap/if_ptnet.c
@@ -1335,150 +1335,6 @@
         ptnet_rx_eof(pq, PTNET_RX_BUDGET, true);
 }
 
-/* The following offloadings-related functions are taken from the vtnet
- * driver, but the same functionality is required for the ptnet driver.
- * As a temporary solution, I copied this code from vtnet and I started
- * to generalize it (taking away driver-specific statistic accounting),
- * making as little modifications as possible.
- * In the future we need to share these functions between vtnet and ptnet.
- */
-static int
-ptnet_tx_offload_ctx(struct mbuf *m, int *etype, int *proto, int *start)
-{
-        struct ether_vlan_header *evh;
-        int offset;
-
-        evh = mtod(m, struct ether_vlan_header *);
-        if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
-                /* BMV: We should handle nested VLAN tags too. */
-                *etype = ntohs(evh->evl_proto);
-                offset = sizeof(struct ether_vlan_header);
-        } else {
-                *etype = ntohs(evh->evl_encap_proto);
-                offset = sizeof(struct ether_header);
-        }
-
-        switch (*etype) {
-#if defined(INET)
-        case ETHERTYPE_IP: {
-                struct ip *ip, iphdr;
-                if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
-                        m_copydata(m, offset, sizeof(struct ip),
-                            (caddr_t) &iphdr);
-                        ip = &iphdr;
-                } else
-                        ip = (struct ip *)(m->m_data + offset);
-                *proto = ip->ip_p;
-                *start = offset + (ip->ip_hl << 2);
-                break;
-        }
-#endif
-#if defined(INET6)
-        case ETHERTYPE_IPV6:
-                *proto = -1;
-                *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
-                /* Assert the network stack sent us a valid packet. */
-                KASSERT(*start > offset,
-                    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
-                    *start, offset, *proto));
-                break;
-#endif
-        default:
-                /* Here we should increment the tx_csum_bad_ethtype counter. */
-                return (EINVAL);
-        }
-
-        return (0);
-}
-
-static int
-ptnet_tx_offload_tso(if_t ifp, struct mbuf *m, int eth_type,
-    int offset, bool allow_ecn, struct virtio_net_hdr *hdr)
-{
-        static struct timeval lastecn;
-        static int curecn;
-        struct tcphdr *tcp, tcphdr;
-
-        if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
-                m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
-                tcp = &tcphdr;
-        } else
-                tcp = (struct tcphdr *)(m->m_data + offset);
-
-        hdr->hdr_len = offset + (tcp->th_off << 2);
-        hdr->gso_size = m->m_pkthdr.tso_segsz;
-        hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
-            VIRTIO_NET_HDR_GSO_TCPV6;
-
-        if (tcp->th_flags & TH_CWR) {
-                /*
-                 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD,
-                 * ECN support is not on a per-interface basis, but globally via
-                 * the net.inet.tcp.ecn.enable sysctl knob. The default is off.
-                 */
-                if (!allow_ecn) {
-                        if (ppsratecheck(&lastecn, &curecn, 1))
-                                if_printf(ifp,
-                                    "TSO with ECN not negotiated with host\n");
-                        return (ENOTSUP);
-                }
-                hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
-        }
-
-        /* Here we should increment tx_tso counter. */
-
-        return (0);
-}
-
-static struct mbuf *
-ptnet_tx_offload(if_t ifp, struct mbuf *m, bool allow_ecn,
-    struct virtio_net_hdr *hdr)
-{
-        int flags, etype, csum_start, proto, error;
-
-        flags = m->m_pkthdr.csum_flags;
-
-        error = ptnet_tx_offload_ctx(m, &etype, &proto, &csum_start);
-        if (error)
-                goto drop;
-
-        if ((etype == ETHERTYPE_IP && flags & PTNET_CSUM_OFFLOAD) ||
-            (etype == ETHERTYPE_IPV6 && flags & PTNET_CSUM_OFFLOAD_IPV6)) {
-                /*
-                 * We could compare the IP protocol vs the CSUM_ flag too,
-                 * but that really should not be necessary.
-                 */
-                hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
-                hdr->csum_start = csum_start;
-                hdr->csum_offset = m->m_pkthdr.csum_data;
-                /* Here we should increment the tx_csum counter. */
-        }
-
-        if (flags & CSUM_TSO) {
-                if (__predict_false(proto != IPPROTO_TCP)) {
-                        /* Likely failed to correctly parse the mbuf.
-                         * Here we should increment the tx_tso_not_tcp
-                         * counter. */
-                        goto drop;
-                }
-
-                KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM,
-                    ("%s: mbuf %p TSO without checksum offload %#x",
-                    __func__, m, flags));
-
-                error = ptnet_tx_offload_tso(ifp, m, etype, csum_start,
-                    allow_ecn, hdr);
-                if (error)
-                        goto drop;
-        }
-
-        return (m);
-
-drop:
-        m_freem(m);
-        return (NULL);
-}
-
 static void
 ptnet_vlan_tag_remove(struct mbuf *m)
 {
@@ -1494,157 +1350,6 @@
         m_adj(m, ETHER_VLAN_ENCAP_LEN);
 }
 
-/*
- * Use the checksum offset in the VirtIO header to set the
- * correct CSUM_* flags.
- */
-static int
-ptnet_rx_csum_by_offset(struct mbuf *m, uint16_t eth_type, int ip_start,
-    struct virtio_net_hdr *hdr)
-{
-#if defined(INET) || defined(INET6)
-        int offset = hdr->csum_start + hdr->csum_offset;
-#endif
-
-        /* Only do a basic sanity check on the offset. */
-        switch (eth_type) {
-#if defined(INET)
-        case ETHERTYPE_IP:
-                if (__predict_false(offset < ip_start + sizeof(struct ip)))
-                        return (1);
-                break;
-#endif
-#if defined(INET6)
-        case ETHERTYPE_IPV6:
-                if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
-                        return (1);
-                break;
-#endif
-        default:
-                /* Here we should increment the rx_csum_bad_ethtype counter. */
-                return (1);
-        }
-
-        /*
-         * Use the offset to determine the appropriate CSUM_* flags. This is
-         * a bit dirty, but we can get by with it since the checksum offsets
-         * happen to be different. We assume the host host does not do IPv4
-         * header checksum offloading.
-         */
-        switch (hdr->csum_offset) {
-        case offsetof(struct udphdr, uh_sum):
-        case offsetof(struct tcphdr, th_sum):
-                m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
-                m->m_pkthdr.csum_data = 0xFFFF;
-                break;
-        default:
-                /* Here we should increment the rx_csum_bad_offset counter. */
-                return (1);
-        }
-
-        return (0);
-}
-
-static int
-ptnet_rx_csum_by_parse(struct mbuf *m, uint16_t eth_type, int ip_start,
-    struct virtio_net_hdr *hdr)
-{
-        int offset, proto;
-
-        switch (eth_type) {
-#if defined(INET)
-        case ETHERTYPE_IP: {
-                struct ip *ip;
-                if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
-                        return (1);
-                ip = (struct ip *)(m->m_data + ip_start);
-                proto = ip->ip_p;
-                offset = ip_start + (ip->ip_hl << 2);
-                break;
-        }
-#endif
-#if defined(INET6)
-        case ETHERTYPE_IPV6:
-                if (__predict_false(m->m_len < ip_start +
-                    sizeof(struct ip6_hdr)))
-                        return (1);
-                offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
-                if (__predict_false(offset < 0))
-                        return (1);
-                break;
-#endif
-        default:
-                /* Here we should increment the rx_csum_bad_ethtype counter. */
-                return (1);
-        }
-
-        switch (proto) {
-        case IPPROTO_TCP:
-                if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
-                        return (1);
-                m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
-                m->m_pkthdr.csum_data = 0xFFFF;
-                break;
-        case IPPROTO_UDP:
-                if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
-                        return (1);
-                m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
-                m->m_pkthdr.csum_data = 0xFFFF;
-                break;
-        default:
-                /*
-                 * For the remaining protocols, FreeBSD does not support
-                 * checksum offloading, so the checksum will be recomputed.
-                 */
-#if 0
-                if_printf(ifp, "cksum offload of unsupported "
-                    "protocol eth_type=%#x proto=%d csum_start=%d "
-                    "csum_offset=%d\n", __func__, eth_type, proto,
-                    hdr->csum_start, hdr->csum_offset);
-#endif
-                break;
-        }
-
-        return (0);
-}
-
-/*
- * Set the appropriate CSUM_* flags. Unfortunately, the information
- * provided is not directly useful to us. The VirtIO header gives the
- * offset of the checksum, which is all Linux needs, but this is not
- * how FreeBSD does things. We are forced to peek inside the packet
- * a bit.
- *
- * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
- * could accept the offsets and let the stack figure it out.
- */
-static int
-ptnet_rx_csum(struct mbuf *m, struct virtio_net_hdr *hdr)
-{
-        struct ether_header *eh;
-        struct ether_vlan_header *evh;
-        uint16_t eth_type;
-        int offset, error;
-
-        eh = mtod(m, struct ether_header *);
-        eth_type = ntohs(eh->ether_type);
-        if (eth_type == ETHERTYPE_VLAN) {
-                /* BMV: We should handle nested VLAN tags too. */
-                evh = mtod(m, struct ether_vlan_header *);
-                eth_type = ntohs(evh->evl_proto);
-                offset = sizeof(struct ether_vlan_header);
-        } else
-                offset = sizeof(struct ether_header);
-
-        if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
-                error = ptnet_rx_csum_by_offset(m, eth_type, offset, hdr);
-        else
-                error = ptnet_rx_csum_by_parse(m, eth_type, offset, hdr);
-
-        return (error);
-}
-/* End of offloading-related functions to be shared with vtnet. */
-
 static void
 ptnet_ring_update(struct ptnet_queue *pq, struct netmap_kring *kring,
                   unsigned int head, unsigned int sync_flags)
@@ -1776,7 +1481,7 @@
                  * two 8-bytes-wide writes. */
                 memset(nmbuf, 0, PTNET_HDR_SIZE);
                 if (mhead->m_pkthdr.csum_flags & PTNET_ALL_OFFLOAD) {
-                        mhead = ptnet_tx_offload(ifp, mhead, false,
+                        mhead = virtio_net_tx_offload(ifp, mhead, false,
                                                  vh);
                         if (unlikely(!mhead)) {
                                 /* Packet dropped because errors
@@ -2154,14 +1859,11 @@
                         }
                 }
 
-                if (have_vnet_hdr && (vh->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM
-                                        | VIRTIO_NET_HDR_F_DATA_VALID))) {
-                        if (unlikely(ptnet_rx_csum(mhead, vh))) {
-                                m_freem(mhead);
-                                nm_prlim(1, "Csum offload error: dropping");
-                                pq->stats.iqdrops ++;
-                                deliver = 0;
-                        }
+                if (unlikely(have_vnet_hdr && virtio_net_rx_csum(mhead, vh))) {
+                        m_freem(mhead);
+                        nm_prlim(1, "Csum offload error: dropping");
+                        pq->stats.iqdrops ++;
+                        deliver = 0;
 }
 
 skip:
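The helpers removed above survive, apart from the rename and the checksum-flag test, as the inline functions added to virtio_net.h below. For illustration only, a driver that carries one struct virtio_net_hdr per packet would use the two shared entry points roughly as in the following sketch; the function names and surrounding driver glue are hypothetical and not part of this change, and the consumer needs the usual mbuf/inet headers in addition to dev/virtio/network/virtio_net.h (compare the if_tuntap.c include hunk further down).

/*
 * Illustrative sketch only -- not part of this patch.  example_encap() and
 * example_decap() are hypothetical driver routines.
 */
static int
example_encap(if_t ifp, struct mbuf *m, struct virtio_net_hdr *hdr)
{

        memset(hdr, 0, sizeof(*hdr));
        if (m->m_pkthdr.csum_flags &
            (CSUM_TCP | CSUM_UDP | CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_TSO)) {
                /* Fills hdr; frees the chain and returns NULL on errors. */
                m = virtio_net_tx_offload(ifp, m, false, hdr);
                if (m == NULL)
                        return (ENOBUFS);
        }
        /* ... hand hdr and m to the backend ring ... */
        return (0);
}

static void
example_decap(if_t ifp, struct mbuf *m, struct virtio_net_hdr *hdr)
{

        /* Translate the received header back into mbuf checksum flags. */
        if (virtio_net_rx_csum(m, hdr) != 0) {
                m_freem(m);             /* malformed offload information */
                return;
        }
        (*ifp->if_input)(ifp, m);
}

Note the asymmetry: virtio_net_tx_offload() frees the mbuf and returns NULL on failure, while virtio_net_rx_csum() only reports an error and leaves the mbuf to the caller, which is why both ptnet above and the tap write path below free it themselves.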
Index: head/sys/dev/virtio/network/virtio_net.h
===================================================================
--- head/sys/dev/virtio/network/virtio_net.h
+++ head/sys/dev/virtio/network/virtio_net.h
@@ -201,4 +201,297 @@
 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN         1
 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX         0x8000
 
+/*
+ * Use the checksum offset in the VirtIO header to set the
+ * correct CSUM_* flags.
+ */
+static inline int
+virtio_net_rx_csum_by_offset(struct mbuf *m, uint16_t eth_type, int ip_start,
+    struct virtio_net_hdr *hdr)
+{
+#if defined(INET) || defined(INET6)
+        int offset = hdr->csum_start + hdr->csum_offset;
+#endif
+
+        /* Only do a basic sanity check on the offset. */
+        switch (eth_type) {
+#if defined(INET)
+        case ETHERTYPE_IP:
+                if (__predict_false(offset < ip_start + sizeof(struct ip)))
+                        return (1);
+                break;
+#endif
+#if defined(INET6)
+        case ETHERTYPE_IPV6:
+                if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
+                        return (1);
+                break;
+#endif
+        default:
+                /* Here we should increment the rx_csum_bad_ethtype counter. */
+                return (1);
+        }
+
+        /*
+         * Use the offset to determine the appropriate CSUM_* flags. This is
+         * a bit dirty, but we can get by with it since the checksum offsets
+         * happen to be different. We assume the host host does not do IPv4
+         * header checksum offloading.
+         */
+        switch (hdr->csum_offset) {
+        case offsetof(struct udphdr, uh_sum):
+        case offsetof(struct tcphdr, th_sum):
+                m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+                m->m_pkthdr.csum_data = 0xFFFF;
+                break;
+        default:
+                /* Here we should increment the rx_csum_bad_offset counter. */
+                return (1);
+        }
+
+        return (0);
+}
+
+static inline int
+virtio_net_rx_csum_by_parse(struct mbuf *m, uint16_t eth_type, int ip_start,
+    struct virtio_net_hdr *hdr)
+{
+        int offset, proto;
+
+        switch (eth_type) {
+#if defined(INET)
+        case ETHERTYPE_IP: {
+                struct ip *ip;
+                if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
+                        return (1);
+                ip = (struct ip *)(m->m_data + ip_start);
+                proto = ip->ip_p;
+                offset = ip_start + (ip->ip_hl << 2);
+                break;
+        }
+#endif
+#if defined(INET6)
+        case ETHERTYPE_IPV6:
+                if (__predict_false(m->m_len < ip_start +
+                    sizeof(struct ip6_hdr)))
+                        return (1);
+                offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
+                if (__predict_false(offset < 0))
+                        return (1);
+                break;
+#endif
+        default:
+                /* Here we should increment the rx_csum_bad_ethtype counter. */
+                return (1);
+        }
+
+        switch (proto) {
+        case IPPROTO_TCP:
+                if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
+                        return (1);
+                m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+                m->m_pkthdr.csum_data = 0xFFFF;
+                break;
+        case IPPROTO_UDP:
+                if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
+                        return (1);
+                m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+                m->m_pkthdr.csum_data = 0xFFFF;
+                break;
+        default:
+                /*
+                 * For the remaining protocols, FreeBSD does not support
+                 * checksum offloading, so the checksum will be recomputed.
+                 */
+#if 0
+                if_printf(ifp, "cksum offload of unsupported "
+                    "protocol eth_type=%#x proto=%d csum_start=%d "
+                    "csum_offset=%d\n", __func__, eth_type, proto,
+                    hdr->csum_start, hdr->csum_offset);
+#endif
+                break;
+        }
+
+        return (0);
+}
+
+/*
+ * Set the appropriate CSUM_* flags. Unfortunately, the information
+ * provided is not directly useful to us. The VirtIO header gives the
+ * offset of the checksum, which is all Linux needs, but this is not
+ * how FreeBSD does things. We are forced to peek inside the packet
+ * a bit.
+ *
+ * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
+ * could accept the offsets and let the stack figure it out.
+ */
+static inline int
+virtio_net_rx_csum(struct mbuf *m, struct virtio_net_hdr *hdr)
+{
+        struct ether_header *eh;
+        struct ether_vlan_header *evh;
+        uint16_t eth_type;
+        int offset, error;
+
+        if ((hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM |
+            VIRTIO_NET_HDR_F_DATA_VALID)) == 0) {
+                return (0);
+        }
+
+        eh = mtod(m, struct ether_header *);
+        eth_type = ntohs(eh->ether_type);
+        if (eth_type == ETHERTYPE_VLAN) {
+                /* BMV: We should handle nested VLAN tags too. */
+                evh = mtod(m, struct ether_vlan_header *);
+                eth_type = ntohs(evh->evl_proto);
+                offset = sizeof(struct ether_vlan_header);
+        } else
+                offset = sizeof(struct ether_header);
+
+        if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
+                error = virtio_net_rx_csum_by_offset(m, eth_type, offset, hdr);
+        else
+                error = virtio_net_rx_csum_by_parse(m, eth_type, offset, hdr);
+
+        return (error);
+}
+
+static inline int
+virtio_net_tx_offload_ctx(struct mbuf *m, int *etype, int *proto, int *start)
+{
+        struct ether_vlan_header *evh;
+        int offset;
+
+        evh = mtod(m, struct ether_vlan_header *);
+        if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
+                /* BMV: We should handle nested VLAN tags too. */
+                *etype = ntohs(evh->evl_proto);
+                offset = sizeof(struct ether_vlan_header);
+        } else {
+                *etype = ntohs(evh->evl_encap_proto);
+                offset = sizeof(struct ether_header);
+        }
+
+        switch (*etype) {
+#if defined(INET)
+        case ETHERTYPE_IP: {
+                struct ip *ip, iphdr;
+                if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
+                        m_copydata(m, offset, sizeof(struct ip),
+                            (caddr_t) &iphdr);
+                        ip = &iphdr;
+                } else
+                        ip = (struct ip *)(m->m_data + offset);
+                *proto = ip->ip_p;
+                *start = offset + (ip->ip_hl << 2);
+                break;
+        }
+#endif
+#if defined(INET6)
+        case ETHERTYPE_IPV6:
+                *proto = -1;
+                *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
+                /* Assert the network stack sent us a valid packet. */
+                KASSERT(*start > offset,
+                    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
+                    *start, offset, *proto));
+                break;
+#endif
+        default:
+                /* Here we should increment the tx_csum_bad_ethtype counter. */
+                return (EINVAL);
+        }
+
+        return (0);
+}
+
+static inline int
+virtio_net_tx_offload_tso(if_t ifp, struct mbuf *m, int eth_type,
+    int offset, bool allow_ecn, struct virtio_net_hdr *hdr)
+{
+        static struct timeval lastecn;
+        static int curecn;
+        struct tcphdr *tcp, tcphdr;
+
+        if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
+                m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
+                tcp = &tcphdr;
+        } else
+                tcp = (struct tcphdr *)(m->m_data + offset);
+
+        hdr->hdr_len = offset + (tcp->th_off << 2);
+        hdr->gso_size = m->m_pkthdr.tso_segsz;
+        hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
+            VIRTIO_NET_HDR_GSO_TCPV6;
+
+        if (tcp->th_flags & TH_CWR) {
+                /*
+                 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD,
+                 * ECN support is not on a per-interface basis, but globally via
+                 * the net.inet.tcp.ecn.enable sysctl knob. The default is off.
+                 */
+                if (!allow_ecn) {
+                        if (ppsratecheck(&lastecn, &curecn, 1))
+                                if_printf(ifp,
+                                    "TSO with ECN not negotiated with host\n");
+                        return (ENOTSUP);
+                }
+                hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
+        }
+
+        /* Here we should increment tx_tso counter. */
+
+        return (0);
+}
+
+static inline struct mbuf *
+virtio_net_tx_offload(if_t ifp, struct mbuf *m, bool allow_ecn,
+    struct virtio_net_hdr *hdr)
+{
+        int flags, etype, csum_start, proto, error;
+
+        flags = m->m_pkthdr.csum_flags;
+
+        error = virtio_net_tx_offload_ctx(m, &etype, &proto, &csum_start);
+        if (error)
+                goto drop;
+
+        if ((etype == ETHERTYPE_IP && (flags & (CSUM_TCP | CSUM_UDP))) ||
+            (etype == ETHERTYPE_IPV6 &&
+            (flags & (CSUM_TCP_IPV6 | CSUM_UDP_IPV6)))) {
+                /*
+                 * We could compare the IP protocol vs the CSUM_ flag too,
+                 * but that really should not be necessary.
+                 */
+                hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
+                hdr->csum_start = csum_start;
+                hdr->csum_offset = m->m_pkthdr.csum_data;
+                /* Here we should increment the tx_csum counter. */
+        }
+
+        if (flags & CSUM_TSO) {
+                if (__predict_false(proto != IPPROTO_TCP)) {
+                        /* Likely failed to correctly parse the mbuf.
+                         * Here we should increment the tx_tso_not_tcp
+                         * counter. */
+                        goto drop;
+                }
+
+                KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM,
+                    ("%s: mbuf %p TSO without checksum offload %#x",
+                    __func__, m, flags));
+
+                error = virtio_net_tx_offload_tso(ifp, m, etype, csum_start,
+                    allow_ecn, hdr);
+                if (error)
+                        goto drop;
+        }
+
+        return (m);
+
+drop:
+        m_freem(m);
+        return (NULL);
+}
+
 #endif /* _VIRTIO_NET_H */
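As a worked example (not part of the patch): if a consumer hands virtio_net_tx_offload() an IPv4/TCP mbuf with a 20-byte IP header, a 20-byte TCP header, csum_data = offsetof(struct tcphdr, th_sum) and tso_segsz = 1448, the header it fills in looks like this:

/* Illustrative values only, assuming Ethernet + 20-byte IPv4 + 20-byte TCP,
 * CSUM_TCP | CSUM_TSO set by the stack, and an MSS of 1448. */
struct virtio_net_hdr hdr = {
        .flags       = VIRTIO_NET_HDR_F_NEEDS_CSUM,
        .csum_start  = 14 + 20,         /* first byte of the TCP header */
        .csum_offset = 16,              /* offsetof(struct tcphdr, th_sum) */
        .hdr_len     = 14 + 20 + 20,    /* Ethernet + IP + TCP headers */
        .gso_size    = 1448,            /* m->m_pkthdr.tso_segsz */
        .gso_type    = VIRTIO_NET_HDR_GSO_TCPV4,
};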
Index: head/sys/net/if_tap.h
===================================================================
--- head/sys/net/if_tap.h
+++ head/sys/net/if_tap.h
@@ -43,7 +43,7 @@
 #include
 
 /* maximum receive packet size (hard limit) */
-#define TAPMRU          16384
+#define TAPMRU          65535
 
 #define tapinfo         tuninfo
 
@@ -56,6 +56,8 @@
 #define TAPSIFINFO      TUNSIFINFO
 #define TAPGIFINFO      TUNGIFINFO
 #define TAPGIFNAME      TUNGIFNAME
+#define TAPSVNETHDR     _IOW('t', 91, int)
+#define TAPGVNETHDR     _IOR('t', 94, int)
 
 /* VMware ioctl's */
 #define VMIO_SIOCSIFFLAGS _IOWINT('V', 0)
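The two new ioctls are issued on the tap character device, mirroring the existing TAPSIFINFO/TAPGIFINFO pairs. A minimal userspace sketch (device name hypothetical, error handling omitted):

/* Illustrative userspace sketch, not part of the patch. */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/if_tap.h>
#include <fcntl.h>
#include <stdio.h>

int
main(void)
{
        int fd, vhdrlen;

        fd = open("/dev/tap0", O_RDWR);         /* hypothetical device */

        /*
         * Ask the kernel to prepend/accept a virtio-net header on every
         * read/write.  Accepted values are 0, sizeof(struct virtio_net_hdr)
         * (10) and sizeof(struct virtio_net_hdr_mrg_rxbuf) (12); anything
         * else returns EINVAL.
         */
        vhdrlen = 10;
        ioctl(fd, TAPSVNETHDR, &vhdrlen);

        /* Read back the value currently in effect. */
        ioctl(fd, TAPGVNETHDR, &vhdrlen);
        printf("vnet header length: %d\n", vhdrlen);
        return (0);
}

Setting a non-zero length also turns on TAP_VNET_HDR_CAPS in if_capabilities (see the if_tuntap.c changes below), after which the individual offloads can be toggled through SIOCSIFCAP, e.g. with something like ifconfig tap0 txcsum tso.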
Index: head/sys/net/if_tuntap.c
===================================================================
--- head/sys/net/if_tuntap.c
+++ head/sys/net/if_tuntap.c
@@ -84,16 +84,24 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #ifdef INET
 #include
+#include
+#include
+#include
+#include
+#include
 #endif
 #include
 #include
 #include
+#include
+
 #include
 #include
 #include
@@ -134,6 +142,7 @@
         struct cv        tun_cv;        /* for ref'd dev destroy */
         struct ether_addr        tun_ether;     /* remote address */
         int              tun_busy;      /* busy count */
+        int              tun_vhdrlen;   /* virtio-net header length */
 };
 
 #define TUN2IFP(sc)     ((sc)->tun_ifp)
@@ -146,6 +155,19 @@
 #define TUN_VMIO_FLAG_MASK      0x0fff
 
 /*
+ * Interface capabilities of a tap device that supports the virtio-net
+ * header.
+ */
+#define TAP_VNET_HDR_CAPS       (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6       \
+                                    | IFCAP_VLAN_HWCSUM                 \
+                                    | IFCAP_TSO | IFCAP_LRO             \
+                                    | IFCAP_VLAN_HWTSO)
+
+#define TAP_ALL_OFFLOAD (CSUM_TSO | CSUM_TCP | CSUM_UDP |\
+    CSUM_TCP_IPV6 | CSUM_UDP_IPV6)
+
+
+/*
  * All mutable global variables in if_tun are locked using tunmtx, with
  * the exception of tundebug, which is used unlocked, and the drivers'
  * *clones, which are static after setup.
@@ -211,6 +233,7 @@
 static int      vmnet_clone_match(struct if_clone *ifc, const char *name);
 static int      tun_clone_create(struct if_clone *, char *, size_t, caddr_t);
 static int      tun_clone_destroy(struct if_clone *, struct ifnet *);
+static void     tun_vnethdr_set(struct ifnet *ifp, int vhdrlen);
 
 static d_open_t         tunopen;
 static d_close_t        tunclose;
@@ -1140,6 +1163,7 @@
         TUNDEBUG (ifp, "closed\n");
         tp->tun_flags &= ~TUN_OPEN;
         tp->tun_pid = 0;
+        tun_vnethdr_set(ifp, 0);
         tun_unbusy_locked(tp);
         TUN_UNLOCK(tp);
 
@@ -1202,6 +1226,65 @@
 }
 
 /*
+ * To be called under TUN_LOCK. Update ifp->if_hwassist according to the
+ * current value of ifp->if_capenable.
+ */
+static void
+tun_caps_changed(struct ifnet *ifp)
+{
+        uint64_t hwassist = 0;
+
+        TUN_LOCK_ASSERT((struct tuntap_softc *)ifp->if_softc);
+        if (ifp->if_capenable & IFCAP_TXCSUM)
+                hwassist |= CSUM_TCP | CSUM_UDP;
+        if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
+                hwassist |= CSUM_TCP_IPV6
+                    | CSUM_UDP_IPV6;
+        if (ifp->if_capenable & IFCAP_TSO4)
+                hwassist |= CSUM_IP_TSO;
+        if (ifp->if_capenable & IFCAP_TSO6)
+                hwassist |= CSUM_IP6_TSO;
+        ifp->if_hwassist = hwassist;
+}
+
+/*
+ * To be called under TUN_LOCK. Update tp->tun_vhdrlen and adjust
+ * if_capabilities and if_capenable as needed.
+ */
+static void
+tun_vnethdr_set(struct ifnet *ifp, int vhdrlen)
+{
+        struct tuntap_softc *tp = ifp->if_softc;
+
+        TUN_LOCK_ASSERT(tp);
+
+        if (tp->tun_vhdrlen == vhdrlen)
+                return;
+
+        /*
+         * Update if_capabilities to reflect the
+         * functionalities offered by the virtio-net
+         * header.
+         */
+        if (vhdrlen != 0)
+                ifp->if_capabilities |=
+                    TAP_VNET_HDR_CAPS;
+        else
+                ifp->if_capabilities &=
+                    ~TAP_VNET_HDR_CAPS;
+        /*
+         * Disable any capabilities that we don't
+         * support anymore.
+         */
+        ifp->if_capenable &= ifp->if_capabilities;
+        tun_caps_changed(ifp);
+        tp->tun_vhdrlen = vhdrlen;
+
+        TUNDEBUG(ifp, "vnet_hdr_len=%d, if_capabilities=%x\n",
+            vhdrlen, ifp->if_capabilities);
+}
+
+/*
  * Process an ioctl request.
  */
 static int
@@ -1268,6 +1351,13 @@
                         error = copyout(&media, ifmr->ifm_ulist, sizeof(int));
                 }
                 break;
+        case SIOCSIFCAP:
+                TUN_LOCK(tp);
+                ifp->if_capenable = ifr->ifr_reqcap;
+                tun_caps_changed(ifp);
+                TUN_UNLOCK(tp);
+                VLAN_CAPABILITIES(ifp);
+                break;
         default:
                 if (l2tun) {
                         error = ether_ioctl(ifp, cmd, data);
@@ -1378,12 +1468,9 @@
 {
         struct ifreq ifr, *ifrp;
         struct tuntap_softc *tp = dev->si_drv1;
+        struct ifnet *ifp = TUN2IFP(tp);
         struct tuninfo *tunp;
-        int error, iflags;
-#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
-    defined(COMPAT_FREEBSD4)
-        int     ival;
-#endif
+        int error, iflags, ival;
         bool    l2tun;
 
         l2tun = (tp->tun_flags & TUN_L2) != 0;
@@ -1405,8 +1492,8 @@
                 iflags |= IFF_UP;
 
                 TUN_LOCK(tp);
-                TUN2IFP(tp)->if_flags = iflags |
-                    (TUN2IFP(tp)->if_flags & IFF_CANTCHANGE);
+                ifp->if_flags = iflags |
+                    (ifp->if_flags & IFF_CANTCHANGE);
                 TUN_UNLOCK(tp);
 
                 return (0);
@@ -1424,6 +1511,24 @@
                 TUN_UNLOCK(tp);
 
                 return (0);
+        case TAPSVNETHDR:
+                ival = *(int *)data;
+                if (ival != 0 &&
+                    ival != sizeof(struct virtio_net_hdr) &&
+                    ival != sizeof(struct virtio_net_hdr_mrg_rxbuf)) {
+                        return (EINVAL);
+                }
+                TUN_LOCK(tp);
+                tun_vnethdr_set(ifp, ival);
+                TUN_UNLOCK(tp);
+
+                return (0);
+        case TAPGVNETHDR:
+                TUN_LOCK(tp);
+                *(int *)data = tp->tun_vhdrlen;
+                TUN_UNLOCK(tp);
+
+                return (0);
         }
 
         /* Fall through to the common ioctls if unhandled */
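With a non-zero header length negotiated, every read(2) on the device returns tun_vhdrlen bytes of virtio-net header followed by the Ethernet frame, and every write(2) must supply the same prefix; the tunread()/tunwrite() hunks below implement this. A hypothetical reader could consume the prefix as sketched here; the structure only mirrors the 10-byte layout from the virtio specification, since the kernel's dev/virtio/network/virtio_net.h is not normally visible to userland builds:

/* Illustrative reader sketch, not part of the patch. */
#include <sys/uio.h>
#include <stdint.h>
#include <unistd.h>

struct vnet_hdr {                       /* mirrors struct virtio_net_hdr */
        uint8_t         flags;
        uint8_t         gso_type;       /* 0 == VIRTIO_NET_HDR_GSO_NONE */
        uint16_t        hdr_len;
        uint16_t        gso_size;
        uint16_t        csum_start;
        uint16_t        csum_offset;
};

static void
read_one_frame(int tapfd)
{
        static unsigned char frame[65535];      /* TAPMRU after this change */
        struct vnet_hdr vh;
        struct iovec iov[2] = {
                { .iov_base = &vh,   .iov_len = sizeof(vh) },
                { .iov_base = frame, .iov_len = sizeof(frame) },
        };
        ssize_t n;

        n = readv(tapfd, iov, 2);
        if (n < (ssize_t)sizeof(vh))
                return;                 /* error or short read */
        /*
         * frame[] now holds n - sizeof(vh) bytes of Ethernet frame;
         * vh.flags and vh.gso_type describe the offload work still pending
         * (checksum at csum_start + csum_offset, GSO segments of gso_size).
         */
}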
@@ -1578,7 +1683,8 @@
         struct tuntap_softc *tp = dev->si_drv1;
         struct ifnet *ifp = TUN2IFP(tp);
         struct mbuf *m;
-        int             error=0, len;
+        size_t          len;
+        int             error = 0;
 
         TUNDEBUG (ifp, "read\n");
         TUN_LOCK(tp);
@@ -1611,6 +1717,23 @@
         if ((tp->tun_flags & TUN_L2) != 0)
                 BPF_MTAP(ifp, m);
 
+        len = min(tp->tun_vhdrlen, uio->uio_resid);
+        if (len > 0) {
+                struct virtio_net_hdr_mrg_rxbuf vhdr;
+
+                bzero(&vhdr, sizeof(vhdr));
+                if (m->m_pkthdr.csum_flags & TAP_ALL_OFFLOAD) {
+                        m = virtio_net_tx_offload(ifp, m, false, &vhdr.hdr);
+                }
+
+                TUNDEBUG(ifp, "txvhdr: f %u, gt %u, hl %u, "
+                    "gs %u, cs %u, co %u\n", vhdr.hdr.flags,
+                    vhdr.hdr.gso_type, vhdr.hdr.hdr_len,
+                    vhdr.hdr.gso_size, vhdr.hdr.csum_start,
+                    vhdr.hdr.csum_offset);
+                error = uiomove(&vhdr, len, uio);
+        }
+
         while (m && uio->uio_resid > 0 && error == 0) {
                 len = min(uio->uio_resid, m->m_len);
                 if (len != 0)
@@ -1626,7 +1749,8 @@
 }
 
 static int
-tunwrite_l2(struct tuntap_softc *tp, struct mbuf *m)
+tunwrite_l2(struct tuntap_softc *tp, struct mbuf *m,
+    struct virtio_net_hdr_mrg_rxbuf *vhdr)
 {
         struct ether_header *eh;
         struct ifnet *ifp;
@@ -1651,6 +1775,11 @@
                 return (0);
         }
 
+        if (vhdr != NULL && virtio_net_rx_csum(m, &vhdr->hdr)) {
+                m_freem(m);
+                return (0);
+        }
+
         /* Pass packet up to parent. */
         CURVNET_SET(ifp->if_vnet);
         (*ifp->if_input)(ifp, m);
@@ -1717,11 +1846,12 @@
 static int
 tunwrite(struct cdev *dev, struct uio *uio, int flag)
 {
+        struct virtio_net_hdr_mrg_rxbuf vhdr;
         struct tuntap_softc *tp;
         struct ifnet *ifp;
         struct mbuf *m;
         uint32_t        mru;
-        int             align;
+        int             align, vhdrlen, error;
         bool            l2tun;
 
         tp = dev->si_drv1;
@@ -1735,17 +1865,30 @@
                 return (0);
 
         l2tun = (tp->tun_flags & TUN_L2) != 0;
-        align = 0;
         mru = l2tun ? TAPMRU : TUNMRU;
-        if (l2tun)
+        vhdrlen = tp->tun_vhdrlen;
+        align = 0;
+        if (l2tun) {
                 align = ETHER_ALIGN;
-        else if ((tp->tun_flags & TUN_IFHEAD) != 0)
+                mru += vhdrlen;
+        } else if ((tp->tun_flags & TUN_IFHEAD) != 0)
                 mru += sizeof(uint32_t);        /* family */
         if (uio->uio_resid < 0 || uio->uio_resid > mru) {
                 TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid);
                 return (EIO);
         }
+        if (vhdrlen > 0) {
+                error = uiomove(&vhdr, vhdrlen, uio);
+                if (error != 0)
+                        return (error);
+                TUNDEBUG(ifp, "txvhdr: f %u, gt %u, hl %u, "
+                    "gs %u, cs %u, co %u\n", vhdr.hdr.flags,
+                    vhdr.hdr.gso_type, vhdr.hdr.hdr_len,
+                    vhdr.hdr.gso_size, vhdr.hdr.csum_start,
+                    vhdr.hdr.csum_offset);
+        }
+
         if ((m = m_uiotombuf(uio, M_NOWAIT, 0, align, M_PKTHDR)) == NULL) {
                 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
                 return (ENOBUFS);
         }
@@ -1757,7 +1900,7 @@
 #endif
 
         if (l2tun)
-                return (tunwrite_l2(tp, m));
+                return (tunwrite_l2(tp, m, vhdrlen > 0 ? &vhdr : NULL));
 
         return (tunwrite_l3(tp, m));
 }
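The write direction mirrors the read path shown earlier: a writer that has not computed an L4 checksum can set VIRTIO_NET_HDR_F_NEEDS_CSUM in the prepended header, and tunwrite_l2() hands it to virtio_net_rx_csum(), which marks the mbuf checksum as already verified (CSUM_DATA_VALID | CSUM_PSEUDO_HDR). A sketch for a UDP/IPv4 frame, reusing the hypothetical vnet_hdr layout from the reader example above (error handling omitted):

/* Illustrative writer sketch, not part of the patch. */
#include <sys/uio.h>
#include <string.h>

#define VNET_HDR_F_NEEDS_CSUM   0x01    /* VIRTIO_NET_HDR_F_NEEDS_CSUM */

static void
write_udp_frame(int tapfd, unsigned char *frame, size_t framelen)
{
        struct vnet_hdr vh;             /* layout as in the reader sketch */
        struct iovec iov[2];

        memset(&vh, 0, sizeof(vh));
        vh.flags = VNET_HDR_F_NEEDS_CSUM;
        vh.csum_start = 14 + 20;        /* Ethernet + 20-byte IPv4 header */
        vh.csum_offset = 6;             /* offsetof(struct udphdr, uh_sum) */

        iov[0].iov_base = &vh;
        iov[0].iov_len = sizeof(vh);
        iov[1].iov_base = frame;
        iov[1].iov_len = framelen;
        (void)writev(tapfd, iov, 2);    /* header and frame in one write */
}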