Changeset View
Changeset View
Standalone View
Standalone View
sys/dev/netmap/if_ptnet.c
Show First 20 Lines • Show All 991 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
/* Like vtnet, if_igb and if_em drivers when using MSI-X interrupts, | /* Like vtnet, if_igb and if_em drivers when using MSI-X interrupts, | ||||
* receive-side processing is executed directly in the interrupt | * receive-side processing is executed directly in the interrupt | ||||
* service routine. Alternatively, we may schedule the taskqueue. */ | * service routine. Alternatively, we may schedule the taskqueue. */ | ||||
ptnet_rx_eof(pq, PTNET_RX_BUDGET, true); | ptnet_rx_eof(pq, PTNET_RX_BUDGET, true); | ||||
} | } | ||||
/* The following offloadings-related functions are taken from the vtnet | |||||
* driver, but the same functionality is required for the ptnet driver. | |||||
* As a temporary solution, I copied this code from vtnet and I started | |||||
* to generalize it (taking away driver-specific statistic accounting), | |||||
* making as little modifications as possible. | |||||
* In the future we need to share these functions between vtnet and ptnet. | |||||
*/ | |||||
static int | |||||
ptnet_tx_offload_ctx(struct mbuf *m, int *etype, int *proto, int *start) | |||||
{ | |||||
struct ether_vlan_header *evh; | |||||
int offset; | |||||
evh = mtod(m, struct ether_vlan_header *); | |||||
if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { | |||||
/* BMV: We should handle nested VLAN tags too. */ | |||||
*etype = ntohs(evh->evl_proto); | |||||
offset = sizeof(struct ether_vlan_header); | |||||
} else { | |||||
*etype = ntohs(evh->evl_encap_proto); | |||||
offset = sizeof(struct ether_header); | |||||
} | |||||
switch (*etype) { | |||||
#if defined(INET) | |||||
case ETHERTYPE_IP: { | |||||
struct ip *ip, iphdr; | |||||
if (__predict_false(m->m_len < offset + sizeof(struct ip))) { | |||||
m_copydata(m, offset, sizeof(struct ip), | |||||
(caddr_t) &iphdr); | |||||
ip = &iphdr; | |||||
} else | |||||
ip = (struct ip *)(m->m_data + offset); | |||||
*proto = ip->ip_p; | |||||
*start = offset + (ip->ip_hl << 2); | |||||
break; | |||||
} | |||||
#endif | |||||
#if defined(INET6) | |||||
case ETHERTYPE_IPV6: | |||||
*proto = -1; | |||||
*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto); | |||||
/* Assert the network stack sent us a valid packet. */ | |||||
KASSERT(*start > offset, | |||||
("%s: mbuf %p start %d offset %d proto %d", __func__, m, | |||||
*start, offset, *proto)); | |||||
break; | |||||
#endif | |||||
default: | |||||
/* Here we should increment the tx_csum_bad_ethtype counter. */ | |||||
return (EINVAL); | |||||
} | |||||
return (0); | |||||
} | |||||
static int | |||||
ptnet_tx_offload_tso(if_t ifp, struct mbuf *m, int eth_type, | |||||
int offset, bool allow_ecn, struct virtio_net_hdr *hdr) | |||||
{ | |||||
static struct timeval lastecn; | |||||
static int curecn; | |||||
struct tcphdr *tcp, tcphdr; | |||||
if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) { | |||||
m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr); | |||||
tcp = &tcphdr; | |||||
} else | |||||
tcp = (struct tcphdr *)(m->m_data + offset); | |||||
hdr->hdr_len = offset + (tcp->th_off << 2); | |||||
hdr->gso_size = m->m_pkthdr.tso_segsz; | |||||
hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 : | |||||
VIRTIO_NET_HDR_GSO_TCPV6; | |||||
if (tcp->th_flags & TH_CWR) { | |||||
/* | |||||
* Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD, | |||||
* ECN support is not on a per-interface basis, but globally via | |||||
* the net.inet.tcp.ecn.enable sysctl knob. The default is off. | |||||
*/ | |||||
if (!allow_ecn) { | |||||
if (ppsratecheck(&lastecn, &curecn, 1)) | |||||
if_printf(ifp, | |||||
"TSO with ECN not negotiated with host\n"); | |||||
return (ENOTSUP); | |||||
} | |||||
hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; | |||||
} | |||||
/* Here we should increment tx_tso counter. */ | |||||
return (0); | |||||
} | |||||
static struct mbuf * | |||||
ptnet_tx_offload(if_t ifp, struct mbuf *m, bool allow_ecn, | |||||
struct virtio_net_hdr *hdr) | |||||
{ | |||||
int flags, etype, csum_start, proto, error; | |||||
flags = m->m_pkthdr.csum_flags; | |||||
error = ptnet_tx_offload_ctx(m, &etype, &proto, &csum_start); | |||||
if (error) | |||||
goto drop; | |||||
if ((etype == ETHERTYPE_IP && flags & PTNET_CSUM_OFFLOAD) || | |||||
(etype == ETHERTYPE_IPV6 && flags & PTNET_CSUM_OFFLOAD_IPV6)) { | |||||
/* | |||||
* We could compare the IP protocol vs the CSUM_ flag too, | |||||
* but that really should not be necessary. | |||||
*/ | |||||
hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM; | |||||
hdr->csum_start = csum_start; | |||||
hdr->csum_offset = m->m_pkthdr.csum_data; | |||||
/* Here we should increment the tx_csum counter. */ | |||||
} | |||||
if (flags & CSUM_TSO) { | |||||
if (__predict_false(proto != IPPROTO_TCP)) { | |||||
/* Likely failed to correctly parse the mbuf. | |||||
* Here we should increment the tx_tso_not_tcp | |||||
* counter. */ | |||||
goto drop; | |||||
} | |||||
KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM, | |||||
("%s: mbuf %p TSO without checksum offload %#x", | |||||
__func__, m, flags)); | |||||
error = ptnet_tx_offload_tso(ifp, m, etype, csum_start, | |||||
allow_ecn, hdr); | |||||
if (error) | |||||
goto drop; | |||||
} | |||||
return (m); | |||||
drop: | |||||
m_freem(m); | |||||
return (NULL); | |||||
} | |||||
static void | static void | ||||
ptnet_vlan_tag_remove(struct mbuf *m) | ptnet_vlan_tag_remove(struct mbuf *m) | ||||
{ | { | ||||
struct ether_vlan_header *evh; | struct ether_vlan_header *evh; | ||||
evh = mtod(m, struct ether_vlan_header *); | evh = mtod(m, struct ether_vlan_header *); | ||||
m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag); | m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag); | ||||
m->m_flags |= M_VLANTAG; | m->m_flags |= M_VLANTAG; | ||||
/* Strip the 802.1Q header. */ | /* Strip the 802.1Q header. */ | ||||
bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN, | bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN, | ||||
ETHER_HDR_LEN - ETHER_TYPE_LEN); | ETHER_HDR_LEN - ETHER_TYPE_LEN); | ||||
m_adj(m, ETHER_VLAN_ENCAP_LEN); | m_adj(m, ETHER_VLAN_ENCAP_LEN); | ||||
} | } | ||||
/* | |||||
* Use the checksum offset in the VirtIO header to set the | |||||
* correct CSUM_* flags. | |||||
*/ | |||||
static int | |||||
ptnet_rx_csum_by_offset(struct mbuf *m, uint16_t eth_type, int ip_start, | |||||
struct virtio_net_hdr *hdr) | |||||
{ | |||||
#if defined(INET) || defined(INET6) | |||||
int offset = hdr->csum_start + hdr->csum_offset; | |||||
#endif | |||||
/* Only do a basic sanity check on the offset. */ | |||||
switch (eth_type) { | |||||
#if defined(INET) | |||||
case ETHERTYPE_IP: | |||||
if (__predict_false(offset < ip_start + sizeof(struct ip))) | |||||
return (1); | |||||
break; | |||||
#endif | |||||
#if defined(INET6) | |||||
case ETHERTYPE_IPV6: | |||||
if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr))) | |||||
return (1); | |||||
break; | |||||
#endif | |||||
default: | |||||
/* Here we should increment the rx_csum_bad_ethtype counter. */ | |||||
return (1); | |||||
} | |||||
/* | |||||
* Use the offset to determine the appropriate CSUM_* flags. This is | |||||
* a bit dirty, but we can get by with it since the checksum offsets | |||||
* happen to be different. We assume the host host does not do IPv4 | |||||
* header checksum offloading. | |||||
*/ | |||||
switch (hdr->csum_offset) { | |||||
case offsetof(struct udphdr, uh_sum): | |||||
case offsetof(struct tcphdr, th_sum): | |||||
m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; | |||||
m->m_pkthdr.csum_data = 0xFFFF; | |||||
break; | |||||
default: | |||||
/* Here we should increment the rx_csum_bad_offset counter. */ | |||||
return (1); | |||||
} | |||||
return (0); | |||||
} | |||||
static int | |||||
ptnet_rx_csum_by_parse(struct mbuf *m, uint16_t eth_type, int ip_start, | |||||
struct virtio_net_hdr *hdr) | |||||
{ | |||||
int offset, proto; | |||||
switch (eth_type) { | |||||
#if defined(INET) | |||||
case ETHERTYPE_IP: { | |||||
struct ip *ip; | |||||
if (__predict_false(m->m_len < ip_start + sizeof(struct ip))) | |||||
return (1); | |||||
ip = (struct ip *)(m->m_data + ip_start); | |||||
proto = ip->ip_p; | |||||
offset = ip_start + (ip->ip_hl << 2); | |||||
break; | |||||
} | |||||
#endif | |||||
#if defined(INET6) | |||||
case ETHERTYPE_IPV6: | |||||
if (__predict_false(m->m_len < ip_start + | |||||
sizeof(struct ip6_hdr))) | |||||
return (1); | |||||
offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto); | |||||
if (__predict_false(offset < 0)) | |||||
return (1); | |||||
break; | |||||
#endif | |||||
default: | |||||
/* Here we should increment the rx_csum_bad_ethtype counter. */ | |||||
return (1); | |||||
} | |||||
switch (proto) { | |||||
case IPPROTO_TCP: | |||||
if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) | |||||
return (1); | |||||
m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; | |||||
m->m_pkthdr.csum_data = 0xFFFF; | |||||
break; | |||||
case IPPROTO_UDP: | |||||
if (__predict_false(m->m_len < offset + sizeof(struct udphdr))) | |||||
return (1); | |||||
m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; | |||||
m->m_pkthdr.csum_data = 0xFFFF; | |||||
break; | |||||
default: | |||||
/* | |||||
* For the remaining protocols, FreeBSD does not support | |||||
* checksum offloading, so the checksum will be recomputed. | |||||
*/ | |||||
#if 0 | |||||
if_printf(ifp, "cksum offload of unsupported " | |||||
"protocol eth_type=%#x proto=%d csum_start=%d " | |||||
"csum_offset=%d\n", __func__, eth_type, proto, | |||||
hdr->csum_start, hdr->csum_offset); | |||||
#endif | |||||
break; | |||||
} | |||||
return (0); | |||||
} | |||||
/* | |||||
* Set the appropriate CSUM_* flags. Unfortunately, the information | |||||
* provided is not directly useful to us. The VirtIO header gives the | |||||
* offset of the checksum, which is all Linux needs, but this is not | |||||
* how FreeBSD does things. We are forced to peek inside the packet | |||||
* a bit. | |||||
* | |||||
* It would be nice if VirtIO gave us the L4 protocol or if FreeBSD | |||||
* could accept the offsets and let the stack figure it out. | |||||
*/ | |||||
static int | |||||
ptnet_rx_csum(struct mbuf *m, struct virtio_net_hdr *hdr) | |||||
{ | |||||
struct ether_header *eh; | |||||
struct ether_vlan_header *evh; | |||||
uint16_t eth_type; | |||||
int offset, error; | |||||
eh = mtod(m, struct ether_header *); | |||||
eth_type = ntohs(eh->ether_type); | |||||
if (eth_type == ETHERTYPE_VLAN) { | |||||
/* BMV: We should handle nested VLAN tags too. */ | |||||
evh = mtod(m, struct ether_vlan_header *); | |||||
eth_type = ntohs(evh->evl_proto); | |||||
offset = sizeof(struct ether_vlan_header); | |||||
} else | |||||
offset = sizeof(struct ether_header); | |||||
if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) | |||||
error = ptnet_rx_csum_by_offset(m, eth_type, offset, hdr); | |||||
else | |||||
error = ptnet_rx_csum_by_parse(m, eth_type, offset, hdr); | |||||
return (error); | |||||
} | |||||
/* End of offloading-related functions to be shared with vtnet. */ | |||||
static void | static void | ||||
ptnet_ring_update(struct ptnet_queue *pq, struct netmap_kring *kring, | ptnet_ring_update(struct ptnet_queue *pq, struct netmap_kring *kring, | ||||
unsigned int head, unsigned int sync_flags) | unsigned int head, unsigned int sync_flags) | ||||
{ | { | ||||
struct netmap_ring *ring = kring->ring; | struct netmap_ring *ring = kring->ring; | ||||
struct nm_csb_atok *atok = pq->atok; | struct nm_csb_atok *atok = pq->atok; | ||||
struct nm_csb_ktoa *ktoa = pq->ktoa; | struct nm_csb_ktoa *ktoa = pq->ktoa; | ||||
▲ Show 20 Lines • Show All 115 Lines • ▼ Show 20 Lines | |||||
if (have_vnet_hdr) { | if (have_vnet_hdr) { | ||||
struct virtio_net_hdr *vh = | struct virtio_net_hdr *vh = | ||||
(struct virtio_net_hdr *)nmbuf; | (struct virtio_net_hdr *)nmbuf; | ||||
/* For performance, we could replace this memset() with | /* For performance, we could replace this memset() with | ||||
* two 8-bytes-wide writes. */ | * two 8-bytes-wide writes. */ | ||||
memset(nmbuf, 0, PTNET_HDR_SIZE); | memset(nmbuf, 0, PTNET_HDR_SIZE); | ||||
if (mhead->m_pkthdr.csum_flags & PTNET_ALL_OFFLOAD) { | if (mhead->m_pkthdr.csum_flags & PTNET_ALL_OFFLOAD) { | ||||
mhead = ptnet_tx_offload(ifp, mhead, false, | mhead = virtio_net_tx_offload(ifp, mhead, false, | ||||
vh); | vh); | ||||
if (unlikely(!mhead)) { | if (unlikely(!mhead)) { | ||||
/* Packet dropped because errors | /* Packet dropped because errors | ||||
* occurred while preparing the vnet | * occurred while preparing the vnet | ||||
* header. Let's go ahead with the next | * header. Let's go ahead with the next | ||||
* packet. */ | * packet. */ | ||||
pq->stats.errors ++; | pq->stats.errors ++; | ||||
drbr_advance(ifp, pq->bufring); | drbr_advance(ifp, pq->bufring); | ||||
▲ Show 20 Lines • Show All 361 Lines • ▼ Show 20 Lines | |||||
* With the 802.1Q header removed, update the | * With the 802.1Q header removed, update the | ||||
* checksum starting location accordingly. | * checksum starting location accordingly. | ||||
*/ | */ | ||||
if (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) | if (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) | ||||
vh->csum_start -= ETHER_VLAN_ENCAP_LEN; | vh->csum_start -= ETHER_VLAN_ENCAP_LEN; | ||||
} | } | ||||
} | } | ||||
if (have_vnet_hdr && (vh->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM | if (unlikely(have_vnet_hdr && virtio_net_rx_csum(mhead, vh))) { | ||||
| VIRTIO_NET_HDR_F_DATA_VALID))) { | m_freem(mhead); | ||||
if (unlikely(ptnet_rx_csum(mhead, vh))) { | nm_prlim(1, "Csum offload error: dropping"); | ||||
m_freem(mhead); | pq->stats.iqdrops ++; | ||||
nm_prlim(1, "Csum offload error: dropping"); | deliver = 0; | ||||
pq->stats.iqdrops ++; | |||||
deliver = 0; | |||||
} | |||||
} | } | ||||
skip: | skip: | ||||
count ++; | count ++; | ||||
if (++batch_count >= PTNET_RX_BATCH) { | if (++batch_count >= PTNET_RX_BATCH) { | ||||
/* Some packets have been (or will be) pushed to the network | /* Some packets have been (or will be) pushed to the network | ||||
* stack. We need to update the CSB to tell the host about | * stack. We need to update the CSB to tell the host about | ||||
* the new ring->cur and ring->head (RX buffer refill). */ | * the new ring->cur and ring->head (RX buffer refill). */ | ||||
▲ Show 20 Lines • Show All 121 Lines • Show Last 20 Lines |