Index: stable/10/sys/dev/hyperv/include/hyperv.h
===================================================================
--- stable/10/sys/dev/hyperv/include/hyperv.h
+++ stable/10/sys/dev/hyperv/include/hyperv.h
@@ -107,7 +107,7 @@
 #define HV_MAX_PIPE_USER_DEFINED_BYTES	116
 
-#define HV_MAX_PAGE_BUFFER_COUNT	16
+#define HV_MAX_PAGE_BUFFER_COUNT	32
 #define HV_MAX_MULTIPAGE_BUFFER_COUNT	32
 
 #define HV_ALIGN_UP(value, align)	\
Index: stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.h
===================================================================
--- stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.h
+++ stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.h
@@ -41,20 +41,26 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
+MALLOC_DECLARE(M_NETVSC);
 
 #define NVSP_INVALID_PROTOCOL_VERSION	(0xFFFFFFFF)
 
 #define NVSP_PROTOCOL_VERSION_1		2
 #define NVSP_PROTOCOL_VERSION_2		0x30002
+#define NVSP_PROTOCOL_VERSION_4		0x40000
+#define NVSP_PROTOCOL_VERSION_5		0x50000
 #define NVSP_MIN_PROTOCOL_VERSION	(NVSP_PROTOCOL_VERSION_1)
 #define NVSP_MAX_PROTOCOL_VERSION	(NVSP_PROTOCOL_VERSION_2)
 
 #define NVSP_PROTOCOL_VERSION_CURRENT	NVSP_PROTOCOL_VERSION_2
 
+#define VERSION_4_OFFLOAD_SIZE		22
+
 #define NVSP_OPERATIONAL_STATUS_OK		(0x00000000)
 #define NVSP_OPERATIONAL_STATUS_DEGRADED	(0x00000001)
 #define NVSP_OPERATIONAL_STATUS_NONRECOVERABLE	(0x00000002)
@@ -544,7 +550,7 @@
 #define NVSP_1_CHIMNEY_SEND_INVALID_OOB_INDEX		0xffffu
-#define NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX	0xffffu
+#define NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX	0xffffffff
 
 /*
  * NvspMessage2TypeSendChimneyPacket
  */
@@ -842,11 +848,11 @@
  * Defines
  */
 
-#define NETVSC_SEND_BUFFER_SIZE		(64*1024)	/* 64K */
+#define NETVSC_SEND_BUFFER_SIZE		(1024*1024*15)	/* 15M */
 #define NETVSC_SEND_BUFFER_ID		0xface
 
-#define NETVSC_RECEIVE_BUFFER_SIZE	(1024*1024)	/* 1MB */
+#define NETVSC_RECEIVE_BUFFER_SIZE	(1024*1024*16)	/* 16MB */
 
 #define NETVSC_RECEIVE_BUFFER_ID	0xcafe
@@ -862,6 +868,8 @@
  */
 #define NETVSC_MAX_CONFIGURABLE_MTU	(9 * 1024)
 
+#define NETVSC_PACKET_SIZE		PAGE_SIZE
+
 /*
  * Data types
  */
@@ -873,15 +881,14 @@
 	struct hv_device	*dev;
 	int			num_outstanding_sends;
 
-	/* List of free preallocated NETVSC_PACKET to represent RX packet */
-	STAILQ_HEAD(PQ, netvsc_packet_) myrx_packet_list;
-	struct mtx		rx_pkt_list_lock;
-
 	/* Send buffer allocated by us but manages by NetVSP */
 	void			*send_buf;
 	uint32_t		send_buf_size;
 	uint32_t		send_buf_gpadl_handle;
 	uint32_t		send_section_size;
+	uint32_t		send_section_count;
+	unsigned long		bitsmap_words;
+	unsigned long		*send_section_bitsmap;
 
 	/* Receive buffer allocated by us but managed by NetVSP */
 	void			*rx_buf;
@@ -903,35 +910,43 @@
 	hv_bool_uint8_t		destroy;
 	/* Negotiated NVSP version */
 	uint32_t		nvsp_version;
+
+	uint8_t			callback_buf[NETVSC_PACKET_SIZE];
 } netvsc_dev;
 
 typedef void (*pfn_on_send_rx_completion)(void *);
 
-#define NETVSC_DEVICE_RING_BUFFER_SIZE	(64 * PAGE_SIZE)
-#define NETVSC_PACKET_MAXPAGE		16
-
+#define NETVSC_DEVICE_RING_BUFFER_SIZE	(128 * PAGE_SIZE)
+#define NETVSC_PACKET_MAXPAGE		32
 
-typedef struct xfer_page_packet_ {
-	/*
-	 * This needs to be here because the network RX code casts
-	 * an instantiation of this structure to a netvsc_packet.
-	 */
-	STAILQ_ENTRY(netvsc_packet_) mylist_entry;
-	uint32_t count;
-} xfer_page_packet;
+#define NETVSC_VLAN_PRIO_MASK		0xe000
+#define NETVSC_VLAN_PRIO_SHIFT		13
+#define NETVSC_VLAN_VID_MASK		0x0fff
+
+#define TYPE_IPV4	2
+#define TYPE_IPV6	4
+#define TYPE_TCP	2
+#define TYPE_UDP	4
+
+#define TRANSPORT_TYPE_NOT_IP		0
+#define TRANSPORT_TYPE_IPV4_TCP		((TYPE_IPV4 << 16) | TYPE_TCP)
+#define TRANSPORT_TYPE_IPV4_UDP		((TYPE_IPV4 << 16) | TYPE_UDP)
+#define TRANSPORT_TYPE_IPV6_TCP		((TYPE_IPV6 << 16) | TYPE_TCP)
+#define TRANSPORT_TYPE_IPV6_UDP		((TYPE_IPV6 << 16) | TYPE_UDP)
+
+#ifdef __LP64__
+#define BITS_PER_LONG	64
+#else
+#define BITS_PER_LONG	32
+#endif
 
 typedef struct netvsc_packet_ {
-	/*
-	 * List used when enqueued on &net_dev->rx_packet_list,
-	 * and when enqueued within the netvsc code
-	 */
-	STAILQ_ENTRY(netvsc_packet_) mylist_entry;
 	struct hv_device	*device;
 	hv_bool_uint8_t		is_data_pkt;	/* One byte */
 	uint16_t		vlan_tci;
-	xfer_page_packet	*xfer_page_pkt;
+	uint32_t		status;
 
 	/* Completion */
 	union {
@@ -948,9 +963,12 @@
 			pfn_on_send_rx_completion	on_send_completion;
 		} send;
 	} compl;
+	uint32_t	send_buf_section_idx;
+	uint32_t	send_buf_section_size;
 
-	void		*extension;
+	void		*rndis_mesg;
 	uint32_t	tot_data_buf_len;
+	void		*data;
 	uint32_t	page_buf_count;
 	hv_vmbus_page_buffer	page_buffers[NETVSC_PACKET_MAXPAGE];
 } netvsc_packet;
@@ -984,16 +1002,16 @@
  */
 extern int hv_promisc_mode;
 
-extern void netvsc_linkstatus_callback(struct hv_device *device_obj,
-    uint32_t status);
-extern int netvsc_recv(struct hv_device *device_obj, netvsc_packet *packet);
-extern void netvsc_xmit_completion(void *context);
-
-extern void hv_nv_on_receive_completion(void *context);
-extern netvsc_dev *hv_nv_on_device_add(struct hv_device *device, void *additional_info);
-extern int hv_nv_on_device_remove(struct hv_device *device,
-    boolean_t destroy_channel);
-extern int hv_nv_on_send(struct hv_device *device, netvsc_packet *pkt);
+void netvsc_linkstatus_callback(struct hv_device *device_obj, uint32_t status);
+void netvsc_xmit_completion(void *context);
+void hv_nv_on_receive_completion(struct hv_device *device,
+    uint64_t tid, uint32_t status);
+netvsc_dev *hv_nv_on_device_add(struct hv_device *device,
+    void *additional_info);
+int hv_nv_on_device_remove(struct hv_device *device,
+    boolean_t destroy_channel);
+int hv_nv_on_send(struct hv_device *device, netvsc_packet *pkt);
+int hv_nv_get_next_send_section(netvsc_dev *net_dev);
 
 #endif  /* __HV_NET_VSC_H__ */
Index: stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.c
===================================================================
--- stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.c
+++ stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.c
@@ -48,6 +48,7 @@
 #include "hv_rndis.h"
 #include "hv_rndis_filter.h"
 
+MALLOC_DEFINE(M_NETVSC, "netvsc", "Hyper-V netvsc driver");
 /*
  * Forward declarations
  */
@@ -58,13 +59,10 @@
 static int  hv_nv_destroy_send_buffer(netvsc_dev *net_dev);
 static int  hv_nv_destroy_rx_buffer(netvsc_dev *net_dev);
 static int  hv_nv_connect_to_vsp(struct hv_device *device);
-static void hv_nv_on_send_completion(struct hv_device *device,
-    hv_vm_packet_descriptor *pkt);
-static void hv_nv_on_receive(struct hv_device *device,
-    hv_vm_packet_descriptor *pkt);
-static void hv_nv_send_receive_completion(struct hv_device *device,
-    uint64_t tid);
-
+static void hv_nv_on_send_completion(netvsc_dev *net_dev,
+    struct hv_device *device, hv_vm_packet_descriptor *pkt);
+static void hv_nv_on_receive(netvsc_dev *net_dev,
+    struct hv_device *device, hv_vm_packet_descriptor *pkt);
 
 /*
  *
@@ 
-75,7 +73,7 @@ netvsc_dev *net_dev; hn_softc_t *sc = device_get_softc(device->device); - net_dev = malloc(sizeof(netvsc_dev), M_DEVBUF, M_NOWAIT | M_ZERO); + net_dev = malloc(sizeof(netvsc_dev), M_NETVSC, M_NOWAIT | M_ZERO); if (net_dev == NULL) { return (NULL); } @@ -127,6 +125,34 @@ return (net_dev); } +int +hv_nv_get_next_send_section(netvsc_dev *net_dev) +{ + unsigned long bitsmap_words = net_dev->bitsmap_words; + unsigned long *bitsmap = net_dev->send_section_bitsmap; + unsigned long idx; + int ret = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; + int i; + + for (i = 0; i < bitsmap_words; i++) { + idx = ffs(~bitsmap[i]); + if (0 == idx) + continue; + + idx--; + if (i * BITS_PER_LONG + idx >= net_dev->send_section_count) + return (ret); + + if (synch_test_and_set_bit(idx, &bitsmap[i])) + continue; + + ret = i * BITS_PER_LONG + idx; + break; + } + + return (ret); +} + /* * Net VSC initialize receive buffer with net VSP * @@ -145,12 +171,8 @@ return (ENODEV); } - net_dev->rx_buf = contigmalloc(net_dev->rx_buf_size, M_DEVBUF, + net_dev->rx_buf = contigmalloc(net_dev->rx_buf_size, M_NETVSC, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); - if (net_dev->rx_buf == NULL) { - ret = ENOMEM; - goto cleanup; - } /* * Establish the GPADL handle for this buffer on this channel. @@ -201,7 +223,7 @@ init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.num_sections; net_dev->rx_sections = malloc(net_dev->rx_section_count * - sizeof(nvsp_1_rx_buf_section), M_DEVBUF, M_NOWAIT); + sizeof(nvsp_1_rx_buf_section), M_NETVSC, M_NOWAIT); if (net_dev->rx_sections == NULL) { ret = EINVAL; goto cleanup; @@ -245,7 +267,7 @@ return (ENODEV); } - net_dev->send_buf = contigmalloc(net_dev->send_buf_size, M_DEVBUF, + net_dev->send_buf = contigmalloc(net_dev->send_buf_size, M_NETVSC, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); if (net_dev->send_buf == NULL) { ret = ENOMEM; @@ -258,7 +280,7 @@ * channel to establish the gpadl handle. 
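/*
 * Editor's note: a minimal, self-contained sketch (not part of the patch)
 * of the first-fit bitmap allocator that hv_nv_get_next_send_section()
 * above implements over net_dev->send_section_bitsmap.  The GCC/Clang
 * __builtin/__atomic helpers stand in for the driver's ffs(),
 * synch_test_and_set_bit() and synch_change_bit() primitives; the word and
 * bounds arithmetic mirrors hv_nv_init_send_buffer_with_net_vsp(), where
 * send_section_count = send_buf_size / send_section_size and the bitmap
 * holds howmany(send_section_count, BITS_PER_LONG) words.
 */
#include <limits.h>

#define BITS_PER_WORD		(sizeof(unsigned long) * CHAR_BIT)
#define INVALID_SECTION_INDEX	0xffffffffu

static unsigned int
alloc_send_section(unsigned long *bitmap, unsigned long nwords,
    unsigned long section_count)
{
	unsigned long i;
	int bit;

	for (i = 0; i < nwords; i++) {
		/* Lowest clear bit in this word; 0 means the word is full. */
		bit = __builtin_ffsl((long)~bitmap[i]);
		if (bit == 0)
			continue;
		bit--;
		if (i * BITS_PER_WORD + bit >= section_count)
			return (INVALID_SECTION_INDEX);
		/* Claim it atomically; move on if another CPU won the race. */
		if (__atomic_fetch_or(&bitmap[i], 1UL << bit,
		    __ATOMIC_SEQ_CST) & (1UL << bit))
			continue;
		return (i * BITS_PER_WORD + bit);
	}
	return (INVALID_SECTION_INDEX);
}

/* Release a section on send completion (the driver toggles the bit). */
static void
free_send_section(unsigned long *bitmap, unsigned int idx)
{
	__atomic_fetch_and(&bitmap[idx / BITS_PER_WORD],
	    ~(1UL << (idx % BITS_PER_WORD)), __ATOMIC_SEQ_CST);
}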
*/ ret = hv_vmbus_channel_establish_gpadl(device->channel, - net_dev->send_buf, net_dev->send_buf_size, + net_dev->send_buf, net_dev->send_buf_size, &net_dev->send_buf_gpadl_handle); if (ret != 0) { goto cleanup; @@ -279,7 +301,7 @@ /* Send the gpadl notification request */ ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, - sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt, + sizeof(nvsp_msg), (uint64_t)init_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) { @@ -297,6 +319,17 @@ net_dev->send_section_size = init_pkt->msgs.vers_1_msgs.send_send_buf_complete.section_size; + net_dev->send_section_count = + net_dev->send_buf_size / net_dev->send_section_size; + net_dev->bitsmap_words = howmany(net_dev->send_section_count, + BITS_PER_LONG); + net_dev->send_section_bitsmap = + malloc(net_dev->bitsmap_words * sizeof(long), M_NETVSC, + M_NOWAIT | M_ZERO); + if (NULL == net_dev->send_section_bitsmap) { + ret = ENOMEM; + goto cleanup; + } goto exit; @@ -361,12 +394,12 @@ if (net_dev->rx_buf) { /* Free up the receive buffer */ - contigfree(net_dev->rx_buf, net_dev->rx_buf_size, M_DEVBUF); + contigfree(net_dev->rx_buf, net_dev->rx_buf_size, M_NETVSC); net_dev->rx_buf = NULL; } if (net_dev->rx_sections) { - free(net_dev->rx_sections, M_DEVBUF); + free(net_dev->rx_sections, M_NETVSC); net_dev->rx_sections = NULL; net_dev->rx_section_count = 0; } @@ -429,10 +462,14 @@ if (net_dev->send_buf) { /* Free up the receive buffer */ - contigfree(net_dev->send_buf, net_dev->send_buf_size, M_DEVBUF); + contigfree(net_dev->send_buf, net_dev->send_buf_size, M_NETVSC); net_dev->send_buf = NULL; } + if (net_dev->send_section_bitsmap) { + free(net_dev->send_section_bitsmap, M_NETVSC); + } + return (ret); } @@ -446,7 +483,7 @@ */ static int hv_nv_negotiate_nvsp_protocol(struct hv_device *device, netvsc_dev *net_dev, - uint32_t nvsp_ver) + uint32_t nvsp_ver) { nvsp_msg *init_pkt; int ret; @@ -523,8 +560,13 @@ { netvsc_dev *net_dev; nvsp_msg *init_pkt; - uint32_t nvsp_vers; uint32_t ndis_version; + uint32_t protocol_list[] = { NVSP_PROTOCOL_VERSION_1, + NVSP_PROTOCOL_VERSION_2, + NVSP_PROTOCOL_VERSION_4, + NVSP_PROTOCOL_VERSION_5 }; + int i; + int protocol_number = nitems(protocol_list); int ret = 0; device_t dev = device->device; hn_softc_t *sc = device_get_softc(dev); @@ -536,26 +578,31 @@ } /* - * Negotiate the NVSP version. Try NVSP v2 first. + * Negotiate the NVSP version. Try the latest NVSP first. 
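/*
 * Editor's note: a minimal sketch (not part of the patch) of the
 * newest-first negotiation loop that hv_nv_connect_to_vsp() switches to
 * below.  try_negotiate() is a hypothetical stand-in for
 * hv_nv_negotiate_nvsp_protocol(); the table must stay ordered oldest to
 * newest because the loop walks it from the end and keeps the first
 * version the host accepts.
 */
#include <stdint.h>
#include <stdio.h>

/* Stub: pretend the host tops out at NVSP 4 (0x40000). */
static int
try_negotiate(uint32_t ver)
{
	return (ver <= 0x40000) ? 0 : -1;
}

static uint32_t
negotiate_newest_first(void)
{
	static const uint32_t vers[] = {
		0x00000002,	/* NVSP_PROTOCOL_VERSION_1 */
		0x00030002,	/* NVSP_PROTOCOL_VERSION_2 */
		0x00040000,	/* NVSP_PROTOCOL_VERSION_4 */
		0x00050000,	/* NVSP_PROTOCOL_VERSION_5 */
	};
	int i;

	for (i = (int)(sizeof(vers) / sizeof(vers[0])) - 1; i >= 0; i--) {
		if (try_negotiate(vers[i]) == 0) {
			printf("negotiated NVSP 0x%x\n", vers[i]);
			return (vers[i]);
		}
	}
	return (0);	/* nothing accepted; the driver returns EPROTO */
}

int
main(void)
{
	return (negotiate_newest_first() == 0);
}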
*/ - nvsp_vers = NVSP_PROTOCOL_VERSION_2; - ret = hv_nv_negotiate_nvsp_protocol(device, net_dev, nvsp_vers); - if (ret != 0) { - /* NVSP v2 failed, try NVSP v1 */ - nvsp_vers = NVSP_PROTOCOL_VERSION_1; - ret = hv_nv_negotiate_nvsp_protocol(device, net_dev, nvsp_vers); - if (ret != 0) { - /* NVSP v1 failed, return bad status */ - return (ret); + for (i = protocol_number - 1; i >= 0; i--) { + if (hv_nv_negotiate_nvsp_protocol(device, net_dev, + protocol_list[i]) == 0) { + net_dev->nvsp_version = protocol_list[i]; + if (bootverbose) + device_printf(dev, "Netvsc: got version 0x%x\n", + net_dev->nvsp_version); + break; } } - net_dev->nvsp_version = nvsp_vers; + + if (i < 0) { + if (bootverbose) + device_printf(dev, "failed to negotiate a valid " + "protocol.\n"); + return (EPROTO); + } /* * Set the MTU if supported by this NVSP protocol version * This needs to be right after the NVSP init message per Haiyang */ - if (nvsp_vers >= NVSP_PROTOCOL_VERSION_2) + if (net_dev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) ret = hv_nv_send_ndis_config(device, ifp->if_mtu); /* @@ -565,10 +612,11 @@ memset(init_pkt, 0, sizeof(nvsp_msg)); - /* - * Updated to version 5.1, minimum, for VLAN per Haiyang - */ - ndis_version = NDIS_VERSION; + if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_4) { + ndis_version = NDIS_VERSION_6_1; + } else { + ndis_version = NDIS_VERSION_6_30; + } init_pkt->hdr.msg_type = nvsp_msg_1_type_send_ndis_vers; init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_major_vers = @@ -620,9 +668,7 @@ hv_nv_on_device_add(struct hv_device *device, void *additional_info) { netvsc_dev *net_dev; - netvsc_packet *packet; - netvsc_packet *next_packet; - int i, ret = 0; + int ret = 0; net_dev = hv_nv_alloc_net_device(device); if (!net_dev) @@ -630,29 +676,9 @@ /* Initialize the NetVSC channel extension */ net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; - mtx_init(&net_dev->rx_pkt_list_lock, "HV-RPL", NULL, - MTX_SPIN | MTX_RECURSE); net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE; - /* Same effect as STAILQ_HEAD_INITIALIZER() static initializer */ - STAILQ_INIT(&net_dev->myrx_packet_list); - - /* - * malloc a sufficient number of netvsc_packet buffers to hold - * a packet list. Add them to the netvsc device packet queue. 
- */ - for (i=0; i < NETVSC_RECEIVE_PACKETLIST_COUNT; i++) { - packet = malloc(sizeof(netvsc_packet) + - (NETVSC_RECEIVE_SG_COUNT * sizeof(hv_vmbus_page_buffer)), - M_DEVBUF, M_NOWAIT | M_ZERO); - if (!packet) { - break; - } - STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list, packet, - mylist_entry); - } - sema_init(&net_dev->channel_init_sema, 0, "netdev_sema"); /* @@ -685,19 +711,7 @@ */ if (net_dev) { sema_destroy(&net_dev->channel_init_sema); - - packet = STAILQ_FIRST(&net_dev->myrx_packet_list); - while (packet != NULL) { - next_packet = STAILQ_NEXT(packet, mylist_entry); - free(packet, M_DEVBUF); - packet = next_packet; - } - /* Reset the list to initial state */ - STAILQ_INIT(&net_dev->myrx_packet_list); - - mtx_destroy(&net_dev->rx_pkt_list_lock); - - free(net_dev, M_DEVBUF); + free(net_dev, M_NETVSC); } return (NULL); @@ -709,8 +723,6 @@ int hv_nv_on_device_remove(struct hv_device *device, boolean_t destroy_channel) { - netvsc_packet *net_vsc_pkt; - netvsc_packet *next_net_vsc_pkt; hn_softc_t *sc = device_get_softc(device->device); netvsc_dev *net_dev = sc->net_dev;; @@ -737,20 +749,8 @@ hv_vmbus_channel_close(device->channel); - /* Release all resources */ - net_vsc_pkt = STAILQ_FIRST(&net_dev->myrx_packet_list); - while (net_vsc_pkt != NULL) { - next_net_vsc_pkt = STAILQ_NEXT(net_vsc_pkt, mylist_entry); - free(net_vsc_pkt, M_DEVBUF); - net_vsc_pkt = next_net_vsc_pkt; - } - - /* Reset the list to initial state */ - STAILQ_INIT(&net_dev->myrx_packet_list); - - mtx_destroy(&net_dev->rx_pkt_list_lock); sema_destroy(&net_dev->channel_init_sema); - free(net_dev, M_DEVBUF); + free(net_dev, M_NETVSC); return (0); } @@ -758,18 +758,13 @@ /* * Net VSC on send completion */ -static void -hv_nv_on_send_completion(struct hv_device *device, hv_vm_packet_descriptor *pkt) +static void +hv_nv_on_send_completion(netvsc_dev *net_dev, + struct hv_device *device, hv_vm_packet_descriptor *pkt) { - netvsc_dev *net_dev; nvsp_msg *nvsp_msg_pkt; netvsc_packet *net_vsc_pkt; - net_dev = hv_nv_get_inbound_net_device(device); - if (!net_dev) { - return; - } - nvsp_msg_pkt = (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3)); @@ -780,17 +775,25 @@ == nvsp_msg_1_type_send_send_buf_complete) { /* Copy the response back */ memcpy(&net_dev->channel_init_packet, nvsp_msg_pkt, - sizeof(nvsp_msg)); + sizeof(nvsp_msg)); sema_post(&net_dev->channel_init_sema); } else if (nvsp_msg_pkt->hdr.msg_type == - nvsp_msg_1_type_send_rndis_pkt_complete) { + nvsp_msg_1_type_send_rndis_pkt_complete) { /* Get the send context */ net_vsc_pkt = (netvsc_packet *)(unsigned long)pkt->transaction_id; + if (NULL != net_vsc_pkt) { + if (net_vsc_pkt->send_buf_section_idx != + NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { + synch_change_bit(net_vsc_pkt->send_buf_section_idx, + net_dev->send_section_bitsmap); + } + + /* Notify the layer above us */ + net_vsc_pkt->compl.send.on_send_completion( + net_vsc_pkt->compl.send.send_completion_context); - /* Notify the layer above us */ - net_vsc_pkt->compl.send.on_send_completion( - net_vsc_pkt->compl.send.send_completion_context); + } atomic_subtract_int(&net_dev->num_outstanding_sends, 1); } @@ -821,10 +824,10 @@ send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 1; } - /* Not using send buffer section */ send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_idx = - 0xFFFFFFFF; - send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_size = 0; + pkt->send_buf_section_idx; + send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_size = + pkt->send_buf_section_size; if 
(pkt->page_buf_count) { ret = hv_vmbus_channel_send_packet_pagebuffer(device->channel, @@ -850,157 +853,81 @@ * In the FreeBSD Hyper-V virtual world, this function deals exclusively * with virtual addresses. */ -static void -hv_nv_on_receive(struct hv_device *device, hv_vm_packet_descriptor *pkt) +static void +hv_nv_on_receive(netvsc_dev *net_dev, struct hv_device *device, + hv_vm_packet_descriptor *pkt) { - netvsc_dev *net_dev; hv_vm_transfer_page_packet_header *vm_xfer_page_pkt; nvsp_msg *nvsp_msg_pkt; - netvsc_packet *net_vsc_pkt = NULL; - unsigned long start; - xfer_page_packet *xfer_page_pkt = NULL; - STAILQ_HEAD(PKT_LIST, netvsc_packet_) mylist_head = - STAILQ_HEAD_INITIALIZER(mylist_head); + netvsc_packet vsc_pkt; + netvsc_packet *net_vsc_pkt = &vsc_pkt; + device_t dev = device->device; int count = 0; int i = 0; - - net_dev = hv_nv_get_inbound_net_device(device); - if (!net_dev) - return; + int status = nvsp_status_success; /* * All inbound packets other than send completion should be * xfer page packet. */ - if (pkt->type != HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES) + if (pkt->type != HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES) { + device_printf(dev, "packet type %d is invalid!\n", pkt->type); return; + } nvsp_msg_pkt = (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3)); /* Make sure this is a valid nvsp packet */ - if (nvsp_msg_pkt->hdr.msg_type != nvsp_msg_1_type_send_rndis_pkt) + if (nvsp_msg_pkt->hdr.msg_type != nvsp_msg_1_type_send_rndis_pkt) { + device_printf(dev, "packet hdr type %d is invalid!\n", + pkt->type); return; + } vm_xfer_page_pkt = (hv_vm_transfer_page_packet_header *)pkt; - if (vm_xfer_page_pkt->transfer_page_set_id - != NETVSC_RECEIVE_BUFFER_ID) { + if (vm_xfer_page_pkt->transfer_page_set_id != + NETVSC_RECEIVE_BUFFER_ID) { + device_printf(dev, "transfer_page_set_id %d is invalid!\n", + vm_xfer_page_pkt->transfer_page_set_id); return; } - STAILQ_INIT(&mylist_head); - - /* - * Grab free packets (range count + 1) to represent this xfer page - * packet. +1 to represent the xfer page packet itself. We grab it - * here so that we know exactly how many we can fulfill. - */ - mtx_lock_spin(&net_dev->rx_pkt_list_lock); - while (!STAILQ_EMPTY(&net_dev->myrx_packet_list)) { - net_vsc_pkt = STAILQ_FIRST(&net_dev->myrx_packet_list); - STAILQ_REMOVE_HEAD(&net_dev->myrx_packet_list, mylist_entry); - - STAILQ_INSERT_TAIL(&mylist_head, net_vsc_pkt, mylist_entry); - - if (++count == vm_xfer_page_pkt->range_count + 1) - break; - } - - mtx_unlock_spin(&net_dev->rx_pkt_list_lock); - - /* - * We need at least 2 netvsc pkts (1 to represent the xfer page - * and at least 1 for the range) i.e. we can handle some of the - * xfer page packet ranges... 
- */ - if (count < 2) { - /* Return netvsc packet to the freelist */ - mtx_lock_spin(&net_dev->rx_pkt_list_lock); - for (i=count; i != 0; i--) { - net_vsc_pkt = STAILQ_FIRST(&mylist_head); - STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry); - - STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list, - net_vsc_pkt, mylist_entry); - } - mtx_unlock_spin(&net_dev->rx_pkt_list_lock); - - hv_nv_send_receive_completion(device, - vm_xfer_page_pkt->d.transaction_id); - - return; - } - - /* Take the first packet in the list */ - xfer_page_pkt = (xfer_page_packet *)STAILQ_FIRST(&mylist_head); - STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry); - - /* This is how many data packets we can supply */ - xfer_page_pkt->count = count - 1; + count = vm_xfer_page_pkt->range_count; + net_vsc_pkt->device = device; /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */ - for (i=0; i < (count - 1); i++) { - net_vsc_pkt = STAILQ_FIRST(&mylist_head); - STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry); - - /* - * Initialize the netvsc packet - */ - net_vsc_pkt->xfer_page_pkt = xfer_page_pkt; - net_vsc_pkt->compl.rx.rx_completion_context = net_vsc_pkt; - net_vsc_pkt->device = device; - /* Save this so that we can send it back */ - net_vsc_pkt->compl.rx.rx_completion_tid = - vm_xfer_page_pkt->d.transaction_id; - - net_vsc_pkt->tot_data_buf_len = - vm_xfer_page_pkt->ranges[i].byte_count; - net_vsc_pkt->page_buf_count = 1; - - net_vsc_pkt->page_buffers[0].length = - vm_xfer_page_pkt->ranges[i].byte_count; - - /* The virtual address of the packet in the receive buffer */ - start = ((unsigned long)net_dev->rx_buf + + for (i = 0; i < count; i++) { + net_vsc_pkt->status = nvsp_status_success; + net_vsc_pkt->data = (void *)((unsigned long)net_dev->rx_buf + vm_xfer_page_pkt->ranges[i].byte_offset); - start = ((unsigned long)start) & ~(PAGE_SIZE - 1); - - /* Page number of the virtual page containing packet start */ - net_vsc_pkt->page_buffers[0].pfn = start >> PAGE_SHIFT; - - /* Calculate the page relative offset */ - net_vsc_pkt->page_buffers[0].offset = - vm_xfer_page_pkt->ranges[i].byte_offset & (PAGE_SIZE - 1); - - /* - * In this implementation, we are dealing with virtual - * addresses exclusively. Since we aren't using physical - * addresses at all, we don't care if a packet crosses a - * page boundary. For this reason, the original code to - * check for and handle page crossings has been removed. - */ - - /* - * Pass it to the upper layer. The receive completion call - * has been moved into this function. - */ - hv_rf_on_receive(device, net_vsc_pkt); + net_vsc_pkt->tot_data_buf_len = + vm_xfer_page_pkt->ranges[i].byte_count; - /* - * Moved completion call back here so that all received - * messages (not just data messages) will trigger a response - * message back to the host. - */ - hv_nv_on_receive_completion(net_vsc_pkt); + hv_rf_on_receive(net_dev, device, net_vsc_pkt); + if (net_vsc_pkt->status != nvsp_status_success) { + status = nvsp_status_failure; + } } + + /* + * Moved completion call back here so that all received + * messages (not just data messages) will trigger a response + * message back to the host. 
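/*
 * Editor's note: a minimal sketch (not part of the patch) of how the
 * reworked hv_nv_on_receive() above consumes a transfer-page packet: each
 * range is an (offset, length) window into the shared receive buffer, every
 * range is handed up as one RNDIS message using a netvsc_packet on the
 * stack, and a single completion carrying the aggregated status is sent
 * back to the host afterwards.  The xfer_range type and the deliver()/
 * complete() helpers are hypothetical stand-ins.
 */
#include <stdint.h>

struct xfer_range {
	uint32_t byte_offset;	/* offset into the receive buffer */
	uint32_t byte_count;	/* length of this RNDIS message */
};

/* Stand-in for hv_rf_on_receive(): returns 0 on success. */
static int
deliver(void *frame, uint32_t len)
{
	return (frame != NULL && len != 0) ? 0 : -1;
}

/* Stand-in for hv_nv_on_receive_completion(): one reply per packet. */
static void
complete(uint64_t tid, int ok)
{
	(void)tid;
	(void)ok;
}

static void
handle_xfer_page_packet(uint8_t *rx_buf, uint64_t tid,
    const struct xfer_range *ranges, int range_count)
{
	int ok = 1;
	int i;

	for (i = 0; i < range_count; i++) {
		/* Each range holds exactly one Ethernet frame. */
		if (deliver(rx_buf + ranges[i].byte_offset,
		    ranges[i].byte_count) != 0)
			ok = 0;
	}
	/* Ack the whole transfer-page packet once, pass or fail. */
	complete(tid, ok);
}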
+ */ + hv_nv_on_receive_completion(device, vm_xfer_page_pkt->d.transaction_id, + status); } /* - * Net VSC send receive completion + * Net VSC on receive completion + * + * Send a receive completion packet to RNDIS device (ie NetVsp) */ -static void -hv_nv_send_receive_completion(struct hv_device *device, uint64_t tid) +void +hv_nv_on_receive_completion(struct hv_device *device, uint64_t tid, + uint32_t status) { nvsp_msg rx_comp_msg; int retries = 0; @@ -1010,7 +937,7 @@ /* Pass in the status */ rx_comp_msg.msgs.vers_1_msgs.send_rndis_pkt_complete.status = - nvsp_status_success; + status; retry_send_cmplt: /* Send the completion */ @@ -1031,81 +958,26 @@ } /* - * Net VSC on receive completion - * - * Send a receive completion packet to RNDIS device (ie NetVsp) - */ -void -hv_nv_on_receive_completion(void *context) -{ - netvsc_packet *packet = (netvsc_packet *)context; - struct hv_device *device = (struct hv_device *)packet->device; - netvsc_dev *net_dev; - uint64_t tid = 0; - boolean_t send_rx_completion = FALSE; - - /* - * Even though it seems logical to do a hv_nv_get_outbound_net_device() - * here to send out receive completion, we are using - * hv_nv_get_inbound_net_device() since we may have disabled - * outbound traffic already. - */ - net_dev = hv_nv_get_inbound_net_device(device); - if (net_dev == NULL) - return; - - /* Overloading use of the lock. */ - mtx_lock_spin(&net_dev->rx_pkt_list_lock); - - packet->xfer_page_pkt->count--; - - /* - * Last one in the line that represent 1 xfer page packet. - * Return the xfer page packet itself to the free list. - */ - if (packet->xfer_page_pkt->count == 0) { - send_rx_completion = TRUE; - tid = packet->compl.rx.rx_completion_tid; - STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list, - (netvsc_packet *)(packet->xfer_page_pkt), mylist_entry); - } - - /* Put the packet back on the free list */ - STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list, packet, mylist_entry); - mtx_unlock_spin(&net_dev->rx_pkt_list_lock); - - /* Send a receive completion for the xfer page packet */ - if (send_rx_completion) - hv_nv_send_receive_completion(device, tid); -} - -/* * Net VSC on channel callback */ static void hv_nv_on_channel_callback(void *context) { - /* Fixme: Magic number */ - const int net_pkt_size = 2048; struct hv_device *device = (struct hv_device *)context; netvsc_dev *net_dev; + device_t dev = device->device; uint32_t bytes_rxed; uint64_t request_id; - uint8_t *packet; - hv_vm_packet_descriptor *desc; + hv_vm_packet_descriptor *desc; uint8_t *buffer; - int bufferlen = net_pkt_size; - int ret = 0; - - packet = malloc(net_pkt_size * sizeof(uint8_t), M_DEVBUF, M_NOWAIT); - if (!packet) - return; - - buffer = packet; + int bufferlen = NETVSC_PACKET_SIZE; + int ret = 0; net_dev = hv_nv_get_inbound_net_device(device); if (net_dev == NULL) - goto out; + return; + + buffer = net_dev->callback_buf; do { ret = hv_vmbus_channel_recv_packet_raw(device->channel, @@ -1115,12 +987,15 @@ desc = (hv_vm_packet_descriptor *)buffer; switch (desc->type) { case HV_VMBUS_PACKET_TYPE_COMPLETION: - hv_nv_on_send_completion(device, desc); + hv_nv_on_send_completion(net_dev, device, desc); break; case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES: - hv_nv_on_receive(device, desc); + hv_nv_on_receive(net_dev, device, desc); break; default: + device_printf(dev, + "hv_cb recv unknow type %d " + " packet\n", desc->type); break; } } else { @@ -1128,16 +1003,24 @@ } } else if (ret == ENOBUFS) { /* Handle large packet */ - free(buffer, M_DEVBUF); - buffer = malloc(bytes_rxed, M_DEVBUF, 
M_NOWAIT); + if (bufferlen > NETVSC_PACKET_SIZE) { + free(buffer, M_NETVSC); + buffer = NULL; + } + + /* alloc new buffer */ + buffer = malloc(bytes_rxed, M_NETVSC, M_NOWAIT); if (buffer == NULL) { + device_printf(dev, + "hv_cb malloc buffer failed, len=%u\n", + bytes_rxed); + bufferlen = 0; break; } bufferlen = bytes_rxed; } } while (1); -out: - free(buffer, M_DEVBUF); + if (bufferlen > NETVSC_PACKET_SIZE) + free(buffer, M_NETVSC); } - Index: stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c =================================================================== --- stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c +++ stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c @@ -55,6 +55,9 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_inet6.h" +#include "opt_inet.h" + #include #include #include @@ -83,6 +86,9 @@ #include #include #include +#include +#include +#include #include #include @@ -103,6 +109,8 @@ #include +#include + #include #include "hv_net_vsc.h" #include "hv_rndis.h" @@ -165,6 +173,61 @@ static int hn_start_locked(struct ifnet *ifp); static void hn_start(struct ifnet *ifp); +/* + * NetVsc get message transport protocol type + */ +static uint32_t get_transport_proto_type(struct mbuf *m_head) +{ + uint32_t ret_val = TRANSPORT_TYPE_NOT_IP; + uint16_t ether_type = 0; + int ether_len = 0; + struct ether_vlan_header *eh; +#ifdef INET + struct ip *iph; +#endif +#ifdef INET6 + struct ip6_hdr *ip6; +#endif + + eh = mtod(m_head, struct ether_vlan_header*); + if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { + ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; + ether_type = eh->evl_proto; + } else { + ether_len = ETHER_HDR_LEN; + ether_type = eh->evl_encap_proto; + } + + switch (ntohs(ether_type)) { +#ifdef INET6 + case ETHERTYPE_IPV6: + ip6 = (struct ip6_hdr *)(m_head->m_data + ether_len); + + if (IPPROTO_TCP == ip6->ip6_nxt) { + ret_val = TRANSPORT_TYPE_IPV6_TCP; + } else if (IPPROTO_UDP == ip6->ip6_nxt) { + ret_val = TRANSPORT_TYPE_IPV6_UDP; + } + break; +#endif +#ifdef INET + case ETHERTYPE_IP: + iph = (struct ip *)(m_head->m_data + ether_len); + + if (IPPROTO_TCP == iph->ip_p) { + ret_val = TRANSPORT_TYPE_IPV4_TCP; + } else if (IPPROTO_UDP == iph->ip_p) { + ret_val = TRANSPORT_TYPE_IPV4_UDP; + } + break; +#endif + default: + ret_val = TRANSPORT_TYPE_NOT_IP; + break; + } + + return (ret_val); +} /* * NetVsc driver initialization @@ -276,8 +339,11 @@ * Tell upper layers that we support full VLAN capability. 
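/*
 * Editor's note: an illustrative, self-contained example (not part of the
 * patch) of the packed "transport type" value produced by
 * get_transport_proto_type() above.  The L3 protocol sits in the upper 16
 * bits and the L4 protocol in the lower 16 bits; because TYPE_IPV4/TYPE_TCP
 * (2) and TYPE_IPV6/TYPE_UDP (4) use distinct bits, the checksum setup code
 * later in hn_start_locked() can classify a frame with plain bitwise tests.
 * The macros below repeat the hv_net_vsc.h definitions so this compiles on
 * its own.
 */
#include <assert.h>
#include <stdint.h>

#define TYPE_IPV4	2
#define TYPE_IPV6	4
#define TYPE_TCP	2
#define TYPE_UDP	4
#define TRANSPORT_TYPE_IPV4_TCP	((TYPE_IPV4 << 16) | TYPE_TCP)
#define TRANSPORT_TYPE_IPV6_UDP	((TYPE_IPV6 << 16) | TYPE_UDP)

int
main(void)
{
	uint32_t t = TRANSPORT_TYPE_IPV4_TCP;

	assert(t & (TYPE_IPV4 << 16));	/* IPv4 bit set in the upper half */
	assert(t & TYPE_TCP);		/* TCP bit set in the lower half */
	assert(!(t & TYPE_UDP));	/* not UDP */

	t = TRANSPORT_TYPE_IPV6_UDP;
	assert(!(t & (TYPE_IPV4 << 16)));	/* IPv6, not IPv4 */
	assert(t & TYPE_UDP);
	return (0);
}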
*/ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); - ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; - ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; + ifp->if_capabilities |= + IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO; + ifp->if_capenable |= + IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO; + ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; ret = hv_rf_on_device_add(device_ctx, &device_info); if (ret != 0) { @@ -347,7 +413,7 @@ mb = (struct mbuf *)(uintptr_t)packet->compl.send.send_completion_tid; buf = ((uint8_t *)packet) - HV_NV_PACKET_OFFSET_IN_BUF; - free(buf, M_DEVBUF); + free(buf, M_NETVSC); if (mb != NULL) { m_freem(mb); @@ -362,17 +428,29 @@ { hn_softc_t *sc = ifp->if_softc; struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev); + netvsc_dev *net_dev = sc->net_dev; + device_t dev = device_ctx->device; uint8_t *buf; netvsc_packet *packet; struct mbuf *m_head, *m; struct mbuf *mc_head = NULL; + struct ether_vlan_header *eh; + rndis_msg *rndis_mesg; + rndis_packet *rndis_pkt; + rndis_per_packet_info *rppi; + ndis_8021q_info *rppi_vlan_info; + rndis_tcp_ip_csum_info *csum_info; + rndis_tcp_tso_info *tso_info; + int ether_len; int i; int num_frags; int len; - int xlen; - int rppi_size; int retries = 0; - int ret = 0; + int ret = 0; + uint32_t rndis_msg_size = 0; + uint32_t trans_proto_type; + uint32_t send_buf_section_idx = + NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; while (!IFQ_DRV_IS_EMPTY(&sc->hn_ifp->if_snd)) { IFQ_DRV_DEQUEUE(&sc->hn_ifp->if_snd, m_head); @@ -382,7 +460,6 @@ len = 0; num_frags = 0; - xlen = 0; /* Walk the mbuf list computing total length and num frags */ for (m = m_head; m != NULL; m = m->m_next) { @@ -401,66 +478,224 @@ /* If exceeds # page_buffers in netvsc_packet */ if (num_frags > NETVSC_PACKET_MAXPAGE) { - m_freem(m); - + device_printf(dev, "exceed max page buffers,%d,%d\n", + num_frags, NETVSC_PACKET_MAXPAGE); + m_freem(m_head); + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (EINVAL); } - rppi_size = 0; - if (m_head->m_flags & M_VLANTAG) { - rppi_size = sizeof(rndis_per_packet_info) + - sizeof(ndis_8021q_info); - } - /* * Allocate a buffer with space for a netvsc packet plus a * number of reserved areas. First comes a (currently 16 * bytes, currently unused) reserved data area. Second is - * the netvsc_packet, which includes (currently 4) page - * buffers. Third (optional) is a rndis_per_packet_info - * struct, but only if a VLAN tag should be inserted into the - * Ethernet frame by the Hyper-V infrastructure. Fourth is - * an area reserved for an rndis_filter_packet struct. + * the netvsc_packet. Third is an area reserved for an + * rndis_filter_packet struct. Fourth (optional) is a + * rndis_per_packet_info struct. * Changed malloc to M_NOWAIT to avoid sleep under spin lock. * No longer reserving extra space for page buffers, as they * are already part of the netvsc_packet. 
*/ buf = malloc(HV_NV_PACKET_OFFSET_IN_BUF + - sizeof(netvsc_packet) + rppi_size + - sizeof(rndis_filter_packet), - M_DEVBUF, M_ZERO | M_NOWAIT); + sizeof(netvsc_packet) + + sizeof(rndis_msg) + + RNDIS_VLAN_PPI_SIZE + + RNDIS_TSO_PPI_SIZE + + RNDIS_CSUM_PPI_SIZE, + M_NETVSC, M_ZERO | M_NOWAIT); if (buf == NULL) { - m_freem(m); - + device_printf(dev, "hn:malloc packet failed\n"); + m_freem(m_head); + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENOMEM); } packet = (netvsc_packet *)(buf + HV_NV_PACKET_OFFSET_IN_BUF); *(vm_offset_t *)buf = HV_NV_SC_PTR_OFFSET_IN_BUF; + packet->is_data_pkt = TRUE; + + /* Set up the rndis header */ + packet->page_buf_count = num_frags; + + /* Initialize it from the mbuf */ + packet->tot_data_buf_len = len; + /* * extension points to the area reserved for the * rndis_filter_packet, which is placed just after * the netvsc_packet (and rppi struct, if present; * length is updated later). */ - packet->extension = packet + 1; + packet->rndis_mesg = packet + 1; + rndis_mesg = (rndis_msg *)packet->rndis_mesg; + rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG; + + rndis_pkt = &rndis_mesg->msg.packet; + rndis_pkt->data_offset = sizeof(rndis_packet); + rndis_pkt->data_length = packet->tot_data_buf_len; + rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet); - /* Set up the rndis header */ - packet->page_buf_count = num_frags; - - /* Initialize it from the mbuf */ - packet->tot_data_buf_len = len; + rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet); /* * If the Hyper-V infrastructure needs to embed a VLAN tag, * initialize netvsc_packet and rppi struct values as needed. */ - if (rppi_size) { - /* Lower layers need the VLAN TCI */ + if (m_head->m_flags & M_VLANTAG) { + /* + * set up some additional fields so the Hyper-V infrastructure will stuff the VLAN tag + * into the frame. + */ packet->vlan_tci = m_head->m_pkthdr.ether_vtag; + + rndis_msg_size += RNDIS_VLAN_PPI_SIZE; + + rppi = hv_set_rppi_data(rndis_mesg, RNDIS_VLAN_PPI_SIZE, + ieee_8021q_info); + + /* VLAN info immediately follows rppi struct */ + rppi_vlan_info = (ndis_8021q_info *)((char*)rppi + + rppi->per_packet_info_offset); + /* FreeBSD does not support CFI or priority */ + rppi_vlan_info->u1.s1.vlan_id = + packet->vlan_tci & 0xfff; + } + + if (0 == m_head->m_pkthdr.csum_flags) { + goto pre_send; + } + + eh = mtod(m_head, struct ether_vlan_header*); + if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { + ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; + } else { + ether_len = ETHER_HDR_LEN; + } + + trans_proto_type = get_transport_proto_type(m_head); + if (TRANSPORT_TYPE_NOT_IP == trans_proto_type) { + goto pre_send; + } + + /* + * TSO packet needless to setup the send side checksum + * offload. 
+ */ + if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { + goto do_tso; + } + + /* setup checksum offload */ + rndis_msg_size += RNDIS_CSUM_PPI_SIZE; + rppi = hv_set_rppi_data(rndis_mesg, RNDIS_CSUM_PPI_SIZE, + tcpip_chksum_info); + csum_info = (rndis_tcp_ip_csum_info *)((char*)rppi + + rppi->per_packet_info_offset); + + if (trans_proto_type & (TYPE_IPV4 << 16)) { + csum_info->xmit.is_ipv4 = 1; + } else { + csum_info->xmit.is_ipv6 = 1; + } + + if (trans_proto_type & TYPE_TCP) { + csum_info->xmit.tcp_csum = 1; + csum_info->xmit.tcp_header_offset = 0; + } else if (trans_proto_type & TYPE_UDP) { + csum_info->xmit.udp_csum = 1; } + goto pre_send; + +do_tso: + /* setup TCP segmentation offload */ + rndis_msg_size += RNDIS_TSO_PPI_SIZE; + rppi = hv_set_rppi_data(rndis_mesg, RNDIS_TSO_PPI_SIZE, + tcp_large_send_info); + + tso_info = (rndis_tcp_tso_info *)((char *)rppi + + rppi->per_packet_info_offset); + tso_info->lso_v2_xmit.type = + RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; + +#ifdef INET + if (trans_proto_type & (TYPE_IPV4 << 16)) { + struct ip *ip = + (struct ip *)(m_head->m_data + ether_len); + unsigned long iph_len = ip->ip_hl << 2; + struct tcphdr *th = + (struct tcphdr *)((caddr_t)ip + iph_len); + + tso_info->lso_v2_xmit.ip_version = + RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; + ip->ip_len = 0; + ip->ip_sum = 0; + + th->th_sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, + htons(IPPROTO_TCP)); + } +#endif +#if defined(INET6) && defined(INET) + else +#endif +#ifdef INET6 + { + struct ip6_hdr *ip6 = + (struct ip6_hdr *)(m_head->m_data + ether_len); + struct tcphdr *th = (struct tcphdr *)(ip6 + 1); + + tso_info->lso_v2_xmit.ip_version = + RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; + ip6->ip6_plen = 0; + th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); + } +#endif + tso_info->lso_v2_xmit.tcp_header_offset = 0; + tso_info->lso_v2_xmit.mss = m_head->m_pkthdr.tso_segsz; + +pre_send: + rndis_mesg->msg_len = packet->tot_data_buf_len + rndis_msg_size; + packet->tot_data_buf_len = rndis_mesg->msg_len; + + /* send packet with send buffer */ + if (packet->tot_data_buf_len < net_dev->send_section_size) { + send_buf_section_idx = + hv_nv_get_next_send_section(net_dev); + if (send_buf_section_idx != + NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { + char *dest = ((char *)net_dev->send_buf + + send_buf_section_idx * + net_dev->send_section_size); + + memcpy(dest, rndis_mesg, rndis_msg_size); + dest += rndis_msg_size; + for (m = m_head; m != NULL; m = m->m_next) { + if (m->m_len) { + memcpy(dest, + (void *)mtod(m, vm_offset_t), + m->m_len); + dest += m->m_len; + } + } + + packet->send_buf_section_idx = + send_buf_section_idx; + packet->send_buf_section_size = + packet->tot_data_buf_len; + packet->page_buf_count = 0; + goto do_send; + } + } + + /* send packet with page buffer */ + packet->page_buffers[0].pfn = + atop(hv_get_phys_addr(rndis_mesg)); + packet->page_buffers[0].offset = + (unsigned long)rndis_mesg & PAGE_MASK; + packet->page_buffers[0].length = rndis_msg_size; + /* * Fill the page buffers with mbuf info starting at index * HV_RF_NUM_TX_RESERVED_PAGE_BUFS. @@ -479,6 +714,12 @@ } } + packet->send_buf_section_idx = + NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; + packet->send_buf_section_size = 0; + +do_send: + /* * If bpf, copy the mbuf chain. 
This is less expensive than * it appears; the mbuf clusters are not copied, only their @@ -497,8 +738,7 @@ packet->compl.send.send_completion_tid = (uint64_t)(uintptr_t)m_head; /* Removed critical_enter(), does not appear necessary */ - ret = hv_rf_on_send(device_ctx, packet); - + ret = hv_nv_on_send(device_ctx, packet); if (ret == 0) { ifp->if_opackets++; /* if bpf && mc_head, call bpf_mtap code */ @@ -526,6 +766,7 @@ * send completion */ netvsc_xmit_completion(packet); + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } /* if bpf && mc_head, free the mbuf chain copy */ @@ -621,13 +862,14 @@ * Note: This is no longer used as a callback */ int -netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet) +netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet, + rndis_tcp_ip_csum_info *csum_info) { hn_softc_t *sc = (hn_softc_t *)device_get_softc(device_ctx->device); struct mbuf *m_new; struct ifnet *ifp; + device_t dev = device_ctx->device; int size; - int i; if (sc == NULL) { return (0); /* TODO: KYS how can this be! */ @@ -663,36 +905,35 @@ m_new = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, size); - if (m_new == NULL) + if (m_new == NULL) { + device_printf(dev, "alloc mbuf failed.\n"); return (0); + } - /* - * Remove trailing junk from RX data buffer. - * Fixme: This will not work for multiple Hyper-V RX buffers. - * Fortunately, the channel gathers all RX data into one buffer. - * - * L2 frame length, with L2 header, not including CRC - */ - packet->page_buffers[0].length = packet->tot_data_buf_len; + hv_m_append(m_new, packet->tot_data_buf_len, + packet->data); - /* - * Copy the received packet to one or more mbufs. - * The copy is required since the memory pointed to by netvsc_packet - * cannot be deallocated - */ - for (i=0; i < packet->page_buf_count; i++) { - /* Shift virtual page number to form virtual page address */ - uint8_t *vaddr = (uint8_t *)(uintptr_t) - (packet->page_buffers[i].pfn << PAGE_SHIFT); + m_new->m_pkthdr.rcvif = ifp; - hv_m_append(m_new, packet->page_buffers[i].length, - vaddr + packet->page_buffers[i].offset); - } + /* receive side checksum offload */ + m_new->m_pkthdr.csum_flags = 0; + if (NULL != csum_info) { + /* IP csum offload */ + if (csum_info->receive.ip_csum_succeeded) { + m_new->m_pkthdr.csum_flags |= + (CSUM_IP_CHECKED | CSUM_IP_VALID); + } - m_new->m_pkthdr.rcvif = ifp; + /* TCP csum offload */ + if (csum_info->receive.tcp_csum_succeeded) { + m_new->m_pkthdr.csum_flags |= + (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + m_new->m_pkthdr.csum_data = 0xffff; + } + } if ((packet->vlan_tci != 0) && - (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) { + (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) { m_new->m_pkthdr.ether_vtag = packet->vlan_tci; m_new->m_flags |= M_VLANTAG; } @@ -730,6 +971,9 @@ { hn_softc_t *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; +#ifdef INET + struct ifaddr *ifa = (struct ifaddr *)data; +#endif netvsc_device_info device_info; struct hv_device *hn_dev; int mask, error = 0; @@ -860,13 +1104,34 @@ break; case SIOCSIFCAP: mask = ifr->ifr_reqcap ^ ifp->if_capenable; - if (mask & IFCAP_HWCSUM) { - if (IFCAP_HWCSUM & ifp->if_capenable) { - ifp->if_capenable &= ~IFCAP_HWCSUM; + if (mask & IFCAP_TXCSUM) { + if (IFCAP_TXCSUM & ifp->if_capenable) { + ifp->if_capenable &= ~IFCAP_TXCSUM; + ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); + } else { + ifp->if_capenable |= IFCAP_TXCSUM; + ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); + } + } + + if (mask & IFCAP_RXCSUM) { + if (IFCAP_RXCSUM & ifp->if_capenable) { + ifp->if_capenable &= 
~IFCAP_RXCSUM; } else { - ifp->if_capenable |= IFCAP_HWCSUM; + ifp->if_capenable |= IFCAP_RXCSUM; } } + + if (mask & IFCAP_TSO4) { + ifp->if_capenable ^= IFCAP_TSO4; + ifp->if_hwassist ^= CSUM_IP_TSO; + } + + if (mask & IFCAP_TSO6) { + ifp->if_capenable ^= IFCAP_TSO6; + ifp->if_hwassist ^= CSUM_IP6_TSO; + } + error = 0; break; case SIOCADDMULTI: Index: stable/10/sys/dev/hyperv/netvsc/hv_rndis.h =================================================================== --- stable/10/sys/dev/hyperv/netvsc/hv_rndis.h +++ stable/10/sys/dev/hyperv/netvsc/hv_rndis.h @@ -24,6 +24,8 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ */ #ifndef __HV_RNDIS_H__ @@ -36,6 +38,9 @@ #define NDIS_VERSION_5_0 0x00050000 #define NDIS_VERSION_5_1 0x00050001 #define NDIS_VERSION_6_0 0x00060000 +#define NDIS_VERSION_6_1 0x00060001 +#define NDIS_VERSION_6_30 0x0006001e + #define NDIS_VERSION (NDIS_VERSION_5_1) /* @@ -347,6 +352,36 @@ #define RNDIS_MAJOR_VERSION 0x00000001 #define RNDIS_MINOR_VERSION 0x00000000 + +/* + * Remote NDIS offload parameters + */ +#define RNDIS_OBJECT_TYPE_DEFAULT 0x80 + +#define RNDIS_OFFLOAD_PARAMETERS_REVISION_3 3 +#define RNDIS_OFFLOAD_PARAMETERS_NO_CHANGE 0 +#define RNDIS_OFFLOAD_PARAMETERS_LSOV2_DISABLED 1 +#define RNDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED 2 +#define RNDIS_OFFLOAD_PARAMETERS_LSOV1_ENABLED 2 +#define RNDIS_OFFLOAD_PARAMETERS_RSC_DISABLED 1 +#define RNDIS_OFFLOAD_PARAMETERS_RSC_ENABLED 2 +#define RNDIS_OFFLOAD_PARAMETERS_TX_RX_DISABLED 1 +#define RNDIS_OFFLOAD_PARAMETERS_TX_ENABLED_RX_DISABLED 2 +#define RNDIS_OFFLOAD_PARAMETERS_RX_ENABLED_TX_DISABLED 3 +#define RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED 4 + +#define RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE 1 +#define RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4 0 +#define RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV6 1 + + +#define RNDIS_OID_TCP_OFFLOAD_CURRENT_CONFIG 0xFC01020B /* query only */ +#define RNDIS_OID_TCP_OFFLOAD_PARAMETERS 0xFC01020C /* set only */ +#define RNDIS_OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020D/* query only */ +#define RNDIS_OID_TCP_CONNECTION_OFFLOAD_CURRENT_CONFIG 0xFC01020E /* query only */ +#define RNDIS_OID_TCP_CONNECTION_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020F /* query */ +#define RNDIS_OID_OFFLOAD_ENCAPSULATION 0x0101010A /* set/query */ + /* * NdisInitialize message */ @@ -585,6 +620,108 @@ } u1; } ndis_8021q_info; +struct rndis_object_header { + uint8_t type; + uint8_t revision; + uint16_t size; +}; + +typedef struct rndis_offload_params_ { + struct rndis_object_header header; + uint8_t ipv4_csum; + uint8_t tcp_ipv4_csum; + uint8_t udp_ipv4_csum; + uint8_t tcp_ipv6_csum; + uint8_t udp_ipv6_csum; + uint8_t lso_v1; + uint8_t ip_sec_v1; + uint8_t lso_v2_ipv4; + uint8_t lso_v2_ipv6; + uint8_t tcp_connection_ipv4; + uint8_t tcp_connection_ipv6; + uint32_t flags; + uint8_t ip_sec_v2; + uint8_t ip_sec_v2_ipv4; + struct { + uint8_t rsc_ipv4; + uint8_t rsc_ipv6; + }; + struct { + uint8_t encapsulated_packet_task_offload; + uint8_t encapsulation_types; + }; + +} rndis_offload_params; + + +typedef struct rndis_tcp_ip_csum_info_ { + union { + struct { + uint32_t is_ipv4:1; + uint32_t is_ipv6:1; + uint32_t tcp_csum:1; + uint32_t udp_csum:1; + uint32_t ip_header_csum:1; + uint32_t reserved:11; + uint32_t tcp_header_offset:10; + } xmit; + struct { + uint32_t tcp_csum_failed:1; + uint32_t udp_csum_failed:1; + uint32_t ip_csum_failed:1; + uint32_t 
tcp_csum_succeeded:1; + uint32_t udp_csum_succeeded:1; + uint32_t ip_csum_succeeded:1; + uint32_t loopback:1; + uint32_t tcp_csum_value_invalid:1; + uint32_t ip_csum_value_invalid:1; + } receive; + uint32_t value; + }; +} rndis_tcp_ip_csum_info; + +typedef struct rndis_tcp_tso_info_ { + union { + struct { + uint32_t unused:30; + uint32_t type:1; + uint32_t reserved2:1; + } xmit; + struct { + uint32_t mss:20; + uint32_t tcp_header_offset:10; + uint32_t type:1; + uint32_t reserved2:1; + } lso_v1_xmit; + struct { + uint32_t tcp_payload:30; + uint32_t type:1; + uint32_t reserved2:1; + } lso_v1_xmit_complete; + struct { + uint32_t mss:20; + uint32_t tcp_header_offset:10; + uint32_t type:1; + uint32_t ip_version:1; + } lso_v2_xmit; + struct { + uint32_t reserved:30; + uint32_t type:1; + uint32_t reserved2:1; + } lso_v2_xmit_complete; + uint32_t value; + }; +} rndis_tcp_tso_info; + +#define RNDIS_VLAN_PPI_SIZE (sizeof(rndis_per_packet_info) + \ + sizeof(ndis_8021q_info)) + +#define RNDIS_CSUM_PPI_SIZE (sizeof(rndis_per_packet_info) + \ + sizeof(rndis_tcp_ip_csum_info)) + +#define RNDIS_TSO_PPI_SIZE (sizeof(rndis_per_packet_info) + \ + sizeof(rndis_tcp_tso_info)) + /* * Format of Information buffer passed in a SetRequest for the OID * OID_GEN_RNDIS_CONFIG_PARAMETER. @@ -906,6 +1043,18 @@ #define NDIS_PACKET_TYPE_FUNCTIONAL 0x00000400 #define NDIS_PACKET_TYPE_MAC_FRAME 0x00000800 +/* + * Externs + */ +int netvsc_recv(struct hv_device *device_ctx, + netvsc_packet *packet, + rndis_tcp_ip_csum_info *csum_info); + +void* hv_set_rppi_data(rndis_msg *rndis_mesg, + uint32_t rppi_size, + int pkt_type); + +void* hv_get_ppi_data(rndis_packet *rpkt, uint32_t type); #endif /* __HV_RNDIS_H__ */ Index: stable/10/sys/dev/hyperv/netvsc/hv_rndis_filter.h =================================================================== --- stable/10/sys/dev/hyperv/netvsc/hv_rndis_filter.h +++ stable/10/sys/dev/hyperv/netvsc/hv_rndis_filter.h @@ -24,6 +24,8 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ */ #ifndef __HV_RNDIS_FILTER_H__ @@ -90,27 +92,16 @@ uint8_t hw_mac_addr[HW_MACADDR_LEN]; } rndis_device; -typedef struct rndis_filter_packet_ { - void *completion_context; - /* No longer used */ - pfn_on_send_rx_completion on_completion; - - rndis_msg message; -} rndis_filter_packet; - - /* * Externs */ -extern int hv_rf_on_receive(struct hv_device *device, netvsc_packet *pkt); -extern int hv_rf_on_device_add(struct hv_device *device, void *additl_info); -extern int hv_rf_on_device_remove(struct hv_device *device, - boolean_t destroy_channel); -extern int hv_rf_on_open(struct hv_device *device); -extern int hv_rf_on_close(struct hv_device *device); -extern int hv_rf_on_send(struct hv_device *device, netvsc_packet *pkt); - +int hv_rf_on_receive(netvsc_dev *net_dev, + struct hv_device *device, netvsc_packet *pkt); +int hv_rf_on_device_add(struct hv_device *device, void *additl_info); +int hv_rf_on_device_remove(struct hv_device *device, boolean_t destroy_channel); +int hv_rf_on_open(struct hv_device *device); +int hv_rf_on_close(struct hv_device *device); #endif /* __HV_RNDIS_FILTER_H__ */ Index: stable/10/sys/dev/hyperv/netvsc/hv_rndis_filter.c =================================================================== --- stable/10/sys/dev/hyperv/netvsc/hv_rndis_filter.c +++ stable/10/sys/dev/hyperv/netvsc/hv_rndis_filter.c @@ -67,9 +67,64 @@ static int hv_rf_init_device(rndis_device *device); static int hv_rf_open_device(rndis_device *device); static int hv_rf_close_device(rndis_device *device); -static void hv_rf_on_send_completion(void *context); static void hv_rf_on_send_request_completion(void *context); static void hv_rf_on_send_request_halt_completion(void *context); +int +hv_rf_send_offload_request(struct hv_device *device, + rndis_offload_params *offloads); +/* + * Set the Per-Packet-Info with the specified type + */ +void * +hv_set_rppi_data(rndis_msg *rndis_mesg, uint32_t rppi_size, + int pkt_type) +{ + rndis_packet *rndis_pkt; + rndis_per_packet_info *rppi; + + rndis_pkt = &rndis_mesg->msg.packet; + rndis_pkt->data_offset += rppi_size; + + rppi = (rndis_per_packet_info *)((char *)rndis_pkt + + rndis_pkt->per_pkt_info_offset + rndis_pkt->per_pkt_info_length); + + rppi->size = rppi_size; + rppi->type = pkt_type; + rppi->per_packet_info_offset = sizeof(rndis_per_packet_info); + + rndis_pkt->per_pkt_info_length += rppi_size; + + return (rppi); +} + +/* + * Get the Per-Packet-Info with the specified type + * return NULL if not found. 
+ */ +void * +hv_get_ppi_data(rndis_packet *rpkt, uint32_t type) +{ + rndis_per_packet_info *ppi; + int len; + + if (rpkt->per_pkt_info_offset == 0) + return (NULL); + + ppi = (rndis_per_packet_info *)((unsigned long)rpkt + + rpkt->per_pkt_info_offset); + len = rpkt->per_pkt_info_length; + + while (len > 0) { + if (ppi->type == type) + return (void *)((unsigned long)ppi + + ppi->per_packet_info_offset); + + len -= ppi->size; + ppi = (rndis_per_packet_info *)((unsigned long)ppi + ppi->size); + } + + return (NULL); +} /* @@ -80,7 +135,7 @@ { rndis_device *device; - device = malloc(sizeof(rndis_device), M_DEVBUF, M_NOWAIT | M_ZERO); + device = malloc(sizeof(rndis_device), M_NETVSC, M_NOWAIT | M_ZERO); if (device == NULL) { return (NULL); } @@ -102,7 +157,7 @@ hv_put_rndis_device(rndis_device *device) { mtx_destroy(&device->req_lock); - free(device, M_DEVBUF); + free(device, M_NETVSC); } /* @@ -116,7 +171,7 @@ rndis_msg *rndis_mesg; rndis_set_request *set; - request = malloc(sizeof(rndis_request), M_DEVBUF, M_NOWAIT | M_ZERO); + request = malloc(sizeof(rndis_request), M_NETVSC, M_NOWAIT | M_ZERO); if (request == NULL) { return (NULL); } @@ -161,7 +216,7 @@ mtx_unlock_spin(&device->req_lock); sema_destroy(&request->wait_sema); - free(request, M_DEVBUF); + free(request, M_NETVSC); } /* @@ -169,7 +224,7 @@ */ static int hv_rf_send_request(rndis_device *device, rndis_request *request, - uint32_t message_type) + uint32_t message_type) { int ret; netvsc_packet *packet; @@ -196,6 +251,9 @@ hv_rf_on_send_request_halt_completion; } packet->compl.send.send_completion_tid = (unsigned long)device; + packet->send_buf_section_idx = + NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; + packet->send_buf_section_size = 0; ret = hv_nv_on_send(device->net_dev->dev, packet); @@ -248,6 +306,84 @@ } } +int +hv_rf_send_offload_request(struct hv_device *device, + rndis_offload_params *offloads) +{ + rndis_request *request; + rndis_set_request *set; + rndis_offload_params *offload_req; + rndis_set_complete *set_complete; + rndis_device *rndis_dev; + hn_softc_t *sc = device_get_softc(device->device); + device_t dev = device->device; + netvsc_dev *net_dev = sc->net_dev; + uint32_t vsp_version = net_dev->nvsp_version; + uint32_t extlen = sizeof(rndis_offload_params); + int ret; + + if (vsp_version <= NVSP_PROTOCOL_VERSION_4) { + extlen = VERSION_4_OFFLOAD_SIZE; + /* On NVSP_PROTOCOL_VERSION_4 and below, we do not support + * UDP checksum offload. 
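/*
 * Editor's note: a minimal sketch (not part of the patch) of the
 * per-packet-info (PPI) list managed by hv_set_rppi_data() and
 * hv_get_ppi_data() above.  Each PPI is a small header followed by its
 * payload (VLAN, checksum or TSO info), and the records are packed back to
 * back between the rndis_packet header and the frame data:
 *
 *   rndis_packet | PPI hdr | payload | PPI hdr | payload | ... | frame
 *
 * struct ppi_hdr is a simplified stand-in for rndis_per_packet_info, and
 * unlike the driver this walk also guards against a malformed size field.
 */
#include <stdint.h>
#include <stddef.h>

struct ppi_hdr {
	uint32_t size;		/* header plus payload, in bytes */
	uint32_t type;		/* e.g. ieee_8021q_info, tcpip_chksum_info */
	uint32_t payload_off;	/* payload offset from this header */
};

/* Find the payload of the first PPI of 'type' in a packed list of 'len' bytes. */
static void *
find_ppi(void *list, uint32_t len, uint32_t type)
{
	uint8_t *p = list;

	while (len >= sizeof(struct ppi_hdr)) {
		struct ppi_hdr *h = (struct ppi_hdr *)p;

		if (h->type == type)
			return (p + h->payload_off);
		if (h->size < sizeof(struct ppi_hdr) || h->size > len)
			break;	/* malformed; stop walking */
		len -= h->size;
		p += h->size;
	}
	return (NULL);
}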
+ */ + offloads->udp_ipv4_csum = 0; + offloads->udp_ipv6_csum = 0; + } + + rndis_dev = net_dev->extension; + + request = hv_rndis_request(rndis_dev, REMOTE_NDIS_SET_MSG, + RNDIS_MESSAGE_SIZE(rndis_set_request) + extlen); + if (!request) + return (ENOMEM); + + set = &request->request_msg.msg.set_request; + set->oid = RNDIS_OID_TCP_OFFLOAD_PARAMETERS; + set->info_buffer_length = extlen; + set->info_buffer_offset = sizeof(rndis_set_request); + set->device_vc_handle = 0; + + offload_req = (rndis_offload_params *)((unsigned long)set + + set->info_buffer_offset); + *offload_req = *offloads; + offload_req->header.type = RNDIS_OBJECT_TYPE_DEFAULT; + offload_req->header.revision = RNDIS_OFFLOAD_PARAMETERS_REVISION_3; + offload_req->header.size = extlen; + + ret = hv_rf_send_request(rndis_dev, request, REMOTE_NDIS_SET_MSG); + if (ret != 0) { + device_printf(dev, "hv send offload request failed, ret=%d!\n", + ret); + goto cleanup; + } + + ret = sema_timedwait(&request->wait_sema, 500); + if (ret != 0) { + device_printf(dev, "hv send offload request timeout\n"); + goto cleanup; + } + + set_complete = &request->response_msg.msg.set_complete; + if (set_complete->status == RNDIS_STATUS_SUCCESS) { + device_printf(dev, "hv send offload request succeeded\n"); + ret = 0; + } else { + if (set_complete->status == STATUS_NOT_SUPPORTED) { + device_printf(dev, "HV Not support offload\n"); + ret = 0; + } else { + ret = set_complete->status; + } + } + +cleanup: + if (request) + hv_put_rndis_request(rndis_dev, request); + + return (ret); +} + /* * RNDIS filter receive indicate status */ @@ -256,12 +392,18 @@ { rndis_indicate_status *indicate = &response->msg.indicate_status; - if (indicate->status == RNDIS_STATUS_MEDIA_CONNECT) { + switch(indicate->status) { + case RNDIS_STATUS_MEDIA_CONNECT: netvsc_linkstatus_callback(device->net_dev->dev, 1); - } else if (indicate->status == RNDIS_STATUS_MEDIA_DISCONNECT) { + break; + case RNDIS_STATUS_MEDIA_DISCONNECT: netvsc_linkstatus_callback(device->net_dev->dev, 0); - } else { + break; + default: /* TODO: */ + device_printf(device->net_dev->dev->device, + "unknown status %d received\n", indicate->status); + break; } } @@ -272,9 +414,10 @@ hv_rf_receive_data(rndis_device *device, rndis_msg *message, netvsc_packet *pkt) { rndis_packet *rndis_pkt; - rndis_per_packet_info *rppi; - ndis_8021q_info *rppi_vlan_info; + ndis_8021q_info *rppi_vlan_info; uint32_t data_offset; + rndis_tcp_ip_csum_info *csum_info = NULL; + device_t dev = device->net_dev->dev->device; rndis_pkt = &message->msg.packet; @@ -286,88 +429,57 @@ /* Remove rndis header, then pass data packet up the stack */ data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset; - /* L2 frame length, with L2 header, not including CRC */ - pkt->tot_data_buf_len = rndis_pkt->data_length; - pkt->page_buffers[0].offset += data_offset; - /* Buffer length now L2 frame length plus trailing junk */ - pkt->page_buffers[0].length -= data_offset; - - pkt->is_data_pkt = TRUE; - - pkt->vlan_tci = 0; + pkt->tot_data_buf_len -= data_offset; + if (pkt->tot_data_buf_len < rndis_pkt->data_length) { + pkt->status = nvsp_status_failure; + device_printf(dev, + "total length %u is less than data length %u\n", + pkt->tot_data_buf_len, rndis_pkt->data_length); + return; + } - /* - * Read the VLAN ID if supplied by the Hyper-V infrastructure. - * Let higher-level driver code decide if it wants to use it. - * Ignore CFI, priority for now as FreeBSD does not support these. 
- */ - if (rndis_pkt->per_pkt_info_offset != 0) { - /* rppi struct exists; compute its address */ - rppi = (rndis_per_packet_info *)((uint8_t *)rndis_pkt + - rndis_pkt->per_pkt_info_offset); - /* if VLAN ppi struct, get the VLAN ID */ - if (rppi->type == ieee_8021q_info) { - rppi_vlan_info = (ndis_8021q_info *)((uint8_t *)rppi - + rppi->per_packet_info_offset); - pkt->vlan_tci = rppi_vlan_info->u1.s1.vlan_id; - } + pkt->tot_data_buf_len = rndis_pkt->data_length; + pkt->data = (void *)((unsigned long)pkt->data + data_offset); + + rppi_vlan_info = hv_get_ppi_data(rndis_pkt, ieee_8021q_info); + if (rppi_vlan_info) { + pkt->vlan_tci = rppi_vlan_info->u1.s1.vlan_id; + } else { + pkt->vlan_tci = 0; } - netvsc_recv(device->net_dev->dev, pkt); + csum_info = hv_get_ppi_data(rndis_pkt, tcpip_chksum_info); + netvsc_recv(device->net_dev->dev, pkt, csum_info); } /* * RNDIS filter on receive */ int -hv_rf_on_receive(struct hv_device *device, netvsc_packet *pkt) +hv_rf_on_receive(netvsc_dev *net_dev, struct hv_device *device, netvsc_packet *pkt) { - hn_softc_t *sc = device_get_softc(device->device); - netvsc_dev *net_dev = sc->net_dev; rndis_device *rndis_dev; - rndis_msg rndis_mesg; rndis_msg *rndis_hdr; /* Make sure the rndis device state is initialized */ - if (net_dev->extension == NULL) + if (net_dev->extension == NULL) { + pkt->status = nvsp_status_failure; return (ENODEV); + } rndis_dev = (rndis_device *)net_dev->extension; - if (rndis_dev->state == RNDIS_DEV_UNINITIALIZED) + if (rndis_dev->state == RNDIS_DEV_UNINITIALIZED) { + pkt->status = nvsp_status_failure; return (EINVAL); - - /* Shift virtual page number to form virtual page address */ - rndis_hdr = (rndis_msg *)(uintptr_t)(pkt->page_buffers[0].pfn << PAGE_SHIFT); - - rndis_hdr = (void *)((unsigned long)rndis_hdr - + pkt->page_buffers[0].offset); - - /* - * Make sure we got a valid rndis message - * Fixme: There seems to be a bug in set completion msg where - * its msg_len is 16 bytes but the byte_count field in the - * xfer page range shows 52 bytes - */ -#if 0 - if (pkt->tot_data_buf_len != rndis_hdr->msg_len) { - DPRINT_ERR(NETVSC, "invalid rndis message? (expected %u " - "bytes got %u)... dropping this message!", - rndis_hdr->msg_len, pkt->tot_data_buf_len); - DPRINT_EXIT(NETVSC); - - return (-1); } -#endif - memcpy(&rndis_mesg, rndis_hdr, - (rndis_hdr->msg_len > sizeof(rndis_msg)) ? 
- sizeof(rndis_msg) : rndis_hdr->msg_len); + rndis_hdr = pkt->data; - switch (rndis_mesg.ndis_msg_type) { + switch (rndis_hdr->ndis_msg_type) { /* data message */ case REMOTE_NDIS_PACKET_MSG: - hv_rf_receive_data(rndis_dev, &rndis_mesg, pkt); + hv_rf_receive_data(rndis_dev, rndis_hdr, pkt); break; /* completion messages */ case REMOTE_NDIS_INITIALIZE_CMPLT: @@ -375,15 +487,15 @@ case REMOTE_NDIS_SET_CMPLT: case REMOTE_NDIS_RESET_CMPLT: case REMOTE_NDIS_KEEPALIVE_CMPLT: - hv_rf_receive_response(rndis_dev, &rndis_mesg); + hv_rf_receive_response(rndis_dev, rndis_hdr); break; /* notification message */ case REMOTE_NDIS_INDICATE_STATUS_MSG: - hv_rf_receive_indicate_status(rndis_dev, &rndis_mesg); + hv_rf_receive_indicate_status(rndis_dev, rndis_hdr); break; default: printf("hv_rf_on_receive(): Unknown msg_type 0x%x\n", - rndis_mesg.ndis_msg_type); + rndis_hdr->ndis_msg_type); break; } @@ -711,7 +823,9 @@ int ret; netvsc_dev *net_dev; rndis_device *rndis_dev; + rndis_offload_params offloads; netvsc_device_info *dev_info = (netvsc_device_info *)additl_info; + device_t dev = device->device; rndis_dev = hv_get_rndis_device(); if (rndis_dev == NULL) { @@ -752,6 +866,22 @@ if (ret != 0) { /* TODO: shut down rndis device and the channel */ } + + /* config csum offload and send request to host */ + memset(&offloads, 0, sizeof(offloads)); + offloads.ipv4_csum = RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; + offloads.tcp_ipv4_csum = RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; + offloads.udp_ipv4_csum = RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; + offloads.tcp_ipv6_csum = RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; + offloads.udp_ipv6_csum = RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; + offloads.lso_v2_ipv4 = RNDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; + + ret = hv_rf_send_offload_request(device, &offloads); + if (ret != 0) { + /* TODO: shut down rndis device and the channel */ + device_printf(dev, + "hv_rf_send_offload_request failed, ret=%d\n", ret); + } memcpy(dev_info->mac_addr, rndis_dev->hw_mac_addr, HW_MACADDR_LEN); @@ -810,103 +940,6 @@ } /* - * RNDIS filter on send - */ -int -hv_rf_on_send(struct hv_device *device, netvsc_packet *pkt) -{ - rndis_filter_packet *filter_pkt; - rndis_msg *rndis_mesg; - rndis_packet *rndis_pkt; - rndis_per_packet_info *rppi; - ndis_8021q_info *rppi_vlan_info; - uint32_t rndis_msg_size; - int ret = 0; - - /* Add the rndis header */ - filter_pkt = (rndis_filter_packet *)pkt->extension; - - memset(filter_pkt, 0, sizeof(rndis_filter_packet)); - - rndis_mesg = &filter_pkt->message; - rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet); - - if (pkt->vlan_tci != 0) { - rndis_msg_size += sizeof(rndis_per_packet_info) + - sizeof(ndis_8021q_info); - } - - rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG; - rndis_mesg->msg_len = pkt->tot_data_buf_len + rndis_msg_size; - - rndis_pkt = &rndis_mesg->msg.packet; - rndis_pkt->data_offset = sizeof(rndis_packet); - rndis_pkt->data_length = pkt->tot_data_buf_len; - - pkt->is_data_pkt = TRUE; - pkt->page_buffers[0].pfn = hv_get_phys_addr(rndis_mesg) >> PAGE_SHIFT; - pkt->page_buffers[0].offset = - (unsigned long)rndis_mesg & (PAGE_SIZE - 1); - pkt->page_buffers[0].length = rndis_msg_size; - - /* Save the packet context */ - filter_pkt->completion_context = - pkt->compl.send.send_completion_context; - - /* Use ours */ - pkt->compl.send.on_send_completion = hv_rf_on_send_completion; - pkt->compl.send.send_completion_context = filter_pkt; - - /* - * If there is a VLAN tag, we need to set up some additional - * fields so the Hyper-V infrastructure will stuff the 
VLAN tag - * into the frame. - */ - if (pkt->vlan_tci != 0) { - /* Move data offset past end of rppi + VLAN structs */ - rndis_pkt->data_offset += sizeof(rndis_per_packet_info) + - sizeof(ndis_8021q_info); - - /* must be set when we have rppi, VLAN info */ - rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet); - rndis_pkt->per_pkt_info_length = sizeof(rndis_per_packet_info) + - sizeof(ndis_8021q_info); - - /* rppi immediately follows rndis_pkt */ - rppi = (rndis_per_packet_info *)(rndis_pkt + 1); - rppi->size = sizeof(rndis_per_packet_info) + - sizeof(ndis_8021q_info); - rppi->type = ieee_8021q_info; - rppi->per_packet_info_offset = sizeof(rndis_per_packet_info); - - /* VLAN info immediately follows rppi struct */ - rppi_vlan_info = (ndis_8021q_info *)(rppi + 1); - /* FreeBSD does not support CFI or priority */ - rppi_vlan_info->u1.s1.vlan_id = pkt->vlan_tci & 0xfff; - } - - /* - * Invoke netvsc send. If return status is bad, the caller now - * resets the context pointers before retrying. - */ - ret = hv_nv_on_send(device, pkt); - - return (ret); -} - -/* - * RNDIS filter on send completion callback - */ -static void -hv_rf_on_send_completion(void *context) -{ - rndis_filter_packet *filter_pkt = (rndis_filter_packet *)context; - - /* Pass it back to the original handler */ - netvsc_xmit_completion(filter_pkt->completion_context); -} - -/* * RNDIS filter on send request completion callback */ static void
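/*
 * Editor's note: a minimal sketch (not part of the patch) of the TSO
 * preparation performed in the IPv4 branch of hn_start_locked() above: the
 * IP total length and checksum are cleared and th_sum is seeded with a
 * pseudo-header sum that deliberately omits the TCP length, because the
 * host fills in per-segment lengths when it resegments the packet.
 * pseudo_sum_v4() is a rough userspace equivalent of the in_pseudo() call
 * the driver uses for that purpose; src and dst are expected in network
 * byte order, as taken from the IP header.
 */
#include <stdint.h>
#include <arpa/inet.h>	/* htons() */

static uint16_t
pseudo_sum_v4(uint32_t src, uint32_t dst, uint8_t proto)
{
	uint64_t sum;

	/* One's-complement sum of src, dst and the protocol word only. */
	sum = (src & 0xffff) + (src >> 16) +
	    (dst & 0xffff) + (dst >> 16) +
	    htons(proto);
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)sum);
}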