Index: head/sys/dev/hyperv/netvsc/hv_net_vsc.h =================================================================== --- head/sys/dev/hyperv/netvsc/hv_net_vsc.h (revision 299400) +++ head/sys/dev/hyperv/netvsc/hv_net_vsc.h (revision 299401) @@ -1,1274 +1,1275 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * HyperV vmbus (virtual machine bus) network VSC (virtual services client) * header file * * (Updated from unencumbered NvspProtocol.h) */ #ifndef __HV_NET_VSC_H__ #define __HV_NET_VSC_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define HN_USE_TXDESC_BUFRING MALLOC_DECLARE(M_NETVSC); #define NVSP_INVALID_PROTOCOL_VERSION (0xFFFFFFFF) #define NVSP_PROTOCOL_VERSION_1 2 #define NVSP_PROTOCOL_VERSION_2 0x30002 #define NVSP_PROTOCOL_VERSION_4 0x40000 #define NVSP_PROTOCOL_VERSION_5 0x50000 #define NVSP_MIN_PROTOCOL_VERSION (NVSP_PROTOCOL_VERSION_1) #define NVSP_MAX_PROTOCOL_VERSION (NVSP_PROTOCOL_VERSION_2) #define NVSP_PROTOCOL_VERSION_CURRENT NVSP_PROTOCOL_VERSION_2 #define VERSION_4_OFFLOAD_SIZE 22 #define NVSP_OPERATIONAL_STATUS_OK (0x00000000) #define NVSP_OPERATIONAL_STATUS_DEGRADED (0x00000001) #define NVSP_OPERATIONAL_STATUS_NONRECOVERABLE (0x00000002) #define NVSP_OPERATIONAL_STATUS_NO_CONTACT (0x00000003) #define NVSP_OPERATIONAL_STATUS_LOST_COMMUNICATION (0x00000004) /* * Maximum number of transfer pages (packets) the VSP will use on a receive */ #define NVSP_MAX_PACKETS_PER_RECEIVE 375 /* vRSS stuff */ #define RNDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88 #define RNDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89 #define RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2 #define RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2 struct rndis_obj_header { uint8_t type; uint8_t rev; uint16_t size; } __packed; /* rndis_recv_scale_cap/cap_flag */ #define RNDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000 #define RNDIS_RSS_CAPS_CLASSIFICATION_AT_ISR 0x02000000 #define RNDIS_RSS_CAPS_CLASSIFICATION_AT_DPC 0x04000000 #define RNDIS_RSS_CAPS_USING_MSI_X 0x08000000 #define RNDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS 0x10000000 #define RNDIS_RSS_CAPS_SUPPORTS_MSI_X 0x20000000 #define 
RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4 0x00000100 #define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6 0x00000200 #define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX 0x00000400 /* RNDIS_RECEIVE_SCALE_CAPABILITIES */ struct rndis_recv_scale_cap { struct rndis_obj_header hdr; uint32_t cap_flag; uint32_t num_int_msg; uint32_t num_recv_que; uint16_t num_indirect_tabent; } __packed; /* rndis_recv_scale_param flags */ #define RNDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED 0x0001 #define RNDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED 0x0002 #define RNDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED 0x0004 #define RNDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED 0x0008 #define RNDIS_RSS_PARAM_FLAG_DISABLE_RSS 0x0010 /* Hash info bits */ #define RNDIS_HASH_FUNC_TOEPLITZ 0x00000001 #define RNDIS_HASH_IPV4 0x00000100 #define RNDIS_HASH_TCP_IPV4 0x00000200 #define RNDIS_HASH_IPV6 0x00000400 #define RNDIS_HASH_IPV6_EX 0x00000800 #define RNDIS_HASH_TCP_IPV6 0x00001000 #define RNDIS_HASH_TCP_IPV6_EX 0x00002000 #define RNDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4) #define RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40 #define ITAB_NUM 128 #define HASH_KEYLEN RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 /* RNDIS_RECEIVE_SCALE_PARAMETERS */ typedef struct rndis_recv_scale_param_ { struct rndis_obj_header hdr; /* Qualifies the rest of the information */ uint16_t flag; /* The base CPU number to do receive processing. not used */ uint16_t base_cpu_number; /* This describes the hash function and type being enabled */ uint32_t hashinfo; /* The size of indirection table array */ uint16_t indirect_tabsize; /* The offset of the indirection table from the beginning of this * structure */ uint32_t indirect_taboffset; /* The size of the hash secret key */ uint16_t hashkey_size; /* The offset of the secret key from the beginning of this structure */ uint32_t hashkey_offset; uint32_t processor_masks_offset; uint32_t num_processor_masks; uint32_t processor_masks_entry_size; } rndis_recv_scale_param; typedef enum nvsp_msg_type_ { nvsp_msg_type_none = 0, /* * Init Messages */ nvsp_msg_type_init = 1, nvsp_msg_type_init_complete = 2, nvsp_version_msg_start = 100, /* * Version 1 Messages */ nvsp_msg_1_type_send_ndis_vers = nvsp_version_msg_start, nvsp_msg_1_type_send_rx_buf, nvsp_msg_1_type_send_rx_buf_complete, nvsp_msg_1_type_revoke_rx_buf, nvsp_msg_1_type_send_send_buf, nvsp_msg_1_type_send_send_buf_complete, nvsp_msg_1_type_revoke_send_buf, nvsp_msg_1_type_send_rndis_pkt, nvsp_msg_1_type_send_rndis_pkt_complete, /* * Version 2 Messages */ nvsp_msg_2_type_send_chimney_delegated_buf, nvsp_msg_2_type_send_chimney_delegated_buf_complete, nvsp_msg_2_type_revoke_chimney_delegated_buf, nvsp_msg_2_type_resume_chimney_rx_indication, nvsp_msg_2_type_terminate_chimney, nvsp_msg_2_type_terminate_chimney_complete, nvsp_msg_2_type_indicate_chimney_event, nvsp_msg_2_type_send_chimney_packet, nvsp_msg_2_type_send_chimney_packet_complete, nvsp_msg_2_type_post_chimney_rx_request, nvsp_msg_2_type_post_chimney_rx_request_complete, nvsp_msg_2_type_alloc_rx_buf, nvsp_msg_2_type_alloc_rx_buf_complete, nvsp_msg_2_type_free_rx_buf, nvsp_msg_2_send_vmq_rndis_pkt, nvsp_msg_2_send_vmq_rndis_pkt_complete, nvsp_msg_2_type_send_ndis_config, nvsp_msg_2_type_alloc_chimney_handle, nvsp_msg_2_type_alloc_chimney_handle_complete, nvsp_msg2_max = nvsp_msg_2_type_alloc_chimney_handle_complete, /* * Version 4 Messages */ nvsp_msg4_type_send_vf_association, nvsp_msg4_type_switch_data_path, nvsp_msg4_type_uplink_connect_state_deprecated, nvsp_msg4_max = nvsp_msg4_type_uplink_connect_state_deprecated, /* * 
Version 5 Messages */ nvsp_msg5_type_oid_query_ex, nvsp_msg5_type_oid_query_ex_comp, nvsp_msg5_type_subchannel, nvsp_msg5_type_send_indirection_table, nvsp_msg5_max = nvsp_msg5_type_send_indirection_table, } nvsp_msg_type; typedef enum nvsp_status_ { nvsp_status_none = 0, nvsp_status_success, nvsp_status_failure, /* Deprecated */ nvsp_status_prot_vers_range_too_new, /* Deprecated */ nvsp_status_prot_vers_range_too_old, nvsp_status_invalid_rndis_pkt, nvsp_status_busy, nvsp_status_max, } nvsp_status; typedef struct nvsp_msg_hdr_ { uint32_t msg_type; } __packed nvsp_msg_hdr; /* * Init Messages */ /* * This message is used by the VSC to initialize the channel * after the channel has been opened. This message should * never include anything other than versioning (i.e. this * message will be the same forever). * * Forever is a long time. The values have been redefined * in Win7 to indicate major and minor protocol version * number. */ typedef struct nvsp_msg_init_ { union { struct { uint16_t minor_protocol_version; uint16_t major_protocol_version; } s; /* Formerly min_protocol_version */ uint32_t protocol_version; } p1; /* Formerly max_protocol_version */ uint32_t protocol_version_2; } __packed nvsp_msg_init; /* * This message is used by the VSP to complete the initialization * of the channel. This message should never include anything other * than versioning (i.e. this message will be the same forever). */ typedef struct nvsp_msg_init_complete_ { /* Deprecated */ uint32_t negotiated_prot_vers; uint32_t max_mdl_chain_len; uint32_t status; } __packed nvsp_msg_init_complete; typedef union nvsp_msg_init_uber_ { nvsp_msg_init init; nvsp_msg_init_complete init_compl; } __packed nvsp_msg_init_uber; /* * Version 1 Messages */ /* * This message is used by the VSC to send the NDIS version * to the VSP. The VSP can use this information when handling * OIDs sent by the VSC. */ typedef struct nvsp_1_msg_send_ndis_version_ { uint32_t ndis_major_vers; /* Deprecated */ uint32_t ndis_minor_vers; } __packed nvsp_1_msg_send_ndis_version; /* * This message is used by the VSC to send a receive buffer * to the VSP. The VSP can then use the receive buffer to * send data to the VSC. */ typedef struct nvsp_1_msg_send_rx_buf_ { uint32_t gpadl_handle; uint16_t id; } __packed nvsp_1_msg_send_rx_buf; typedef struct nvsp_1_rx_buf_section_ { uint32_t offset; uint32_t sub_allocation_size; uint32_t num_sub_allocations; uint32_t end_offset; } __packed nvsp_1_rx_buf_section; /* * This message is used by the VSP to acknowledge a receive * buffer sent by the VSC. This message must be sent by the * VSP before the VSP uses the receive buffer. */ typedef struct nvsp_1_msg_send_rx_buf_complete_ { uint32_t status; uint32_t num_sections; /* * The receive buffer is split into two parts, a large * suballocation section and a small suballocation * section. These sections are then suballocated by a * certain size. * * For example, the following break up of the receive * buffer has 6 large suballocations and 10 small * suballocations. * * | Large Section | | Small Section | * ------------------------------------------------------------ * | | | | | | | | | | | | | | | | | | * | | * LargeOffset SmallOffset */ nvsp_1_rx_buf_section sections[1]; } __packed nvsp_1_msg_send_rx_buf_complete;
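/*
 * [Editor's illustration -- not part of the original header.]  The
 * nvsp_msg_init layout above packs the supported protocol range into two
 * 32-bit fields; on Win7+ hosts the low/high 16 bits of p1.protocol_version
 * carry the minor/major numbers.  A minimal sketch of filling the request
 * with the range this header advertises (the helper name is hypothetical,
 * and the driver's actual negotiation loop may fill both fields with a
 * single candidate version instead):
 */
static __inline void
example_fill_nvsp_init(nvsp_msg_init *init)
{
	/* Lowest version accepted; formerly min_protocol_version. */
	init->p1.protocol_version = NVSP_MIN_PROTOCOL_VERSION;
	/* Highest version accepted; 0x30002 decodes as major 3, minor 2. */
	init->protocol_version_2 = NVSP_MAX_PROTOCOL_VERSION;
}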
/* * This message is sent by the VSC to revoke the receive buffer. * After the VSP completes this transaction, the VSP should never * use the receive buffer again. */ typedef struct nvsp_1_msg_revoke_rx_buf_ { uint16_t id; } __packed nvsp_1_msg_revoke_rx_buf; /* * This message is used by the VSC to send a send buffer * to the VSP. The VSC can then use the send buffer to * send data to the VSP. */ typedef struct nvsp_1_msg_send_send_buf_ { uint32_t gpadl_handle; uint16_t id; } __packed nvsp_1_msg_send_send_buf; /* * This message is used by the VSP to acknowledge a send * buffer sent by the VSC. This message must be sent by the * VSP before the VSP uses the sent buffer. */ typedef struct nvsp_1_msg_send_send_buf_complete_ { uint32_t status; /* * The VSC gets to choose the size of the send buffer and * the VSP gets to choose the section size of the buffer. * This was done to enable dynamic reconfigurations when * the cost of GPA-direct buffers decreases. */ uint32_t section_size; } __packed nvsp_1_msg_send_send_buf_complete; /* * This message is sent by the VSC to revoke the send buffer. * After the VSP completes this transaction, the VSP should never * use the send buffer again. */ typedef struct nvsp_1_msg_revoke_send_buf_ { uint16_t id; } __packed nvsp_1_msg_revoke_send_buf; /* * This message is used by both the VSP and the VSC to send * an RNDIS message to the opposite channel endpoint. */ typedef struct nvsp_1_msg_send_rndis_pkt_ { /* * This field is specified by RNDIS. It assumes there are * two different channels of communication. However, * the Network VSP only has one. Therefore, the channel * travels with the RNDIS packet. */ uint32_t chan_type; /* * This field is used to send part or all of the data * through a send buffer. This value specifies an * index into the send buffer. If the index is * 0xFFFFFFFF, then the send buffer is not being used * and all of the data was sent through other VMBus * mechanisms. */ uint32_t send_buf_section_idx; uint32_t send_buf_section_size; } __packed nvsp_1_msg_send_rndis_pkt; /* * This message is used by both the VSP and the VSC to complete * an RNDIS message to the opposite channel endpoint. At this * point, the initiator of this message cannot use any resources * associated with the original RNDIS packet. */ typedef struct nvsp_1_msg_send_rndis_pkt_complete_ { uint32_t status; } __packed nvsp_1_msg_send_rndis_pkt_complete; /* * Version 2 Messages */ /* * This message is used by the VSC to send the NDIS version * to the VSP. The VSP can use this information when handling * OIDs sent by the VSC. */ typedef struct nvsp_2_netvsc_capabilities_ { union { uint64_t as_uint64; struct { uint64_t vmq : 1; uint64_t chimney : 1; uint64_t sriov : 1; uint64_t ieee8021q : 1; uint64_t correlationid : 1; uint64_t teaming : 1; } u2; } u1; } __packed nvsp_2_netvsc_capabilities; typedef struct nvsp_2_msg_send_ndis_config_ { uint32_t mtu; uint32_t reserved; nvsp_2_netvsc_capabilities capabilities; } __packed nvsp_2_msg_send_ndis_config; /* * NvspMessage2TypeSendChimneyDelegatedBuffer */ typedef struct nvsp_2_msg_send_chimney_buf_ { /* * On WIN7 beta, delegated_obj_max_size is defined as a uint32_t * Since WIN7 RC, it was split into two uint16_t. To have the same * struct layout, delegated_obj_max_size shall be the first field. */ uint16_t delegated_obj_max_size; /* * The revision # of chimney protocol used between NVSC and NVSP. * * This revision is NOT related to the chimney revision between * NDIS protocol and miniport drivers. 
*/ uint16_t revision; uint32_t gpadl_handle; } __packed nvsp_2_msg_send_chimney_buf; /* Unsupported chimney revision 0 (only present in WIN7 beta) */ #define NVSP_CHIMNEY_REVISION_0 0 /* WIN7 Beta Chimney QFE */ #define NVSP_CHIMNEY_REVISION_1 1 /* The chimney revision since WIN7 RC */ #define NVSP_CHIMNEY_REVISION_2 2 /* * NvspMessage2TypeSendChimneyDelegatedBufferComplete */ typedef struct nvsp_2_msg_send_chimney_buf_complete_ { uint32_t status; /* * Maximum number of outstanding sends and pre-posted receives. * * NVSC should not post more than SendQuota/ReceiveQuota packets. * Otherwise, it can block the non-chimney path for an indefinite * amount of time. * (since chimney sends/receives are affected by the remote peer). * * Note: NVSP enforces the quota restrictions on a per-VMBCHANNEL * basis. It doesn't enforce the restriction separately for chimney * send/receive. If NVSC doesn't voluntarily enforce "SendQuota", * it may kill its own network connectivity. */ uint32_t send_quota; uint32_t rx_quota; } __packed nvsp_2_msg_send_chimney_buf_complete; /* * NvspMessage2TypeRevokeChimneyDelegatedBuffer */ typedef struct nvsp_2_msg_revoke_chimney_buf_ { uint32_t gpadl_handle; } __packed nvsp_2_msg_revoke_chimney_buf; #define NVSP_CHIMNEY_OBJECT_TYPE_NEIGHBOR 0 #define NVSP_CHIMNEY_OBJECT_TYPE_PATH4 1 #define NVSP_CHIMNEY_OBJECT_TYPE_PATH6 2 #define NVSP_CHIMNEY_OBJECT_TYPE_TCP 3 /* * NvspMessage2TypeAllocateChimneyHandle */ typedef struct nvsp_2_msg_alloc_chimney_handle_ { uint64_t vsc_context; uint32_t object_type; } __packed nvsp_2_msg_alloc_chimney_handle; /* * NvspMessage2TypeAllocateChimneyHandleComplete */ typedef struct nvsp_2_msg_alloc_chimney_handle_complete_ { uint32_t vsp_handle; } __packed nvsp_2_msg_alloc_chimney_handle_complete; /* * NvspMessage2TypeResumeChimneyRXIndication */ typedef struct nvsp_2_msg_resume_chimney_rx_indication { /* * Handle identifying the offloaded connection */ uint32_t vsp_tcp_handle; } __packed nvsp_2_msg_resume_chimney_rx_indication; #define NVSP_2_MSG_TERMINATE_CHIMNEY_FLAGS_FIRST_STAGE (0x01u) #define NVSP_2_MSG_TERMINATE_CHIMNEY_FLAGS_RESERVED (~(0x01u)) /* * NvspMessage2TypeTerminateChimney */ typedef struct nvsp_2_msg_terminate_chimney_ { /* * Handle identifying the offloaded object */ uint32_t vsp_handle; /* * Terminate Offload Flags * Bit 0: * When set to 0, terminate the offload at the destination NIC * Bit 1-31: Reserved, shall be zero */ uint32_t flags; union { /* * This field is valid only when bit 0 of flags is clear. * It specifies the index into the premapped delegated * object buffer. The buffer was sent through the * NvspMessage2TypeSendChimneyDelegatedBuffer * message at initialization time. * * NVSP will write the delegated state into the delegated * buffer upon upload completion. */ uint32_t index; /* * This field is valid only when bit 0 of flags is set. * * The sequence number of the most recently accepted RX * indication when VSC sets its TCP context into * "terminating" state. * * This allows NVSP to determine if there are any in-flight * RX indications for which the acceptance state is still * undefined. 
*/ uint64_t last_accepted_rx_seq_no; } f0; } __packed nvsp_2_msg_terminate_chimney; #define NVSP_TERMINATE_CHIMNEY_COMPLETE_FLAG_DATA_CORRUPTED 0x0000001u /* * NvspMessage2TypeTerminateChimneyComplete */ typedef struct nvsp_2_msg_terminate_chimney_complete_ { uint64_t vsc_context; uint32_t flags; } __packed nvsp_2_msg_terminate_chimney_complete; /* * NvspMessage2TypeIndicateChimneyEvent */ typedef struct nvsp_2_msg_indicate_chimney_event_ { /* * When VscTcpContext is 0, event_type is an NDIS_STATUS event code * Otherwise, EventType is a TCP connection event (defined in * NdisTcpOffloadEventHandler chimney DDK document). */ uint32_t event_type; /* * When VscTcpContext is 0, EventType is an NDIS_STATUS event code * Otherwise, EventType is TCP connection event specific information * (defined in NdisTcpOffloadEventHandler chimney DDK document). */ uint32_t event_specific_info; /* * If not 0, the event is a per-TCP-connection event. This field * contains the VSC's TCP context. * If 0, the event indication is global. */ uint64_t vsc_tcp_context; } __packed nvsp_2_msg_indicate_chimney_event; #define NVSP_1_CHIMNEY_SEND_INVALID_OOB_INDEX 0xffffu #define NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX 0xffffffff /* * NvspMessage2TypeSendChimneyPacket */ typedef struct nvsp_2_msg_send_chimney_pkt_ { /* * Identify the TCP connection for which this chimney send is intended */ uint32_t vsp_tcp_handle; /* * This field is used to send part or all of the data * through a send buffer. This value specifies an * index into the send buffer. If the index is * 0xFFFF, then the send buffer is not being used * and all of the data was sent through other VMBus * mechanisms. */ uint16_t send_buf_section_index; uint16_t send_buf_section_size; /* * OOB Data Index * This is an index into the OOB data buffer. If the index is 0xFFFFFFFF, * then there is no OOB data. * * This field shall always be 0xFFFFFFFF for now. It is reserved for * the future. */ uint16_t oob_data_index; /* * DisconnectFlags = 0 * Normal chimney send. See MiniportTcpOffloadSend for details. * * DisconnectFlags = TCP_DISCONNECT_GRACEFUL_CLOSE (0x01) * Graceful disconnect. See MiniportTcpOffloadDisconnect for details. * * DisconnectFlags = TCP_DISCONNECT_ABORTIVE_CLOSE (0x02) * Abortive disconnect. See MiniportTcpOffloadDisconnect for details. */ uint16_t disconnect_flags; uint32_t seq_no; } __packed nvsp_2_msg_send_chimney_pkt; /* * NvspMessage2TypeSendChimneyPacketComplete */ typedef struct nvsp_2_msg_send_chimney_pkt_complete_ { /* * The NDIS_STATUS for the chimney send */ uint32_t status; /* * Number of bytes that have been sent to the peer (and ACKed by the peer). */ uint32_t bytes_transferred; } __packed nvsp_2_msg_send_chimney_pkt_complete; #define NVSP_1_CHIMNEY_RECV_FLAG_NO_PUSH 0x0001u #define NVSP_1_CHIMNEY_RECV_INVALID_OOB_INDEX 0xffffu /* * NvspMessage2TypePostChimneyRecvRequest */ typedef struct nvsp_2_msg_post_chimney_rx_request_ { /* * Identify the TCP connection for which this chimney receive request * is intended. */ uint32_t vsp_tcp_handle; /* * OOB Data Index * This is an index into the OOB data buffer. If the index is 0xFFFFFFFF, * then there is no OOB data. * * This field shall always be 0xFFFFFFFF for now. It is reserved for * the future. */ uint32_t oob_data_index; /* * Bit 0 * When it is set, this is a "no-push" receive. * When it is clear, this is a "push" receive. * * Bit 1-15: Reserved and shall be zero */ uint16_t flags; /* * For debugging and diagnosis purposes. * The SeqNo is per TCP connection and starts from 0. 
*/ uint32_t seq_no; } __packed nvsp_2_msg_post_chimney_rx_request; /* * NvspMessage2TypePostChimneyRecvRequestComplete */ typedef struct nvsp_2_msg_post_chimney_rx_request_complete_ { /* * The NDIS_STATUS for the chimney receive request */ uint32_t status; /* * Number of bytes that have been sent to the peer (and ACKed by * the peer). */ uint32_t bytes_xferred; } __packed nvsp_2_msg_post_chimney_rx_request_complete; /* * NvspMessage2TypeAllocateReceiveBuffer */ typedef struct nvsp_2_msg_alloc_rx_buf_ { /* * Allocation ID to match the allocation request and response */ uint32_t allocation_id; /* * Length of the VM shared memory receive buffer that needs to * be allocated */ uint32_t length; } __packed nvsp_2_msg_alloc_rx_buf; /* * NvspMessage2TypeAllocateReceiveBufferComplete */ typedef struct nvsp_2_msg_alloc_rx_buf_complete_ { /* * The NDIS_STATUS code for buffer allocation */ uint32_t status; /* * Allocation ID from NVSP_2_MESSAGE_ALLOCATE_RECEIVE_BUFFER */ uint32_t allocation_id; /* * GPADL handle for the allocated receive buffer */ uint32_t gpadl_handle; /* * Receive buffer ID that is further used in * NvspMessage2SendVmqRndisPacket */ uint64_t rx_buf_id; } __packed nvsp_2_msg_alloc_rx_buf_complete; /* * NvspMessage2TypeFreeReceiveBuffer */ typedef struct nvsp_2_msg_free_rx_buf_ { /* * Receive buffer ID previously returned in * NvspMessage2TypeAllocateReceiveBufferComplete message */ uint64_t rx_buf_id; } __packed nvsp_2_msg_free_rx_buf; /* * This structure is used in defining the buffers in * NVSP_2_MESSAGE_SEND_VMQ_RNDIS_PACKET structure */ typedef struct nvsp_xfer_page_range_ { /* * Specifies the ID of the receive buffer that has the buffer. This * ID can be the general receive buffer ID specified in * NvspMessage1TypeSendReceiveBuffer or it can be the shared memory * receive buffer ID allocated by the VSC and specified in * NvspMessage2TypeAllocateReceiveBufferComplete message */ uint64_t xfer_page_set_id; /* * Number of bytes */ uint32_t byte_count; /* * Offset in bytes from the beginning of the buffer */ uint32_t byte_offset; } __packed nvsp_xfer_page_range; /* * NvspMessage2SendVmqRndisPacket */ typedef struct nvsp_2_msg_send_vmq_rndis_pkt_ { /* * This field is specified by RNDIS. It assumes there are * two different channels of communication. However, * the Network VSP only has one. Therefore, the channel * travels with the RNDIS packet. It must be RMC_DATA */ uint32_t channel_type; /* * Only the Range element corresponding to the RNDIS header of * the first RNDIS message in the multiple RNDIS messages sent * in one NVSP message is carried here. Information about the data * portions as well as the subsequent RNDIS messages in the same * NVSP message is embedded in the RNDIS header itself */ nvsp_xfer_page_range range; } __packed nvsp_2_msg_send_vmq_rndis_pkt; /* * This message is used by the VSC to complete * an RNDIS VMQ message to the VSP. At this point, * the initiator of this message can use any resources * associated with the original RNDIS VMQ packet. 
*/ typedef struct nvsp_2_msg_send_vmq_rndis_pkt_complete_ { uint32_t status; } __packed nvsp_2_msg_send_vmq_rndis_pkt_complete; /* * Version 5 messages */ enum nvsp_subchannel_operation { NVSP_SUBCHANNEL_NONE = 0, NVSP_SUBCHANNE_ALLOCATE, NVSP_SUBCHANNE_MAX }; typedef struct nvsp_5_subchannel_request_ { uint32_t op; uint32_t num_subchannels; } __packed nvsp_5_subchannel_request; typedef struct nvsp_5_subchannel_complete_ { uint32_t status; /* Actual number of subchannels allocated */ uint32_t num_subchannels; } __packed nvsp_5_subchannel_complete; typedef struct nvsp_5_send_indirect_table_ { /* The number of entries in the send indirection table */ uint32_t count; /* * The offset of the send indirection table from the top of * this struct. The send indirection table tells which channel * to put the send traffic on. Each entry is a channel number. */ uint32_t offset; } __packed nvsp_5_send_indirect_table; typedef union nvsp_1_msg_uber_ { nvsp_1_msg_send_ndis_version send_ndis_vers; nvsp_1_msg_send_rx_buf send_rx_buf; nvsp_1_msg_send_rx_buf_complete send_rx_buf_complete; nvsp_1_msg_revoke_rx_buf revoke_rx_buf; nvsp_1_msg_send_send_buf send_send_buf; nvsp_1_msg_send_send_buf_complete send_send_buf_complete; nvsp_1_msg_revoke_send_buf revoke_send_buf; nvsp_1_msg_send_rndis_pkt send_rndis_pkt; nvsp_1_msg_send_rndis_pkt_complete send_rndis_pkt_complete; } __packed nvsp_1_msg_uber; typedef union nvsp_2_msg_uber_ { nvsp_2_msg_send_ndis_config send_ndis_config; nvsp_2_msg_send_chimney_buf send_chimney_buf; nvsp_2_msg_send_chimney_buf_complete send_chimney_buf_complete; nvsp_2_msg_revoke_chimney_buf revoke_chimney_buf; nvsp_2_msg_resume_chimney_rx_indication resume_chimney_rx_indication; nvsp_2_msg_terminate_chimney terminate_chimney; nvsp_2_msg_terminate_chimney_complete terminate_chimney_complete; nvsp_2_msg_indicate_chimney_event indicate_chimney_event; nvsp_2_msg_send_chimney_pkt send_chimney_packet; nvsp_2_msg_send_chimney_pkt_complete send_chimney_packet_complete; nvsp_2_msg_post_chimney_rx_request post_chimney_rx_request; nvsp_2_msg_post_chimney_rx_request_complete post_chimney_rx_request_complete; nvsp_2_msg_alloc_rx_buf alloc_rx_buffer; nvsp_2_msg_alloc_rx_buf_complete alloc_rx_buffer_complete; nvsp_2_msg_free_rx_buf free_rx_buffer; nvsp_2_msg_send_vmq_rndis_pkt send_vmq_rndis_pkt; nvsp_2_msg_send_vmq_rndis_pkt_complete send_vmq_rndis_pkt_complete; nvsp_2_msg_alloc_chimney_handle alloc_chimney_handle; nvsp_2_msg_alloc_chimney_handle_complete alloc_chimney_handle_complete; } __packed nvsp_2_msg_uber; typedef union nvsp_5_msg_uber_ { nvsp_5_subchannel_request subchannel_request; nvsp_5_subchannel_complete subchn_complete; nvsp_5_send_indirect_table send_table; } __packed nvsp_5_msg_uber; typedef union nvsp_all_msgs_ { nvsp_msg_init_uber init_msgs; nvsp_1_msg_uber vers_1_msgs; nvsp_2_msg_uber vers_2_msgs; nvsp_5_msg_uber vers_5_msgs; } __packed nvsp_all_msgs; /* * ALL Messages */ typedef struct nvsp_msg_ { nvsp_msg_hdr hdr; nvsp_all_msgs msgs; } __packed nvsp_msg; /* * The following arguably belongs in a separate header file */ /* * Defines */ #define NETVSC_SEND_BUFFER_SIZE (1024*1024*15) /* 15MB */ #define NETVSC_SEND_BUFFER_ID 0xface #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ #define NETVSC_RECEIVE_BUFFER_ID 0xcafe #define NETVSC_RECEIVE_SG_COUNT 1 /* Preallocated receive packets */ #define NETVSC_RECEIVE_PACKETLIST_COUNT 256 
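/*
 * [Editor's sketch -- not driver code.]  Walking the version 5 send
 * indirection table defined above: per its comment, 'offset' is relative
 * to the top of the structure and each of the 'count' entries is a 32-bit
 * channel number for one TX queue.  The helper name and the
 * caller-supplied bound 'max_ents' are assumptions of this sketch.
 */
static __inline void
example_read_send_table(const nvsp_5_send_indirect_table *tbl,
    uint32_t *chans, uint32_t max_ents)
{
	const uint32_t *ent =
	    (const uint32_t *)((const uint8_t *)tbl + tbl->offset);
	uint32_t i, n;

	n = (tbl->count < max_ents) ? tbl->count : max_ents;
	for (i = 0; i < n; ++i)
		chans[i] = ent[i];	/* channel to use for TX queue i */
}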
/* * Maximum MTU we permit to be configured for a netvsc interface. * When the code was developed, a max MTU of 12232 was tested and * proven to work. 9K is a reasonable maximum for an Ethernet. */ #define NETVSC_MAX_CONFIGURABLE_MTU (9 * 1024) #define NETVSC_PACKET_SIZE PAGE_SIZE #define VRSS_SEND_TABLE_SIZE 16 /* * Data types */ /* * Per netvsc channel-specific */ typedef struct netvsc_dev_ { struct hv_device *dev; /* Send buffer allocated by us but managed by NetVSP */ void *send_buf; uint32_t send_buf_size; uint32_t send_buf_gpadl_handle; uint32_t send_section_size; uint32_t send_section_count; unsigned long bitsmap_words; unsigned long *send_section_bitsmap; /* Receive buffer allocated by us but managed by NetVSP */ void *rx_buf; uint32_t rx_buf_size; uint32_t rx_buf_gpadl_handle; uint32_t rx_section_count; nvsp_1_rx_buf_section *rx_sections; /* Used for NetVSP initialization protocol */ struct sema channel_init_sema; nvsp_msg channel_init_packet; nvsp_msg revoke_packet; /*uint8_t hw_mac_addr[HW_MACADDR_LEN];*/ /* Holds rndis device info */ void *extension; hv_bool_uint8_t destroy; /* Negotiated NVSP version */ uint32_t nvsp_version; uint32_t num_channel; uint32_t vrss_send_table[VRSS_SEND_TABLE_SIZE]; } netvsc_dev; struct hv_vmbus_channel; typedef void (*pfn_on_send_rx_completion)(struct hv_vmbus_channel *, void *); #define NETVSC_DEVICE_RING_BUFFER_SIZE (128 * PAGE_SIZE) #define NETVSC_PACKET_MAXPAGE 32 #define NETVSC_VLAN_PRIO_MASK 0xe000 #define NETVSC_VLAN_PRIO_SHIFT 13 #define NETVSC_VLAN_VID_MASK 0x0fff #define TYPE_IPV4 2 #define TYPE_IPV6 4 #define TYPE_TCP 2 #define TYPE_UDP 4 #define TRANSPORT_TYPE_NOT_IP 0 #define TRANSPORT_TYPE_IPV4_TCP ((TYPE_IPV4 << 16) | TYPE_TCP) #define TRANSPORT_TYPE_IPV4_UDP ((TYPE_IPV4 << 16) | TYPE_UDP) #define TRANSPORT_TYPE_IPV6_TCP ((TYPE_IPV6 << 16) | TYPE_TCP) #define TRANSPORT_TYPE_IPV6_UDP ((TYPE_IPV6 << 16) | TYPE_UDP) #ifdef __LP64__ #define BITS_PER_LONG 64 #else #define BITS_PER_LONG 32 #endif typedef struct netvsc_packet_ { struct hv_device *device; hv_bool_uint8_t is_data_pkt; /* One byte */ uint16_t vlan_tci; uint32_t status; /* Completion */ union { struct { uint64_t rx_completion_tid; void *rx_completion_context; /* This is no longer used */ pfn_on_send_rx_completion on_rx_completion; } rx; struct { uint64_t send_completion_tid; void *send_completion_context; /* Still used in netvsc and filter code */ pfn_on_send_rx_completion on_send_completion; } send; } compl; uint32_t send_buf_section_idx; uint32_t send_buf_section_size; void *rndis_mesg; uint32_t tot_data_buf_len; void *data; uint32_t page_buf_count; hv_vmbus_page_buffer page_buffers[NETVSC_PACKET_MAXPAGE]; } netvsc_packet; typedef struct { uint8_t mac_addr[6]; /* Assumption unsigned long */ hv_bool_uint8_t link_state; } netvsc_device_info; #ifndef HN_USE_TXDESC_BUFRING struct hn_txdesc; SLIST_HEAD(hn_txdesc_list, hn_txdesc); #else struct buf_ring; #endif struct hn_rx_ring { struct ifnet *hn_ifp; int hn_rx_idx; /* Trust csum verification on host side */ int hn_trust_hcsum; /* HN_TRUST_HCSUM_ */ struct lro_ctrl hn_lro; u_long hn_csum_ip; u_long hn_csum_tcp; u_long hn_csum_udp; u_long hn_csum_trusted; u_long hn_lro_tried; u_long hn_small_pkts; u_long hn_pkts; + u_long hn_rss_pkts; /* Rarely used stuffs */ struct sysctl_oid *hn_rx_sysctl_tree; int hn_rx_flags; } __aligned(CACHE_LINE_SIZE); #define HN_TRUST_HCSUM_IP 0x0001 #define HN_TRUST_HCSUM_TCP 0x0002 #define HN_TRUST_HCSUM_UDP 0x0004 #define HN_RX_FLAG_ATTACHED 0x1 struct hn_tx_ring { #ifndef HN_USE_TXDESC_BUFRING struct mtx hn_txlist_spin; struct hn_txdesc_list hn_txlist; #else struct 
buf_ring *hn_txdesc_br; #endif int hn_txdesc_cnt; int hn_txdesc_avail; u_short hn_has_txeof; u_short hn_txdone_cnt; int hn_sched_tx; void (*hn_txeof)(struct hn_tx_ring *); struct taskqueue *hn_tx_taskq; struct task hn_tx_task; struct task hn_txeof_task; struct buf_ring *hn_mbuf_br; int hn_oactive; int hn_tx_idx; struct mtx hn_tx_lock; struct hn_softc *hn_sc; struct hv_vmbus_channel *hn_chan; int hn_direct_tx_size; int hn_tx_chimney_size; bus_dma_tag_t hn_tx_data_dtag; uint64_t hn_csum_assist; u_long hn_no_txdescs; u_long hn_send_failed; u_long hn_txdma_failed; u_long hn_tx_collapsed; u_long hn_tx_chimney_tried; u_long hn_tx_chimney; u_long hn_pkts; /* Rarely used stuffs */ struct hn_txdesc *hn_txdesc; bus_dma_tag_t hn_tx_rndis_dtag; struct sysctl_oid *hn_tx_sysctl_tree; int hn_tx_flags; } __aligned(CACHE_LINE_SIZE); #define HN_TX_FLAG_ATTACHED 0x1 /* * Device-specific softc structure */ typedef struct hn_softc { struct ifnet *hn_ifp; struct ifmedia hn_media; device_t hn_dev; uint8_t hn_unit; int hn_carrier; int hn_if_flags; struct mtx hn_lock; int hn_initdone; /* See hv_netvsc_drv_freebsd.c for rules on how to use */ int temp_unusable; struct hv_device *hn_dev_obj; netvsc_dev *net_dev; int hn_rx_ring_cnt; int hn_rx_ring_inuse; struct hn_rx_ring *hn_rx_ring; int hn_tx_ring_cnt; int hn_tx_ring_inuse; struct hn_tx_ring *hn_tx_ring; int hn_cpu; int hn_tx_chimney_max; struct taskqueue *hn_tx_taskq; struct sysctl_oid *hn_tx_sysctl_tree; struct sysctl_oid *hn_rx_sysctl_tree; } hn_softc_t; /* * Externs */ extern int hv_promisc_mode; void netvsc_linkstatus_callback(struct hv_device *device_obj, uint32_t status); netvsc_dev *hv_nv_on_device_add(struct hv_device *device, void *additional_info); int hv_nv_on_device_remove(struct hv_device *device, boolean_t destroy_channel); int hv_nv_on_send(struct hv_vmbus_channel *chan, netvsc_packet *pkt); int hv_nv_get_next_send_section(netvsc_dev *net_dev); void hv_nv_subchan_attach(struct hv_vmbus_channel *chan); #endif /* __HV_NET_VSC_H__ */ Index: head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c =================================================================== --- head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c (revision 299400) +++ head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c (revision 299401) @@ -1,2966 +1,3014 @@ /*- * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2004-2006 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_net_vsc.h" #include "hv_rndis.h" #include "hv_rndis_filter.h" #define hv_chan_rxr hv_chan_priv1 #define hv_chan_txr hv_chan_priv2 /* Short for Hyper-V network interface */ #define NETVSC_DEVNAME "hn" /* * It looks like offset 0 of buf is reserved to hold the softc pointer. * The sc pointer is evidently not needed, and is not presently populated. * The packet offset is where the netvsc_packet starts in the buffer. 
*/ #define HV_NV_SC_PTR_OFFSET_IN_BUF 0 #define HV_NV_PACKET_OFFSET_IN_BUF 16 /* YYY should get it from the underlying channel */ #define HN_TX_DESC_CNT 512 #define HN_LROENT_CNT_DEF 128 #define HN_RING_CNT_DEF_MAX 8 #define HN_RNDIS_MSG_LEN \ (sizeof(rndis_msg) + \ - RNDIS_HASH_PPI_SIZE + \ + RNDIS_HASHVAL_PPI_SIZE + \ RNDIS_VLAN_PPI_SIZE + \ RNDIS_TSO_PPI_SIZE + \ RNDIS_CSUM_PPI_SIZE) #define HN_RNDIS_MSG_BOUNDARY PAGE_SIZE #define HN_RNDIS_MSG_ALIGN CACHE_LINE_SIZE #define HN_TX_DATA_BOUNDARY PAGE_SIZE #define HN_TX_DATA_MAXSIZE IP_MAXPACKET #define HN_TX_DATA_SEGSIZE PAGE_SIZE #define HN_TX_DATA_SEGCNT_MAX \ (NETVSC_PACKET_MAXPAGE - HV_RF_NUM_TX_RESERVED_PAGE_BUFS) #define HN_DIRECT_TX_SIZE_DEF 128 #define HN_EARLY_TXEOF_THRESH 8 struct hn_txdesc { #ifndef HN_USE_TXDESC_BUFRING SLIST_ENTRY(hn_txdesc) link; #endif struct mbuf *m; struct hn_tx_ring *txr; int refs; uint32_t flags; /* HN_TXD_FLAG_ */ netvsc_packet netvsc_pkt; /* XXX to be removed */ bus_dmamap_t data_dmap; bus_addr_t rndis_msg_paddr; rndis_msg *rndis_msg; bus_dmamap_t rndis_msg_dmap; }; #define HN_TXD_FLAG_ONLIST 0x1 #define HN_TXD_FLAG_DMAMAP 0x2 /* * Only enable UDP checksum offloading when the host is 2012R2 or * later. UDP checksum offloading doesn't work on earlier * Windows releases. */ #define HN_CSUM_ASSIST_WIN8 (CSUM_IP | CSUM_TCP) #define HN_CSUM_ASSIST (CSUM_IP | CSUM_UDP | CSUM_TCP) #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) /* YYY 2*MTU is a bit rough, but should be good enough. */ #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) #define HN_LRO_ACKCNT_DEF 1 /* * Be aware that this sleepable mutex will exhibit WITNESS errors when * certain TCP and ARP code paths are taken. This appears to be a * well-known condition, as all other drivers checked use a sleeping * mutex to protect their transmit paths. * Also be aware that mutexes do not play well with semaphores, and there * is a conflicting semaphore in a certain channel code path. */ #define NV_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->hn_lock, _name, MTX_NETWORK_LOCK, MTX_DEF) #define NV_LOCK(_sc) mtx_lock(&(_sc)->hn_lock) #define NV_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->hn_lock, MA_OWNED) #define NV_UNLOCK(_sc) mtx_unlock(&(_sc)->hn_lock) #define NV_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->hn_lock) /* * Globals */ int hv_promisc_mode = 0; /* normal mode by default */ SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Hyper-V network interface"); /* Trust tcp segment verification on host side. */ static int hn_trust_hosttcp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN, &hn_trust_hosttcp, 0, "Trust tcp segment verification on host side, " "when csum info is missing (global setting)"); /* Trust udp datagram verification on host side. */ static int hn_trust_hostudp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN, &hn_trust_hostudp, 0, "Trust udp datagram verification on host side, " "when csum info is missing (global setting)"); /* Trust ip packet verification on host side. 
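 * (Editor's note: this knob, like the other CTLFLAG_RDTUN sysctls above,
 * is a boot-time tunable fetched from the loader environment, so it is
 * set from loader.conf(5) rather than at runtime; for example
 * "hw.hn.trust_hostip=0" or "hw.hn.chan_cnt=4" -- values illustrative
 * only.)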
*/ static int hn_trust_hostip = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN, &hn_trust_hostip, 0, "Trust ip packet verification on host side, " "when csum info is missing (global setting)"); #if __FreeBSD_version >= 1100045 /* Limit TSO burst size */ static int hn_tso_maxlen = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, &hn_tso_maxlen, 0, "TSO burst limit"); #endif /* Limit chimney send size */ static int hn_tx_chimney_size = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN, &hn_tx_chimney_size, 0, "Chimney send packet size limit"); /* Limit the size of packet for direct transmission */ static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN, &hn_direct_tx_size, 0, "Size of the packet for direct transmission"); #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 static int hn_lro_entry_count = HN_LROENT_CNT_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, &hn_lro_entry_count, 0, "LRO entry count"); #endif #endif static int hn_share_tx_taskq = 0; SYSCTL_INT(_hw_hn, OID_AUTO, share_tx_taskq, CTLFLAG_RDTUN, &hn_share_tx_taskq, 0, "Enable shared TX taskqueue"); static struct taskqueue *hn_tx_taskq; #ifndef HN_USE_TXDESC_BUFRING static int hn_use_txdesc_bufring = 0; #else static int hn_use_txdesc_bufring = 1; #endif SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors"); static int hn_bind_tx_taskq = -1; SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN, &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu"); static int hn_use_if_start = 0; SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN, &hn_use_if_start, 0, "Use if_start TX method"); static int hn_chan_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN, &hn_chan_cnt, 0, "# of channels to use; each channel has one RX ring and one TX ring"); static int hn_tx_ring_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN, &hn_tx_ring_cnt, 0, "# of TX rings to use"); static int hn_tx_swq_depth = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN, &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING"); static u_int hn_cpu_index; /* * Forward declarations */ static void hn_stop(hn_softc_t *sc); static void hn_ifinit_locked(hn_softc_t *sc); static void hn_ifinit(void *xsc); static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int hn_start_locked(struct hn_tx_ring *txr, int len); static void hn_start(struct ifnet *ifp); static void hn_start_txeof(struct hn_tx_ring *); static int hn_ifmedia_upd(struct ifnet *ifp); static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); static int hn_check_iplen(const struct mbuf *, int); static int hn_create_tx_ring(struct hn_softc *, int); static void hn_destroy_tx_ring(struct hn_tx_ring *); static int hn_create_tx_data(struct hn_softc *, int); static void hn_destroy_tx_data(struct hn_softc *); static void hn_start_taskfunc(void *, int); static void 
hn_start_txeof_taskfunc(void *, int); static void hn_stop_tx_tasks(struct hn_softc *); static int hn_encap(struct hn_tx_ring *, struct hn_txdesc *, struct mbuf **); static void hn_create_rx_data(struct hn_softc *sc, int); static void hn_destroy_rx_data(struct hn_softc *sc); static void hn_set_tx_chimney_size(struct hn_softc *, int); static void hn_channel_attach(struct hn_softc *, struct hv_vmbus_channel *); static void hn_subchan_attach(struct hn_softc *, struct hv_vmbus_channel *); static int hn_transmit(struct ifnet *, struct mbuf *); static void hn_xmit_qflush(struct ifnet *); static int hn_xmit(struct hn_tx_ring *, int); static void hn_xmit_txeof(struct hn_tx_ring *); static void hn_xmit_taskfunc(void *, int); static void hn_xmit_txeof_taskfunc(void *, int); #if __FreeBSD_version >= 1100099 static void hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) { int i; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; } #endif static int hn_get_txswq_depth(const struct hn_tx_ring *txr) { KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); if (hn_tx_swq_depth < txr->hn_txdesc_cnt) return txr->hn_txdesc_cnt; return hn_tx_swq_depth; } static int hn_ifmedia_upd(struct ifnet *ifp __unused) { return EOPNOTSUPP; } static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct hn_softc *sc = ifp->if_softc; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!sc->hn_carrier) { ifmr->ifm_active |= IFM_NONE; return; } ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_10G_T | IFM_FDX; } /* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ static const hv_guid g_net_vsc_device_type = { .data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} }; /* * Standard probe entry point. * */ static int netvsc_probe(device_t dev) { const char *p; p = vmbus_get_type(dev); if (!memcmp(p, &g_net_vsc_device_type.data, sizeof(hv_guid))) { device_set_desc(dev, "Hyper-V Network Interface"); if (bootverbose) printf("Netvsc probe... DONE \n"); return (BUS_PROBE_DEFAULT); } return (ENXIO); } /* * Standard attach entry point. * * Called when the driver is loaded. It allocates needed resources * and initializes the "hardware" and software. */ static int netvsc_attach(device_t dev) { struct hv_device *device_ctx = vmbus_get_devctx(dev); struct hv_vmbus_channel *pri_chan; netvsc_device_info device_info; hn_softc_t *sc; int unit = device_get_unit(dev); struct ifnet *ifp = NULL; int error, ring_cnt, tx_ring_cnt; #if __FreeBSD_version >= 1100045 int tso_maxlen; #endif sc = device_get_softc(dev); sc->hn_unit = unit; sc->hn_dev = dev; if (hn_tx_taskq == NULL) { sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &sc->hn_tx_taskq); if (hn_bind_tx_taskq >= 0) { int cpu = hn_bind_tx_taskq; cpuset_t cpu_set; if (cpu > mp_ncpus - 1) cpu = mp_ncpus - 1; CPU_SETOF(cpu, &cpu_set); taskqueue_start_threads_cpuset(&sc->hn_tx_taskq, 1, PI_NET, &cpu_set, "%s tx", device_get_nameunit(dev)); } else { taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx", device_get_nameunit(dev)); } } else { sc->hn_tx_taskq = hn_tx_taskq; } NV_LOCK_INIT(sc, "NetVSCLock"); sc->hn_dev_obj = device_ctx; ifp = sc->hn_ifp = if_alloc(IFT_ETHER); ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); /* * Figure out the # of RX rings (ring_cnt) and the # of TX rings * to use (tx_ring_cnt). * * NOTE: * The # of RX rings to use is the same as the # of channels to use. 
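 * (Editor's worked example of the logic below, with illustrative
 * numbers: the default hn_chan_cnt of 0 on a 16-vCPU guest gives
 * ring_cnt = min(mp_ncpus, HN_RING_CNT_DEF_MAX) = 8, while
 * hn_chan_cnt = 32 on an 8-vCPU guest is clamped to 8; with
 * hn_use_if_start set, tx_ring_cnt is forced to 1 because
 * ifnet.if_start can feed only one queue.)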
*/ ring_cnt = hn_chan_cnt; if (ring_cnt <= 0) { /* Default */ ring_cnt = mp_ncpus; if (ring_cnt > HN_RING_CNT_DEF_MAX) ring_cnt = HN_RING_CNT_DEF_MAX; } else if (ring_cnt > mp_ncpus) { ring_cnt = mp_ncpus; } tx_ring_cnt = hn_tx_ring_cnt; if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) tx_ring_cnt = ring_cnt; if (hn_use_if_start) { /* ifnet.if_start only needs one TX ring. */ tx_ring_cnt = 1; } /* * Set the leader CPU for channels. */ sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; error = hn_create_tx_data(sc, tx_ring_cnt); if (error) goto failed; hn_create_rx_data(sc, ring_cnt); /* * Associate the first TX/RX ring w/ the primary channel. */ pri_chan = device_ctx->channel; KASSERT(HV_VMBUS_CHAN_ISPRIMARY(pri_chan), ("not primary channel")); KASSERT(pri_chan->offer_msg.offer.sub_channel_index == 0, ("primary channel subidx %u", pri_chan->offer_msg.offer.sub_channel_index)); hn_channel_attach(sc, pri_chan); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = hn_ioctl; ifp->if_init = hn_ifinit; /* needed by hv_rf_on_device_add() code */ ifp->if_mtu = ETHERMTU; if (hn_use_if_start) { int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); ifp->if_start = hn_start; IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); ifp->if_snd.ifq_drv_maxlen = qdepth - 1; IFQ_SET_READY(&ifp->if_snd); } else { ifp->if_transmit = hn_transmit; ifp->if_qflush = hn_xmit_qflush; } ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); /* XXX ifmedia_set really should do this for us */ sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; /* * Tell upper layers that we support full VLAN capability. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO; ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO; ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist | CSUM_TSO; error = hv_rf_on_device_add(device_ctx, &device_info, ring_cnt); if (error) goto failed; KASSERT(sc->net_dev->num_channel > 0 && sc->net_dev->num_channel <= sc->hn_rx_ring_inuse, ("invalid channel count %u, should be less than %d", sc->net_dev->num_channel, sc->hn_rx_ring_inuse)); /* * Set the # of TX/RX rings that could be used according to * the # of channels that host offered. */ if (sc->hn_tx_ring_inuse > sc->net_dev->num_channel) sc->hn_tx_ring_inuse = sc->net_dev->num_channel; sc->hn_rx_ring_inuse = sc->net_dev->num_channel; device_printf(dev, "%d TX ring, %d RX ring\n", sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse); if (sc->net_dev->num_channel > 1) { struct hv_vmbus_channel **subchan; int subchan_cnt = sc->net_dev->num_channel - 1; int i; /* Wait for sub-channels setup to complete. */ subchan = vmbus_get_subchan(pri_chan, subchan_cnt); /* Attach the sub-channels. */ for (i = 0; i < subchan_cnt; ++i) { /* NOTE: Calling order is critical. */ hn_subchan_attach(sc, subchan[i]); hv_nv_subchan_attach(subchan[i]); } /* Release the sub-channels */ vmbus_rel_subchan(subchan, subchan_cnt); device_printf(dev, "%d sub-channels setup done\n", subchan_cnt); } #if __FreeBSD_version >= 1100099 if (sc->hn_rx_ring_inuse > 1) { /* * Reduce TCP segment aggregation limit for multiple * RX rings to increase ACK timeliness. 
*/ hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); } #endif if (device_info.link_state == 0) { sc->hn_carrier = 1; } #if __FreeBSD_version >= 1100045 tso_maxlen = hn_tso_maxlen; if (tso_maxlen <= 0 || tso_maxlen > IP_MAXPACKET) tso_maxlen = IP_MAXPACKET; ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; ifp->if_hw_tsomaxsegsize = PAGE_SIZE; ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); #endif ether_ifattach(ifp, device_info.mac_addr); #if __FreeBSD_version >= 1100045 if_printf(ifp, "TSO: %u/%u/%u\n", ifp->if_hw_tsomax, ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); #endif sc->hn_tx_chimney_max = sc->net_dev->send_section_size; hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max); if (hn_tx_chimney_size > 0 && hn_tx_chimney_size < sc->hn_tx_chimney_max) hn_set_tx_chimney_size(sc, hn_tx_chimney_size); return (0); failed: hn_destroy_tx_data(sc); if (ifp != NULL) if_free(ifp); return (error); } /* * Standard detach entry point */ static int netvsc_detach(device_t dev) { struct hn_softc *sc = device_get_softc(dev); struct hv_device *hv_device = vmbus_get_devctx(dev); if (bootverbose) printf("netvsc_detach\n"); /* * XXXKYS: Need to clean up all our * driver state; this is the driver * unloading. */ /* * XXXKYS: Need to stop outgoing traffic and unregister * the netdevice. */ hv_rf_on_device_remove(hv_device, HV_RF_NV_DESTROY_CHANNEL); hn_stop_tx_tasks(sc); ifmedia_removeall(&sc->hn_media); hn_destroy_rx_data(sc); hn_destroy_tx_data(sc); if (sc->hn_tx_taskq != hn_tx_taskq) taskqueue_free(sc->hn_tx_taskq); return (0); } /* * Standard shutdown entry point */ static int netvsc_shutdown(device_t dev) { return (0); } static __inline int hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) { struct mbuf *m = *m_head; int error; error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { struct mbuf *m_new; m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); if (m_new == NULL) return ENOBUFS; else *m_head = m = m_new; txr->hn_tx_collapsed++; error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); } if (!error) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_PREWRITE); txd->flags |= HN_TXD_FLAG_DMAMAP; } return error; } static __inline void hn_txdesc_dmamap_unload(struct hn_tx_ring *txr, struct hn_txdesc *txd) { if (txd->flags & HN_TXD_FLAG_DMAMAP) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->hn_tx_data_dtag, txd->data_dmap); txd->flags &= ~HN_TXD_FLAG_DMAMAP; } } static __inline int hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) { KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, ("put an onlist txd %#x", txd->flags)); KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); if (atomic_fetchadd_int(&txd->refs, -1) != 1) return 0; hn_txdesc_dmamap_unload(txr, txd); if (txd->m != NULL) { m_freem(txd->m); txd->m = NULL; } txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); KASSERT(txr->hn_txdesc_avail >= 0 && txr->hn_txdesc_avail < txr->hn_txdesc_cnt, ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail++; SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); mtx_unlock_spin(&txr->hn_txlist_spin); #else atomic_add_int(&txr->hn_txdesc_avail, 1); buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif return 1; } static __inline 
struct hn_txdesc * hn_txdesc_get(struct hn_tx_ring *txr) { struct hn_txdesc *txd; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); txd = SLIST_FIRST(&txr->hn_txlist); if (txd != NULL) { KASSERT(txr->hn_txdesc_avail > 0, ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail--; SLIST_REMOVE_HEAD(&txr->hn_txlist, link); } mtx_unlock_spin(&txr->hn_txlist_spin); #else txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); #endif if (txd != NULL) { #ifdef HN_USE_TXDESC_BUFRING atomic_subtract_int(&txr->hn_txdesc_avail, 1); #endif KASSERT(txd->m == NULL && txd->refs == 0 && (txd->flags & HN_TXD_FLAG_ONLIST), ("invalid txd")); txd->flags &= ~HN_TXD_FLAG_ONLIST; txd->refs = 1; } return txd; } static __inline void hn_txdesc_hold(struct hn_txdesc *txd) { /* 0->1 transition will never work */ KASSERT(txd->refs > 0, ("invalid refs %d", txd->refs)); atomic_add_int(&txd->refs, 1); } static __inline void hn_txeof(struct hn_tx_ring *txr) { txr->hn_has_txeof = 0; txr->hn_txeof(txr); } static void hn_tx_done(struct hv_vmbus_channel *chan, void *xpkt) { netvsc_packet *packet = xpkt; struct hn_txdesc *txd; struct hn_tx_ring *txr; txd = (struct hn_txdesc *)(uintptr_t) packet->compl.send.send_completion_tid; txr = txd->txr; KASSERT(txr->hn_chan == chan, ("channel mismatch, on channel%u, should be channel%u", chan->offer_msg.offer.sub_channel_index, txr->hn_chan->offer_msg.offer.sub_channel_index)); txr->hn_has_txeof = 1; hn_txdesc_put(txr, txd); ++txr->hn_txdone_cnt; if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { txr->hn_txdone_cnt = 0; if (txr->hn_oactive) hn_txeof(txr); } } void netvsc_channel_rollup(struct hv_vmbus_channel *chan) { struct hn_tx_ring *txr = chan->hv_chan_txr; #if defined(INET) || defined(INET6) struct hn_rx_ring *rxr = chan->hv_chan_rxr; tcp_lro_flush_all(&rxr->hn_lro); #endif /* * NOTE: * 'txr' could be NULL, if multiple channels and * ifnet.if_start method are enabled. */ if (txr == NULL || !txr->hn_has_txeof) return; txr->hn_txdone_cnt = 0; hn_txeof(txr); } /* * NOTE: * If this function fails, then both txd and m_head0 will be freed. */ static int hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) { bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; int error, nsegs, i; struct mbuf *m_head = *m_head0; netvsc_packet *packet; rndis_msg *rndis_mesg; rndis_packet *rndis_pkt; rndis_per_packet_info *rppi; - struct ndis_hash_info *hash_info; + struct rndis_hash_value *hash_value; uint32_t rndis_msg_size; packet = &txd->netvsc_pkt; packet->is_data_pkt = TRUE; packet->tot_data_buf_len = m_head->m_pkthdr.len; /* * extension points to the area reserved for the * rndis_filter_packet, which is placed just after * the netvsc_packet (and rppi struct, if present; * length is updated later). */ rndis_mesg = txd->rndis_msg; /* XXX not necessary */ memset(rndis_mesg, 0, HN_RNDIS_MSG_LEN); rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG; rndis_pkt = &rndis_mesg->msg.packet; rndis_pkt->data_offset = sizeof(rndis_packet); rndis_pkt->data_length = packet->tot_data_buf_len; rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet); rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet); /* - * Set the hash info for this packet, so that the host could + * Set the hash value for this packet, so that the host could * dispatch the TX done event for this packet back to this TX * ring's channel. 
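 * (Editor's note on the rename in the hunk below: RNDIS allows a 32-bit
 * hash value to ride along as per-packet info, and per the comment
 * above the host uses it to hand the TX-done event back on the matching
 * channel; storing txr->hn_tx_idx there keeps completion processing on
 * the sending ring's own channel.  The s/hash_info/hash_value/ and
 * s/RNDIS_HASH_PPI_SIZE/RNDIS_HASHVAL_PPI_SIZE/ renames track that the
 * field carries the raw hash value, not hash configuration.)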
*/ - rndis_msg_size += RNDIS_HASH_PPI_SIZE; - rppi = hv_set_rppi_data(rndis_mesg, RNDIS_HASH_PPI_SIZE, + rndis_msg_size += RNDIS_HASHVAL_PPI_SIZE; + rppi = hv_set_rppi_data(rndis_mesg, RNDIS_HASHVAL_PPI_SIZE, nbl_hash_value); - hash_info = (struct ndis_hash_info *)((uint8_t *)rppi + + hash_value = (struct rndis_hash_value *)((uint8_t *)rppi + rppi->per_packet_info_offset); - hash_info->hash = txr->hn_tx_idx; + hash_value->hash_value = txr->hn_tx_idx; if (m_head->m_flags & M_VLANTAG) { ndis_8021q_info *rppi_vlan_info; rndis_msg_size += RNDIS_VLAN_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_VLAN_PPI_SIZE, ieee_8021q_info); rppi_vlan_info = (ndis_8021q_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); rppi_vlan_info->u1.s1.vlan_id = m_head->m_pkthdr.ether_vtag & 0xfff; } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { rndis_tcp_tso_info *tso_info; struct ether_vlan_header *eh; int ether_len; /* * XXX need m_pullup and use mtodo */ eh = mtod(m_head, struct ether_vlan_header*); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; else ether_len = ETHER_HDR_LEN; rndis_msg_size += RNDIS_TSO_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_TSO_PPI_SIZE, tcp_large_send_info); tso_info = (rndis_tcp_tso_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); tso_info->lso_v2_xmit.type = RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; #ifdef INET if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { struct ip *ip = (struct ip *)(m_head->m_data + ether_len); unsigned long iph_len = ip->ip_hl << 2; struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iph_len); tso_info->lso_v2_xmit.ip_version = RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; ip->ip_len = 0; ip->ip_sum = 0; th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET6 { struct ip6_hdr *ip6 = (struct ip6_hdr *) (m_head->m_data + ether_len); struct tcphdr *th = (struct tcphdr *)(ip6 + 1); tso_info->lso_v2_xmit.ip_version = RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; ip6->ip6_plen = 0; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); } #endif tso_info->lso_v2_xmit.tcp_header_offset = 0; tso_info->lso_v2_xmit.mss = m_head->m_pkthdr.tso_segsz; } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { rndis_tcp_ip_csum_info *csum_info; rndis_msg_size += RNDIS_CSUM_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_CSUM_PPI_SIZE, tcpip_chksum_info); csum_info = (rndis_tcp_ip_csum_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); csum_info->xmit.is_ipv4 = 1; if (m_head->m_pkthdr.csum_flags & CSUM_IP) csum_info->xmit.ip_header_csum = 1; if (m_head->m_pkthdr.csum_flags & CSUM_TCP) { csum_info->xmit.tcp_csum = 1; csum_info->xmit.tcp_header_offset = 0; } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) { csum_info->xmit.udp_csum = 1; } } rndis_mesg->msg_len = packet->tot_data_buf_len + rndis_msg_size; packet->tot_data_buf_len = rndis_mesg->msg_len; /* * Chimney send, if the packet could fit into one chimney buffer. 
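*
* In sketch form (illustrative, mirroring the code below): the RNDIS
* message and the packet data are copied back-to-back into one
* pre-shared send-buffer section, so the host needs no gather list:
*
*	dest = send_buf + section_idx * section_size;
*	memcpy(dest, rndis_mesg, rndis_msg_size);
*	m_copydata(m_head, 0, m_head->m_pkthdr.len, dest + rndis_msg_size);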
*/ if (packet->tot_data_buf_len < txr->hn_tx_chimney_size) { netvsc_dev *net_dev = txr->hn_sc->net_dev; uint32_t send_buf_section_idx; txr->hn_tx_chimney_tried++; send_buf_section_idx = hv_nv_get_next_send_section(net_dev); if (send_buf_section_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { uint8_t *dest = ((uint8_t *)net_dev->send_buf + (send_buf_section_idx * net_dev->send_section_size)); memcpy(dest, rndis_mesg, rndis_msg_size); dest += rndis_msg_size; m_copydata(m_head, 0, m_head->m_pkthdr.len, dest); packet->send_buf_section_idx = send_buf_section_idx; packet->send_buf_section_size = packet->tot_data_buf_len; packet->page_buf_count = 0; txr->hn_tx_chimney++; goto done; } } error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); if (error) { int freed; /* * This mbuf is not linked w/ the txd yet, so free it now. */ m_freem(m_head); *m_head0 = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon txdma error")); txr->hn_txdma_failed++; if_inc_counter(txr->hn_sc->hn_ifp, IFCOUNTER_OERRORS, 1); return error; } *m_head0 = m_head; packet->page_buf_count = nsegs + HV_RF_NUM_TX_RESERVED_PAGE_BUFS; /* send packet with page buffer */ packet->page_buffers[0].pfn = atop(txd->rndis_msg_paddr); packet->page_buffers[0].offset = txd->rndis_msg_paddr & PAGE_MASK; packet->page_buffers[0].length = rndis_msg_size; /* * Fill the page buffers with mbuf info starting at index * HV_RF_NUM_TX_RESERVED_PAGE_BUFS. */ for (i = 0; i < nsegs; ++i) { hv_vmbus_page_buffer *pb = &packet->page_buffers[ i + HV_RF_NUM_TX_RESERVED_PAGE_BUFS]; pb->pfn = atop(segs[i].ds_addr); pb->offset = segs[i].ds_addr & PAGE_MASK; pb->length = segs[i].ds_len; } packet->send_buf_section_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; packet->send_buf_section_size = 0; done: txd->m = m_head; /* Set the completion routine */ packet->compl.send.on_send_completion = hn_tx_done; packet->compl.send.send_completion_context = packet; packet->compl.send.send_completion_tid = (uint64_t)(uintptr_t)txd; return 0; } /* * NOTE: * If this function fails, then txd will be freed, but the mbuf * associated w/ the txd will _not_ be freed. */ static int hn_send_pkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) { int error, send_failed = 0; again: /* * Make sure that txd is not freed before ETHER_BPF_MTAP. */ hn_txdesc_hold(txd); error = hv_nv_on_send(txr->hn_chan, &txd->netvsc_pkt); if (!error) { ETHER_BPF_MTAP(ifp, txd->m); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (!hn_use_if_start) { if_inc_counter(ifp, IFCOUNTER_OBYTES, txd->m->m_pkthdr.len); if (txd->m->m_flags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); } txr->hn_pkts++; } hn_txdesc_put(txr, txd); if (__predict_false(error)) { int freed; /* * This should "really rarely" happen. * * XXX Too many RX to be acked or too many sideband * commands to run? Ask netvsc_channel_rollup() * to kick start later. */ txr->hn_has_txeof = 1; if (!send_failed) { txr->hn_send_failed++; send_failed = 1; /* * Try sending again after set hn_has_txeof; * in case that we missed the last * netvsc_channel_rollup(). */ goto again; } if_printf(ifp, "send failed\n"); /* * Caller will perform further processing on the * associated mbuf, so don't free it in hn_txdesc_put(); * only unload it from the DMA map in hn_txdesc_put(), * if it was loaded. 
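*
* In sketch form (illustrative): detaching the mbuf first keeps it
* alive for the caller, while the final put recycles the descriptor:
*
*	txd->m = NULL;			caller still owns the mbuf
*	hn_txdesc_put(txr, txd);	last ref dropped, txd recycled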
*/ txd->m = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon send error")); txr->hn_send_failed++; } return error; } /* * Start a transmit of one or more packets */ static int hn_start_locked(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; KASSERT(hn_use_if_start, ("hn_start_locked is called, when if_start is disabled")); KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); mtx_assert(&txr->hn_tx_lock, MA_OWNED); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return 0; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { struct hn_txdesc *txd; struct mbuf *m_head; int error; IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and sending of any * following up packets) to tx taskqueue. */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); return 1; } txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } error = hn_encap(txr, txd, &m_head); if (error) { /* Both txd and m_head are freed */ continue; } error = hn_send_pkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } } return 0; } /* * Link up/down notification */ void netvsc_linkstatus_callback(struct hv_device *device_obj, uint32_t status) { hn_softc_t *sc = device_get_softc(device_obj->device); if (status == 1) { sc->hn_carrier = 1; } else { sc->hn_carrier = 0; } } /* * Append the specified data to the indicated mbuf chain. * Extend the mbuf chain if the new data does not fit in * existing space. * * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. * There should be an equivalent in the kernel mbuf code, * but there does not appear to be one yet. * * Differs from m_append() in that additional mbufs are * allocated with cluster size MJUMPAGESIZE, and filled * accordingly. * * Return 1 if able to complete the job; otherwise 0. */ static int hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) { struct mbuf *m, *n; int remainder, space; for (m = m0; m->m_next != NULL; m = m->m_next) ; remainder = len; space = M_TRAILINGSPACE(m); if (space > 0) { /* * Copy into available space. */ if (space > remainder) space = remainder; bcopy(cp, mtod(m, caddr_t) + m->m_len, space); m->m_len += space; cp += space; remainder -= space; } while (remainder > 0) { /* * Allocate a new mbuf; could check space * and allocate a cluster instead.
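*
* Usage sketch (illustrative; this is how netvsc_recv() below feeds a
* flat receive buffer into an mbuf chain):
*
*	m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
*	hv_m_append(m_new, packet->tot_data_buf_len, packet->data);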
*/ n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); if (n == NULL) break; n->m_len = min(MJUMPAGESIZE, remainder); bcopy(cp, mtod(n, caddr_t), n->m_len); cp += n->m_len; remainder -= n->m_len; m->m_next = n; m = n; } if (m0->m_flags & M_PKTHDR) m0->m_pkthdr.len += len - remainder; return (remainder == 0); } /* * Called when we receive a data packet from the "wire" on the * specified device * * Note: This is no longer used as a callback */ int netvsc_recv(struct hv_vmbus_channel *chan, netvsc_packet *packet, - rndis_tcp_ip_csum_info *csum_info) + rndis_tcp_ip_csum_info *csum_info, + const struct rndis_hash_info *hash_info, + const struct rndis_hash_value *hash_value) { struct hn_rx_ring *rxr = chan->hv_chan_rxr; struct ifnet *ifp = rxr->hn_ifp; struct mbuf *m_new; int size, do_lro = 0, do_csum = 1; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return (0); /* * Bail out if packet contains more data than configured MTU. */ if (packet->tot_data_buf_len > (ifp->if_mtu + ETHER_HDR_LEN)) { return (0); } else if (packet->tot_data_buf_len <= MHLEN) { m_new = m_gethdr(M_NOWAIT, MT_DATA); if (m_new == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (0); } memcpy(mtod(m_new, void *), packet->data, packet->tot_data_buf_len); m_new->m_pkthdr.len = m_new->m_len = packet->tot_data_buf_len; rxr->hn_small_pkts++; } else { /* * Get an mbuf with a cluster. For packets 2K or less, * get a standard 2K cluster. For anything larger, get a * 4K cluster. Any buffers larger than 4K can cause problems * if looped around to the Hyper-V TX channel, so avoid them. */ size = MCLBYTES; if (packet->tot_data_buf_len > MCLBYTES) { /* 4096 */ size = MJUMPAGESIZE; } m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); if (m_new == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (0); } hv_m_append(m_new, packet->tot_data_buf_len, packet->data); } m_new->m_pkthdr.rcvif = ifp; if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0)) do_csum = 0; /* receive side checksum offload */ if (csum_info != NULL) { /* IP csum offload */ if (csum_info->receive.ip_csum_succeeded && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); rxr->hn_csum_ip++; } /* TCP/UDP csum offload */ if ((csum_info->receive.tcp_csum_succeeded || csum_info->receive.udp_csum_succeeded) && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; if (csum_info->receive.tcp_csum_succeeded) rxr->hn_csum_tcp++; else rxr->hn_csum_udp++; } if (csum_info->receive.ip_csum_succeeded && csum_info->receive.tcp_csum_succeeded) do_lro = 1; } else { const struct ether_header *eh; uint16_t etype; int hoff; hoff = sizeof(*eh); if (m_new->m_len < hoff) goto skip; eh = mtod(m_new, struct ether_header *); etype = ntohs(eh->ether_type); if (etype == ETHERTYPE_VLAN) { const struct ether_vlan_header *evl; hoff = sizeof(*evl); if (m_new->m_len < hoff) goto skip; evl = mtod(m_new, struct ether_vlan_header *); etype = ntohs(evl->evl_proto); } if (etype == ETHERTYPE_IP) { int pr; pr = hn_check_iplen(m_new, hoff); if (pr == IPPROTO_TCP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_TCP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } - /* Rely on SW csum verification though... 
*/ do_lro = 1; } else if (pr == IPPROTO_UDP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_UDP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } } else if (pr != IPPROTO_DONE && do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); } } } skip: if ((packet->vlan_tci != 0) && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) { m_new->m_pkthdr.ether_vtag = packet->vlan_tci; m_new->m_flags |= M_VLANTAG; } - m_new->m_pkthdr.flowid = rxr->hn_rx_idx; - M_HASHTYPE_SET(m_new, M_HASHTYPE_OPAQUE); + if (hash_info != NULL && hash_value != NULL) { + int hash_type = M_HASHTYPE_OPAQUE; + rxr->hn_rss_pkts++; + m_new->m_pkthdr.flowid = hash_value->hash_value; + if ((hash_info->hash_info & NDIS_HASH_FUNCTION_MASK) == + NDIS_HASH_FUNCTION_TOEPLITZ) { + uint32_t type = + (hash_info->hash_info & NDIS_HASH_TYPE_MASK); + + switch (type) { + case NDIS_HASH_IPV4: + hash_type = M_HASHTYPE_RSS_IPV4; + break; + + case NDIS_HASH_TCP_IPV4: + hash_type = M_HASHTYPE_RSS_TCP_IPV4; + break; + + case NDIS_HASH_IPV6: + hash_type = M_HASHTYPE_RSS_IPV6; + break; + + case NDIS_HASH_IPV6_EX: + hash_type = M_HASHTYPE_RSS_IPV6_EX; + break; + + case NDIS_HASH_TCP_IPV6: + hash_type = M_HASHTYPE_RSS_TCP_IPV6; + break; + + case NDIS_HASH_TCP_IPV6_EX: + hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; + break; + } + } + M_HASHTYPE_SET(m_new, hash_type); + } else { + if (hash_value != NULL) + m_new->m_pkthdr.flowid = hash_value->hash_value; + else + m_new->m_pkthdr.flowid = rxr->hn_rx_idx; + M_HASHTYPE_SET(m_new, M_HASHTYPE_OPAQUE); + } + /* * Note: Moved RX completion back to hv_nv_on_receive() so all * messages (not just data messages) will trigger a response. */ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); rxr->hn_pkts++; if ((ifp->if_capenable & IFCAP_LRO) && do_lro) { #if defined(INET) || defined(INET6) struct lro_ctrl *lro = &rxr->hn_lro; if (lro->lro_cnt) { rxr->hn_lro_tried++; if (tcp_lro_rx(lro, m_new, 0) == 0) { /* DONE! */ return 0; } } #endif } /* We're not holding the lock here, so don't release it */ (*ifp->if_input)(ifp, m_new); return (0); } /* * Rules for using sc->temp_unusable: * 1. sc->temp_unusable can only be read or written while holding NV_LOCK() * 2. code reading sc->temp_unusable under NV_LOCK(), and finding * sc->temp_unusable set, must release NV_LOCK() and exit * 3. to retain exclusive control of the interface, * sc->temp_unusable must be set by code before releasing NV_LOCK() * 4. only code setting sc->temp_unusable can clear sc->temp_unusable * 5. code setting sc->temp_unusable must eventually clear sc->temp_unusable */ /* * Standard ioctl entry point. Called when the user wants to configure * the interface. 
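*
* Before the handler itself, the temp_unusable rules above in sketch
* form (hn_temp_acquire_sketch/hn_temp_release_sketch are hypothetical
* helpers for illustration only; they are not part of this change):
*/

static __inline int
hn_temp_acquire_sketch(struct hn_softc *sc)
{
	int busy;

	NV_LOCK(sc);
	busy = sc->temp_unusable;
	if (!busy)
		sc->temp_unusable = TRUE;	/* rules 1 and 3 */
	NV_UNLOCK(sc);
	/* 0 means rule 2 applies: the caller must back off and retry. */
	return (!busy);
}

static __inline void
hn_temp_release_sketch(struct hn_softc *sc)
{
	/* Only the code that set the flag may clear it (rules 4 and 5). */
	NV_LOCK(sc);
	sc->temp_unusable = FALSE;
	NV_UNLOCK(sc);
}

/*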
*/ static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { hn_softc_t *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; #ifdef INET struct ifaddr *ifa = (struct ifaddr *)data; #endif netvsc_device_info device_info; struct hv_device *hn_dev; int mask, error = 0; int retry_cnt = 500; switch(cmd) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) hn_ifinit(sc); arp_ifinit(ifp, ifa); } else #endif error = ether_ioctl(ifp, cmd, data); break; case SIOCSIFMTU: hn_dev = vmbus_get_devctx(sc->hn_dev); /* Check MTU value change */ if (ifp->if_mtu == ifr->ifr_mtu) break; if (ifr->ifr_mtu > NETVSC_MAX_CONFIGURABLE_MTU) { error = EINVAL; break; } /* Obtain and record requested MTU */ ifp->if_mtu = ifr->ifr_mtu; #if __FreeBSD_version >= 1100099 /* * Make sure that LRO aggregation length limit is still * valid, after the MTU change. */ NV_LOCK(sc); if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); NV_UNLOCK(sc); #endif do { NV_LOCK(sc); if (!sc->temp_unusable) { sc->temp_unusable = TRUE; retry_cnt = -1; } NV_UNLOCK(sc); if (retry_cnt > 0) { retry_cnt--; DELAY(5 * 1000); } } while (retry_cnt > 0); if (retry_cnt == 0) { error = EINVAL; break; } /* We must remove and add back the device to cause the new * MTU to take effect. This includes tearing down, but not * deleting the channel, then bringing it back up. */ error = hv_rf_on_device_remove(hn_dev, HV_RF_NV_RETAIN_CHANNEL); if (error) { NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; } error = hv_rf_on_device_add(hn_dev, &device_info, sc->hn_rx_ring_inuse); if (error) { NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; } sc->hn_tx_chimney_max = sc->net_dev->send_section_size; if (sc->hn_tx_ring[0].hn_tx_chimney_size > sc->hn_tx_chimney_max) hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max); hn_ifinit_locked(sc); NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; case SIOCSIFFLAGS: do { NV_LOCK(sc); if (!sc->temp_unusable) { sc->temp_unusable = TRUE; retry_cnt = -1; } NV_UNLOCK(sc); if (retry_cnt > 0) { retry_cnt--; DELAY(5 * 1000); } } while (retry_cnt > 0); if (retry_cnt == 0) { error = EINVAL; break; } if (ifp->if_flags & IFF_UP) { /* * If only the state of the PROMISC flag changed, * then just use the 'set promisc mode' command * instead of reinitializing the entire NIC. Doing * a full re-init means reloading the firmware and * waiting for it to start up, which may take a * second or two. */ #ifdef notyet /* Fixme: Promiscuous mode? 
*/ if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_flags & IFF_PROMISC && !(sc->hn_if_flags & IFF_PROMISC)) { /* do something here for Hyper-V */ } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && !(ifp->if_flags & IFF_PROMISC) && sc->hn_if_flags & IFF_PROMISC) { /* do something here for Hyper-V */ } else #endif hn_ifinit_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { hn_stop(sc); } } NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); sc->hn_if_flags = ifp->if_flags; error = 0; break; case SIOCSIFCAP: NV_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= sc->hn_tx_ring[0].hn_csum_assist; } else { ifp->if_hwassist &= ~sc->hn_tx_ring[0].hn_csum_assist; } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_IP_TSO; else ifp->if_hwassist &= ~CSUM_IP_TSO; } if (mask & IFCAP_TSO6) { ifp->if_capenable ^= IFCAP_TSO6; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_IP6_TSO; else ifp->if_hwassist &= ~CSUM_IP6_TSO; } NV_UNLOCK(sc); error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: #ifdef notyet /* Fixme: Multicast mode? */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) { NV_LOCK(sc); netvsc_setmulti(sc); NV_UNLOCK(sc); error = 0; } #endif error = EINVAL; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } /* * */ static void hn_stop(hn_softc_t *sc) { struct ifnet *ifp; int ret, i; struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev); ifp = sc->hn_ifp; if (bootverbose) printf(" Closing Device ...\n"); atomic_clear_int(&ifp->if_drv_flags, (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; if_link_state_change(ifp, LINK_STATE_DOWN); sc->hn_initdone = 0; ret = hv_rf_on_close(device_ctx); } /* * FreeBSD transmit entry point */ static void hn_start(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } static void hn_start_txeof(struct hn_tx_ring *txr) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the OACTIVE earlier, with the hope, that * others could catch up. The task will clear the * flag again with the hn_tx_lock to avoid possible * races. 
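*
* In sketch form (illustrative), the contended path below is simply:
*
*	atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
*	taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
*
* and hn_start_txeof_taskfunc() clears the flag once more under
* hn_tx_lock before transmitting.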
*/ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } /* * */ static void hn_ifinit_locked(hn_softc_t *sc) { struct ifnet *ifp; struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev); int ret, i; ifp = sc->hn_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { return; } hv_promisc_mode = 1; ret = hv_rf_on_open(device_ctx); if (ret != 0) { return; } else { sc->hn_initdone = 1; } atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); if_link_state_change(ifp, LINK_STATE_UP); } /* * */ static void hn_ifinit(void *xsc) { hn_softc_t *sc = xsc; NV_LOCK(sc); if (sc->temp_unusable) { NV_UNLOCK(sc); return; } sc->temp_unusable = TRUE; NV_UNLOCK(sc); hn_ifinit_locked(sc); NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); } #ifdef LATER /* * */ static void hn_watchdog(struct ifnet *ifp) { hn_softc_t *sc; sc = ifp->if_softc; printf("hn%d: watchdog timeout -- resetting\n", sc->hn_unit); hn_ifinit(sc); /*???*/ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } #endif #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; unsigned int lenlim; int error; lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; error = sysctl_handle_int(oidp, &lenlim, 0, req); if (error || req->newptr == NULL) return error; if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || lenlim > TCP_LRO_LENGTH_MAX) return EINVAL; NV_LOCK(sc); hn_set_lro_lenlim(sc, lenlim); NV_UNLOCK(sc); return 0; } static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ackcnt, error, i; /* * lro_ackcnt_lim is append count limit, * +1 to turn it into aggregation limit. */ ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; error = sysctl_handle_int(oidp, &ackcnt, 0, req); if (error || req->newptr == NULL) return error; if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) return EINVAL; /* * Convert aggregation limit back to append * count limit. */ --ackcnt; NV_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_inuse; ++i) sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; NV_UNLOCK(sc); return 0; } #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int hcsum = arg2; int on, error, i; on = 0; if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) on = 1; error = sysctl_handle_int(oidp, &on, 0, req); if (error || req->newptr == NULL) return error; NV_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (on) rxr->hn_trust_hcsum |= hcsum; else rxr->hn_trust_hcsum &= ~hcsum; } NV_UNLOCK(sc); return 0; } static int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int chimney_size, error; chimney_size = sc->hn_tx_ring[0].hn_tx_chimney_size; error = sysctl_handle_int(oidp, &chimney_size, 0, req); if (error || req->newptr == NULL) return error; if (chimney_size > sc->hn_tx_chimney_max || chimney_size <= 0) return EINVAL; hn_set_tx_chimney_size(sc, chimney_size); return 0; } static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; u_long stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((u_long *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. 
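*
* Usage sketch (illustrative): a read returns this counter summed over
* all RX rings in use; a write resets it on every ring, e.g.:
*
*	sysctl dev.hn.0.lro_tried	(read the aggregate)
*	sysctl dev.hn.0.lro_tried=0	(clear it on all rings)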
*/ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; *((u_long *)((uint8_t *)rxr + ofs)) = 0; } return 0; } static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; uint64_t stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((uint64_t *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_64(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; } return 0; } static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_tx_ring *txr; u_long stat; stat = 0; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; stat += *((u_long *)((uint8_t *)txr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; *((u_long *)((uint8_t *)txr + ofs)) = 0; } return 0; } static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error, conf; struct hn_tx_ring *txr; txr = &sc->hn_tx_ring[0]; conf = *((int *)((uint8_t *)txr + ofs)); error = sysctl_handle_int(oidp, &conf, 0, req); if (error || req->newptr == NULL) return error; NV_LOCK(sc); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; *((int *)((uint8_t *)txr + ofs)) = conf; } NV_UNLOCK(sc); return 0; } static int hn_check_iplen(const struct mbuf *m, int hoff) { const struct ip *ip; int len, iphlen, iplen; const struct tcphdr *th; int thoff; /* TCP data offset */ len = hoff + sizeof(struct ip); /* The packet must be at least the size of an IP header. */ if (m->m_pkthdr.len < len) return IPPROTO_DONE; /* The fixed IP header must reside completely in the first mbuf. */ if (m->m_len < len) return IPPROTO_DONE; ip = mtodo(m, hoff); /* Bound check the packet's stated IP header length. */ iphlen = ip->ip_hl << 2; if (iphlen < sizeof(struct ip)) /* minimum header length */ return IPPROTO_DONE; /* The full IP header must reside completely in the one mbuf. */ if (m->m_len < hoff + iphlen) return IPPROTO_DONE; iplen = ntohs(ip->ip_len); /* * Check that the amount of data in the buffers is at * least as much as the IP header would have us expect. */ if (m->m_pkthdr.len < hoff + iplen) return IPPROTO_DONE; /* * Ignore IP fragments. */ if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) return IPPROTO_DONE; /* * The TCP/IP or UDP/IP header must be entirely contained within * the first fragment of a packet.
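*
* Usage sketch (illustrative, as in netvsc_recv() above): the return
* value is either a protocol number worth trusting or IPPROTO_DONE for
* "leave this packet alone":
*
*	pr = hn_check_iplen(m_new, hoff);
*	if (pr == IPPROTO_TCP)
*		(safe to set TCP csum bits and consider LRO)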
*/ switch (ip->ip_p) { case IPPROTO_TCP: if (iplen < iphlen + sizeof(struct tcphdr)) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) return IPPROTO_DONE; th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); thoff = th->th_off << 2; if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + thoff) return IPPROTO_DONE; break; case IPPROTO_UDP: if (iplen < iphlen + sizeof(struct udphdr)) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) return IPPROTO_DONE; break; default: if (iplen < iphlen) return IPPROTO_DONE; break; } return ip->ip_p; } static void hn_dma_map_paddr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *paddr = arg; if (error) return; KASSERT(nseg == 1, ("too many segments %d!", nseg)); *paddr = segs->ds_addr; } static void hn_create_rx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; device_t dev = sc->hn_dev; #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 int lroent_cnt; #endif #endif int i; sc->hn_rx_ring_cnt = ring_cnt; sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, M_NETVSC, M_WAITOK | M_ZERO); #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 lroent_cnt = hn_lro_entry_count; if (lroent_cnt < TCP_LRO_ENTRIES) lroent_cnt = TCP_LRO_ENTRIES; device_printf(dev, "LRO: entry count %d\n", lroent_cnt); #endif #endif /* INET || INET6 */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); /* Create dev.hn.UNIT.rx sysctl tree */ sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (hn_trust_hosttcp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; if (hn_trust_hostudp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; if (hn_trust_hostip) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; rxr->hn_ifp = sc->hn_ifp; rxr->hn_rx_idx = i; /* * Initialize LRO. 
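*
* In sketch form (illustrative): every RX ring gets a private LRO
* context; on kernels with tcp_lro_init_args() its entry count comes
* from the hn_lro_entry_count tunable, and the aggregation limits are
* seeded as below, adjustable later through the lro_length_lim and
* lro_ackcnt_lim sysctls:
*
*	rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF;
*	rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;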
*/ #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, 0); #else tcp_lro_init(&rxr->hn_lro); rxr->hn_lro.ifp = sc->hn_ifp; #endif #if __FreeBSD_version >= 1100099 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; #endif #endif /* INET || INET6 */ if (sc->hn_rx_sysctl_tree != NULL) { char name[16]; /* * Create per RX ring sysctl tree: * dev.hn.UNIT.rx.RINGID */ snprintf(name, sizeof(name), "%d", i); rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (rxr->hn_rx_sysctl_tree != NULL) { SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "packets", CTLFLAG_RW, &rxr->hn_pkts, "# of packets received"); + SYSCTL_ADD_ULONG(ctx, + SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), + OID_AUTO, "rss_pkts", CTLFLAG_RW, + &rxr->hn_rss_pkts, + "# of packets w/ RSS info received"); } } } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_queued), hn_rx_stat_u64_sysctl, "LU", "LRO queued"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), hn_rx_stat_u64_sysctl, "LU", "LRO flushed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro_tried), hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); #if __FreeBSD_version >= 1100099 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_lenlim_sysctl, "IU", "Max # of data bytes to be aggregated by LRO"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_ackcnt_sysctl, "I", "Max # of ACKs to be aggregated by LRO"); #endif SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, hn_trust_hcsum_sysctl, "I", "Trust tcp segment verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, hn_trust_hcsum_sysctl, "I", "Trust udp datagram verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, hn_trust_hcsum_sysctl, "I", "Trust ip packet verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_ip), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_tcp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_udp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_trusted), hn_rx_stat_ulong_sysctl, "LU", "# of packets that we trust host's csum verification"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", CTLTYPE_ULONG | CTLFLAG_RW |
CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_small_pkts), hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); } static void hn_destroy_rx_data(struct hn_softc *sc) { #if defined(INET) || defined(INET6) int i; #endif if (sc->hn_rx_ring_cnt == 0) return; #if defined(INET) || defined(INET6) for (i = 0; i < sc->hn_rx_ring_cnt; ++i) tcp_lro_free(&sc->hn_rx_ring[i].hn_lro); #endif free(sc->hn_rx_ring, M_NETVSC); sc->hn_rx_ring = NULL; sc->hn_rx_ring_cnt = 0; sc->hn_rx_ring_inuse = 0; } static int hn_create_tx_ring(struct hn_softc *sc, int id) { struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; bus_dma_tag_t parent_dtag; int error, i; txr->hn_sc = sc; txr->hn_tx_idx = id; #ifndef HN_USE_TXDESC_BUFRING mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); #endif mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); txr->hn_txdesc_cnt = HN_TX_DESC_CNT; txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, M_NETVSC, M_WAITOK | M_ZERO); #ifndef HN_USE_TXDESC_BUFRING SLIST_INIT(&txr->hn_txlist); #else txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_NETVSC, M_WAITOK, &txr->hn_tx_lock); #endif txr->hn_tx_taskq = sc->hn_tx_taskq; if (hn_use_if_start) { txr->hn_txeof = hn_start_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); } else { int br_depth; txr->hn_txeof = hn_xmit_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); br_depth = hn_get_txswq_depth(txr); txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_NETVSC, M_WAITOK, &txr->hn_tx_lock); } txr->hn_direct_tx_size = hn_direct_tx_size; if (hv_vmbus_protocal_version >= HV_VMBUS_VERSION_WIN8_1) txr->hn_csum_assist = HN_CSUM_ASSIST; else txr->hn_csum_assist = HN_CSUM_ASSIST_WIN8; /* * Always schedule transmission instead of trying to do direct * transmission. This one gives the best performance so far. */ txr->hn_sched_tx = 1; parent_dtag = bus_get_dma_tag(sc->hn_dev); /* DMA tag for RNDIS messages. */ error = bus_dma_tag_create(parent_dtag, /* parent */ HN_RNDIS_MSG_ALIGN, /* alignment */ HN_RNDIS_MSG_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_RNDIS_MSG_LEN, /* maxsize */ 1, /* nsegments */ HN_RNDIS_MSG_LEN, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_rndis_dtag); if (error) { device_printf(sc->hn_dev, "failed to create rndis dmatag\n"); return error; } /* DMA tag for data. */ error = bus_dma_tag_create(parent_dtag, /* parent */ 1, /* alignment */ HN_TX_DATA_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_TX_DATA_MAXSIZE, /* maxsize */ HN_TX_DATA_SEGCNT_MAX, /* nsegments */ HN_TX_DATA_SEGSIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_data_dtag); if (error) { device_printf(sc->hn_dev, "failed to create data dmatag\n"); return error; } for (i = 0; i < txr->hn_txdesc_cnt; ++i) { struct hn_txdesc *txd = &txr->hn_txdesc[i]; txd->txr = txr; /* * Allocate and load RNDIS messages. 
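*
* In sketch form (illustrative): each descriptor owns one coherent
* HN_RNDIS_MSG_LEN buffer whose physical address is captured once by
* hn_dma_map_paddr(), so hn_encap() can point page buffer 0 at it
* with no per-packet mapping:
*
*	packet->page_buffers[0].pfn = atop(txd->rndis_msg_paddr);
*	packet->page_buffers[0].offset = txd->rndis_msg_paddr & PAGE_MASK;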
*/ error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, (void **)&txd->rndis_msg, BUS_DMA_WAITOK | BUS_DMA_COHERENT, &txd->rndis_msg_dmap); if (error) { device_printf(sc->hn_dev, "failed to allocate rndis_msg, %d\n", i); return error; } error = bus_dmamap_load(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap, txd->rndis_msg, HN_RNDIS_MSG_LEN, hn_dma_map_paddr, &txd->rndis_msg_paddr, BUS_DMA_NOWAIT); if (error) { device_printf(sc->hn_dev, "failed to load rndis_msg, %d\n", i); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); return error; } /* DMA map for TX data. */ error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, &txd->data_dmap); if (error) { device_printf(sc->hn_dev, "failed to allocate tx data dmamap\n"); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); return error; } /* All set, put it to list */ txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); #else buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif } txr->hn_txdesc_avail = txr->hn_txdesc_cnt; if (sc->hn_tx_sysctl_tree != NULL) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; char name[16]; /* * Create per TX ring sysctl tree: * dev.hn.UNIT.tx.RINGID */ ctx = device_get_sysctl_ctx(sc->hn_dev); child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); snprintf(name, sizeof(name), "%d", id); txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (txr->hn_tx_sysctl_tree != NULL) { child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", CTLFLAG_RD, &txr->hn_txdesc_avail, 0, "# of available TX descs"); if (!hn_use_if_start) { SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", CTLFLAG_RD, &txr->hn_oactive, 0, "over active"); } SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", CTLFLAG_RW, &txr->hn_pkts, "# of packets transmitted"); } } return 0; } static void hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) { struct hn_tx_ring *txr = txd->txr; KASSERT(txd->m == NULL, ("still has mbuf installed")); KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); } static void hn_destroy_tx_ring(struct hn_tx_ring *txr) { struct hn_txdesc *txd; if (txr->hn_txdesc == NULL) return; #ifndef HN_USE_TXDESC_BUFRING while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) { SLIST_REMOVE_HEAD(&txr->hn_txlist, link); hn_txdesc_dmamap_destroy(txd); } #else mtx_lock(&txr->hn_tx_lock); while ((txd = buf_ring_dequeue_sc(txr->hn_txdesc_br)) != NULL) hn_txdesc_dmamap_destroy(txd); mtx_unlock(&txr->hn_tx_lock); #endif if (txr->hn_tx_data_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_data_dtag); if (txr->hn_tx_rndis_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); #ifdef HN_USE_TXDESC_BUFRING buf_ring_free(txr->hn_txdesc_br, M_NETVSC); #endif free(txr->hn_txdesc, M_NETVSC); txr->hn_txdesc = NULL; if (txr->hn_mbuf_br != NULL) buf_ring_free(txr->hn_mbuf_br, M_NETVSC); #ifndef HN_USE_TXDESC_BUFRING mtx_destroy(&txr->hn_txlist_spin); #endif mtx_destroy(&txr->hn_tx_lock); } static int hn_create_tx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; int i; sc->hn_tx_ring_cnt = ring_cnt; sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; sc->hn_tx_ring = 
malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, M_NETVSC, M_WAITOK | M_ZERO); ctx = device_get_sysctl_ctx(sc->hn_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); /* Create dev.hn.UNIT.tx sysctl tree */ sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { int error; error = hn_create_tx_ring(sc, i); if (error) return error; } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_no_txdescs), hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_send_failed), hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v send failures"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_txdma_failed), hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failures"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_collapsed), hn_tx_stat_ulong_sysctl, "LU", "# of TX mbufs collapsed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney), hn_tx_stat_ulong_sysctl, "LU", "# of chimney sends"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, "# of total TX descs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", CTLFLAG_RD, &sc->hn_tx_chimney_max, 0, "Chimney send packet size upper boundary"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_tx_chimney_size_sysctl, "I", "Chimney send packet size limit"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_direct_tx_size), hn_tx_conf_int_sysctl, "I", "Size of the packet for direct transmission"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_sched_tx), hn_tx_conf_int_sysctl, "I", "Always schedule transmission " "instead of doing direct transmission"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); return 0; } static void hn_set_tx_chimney_size(struct hn_softc *sc, int chimney_size) { int i; NV_LOCK(sc); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_tx_chimney_size = chimney_size; NV_UNLOCK(sc); } static void hn_destroy_tx_data(struct hn_softc *sc) { int i; if (sc->hn_tx_ring_cnt == 0) return; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) hn_destroy_tx_ring(&sc->hn_tx_ring[i]); free(sc->hn_tx_ring, M_NETVSC); sc->hn_tx_ring = NULL; sc->hn_tx_ring_cnt = 0; sc->hn_tx_ring_inuse = 0; } static void hn_start_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void
hn_start_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_stop_tx_tasks(struct hn_softc *sc) { int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task); taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static int hn_xmit(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; struct mbuf *m_head; mtx_assert(&txr->hn_tx_lock, MA_OWNED); KASSERT(hn_use_if_start == 0, ("hn_xmit is called, when if_start is enabled")); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) return 0; while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { struct hn_txdesc *txd; int error; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and sending of any * following up packets) to tx taskqueue. */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); return 1; } txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } error = hn_encap(txr, txd, &m_head); if (error) { /* Both txd and m_head are freed; discard */ drbr_advance(ifp, txr->hn_mbuf_br); continue; } error = hn_send_pkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } /* Sent */ drbr_advance(ifp, txr->hn_mbuf_br); } return 0; } static int hn_transmit(struct ifnet *ifp, struct mbuf *m) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr; int error, idx = 0; /* * Select the TX ring based on flowid */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; txr = &sc->hn_tx_ring[idx]; error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); if (error) { if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); return error; } if (txr->hn_oactive) return 0; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return 0; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); return 0; } static void hn_xmit_qflush(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; struct mbuf *m; mtx_lock(&txr->hn_tx_lock); while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) m_freem(m); mtx_unlock(&txr->hn_tx_lock); } if_qflush(ifp); } static void hn_xmit_txeof(struct hn_tx_ring *txr) { if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; txr->hn_oactive = 0; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the oactive earlier, with the hope, that * others could catch up. The task will clear the * oactive again with the hn_tx_lock to avoid possible * races. 
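*
* The task side in sketch form (illustrative, see
* hn_xmit_txeof_taskfunc() below):
*
*	mtx_lock(&txr->hn_tx_lock);
*	txr->hn_oactive = 0;		(re-cleared under the lock)
*	hn_xmit(txr, 0);
*	mtx_unlock(&txr->hn_tx_lock);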
*/ txr->hn_oactive = 0; taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static void hn_xmit_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); txr->hn_oactive = 0; hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_channel_attach(struct hn_softc *sc, struct hv_vmbus_channel *chan) { struct hn_rx_ring *rxr; int idx; idx = chan->offer_msg.offer.sub_channel_index; KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, ("invalid channel index %d, should be >= 0 && < %d", idx, sc->hn_rx_ring_inuse)); rxr = &sc->hn_rx_ring[idx]; KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, ("RX ring %d already attached", idx)); rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; chan->hv_chan_rxr = rxr; if (bootverbose) { if_printf(sc->hn_ifp, "link RX ring %d to channel%u\n", idx, chan->offer_msg.child_rel_id); } if (idx < sc->hn_tx_ring_inuse) { struct hn_tx_ring *txr = &sc->hn_tx_ring[idx]; KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, ("TX ring %d already attached", idx)); txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; chan->hv_chan_txr = txr; txr->hn_chan = chan; if (bootverbose) { if_printf(sc->hn_ifp, "link TX ring %d to channel%u\n", idx, chan->offer_msg.child_rel_id); } } /* Bind channel to a proper CPU */ vmbus_channel_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus); } static void hn_subchan_attach(struct hn_softc *sc, struct hv_vmbus_channel *chan) { KASSERT(!HV_VMBUS_CHAN_ISPRIMARY(chan), ("subchannel callback on primary channel")); KASSERT(chan->offer_msg.offer.sub_channel_index > 0, ("invalid channel subidx %u", chan->offer_msg.offer.sub_channel_index)); hn_channel_attach(sc, chan); } static void hn_tx_taskq_create(void *arg __unused) { if (!hn_share_tx_taskq) return; hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &hn_tx_taskq); if (hn_bind_tx_taskq >= 0) { int cpu = hn_bind_tx_taskq; cpuset_t cpu_set; if (cpu > mp_ncpus - 1) cpu = mp_ncpus - 1; CPU_SETOF(cpu, &cpu_set); taskqueue_start_threads_cpuset(&hn_tx_taskq, 1, PI_NET, &cpu_set, "hn tx"); } else { taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx"); } } SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_FIRST, hn_tx_taskq_create, NULL); static void hn_tx_taskq_destroy(void *arg __unused) { if (hn_tx_taskq != NULL) taskqueue_free(hn_tx_taskq); } SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_FIRST, hn_tx_taskq_destroy, NULL); static device_method_t netvsc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, netvsc_probe), DEVMETHOD(device_attach, netvsc_attach), DEVMETHOD(device_detach, netvsc_detach), DEVMETHOD(device_shutdown, netvsc_shutdown), { 0, 0 } }; static driver_t netvsc_driver = { NETVSC_DEVNAME, netvsc_methods, sizeof(hn_softc_t) }; static devclass_t netvsc_devclass; DRIVER_MODULE(hn, vmbus, netvsc_driver, netvsc_devclass, 0, 0); MODULE_VERSION(hn, 1); MODULE_DEPEND(hn, vmbus, 1, 1, 1); Index: head/sys/dev/hyperv/netvsc/hv_rndis.h =================================================================== --- head/sys/dev/hyperv/netvsc/hv_rndis.h (revision 299400) +++ head/sys/dev/hyperv/netvsc/hv_rndis.h (revision 299401) @@ -1,1079 +1,1100 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __HV_RNDIS_H__ #define __HV_RNDIS_H__ /* * NDIS protocol version numbers */ #define NDIS_VERSION_5_0 0x00050000 #define NDIS_VERSION_5_1 0x00050001 #define NDIS_VERSION_6_0 0x00060000 #define NDIS_VERSION_6_1 0x00060001 #define NDIS_VERSION_6_30 0x0006001e #define NDIS_VERSION (NDIS_VERSION_5_1) /* * Status codes */ #define STATUS_SUCCESS (0x00000000L) #define STATUS_UNSUCCESSFUL (0xC0000001L) #define STATUS_PENDING (0x00000103L) #define STATUS_INSUFFICIENT_RESOURCES (0xC000009AL) #define STATUS_BUFFER_OVERFLOW (0x80000005L) #define STATUS_NOT_SUPPORTED (0xC00000BBL) #define RNDIS_STATUS_SUCCESS (STATUS_SUCCESS) #define RNDIS_STATUS_PENDING (STATUS_PENDING) #define RNDIS_STATUS_NOT_RECOGNIZED (0x00010001L) #define RNDIS_STATUS_NOT_COPIED (0x00010002L) #define RNDIS_STATUS_NOT_ACCEPTED (0x00010003L) #define RNDIS_STATUS_CALL_ACTIVE (0x00010007L) #define RNDIS_STATUS_ONLINE (0x40010003L) #define RNDIS_STATUS_RESET_START (0x40010004L) #define RNDIS_STATUS_RESET_END (0x40010005L) #define RNDIS_STATUS_RING_STATUS (0x40010006L) #define RNDIS_STATUS_CLOSED (0x40010007L) #define RNDIS_STATUS_WAN_LINE_UP (0x40010008L) #define RNDIS_STATUS_WAN_LINE_DOWN (0x40010009L) #define RNDIS_STATUS_WAN_FRAGMENT (0x4001000AL) #define RNDIS_STATUS_MEDIA_CONNECT (0x4001000BL) #define RNDIS_STATUS_MEDIA_DISCONNECT (0x4001000CL) #define RNDIS_STATUS_HARDWARE_LINE_UP (0x4001000DL) #define RNDIS_STATUS_HARDWARE_LINE_DOWN (0x4001000EL) #define RNDIS_STATUS_INTERFACE_UP (0x4001000FL) #define RNDIS_STATUS_INTERFACE_DOWN (0x40010010L) #define RNDIS_STATUS_MEDIA_BUSY (0x40010011L) #define RNDIS_STATUS_MEDIA_SPECIFIC_INDICATION (0x40010012L) #define RNDIS_STATUS_WW_INDICATION RNDIS_STATUS_MEDIA_SPECIFIC_INDICATION #define RNDIS_STATUS_LINK_SPEED_CHANGE (0x40010013L) #define RNDIS_STATUS_NOT_RESETTABLE (0x80010001L) #define RNDIS_STATUS_SOFT_ERRORS (0x80010003L) #define RNDIS_STATUS_HARD_ERRORS (0x80010004L) #define RNDIS_STATUS_BUFFER_OVERFLOW (STATUS_BUFFER_OVERFLOW) #define RNDIS_STATUS_FAILURE (STATUS_UNSUCCESSFUL) #define RNDIS_STATUS_RESOURCES (STATUS_INSUFFICIENT_RESOURCES) #define RNDIS_STATUS_CLOSING (0xC0010002L) #define RNDIS_STATUS_BAD_VERSION (0xC0010004L) #define RNDIS_STATUS_BAD_CHARACTERISTICS (0xC0010005L) #define RNDIS_STATUS_ADAPTER_NOT_FOUND (0xC0010006L) #define 
RNDIS_STATUS_OPEN_FAILED (0xC0010007L) #define RNDIS_STATUS_DEVICE_FAILED (0xC0010008L) #define RNDIS_STATUS_MULTICAST_FULL (0xC0010009L) #define RNDIS_STATUS_MULTICAST_EXISTS (0xC001000AL) #define RNDIS_STATUS_MULTICAST_NOT_FOUND (0xC001000BL) #define RNDIS_STATUS_REQUEST_ABORTED (0xC001000CL) #define RNDIS_STATUS_RESET_IN_PROGRESS (0xC001000DL) #define RNDIS_STATUS_CLOSING_INDICATING (0xC001000EL) #define RNDIS_STATUS_NOT_SUPPORTED (STATUS_NOT_SUPPORTED) #define RNDIS_STATUS_INVALID_PACKET (0xC001000FL) #define RNDIS_STATUS_OPEN_LIST_FULL (0xC0010010L) #define RNDIS_STATUS_ADAPTER_NOT_READY (0xC0010011L) #define RNDIS_STATUS_ADAPTER_NOT_OPEN (0xC0010012L) #define RNDIS_STATUS_NOT_INDICATING (0xC0010013L) #define RNDIS_STATUS_INVALID_LENGTH (0xC0010014L) #define RNDIS_STATUS_INVALID_DATA (0xC0010015L) #define RNDIS_STATUS_BUFFER_TOO_SHORT (0xC0010016L) #define RNDIS_STATUS_INVALID_OID (0xC0010017L) #define RNDIS_STATUS_ADAPTER_REMOVED (0xC0010018L) #define RNDIS_STATUS_UNSUPPORTED_MEDIA (0xC0010019L) #define RNDIS_STATUS_GROUP_ADDRESS_IN_USE (0xC001001AL) #define RNDIS_STATUS_FILE_NOT_FOUND (0xC001001BL) #define RNDIS_STATUS_ERROR_READING_FILE (0xC001001CL) #define RNDIS_STATUS_ALREADY_MAPPED (0xC001001DL) #define RNDIS_STATUS_RESOURCE_CONFLICT (0xC001001EL) #define RNDIS_STATUS_NO_CABLE (0xC001001FL) #define RNDIS_STATUS_INVALID_SAP (0xC0010020L) #define RNDIS_STATUS_SAP_IN_USE (0xC0010021L) #define RNDIS_STATUS_INVALID_ADDRESS (0xC0010022L) #define RNDIS_STATUS_VC_NOT_ACTIVATED (0xC0010023L) #define RNDIS_STATUS_DEST_OUT_OF_ORDER (0xC0010024L) #define RNDIS_STATUS_VC_NOT_AVAILABLE (0xC0010025L) #define RNDIS_STATUS_CELLRATE_NOT_AVAILABLE (0xC0010026L) #define RNDIS_STATUS_INCOMPATABLE_QOS (0xC0010027L) #define RNDIS_STATUS_AAL_PARAMS_UNSUPPORTED (0xC0010028L) #define RNDIS_STATUS_NO_ROUTE_TO_DESTINATION (0xC0010029L) #define RNDIS_STATUS_TOKEN_RING_OPEN_ERROR (0xC0011000L) /* * Object Identifiers used by NdisRequest Query/Set Information */ /* * General Objects */ #define RNDIS_OID_GEN_SUPPORTED_LIST 0x00010101 #define RNDIS_OID_GEN_HARDWARE_STATUS 0x00010102 #define RNDIS_OID_GEN_MEDIA_SUPPORTED 0x00010103 #define RNDIS_OID_GEN_MEDIA_IN_USE 0x00010104 #define RNDIS_OID_GEN_MAXIMUM_LOOKAHEAD 0x00010105 #define RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE 0x00010106 #define RNDIS_OID_GEN_LINK_SPEED 0x00010107 #define RNDIS_OID_GEN_TRANSMIT_BUFFER_SPACE 0x00010108 #define RNDIS_OID_GEN_RECEIVE_BUFFER_SPACE 0x00010109 #define RNDIS_OID_GEN_TRANSMIT_BLOCK_SIZE 0x0001010A #define RNDIS_OID_GEN_RECEIVE_BLOCK_SIZE 0x0001010B #define RNDIS_OID_GEN_VENDOR_ID 0x0001010C #define RNDIS_OID_GEN_VENDOR_DESCRIPTION 0x0001010D #define RNDIS_OID_GEN_CURRENT_PACKET_FILTER 0x0001010E #define RNDIS_OID_GEN_CURRENT_LOOKAHEAD 0x0001010F #define RNDIS_OID_GEN_DRIVER_VERSION 0x00010110 #define RNDIS_OID_GEN_MAXIMUM_TOTAL_SIZE 0x00010111 #define RNDIS_OID_GEN_PROTOCOL_OPTIONS 0x00010112 #define RNDIS_OID_GEN_MAC_OPTIONS 0x00010113 #define RNDIS_OID_GEN_MEDIA_CONNECT_STATUS 0x00010114 #define RNDIS_OID_GEN_MAXIMUM_SEND_PACKETS 0x00010115 #define RNDIS_OID_GEN_VENDOR_DRIVER_VERSION 0x00010116 #define RNDIS_OID_GEN_NETWORK_LAYER_ADDRESSES 0x00010118 #define RNDIS_OID_GEN_TRANSPORT_HEADER_OFFSET 0x00010119 #define RNDIS_OID_GEN_MACHINE_NAME 0x0001021A #define RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER 0x0001021B /* * For receive side scale */ /* Query only */ #define RNDIS_OID_GEN_RSS_CAPABILITIES 0x00010203 /* Query and set */ #define RNDIS_OID_GEN_RSS_PARAMETERS 0x00010204 #define RNDIS_OID_GEN_XMIT_OK 0x00020101 #define 
RNDIS_OID_GEN_RCV_OK 0x00020102 #define RNDIS_OID_GEN_XMIT_ERROR 0x00020103 #define RNDIS_OID_GEN_RCV_ERROR 0x00020104 #define RNDIS_OID_GEN_RCV_NO_BUFFER 0x00020105 #define RNDIS_OID_GEN_DIRECTED_BYTES_XMIT 0x00020201 #define RNDIS_OID_GEN_DIRECTED_FRAMES_XMIT 0x00020202 #define RNDIS_OID_GEN_MULTICAST_BYTES_XMIT 0x00020203 #define RNDIS_OID_GEN_MULTICAST_FRAMES_XMIT 0x00020204 #define RNDIS_OID_GEN_BROADCAST_BYTES_XMIT 0x00020205 #define RNDIS_OID_GEN_BROADCAST_FRAMES_XMIT 0x00020206 #define RNDIS_OID_GEN_DIRECTED_BYTES_RCV 0x00020207 #define RNDIS_OID_GEN_DIRECTED_FRAMES_RCV 0x00020208 #define RNDIS_OID_GEN_MULTICAST_BYTES_RCV 0x00020209 #define RNDIS_OID_GEN_MULTICAST_FRAMES_RCV 0x0002020A #define RNDIS_OID_GEN_BROADCAST_BYTES_RCV 0x0002020B #define RNDIS_OID_GEN_BROADCAST_FRAMES_RCV 0x0002020C #define RNDIS_OID_GEN_RCV_CRC_ERROR 0x0002020D #define RNDIS_OID_GEN_TRANSMIT_QUEUE_LENGTH 0x0002020E #define RNDIS_OID_GEN_GET_TIME_CAPS 0x0002020F #define RNDIS_OID_GEN_GET_NETCARD_TIME 0x00020210 /* * These are connection-oriented general OIDs. * These replace the above OIDs for connection-oriented media. */ #define RNDIS_OID_GEN_CO_SUPPORTED_LIST 0x00010101 #define RNDIS_OID_GEN_CO_HARDWARE_STATUS 0x00010102 #define RNDIS_OID_GEN_CO_MEDIA_SUPPORTED 0x00010103 #define RNDIS_OID_GEN_CO_MEDIA_IN_USE 0x00010104 #define RNDIS_OID_GEN_CO_LINK_SPEED 0x00010105 #define RNDIS_OID_GEN_CO_VENDOR_ID 0x00010106 #define RNDIS_OID_GEN_CO_VENDOR_DESCRIPTION 0x00010107 #define RNDIS_OID_GEN_CO_DRIVER_VERSION 0x00010108 #define RNDIS_OID_GEN_CO_PROTOCOL_OPTIONS 0x00010109 #define RNDIS_OID_GEN_CO_MAC_OPTIONS 0x0001010A #define RNDIS_OID_GEN_CO_MEDIA_CONNECT_STATUS 0x0001010B #define RNDIS_OID_GEN_CO_VENDOR_DRIVER_VERSION 0x0001010C #define RNDIS_OID_GEN_CO_MINIMUM_LINK_SPEED 0x0001010D #define RNDIS_OID_GEN_CO_GET_TIME_CAPS 0x00010201 #define RNDIS_OID_GEN_CO_GET_NETCARD_TIME 0x00010202 /* * These are connection-oriented statistics OIDs. */ #define RNDIS_OID_GEN_CO_XMIT_PDUS_OK 0x00020101 #define RNDIS_OID_GEN_CO_RCV_PDUS_OK 0x00020102 #define RNDIS_OID_GEN_CO_XMIT_PDUS_ERROR 0x00020103 #define RNDIS_OID_GEN_CO_RCV_PDUS_ERROR 0x00020104 #define RNDIS_OID_GEN_CO_RCV_PDUS_NO_BUFFER 0x00020105 #define RNDIS_OID_GEN_CO_RCV_CRC_ERROR 0x00020201 #define RNDIS_OID_GEN_CO_TRANSMIT_QUEUE_LENGTH 0x00020202 #define RNDIS_OID_GEN_CO_BYTES_XMIT 0x00020203 #define RNDIS_OID_GEN_CO_BYTES_RCV 0x00020204 #define RNDIS_OID_GEN_CO_BYTES_XMIT_OUTSTANDING 0x00020205 #define RNDIS_OID_GEN_CO_NETCARD_LOAD 0x00020206 /* * These are objects for Connection-oriented media call-managers. 
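The OID values above follow the usual NDIS numbering layout: the upper 16 bits select the object family (0x0001 general operational, 0x0002 general statistics, 0x0101 802.3 operational, 0x0102 802.3 statistics), while the connection-oriented CO_* OIDs deliberately reuse the 0x0001/0x0002 ranges. A hypothetical classifier built on that pattern, shown only to make the layout explicit (not part of this driver):

#include <stdint.h>

#define RNDIS_OID_FAMILY(oid)		((uint32_t)(oid) >> 16)
#define RNDIS_OID_FAMILY_GEN_OP		0x0001	/* general, operational */
#define RNDIS_OID_FAMILY_GEN_STAT	0x0002	/* general, statistics */
#define RNDIS_OID_FAMILY_8023_OP	0x0101	/* 802.3, operational */
#define RNDIS_OID_FAMILY_8023_STAT	0x0102	/* 802.3, statistics */

/* Nonzero if the OID names a counter rather than a settable object. */
static int
rndis_oid_is_statistic(uint32_t oid)
{
	uint32_t fam = RNDIS_OID_FAMILY(oid);

	return (fam == RNDIS_OID_FAMILY_GEN_STAT ||
	    fam == RNDIS_OID_FAMILY_8023_STAT);
}

For example, rndis_oid_is_statistic(RNDIS_OID_GEN_XMIT_OK) is true (0x00020101) while RNDIS_OID_GEN_CURRENT_PACKET_FILTER (0x0001010E) is not; note the test cannot distinguish the connection-oriented aliases, which share the same numeric ranges.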
*/ #define RNDIS_OID_CO_ADD_PVC 0xFF000001 #define RNDIS_OID_CO_DELETE_PVC 0xFF000002 #define RNDIS_OID_CO_GET_CALL_INFORMATION 0xFF000003 #define RNDIS_OID_CO_ADD_ADDRESS 0xFF000004 #define RNDIS_OID_CO_DELETE_ADDRESS 0xFF000005 #define RNDIS_OID_CO_GET_ADDRESSES 0xFF000006 #define RNDIS_OID_CO_ADDRESS_CHANGE 0xFF000007 #define RNDIS_OID_CO_SIGNALING_ENABLED 0xFF000008 #define RNDIS_OID_CO_SIGNALING_DISABLED 0xFF000009 /* * 802.3 Objects (Ethernet) */ #define RNDIS_OID_802_3_PERMANENT_ADDRESS 0x01010101 #define RNDIS_OID_802_3_CURRENT_ADDRESS 0x01010102 #define RNDIS_OID_802_3_MULTICAST_LIST 0x01010103 #define RNDIS_OID_802_3_MAXIMUM_LIST_SIZE 0x01010104 #define RNDIS_OID_802_3_MAC_OPTIONS 0x01010105 /* * 802.3 MAC option bits (for RNDIS_OID_802_3_MAC_OPTIONS) */ #define NDIS_802_3_MAC_OPTION_PRIORITY 0x00000001 #define RNDIS_OID_802_3_RCV_ERROR_ALIGNMENT 0x01020101 #define RNDIS_OID_802_3_XMIT_ONE_COLLISION 0x01020102 #define RNDIS_OID_802_3_XMIT_MORE_COLLISIONS 0x01020103 #define RNDIS_OID_802_3_XMIT_DEFERRED 0x01020201 #define RNDIS_OID_802_3_XMIT_MAX_COLLISIONS 0x01020202 #define RNDIS_OID_802_3_RCV_OVERRUN 0x01020203 #define RNDIS_OID_802_3_XMIT_UNDERRUN 0x01020204 #define RNDIS_OID_802_3_XMIT_HEARTBEAT_FAILURE 0x01020205 #define RNDIS_OID_802_3_XMIT_TIMES_CRS_LOST 0x01020206 #define RNDIS_OID_802_3_XMIT_LATE_COLLISIONS 0x01020207 /* * RNDIS MP custom OID for test */ #define OID_RNDISMP_GET_RECEIVE_BUFFERS 0xFFA0C90D /* Query only */ /* * Remote NDIS message types */ #define REMOTE_NDIS_PACKET_MSG 0x00000001 #define REMOTE_NDIS_INITIALIZE_MSG 0x00000002 #define REMOTE_NDIS_HALT_MSG 0x00000003 #define REMOTE_NDIS_QUERY_MSG 0x00000004 #define REMOTE_NDIS_SET_MSG 0x00000005 #define REMOTE_NDIS_RESET_MSG 0x00000006 #define REMOTE_NDIS_INDICATE_STATUS_MSG 0x00000007 #define REMOTE_NDIS_KEEPALIVE_MSG 0x00000008 #define REMOTE_CONDIS_MP_CREATE_VC_MSG 0x00008001 #define REMOTE_CONDIS_MP_DELETE_VC_MSG 0x00008002 #define REMOTE_CONDIS_MP_ACTIVATE_VC_MSG 0x00008005 #define REMOTE_CONDIS_MP_DEACTIVATE_VC_MSG 0x00008006 #define REMOTE_CONDIS_INDICATE_STATUS_MSG 0x00008007 /* * Remote NDIS message completion types */ #define REMOTE_NDIS_INITIALIZE_CMPLT 0x80000002 #define REMOTE_NDIS_QUERY_CMPLT 0x80000004 #define REMOTE_NDIS_SET_CMPLT 0x80000005 #define REMOTE_NDIS_RESET_CMPLT 0x80000006 #define REMOTE_NDIS_KEEPALIVE_CMPLT 0x80000008 #define REMOTE_CONDIS_MP_CREATE_VC_CMPLT 0x80008001 #define REMOTE_CONDIS_MP_DELETE_VC_CMPLT 0x80008002 #define REMOTE_CONDIS_MP_ACTIVATE_VC_CMPLT 0x80008005 #define REMOTE_CONDIS_MP_DEACTIVATE_VC_CMPLT 0x80008006 /* * Reserved message type for private communication between lower-layer * host driver and remote device, if necessary. */ #define REMOTE_NDIS_BUS_MSG 0xff000001 /* * Defines for DeviceFlags in rndis_initialize_complete */ #define RNDIS_DF_CONNECTIONLESS 0x00000001 #define RNDIS_DF_CONNECTION_ORIENTED 0x00000002 #define RNDIS_DF_RAW_DATA 0x00000004 /* * Remote NDIS medium types. */ #define RNDIS_MEDIUM_802_3 0x00000000 #define RNDIS_MEDIUM_802_5 0x00000001 #define RNDIS_MEDIUM_FDDI 0x00000002 #define RNDIS_MEDIUM_WAN 0x00000003 #define RNDIS_MEDIUM_LOCAL_TALK 0x00000004 #define RNDIS_MEDIUM_ARCNET_RAW 0x00000006 #define RNDIS_MEDIUM_ARCNET_878_2 0x00000007 #define RNDIS_MEDIUM_ATM 0x00000008 #define RNDIS_MEDIUM_WIRELESS_WAN 0x00000009 #define RNDIS_MEDIUM_IRDA 0x0000000a #define RNDIS_MEDIUM_CO_WAN 0x0000000b /* Not a real medium, defined as an upper bound */ #define RNDIS_MEDIUM_MAX 0x0000000d /* * Remote NDIS medium connection states.
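Note the relationship between the two message-type tables above: every completion type is its request type with the top bit set (0x00000002 becomes 0x80000002, 0x00008001 becomes 0x80008001, and so on), so request/response pairing needs no lookup table. A small illustrative macro capturing that invariant (not part of the header; REMOTE_NDIS_PACKET_MSG and REMOTE_NDIS_HALT_MSG have no completions, and the reserved REMOTE_NDIS_BUS_MSG also carries the top bit without being a completion):

#define RNDIS_MSG_COMPLETION_FLAG	0x80000000u

/* Map a request type to the completion type the host answers with. */
#define RNDIS_MSG_COMPLETION(type)	((type) | RNDIS_MSG_COMPLETION_FLAG)

For example, RNDIS_MSG_COMPLETION(REMOTE_NDIS_SET_MSG) == REMOTE_NDIS_SET_CMPLT.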
*/ #define RNDIS_MEDIA_STATE_CONNECTED 0x00000000 #define RNDIS_MEDIA_STATE_DISCONNECTED 0x00000001 /* * Remote NDIS version numbers */ #define RNDIS_MAJOR_VERSION 0x00000001 #define RNDIS_MINOR_VERSION 0x00000000 /* * Remote NDIS offload parameters */ #define RNDIS_OBJECT_TYPE_DEFAULT 0x80 #define RNDIS_OFFLOAD_PARAMETERS_REVISION_3 3 #define RNDIS_OFFLOAD_PARAMETERS_NO_CHANGE 0 #define RNDIS_OFFLOAD_PARAMETERS_LSOV2_DISABLED 1 #define RNDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED 2 #define RNDIS_OFFLOAD_PARAMETERS_LSOV1_ENABLED 2 #define RNDIS_OFFLOAD_PARAMETERS_RSC_DISABLED 1 #define RNDIS_OFFLOAD_PARAMETERS_RSC_ENABLED 2 #define RNDIS_OFFLOAD_PARAMETERS_TX_RX_DISABLED 1 #define RNDIS_OFFLOAD_PARAMETERS_TX_ENABLED_RX_DISABLED 2 #define RNDIS_OFFLOAD_PARAMETERS_RX_ENABLED_TX_DISABLED 3 #define RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED 4 #define RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE 1 #define RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4 0 #define RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV6 1 #define RNDIS_OID_TCP_OFFLOAD_CURRENT_CONFIG 0xFC01020B /* query only */ #define RNDIS_OID_TCP_OFFLOAD_PARAMETERS 0xFC01020C /* set only */ #define RNDIS_OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020D/* query only */ #define RNDIS_OID_TCP_CONNECTION_OFFLOAD_CURRENT_CONFIG 0xFC01020E /* query only */ #define RNDIS_OID_TCP_CONNECTION_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020F /* query */ #define RNDIS_OID_OFFLOAD_ENCAPSULATION 0x0101010A /* set/query */ /* * NdisInitialize message */ typedef struct rndis_initialize_request_ { /* RNDIS request ID */ uint32_t request_id; uint32_t major_version; uint32_t minor_version; uint32_t max_xfer_size; } rndis_initialize_request; /* * Response to NdisInitialize */ typedef struct rndis_initialize_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; uint32_t major_version; uint32_t minor_version; uint32_t device_flags; /* RNDIS medium */ uint32_t medium; uint32_t max_pkts_per_msg; uint32_t max_xfer_size; uint32_t pkt_align_factor; uint32_t af_list_offset; uint32_t af_list_size; } rndis_initialize_complete; /* * Call manager devices only: Information about an address family * supported by the device is appended to the response to NdisInitialize. 
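Filling rndis_initialize_request is straightforward; a minimal sketch (assuming a caller-supplied request id, and mirroring what hv_rf_init_device() in hv_rndis_filter.c below actually sends):

static void
rndis_fill_init_request(rndis_initialize_request *req, uint32_t rid)
{
	req->request_id = rid;
	req->major_version = RNDIS_MAJOR_VERSION;	/* 1 */
	req->minor_version = RNDIS_MINOR_VERSION;	/* 0 */
	/* hv_rf_init_device() uses a fixed 2048 rather than MTU + headers. */
	req->max_xfer_size = 2048;
}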
*/ typedef struct rndis_co_address_family_ { /* RNDIS AF */ uint32_t address_family; uint32_t major_version; uint32_t minor_version; } rndis_co_address_family; /* * NdisHalt message */ typedef struct rndis_halt_request_ { /* RNDIS request ID */ uint32_t request_id; } rndis_halt_request; /* * NdisQueryRequest message */ typedef struct rndis_query_request_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS OID */ uint32_t oid; uint32_t info_buffer_length; uint32_t info_buffer_offset; /* RNDIS handle */ uint32_t device_vc_handle; } rndis_query_request; /* * Response to NdisQueryRequest */ typedef struct rndis_query_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; uint32_t info_buffer_length; uint32_t info_buffer_offset; } rndis_query_complete; /* * NdisSetRequest message */ typedef struct rndis_set_request_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS OID */ uint32_t oid; uint32_t info_buffer_length; uint32_t info_buffer_offset; /* RNDIS handle */ uint32_t device_vc_handle; } rndis_set_request; /* * Response to NdisSetRequest */ typedef struct rndis_set_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rndis_set_complete; /* * NdisReset message */ typedef struct rndis_reset_request_ { uint32_t reserved; } rndis_reset_request; /* * Response to NdisReset */ typedef struct rndis_reset_complete_ { /* RNDIS status */ uint32_t status; uint32_t addressing_reset; } rndis_reset_complete; /* * NdisMIndicateStatus message */ typedef struct rndis_indicate_status_ { /* RNDIS status */ uint32_t status; uint32_t status_buf_length; uint32_t status_buf_offset; } rndis_indicate_status; /* * Diagnostic information passed as the status buffer in * rndis_indicate_status messages signifying error conditions. */ typedef struct rndis_diagnostic_info_ { /* RNDIS status */ uint32_t diag_status; uint32_t error_offset; } rndis_diagnostic_info; /* * NdisKeepAlive message */ typedef struct rndis_keepalive_request_ { /* RNDIS request ID */ uint32_t request_id; } rndis_keepalive_request; /* * Response to NdisKeepAlive */ typedef struct rndis_keepalive_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rndis_keepalive_complete; /* * Data message. All offset fields contain byte offsets from the beginning * of the rndis_packet structure. All length fields are in bytes. * VcHandle is set to 0 for connectionless data, otherwise it * contains the VC handle. */ typedef struct rndis_packet_ { uint32_t data_offset; uint32_t data_length; uint32_t oob_data_offset; uint32_t oob_data_length; uint32_t num_oob_data_elements; uint32_t per_pkt_info_offset; uint32_t per_pkt_info_length; /* RNDIS handle */ uint32_t vc_handle; uint32_t reserved; } rndis_packet; typedef struct rndis_packet_ex_ { uint32_t data_offset; uint32_t data_length; uint32_t oob_data_offset; uint32_t oob_data_length; uint32_t num_oob_data_elements; uint32_t per_pkt_info_offset; uint32_t per_pkt_info_length; /* RNDIS handle */ uint32_t vc_handle; uint32_t reserved; uint64_t data_buf_id; uint32_t data_buf_offset; uint64_t next_header_buf_id; uint32_t next_header_byte_offset; uint32_t next_header_byte_count; } rndis_packet_ex; /* * Optional Out of Band data associated with a Data message. */ typedef struct rndis_oobd_ { uint32_t size; /* RNDIS class ID */ uint32_t type; uint32_t class_info_offset; } rndis_oobd; /* * Packet extension field contents associated with a Data message. 
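Since the offsets in rndis_packet are relative to the rndis_packet structure itself, not to the enclosing rndis_msg, code that starts from a full message must first skip the 8-byte (ndis_msg_type, msg_len) header; that is what RNDIS_HEADER_SIZE, defined further down, accounts for, and hv_rf_receive_data() below computes exactly this. A minimal sketch:

/* Locate the payload of a REMOTE_NDIS_PACKET_MSG within a full rndis_msg. */
static void *
rndis_packet_data(rndis_msg *msg)
{
	rndis_packet *pkt = &msg->msg.packet;

	return ((char *)msg + RNDIS_HEADER_SIZE + pkt->data_offset);
}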
*/ typedef struct rndis_per_packet_info_ { uint32_t size; uint32_t type; uint32_t per_packet_info_offset; } rndis_per_packet_info; typedef enum ndis_per_pkt_infotype_ { tcpip_chksum_info, ipsec_info, tcp_large_send_info, classification_handle_info, ndis_reserved, sgl_info, ieee_8021q_info, original_pkt_info, pkt_cancel_id, original_netbuf_list, cached_netbuf_list, short_pkt_padding_info, max_perpkt_info } ndis_per_pkt_infotype; #define nbl_hash_value pkt_cancel_id +#define nbl_hash_info original_netbuf_list typedef struct ndis_8021q_info_ { union { struct { uint32_t user_pri : 3; /* User Priority */ uint32_t cfi : 1; /* Canonical Format ID */ uint32_t vlan_id : 12; uint32_t reserved : 16; } s1; uint32_t value; } u1; } ndis_8021q_info; -struct ndis_hash_info { - uint32_t hash; -} __packed; - struct rndis_object_header { uint8_t type; uint8_t revision; uint16_t size; }; typedef struct rndis_offload_params_ { struct rndis_object_header header; uint8_t ipv4_csum; uint8_t tcp_ipv4_csum; uint8_t udp_ipv4_csum; uint8_t tcp_ipv6_csum; uint8_t udp_ipv6_csum; uint8_t lso_v1; uint8_t ip_sec_v1; uint8_t lso_v2_ipv4; uint8_t lso_v2_ipv6; uint8_t tcp_connection_ipv4; uint8_t tcp_connection_ipv6; uint32_t flags; uint8_t ip_sec_v2; uint8_t ip_sec_v2_ipv4; struct { uint8_t rsc_ipv4; uint8_t rsc_ipv6; }; struct { uint8_t encapsulated_packet_task_offload; uint8_t encapsulation_types; }; } rndis_offload_params; typedef struct rndis_tcp_ip_csum_info_ { union { struct { uint32_t is_ipv4:1; uint32_t is_ipv6:1; uint32_t tcp_csum:1; uint32_t udp_csum:1; uint32_t ip_header_csum:1; uint32_t reserved:11; uint32_t tcp_header_offset:10; } xmit; struct { uint32_t tcp_csum_failed:1; uint32_t udp_csum_failed:1; uint32_t ip_csum_failed:1; uint32_t tcp_csum_succeeded:1; uint32_t udp_csum_succeeded:1; uint32_t ip_csum_succeeded:1; uint32_t loopback:1; uint32_t tcp_csum_value_invalid:1; uint32_t ip_csum_value_invalid:1; } receive; uint32_t value; }; } rndis_tcp_ip_csum_info; +struct rndis_hash_value { + uint32_t hash_value; +} __packed; + +struct rndis_hash_info { + uint32_t hash_info; +} __packed; + +#define NDIS_HASH_FUNCTION_MASK 0x000000FF /* see hash function */ +#define NDIS_HASH_TYPE_MASK 0x00FFFF00 /* see hash type */ + +/* hash function */ +#define NDIS_HASH_FUNCTION_TOEPLITZ 0x00000001 + +/* hash type */ +#define NDIS_HASH_IPV4 0x00000100 +#define NDIS_HASH_TCP_IPV4 0x00000200 +#define NDIS_HASH_IPV6 0x00000400 +#define NDIS_HASH_IPV6_EX 0x00000800 +#define NDIS_HASH_TCP_IPV6 0x00001000 +#define NDIS_HASH_TCP_IPV6_EX 0x00002000 + typedef struct rndis_tcp_tso_info_ { union { struct { uint32_t unused:30; uint32_t type:1; uint32_t reserved2:1; } xmit; struct { uint32_t mss:20; uint32_t tcp_header_offset:10; uint32_t type:1; uint32_t reserved2:1; } lso_v1_xmit; struct { uint32_t tcp_payload:30; uint32_t type:1; uint32_t reserved2:1; } lso_v1_xmit_complete; struct { uint32_t mss:20; uint32_t tcp_header_offset:10; uint32_t type:1; uint32_t ip_version:1; } lso_v2_xmit; struct { uint32_t reserved:30; uint32_t type:1; uint32_t reserved2:1; } lso_v2_xmit_complete; uint32_t value; }; } rndis_tcp_tso_info; -#define RNDIS_HASH_PPI_SIZE (sizeof(rndis_per_packet_info) + \ - sizeof(struct ndis_hash_info)) +#define RNDIS_HASHVAL_PPI_SIZE (sizeof(rndis_per_packet_info) + \ + sizeof(struct rndis_hash_value)) #define RNDIS_VLAN_PPI_SIZE (sizeof(rndis_per_packet_info) + \ sizeof(ndis_8021q_info)) #define RNDIS_CSUM_PPI_SIZE (sizeof(rndis_per_packet_info) + \ sizeof(rndis_tcp_ip_csum_info)) #define RNDIS_TSO_PPI_SIZE 
(sizeof(rndis_per_packet_info) + \ sizeof(rndis_tcp_tso_info)) /* * Format of Information buffer passed in a SetRequest for the OID * OID_GEN_RNDIS_CONFIG_PARAMETER. */ typedef struct rndis_config_parameter_info_ { uint32_t parameter_name_offset; uint32_t parameter_name_length; uint32_t parameter_type; uint32_t parameter_value_offset; uint32_t parameter_value_length; } rndis_config_parameter_info; /* * Values for ParameterType in rndis_config_parameter_info */ #define RNDIS_CONFIG_PARAM_TYPE_INTEGER 0 #define RNDIS_CONFIG_PARAM_TYPE_STRING 2 /* * CONDIS Miniport messages for connection oriented devices * that do not implement a call manager. */ /* * CoNdisMiniportCreateVc message */ typedef struct rcondis_mp_create_vc_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS handle */ uint32_t ndis_vc_handle; } rcondis_mp_create_vc; /* * Response to CoNdisMiniportCreateVc */ typedef struct rcondis_mp_create_vc_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS handle */ uint32_t device_vc_handle; /* RNDIS status */ uint32_t status; } rcondis_mp_create_vc_complete; /* * CoNdisMiniportDeleteVc message */ typedef struct rcondis_mp_delete_vc_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS handle */ uint32_t device_vc_handle; } rcondis_mp_delete_vc; /* * Response to CoNdisMiniportDeleteVc */ typedef struct rcondis_mp_delete_vc_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rcondis_mp_delete_vc_complete; /* * CoNdisMiniportQueryRequest message */ typedef struct rcondis_mp_query_request_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS request type */ uint32_t request_type; /* RNDIS OID */ uint32_t oid; /* RNDIS handle */ uint32_t device_vc_handle; uint32_t info_buf_length; uint32_t info_buf_offset; } rcondis_mp_query_request; /* * CoNdisMiniportSetRequest message */ typedef struct rcondis_mp_set_request_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS request type */ uint32_t request_type; /* RNDIS OID */ uint32_t oid; /* RNDIS handle */ uint32_t device_vc_handle; uint32_t info_buf_length; uint32_t info_buf_offset; } rcondis_mp_set_request; /* * CoNdisIndicateStatus message */ typedef struct rcondis_indicate_status_ { /* RNDIS handle */ uint32_t ndis_vc_handle; /* RNDIS status */ uint32_t status; uint32_t status_buf_length; uint32_t status_buf_offset; } rcondis_indicate_status; /* * CONDIS Call/VC parameters */ typedef struct rcondis_specific_parameters_ { uint32_t parameter_type; uint32_t parameter_length; uint32_t parameter_offset; } rcondis_specific_parameters; typedef struct rcondis_media_parameters_ { uint32_t flags; uint32_t reserved1; uint32_t reserved2; rcondis_specific_parameters media_specific; } rcondis_media_parameters; typedef struct rndis_flowspec_ { uint32_t token_rate; uint32_t token_bucket_size; uint32_t peak_bandwidth; uint32_t latency; uint32_t delay_variation; uint32_t service_type; uint32_t max_sdu_size; uint32_t minimum_policed_size; } rndis_flowspec; typedef struct rcondis_call_manager_parameters_ { rndis_flowspec transmit; rndis_flowspec receive; rcondis_specific_parameters call_mgr_specific; } rcondis_call_manager_parameters; /* * CoNdisMiniportActivateVc message */ typedef struct rcondis_mp_activate_vc_request_ { /* RNDIS request ID */ uint32_t request_id; uint32_t flags; /* RNDIS handle */ uint32_t device_vc_handle; uint32_t media_params_offset; uint32_t media_params_length; uint32_t call_mgr_params_offset; uint32_t call_mgr_params_length; } rcondis_mp_activate_vc_request; /* * Response to 
CoNdisMiniportActivateVc */ typedef struct rcondis_mp_activate_vc_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rcondis_mp_activate_vc_complete; /* * CoNdisMiniportDeactivateVc message */ typedef struct rcondis_mp_deactivate_vc_request_ { /* RNDIS request ID */ uint32_t request_id; uint32_t flags; /* RNDIS handle */ uint32_t device_vc_handle; } rcondis_mp_deactivate_vc_request; /* * Response to CoNdisMiniportDeactivateVc */ typedef struct rcondis_mp_deactivate_vc_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rcondis_mp_deactivate_vc_complete; /* * union with all of the RNDIS messages */ typedef union rndis_msg_container_ { rndis_packet packet; rndis_initialize_request init_request; rndis_halt_request halt_request; rndis_query_request query_request; rndis_set_request set_request; rndis_reset_request reset_request; rndis_keepalive_request keepalive_request; rndis_indicate_status indicate_status; rndis_initialize_complete init_complete; rndis_query_complete query_complete; rndis_set_complete set_complete; rndis_reset_complete reset_complete; rndis_keepalive_complete keepalive_complete; rcondis_mp_create_vc co_miniport_create_vc; rcondis_mp_delete_vc co_miniport_delete_vc; rcondis_indicate_status co_miniport_status; rcondis_mp_activate_vc_request co_miniport_activate_vc; rcondis_mp_deactivate_vc_request co_miniport_deactivate_vc; rcondis_mp_create_vc_complete co_miniport_create_vc_complete; rcondis_mp_delete_vc_complete co_miniport_delete_vc_complete; rcondis_mp_activate_vc_complete co_miniport_activate_vc_complete; rcondis_mp_deactivate_vc_complete co_miniport_deactivate_vc_complete; rndis_packet_ex packet_ex; } rndis_msg_container; /* * Remote NDIS message format */ typedef struct rndis_msg_ { uint32_t ndis_msg_type; /* * Total length of this message, from the beginning * of the rndis_msg struct, in bytes. */ uint32_t msg_len; /* Actual message */ rndis_msg_container msg; } rndis_msg; /* * Handy macros */ /* * get the size of an RNDIS message. 
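Before any of these macros are applied, a received buffer has to be plausibly an rndis_msg at all. The driver trusts the host here; a defensive sketch of the minimal check (hypothetical, not performed by this code) would be:

/* Both header fields must be present and msg_len must fit the buffer. */
static int
rndis_msg_plausible(const rndis_msg *msg, uint32_t buflen)
{
	if (buflen < 2 * sizeof(uint32_t))
		return (0);
	return (msg->msg_len >= 2 * sizeof(uint32_t) &&
	    msg->msg_len <= buflen);
}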
Pass in the message type, * rndis_set_request, rndis_packet for example */ #define RNDIS_MESSAGE_SIZE(message) \ (sizeof(message) + (sizeof(rndis_msg) - sizeof(rndis_msg_container))) /* * get pointer to info buffer with message pointer */ #define MESSAGE_TO_INFO_BUFFER(message) \ (((PUCHAR)(message)) + message->InformationBufferOffset) /* * get pointer to status buffer with message pointer */ #define MESSAGE_TO_STATUS_BUFFER(message) \ (((PUCHAR)(message)) + message->StatusBufferOffset) /* * get pointer to OOBD buffer with message pointer */ #define MESSAGE_TO_OOBD_BUFFER(message) \ (((PUCHAR)(message)) + message->OOBDataOffset) /* * get pointer to data buffer with message pointer */ #define MESSAGE_TO_DATA_BUFFER(message) \ (((PUCHAR)(message)) + message->PerPacketInfoOffset) /* * get pointer to contained message from NDIS_MESSAGE pointer */ #define RNDIS_MESSAGE_PTR_TO_MESSAGE_PTR(rndis_message) \ ((void *) &rndis_message->Message) /* * get pointer to contained message from NDIS_MESSAGE pointer */ #define RNDIS_MESSAGE_RAW_PTR_TO_MESSAGE_PTR(rndis_message) \ ((void *) rndis_message) /* * Structures used in OID_RNDISMP_GET_RECEIVE_BUFFERS */ #define RNDISMP_RECEIVE_BUFFER_ELEM_FLAG_VMQ_RECEIVE_BUFFER 0x00000001 typedef struct rndismp_rx_buf_elem_ { uint32_t flags; uint32_t length; uint64_t rx_buf_id; uint32_t gpadl_handle; void *rx_buf; } rndismp_rx_buf_elem; typedef struct rndismp_rx_bufs_info_ { uint32_t num_rx_bufs; rndismp_rx_buf_elem rx_buf_elems[1]; } rndismp_rx_bufs_info; #define RNDIS_HEADER_SIZE (sizeof(rndis_msg) - sizeof(rndis_msg_container)) #define NDIS_PACKET_TYPE_DIRECTED 0x00000001 #define NDIS_PACKET_TYPE_MULTICAST 0x00000002 #define NDIS_PACKET_TYPE_ALL_MULTICAST 0x00000004 #define NDIS_PACKET_TYPE_BROADCAST 0x00000008 #define NDIS_PACKET_TYPE_SOURCE_ROUTING 0x00000010 #define NDIS_PACKET_TYPE_PROMISCUOUS 0x00000020 #define NDIS_PACKET_TYPE_SMT 0x00000040 #define NDIS_PACKET_TYPE_ALL_LOCAL 0x00000080 #define NDIS_PACKET_TYPE_GROUP 0x00000100 #define NDIS_PACKET_TYPE_ALL_FUNCTIONAL 0x00000200 #define NDIS_PACKET_TYPE_FUNCTIONAL 0x00000400 #define NDIS_PACKET_TYPE_MAC_FRAME 0x00000800 /* * Externs */ struct hv_vmbus_channel; int netvsc_recv(struct hv_vmbus_channel *chan, - netvsc_packet *packet, rndis_tcp_ip_csum_info *csum_info); + netvsc_packet *packet, rndis_tcp_ip_csum_info *csum_info, + const struct rndis_hash_info *hash_info, + const struct rndis_hash_value *hash_value); void netvsc_channel_rollup(struct hv_vmbus_channel *chan); void* hv_set_rppi_data(rndis_msg *rndis_mesg, uint32_t rppi_size, int pkt_type); void* hv_get_ppi_data(rndis_packet *rpkt, uint32_t type); #endif /* __HV_RNDIS_H__ */ Index: head/sys/dev/hyperv/netvsc/hv_rndis_filter.c =================================================================== --- head/sys/dev/hyperv/netvsc/hv_rndis_filter.c (revision 299400) +++ head/sys/dev/hyperv/netvsc/hv_rndis_filter.c (revision 299401) @@ -1,1185 +1,1189 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_net_vsc.h" #include "hv_rndis.h" #include "hv_rndis_filter.h" /* * Forward declarations */ static int hv_rf_send_request(rndis_device *device, rndis_request *request, uint32_t message_type); static void hv_rf_receive_response(rndis_device *device, rndis_msg *response); static void hv_rf_receive_indicate_status(rndis_device *device, rndis_msg *response); static void hv_rf_receive_data(rndis_device *device, rndis_msg *message, struct hv_vmbus_channel *chan, netvsc_packet *pkt); static int hv_rf_query_device(rndis_device *device, uint32_t oid, void *result, uint32_t *result_size); static inline int hv_rf_query_device_mac(rndis_device *device); static inline int hv_rf_query_device_link_status(rndis_device *device); static int hv_rf_set_packet_filter(rndis_device *device, uint32_t new_filter); static int hv_rf_init_device(rndis_device *device); static int hv_rf_open_device(rndis_device *device); static int hv_rf_close_device(rndis_device *device); static void hv_rf_on_send_request_completion(struct hv_vmbus_channel *, void *context); static void hv_rf_on_send_request_halt_completion(struct hv_vmbus_channel *, void *context); int hv_rf_send_offload_request(struct hv_device *device, rndis_offload_params *offloads); /* * Set the Per-Packet-Info with the specified type */ void * hv_set_rppi_data(rndis_msg *rndis_mesg, uint32_t rppi_size, int pkt_type) { rndis_packet *rndis_pkt; rndis_per_packet_info *rppi; rndis_pkt = &rndis_mesg->msg.packet; rndis_pkt->data_offset += rppi_size; rppi = (rndis_per_packet_info *)((char *)rndis_pkt + rndis_pkt->per_pkt_info_offset + rndis_pkt->per_pkt_info_length); rppi->size = rppi_size; rppi->type = pkt_type; rppi->per_packet_info_offset = sizeof(rndis_per_packet_info); rndis_pkt->per_pkt_info_length += rppi_size; return (rppi); } /* * Get the Per-Packet-Info with the specified type * return NULL if not found. */ void * hv_get_ppi_data(rndis_packet *rpkt, uint32_t type) { rndis_per_packet_info *ppi; int len; if (rpkt->per_pkt_info_offset == 0) return (NULL); ppi = (rndis_per_packet_info *)((unsigned long)rpkt + rpkt->per_pkt_info_offset); len = rpkt->per_pkt_info_length; while (len > 0) { if (ppi->type == type) return (void *)((unsigned long)ppi + ppi->per_packet_info_offset); len -= ppi->size; ppi = (rndis_per_packet_info *)((unsigned long)ppi + ppi->size); } return (NULL); } /* * Allow module_param to work and override to switch to promiscuous mode. 
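Typical use of hv_get_ppi_data() above: given a received rndis_packet, look up a typed metadata blob and fall back gracefully when the host did not attach one. This is exactly how hv_rf_receive_data() further down fetches the checksum PPI and, new in this change, the hash PPIs (rpkt here stands for any received rndis_packet):

	rndis_tcp_ip_csum_info *csum;
	const struct rndis_hash_value *hashval;

	csum = hv_get_ppi_data(rpkt, tcpip_chksum_info);	/* NULL if absent */
	hashval = hv_get_ppi_data(rpkt, nbl_hash_value);	/* NULL if absent */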
*/ static inline rndis_device * hv_get_rndis_device(void) { rndis_device *device; device = malloc(sizeof(rndis_device), M_NETVSC, M_WAITOK | M_ZERO); mtx_init(&device->req_lock, "HV-FRL", NULL, MTX_DEF); /* Same effect as STAILQ_HEAD_INITIALIZER() static initializer */ STAILQ_INIT(&device->myrequest_list); device->state = RNDIS_DEV_UNINITIALIZED; return (device); } /* * Tear down and free an rndis_device created by hv_get_rndis_device(). */ static inline void hv_put_rndis_device(rndis_device *device) { mtx_destroy(&device->req_lock); free(device, M_NETVSC); } /* * Allocate an rndis_request of the given message type and length, assign * it a fresh request id and queue it on the device's request list. */ static inline rndis_request * hv_rndis_request(rndis_device *device, uint32_t message_type, uint32_t message_length) { rndis_request *request; rndis_msg *rndis_mesg; rndis_set_request *set; request = malloc(sizeof(rndis_request), M_NETVSC, M_WAITOK | M_ZERO); sema_init(&request->wait_sema, 0, "rndis sema"); rndis_mesg = &request->request_msg; rndis_mesg->ndis_msg_type = message_type; rndis_mesg->msg_len = message_length; /* * Set the request id. This field is always after the rndis header * for request/response packet types so we just use the set_request * as a template. */ set = &rndis_mesg->msg.set_request; set->request_id = atomic_fetchadd_int(&device->new_request_id, 1); /* Increment to get the new value (call above returns old value) */ set->request_id += 1; /* Add to the request list */ mtx_lock(&device->req_lock); STAILQ_INSERT_TAIL(&device->myrequest_list, request, mylist_entry); mtx_unlock(&device->req_lock); return (request); } /* * Remove an rndis_request from the device's request list and free it. */ static inline void hv_put_rndis_request(rndis_device *device, rndis_request *request) { mtx_lock(&device->req_lock); /* Fixme: Has O(n) performance */ /* * XXXKYS: Use Doubly linked lists. */ STAILQ_REMOVE(&device->myrequest_list, request, rndis_request_, mylist_entry); mtx_unlock(&device->req_lock); sema_destroy(&request->wait_sema); free(request, M_NETVSC); } /* * Marshal an RNDIS request into page buffers (or the chimney send buffer, * when it fits) and hand it to the netvsc layer for transmission. */ static int hv_rf_send_request(rndis_device *device, rndis_request *request, uint32_t message_type) { int ret; netvsc_packet *packet; netvsc_dev *net_dev = device->net_dev; int send_buf_section_idx; /* Set up the packet to send it */ packet = &request->pkt; packet->is_data_pkt = FALSE; packet->tot_data_buf_len = request->request_msg.msg_len; packet->page_buf_count = 1; packet->page_buffers[0].pfn = hv_get_phys_addr(&request->request_msg) >> PAGE_SHIFT; packet->page_buffers[0].length = request->request_msg.msg_len; packet->page_buffers[0].offset = (unsigned long)&request->request_msg & (PAGE_SIZE - 1); if (packet->page_buffers[0].offset + packet->page_buffers[0].length > PAGE_SIZE) { packet->page_buf_count = 2; packet->page_buffers[0].length = PAGE_SIZE - packet->page_buffers[0].offset; packet->page_buffers[1].pfn = hv_get_phys_addr((char*)&request->request_msg + packet->page_buffers[0].length) >> PAGE_SHIFT; packet->page_buffers[1].offset = 0; packet->page_buffers[1].length = request->request_msg.msg_len - packet->page_buffers[0].length; } packet->compl.send.send_completion_context = request; /* packet */ if (message_type != REMOTE_NDIS_HALT_MSG) { packet->compl.send.on_send_completion = hv_rf_on_send_request_completion; } else { packet->compl.send.on_send_completion = hv_rf_on_send_request_halt_completion; } packet->compl.send.send_completion_tid = (unsigned long)device; if (packet->tot_data_buf_len < net_dev->send_section_size) { send_buf_section_idx = hv_nv_get_next_send_section(net_dev); if (send_buf_section_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { char *dest = ((char *)net_dev->send_buf + send_buf_section_idx * net_dev->send_section_size); memcpy(dest,
&request->request_msg, request->request_msg.msg_len); packet->send_buf_section_idx = send_buf_section_idx; packet->send_buf_section_size = packet->tot_data_buf_len; packet->page_buf_count = 0; goto sendit; } /* Failed to allocate chimney send buffer; move on */ } packet->send_buf_section_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; packet->send_buf_section_size = 0; sendit: ret = hv_nv_on_send(device->net_dev->dev->channel, packet); return (ret); } /* * RNDIS filter receive response */ static void hv_rf_receive_response(rndis_device *device, rndis_msg *response) { rndis_request *request = NULL; rndis_request *next_request; boolean_t found = FALSE; mtx_lock(&device->req_lock); request = STAILQ_FIRST(&device->myrequest_list); while (request != NULL) { /* * All request/response messages contain request_id as the * first field */ if (request->request_msg.msg.init_request.request_id == response->msg.init_complete.request_id) { found = TRUE; break; } next_request = STAILQ_NEXT(request, mylist_entry); request = next_request; } mtx_unlock(&device->req_lock); if (found) { if (response->msg_len <= sizeof(rndis_msg)) { memcpy(&request->response_msg, response, response->msg_len); } else { if (response->ndis_msg_type == REMOTE_NDIS_RESET_CMPLT) { /* Does not have a request id field */ request->response_msg.msg.reset_complete.status = STATUS_BUFFER_OVERFLOW; } else { request->response_msg.msg.init_complete.status = STATUS_BUFFER_OVERFLOW; } } sema_post(&request->wait_sema); } } int hv_rf_send_offload_request(struct hv_device *device, rndis_offload_params *offloads) { rndis_request *request; rndis_set_request *set; rndis_offload_params *offload_req; rndis_set_complete *set_complete; rndis_device *rndis_dev; hn_softc_t *sc = device_get_softc(device->device); device_t dev = device->device; netvsc_dev *net_dev = sc->net_dev; uint32_t vsp_version = net_dev->nvsp_version; uint32_t extlen = sizeof(rndis_offload_params); int ret; if (vsp_version <= NVSP_PROTOCOL_VERSION_4) { extlen = VERSION_4_OFFLOAD_SIZE; /* On NVSP_PROTOCOL_VERSION_4 and below, we do not support * UDP checksum offload.
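hv_rf_receive_response() above can read msg.init_complete.request_id no matter which completion actually arrived because every request and completion in rndis_msg_container begins with a uint32_t request_id (the reset messages, which lack one, are special-cased via REMOTE_NDIS_RESET_CMPLT). A compile-time sketch of that layout invariant, using C11 _Static_assert purely for illustration (the kernel itself would use CTASSERT):

#include <stddef.h>

_Static_assert(offsetof(rndis_initialize_complete, request_id) == 0 &&
    offsetof(rndis_query_complete, request_id) == 0 &&
    offsetof(rndis_set_complete, request_id) == 0 &&
    offsetof(rndis_keepalive_complete, request_id) == 0,
    "request_id must lead every completion so the union members alias");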
*/ offloads->udp_ipv4_csum = 0; offloads->udp_ipv6_csum = 0; } rndis_dev = net_dev->extension; request = hv_rndis_request(rndis_dev, REMOTE_NDIS_SET_MSG, RNDIS_MESSAGE_SIZE(rndis_set_request) + extlen); if (!request) return (ENOMEM); set = &request->request_msg.msg.set_request; set->oid = RNDIS_OID_TCP_OFFLOAD_PARAMETERS; set->info_buffer_length = extlen; set->info_buffer_offset = sizeof(rndis_set_request); set->device_vc_handle = 0; offload_req = (rndis_offload_params *)((unsigned long)set + set->info_buffer_offset); *offload_req = *offloads; offload_req->header.type = RNDIS_OBJECT_TYPE_DEFAULT; offload_req->header.revision = RNDIS_OFFLOAD_PARAMETERS_REVISION_3; offload_req->header.size = extlen; ret = hv_rf_send_request(rndis_dev, request, REMOTE_NDIS_SET_MSG); if (ret != 0) { device_printf(dev, "hv send offload request failed, ret=%d!\n", ret); goto cleanup; } ret = sema_timedwait(&request->wait_sema, 5 * hz); if (ret != 0) { device_printf(dev, "hv send offload request timeout\n"); goto cleanup; } set_complete = &request->response_msg.msg.set_complete; if (set_complete->status == RNDIS_STATUS_SUCCESS) { device_printf(dev, "hv send offload request succeeded\n"); ret = 0; } else { if (set_complete->status == STATUS_NOT_SUPPORTED) { device_printf(dev, "HV Not support offload\n"); ret = 0; } else { ret = set_complete->status; } } cleanup: hv_put_rndis_request(rndis_dev, request); return (ret); } /* * RNDIS filter receive indicate status */ static void hv_rf_receive_indicate_status(rndis_device *device, rndis_msg *response) { rndis_indicate_status *indicate = &response->msg.indicate_status; switch(indicate->status) { case RNDIS_STATUS_MEDIA_CONNECT: netvsc_linkstatus_callback(device->net_dev->dev, 1); break; case RNDIS_STATUS_MEDIA_DISCONNECT: netvsc_linkstatus_callback(device->net_dev->dev, 0); break; default: /* TODO: */ device_printf(device->net_dev->dev->device, "unknown status %d received\n", indicate->status); break; } } /* * RNDIS filter receive data */ static void hv_rf_receive_data(rndis_device *device, rndis_msg *message, struct hv_vmbus_channel *chan, netvsc_packet *pkt) { rndis_packet *rndis_pkt; ndis_8021q_info *rppi_vlan_info; uint32_t data_offset; rndis_tcp_ip_csum_info *csum_info = NULL; + const struct rndis_hash_info *hash_info; + const struct rndis_hash_value *hash_value; device_t dev = device->net_dev->dev->device; rndis_pkt = &message->msg.packet; /* * Fixme: Handle multiple rndis pkt msgs that may be enclosed in this * netvsc packet (ie tot_data_buf_len != message_length) */ /* Remove rndis header, then pass data packet up the stack */ data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset; pkt->tot_data_buf_len -= data_offset; if (pkt->tot_data_buf_len < rndis_pkt->data_length) { pkt->status = nvsp_status_failure; device_printf(dev, "total length %u is less than data length %u\n", pkt->tot_data_buf_len, rndis_pkt->data_length); return; } pkt->tot_data_buf_len = rndis_pkt->data_length; pkt->data = (void *)((unsigned long)pkt->data + data_offset); rppi_vlan_info = hv_get_ppi_data(rndis_pkt, ieee_8021q_info); if (rppi_vlan_info) { pkt->vlan_tci = rppi_vlan_info->u1.s1.vlan_id; } else { pkt->vlan_tci = 0; } csum_info = hv_get_ppi_data(rndis_pkt, tcpip_chksum_info); - netvsc_recv(chan, pkt, csum_info); + hash_value = hv_get_ppi_data(rndis_pkt, nbl_hash_value); + hash_info = hv_get_ppi_data(rndis_pkt, nbl_hash_info); + netvsc_recv(chan, pkt, csum_info, hash_info, hash_value); } /* * RNDIS filter on receive */ int hv_rf_on_receive(netvsc_dev *net_dev, struct hv_device 
*device, struct hv_vmbus_channel *chan, netvsc_packet *pkt) { rndis_device *rndis_dev; rndis_msg *rndis_hdr; /* Make sure the rndis device state is initialized */ if (net_dev->extension == NULL) { pkt->status = nvsp_status_failure; return (ENODEV); } rndis_dev = (rndis_device *)net_dev->extension; if (rndis_dev->state == RNDIS_DEV_UNINITIALIZED) { pkt->status = nvsp_status_failure; return (EINVAL); } rndis_hdr = pkt->data; switch (rndis_hdr->ndis_msg_type) { /* data message */ case REMOTE_NDIS_PACKET_MSG: hv_rf_receive_data(rndis_dev, rndis_hdr, chan, pkt); break; /* completion messages */ case REMOTE_NDIS_INITIALIZE_CMPLT: case REMOTE_NDIS_QUERY_CMPLT: case REMOTE_NDIS_SET_CMPLT: case REMOTE_NDIS_RESET_CMPLT: case REMOTE_NDIS_KEEPALIVE_CMPLT: hv_rf_receive_response(rndis_dev, rndis_hdr); break; /* notification message */ case REMOTE_NDIS_INDICATE_STATUS_MSG: hv_rf_receive_indicate_status(rndis_dev, rndis_hdr); break; default: printf("hv_rf_on_receive(): Unknown msg_type 0x%x\n", rndis_hdr->ndis_msg_type); break; } return (0); } /* * RNDIS filter query device */ static int hv_rf_query_device(rndis_device *device, uint32_t oid, void *result, uint32_t *result_size) { rndis_request *request; uint32_t in_result_size = *result_size; rndis_query_request *query; rndis_query_complete *query_complete; int ret = 0; *result_size = 0; request = hv_rndis_request(device, REMOTE_NDIS_QUERY_MSG, RNDIS_MESSAGE_SIZE(rndis_query_request)); if (request == NULL) { ret = -1; goto cleanup; } /* Set up the rndis query */ query = &request->request_msg.msg.query_request; query->oid = oid; query->info_buffer_offset = sizeof(rndis_query_request); query->info_buffer_length = 0; query->device_vc_handle = 0; if (oid == RNDIS_OID_GEN_RSS_CAPABILITIES) { struct rndis_recv_scale_cap *cap; request->request_msg.msg_len += sizeof(struct rndis_recv_scale_cap); query->info_buffer_length = sizeof(struct rndis_recv_scale_cap); cap = (struct rndis_recv_scale_cap *)((unsigned long)query + query->info_buffer_offset); cap->hdr.type = RNDIS_OBJECT_TYPE_RSS_CAPABILITIES; cap->hdr.rev = RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2; cap->hdr.size = sizeof(struct rndis_recv_scale_cap); } ret = hv_rf_send_request(device, request, REMOTE_NDIS_QUERY_MSG); if (ret != 0) { /* Fixme: printf added */ printf("RNDISFILTER request failed to Send!\n"); goto cleanup; } sema_wait(&request->wait_sema); /* Copy the response back */ query_complete = &request->response_msg.msg.query_complete; if (query_complete->info_buffer_length > in_result_size) { ret = EINVAL; goto cleanup; } memcpy(result, (void *)((unsigned long)query_complete + query_complete->info_buffer_offset), query_complete->info_buffer_length); *result_size = query_complete->info_buffer_length; cleanup: if (request != NULL) hv_put_rndis_request(device, request); return (ret); } /* * RNDIS filter query device MAC address */ static inline int hv_rf_query_device_mac(rndis_device *device) { uint32_t size = HW_MACADDR_LEN; return (hv_rf_query_device(device, RNDIS_OID_802_3_PERMANENT_ADDRESS, device->hw_mac_addr, &size)); } /* * RNDIS filter query device link status */ static inline int hv_rf_query_device_link_status(rndis_device *device) { uint32_t size = sizeof(uint32_t); return (hv_rf_query_device(device, RNDIS_OID_GEN_MEDIA_CONNECT_STATUS, &device->link_status, &size)); } static uint8_t netvsc_hash_key[HASH_KEYLEN] = { 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 
0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa }; /* * RNDIS set vRSS parameters */ static int hv_rf_set_rss_param(rndis_device *device, int num_queue) { rndis_request *request; rndis_set_request *set; rndis_set_complete *set_complete; rndis_recv_scale_param *rssp; uint32_t extlen = sizeof(rndis_recv_scale_param) + (4 * ITAB_NUM) + HASH_KEYLEN; uint32_t *itab, status; uint8_t *keyp; int i, ret; request = hv_rndis_request(device, REMOTE_NDIS_SET_MSG, RNDIS_MESSAGE_SIZE(rndis_set_request) + extlen); if (request == NULL) { if (bootverbose) printf("Netvsc: No memory to set vRSS parameters.\n"); ret = -1; goto cleanup; } set = &request->request_msg.msg.set_request; set->oid = RNDIS_OID_GEN_RSS_PARAMETERS; set->info_buffer_length = extlen; set->info_buffer_offset = sizeof(rndis_set_request); set->device_vc_handle = 0; /* Fill out the rssp parameter structure */ rssp = (rndis_recv_scale_param *)(set + 1); rssp->hdr.type = RNDIS_OBJECT_TYPE_RSS_PARAMETERS; rssp->hdr.rev = RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2; rssp->hdr.size = sizeof(rndis_recv_scale_param); rssp->flag = 0; rssp->hashinfo = RNDIS_HASH_FUNC_TOEPLITZ | RNDIS_HASH_IPV4 | RNDIS_HASH_TCP_IPV4 | RNDIS_HASH_IPV6 | RNDIS_HASH_TCP_IPV6; rssp->indirect_tabsize = 4 * ITAB_NUM; rssp->indirect_taboffset = sizeof(rndis_recv_scale_param); rssp->hashkey_size = HASH_KEYLEN; rssp->hashkey_offset = rssp->indirect_taboffset + rssp->indirect_tabsize; /* Set indirection table entries */ itab = (uint32_t *)(rssp + 1); for (i = 0; i < ITAB_NUM; i++) itab[i] = i % num_queue; /* Set hash key values */ keyp = (uint8_t *)((unsigned long)rssp + rssp->hashkey_offset); for (i = 0; i < HASH_KEYLEN; i++) keyp[i] = netvsc_hash_key[i]; ret = hv_rf_send_request(device, request, REMOTE_NDIS_SET_MSG); if (ret != 0) { goto cleanup; } /* * Wait for the response from the host. Another thread will signal * us when the response has arrived. In the failure case, * sema_timedwait() returns a non-zero status after waiting 5 seconds. */ ret = sema_timedwait(&request->wait_sema, 5 * hz); if (ret == 0) { /* Response received, check status */ set_complete = &request->response_msg.msg.set_complete; status = set_complete->status; if (status != RNDIS_STATUS_SUCCESS) { /* Bad response status, return error */ if (bootverbose) printf("Netvsc: Failed to set vRSS " "parameters.\n"); ret = -2; } else { if (bootverbose) printf("Netvsc: Successfully set vRSS " "parameters.\n"); } } else { /* * We cannot deallocate the request since we may still * receive a send completion for it. */ printf("Netvsc: vRSS set timeout, id = %u, ret = %d\n", request->request_msg.msg.init_request.request_id, ret); goto exit; } cleanup: if (request != NULL) { hv_put_rndis_request(device, request); } exit: return (ret); } /* * RNDIS filter set packet filter * Sends an rndis request with the new filter, then waits for a response * from the host. * Returns zero on success, non-zero on failure. 
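The set-request payload that hv_rf_set_rss_param() above builds is self-describing; its layout, annotated with the offsets the code fills in:

/*
 * info_buffer_offset  -> +----------------------------+
 *                        | rndis_recv_scale_param     |
 * indirect_taboffset -> +----------------------------+
 *                        | uint32_t itab[ITAB_NUM]    |  itab[i] = i % num_queue
 * hashkey_offset     -> +----------------------------+
 *                        | uint8_t key[HASH_KEYLEN]   |  copy of netvsc_hash_key
 *                        +----------------------------+
 */

The round-robin fill simply spreads the 128 hash buckets across the allocated channels: with num_queue == 4, buckets 0, 4, 8, ... land on channel 0, buckets 1, 5, 9, ... on channel 1, and so on.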
*/ static int hv_rf_set_packet_filter(rndis_device *device, uint32_t new_filter) { rndis_request *request; rndis_set_request *set; rndis_set_complete *set_complete; uint32_t status; int ret; request = hv_rndis_request(device, REMOTE_NDIS_SET_MSG, RNDIS_MESSAGE_SIZE(rndis_set_request) + sizeof(uint32_t)); if (request == NULL) { ret = -1; goto cleanup; } /* Set up the rndis set */ set = &request->request_msg.msg.set_request; set->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER; set->info_buffer_length = sizeof(uint32_t); set->info_buffer_offset = sizeof(rndis_set_request); memcpy((void *)((unsigned long)set + sizeof(rndis_set_request)), &new_filter, sizeof(uint32_t)); ret = hv_rf_send_request(device, request, REMOTE_NDIS_SET_MSG); if (ret != 0) { goto cleanup; } /* * Wait for the response from the host. Another thread will signal * us when the response has arrived. In the failure case, * sema_timedwait() returns a non-zero status after waiting 5 seconds. */ ret = sema_timedwait(&request->wait_sema, 5 * hz); if (ret == 0) { /* Response received, check status */ set_complete = &request->response_msg.msg.set_complete; status = set_complete->status; if (status != RNDIS_STATUS_SUCCESS) { /* Bad response status, return error */ ret = -2; } } else { /* * We cannot deallocate the request since we may still * receive a send completion for it. */ goto exit; } cleanup: if (request != NULL) { hv_put_rndis_request(device, request); } exit: return (ret); } /* * RNDIS filter init device */ static int hv_rf_init_device(rndis_device *device) { rndis_request *request; rndis_initialize_request *init; rndis_initialize_complete *init_complete; uint32_t status; int ret; request = hv_rndis_request(device, REMOTE_NDIS_INITIALIZE_MSG, RNDIS_MESSAGE_SIZE(rndis_initialize_request)); if (!request) { ret = -1; goto cleanup; } /* Set up the rndis set */ init = &request->request_msg.msg.init_request; init->major_version = RNDIS_MAJOR_VERSION; init->minor_version = RNDIS_MINOR_VERSION; /* * Per the RNDIS document, this should be set to the max MTU * plus the header size. However, 2048 works fine, so leaving * it as is. */ init->max_xfer_size = 2048; device->state = RNDIS_DEV_INITIALIZING; ret = hv_rf_send_request(device, request, REMOTE_NDIS_INITIALIZE_MSG); if (ret != 0) { device->state = RNDIS_DEV_UNINITIALIZED; goto cleanup; } sema_wait(&request->wait_sema); init_complete = &request->response_msg.msg.init_complete; status = init_complete->status; if (status == RNDIS_STATUS_SUCCESS) { device->state = RNDIS_DEV_INITIALIZED; ret = 0; } else { device->state = RNDIS_DEV_UNINITIALIZED; ret = -1; } cleanup: if (request) { hv_put_rndis_request(device, request); } return (ret); } #define HALT_COMPLETION_WAIT_COUNT 25 /* * RNDIS filter halt device */ static int hv_rf_halt_device(rndis_device *device) { rndis_request *request; rndis_halt_request *halt; int i, ret; /* Attempt to do a rndis device halt */ request = hv_rndis_request(device, REMOTE_NDIS_HALT_MSG, RNDIS_MESSAGE_SIZE(rndis_halt_request)); if (request == NULL) { return (-1); } /* initialize "poor man's semaphore" */ request->halt_complete_flag = 0; /* Set up the rndis set */ halt = &request->request_msg.msg.halt_request; halt->request_id = atomic_fetchadd_int(&device->new_request_id, 1); /* Increment to get the new value (call above returns old value) */ halt->request_id += 1; ret = hv_rf_send_request(device, request, REMOTE_NDIS_HALT_MSG); if (ret != 0) { return (-1); } /* * Wait for halt response from halt callback. 
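The halt path that follows deliberately avoids sleeping on wait_sema and instead polls request->halt_complete_flag, bounding the wait at HALT_COMPLETION_WAIT_COUNT * 400us, i.e. roughly 10 ms. The same pattern in isolation (a sketch; DELAY() is the FreeBSD kernel busy-wait primitive):

/* Poll *flag up to 'tries' times, pausing delay_us between checks. */
static int
poll_completion_flag(volatile int *flag, int tries, int delay_us)
{
	while (tries-- > 0) {
		if (*flag != 0)
			return (0);	/* completed */
		DELAY(delay_us);	/* spin; never sleeps */
	}
	return (-1);			/* timed out */
}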
We must wait for * the transaction response before freeing the request and other * resources. */ for (i=HALT_COMPLETION_WAIT_COUNT; i > 0; i--) { if (request->halt_complete_flag != 0) { break; } DELAY(400); } if (i == 0) { return (-1); } device->state = RNDIS_DEV_UNINITIALIZED; hv_put_rndis_request(device, request); return (0); } /* * RNDIS filter open device */ static int hv_rf_open_device(rndis_device *device) { int ret; if (device->state != RNDIS_DEV_INITIALIZED) { return (0); } if (hv_promisc_mode != 1) { ret = hv_rf_set_packet_filter(device, NDIS_PACKET_TYPE_BROADCAST | NDIS_PACKET_TYPE_ALL_MULTICAST | NDIS_PACKET_TYPE_DIRECTED); } else { ret = hv_rf_set_packet_filter(device, NDIS_PACKET_TYPE_PROMISCUOUS); } if (ret == 0) { device->state = RNDIS_DEV_DATAINITIALIZED; } return (ret); } /* * RNDIS filter close device */ static int hv_rf_close_device(rndis_device *device) { int ret; if (device->state != RNDIS_DEV_DATAINITIALIZED) { return (0); } ret = hv_rf_set_packet_filter(device, 0); if (ret == 0) { device->state = RNDIS_DEV_INITIALIZED; } return (ret); } /* * RNDIS filter on device add */ int hv_rf_on_device_add(struct hv_device *device, void *additl_info, int nchan) { int ret; netvsc_dev *net_dev; rndis_device *rndis_dev; nvsp_msg *init_pkt; rndis_offload_params offloads; struct rndis_recv_scale_cap rsscaps; uint32_t rsscaps_size = sizeof(struct rndis_recv_scale_cap); netvsc_device_info *dev_info = (netvsc_device_info *)additl_info; device_t dev = device->device; rndis_dev = hv_get_rndis_device(); if (rndis_dev == NULL) { return (ENOMEM); } /* * Let the inner driver handle this first to create the netvsc channel * NOTE! Once the channel is created, we may get a receive callback * (hv_rf_on_receive()) before this call is completed. * Note: Earlier code used a function pointer here. 
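For reference, the rndis_device state machine driven by the init/open/close/halt functions above:

/*
 *  RNDIS_DEV_UNINITIALIZED  --hv_rf_init_device()-->   RNDIS_DEV_INITIALIZING
 *  RNDIS_DEV_INITIALIZING   --success-->               RNDIS_DEV_INITIALIZED
 *  RNDIS_DEV_INITIALIZED    --hv_rf_open_device()-->   RNDIS_DEV_DATAINITIALIZED
 *  RNDIS_DEV_DATAINITIALIZED --hv_rf_close_device()--> RNDIS_DEV_INITIALIZED
 *  (any state)              --hv_rf_halt_device()-->   RNDIS_DEV_UNINITIALIZED
 */

A hypothetical helper (not in the driver) making explicit the only state in which data transfer is legal:

static int
hv_rf_dev_ready_for_data(const rndis_device *dev)
{
	return (dev->state == RNDIS_DEV_DATAINITIALIZED);
}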
*/ net_dev = hv_nv_on_device_add(device, additl_info); if (!net_dev) { hv_put_rndis_device(rndis_dev); return (ENOMEM); } /* * Initialize the rndis device */ net_dev->extension = rndis_dev; rndis_dev->net_dev = net_dev; /* Send the rndis initialization message */ ret = hv_rf_init_device(rndis_dev); if (ret != 0) { /* * TODO: If rndis init failed, we will need to shut down * the channel */ } /* Get the mac address */ ret = hv_rf_query_device_mac(rndis_dev); if (ret != 0) { /* TODO: shut down rndis device and the channel */ } /* config csum offload and send request to host */ memset(&offloads, 0, sizeof(offloads)); offloads.ipv4_csum = RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; offloads.tcp_ipv4_csum = RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; offloads.udp_ipv4_csum = RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; offloads.tcp_ipv6_csum = RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; offloads.udp_ipv6_csum = RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; offloads.lso_v2_ipv4 = RNDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; ret = hv_rf_send_offload_request(device, &offloads); if (ret != 0) { /* TODO: shut down rndis device and the channel */ device_printf(dev, "hv_rf_send_offload_request failed, ret=%d\n", ret); } memcpy(dev_info->mac_addr, rndis_dev->hw_mac_addr, HW_MACADDR_LEN); hv_rf_query_device_link_status(rndis_dev); dev_info->link_state = rndis_dev->link_status; net_dev->num_channel = 1; if (net_dev->nvsp_version < NVSP_PROTOCOL_VERSION_5 || nchan == 1) return (0); memset(&rsscaps, 0, rsscaps_size); ret = hv_rf_query_device(rndis_dev, RNDIS_OID_GEN_RSS_CAPABILITIES, &rsscaps, &rsscaps_size); if ((ret != 0) || (rsscaps.num_recv_que < 2)) { device_printf(dev, "hv_rf_query_device failed or " "rsscaps.num_recv_que < 2 \n"); goto out; } device_printf(dev, "channel, offered %u, requested %d\n", rsscaps.num_recv_que, nchan); if (nchan > rsscaps.num_recv_que) nchan = rsscaps.num_recv_que; net_dev->num_channel = nchan; if (net_dev->num_channel == 1) { device_printf(dev, "net_dev->num_channel == 1 under VRSS\n"); goto out; } /* request host to create sub channels */ init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); init_pkt->hdr.msg_type = nvsp_msg5_type_subchannel; init_pkt->msgs.vers_5_msgs.subchannel_request.op = NVSP_SUBCHANNE_ALLOCATE; init_pkt->msgs.vers_5_msgs.subchannel_request.num_subchannels = net_dev->num_channel - 1; ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) { device_printf(dev, "Fail to allocate subchannel\n"); goto out; } sema_wait(&net_dev->channel_init_sema); if (init_pkt->msgs.vers_5_msgs.subchn_complete.status != nvsp_status_success) { ret = ENODEV; device_printf(dev, "sub channel complete error\n"); goto out; } net_dev->num_channel = 1 + init_pkt->msgs.vers_5_msgs.subchn_complete.num_subchannels; ret = hv_rf_set_rss_param(rndis_dev, net_dev->num_channel); out: if (ret) net_dev->num_channel = 1; return (ret); } /* * RNDIS filter on device remove */ int hv_rf_on_device_remove(struct hv_device *device, boolean_t destroy_channel) { hn_softc_t *sc = device_get_softc(device->device); netvsc_dev *net_dev = sc->net_dev; rndis_device *rndis_dev = (rndis_device *)net_dev->extension; int ret; /* Halt and release the rndis device */ ret = hv_rf_halt_device(rndis_dev); hv_put_rndis_device(rndis_dev); net_dev->extension = NULL; /* Pass control to inner driver to remove the device */ ret |= hv_nv_on_device_remove(device, 
destroy_channel); return (ret); } /* * RNDIS filter on open */ int hv_rf_on_open(struct hv_device *device) { hn_softc_t *sc = device_get_softc(device->device); netvsc_dev *net_dev = sc->net_dev; return (hv_rf_open_device((rndis_device *)net_dev->extension)); } /* * RNDIS filter on close */ int hv_rf_on_close(struct hv_device *device) { hn_softc_t *sc = device_get_softc(device->device); netvsc_dev *net_dev = sc->net_dev; return (hv_rf_close_device((rndis_device *)net_dev->extension)); } /* * RNDIS filter on send request completion callback */ static void hv_rf_on_send_request_completion(struct hv_vmbus_channel *chan __unused, void *context __unused) { } /* * RNDIS filter on send request (halt only) completion callback */ static void hv_rf_on_send_request_halt_completion(struct hv_vmbus_channel *chan __unused, void *context) { rndis_request *request = context; /* * Notify hv_rf_halt_device() about halt completion. * The halt code must wait for completion before freeing * the transaction resources. */ request->halt_complete_flag = 1; } void hv_rf_channel_rollup(struct hv_vmbus_channel *chan) { netvsc_channel_rollup(chan); }
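The consumer side of the new netvsc_recv() arguments lives in hv_netvsc_drv_freebsd.c and is not shown in this hunk. A plausible sketch of what they enable, tagging the received mbuf with the host-computed Toeplitz hash so the stack can reuse it for flow steering instead of rehashing (function name hypothetical):

#include <sys/param.h>
#include <sys/mbuf.h>

static void
hn_set_rxhash(struct mbuf *m, const struct rndis_hash_info *hash_info,
    const struct rndis_hash_value *hash_value)
{
	if (hash_info == NULL || hash_value == NULL)
		return;
	/* Only trust the value if the host used Toeplitz, per hash_info. */
	if ((hash_info->hash_info & NDIS_HASH_FUNCTION_MASK) ==
	    NDIS_HASH_FUNCTION_TOEPLITZ) {
		m->m_pkthdr.flowid = hash_value->hash_value;
		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
	}
}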