diff --git a/sys/dev/xen/netback/netback.c b/sys/dev/xen/netback/netback.c index ddd5218a8936..3c6ebde3ee49 100644 --- a/sys/dev/xen/netback/netback.c +++ b/sys/dev/xen/netback/netback.c @@ -1,2504 +1,2504 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009-2011 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) * Alan Somers (Spectra Logic Corporation) * John Suykerbuyk (Spectra Logic Corporation) */ #include __FBSDID("$FreeBSD$"); /** * \file netback.c * * \brief Device driver supporting the vending of network access * from this FreeBSD domain to other domains. */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /*--------------------------- Compile-time Tunables --------------------------*/ /*---------------------------------- Macros ----------------------------------*/ /** * Custom malloc type for all driver allocations. */ static MALLOC_DEFINE(M_XENNETBACK, "xnb", "Xen Net Back Driver Data"); #define XNB_SG 1 /* netback driver supports feature-sg */ #define XNB_GSO_TCPV4 0 /* netback driver supports feature-gso-tcpv4 */ #define XNB_RX_COPY 1 /* netback driver supports feature-rx-copy */ #define XNB_RX_FLIP 0 /* netback driver does not support feature-rx-flip */ #undef XNB_DEBUG #define XNB_DEBUG /* hardcode on during development */ #ifdef XNB_DEBUG #define DPRINTF(fmt, args...) \ printf("xnb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) #else #define DPRINTF(fmt, args...) do {} while (0) #endif /* Default length for stack-allocated grant tables */ #define GNTTAB_LEN (64) /* Features supported by all backends. TSO and LRO can be negotiated */ #define XNB_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) /** * Two argument version of the standard macro. 
Second argument is a tentative * value of req_cons */ #define RING_HAS_UNCONSUMED_REQUESTS_2(_r, cons) ({ \ unsigned int req = (_r)->sring->req_prod - cons; \ unsigned int rsp = RING_SIZE(_r) - \ (cons - (_r)->rsp_prod_pvt); \ req < rsp ? req : rsp; \ }) #define virt_to_mfn(x) (vtophys(x) >> PAGE_SHIFT) #define virt_to_offset(x) ((x) & (PAGE_SIZE - 1)) /** * Predefined array type of grant table copy descriptors. Used to pass around * statically allocated memory structures. */ typedef struct gnttab_copy gnttab_copy_table[GNTTAB_LEN]; /*--------------------------- Forward Declarations ---------------------------*/ struct xnb_softc; struct xnb_pkt; static void xnb_attach_failed(struct xnb_softc *xnb, int err, const char *fmt, ...) __printflike(3,4); static int xnb_shutdown(struct xnb_softc *xnb); static int create_netdev(device_t dev); static int xnb_detach(device_t dev); -static int xnb_ifmedia_upd(struct ifnet *ifp); -static void xnb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); +static int xnb_ifmedia_upd(if_t ifp); +static void xnb_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr); static void xnb_intr(void *arg); static int xnb_send(netif_rx_back_ring_t *rxb, domid_t otherend, const struct mbuf *mbufc, gnttab_copy_table gnttab); static int xnb_recv(netif_tx_back_ring_t *txb, domid_t otherend, - struct mbuf **mbufc, struct ifnet *ifnet, + struct mbuf **mbufc, if_t ifnet, gnttab_copy_table gnttab); static int xnb_ring2pkt(struct xnb_pkt *pkt, const netif_tx_back_ring_t *tx_ring, RING_IDX start); static void xnb_txpkt2rsp(const struct xnb_pkt *pkt, netif_tx_back_ring_t *ring, int error); -static struct mbuf *xnb_pkt2mbufc(const struct xnb_pkt *pkt, struct ifnet *ifp); +static struct mbuf *xnb_pkt2mbufc(const struct xnb_pkt *pkt, if_t ifp); static int xnb_txpkt2gnttab(const struct xnb_pkt *pkt, struct mbuf *mbufc, gnttab_copy_table gnttab, const netif_tx_back_ring_t *txb, domid_t otherend_id); static void xnb_update_mbufc(struct mbuf *mbufc, const gnttab_copy_table gnttab, int n_entries); static int xnb_mbufc2pkt(const struct mbuf *mbufc, struct xnb_pkt *pkt, RING_IDX start, int space); static int xnb_rxpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc, gnttab_copy_table gnttab, const netif_rx_back_ring_t *rxb, domid_t otherend_id); static int xnb_rxpkt2rsp(const struct xnb_pkt *pkt, const gnttab_copy_table gnttab, int n_entries, netif_rx_back_ring_t *ring); static void xnb_stop(struct xnb_softc*); -static int xnb_ioctl(struct ifnet*, u_long, caddr_t); -static void xnb_start_locked(struct ifnet*); -static void xnb_start(struct ifnet*); +static int xnb_ioctl(if_t, u_long, caddr_t); +static void xnb_start_locked(if_t); +static void xnb_start(if_t); static void xnb_ifinit_locked(struct xnb_softc*); static void xnb_ifinit(void*); #ifdef XNB_DEBUG static int xnb_unit_test_main(SYSCTL_HANDLER_ARGS); static int xnb_dump_rings(SYSCTL_HANDLER_ARGS); #endif #if defined(INET) || defined(INET6) static void xnb_add_mbuf_cksum(struct mbuf *mbufc); #endif /*------------------------------ Data Structures -----------------------------*/ /** * Representation of a xennet packet. Simplified version of a packet as * stored in the Xen tx ring. Applicable to both RX and TX packets */ struct xnb_pkt{ /** * Array index of the first data-bearing (eg, not extra info) entry * for this packet */ RING_IDX car; /** * Array index of the second data-bearing entry for this packet. * Invalid if the packet has only one data-bearing entry. 
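 *
 * (Editor's aside, not in the original source: the two-argument
 * RING_HAS_UNCONSUMED_REQUESTS_2 macro defined above lets a consumer
 * probe ahead of the committed req_cons with a tentative index, e.g.:
 *
 *	RING_IDX idx = txb->req_cons;
 *	while (RING_HAS_UNCONSUMED_REQUESTS_2(txb, idx))
 *		idx++;
 *
 * counts the reachable unconsumed slots without consuming any of them;
 * xnb_ring2pkt() below uses this pattern to assemble a multi-request
 * packet before committing req_cons.)
 *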
If the * packet has more than two data-bearing entries, then the second * through the last will be sequential modulo the ring size */ RING_IDX cdr; /** * Optional extra info. Only valid if flags contains * NETTXF_extra_info. Note that extra.type will always be * XEN_NETIF_EXTRA_TYPE_GSO. Currently, no known netfront or netback * driver will ever set XEN_NETIF_EXTRA_TYPE_MCAST_* */ netif_extra_info_t extra; /** Size of entire packet in bytes. */ uint16_t size; /** The size of the first entry's data in bytes */ uint16_t car_size; /** * Either NETTXF_ or NETRXF_ flags. Note that the flag values are * not the same for TX and RX packets */ uint16_t flags; /** * The number of valid data-bearing entries (either netif_tx_request's * or netif_rx_response's) in the packet. If this is 0, it means the * entire packet is invalid. */ uint16_t list_len; /** There was an error processing the packet */ uint8_t error; }; /** xnb_pkt method: initialize it */ static inline void xnb_pkt_initialize(struct xnb_pkt *pxnb) { bzero(pxnb, sizeof(*pxnb)); } /** xnb_pkt method: mark the packet as valid */ static inline void xnb_pkt_validate(struct xnb_pkt *pxnb) { pxnb->error = 0; }; /** xnb_pkt method: mark the packet as invalid */ static inline void xnb_pkt_invalidate(struct xnb_pkt *pxnb) { pxnb->error = 1; }; /** xnb_pkt method: Check whether the packet is valid */ static inline int xnb_pkt_is_valid(const struct xnb_pkt *pxnb) { return (! pxnb->error); } #ifdef XNB_DEBUG /** xnb_pkt method: print the packet's contents in human-readable format*/ static void __unused xnb_dump_pkt(const struct xnb_pkt *pkt) { if (pkt == NULL) { DPRINTF("Was passed a null pointer.\n"); return; } DPRINTF("pkt address= %p\n", pkt); DPRINTF("pkt->size=%d\n", pkt->size); DPRINTF("pkt->car_size=%d\n", pkt->car_size); DPRINTF("pkt->flags=0x%04x\n", pkt->flags); DPRINTF("pkt->list_len=%d\n", pkt->list_len); /* DPRINTF("pkt->extra"); TODO */ DPRINTF("pkt->car=%d\n", pkt->car); DPRINTF("pkt->cdr=%d\n", pkt->cdr); DPRINTF("pkt->error=%d\n", pkt->error); } #endif /* XNB_DEBUG */ static void xnb_dump_txreq(RING_IDX idx, const struct netif_tx_request *txreq) { if (txreq != NULL) { DPRINTF("netif_tx_request index =%u\n", idx); DPRINTF("netif_tx_request.gref =%u\n", txreq->gref); DPRINTF("netif_tx_request.offset=%hu\n", txreq->offset); DPRINTF("netif_tx_request.flags =%hu\n", txreq->flags); DPRINTF("netif_tx_request.id =%hu\n", txreq->id); DPRINTF("netif_tx_request.size =%hu\n", txreq->size); } } /** * \brief Configuration data for a shared memory request ring * used to communicate with the front-end client of * this driver. */ struct xnb_ring_config { /** * Runtime structures for ring access. Unfortunately, TX and RX rings * use different data structures, and that cannot be changed since it * is part of the interdomain protocol. */ union{ netif_rx_back_ring_t rx_ring; netif_tx_back_ring_t tx_ring; } back_ring; /** * The device bus address returned by the hypervisor when * mapping the ring and required to unmap it when a connection * is torn down. */ uint64_t bus_addr; /** The pseudo-physical address where ring memory is mapped.*/ uint64_t gnt_addr; /** KVA address where ring memory is mapped. */ vm_offset_t va; /** * Grant table handles, one per-ring page, returned by the * hypervisor upon mapping of the ring and required to * unmap it when a connection is torn down. */ grant_handle_t handle; /** The number of ring pages mapped for the current connection. */
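/*
 * Worked example (editor's addition): a packet assembled from three
 * sequential data-bearing tx requests starting at ring index N, with
 * no extra info, would be described by struct xnb_pkt as:
 *
 *	pkt.car      = N	first data-bearing entry
 *	pkt.cdr      = N + 1	second entry; the third follows at N + 2
 *	pkt.list_len = 3	three data-bearing entries
 *	pkt.car_size = bytes carried by request N alone
 *	pkt.size     = total bytes across all three requests
 */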
unsigned ring_pages; /** * The grant references, one per-ring page, supplied by the * front-end, allowing us to reference the ring pages in the * front-end's domain and to map these pages into our own domain. */ grant_ref_t ring_ref; }; /** * Per-instance connection state flags. */ typedef enum { /** Communication with the front-end has been established. */ XNBF_RING_CONNECTED = 0x01, /** * Front-end requests exist in the ring and are waiting for * xnb_xen_req objects to free up. */ XNBF_RESOURCE_SHORTAGE = 0x02, /** Connection teardown has started. */ XNBF_SHUTDOWN = 0x04, /** A thread is already performing shutdown processing. */ XNBF_IN_SHUTDOWN = 0x08 } xnb_flag_t; /** * Types of rings. Used for array indices and to identify a ring's control * data structure type */ typedef enum{ XNB_RING_TYPE_TX = 0, /* ID of TX rings, used for array indices */ XNB_RING_TYPE_RX = 1, /* ID of RX rings, used for array indices */ XNB_NUM_RING_TYPES } xnb_ring_type_t; /** * Per-instance configuration data. */ struct xnb_softc { /** NewBus device corresponding to this instance. */ device_t dev; /* Media related fields */ /** Generic network media state */ struct ifmedia sc_media; /** Media carrier info */ - struct ifnet *xnb_ifp; + if_t xnb_ifp; /** Our own private carrier state */ unsigned carrier; /** Device MAC Address */ uint8_t mac[ETHER_ADDR_LEN]; /* Xen related fields */ /** * \brief The netif protocol abi in effect. * * There are situations where the back and front ends can * have a different, native abi (e.g. intel x86_64 and * 32bit x86 domains on the same machine). The back-end * always accommodates the front-end's native abi. That * value is pulled from the XenStore and recorded here. */ int abi; /** * Name of the bridge to which this VIF is connected, if any. * This field is dynamically allocated by xenbus and must be free()ed * when no longer needed */ char *bridge; /** The interrupt-driven event channel used to signal ring events. */ evtchn_port_t evtchn; /** Xen device handle.*/ long handle; /** Handle to the communication ring event channel. */ xen_intr_handle_t xen_intr_handle; /** * \brief Cached value of the front-end's domain id. * * This value is used once for each mapped page in * a transaction. We cache it to avoid incurring the * cost of an ivar access every time this is needed. */ domid_t otherend_id; /** * Undocumented frontend feature. Has something to do with * scatter/gather IO */ uint8_t can_sg; /** Undocumented frontend feature */ uint8_t gso; /** Undocumented frontend feature */ uint8_t gso_prefix; /** Can checksum TCP/UDP over IPv4 */ uint8_t ip_csum; /* Implementation related fields */ /** * Preallocated grant table copy descriptor for RX operations. * Access must be protected by rx_lock */ gnttab_copy_table rx_gnttab; /** * Preallocated grant table copy descriptor for TX operations. * Access must be protected by tx_lock */ gnttab_copy_table tx_gnttab; /** * Resource representing allocated physical address space * associated with our per-instance kva region. */ struct resource *pseudo_phys_res; /** Resource id for allocated physical address space. */ int pseudo_phys_res_id; /** Ring mapping and interrupt configuration data. */ struct xnb_ring_config ring_configs[XNB_NUM_RING_TYPES]; /** * Global pool of kva used for mapping remote domain ring * and I/O transaction data. */ vm_offset_t kva; /** Pseudo-physical address corresponding to kva. */ uint64_t gnt_base_addr; /** Various configuration and state bit flags.
*/ xnb_flag_t flags; /** Mutex protecting per-instance data in the receive path. */ struct mtx rx_lock; /** Mutex protecting per-instance data in the softc structure. */ struct mtx sc_lock; /** Mutex protecting per-instance data in the transmit path. */ struct mtx tx_lock; /** The size of the global kva pool. */ int kva_size; /** Name of the interface */ char if_name[IFNAMSIZ]; }; /*---------------------------- Debugging functions ---------------------------*/ #ifdef XNB_DEBUG static void __unused xnb_dump_gnttab_copy(const struct gnttab_copy *entry) { if (entry == NULL) { printf("NULL grant table pointer\n"); return; } if (entry->flags & GNTCOPY_dest_gref) printf("gnttab dest ref=\t%u\n", entry->dest.u.ref); else printf("gnttab dest gmfn=\t%"PRI_xen_pfn"\n", entry->dest.u.gmfn); printf("gnttab dest offset=\t%hu\n", entry->dest.offset); printf("gnttab dest domid=\t%hu\n", entry->dest.domid); if (entry->flags & GNTCOPY_source_gref) printf("gnttab source ref=\t%u\n", entry->source.u.ref); else printf("gnttab source gmfn=\t%"PRI_xen_pfn"\n", entry->source.u.gmfn); printf("gnttab source offset=\t%hu\n", entry->source.offset); printf("gnttab source domid=\t%hu\n", entry->source.domid); printf("gnttab len=\t%hu\n", entry->len); printf("gnttab flags=\t%hu\n", entry->flags); printf("gnttab status=\t%hd\n", entry->status); } static int xnb_dump_rings(SYSCTL_HANDLER_ARGS) { static char results[720]; struct xnb_softc const* xnb = (struct xnb_softc*)arg1; netif_rx_back_ring_t const* rxb = &xnb->ring_configs[XNB_RING_TYPE_RX].back_ring.rx_ring; netif_tx_back_ring_t const* txb = &xnb->ring_configs[XNB_RING_TYPE_TX].back_ring.tx_ring; /* empty the result strings */ results[0] = 0; if ( !txb || !txb->sring || !rxb || !rxb->sring ) return (SYSCTL_OUT(req, results, strnlen(results, 720))); snprintf(results, 720, "\n\t%35s %18s\n" /* TX, RX */ "\t%16s %18d %18d\n" /* req_cons */ "\t%16s %18d %18d\n" /* nr_ents */ "\t%16s %18d %18d\n" /* rsp_prod_pvt */ "\t%16s %18p %18p\n" /* sring */ "\t%16s %18d %18d\n" /* req_prod */ "\t%16s %18d %18d\n" /* req_event */ "\t%16s %18d %18d\n" /* rsp_prod */ "\t%16s %18d %18d\n", /* rsp_event */ "TX", "RX", "req_cons", txb->req_cons, rxb->req_cons, "nr_ents", txb->nr_ents, rxb->nr_ents, "rsp_prod_pvt", txb->rsp_prod_pvt, rxb->rsp_prod_pvt, "sring", txb->sring, rxb->sring, "sring->req_prod", txb->sring->req_prod, rxb->sring->req_prod, "sring->req_event", txb->sring->req_event, rxb->sring->req_event, "sring->rsp_prod", txb->sring->rsp_prod, rxb->sring->rsp_prod, "sring->rsp_event", txb->sring->rsp_event, rxb->sring->rsp_event); return (SYSCTL_OUT(req, results, strnlen(results, 720))); } static void __unused xnb_dump_mbuf(const struct mbuf *m) { int len; uint8_t *d; if (m == NULL) return; printf("xnb_dump_mbuf:\n"); if (m->m_flags & M_PKTHDR) { printf(" flowid=%10d, csum_flags=%#8x, csum_data=%#8x, " "tso_segsz=%5hd\n", m->m_pkthdr.flowid, (int)m->m_pkthdr.csum_flags, m->m_pkthdr.csum_data, m->m_pkthdr.tso_segsz); printf(" rcvif=%16p, len=%19d\n", m->m_pkthdr.rcvif, m->m_pkthdr.len); } printf(" m_next=%16p, m_nextpk=%16p, m_data=%16p\n", m->m_next, m->m_nextpkt, m->m_data); printf(" m_len=%17d, m_flags=%#15x, m_type=%18u\n", m->m_len, m->m_flags, m->m_type); len = m->m_len; d = mtod(m, uint8_t*); while (len > 0) { int i; printf(" "); for (i = 0; (i < 16) && (len > 0); i++, len--) { printf("%02hhx ", *(d++)); } printf("\n"); } } #endif /* XNB_DEBUG */ /*------------------------ Inter-Domain Communication ------------------------*/ /** * Free dynamically allocated KVA or 
pseudo-physical address allocations. * * \param xnb Per-instance xnb configuration structure. */ static void xnb_free_communication_mem(struct xnb_softc *xnb) { if (xnb->kva != 0) { if (xnb->pseudo_phys_res != NULL) { xenmem_free(xnb->dev, xnb->pseudo_phys_res_id, xnb->pseudo_phys_res); xnb->pseudo_phys_res = NULL; } } xnb->kva = 0; xnb->gnt_base_addr = 0; } /** * Cleanup all inter-domain communication mechanisms. * * \param xnb Per-instance xnb configuration structure. */ static int xnb_disconnect(struct xnb_softc *xnb) { struct gnttab_unmap_grant_ref gnts[XNB_NUM_RING_TYPES]; int error __diagused; int i; if (xnb->xen_intr_handle != NULL) xen_intr_unbind(&xnb->xen_intr_handle); /* * We may still have another thread currently processing requests. We * must acquire the rx and tx locks to make sure those threads are done, * but we can release those locks as soon as we acquire them, because no * more interrupts will be arriving. */ mtx_lock(&xnb->tx_lock); mtx_unlock(&xnb->tx_lock); mtx_lock(&xnb->rx_lock); mtx_unlock(&xnb->rx_lock); mtx_lock(&xnb->sc_lock); /* Free malloc'd softc member variables */ if (xnb->bridge != NULL) { free(xnb->bridge, M_XENSTORE); xnb->bridge = NULL; } /* All request processing has stopped, so unmap the rings */ for (i=0; i < XNB_NUM_RING_TYPES; i++) { gnts[i].host_addr = xnb->ring_configs[i].gnt_addr; gnts[i].dev_bus_addr = xnb->ring_configs[i].bus_addr; gnts[i].handle = xnb->ring_configs[i].handle; } error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, gnts, XNB_NUM_RING_TYPES); KASSERT(error == 0, ("Grant table unmap op failed (%d)", error)); xnb_free_communication_mem(xnb); /* * Zero the ring config structs because the pointers, handles, and * grant refs contained therein are no longer valid. */ bzero(&xnb->ring_configs[XNB_RING_TYPE_TX], sizeof(struct xnb_ring_config)); bzero(&xnb->ring_configs[XNB_RING_TYPE_RX], sizeof(struct xnb_ring_config)); xnb->flags &= ~XNBF_RING_CONNECTED; mtx_unlock(&xnb->sc_lock); return (0); } /** * Map a single shared memory ring into domain local address space and * initialize its control structure * * \param xnb Per-instance xnb configuration structure * \param ring_type Array index of this ring in the xnb's array of rings * \return An errno */ static int xnb_connect_ring(struct xnb_softc *xnb, xnb_ring_type_t ring_type) { struct gnttab_map_grant_ref gnt; struct xnb_ring_config *ring = &xnb->ring_configs[ring_type]; int error; /* TX ring type = 0, RX =1 */ ring->va = xnb->kva + ring_type * PAGE_SIZE; ring->gnt_addr = xnb->gnt_base_addr + ring_type * PAGE_SIZE; gnt.host_addr = ring->gnt_addr; gnt.flags = GNTMAP_host_map; gnt.ref = ring->ring_ref; gnt.dom = xnb->otherend_id; error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &gnt, 1); if (error != 0) panic("netback: Ring page grant table op failed (%d)", error); if (gnt.status != 0) { ring->va = 0; error = EACCES; xenbus_dev_fatal(xnb->dev, error, "Ring shared page mapping failed. " "Status %d.", gnt.status); } else { ring->handle = gnt.handle; ring->bus_addr = gnt.dev_bus_addr; if (ring_type == XNB_RING_TYPE_TX) { BACK_RING_INIT(&ring->back_ring.tx_ring, (netif_tx_sring_t*)ring->va, ring->ring_pages * PAGE_SIZE); } else if (ring_type == XNB_RING_TYPE_RX) { BACK_RING_INIT(&ring->back_ring.rx_ring, (netif_rx_sring_t*)ring->va, ring->ring_pages * PAGE_SIZE); } else { xenbus_dev_fatal(xnb->dev, error, "Unknown ring type %d", ring_type); } } return error; } /** * Setup the shared memory rings and bind an interrupt to the event channel * used to notify us of ring changes. 
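/*
 * Editor's sketch (hypothetical helper, not in the original source) of
 * the lock "drain" idiom used by xnb_disconnect() above: once the
 * interrupt has been unbound no new handler can start, so taking and
 * immediately dropping each path's mutex proves that any in-flight
 * handler has finished.
 */
static void __unused
xnb_drain_handlers(struct xnb_softc *xnb)
{
	mtx_lock(&xnb->tx_lock);	/* blocks until a running tx handler exits */
	mtx_unlock(&xnb->tx_lock);
	mtx_lock(&xnb->rx_lock);	/* blocks until a running rx handler exits */
	mtx_unlock(&xnb->rx_lock);
}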
* * \param xnb Per-instance xnb configuration structure. */ static int xnb_connect_comms(struct xnb_softc *xnb) { int error; xnb_ring_type_t i; if ((xnb->flags & XNBF_RING_CONNECTED) != 0) return (0); /* * Kva for our rings are at the tail of the region of kva allocated * by xnb_alloc_communication_mem(). */ for (i=0; i < XNB_NUM_RING_TYPES; i++) { error = xnb_connect_ring(xnb, i); if (error != 0) return error; } xnb->flags |= XNBF_RING_CONNECTED; error = xen_intr_bind_remote_port(xnb->dev, xnb->otherend_id, xnb->evtchn, /*filter*/NULL, xnb_intr, /*arg*/xnb, INTR_TYPE_NET | INTR_MPSAFE, &xnb->xen_intr_handle); if (error != 0) { (void)xnb_disconnect(xnb); xenbus_dev_fatal(xnb->dev, error, "binding event channel"); return (error); } DPRINTF("rings connected!\n"); return (0); } /** * Size KVA and pseudo-physical address allocations based on negotiated * values for the size and number of I/O requests, and the size of our * communication ring. * * \param xnb Per-instance xnb configuration structure. * * These address spaces are used to dynamically map pages in the * front-end's domain into our own. */ static int xnb_alloc_communication_mem(struct xnb_softc *xnb) { xnb_ring_type_t i; xnb->kva_size = 0; for (i=0; i < XNB_NUM_RING_TYPES; i++) { xnb->kva_size += xnb->ring_configs[i].ring_pages * PAGE_SIZE; } /* * Reserve a range of pseudo physical memory that we can map * into kva. These pages will only be backed by machine * pages ("real memory") during the lifetime of front-end requests * via grant table operations. We will map the netif tx and rx rings * into this space. */ xnb->pseudo_phys_res_id = 0; xnb->pseudo_phys_res = xenmem_alloc(xnb->dev, &xnb->pseudo_phys_res_id, xnb->kva_size); if (xnb->pseudo_phys_res == NULL) { xnb->kva = 0; return (ENOMEM); } xnb->kva = (vm_offset_t)rman_get_virtual(xnb->pseudo_phys_res); xnb->gnt_base_addr = rman_get_start(xnb->pseudo_phys_res); return (0); } /** * Collect information from the XenStore related to our device and its frontend * * \param xnb Per-instance xnb configuration structure. */ static int xnb_collect_xenstore_info(struct xnb_softc *xnb) { /** * \todo Linux collects the following info. We should collect most * of this, too: * "feature-rx-notify" */ const char *otherend_path; const char *our_path; int err; unsigned int rx_copy, bridge_len; uint8_t no_csum_offload; otherend_path = xenbus_get_otherend_path(xnb->dev); our_path = xenbus_get_node(xnb->dev); /* Collect the critical communication parameters */ err = xs_gather(XST_NIL, otherend_path, "tx-ring-ref", "%l" PRIu32, &xnb->ring_configs[XNB_RING_TYPE_TX].ring_ref, "rx-ring-ref", "%l" PRIu32, &xnb->ring_configs[XNB_RING_TYPE_RX].ring_ref, "event-channel", "%" PRIu32, &xnb->evtchn, NULL); if (err != 0) { xenbus_dev_fatal(xnb->dev, err, "Unable to retrieve ring information from " "frontend %s. Unable to connect.", otherend_path); return (err); } /* Collect the handle from xenstore */ err = xs_scanf(XST_NIL, our_path, "handle", NULL, "%li", &xnb->handle); if (err != 0) { xenbus_dev_fatal(xnb->dev, err, "Error reading handle from frontend %s. " "Unable to connect.", otherend_path); } /* * Collect the bridgename, if any. We do not need bridge_len; we just * throw it away */ err = xs_read(XST_NIL, our_path, "bridge", &bridge_len, (void**)&xnb->bridge); if (err != 0) xnb->bridge = NULL; /* * Does the frontend request that we use rx copy? If not, return an * error because this driver only supports rx copy. 
*/ err = xs_scanf(XST_NIL, otherend_path, "request-rx-copy", NULL, "%" PRIu32, &rx_copy); if (err == ENOENT) { err = 0; rx_copy = 0; } if (err < 0) { xenbus_dev_fatal(xnb->dev, err, "reading %s/request-rx-copy", otherend_path); return err; } /** * \todo: figure out the exact meaning of this feature, and when * the frontend will set it to true. It should be set to true * at some point */ /* if (!rx_copy)*/ /* return EOPNOTSUPP;*/ /** \todo Collect the rx notify feature */ /* Collect the feature-sg. */ if (xs_scanf(XST_NIL, otherend_path, "feature-sg", NULL, "%hhu", &xnb->can_sg) < 0) xnb->can_sg = 0; /* Collect remaining frontend features */ if (xs_scanf(XST_NIL, otherend_path, "feature-gso-tcpv4", NULL, "%hhu", &xnb->gso) < 0) xnb->gso = 0; if (xs_scanf(XST_NIL, otherend_path, "feature-gso-tcpv4-prefix", NULL, "%hhu", &xnb->gso_prefix) < 0) xnb->gso_prefix = 0; if (xs_scanf(XST_NIL, otherend_path, "feature-no-csum-offload", NULL, "%hhu", &no_csum_offload) < 0) no_csum_offload = 0; xnb->ip_csum = (no_csum_offload == 0); return (0); } /** * Supply information about the physical device to the frontend * via XenBus. * * \param xnb Per-instance xnb configuration structure. */ static int xnb_publish_backend_info(struct xnb_softc *xnb) { struct xs_transaction xst; const char *our_path; int error; our_path = xenbus_get_node(xnb->dev); do { error = xs_transaction_start(&xst); if (error != 0) { xenbus_dev_fatal(xnb->dev, error, "Error publishing backend info " "(start transaction)"); break; } error = xs_printf(xst, our_path, "feature-sg", "%d", XNB_SG); if (error != 0) break; error = xs_printf(xst, our_path, "feature-gso-tcpv4", "%d", XNB_GSO_TCPV4); if (error != 0) break; error = xs_printf(xst, our_path, "feature-rx-copy", "%d", XNB_RX_COPY); if (error != 0) break; error = xs_printf(xst, our_path, "feature-rx-flip", "%d", XNB_RX_FLIP); if (error != 0) break; error = xs_transaction_end(xst, 0); if (error != 0 && error != EAGAIN) { xenbus_dev_fatal(xnb->dev, error, "ending transaction"); break; } } while (error == EAGAIN); return (error); } /** * Connect to our netfront peer now that it has completed publishing * its configuration into the XenStore. * * \param xnb Per-instance xnb configuration structure. */ static void xnb_connect(struct xnb_softc *xnb) { int error; if (xenbus_get_state(xnb->dev) == XenbusStateConnected) return; if (xnb_collect_xenstore_info(xnb) != 0) return; xnb->flags &= ~XNBF_SHUTDOWN; /* Read front end configuration. */ /* Allocate resources whose size depends on front-end configuration. */ error = xnb_alloc_communication_mem(xnb); if (error != 0) { xenbus_dev_fatal(xnb->dev, error, "Unable to allocate communication memory"); return; } /* * Connect communication channel. */ error = xnb_connect_comms(xnb); if (error != 0) { /* Specific errors are reported by xnb_connect_comms(). */ return; } xnb->carrier = 1; /* Ready for I/O. */ xenbus_set_state(xnb->dev, XenbusStateConnected); } /*-------------------------- Device Teardown Support -------------------------*/ /** * Perform device shutdown functions. * * \param xnb Per-instance xnb configuration structure. * * Mark this instance as shutting down, wait for any active requests * to drain, disconnect from the front-end, and notify any waiters (e.g. * a thread invoking our detach method) that detach can now proceed. */ static int xnb_shutdown(struct xnb_softc *xnb) { /* * Due to the need to drop our mutex during some * xenbus operations, it is possible for two threads * to attempt to close out shutdown processing at * the same time. 
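/*
 * Editor's sketch of the XenStore transaction retry pattern used by
 * xnb_publish_backend_info() above: xs_transaction_end() returns EAGAIN
 * when the transaction raced with another writer, in which case the
 * whole transaction is replayed. The helper name, key, and value are
 * hypothetical.
 */
static int __unused
xnb_publish_one(device_t dev, const char *key, const char *val)
{
	struct xs_transaction xst;
	int error;

	do {
		error = xs_transaction_start(&xst);
		if (error != 0)
			break;
		error = xs_printf(xst, xenbus_get_node(dev), key, "%s", val);
		if (error == 0)
			error = xs_transaction_end(xst, 0);
		else
			(void)xs_transaction_end(xst, 1); /* abort */
	} while (error == EAGAIN);

	return (error);
}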
Tell the caller that hits this * race to try back later. */ if ((xnb->flags & XNBF_IN_SHUTDOWN) != 0) return (EAGAIN); xnb->flags |= XNBF_SHUTDOWN; xnb->flags |= XNBF_IN_SHUTDOWN; mtx_unlock(&xnb->sc_lock); /* Free the network interface */ xnb->carrier = 0; if (xnb->xnb_ifp != NULL) { ether_ifdetach(xnb->xnb_ifp); if_free(xnb->xnb_ifp); xnb->xnb_ifp = NULL; } xnb_disconnect(xnb); if (xenbus_get_state(xnb->dev) < XenbusStateClosing) xenbus_set_state(xnb->dev, XenbusStateClosing); mtx_lock(&xnb->sc_lock); xnb->flags &= ~XNBF_IN_SHUTDOWN; /* Indicate to xnb_detach() that it is safe to proceed. */ wakeup(xnb); return (0); } /** * Report an attach time error to the console and Xen, and cleanup * this instance by forcing immediate detach processing. * * \param xnb Per-instance xnb configuration structure. * \param err Errno describing the error. * \param fmt Printf style format and arguments */ static void xnb_attach_failed(struct xnb_softc *xnb, int err, const char *fmt, ...) { va_list ap; va_list ap_hotplug; va_start(ap, fmt); va_copy(ap_hotplug, ap); xs_vprintf(XST_NIL, xenbus_get_node(xnb->dev), "hotplug-error", fmt, ap_hotplug); va_end(ap_hotplug); (void)xs_printf(XST_NIL, xenbus_get_node(xnb->dev), "hotplug-status", "error"); xenbus_dev_vfatal(xnb->dev, err, fmt, ap); va_end(ap); (void)xs_printf(XST_NIL, xenbus_get_node(xnb->dev), "online", "0"); xnb_detach(xnb->dev); } /*---------------------------- NewBus Entrypoints ----------------------------*/ /** * Inspect a XenBus device and claim it if it is of the appropriate type. * * \param dev NewBus device object representing a candidate XenBus device. * * \return 0 for success, errno codes for failure. */ static int xnb_probe(device_t dev) { if (!strcmp(xenbus_get_type(dev), "vif")) { DPRINTF("Claiming device %d, %s\n", device_get_unit(dev), devclass_get_name(device_get_devclass(dev))); device_set_desc(dev, "Backend Virtual Network Device"); device_quiet(dev); return (0); } return (ENXIO); } /** * Setup sysctl variables to control various Network Back parameters. * * \param xnb Xen Net Back softc. * */ static void xnb_setup_sysctl(struct xnb_softc *xnb) { struct sysctl_ctx_list *sysctl_ctx = NULL; struct sysctl_oid *sysctl_tree = NULL; sysctl_ctx = device_get_sysctl_ctx(xnb->dev); if (sysctl_ctx == NULL) return; sysctl_tree = device_get_sysctl_tree(xnb->dev); if (sysctl_tree == NULL) return; #ifdef XNB_DEBUG SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "unit_test_results", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, xnb, 0, xnb_unit_test_main, "A", "Results of builtin unit tests"); SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "dump_rings", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, xnb, 0, xnb_dump_rings, "A", "Xennet Back Rings"); #endif /* XNB_DEBUG */ }
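/*
 * Editor's sketch of the va_copy() idiom that xnb_attach_failed()
 * above depends on: a va_list may only be traversed once, so it is
 * duplicated before being handed to two consumers. The helper is
 * hypothetical and purely illustrative.
 */
static void __unused
xnb_report_twice(const char *fmt, ...)
{
	va_list ap, ap_copy;

	va_start(ap, fmt);
	va_copy(ap_copy, ap);	/* independent cursor for the first pass */
	vprintf(fmt, ap_copy);
	va_end(ap_copy);
	vprintf(fmt, ap);	/* the original list is still usable */
	va_end(ap);
}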
/** * Create a network device. * @param handle device handle */ int create_netdev(device_t dev) { - struct ifnet *ifp; + if_t ifp; struct xnb_softc *xnb; int err = 0; uint32_t handle; xnb = device_get_softc(dev); mtx_init(&xnb->sc_lock, "xnb_softc", "xen netback softc lock", MTX_DEF); mtx_init(&xnb->tx_lock, "xnb_tx", "xen netback tx lock", MTX_DEF); mtx_init(&xnb->rx_lock, "xnb_rx", "xen netback rx lock", MTX_DEF); xnb->dev = dev; ifmedia_init(&xnb->sc_media, 0, xnb_ifmedia_upd, xnb_ifmedia_sts); ifmedia_add(&xnb->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); ifmedia_set(&xnb->sc_media, IFM_ETHER|IFM_MANUAL); /* * Set the MAC address to a dummy value (00:00:00:00:00:00); * if the MAC address of the host-facing interface is set * to the same as the guest-facing one (the value found in * xenstore), the bridge would stop delivering packets to * us because it would see that the destination address of * the packet is the same as the interface, and so the bridge * would expect the packet to have already been delivered locally * (and just drop it). */ bzero(&xnb->mac[0], sizeof(xnb->mac)); /* The interface will be named using the following nomenclature: * * xnb<domid>.<handle> * * Where handle is the order of the interface referred to the guest. */ err = xs_scanf(XST_NIL, xenbus_get_node(xnb->dev), "handle", NULL, "%" PRIu32, &handle); if (err != 0) return (err); snprintf(xnb->if_name, IFNAMSIZ, "xnb%" PRIu16 ".%" PRIu32, xenbus_get_otherend_id(dev), handle); if (err == 0) { /* Set up ifnet structure */ ifp = xnb->xnb_ifp = if_alloc(IFT_ETHER); - ifp->if_softc = xnb; + if_setsoftc(ifp, xnb); if_initname(ifp, xnb->if_name, IF_DUNIT_NONE); - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; - ifp->if_ioctl = xnb_ioctl; - ifp->if_start = xnb_start; - ifp->if_init = xnb_ifinit; - ifp->if_mtu = ETHERMTU; - ifp->if_snd.ifq_maxlen = NET_RX_RING_SIZE - 1; + if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); + if_setioctlfn(ifp, xnb_ioctl); + if_setstartfn(ifp, xnb_start); + if_setinitfn(ifp, xnb_ifinit); + if_setmtu(ifp, ETHERMTU); + if_setsendqlen(ifp, NET_RX_RING_SIZE - 1); - ifp->if_hwassist = XNB_CSUM_FEATURES; - ifp->if_capabilities = IFCAP_HWCSUM; - ifp->if_capenable = IFCAP_HWCSUM; + if_sethwassist(ifp, XNB_CSUM_FEATURES); + if_setcapabilities(ifp, IFCAP_HWCSUM); + if_setcapenable(ifp, IFCAP_HWCSUM); ether_ifattach(ifp, xnb->mac); xnb->carrier = 0; } return err; } /** * Attach to a XenBus device that has been claimed by our probe routine. * * \param dev NewBus device object representing this Xen Net Back instance. * * \return 0 for success, errno codes for failure. */ static int xnb_attach(device_t dev) { struct xnb_softc *xnb; int error; xnb_ring_type_t i; error = create_netdev(dev); if (error != 0) { xenbus_dev_fatal(dev, error, "creating netdev"); return (error); } DPRINTF("Attaching to %s\n", xenbus_get_node(dev)); /* * Basic initialization. * After this block it is safe to call xnb_detach() * to clean up any allocated data for this instance. */ xnb = device_get_softc(dev); xnb->otherend_id = xenbus_get_otherend_id(dev); for (i=0; i < XNB_NUM_RING_TYPES; i++) { xnb->ring_configs[i].ring_pages = 1; } /* * Setup sysctl variables. */ xnb_setup_sysctl(xnb); /* Update hot-plug status to satisfy xend.
*/ error = xs_printf(XST_NIL, xenbus_get_node(xnb->dev), "hotplug-status", "connected"); if (error != 0) { xnb_attach_failed(xnb, error, "writing %s/hotplug-status", xenbus_get_node(xnb->dev)); return (error); } if ((error = xnb_publish_backend_info(xnb)) != 0) { /* * If we can't publish our data, we cannot participate * in this connection, and waiting for a front-end state * change will not help the situation. */ xnb_attach_failed(xnb, error, "Publishing backend status for %s", xenbus_get_node(xnb->dev)); return error; } /* Tell the front end that we are ready to connect. */ xenbus_set_state(dev, XenbusStateInitWait); return (0); } /** * Detach from a net back device instance. * * \param dev NewBus device object representing this Xen Net Back instance. * * \return 0 for success, errno codes for failure. * * \note A net back device may be detached at any time in its life-cycle, * including part way through the attach process. For this reason, * initialization order and the initialization state checks in this * routine must be carefully coupled so that attach time failures * are gracefully handled. */ static int xnb_detach(device_t dev) { struct xnb_softc *xnb; DPRINTF("\n"); xnb = device_get_softc(dev); mtx_lock(&xnb->sc_lock); while (xnb_shutdown(xnb) == EAGAIN) { msleep(xnb, &xnb->sc_lock, /*wakeup prio unchanged*/0, "xnb_shutdown", 0); } mtx_unlock(&xnb->sc_lock); DPRINTF("\n"); mtx_destroy(&xnb->tx_lock); mtx_destroy(&xnb->rx_lock); mtx_destroy(&xnb->sc_lock); return (0); } /** * Prepare this net back device for suspension of this VM. * * \param dev NewBus device object representing this Xen net Back instance. * * \return 0 for success, errno codes for failure. */ static int xnb_suspend(device_t dev) { return (0); } /** * Perform any processing required to recover from a suspended state. * * \param dev NewBus device object representing this Xen Net Back instance. * * \return 0 for success, errno codes for failure. */ static int xnb_resume(device_t dev) { return (0); } /** * Handle state changes expressed via the XenStore by our front-end peer. * * \param dev NewBus device object representing this Xen * Net Back instance. * \param frontend_state The new state of the front-end. */ static void xnb_frontend_changed(device_t dev, XenbusState frontend_state) { struct xnb_softc *xnb; xnb = device_get_softc(dev); DPRINTF("frontend_state=%s, xnb_state=%s\n", xenbus_strstate(frontend_state), xenbus_strstate(xenbus_get_state(xnb->dev))); switch (frontend_state) { case XenbusStateInitialising: case XenbusStateInitialised: break; case XenbusStateConnected: xnb_connect(xnb); break; case XenbusStateClosing: case XenbusStateClosed: mtx_lock(&xnb->sc_lock); xnb_shutdown(xnb); mtx_unlock(&xnb->sc_lock); if (frontend_state == XenbusStateClosed) xenbus_set_state(xnb->dev, XenbusStateClosed); break; default: xenbus_dev_fatal(xnb->dev, EINVAL, "saw state %d at frontend", frontend_state); break; } } /*---------------------------- Request Processing ----------------------------*/ /** * Interrupt handler bound to the shared ring's event channel. * Entry point for the xennet transmit path in netback. * Transfers packets from the Xen ring to the host's generic networking stack. * * \param arg Callback argument registered during event channel * binding - the xnb_softc for this instance.
*/ static void xnb_intr(void *arg) { struct xnb_softc *xnb; - struct ifnet *ifp; + if_t ifp; netif_tx_back_ring_t *txb; RING_IDX req_prod_local; xnb = (struct xnb_softc *)arg; ifp = xnb->xnb_ifp; txb = &xnb->ring_configs[XNB_RING_TYPE_TX].back_ring.tx_ring; mtx_lock(&xnb->tx_lock); do { int notify; req_prod_local = txb->sring->req_prod; xen_rmb(); for (;;) { struct mbuf *mbufc; int err; err = xnb_recv(txb, xnb->otherend_id, &mbufc, ifp, xnb->tx_gnttab); if (err || (mbufc == NULL)) break; /* Send the packet to the generic network stack */ - (*xnb->xnb_ifp->if_input)(xnb->xnb_ifp, mbufc); + if_input(xnb->xnb_ifp, mbufc); } RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(txb, notify); if (notify != 0) xen_intr_signal(xnb->xen_intr_handle); txb->sring->req_event = txb->req_cons + 1; xen_mb(); } while (txb->sring->req_prod != req_prod_local) ; mtx_unlock(&xnb->tx_lock); xnb_start(ifp); } /** * Build a struct xnb_pkt based on netif_tx_request's from a netif tx ring. * Will read exactly 0 or 1 packets from the ring; never a partial packet. * \param[out] pkt The returned packet. If there is an error building * the packet, pkt.list_len will be set to 0. * \param[in] tx_ring Pointer to the Ring that is the input to this function * \param[in] start The ring index of the first potential request * \return The number of requests consumed to build this packet */ static int xnb_ring2pkt(struct xnb_pkt *pkt, const netif_tx_back_ring_t *tx_ring, RING_IDX start) { /* * Outline: * 1) Initialize pkt * 2) Read the first request of the packet * 3) Read the extras * 4) Set cdr * 5) Loop on the remainder of the packet * 6) Finalize pkt (stuff like car_size and list_len) */ int idx = start; int discard = 0; /* whether to discard the packet */ int more_data = 0; /* there are more requests past the last one */ uint16_t cdr_size = 0; /* accumulated size of requests 2 through n */ xnb_pkt_initialize(pkt); /* Read the first request */ if (RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) { netif_tx_request_t *tx = RING_GET_REQUEST(tx_ring, idx); pkt->size = tx->size; pkt->flags = tx->flags & ~NETTXF_more_data; more_data = tx->flags & NETTXF_more_data; pkt->list_len++; pkt->car = idx; idx++; } /* Read the extra info */ if ((pkt->flags & NETTXF_extra_info) && RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) { netif_extra_info_t *ext = (netif_extra_info_t*) RING_GET_REQUEST(tx_ring, idx); pkt->extra.type = ext->type; switch (pkt->extra.type) { case XEN_NETIF_EXTRA_TYPE_GSO: pkt->extra.u.gso = ext->u.gso; break; default: /* * The reference Linux netfront driver will * never set any other extra.type. So we don't * know what to do with it. Let's print an * error, then consume and discard the packet */ printf("xnb(%s:%d): Unknown extra info type %d." " Discarding packet\n", __func__, __LINE__, pkt->extra.type); xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start)); xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx)); discard = 1; break; } pkt->extra.flags = ext->flags; if (ext->flags & XEN_NETIF_EXTRA_FLAG_MORE) { /* * The reference Linux netfront driver never sets this * flag (nor does any other known netfront). So we * will discard the packet. */ printf("xnb(%s:%d): Request sets " "XEN_NETIF_EXTRA_FLAG_MORE, but we can't handle " "that\n", __func__, __LINE__); xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start)); xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx)); discard = 1; } idx++; } /* Set cdr. If there is no more data, cdr is invalid */
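/*
 * Editor's illustration of the request layout xnb_ring2pkt() accepts,
 * e.g. a GSO packet whose payload spans two data requests starting at
 * ring index N:
 *
 *	slot N:   netif_tx_request_t with NETTXF_extra_info and
 *		  NETTXF_more_data set
 *	slot N+1: netif_extra_info_t carrying the GSO metadata
 *	slot N+2: netif_tx_request_t with no flags set
 *
 * which yields car = N, cdr = N + 2, and list_len = 2.
 */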
pkt->cdr = idx; /* Loop on remainder of packet */ while (more_data && RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) { netif_tx_request_t *tx = RING_GET_REQUEST(tx_ring, idx); pkt->list_len++; cdr_size += tx->size; if (tx->flags & ~NETTXF_more_data) { /* There should be no other flags set at this point */ printf("xnb(%s:%d): Request sets unknown flags %d " "after the 1st request in the packet.\n", __func__, __LINE__, tx->flags); xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start)); xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx)); } more_data = tx->flags & NETTXF_more_data; idx++; } /* Finalize packet */ if (more_data != 0) { /* The ring ran out of requests before finishing the packet */ xnb_pkt_invalidate(pkt); idx = start; /* tell caller that we consumed no requests */ } else { /* Calculate car_size */ pkt->car_size = pkt->size - cdr_size; } if (discard != 0) { xnb_pkt_invalidate(pkt); } return idx - start; } /** * Respond to all the requests that constituted pkt. Builds the responses and * writes them to the ring, but doesn't push them to the shared ring. * \param[in] pkt the packet that needs a response * \param[in] error true if there was an error handling the packet, such * as in the hypervisor copy op or mbuf allocation * \param[out] ring Responses go here */ static void xnb_txpkt2rsp(const struct xnb_pkt *pkt, netif_tx_back_ring_t *ring, int error) { /* * Outline: * 1) Respond to the first request * 2) Respond to the extra info request * 3) Loop through every remaining request in the packet, generating * responses that copy those requests' ids and set the status * appropriately. */ netif_tx_request_t *tx; netif_tx_response_t *rsp; int i; uint16_t status; status = (xnb_pkt_is_valid(pkt) == 0) || error ? NETIF_RSP_ERROR : NETIF_RSP_OKAY; KASSERT((pkt->list_len == 0) || (ring->rsp_prod_pvt == pkt->car), ("Cannot respond to ring requests out of order")); if (pkt->list_len >= 1) { uint16_t id; tx = RING_GET_REQUEST(ring, ring->rsp_prod_pvt); id = tx->id; rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); rsp->id = id; rsp->status = status; ring->rsp_prod_pvt++; if (pkt->flags & NETRXF_extra_info) { rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); rsp->status = NETIF_RSP_NULL; ring->rsp_prod_pvt++; } } for (i=0; i < pkt->list_len - 1; i++) { uint16_t id; tx = RING_GET_REQUEST(ring, ring->rsp_prod_pvt); id = tx->id; rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); rsp->id = id; rsp->status = status; ring->rsp_prod_pvt++; } } /** * Create an mbuf chain to represent a packet. Initializes all of the headers * in the mbuf chain, but does not copy the data. The returned chain must be * free()'d when no longer needed. * \param[in] pkt A packet to model the mbuf chain after * \return A newly allocated mbuf chain, possibly with clusters attached. * NULL on failure */ static struct mbuf* -xnb_pkt2mbufc(const struct xnb_pkt *pkt, struct ifnet *ifp) +xnb_pkt2mbufc(const struct xnb_pkt *pkt, if_t ifp) { /** * \todo consider using a memory pool for mbufs instead of * reallocating them for every packet */ /** \todo handle extra data */ struct mbuf *m; m = m_getm(NULL, pkt->size, M_NOWAIT, MT_DATA); if (m != NULL) { m->m_pkthdr.rcvif = ifp; if (pkt->flags & NETTXF_data_validated) { /* * We lie to the host OS and always tell it that the * checksums are ok, because the packet is unlikely to * get corrupted going across domains.
*/ m->m_pkthdr.csum_flags = ( CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR ); m->m_pkthdr.csum_data = 0xffff; } } return m; } /** * Build a gnttab_copy table that can be used to copy data from a pkt * to an mbufc. Does not actually perform the copy. Always uses gref's on * the packet side. * \param[in] pkt pkt's associated requests form the src for * the copy operation * \param[in] mbufc mbufc's storage forms the dest for the copy operation * \param[out] gnttab Storage for the returned grant table * \param[in] txb Pointer to the backend ring structure * \param[in] otherend_id The domain ID of the other end of the copy * \return The number of gnttab entries filled */ static int xnb_txpkt2gnttab(const struct xnb_pkt *pkt, struct mbuf *mbufc, gnttab_copy_table gnttab, const netif_tx_back_ring_t *txb, domid_t otherend_id) { struct mbuf *mbuf = mbufc;/* current mbuf within the chain */ int gnt_idx = 0; /* index into grant table */ RING_IDX r_idx = pkt->car; /* index into tx ring buffer */ int r_ofs = 0; /* offset of next data within tx request's data area */ int m_ofs = 0; /* offset of next data within mbuf's data area */ /* size in bytes that still needs to be represented in the table */ uint16_t size_remaining = pkt->size; while (size_remaining > 0) { const netif_tx_request_t *txq = RING_GET_REQUEST(txb, r_idx); const size_t mbuf_space = M_TRAILINGSPACE(mbuf) - m_ofs; const size_t req_size = r_idx == pkt->car ? pkt->car_size : txq->size; const size_t pkt_space = req_size - r_ofs; /* * space is the largest amount of data that can be copied in the * grant table's next entry */ const size_t space = MIN(pkt_space, mbuf_space); /* TODO: handle this error condition without panicking */ KASSERT(gnt_idx < GNTTAB_LEN, ("Grant table is too short")); gnttab[gnt_idx].source.u.ref = txq->gref; gnttab[gnt_idx].source.domid = otherend_id; gnttab[gnt_idx].source.offset = txq->offset + r_ofs; gnttab[gnt_idx].dest.u.gmfn = virt_to_mfn( mtod(mbuf, vm_offset_t) + m_ofs); gnttab[gnt_idx].dest.offset = virt_to_offset( mtod(mbuf, vm_offset_t) + m_ofs); gnttab[gnt_idx].dest.domid = DOMID_SELF; gnttab[gnt_idx].len = space; gnttab[gnt_idx].flags = GNTCOPY_source_gref; gnt_idx++; r_ofs += space; m_ofs += space; size_remaining -= space; if (req_size - r_ofs <= 0) { /* Must move to the next tx request */ r_ofs = 0; r_idx = (r_idx == pkt->car) ? pkt->cdr : r_idx + 1; } if (M_TRAILINGSPACE(mbuf) - m_ofs <= 0) { /* Must move to the next mbuf */ m_ofs = 0; mbuf = mbuf->m_next; } } return gnt_idx; } /** * Check the status of the grant copy operations, and update mbufs various * non-data fields to reflect the data present. * \param[in,out] mbufc mbuf chain to update. The chain must be valid and of * the correct length, and data should already be present * \param[in] gnttab A grant table for a just completed copy op * \param[in] n_entries The number of valid entries in the grant table */ static void xnb_update_mbufc(struct mbuf *mbufc, const gnttab_copy_table gnttab, int n_entries) { struct mbuf *mbuf = mbufc; int i; size_t total_size = 0; for (i = 0; i < n_entries; i++) { KASSERT(gnttab[i].status == GNTST_okay, ("Some gnttab_copy entry had error status %hd\n", gnttab[i].status)); mbuf->m_len += gnttab[i].len; total_size += gnttab[i].len; if (M_TRAILINGSPACE(mbuf) <= 0) { mbuf = mbuf->m_next; } } mbufc->m_pkthdr.len = total_size; #if defined(INET) || defined(INET6) xnb_add_mbuf_cksum(mbufc); #endif } /** * Dequeue at most one packet from the shared ring * \param[in,out] txb Netif tx ring. 
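/*
 * Editor's sketch of the chunking rule in xnb_txpkt2gnttab() above:
 * each grant-copy entry covers the largest run that fits within both
 * the current tx request and the current mbuf, so a chunk ends at
 * whichever boundary comes first. Standalone illustration only; the
 * helper and its parameters are hypothetical.
 */
static size_t __unused
xnb_copy_chunk_len(size_t req_size, size_t r_ofs, size_t mbuf_space,
    size_t m_ofs)
{
	const size_t pkt_space = req_size - r_ofs;	/* left in the request */
	const size_t mb_space = mbuf_space - m_ofs;	/* left in the mbuf */

	return (MIN(pkt_space, mb_space));
}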
A packet will be removed from it, and * its private indices will be updated. But the indices * will not be pushed to the shared ring. * \param[in] ifnet Interface to which the packet will be sent * \param[in] otherend Domain ID of the other end of the ring * \param[out] mbufc The assembled mbuf chain, ready to send to the generic * networking stack * \param[in,out] gnttab Pointer to enough memory for a grant table. We make * this a function parameter so that we will take less * stack space. * \return An error code */ static int xnb_recv(netif_tx_back_ring_t *txb, domid_t otherend, struct mbuf **mbufc, - struct ifnet *ifnet, gnttab_copy_table gnttab) + if_t ifnet, gnttab_copy_table gnttab) { struct xnb_pkt pkt; /* number of tx requests consumed to build the last packet */ int num_consumed; int nr_ents; *mbufc = NULL; num_consumed = xnb_ring2pkt(&pkt, txb, txb->req_cons); if (num_consumed == 0) return 0; /* Nothing to receive */ /* update statistics independent of errors */ if_inc_counter(ifnet, IFCOUNTER_IPACKETS, 1); /* * if we got here, then 1 or more requests was consumed, but the packet * is not necessarily valid. */ if (xnb_pkt_is_valid(&pkt) == 0) { /* got a garbage packet, respond and drop it */ xnb_txpkt2rsp(&pkt, txb, 1); txb->req_cons += num_consumed; DPRINTF("xnb_intr: garbage packet, num_consumed=%d\n", num_consumed); if_inc_counter(ifnet, IFCOUNTER_IERRORS, 1); return EINVAL; } *mbufc = xnb_pkt2mbufc(&pkt, ifnet); if (*mbufc == NULL) { /* * Couldn't allocate mbufs. Respond and drop the packet. Do * not consume the requests */ xnb_txpkt2rsp(&pkt, txb, 1); DPRINTF("xnb_intr: Couldn't allocate mbufs, num_consumed=%d\n", num_consumed); if_inc_counter(ifnet, IFCOUNTER_IQDROPS, 1); return ENOMEM; } nr_ents = xnb_txpkt2gnttab(&pkt, *mbufc, gnttab, txb, otherend); if (nr_ents > 0) { int __unused hv_ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, gnttab, nr_ents); KASSERT(hv_ret == 0, ("HYPERVISOR_grant_table_op returned %d\n", hv_ret)); xnb_update_mbufc(*mbufc, gnttab, nr_ents); } xnb_txpkt2rsp(&pkt, txb, 0); txb->req_cons += num_consumed; return 0; } /** * Create an xnb_pkt based on the contents of an mbuf chain. * \param[in] mbufc mbuf chain to transform into a packet * \param[out] pkt Storage for the newly generated xnb_pkt * \param[in] start The ring index of the first available slot in the rx * ring * \param[in] space The number of free slots in the rx ring * \retval 0 Success * \retval EINVAL mbufc was corrupt or not convertible into a pkt * \retval EAGAIN There was not enough space in the ring to queue the * packet */ static int xnb_mbufc2pkt(const struct mbuf *mbufc, struct xnb_pkt *pkt, RING_IDX start, int space) { int retval = 0; if ((mbufc == NULL) || ( (mbufc->m_flags & M_PKTHDR) == 0) || (mbufc->m_pkthdr.len == 0)) { xnb_pkt_invalidate(pkt); retval = EINVAL; } else { int slots_required; xnb_pkt_validate(pkt); pkt->flags = 0; pkt->size = mbufc->m_pkthdr.len; pkt->car = start; pkt->car_size = mbufc->m_len; if (mbufc->m_pkthdr.csum_flags & CSUM_TSO) { pkt->flags |= NETRXF_extra_info; pkt->extra.u.gso.size = mbufc->m_pkthdr.tso_segsz; pkt->extra.u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; pkt->extra.u.gso.pad = 0; pkt->extra.u.gso.features = 0; pkt->extra.type = XEN_NETIF_EXTRA_TYPE_GSO; pkt->extra.flags = 0; pkt->cdr = start + 2; } else { pkt->cdr = start + 1; } if (mbufc->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_DELAY_DATA)) { pkt->flags |= (NETRXF_csum_blank | NETRXF_data_validated); } /* * Each ring response can have up to PAGE_SIZE of data. 
* Assume that we can defragment the mbuf chain efficiently * into responses so that each response but the last uses all * PAGE_SIZE bytes. */ pkt->list_len = howmany(pkt->size, PAGE_SIZE); if (pkt->list_len > 1) { pkt->flags |= NETRXF_more_data; } slots_required = pkt->list_len + (pkt->flags & NETRXF_extra_info ? 1 : 0); if (slots_required > space) { xnb_pkt_invalidate(pkt); retval = EAGAIN; } } return retval; } /** * Build a gnttab_copy table that can be used to copy data from an mbuf chain * to the frontend's shared buffers. Does not actually perform the copy. * Always uses gref's on the other end's side. * \param[in] pkt pkt's associated responses form the dest for the copy * operation * \param[in] mbufc The source for the copy operation * \param[out] gnttab Storage for the returned grant table * \param[in] rxb Pointer to the backend ring structure * \param[in] otherend_id The domain ID of the other end of the copy * \return The number of gnttab entries filled */ static int xnb_rxpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc, gnttab_copy_table gnttab, const netif_rx_back_ring_t *rxb, domid_t otherend_id) { const struct mbuf *mbuf = mbufc;/* current mbuf within the chain */ int gnt_idx = 0; /* index into grant table */ RING_IDX r_idx = pkt->car; /* index into rx ring buffer */ int r_ofs = 0; /* offset of next data within rx request's data area */ int m_ofs = 0; /* offset of next data within mbuf's data area */ /* size in bytes that still needs to be represented in the table */ uint16_t size_remaining; size_remaining = (xnb_pkt_is_valid(pkt) != 0) ? pkt->size : 0; while (size_remaining > 0) { const netif_rx_request_t *rxq = RING_GET_REQUEST(rxb, r_idx); const size_t mbuf_space = mbuf->m_len - m_ofs; /* Xen shared pages have an implied size of PAGE_SIZE */ const size_t req_size = PAGE_SIZE; const size_t pkt_space = req_size - r_ofs; /* * space is the largest amount of data that can be copied in the * grant table's next entry */ const size_t space = MIN(pkt_space, mbuf_space); /* TODO: handle this error condition without panicking */ KASSERT(gnt_idx < GNTTAB_LEN, ("Grant table is too short")); gnttab[gnt_idx].dest.u.ref = rxq->gref; gnttab[gnt_idx].dest.domid = otherend_id; gnttab[gnt_idx].dest.offset = r_ofs; gnttab[gnt_idx].source.u.gmfn = virt_to_mfn( mtod(mbuf, vm_offset_t) + m_ofs); gnttab[gnt_idx].source.offset = virt_to_offset( mtod(mbuf, vm_offset_t) + m_ofs); gnttab[gnt_idx].source.domid = DOMID_SELF; gnttab[gnt_idx].len = space; gnttab[gnt_idx].flags = GNTCOPY_dest_gref; gnt_idx++; r_ofs += space; m_ofs += space; size_remaining -= space; if (req_size - r_ofs <= 0) { /* Must move to the next rx request */ r_ofs = 0; r_idx = (r_idx == pkt->car) ? pkt->cdr : r_idx + 1; } if (mbuf->m_len - m_ofs <= 0) { /* Must move to the next mbuf */ m_ofs = 0; mbuf = mbuf->m_next; } } return gnt_idx; }
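/*
 * Worked example (editor's addition, hypothetical helper) of the slot
 * arithmetic in xnb_mbufc2pkt() above: assuming 4 KiB pages, a
 * 9000-byte TSO chain needs howmany(9000, 4096) = 3 data slots plus
 * one extra-info slot, so 4 free rx ring slots in total.
 */
static int __unused
xnb_slots_required(const struct mbuf *mbufc)
{
	int slots = howmany(mbufc->m_pkthdr.len, PAGE_SIZE);

	if (mbufc->m_pkthdr.csum_flags & CSUM_TSO)
		slots++;	/* extra-info slot for the GSO metadata */
	return (slots);
}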
/** * Generates responses for all the requests that constituted pkt. Builds * responses and writes them to the ring, but doesn't push the shared ring * indices. * \param[in] pkt the packet that needs a response * \param[in] gnttab The grant copy table corresponding to this packet. * Used to determine how many rsp->netif_rx_response_t's to * generate. * \param[in] n_entries Number of relevant entries in the grant table * \param[out] ring Responses go here * \return The number of RX requests that were consumed to generate * the responses */ static int xnb_rxpkt2rsp(const struct xnb_pkt *pkt, const gnttab_copy_table gnttab, int n_entries, netif_rx_back_ring_t *ring) { /* * This code makes the following assumptions: * * All entries in gnttab set GNTCOPY_dest_gref * * The entries in gnttab are grouped by their grefs: any two * entries with the same gref must be adjacent */ int error = 0; int gnt_idx, i; int n_responses = 0; grant_ref_t last_gref = GRANT_REF_INVALID; RING_IDX r_idx; KASSERT(gnttab != NULL, ("Received a null grant table copy")); /* * In the event of an error, we only need to send one response to the * netfront. In that case, we mustn't write any data to the responses * after the one we send. So we must loop all the way through gnttab * looking for errors before we generate any responses. * * Since we're looping through the grant table anyway, we'll count the * number of different gref's in it, which will tell us how many * responses to generate */ for (gnt_idx = 0; gnt_idx < n_entries; gnt_idx++) { int16_t status = gnttab[gnt_idx].status; if (status != GNTST_okay) { DPRINTF( "Got error %d for hypervisor gnttab_copy status\n", status); error = 1; break; } if (gnttab[gnt_idx].dest.u.ref != last_gref) { n_responses++; last_gref = gnttab[gnt_idx].dest.u.ref; } } if (error != 0) { uint16_t id; netif_rx_response_t *rsp; id = RING_GET_REQUEST(ring, ring->rsp_prod_pvt)->id; rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); rsp->id = id; rsp->status = NETIF_RSP_ERROR; n_responses = 1; } else { gnt_idx = 0; const int has_extra = pkt->flags & NETRXF_extra_info; if (has_extra != 0) n_responses++; for (i = 0; i < n_responses; i++) { netif_rx_request_t rxq; netif_rx_response_t *rsp; r_idx = ring->rsp_prod_pvt + i; /* * We copy the structure of rxq instead of making a * pointer because it shares the same memory as rsp. */ rxq = *(RING_GET_REQUEST(ring, r_idx)); rsp = RING_GET_RESPONSE(ring, r_idx); if (has_extra && (i == 1)) { netif_extra_info_t *ext = (netif_extra_info_t*)rsp; ext->type = XEN_NETIF_EXTRA_TYPE_GSO; ext->flags = 0; ext->u.gso.size = pkt->extra.u.gso.size; ext->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; ext->u.gso.pad = 0; ext->u.gso.features = 0; } else { rsp->id = rxq.id; rsp->status = GNTST_okay; rsp->offset = 0; rsp->flags = 0; if (i < pkt->list_len - 1) rsp->flags |= NETRXF_more_data; if ((i == 0) && has_extra) rsp->flags |= NETRXF_extra_info; if ((i == 0) && (pkt->flags & NETRXF_data_validated)) { rsp->flags |= NETRXF_data_validated; rsp->flags |= NETRXF_csum_blank; } rsp->status = 0; for (; gnttab[gnt_idx].dest.u.ref == rxq.gref; gnt_idx++) { rsp->status += gnttab[gnt_idx].len; } } } } ring->req_cons += n_responses; ring->rsp_prod_pvt += n_responses; return n_responses; } #if defined(INET) || defined(INET6) /** * Add IP, TCP, and/or UDP checksums to every mbuf in a chain. The first mbuf * in the chain must start with a struct ether_header. * * XXX This function will perform incorrectly on UDP packets that are split up * into multiple ethernet frames.
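/*
 * Editor's sketch of the response-status convention implemented by
 * xnb_rxpkt2rsp() above: on success, a response's status field holds
 * the number of bytes copied into that response's grant, i.e. the sum
 * of the lengths of all copy entries that targeted the same gref. The
 * helper is hypothetical.
 */
static int __unused
xnb_bytes_for_gref(const gnttab_copy_table gnttab, int n_entries,
    grant_ref_t gref)
{
	int i, total = 0;

	for (i = 0; i < n_entries; i++)
		if (gnttab[i].dest.u.ref == gref)
			total += gnttab[i].len;
	return (total);
}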
*/ static void xnb_add_mbuf_cksum(struct mbuf *mbufc) { struct ether_header *eh; struct ip *iph; uint16_t ether_type; eh = mtod(mbufc, struct ether_header*); ether_type = ntohs(eh->ether_type); if (ether_type != ETHERTYPE_IP) { /* Nothing to calculate */ return; } iph = (struct ip*)(eh + 1); if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) { iph->ip_sum = 0; iph->ip_sum = in_cksum_hdr(iph); } switch (iph->ip_p) { case IPPROTO_TCP: if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) { size_t tcplen = ntohs(iph->ip_len) - sizeof(struct ip); struct tcphdr *th = (struct tcphdr*)(iph + 1); th->th_sum = in_pseudo(iph->ip_src.s_addr, iph->ip_dst.s_addr, htons(IPPROTO_TCP + tcplen)); th->th_sum = in_cksum_skip(mbufc, sizeof(struct ether_header) + ntohs(iph->ip_len), sizeof(struct ether_header) + (iph->ip_hl << 2)); } break; case IPPROTO_UDP: if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) { size_t udplen = ntohs(iph->ip_len) - sizeof(struct ip); struct udphdr *uh = (struct udphdr*)(iph + 1); uh->uh_sum = in_pseudo(iph->ip_src.s_addr, iph->ip_dst.s_addr, htons(IPPROTO_UDP + udplen)); uh->uh_sum = in_cksum_skip(mbufc, sizeof(struct ether_header) + ntohs(iph->ip_len), sizeof(struct ether_header) + (iph->ip_hl << 2)); } break; default: break; } } #endif /* INET || INET6 */ static void xnb_stop(struct xnb_softc *xnb) { - struct ifnet *ifp; + if_t ifp; mtx_assert(&xnb->sc_lock, MA_OWNED); ifp = xnb->xnb_ifp; - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); + if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); } static int -xnb_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +xnb_ioctl(if_t ifp, u_long cmd, caddr_t data) { - struct xnb_softc *xnb = ifp->if_softc; + struct xnb_softc *xnb = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq*) data; #ifdef INET struct ifaddr *ifa = (struct ifaddr*)data; #endif int error = 0; switch (cmd) { case SIOCSIFFLAGS: mtx_lock(&xnb->sc_lock); - if (ifp->if_flags & IFF_UP) { + if (if_getflags(ifp) & IFF_UP) { xnb_ifinit_locked(xnb); } else { - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { xnb_stop(xnb); } } /* * Note: netfront sets a variable named xn_if_flags * here, but that variable is never read */ mtx_unlock(&xnb->sc_lock); break; case SIOCSIFADDR: #ifdef INET mtx_lock(&xnb->sc_lock); if (ifa->ifa_addr->sa_family == AF_INET) { - ifp->if_flags |= IFF_UP; - if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | - IFF_DRV_OACTIVE); + if_setflagbits(ifp, IFF_UP, 0); + if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) { + if_setdrvflagbits(ifp, 0, + IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); - ifp->if_drv_flags |= IFF_DRV_RUNNING; - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0); + if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_UP); } arp_ifinit(ifp, ifa); mtx_unlock(&xnb->sc_lock); } else { mtx_unlock(&xnb->sc_lock); #endif error = ether_ioctl(ifp, cmd, data); #ifdef INET } #endif break; case SIOCSIFCAP: mtx_lock(&xnb->sc_lock); if (ifr->ifr_reqcap & IFCAP_TXCSUM) { - ifp->if_capenable |= IFCAP_TXCSUM; - ifp->if_hwassist |= XNB_CSUM_FEATURES; + if_setcapenablebit(ifp, IFCAP_TXCSUM, 0); + if_sethwassistbits(ifp, XNB_CSUM_FEATURES, 0); } else { - ifp->if_capenable &= ~(IFCAP_TXCSUM); - ifp->if_hwassist &= ~(XNB_CSUM_FEATURES); + if_setcapenablebit(ifp, 0, IFCAP_TXCSUM); + if_sethwassistbits(ifp, 0, XNB_CSUM_FEATURES); } if 
((ifr->ifr_reqcap & IFCAP_RXCSUM)) { - ifp->if_capenable |= IFCAP_RXCSUM; + if_setcapenablebit(ifp, IFCAP_RXCSUM, 0); } else { - ifp->if_capenable &= ~(IFCAP_RXCSUM); + if_setcapenablebit(ifp, 0, IFCAP_RXCSUM); } /* * TODO enable TSO4 and LRO once we no longer need * to calculate checksums in software */ #if 0 if ((ifr->ifr_reqcap & IFCAP_TSO4) != 0) { - if (IFCAP_TXCSUM & ifp->if_capenable) { + if (IFCAP_TXCSUM & if_getcapenable(ifp)) { printf("xnb: Xen netif requires that " "TXCSUM be enabled in order " "to use TSO4\n"); error = EINVAL; } else { - ifp->if_capenable |= IFCAP_TSO4; - ifp->if_hwassist |= CSUM_TSO; + if_setcapenablebit(ifp, IFCAP_TSO4, 0); + if_sethwassistbits(ifp, CSUM_TSO, 0); } } else { - ifp->if_capenable &= ~(IFCAP_TSO4); - ifp->if_hwassist &= ~(CSUM_TSO); + if_setcapenablebit(ifp, 0, IFCAP_TSO4); + if_sethwassistbits(ifp, 0, CSUM_TSO); } if ((ifr->ifr_reqcap & IFCAP_LRO) != 0) { - ifp->if_capenable |= IFCAP_LRO; + if_setcapenablebit(ifp, IFCAP_LRO, 0); } else { - ifp->if_capenable &= ~(IFCAP_LRO); + if_setcapenablebit(ifp, 0, IFCAP_LRO); } #endif mtx_unlock(&xnb->sc_lock); break; case SIOCSIFMTU: - ifp->if_mtu = ifr->ifr_mtu; - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + if_setmtu(ifp, ifr->ifr_mtu); + if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); xnb_ifinit(xnb); break; case SIOCADDMULTI: case SIOCDELMULTI: break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &xnb->sc_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } static void -xnb_start_locked(struct ifnet *ifp) +xnb_start_locked(if_t ifp) { netif_rx_back_ring_t *rxb; struct xnb_softc *xnb; struct mbuf *mbufc; RING_IDX req_prod_local; - xnb = ifp->if_softc; + xnb = if_getsoftc(ifp); rxb = &xnb->ring_configs[XNB_RING_TYPE_RX].back_ring.rx_ring; if (!xnb->carrier) return; do { int out_of_space = 0; int notify; req_prod_local = rxb->sring->req_prod; xen_rmb(); for (;;) { int error; - IF_DEQUEUE(&ifp->if_snd, mbufc); + mbufc = if_dequeue(ifp); if (mbufc == NULL) break; error = xnb_send(rxb, xnb->otherend_id, mbufc, xnb->rx_gnttab); switch (error) { case EAGAIN: /* * Insufficient space in the ring. * Requeue pkt and send when space is * available. */ - IF_PREPEND(&ifp->if_snd, mbufc); + if_sendq_prepend(ifp, mbufc); /* * Perhaps the frontend missed an IRQ * and went to sleep. Notify it to wake * it up. */ out_of_space = 1; break; case EINVAL: /* OS gave a corrupt packet. Drop it. */ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); /* FALLTHROUGH */ default: /* Send succeeded, or packet had error. * Free the packet */ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (mbufc) m_freem(mbufc); break; } if (out_of_space != 0) break; } RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(rxb, notify); if ((notify != 0) || (out_of_space != 0)) xen_intr_signal(xnb->xen_intr_handle); rxb->sring->req_event = req_prod_local + 1; xen_mb(); } while (rxb->sring->req_prod != req_prod_local); } /** * Sends one packet to the ring. Blocks until the packet is on the ring. * \param[in] mbufc Contains one packet to send. Caller must free * \param[in,out] rxb The packet will be pushed onto this ring, but the * otherend will not be notified. * \param[in] otherend The domain ID of the other end of the connection * \retval EAGAIN The ring did not have enough space for the packet. * The ring has not been modified * \param[in,out] gnttab Pointer to enough memory for a grant table. We make * this a function parameter so that we will take less * stack space. 
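 *
 * Note: available space is measured as req_prod - req_cons on the
 * shared ring, i.e. the number of still-unconsumed RX requests that
 * can back this packet's responses.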
* \retval EINVAL mbufc was corrupt or not convertible into a pkt */ static int xnb_send(netif_rx_back_ring_t *ring, domid_t otherend, const struct mbuf *mbufc, gnttab_copy_table gnttab) { struct xnb_pkt pkt; int error, n_entries; RING_IDX space; space = ring->sring->req_prod - ring->req_cons; error = xnb_mbufc2pkt(mbufc, &pkt, ring->rsp_prod_pvt, space); if (error != 0) return error; n_entries = xnb_rxpkt2gnttab(&pkt, mbufc, gnttab, ring, otherend); if (n_entries != 0) { int __unused hv_ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, gnttab, n_entries); KASSERT(hv_ret == 0, ("HYPERVISOR_grant_table_op returned %d\n", hv_ret)); } xnb_rxpkt2rsp(&pkt, gnttab, n_entries, ring); return 0; } static void -xnb_start(struct ifnet *ifp) +xnb_start(if_t ifp) { struct xnb_softc *xnb; - xnb = ifp->if_softc; + xnb = if_getsoftc(ifp); mtx_lock(&xnb->rx_lock); xnb_start_locked(ifp); mtx_unlock(&xnb->rx_lock); } /* equivalent of network_open() in Linux */ static void xnb_ifinit_locked(struct xnb_softc *xnb) { - struct ifnet *ifp; + if_t ifp; ifp = xnb->xnb_ifp; mtx_assert(&xnb->sc_lock, MA_OWNED); - if (ifp->if_drv_flags & IFF_DRV_RUNNING) + if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) return; xnb_stop(xnb); - ifp->if_drv_flags |= IFF_DRV_RUNNING; - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0); + if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_UP); } static void xnb_ifinit(void *xsc) { struct xnb_softc *xnb = xsc; mtx_lock(&xnb->sc_lock); xnb_ifinit_locked(xnb); mtx_unlock(&xnb->sc_lock); } /** * Callback used by the generic networking code to tell us when our carrier * state has changed. Since we don't have a physical carrier, we don't care */ static int -xnb_ifmedia_upd(struct ifnet *ifp) +xnb_ifmedia_upd(if_t ifp) { return (0); } /** * Callback used by the generic networking code to ask us what our carrier * state is. Since we don't have a physical carrier, this is very simple */ static void -xnb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) +xnb_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr) { ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE; ifmr->ifm_active = IFM_ETHER|IFM_MANUAL; } /*---------------------------- NewBus Registration ---------------------------*/ static device_method_t xnb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xnb_probe), DEVMETHOD(device_attach, xnb_attach), DEVMETHOD(device_detach, xnb_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, xnb_suspend), DEVMETHOD(device_resume, xnb_resume), /* Xenbus interface */ DEVMETHOD(xenbus_otherend_changed, xnb_frontend_changed), { 0, 0 } }; static driver_t xnb_driver = { "xnb", xnb_methods, sizeof(struct xnb_softc), }; DRIVER_MODULE(xnb, xenbusb_back, xnb_driver, 0, 0); /*-------------------------- Unit Tests -------------------------------------*/ #ifdef XNB_DEBUG #include "netback_unit_tests.c" #endif diff --git a/sys/dev/xen/netfront/netfront.c b/sys/dev/xen/netfront/netfront.c index cfb6172dbe19..b833dfc0a42d 100644 --- a/sys/dev/xen/netfront/netfront.c +++ b/sys/dev/xen/netfront/netfront.c @@ -1,2422 +1,2425 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2006 Kip Macy * Copyright (c) 2015 Wei Liu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "xenbus_if.h" /* Features supported by all backends. TSO and LRO can be negotiated */ #define XN_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) #define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE) #define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE) #define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1) /* * Should the driver do LRO on the RX end * this can be toggled on the fly, but the * interface must be reset (down/up) for it * to take effect. */ static int xn_enable_lro = 1; TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro); /* * Number of pairs of queues. */ static unsigned long xn_num_queues = 4; TUNABLE_ULONG("hw.xn.num_queues", &xn_num_queues); /** * \brief The maximum allowed data fragments in a single transmit * request. * * This limit is imposed by the backend driver. We assume here that * we are dealing with a Linux driver domain and have set our limit * to mirror the Linux MAX_SKB_FRAGS constant. 
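 *
 * With 4 KiB pages this works out to 65536 / 4096 + 2 = 18 request
 * slots per packet.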
*/ #define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2) #define RX_COPY_THRESHOLD 256 #define net_ratelimit() 0 struct netfront_rxq; struct netfront_txq; struct netfront_info; struct netfront_rx_info; static void xn_txeof(struct netfront_txq *); static void xn_rxeof(struct netfront_rxq *); static void xn_alloc_rx_buffers(struct netfront_rxq *); static void xn_alloc_rx_buffers_callout(void *arg); static void xn_release_rx_bufs(struct netfront_rxq *); static void xn_release_tx_bufs(struct netfront_txq *); static void xn_rxq_intr(struct netfront_rxq *); static void xn_txq_intr(struct netfront_txq *); static void xn_intr(void *); static int xn_assemble_tx_request(struct netfront_txq *, struct mbuf *); -static int xn_ioctl(struct ifnet *, u_long, caddr_t); +static int xn_ioctl(if_t, u_long, caddr_t); static void xn_ifinit_locked(struct netfront_info *); static void xn_ifinit(void *); static void xn_stop(struct netfront_info *); static void xn_query_features(struct netfront_info *np); static int xn_configure_features(struct netfront_info *np); static void netif_free(struct netfront_info *info); static int netfront_detach(device_t dev); static int xn_txq_mq_start_locked(struct netfront_txq *, struct mbuf *); -static int xn_txq_mq_start(struct ifnet *, struct mbuf *); +static int xn_txq_mq_start(if_t, struct mbuf *); static int talk_to_backend(device_t dev, struct netfront_info *info); static int create_netdev(device_t dev); static void netif_disconnect_backend(struct netfront_info *info); static int setup_device(device_t dev, struct netfront_info *info, unsigned long); -static int xn_ifmedia_upd(struct ifnet *ifp); -static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); +static int xn_ifmedia_upd(if_t ifp); +static void xn_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr); static int xn_connect(struct netfront_info *); static void xn_kick_rings(struct netfront_info *); static int xn_get_responses(struct netfront_rxq *, struct netfront_rx_info *, RING_IDX, RING_IDX *, struct mbuf **); #define virt_to_mfn(x) (vtophys(x) >> PAGE_SHIFT) #define INVALID_P2M_ENTRY (~0UL) #define XN_QUEUE_NAME_LEN 8 /* xn{t,r}x_%u, allow for two digits */ struct netfront_rxq { struct netfront_info *info; u_int id; char name[XN_QUEUE_NAME_LEN]; struct mtx lock; int ring_ref; netif_rx_front_ring_t ring; xen_intr_handle_t xen_intr_handle; grant_ref_t gref_head; grant_ref_t grant_ref[NET_RX_RING_SIZE + 1]; struct mbuf *mbufs[NET_RX_RING_SIZE + 1]; struct lro_ctrl lro; struct callout rx_refill; }; struct netfront_txq { struct netfront_info *info; u_int id; char name[XN_QUEUE_NAME_LEN]; struct mtx lock; int ring_ref; netif_tx_front_ring_t ring; xen_intr_handle_t xen_intr_handle; grant_ref_t gref_head; grant_ref_t grant_ref[NET_TX_RING_SIZE + 1]; struct mbuf *mbufs[NET_TX_RING_SIZE + 1]; int mbufs_cnt; struct buf_ring *br; struct taskqueue *tq; struct task defrtask; bus_dma_segment_t segs[MAX_TX_REQ_FRAGS]; struct mbuf_xennet { struct m_tag tag; bus_dma_tag_t dma_tag; bus_dmamap_t dma_map; struct netfront_txq *txq; SLIST_ENTRY(mbuf_xennet) next; u_int count; } xennet_tag[NET_TX_RING_SIZE + 1]; SLIST_HEAD(, mbuf_xennet) tags; bool full; }; struct netfront_info { - struct ifnet *xn_ifp; + if_t xn_ifp; struct mtx sc_lock; u_int num_queues; struct netfront_rxq *rxq; struct netfront_txq *txq; u_int carrier; u_int maxfrags; device_t xbdev; uint8_t mac[ETHER_ADDR_LEN]; int xn_if_flags; struct ifmedia sc_media; bus_dma_tag_t dma_tag; bool xn_reset; }; struct netfront_rx_info { struct netif_rx_response rx; struct 
netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; }; #define XN_RX_LOCK(_q) mtx_lock(&(_q)->lock) #define XN_RX_UNLOCK(_q) mtx_unlock(&(_q)->lock) #define XN_TX_LOCK(_q) mtx_lock(&(_q)->lock) #define XN_TX_TRYLOCK(_q) mtx_trylock(&(_q)->lock) #define XN_TX_UNLOCK(_q) mtx_unlock(&(_q)->lock) #define XN_LOCK(_sc) mtx_lock(&(_sc)->sc_lock); #define XN_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_lock); #define XN_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_lock, MA_OWNED); #define XN_RX_LOCK_ASSERT(_q) mtx_assert(&(_q)->lock, MA_OWNED); #define XN_TX_LOCK_ASSERT(_q) mtx_assert(&(_q)->lock, MA_OWNED); #define netfront_carrier_on(netif) ((netif)->carrier = 1) #define netfront_carrier_off(netif) ((netif)->carrier = 0) #define netfront_carrier_ok(netif) ((netif)->carrier) /* Access macros for acquiring and freeing slots in xn_free_{tx,rx}_idxs[]. */ static inline void add_id_to_freelist(struct mbuf **list, uintptr_t id) { KASSERT(id != 0, ("%s: the head item (0) must always be free.", __func__)); list[id] = list[0]; list[0] = (struct mbuf *)id; } static inline unsigned short get_id_from_freelist(struct mbuf **list) { uintptr_t id; id = (uintptr_t)list[0]; KASSERT(id != 0, ("%s: the head item (0) must always remain free.", __func__)); list[0] = list[id]; return (id); } static inline int xn_rxidx(RING_IDX idx) { return idx & (NET_RX_RING_SIZE - 1); } static inline struct mbuf * xn_get_rx_mbuf(struct netfront_rxq *rxq, RING_IDX ri) { int i; struct mbuf *m; i = xn_rxidx(ri); m = rxq->mbufs[i]; rxq->mbufs[i] = NULL; return (m); } static inline grant_ref_t xn_get_rx_ref(struct netfront_rxq *rxq, RING_IDX ri) { int i = xn_rxidx(ri); grant_ref_t ref = rxq->grant_ref[i]; KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n")); rxq->grant_ref[i] = GRANT_REF_INVALID; return (ref); } #define MTAG_COOKIE 1218492000 #define MTAG_XENNET 0 static void mbuf_grab(struct mbuf *m) { struct mbuf_xennet *ref; ref = (struct mbuf_xennet *)m_tag_locate(m, MTAG_COOKIE, MTAG_XENNET, NULL); KASSERT(ref != NULL, ("Cannot find refcount")); ref->count++; } static void mbuf_release(struct mbuf *m) { struct mbuf_xennet *ref; ref = (struct mbuf_xennet *)m_tag_locate(m, MTAG_COOKIE, MTAG_XENNET, NULL); KASSERT(ref != NULL, ("Cannot find refcount")); KASSERT(ref->count > 0, ("Invalid reference count")); if (--ref->count == 0) m_freem(m); } static void tag_free(struct m_tag *t) { struct mbuf_xennet *ref = (struct mbuf_xennet *)t; KASSERT(ref->count == 0, ("Free mbuf tag with pending refcnt")); bus_dmamap_sync(ref->dma_tag, ref->dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_destroy(ref->dma_tag, ref->dma_map); SLIST_INSERT_HEAD(&ref->txq->tags, ref, next); } #define IPRINTK(fmt, args...) \ printf("[XEN] " fmt, ##args) #ifdef INVARIANTS #define WPRINTK(fmt, args...) \ printf("[XEN] " fmt, ##args) #else #define WPRINTK(fmt, args...) #endif #ifdef DEBUG #define DPRINTK(fmt, args...) \ printf("[XEN] %s: " fmt, __func__, ##args) #else #define DPRINTK(fmt, args...) #endif /** * Read the 'mac' node at the given device's node in the store, and parse that * as colon-separated octets, placing the result in the given mac array. mac * must be a preallocated array of length ETH_ALEN (as declared in * linux/if_ether.h). Return 0 on success, or errno on error. 
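 *
 * For example, a node containing the (hypothetical) value
 * "00:16:3e:5a:12:34" parses to mac[] = { 0x00, 0x16, 0x3e, 0x5a,
 * 0x12, 0x34 }; a token not terminated by ':' or NUL causes ENOENT.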
*/ static int xen_net_read_mac(device_t dev, uint8_t mac[]) { int error, i; char *s, *e, *macstr; const char *path; path = xenbus_get_node(dev); error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); if (error == ENOENT) { /* * Deal with missing mac XenStore nodes on devices with * HVM emulation (the 'ioemu' configuration attribute) * enabled. * * The HVM emulator may execute in a stub device model * domain which lacks the permission, only given to Dom0, * to update the guest's XenStore tree. For this reason, * the HVM emulator doesn't even attempt to write the * front-side mac node, even when operating in Dom0. * However, there should always be a mac listed in the * backend tree. Fall back to this version if our query * of the front side XenStore location doesn't find * anything. */ path = xenbus_get_otherend_path(dev); error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); } if (error != 0) { xenbus_dev_fatal(dev, error, "parsing %s/mac", path); return (error); } s = macstr; for (i = 0; i < ETHER_ADDR_LEN; i++) { mac[i] = strtoul(s, &e, 16); if (s == e || (e[0] != ':' && e[0] != 0)) { free(macstr, M_XENBUS); return (ENOENT); } s = &e[1]; } free(macstr, M_XENBUS); return (0); } /** * Entry point to this code when a new device is created. Allocate the basic * structures and the ring buffers for communication with the backend, and * inform the backend of the appropriate details for those. Switch to * Connected state. */ static int netfront_probe(device_t dev) { if (xen_pv_nics_disabled()) return (ENXIO); if (!strcmp(xenbus_get_type(dev), "vif")) { device_set_desc(dev, "Virtual Network Interface"); return (0); } return (ENXIO); } static int netfront_attach(device_t dev) { int err; err = create_netdev(dev); if (err != 0) { xenbus_dev_fatal(dev, err, "creating netdev"); return (err); } SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "enable_lro", CTLFLAG_RW, &xn_enable_lro, 0, "Large Receive Offload"); SYSCTL_ADD_ULONG(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "num_queues", CTLFLAG_RD, &xn_num_queues, "Number of pairs of queues"); return (0); } static int netfront_suspend(device_t dev) { struct netfront_info *np = device_get_softc(dev); u_int i; for (i = 0; i < np->num_queues; i++) { XN_RX_LOCK(&np->rxq[i]); XN_TX_LOCK(&np->txq[i]); } netfront_carrier_off(np); for (i = 0; i < np->num_queues; i++) { XN_RX_UNLOCK(&np->rxq[i]); XN_TX_UNLOCK(&np->txq[i]); } return (0); } /** * We are reconnecting to the backend, due to a suspend/resume, or a backend * driver restart. We tear down our netif structure and recreate it, but * leave the device-layer structures intact so that this is transparent to the * rest of the kernel. 
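 *
 * If the suspend was cancelled (xen_suspend_cancelled), the rings were
 * never torn down, so the carrier is simply re-enabled under the queue
 * locks; otherwise the backend is disconnected and a full reconnect
 * follows through the normal xenbus state machine.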
*/ static int netfront_resume(device_t dev) { struct netfront_info *info = device_get_softc(dev); u_int i; if (xen_suspend_cancelled) { for (i = 0; i < info->num_queues; i++) { XN_RX_LOCK(&info->rxq[i]); XN_TX_LOCK(&info->txq[i]); } netfront_carrier_on(info); for (i = 0; i < info->num_queues; i++) { XN_RX_UNLOCK(&info->rxq[i]); XN_TX_UNLOCK(&info->txq[i]); } return (0); } netif_disconnect_backend(info); return (0); } static int write_queue_xenstore_keys(device_t dev, struct netfront_rxq *rxq, struct netfront_txq *txq, struct xs_transaction *xst, bool hierarchy) { int err; const char *message; const char *node = xenbus_get_node(dev); char *path; size_t path_size; KASSERT(rxq->id == txq->id, ("Mismatch between RX and TX queue ids")); /* Split event channel support is not yet there. */ KASSERT(rxq->xen_intr_handle == txq->xen_intr_handle, ("Split event channels are not supported")); if (hierarchy) { path_size = strlen(node) + 10; path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO); snprintf(path, path_size, "%s/queue-%u", node, rxq->id); } else { path_size = strlen(node) + 1; path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO); snprintf(path, path_size, "%s", node); } err = xs_printf(*xst, path, "tx-ring-ref","%u", txq->ring_ref); if (err != 0) { message = "writing tx ring-ref"; goto error; } err = xs_printf(*xst, path, "rx-ring-ref","%u", rxq->ring_ref); if (err != 0) { message = "writing rx ring-ref"; goto error; } err = xs_printf(*xst, path, "event-channel", "%u", xen_intr_port(rxq->xen_intr_handle)); if (err != 0) { message = "writing event-channel"; goto error; } free(path, M_DEVBUF); return (0); error: free(path, M_DEVBUF); xenbus_dev_fatal(dev, err, "%s", message); return (err); } /* Common code used when first setting up, and when resuming. */ static int talk_to_backend(device_t dev, struct netfront_info *info) { const char *message; struct xs_transaction xst; const char *node = xenbus_get_node(dev); int err; unsigned long num_queues, max_queues = 0; unsigned int i; err = xen_net_read_mac(dev, info->mac); if (err != 0) { xenbus_dev_fatal(dev, err, "parsing %s/mac", node); goto out; } err = xs_scanf(XST_NIL, xenbus_get_otherend_path(info->xbdev), "multi-queue-max-queues", NULL, "%lu", &max_queues); if (err != 0) max_queues = 1; num_queues = xn_num_queues; if (num_queues > max_queues) num_queues = max_queues; err = setup_device(dev, info, num_queues); if (err != 0) goto out; again: err = xs_transaction_start(&xst); if (err != 0) { xenbus_dev_fatal(dev, err, "starting transaction"); goto free; } if (info->num_queues == 1) { err = write_queue_xenstore_keys(dev, &info->rxq[0], &info->txq[0], &xst, false); if (err != 0) goto abort_transaction_no_def_error; } else { err = xs_printf(xst, node, "multi-queue-num-queues", "%u", info->num_queues); if (err != 0) { message = "writing multi-queue-num-queues"; goto abort_transaction; } for (i = 0; i < info->num_queues; i++) { err = write_queue_xenstore_keys(dev, &info->rxq[i], &info->txq[i], &xst, true); if (err != 0) goto abort_transaction_no_def_error; } } err = xs_printf(xst, node, "request-rx-copy", "%u", 1); if (err != 0) { message = "writing request-rx-copy"; goto abort_transaction; } err = xs_printf(xst, node, "feature-rx-notify", "%d", 1); if (err != 0) { message = "writing feature-rx-notify"; goto abort_transaction; } err = xs_printf(xst, node, "feature-sg", "%d", 1); if (err != 0) { message = "writing feature-sg"; goto abort_transaction; } - if ((info->xn_ifp->if_capenable & IFCAP_LRO) != 0) { + if ((if_getcapenable(info->xn_ifp) & 
IFCAP_LRO) != 0) { err = xs_printf(xst, node, "feature-gso-tcpv4", "%d", 1); if (err != 0) { message = "writing feature-gso-tcpv4"; goto abort_transaction; } } - if ((info->xn_ifp->if_capenable & IFCAP_RXCSUM) == 0) { + if ((if_getcapenable(info->xn_ifp) & IFCAP_RXCSUM) == 0) { err = xs_printf(xst, node, "feature-no-csum-offload", "%d", 1); if (err != 0) { message = "writing feature-no-csum-offload"; goto abort_transaction; } } err = xs_transaction_end(xst, 0); if (err != 0) { if (err == EAGAIN) goto again; xenbus_dev_fatal(dev, err, "completing transaction"); goto free; } return 0; abort_transaction: xenbus_dev_fatal(dev, err, "%s", message); abort_transaction_no_def_error: xs_transaction_end(xst, 1); free: netif_free(info); out: return (err); } static void xn_rxq_intr(struct netfront_rxq *rxq) { XN_RX_LOCK(rxq); xn_rxeof(rxq); XN_RX_UNLOCK(rxq); } static void xn_txq_start(struct netfront_txq *txq) { struct netfront_info *np = txq->info; - struct ifnet *ifp = np->xn_ifp; + if_t ifp = np->xn_ifp; XN_TX_LOCK_ASSERT(txq); if (!drbr_empty(ifp, txq->br)) xn_txq_mq_start_locked(txq, NULL); } static void xn_txq_intr(struct netfront_txq *txq) { XN_TX_LOCK(txq); if (RING_HAS_UNCONSUMED_RESPONSES(&txq->ring)) xn_txeof(txq); xn_txq_start(txq); XN_TX_UNLOCK(txq); } static void xn_txq_tq_deferred(void *xtxq, int pending) { struct netfront_txq *txq = xtxq; XN_TX_LOCK(txq); xn_txq_start(txq); XN_TX_UNLOCK(txq); } static void disconnect_rxq(struct netfront_rxq *rxq) { xn_release_rx_bufs(rxq); gnttab_free_grant_references(rxq->gref_head); gnttab_end_foreign_access(rxq->ring_ref, NULL); /* * No split event channel support at the moment, handle will * be unbound in tx. So no need to call xen_intr_unbind here, * but we do want to reset the handler to 0. */ rxq->xen_intr_handle = 0; } static void destroy_rxq(struct netfront_rxq *rxq) { callout_drain(&rxq->rx_refill); free(rxq->ring.sring, M_DEVBUF); } static void destroy_rxqs(struct netfront_info *np) { int i; for (i = 0; i < np->num_queues; i++) destroy_rxq(&np->rxq[i]); free(np->rxq, M_DEVBUF); np->rxq = NULL; } static int setup_rxqs(device_t dev, struct netfront_info *info, unsigned long num_queues) { int q, i; int error; netif_rx_sring_t *rxs; struct netfront_rxq *rxq; info->rxq = malloc(sizeof(struct netfront_rxq) * num_queues, M_DEVBUF, M_WAITOK|M_ZERO); for (q = 0; q < num_queues; q++) { rxq = &info->rxq[q]; rxq->id = q; rxq->info = info; rxq->ring_ref = GRANT_REF_INVALID; rxq->ring.sring = NULL; snprintf(rxq->name, XN_QUEUE_NAME_LEN, "xnrx_%u", q); mtx_init(&rxq->lock, rxq->name, "netfront receive lock", MTX_DEF); for (i = 0; i <= NET_RX_RING_SIZE; i++) { rxq->mbufs[i] = NULL; rxq->grant_ref[i] = GRANT_REF_INVALID; } /* Start resources allocation */ if (gnttab_alloc_grant_references(NET_RX_RING_SIZE, &rxq->gref_head) != 0) { device_printf(dev, "allocating rx gref"); error = ENOMEM; goto fail; } rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK|M_ZERO); SHARED_RING_INIT(rxs); FRONT_RING_INIT(&rxq->ring, rxs, PAGE_SIZE); error = xenbus_grant_ring(dev, virt_to_mfn(rxs), &rxq->ring_ref); if (error != 0) { device_printf(dev, "granting rx ring page"); goto fail_grant_ring; } callout_init(&rxq->rx_refill, 1); } return (0); fail_grant_ring: gnttab_free_grant_references(rxq->gref_head); free(rxq->ring.sring, M_DEVBUF); fail: for (; q >= 0; q--) { disconnect_rxq(&info->rxq[q]); destroy_rxq(&info->rxq[q]); } free(info->rxq, M_DEVBUF); return (error); } static void disconnect_txq(struct netfront_txq *txq) { xn_release_tx_bufs(txq); 
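/* The TX side owns the shared event-channel handle, so it is unbound below; disconnect_rxq() above only zeroed its copy of the handle. */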
gnttab_free_grant_references(txq->gref_head); gnttab_end_foreign_access(txq->ring_ref, NULL); xen_intr_unbind(&txq->xen_intr_handle); } static void destroy_txq(struct netfront_txq *txq) { unsigned int i; free(txq->ring.sring, M_DEVBUF); buf_ring_free(txq->br, M_DEVBUF); taskqueue_drain_all(txq->tq); taskqueue_free(txq->tq); for (i = 0; i <= NET_TX_RING_SIZE; i++) { bus_dmamap_destroy(txq->info->dma_tag, txq->xennet_tag[i].dma_map); txq->xennet_tag[i].dma_map = NULL; } } static void destroy_txqs(struct netfront_info *np) { int i; for (i = 0; i < np->num_queues; i++) destroy_txq(&np->txq[i]); free(np->txq, M_DEVBUF); np->txq = NULL; } static int setup_txqs(device_t dev, struct netfront_info *info, unsigned long num_queues) { int q, i; int error; netif_tx_sring_t *txs; struct netfront_txq *txq; info->txq = malloc(sizeof(struct netfront_txq) * num_queues, M_DEVBUF, M_WAITOK|M_ZERO); for (q = 0; q < num_queues; q++) { txq = &info->txq[q]; txq->id = q; txq->info = info; txq->ring_ref = GRANT_REF_INVALID; txq->ring.sring = NULL; snprintf(txq->name, XN_QUEUE_NAME_LEN, "xntx_%u", q); mtx_init(&txq->lock, txq->name, "netfront transmit lock", MTX_DEF); SLIST_INIT(&txq->tags); for (i = 0; i <= NET_TX_RING_SIZE; i++) { txq->mbufs[i] = (void *) ((u_long) i+1); txq->grant_ref[i] = GRANT_REF_INVALID; txq->xennet_tag[i].txq = txq; txq->xennet_tag[i].dma_tag = info->dma_tag; error = bus_dmamap_create(info->dma_tag, 0, &txq->xennet_tag[i].dma_map); if (error != 0) { device_printf(dev, "failed to allocate dma map\n"); goto fail; } m_tag_setup(&txq->xennet_tag[i].tag, MTAG_COOKIE, MTAG_XENNET, sizeof(txq->xennet_tag[i]) - sizeof(txq->xennet_tag[i].tag)); txq->xennet_tag[i].tag.m_tag_free = &tag_free; SLIST_INSERT_HEAD(&txq->tags, &txq->xennet_tag[i], next); } txq->mbufs[NET_TX_RING_SIZE] = (void *)0; /* Start resources allocation. 
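 * Each queue claims NET_TX_RING_SIZE grant references up front; with
 * 4 KiB pages that is 256 slots.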
*/ if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, &txq->gref_head) != 0) { device_printf(dev, "failed to allocate tx grant refs\n"); error = ENOMEM; goto fail; } txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK|M_ZERO); SHARED_RING_INIT(txs); FRONT_RING_INIT(&txq->ring, txs, PAGE_SIZE); error = xenbus_grant_ring(dev, virt_to_mfn(txs), &txq->ring_ref); if (error != 0) { device_printf(dev, "failed to grant tx ring\n"); goto fail_grant_ring; } txq->br = buf_ring_alloc(NET_TX_RING_SIZE, M_DEVBUF, M_WAITOK, &txq->lock); TASK_INIT(&txq->defrtask, 0, xn_txq_tq_deferred, txq); txq->tq = taskqueue_create(txq->name, M_WAITOK, taskqueue_thread_enqueue, &txq->tq); error = taskqueue_start_threads(&txq->tq, 1, PI_NET, "%s txq %d", device_get_nameunit(dev), txq->id); if (error != 0) { device_printf(dev, "failed to start tx taskq %d\n", txq->id); goto fail_start_thread; } error = xen_intr_alloc_and_bind_local_port(dev, xenbus_get_otherend_id(dev), /* filter */ NULL, xn_intr, &info->txq[q], INTR_TYPE_NET | INTR_MPSAFE | INTR_ENTROPY, &txq->xen_intr_handle); if (error != 0) { device_printf(dev, "xen_intr_alloc_and_bind_local_port failed\n"); goto fail_bind_port; } } return (0); fail_bind_port: taskqueue_drain_all(txq->tq); fail_start_thread: buf_ring_free(txq->br, M_DEVBUF); taskqueue_free(txq->tq); gnttab_end_foreign_access(txq->ring_ref, NULL); fail_grant_ring: gnttab_free_grant_references(txq->gref_head); free(txq->ring.sring, M_DEVBUF); fail: for (; q >= 0; q--) { disconnect_txq(&info->txq[q]); destroy_txq(&info->txq[q]); } free(info->txq, M_DEVBUF); return (error); } static int setup_device(device_t dev, struct netfront_info *info, unsigned long num_queues) { int error; int q; if (info->txq) destroy_txqs(info); if (info->rxq) destroy_rxqs(info); info->num_queues = 0; error = setup_rxqs(dev, info, num_queues); if (error != 0) goto out; error = setup_txqs(dev, info, num_queues); if (error != 0) goto out; info->num_queues = num_queues; /* No split event channel at the moment. */ for (q = 0; q < num_queues; q++) info->rxq[q].xen_intr_handle = info->txq[q].xen_intr_handle; return (0); out: KASSERT(error != 0, ("Error path taken without providing an error code")); return (error); } #ifdef INET +static u_int +netfront_addr_cb(void *arg, struct ifaddr *a, u_int count) +{ + arp_ifinit((if_t)arg, a); + return (1); +} /** * If this interface has an ipv4 address, send an arp for it. This * helps to get the network going again after migrating hosts. */ static void netfront_send_fake_arp(device_t dev, struct netfront_info *info) { - struct ifnet *ifp; - struct ifaddr *ifa; + if_t ifp; ifp = info->xn_ifp; - CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if (ifa->ifa_addr->sa_family == AF_INET) { - arp_ifinit(ifp, ifa); - } - } + if_foreach_addr_type(ifp, AF_INET, netfront_addr_cb, ifp); } #endif /** * Callback received when the backend's state changes. */ static void netfront_backend_changed(device_t dev, XenbusState newstate) { struct netfront_info *sc = device_get_softc(dev); DPRINTK("newstate=%d\n", newstate); - CURVNET_SET(sc->xn_ifp->if_vnet); + CURVNET_SET(if_getvnet(sc->xn_ifp)); switch (newstate) { case XenbusStateInitialising: case XenbusStateInitialised: case XenbusStateUnknown: case XenbusStateReconfigured: case XenbusStateReconfiguring: break; case XenbusStateInitWait: if (xenbus_get_state(dev) != XenbusStateInitialising) break; if (xn_connect(sc) != 0) break; /* Switch to connected state before kicking the rings. 
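 * The backend may react to the kick immediately, so it must already
 * observe XenbusStateConnected when the notification arrives.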
*/ xenbus_set_state(sc->xbdev, XenbusStateConnected); xn_kick_rings(sc); break; case XenbusStateClosing: xenbus_set_state(dev, XenbusStateClosed); break; case XenbusStateClosed: if (sc->xn_reset) { netif_disconnect_backend(sc); xenbus_set_state(dev, XenbusStateInitialising); sc->xn_reset = false; } break; case XenbusStateConnected: #ifdef INET netfront_send_fake_arp(dev, sc); #endif break; } CURVNET_RESTORE(); } /** * \brief Verify that there is sufficient space in the Tx ring * buffer for a maximally sized request to be enqueued. * * A transmit request requires a transmit descriptor for each packet * fragment, plus up to 2 entries for "options" (e.g. TSO). */ static inline int xn_tx_slot_available(struct netfront_txq *txq) { return (RING_FREE_REQUESTS(&txq->ring) > (MAX_TX_REQ_FRAGS + 2)); } static void xn_release_tx_bufs(struct netfront_txq *txq) { int i; for (i = 1; i <= NET_TX_RING_SIZE; i++) { struct mbuf *m; m = txq->mbufs[i]; /* * We assume that no kernel addresses are * less than NET_TX_RING_SIZE. Any entry * in the table that is below this number * must be an index from free-list tracking. */ if (((uintptr_t)m) <= NET_TX_RING_SIZE) continue; gnttab_end_foreign_access_ref(txq->grant_ref[i]); gnttab_release_grant_reference(&txq->gref_head, txq->grant_ref[i]); txq->grant_ref[i] = GRANT_REF_INVALID; add_id_to_freelist(txq->mbufs, i); txq->mbufs_cnt--; if (txq->mbufs_cnt < 0) { panic("%s: tx_chain_cnt must be >= 0", __func__); } mbuf_release(m); } } static struct mbuf * xn_alloc_one_rx_buffer(struct netfront_rxq *rxq) { struct mbuf *m; m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); if (m == NULL) return NULL; m->m_len = m->m_pkthdr.len = MJUMPAGESIZE; return (m); } static void xn_alloc_rx_buffers(struct netfront_rxq *rxq) { RING_IDX req_prod; int notify; XN_RX_LOCK_ASSERT(rxq); if (__predict_false(rxq->info->carrier == 0)) return; for (req_prod = rxq->ring.req_prod_pvt; req_prod - rxq->ring.rsp_cons < NET_RX_RING_SIZE; req_prod++) { struct mbuf *m; unsigned short id; grant_ref_t ref; struct netif_rx_request *req; unsigned long pfn; m = xn_alloc_one_rx_buffer(rxq); if (m == NULL) break; id = xn_rxidx(req_prod); KASSERT(rxq->mbufs[id] == NULL, ("non-NULL xn_rx_chain")); rxq->mbufs[id] = m; ref = gnttab_claim_grant_reference(&rxq->gref_head); KASSERT(ref != GNTTAB_LIST_END, ("reserved grant references exhausted")); rxq->grant_ref[id] = ref; pfn = atop(vtophys(mtod(m, vm_offset_t))); req = RING_GET_REQUEST(&rxq->ring, req_prod); gnttab_grant_foreign_access_ref(ref, xenbus_get_otherend_id(rxq->info->xbdev), pfn, 0); req->id = id; req->gref = ref; } rxq->ring.req_prod_pvt = req_prod; /* Not enough requests? Try again later. 
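 * Posting fewer than NET_RX_SLOTS_MIN requests risks starving the
 * backend, so instead of notifying now, arm the rx_refill callout to
 * top the ring up again in hz/10 ticks (about 100 ms).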
*/ if (req_prod - rxq->ring.rsp_cons < NET_RX_SLOTS_MIN) { callout_reset_curcpu(&rxq->rx_refill, hz/10, xn_alloc_rx_buffers_callout, rxq); return; } wmb(); /* barrier so backend sees requests */ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rxq->ring, notify); if (notify) xen_intr_signal(rxq->xen_intr_handle); } static void xn_alloc_rx_buffers_callout(void *arg) { struct netfront_rxq *rxq; rxq = (struct netfront_rxq *)arg; XN_RX_LOCK(rxq); xn_alloc_rx_buffers(rxq); XN_RX_UNLOCK(rxq); } static void xn_release_rx_bufs(struct netfront_rxq *rxq) { int i, ref; struct mbuf *m; for (i = 0; i < NET_RX_RING_SIZE; i++) { m = rxq->mbufs[i]; if (m == NULL) continue; ref = rxq->grant_ref[i]; if (ref == GRANT_REF_INVALID) continue; gnttab_end_foreign_access_ref(ref); gnttab_release_grant_reference(&rxq->gref_head, ref); rxq->mbufs[i] = NULL; rxq->grant_ref[i] = GRANT_REF_INVALID; m_freem(m); } } static void xn_rxeof(struct netfront_rxq *rxq) { - struct ifnet *ifp; + if_t ifp; struct netfront_info *np = rxq->info; #if (defined(INET) || defined(INET6)) struct lro_ctrl *lro = &rxq->lro; #endif struct netfront_rx_info rinfo; struct netif_rx_response *rx = &rinfo.rx; struct netif_extra_info *extras = rinfo.extras; RING_IDX i, rp; struct mbuf *m; struct mbufq mbufq_rxq, mbufq_errq; int err, work_to_do; XN_RX_LOCK_ASSERT(rxq); if (!netfront_carrier_ok(np)) return; /* XXX: there should be some sane limit. */ mbufq_init(&mbufq_errq, INT_MAX); mbufq_init(&mbufq_rxq, INT_MAX); ifp = np->xn_ifp; do { rp = rxq->ring.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. */ i = rxq->ring.rsp_cons; while ((i != rp)) { memcpy(rx, RING_GET_RESPONSE(&rxq->ring, i), sizeof(*rx)); memset(extras, 0, sizeof(rinfo.extras)); m = NULL; err = xn_get_responses(rxq, &rinfo, rp, &i, &m); if (__predict_false(err)) { if (m) (void)mbufq_enqueue(&mbufq_errq, m); if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); continue; } m->m_pkthdr.rcvif = ifp; if (rx->flags & NETRXF_data_validated) { /* * According to mbuf(9) the correct way to tell * the stack that the checksum of an inbound * packet is correct, without it actually being * present (because the underlying interface * doesn't provide it), is to set the * CSUM_DATA_VALID and CSUM_PSEUDO_HDR flags, * and the csum_data field to 0xffff. */ m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = 0xffff; } if ((rx->flags & NETRXF_extra_info) != 0 && (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type == XEN_NETIF_EXTRA_TYPE_GSO)) { m->m_pkthdr.tso_segsz = extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].u.gso.size; m->m_pkthdr.csum_flags |= CSUM_TSO; } (void)mbufq_enqueue(&mbufq_rxq, m); } rxq->ring.rsp_cons = i; xn_alloc_rx_buffers(rxq); RING_FINAL_CHECK_FOR_RESPONSES(&rxq->ring, work_to_do); } while (work_to_do); mbufq_drain(&mbufq_errq); /* * Process all the mbufs after the remapping is complete. * Break the mbuf chain first though. */ while ((m = mbufq_dequeue(&mbufq_rxq)) != NULL) { if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); #if (defined(INET) || defined(INET6)) /* Use LRO if possible */ - if ((ifp->if_capenable & IFCAP_LRO) == 0 || + if ((if_getcapenable(ifp) & IFCAP_LRO) == 0 || lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) { /* * If LRO fails, pass up to the stack * directly. 
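 * (tcp_lro_rx() returns non-zero when it cannot aggregate the mbuf,
 * e.g. for non-TCP traffic, in which case the packet is handed to
 * if_input() unmodified.)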
*/ - (*ifp->if_input)(ifp, m); + if_input(ifp, m); } #else - (*ifp->if_input)(ifp, m); + if_input(ifp, m); #endif } #if (defined(INET) || defined(INET6)) /* * Flush any outstanding LRO work */ tcp_lro_flush_all(lro); #endif } static void xn_txeof(struct netfront_txq *txq) { RING_IDX i, prod; unsigned short id; - struct ifnet *ifp; + if_t ifp; netif_tx_response_t *txr; struct mbuf *m; struct netfront_info *np = txq->info; XN_TX_LOCK_ASSERT(txq); if (!netfront_carrier_ok(np)) return; ifp = np->xn_ifp; do { prod = txq->ring.sring->rsp_prod; rmb(); /* Ensure we see responses up to 'rp'. */ for (i = txq->ring.rsp_cons; i != prod; i++) { txr = RING_GET_RESPONSE(&txq->ring, i); if (txr->status == NETIF_RSP_NULL) continue; if (txr->status != NETIF_RSP_OKAY) { printf("%s: WARNING: response is %d!\n", __func__, txr->status); } id = txr->id; m = txq->mbufs[id]; KASSERT(m != NULL, ("mbuf not found in chain")); KASSERT((uintptr_t)m > NET_TX_RING_SIZE, ("mbuf already on the free list, but we're " "trying to free it again!")); M_ASSERTVALID(m); if (__predict_false(gnttab_query_foreign_access( txq->grant_ref[id]) != 0)) { panic("%s: grant id %u still in use by the " "backend", __func__, id); } gnttab_end_foreign_access_ref(txq->grant_ref[id]); gnttab_release_grant_reference( &txq->gref_head, txq->grant_ref[id]); txq->grant_ref[id] = GRANT_REF_INVALID; txq->mbufs[id] = NULL; add_id_to_freelist(txq->mbufs, id); txq->mbufs_cnt--; mbuf_release(m); /* Only mark the txq active if we've freed up at least one slot to try */ - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); } txq->ring.rsp_cons = prod; /* * Set a new event, then check for race with update of * tx_cons. Note that it is essential to schedule a * callback, no matter how few buffers are pending. Even if * there is space in the transmit ring, higher layers may * be blocked because too much data is outstanding: in such * cases notification from Xen is likely to be the only kick * that we'll get. 
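 *
 * The event index written below sits halfway between the current
 * response producer and the request producer, prod +
 * ((req_prod - prod) >> 1) + 1, so the backend interrupts us again
 * once roughly half of the outstanding requests have completed.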
*/ txq->ring.sring->rsp_event = prod + ((txq->ring.sring->req_prod - prod) >> 1) + 1; mb(); } while (prod != txq->ring.sring->rsp_prod); if (txq->full && ((txq->ring.sring->req_prod - prod) < NET_TX_RING_SIZE)) { txq->full = false; xn_txq_start(txq); } } static void xn_intr(void *xsc) { struct netfront_txq *txq = xsc; struct netfront_info *np = txq->info; struct netfront_rxq *rxq = &np->rxq[txq->id]; /* kick both tx and rx */ xn_rxq_intr(rxq); xn_txq_intr(txq); } static void xn_move_rx_slot(struct netfront_rxq *rxq, struct mbuf *m, grant_ref_t ref) { int new = xn_rxidx(rxq->ring.req_prod_pvt); KASSERT(rxq->mbufs[new] == NULL, ("mbufs != NULL")); rxq->mbufs[new] = m; rxq->grant_ref[new] = ref; RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->id = new; RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->gref = ref; rxq->ring.req_prod_pvt++; } static int xn_get_extras(struct netfront_rxq *rxq, struct netif_extra_info *extras, RING_IDX rp, RING_IDX *cons) { struct netif_extra_info *extra; int err = 0; do { struct mbuf *m; grant_ref_t ref; if (__predict_false(*cons + 1 == rp)) { err = EINVAL; break; } extra = (struct netif_extra_info *) RING_GET_RESPONSE(&rxq->ring, ++(*cons)); if (__predict_false(!extra->type || extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { err = EINVAL; } else { memcpy(&extras[extra->type - 1], extra, sizeof(*extra)); } m = xn_get_rx_mbuf(rxq, *cons); ref = xn_get_rx_ref(rxq, *cons); xn_move_rx_slot(rxq, m, ref); } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); return err; } static int xn_get_responses(struct netfront_rxq *rxq, struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons, struct mbuf **list) { struct netif_rx_response *rx = &rinfo->rx; struct netif_extra_info *extras = rinfo->extras; struct mbuf *m, *m0, *m_prev; grant_ref_t ref = xn_get_rx_ref(rxq, *cons); int frags = 1; int err = 0; u_long ret __diagused; m0 = m = m_prev = xn_get_rx_mbuf(rxq, *cons); if (rx->flags & NETRXF_extra_info) { err = xn_get_extras(rxq, extras, rp, cons); } if (m0 != NULL) { m0->m_pkthdr.len = 0; m0->m_next = NULL; } for (;;) { #if 0 DPRINTK("rx->status=%hd rx->offset=%hu frags=%u\n", rx->status, rx->offset, frags); #endif if (__predict_false(rx->status < 0 || rx->offset + rx->status > PAGE_SIZE)) { xn_move_rx_slot(rxq, m, ref); if (m0 == m) m0 = NULL; m = NULL; err = EINVAL; goto next_skip_queue; } /* * This definitely indicates a bug, either in this driver or in * the backend driver. In the future this should flag the bad * situation to the system controller to reboot the backend. */ if (ref == GRANT_REF_INVALID) { printf("%s: Bad rx response id %d.\n", __func__, rx->id); err = EINVAL; goto next; } ret = gnttab_end_foreign_access_ref(ref); KASSERT(ret, ("Unable to end access to grant references")); gnttab_release_grant_reference(&rxq->gref_head, ref); next: if (m == NULL) break; m->m_len = rx->status; m->m_data += rx->offset; m0->m_pkthdr.len += rx->status; next_skip_queue: if (!(rx->flags & NETRXF_more_data)) break; if (*cons + frags == rp) { if (net_ratelimit()) WPRINTK("Need more frags\n"); err = ENOENT; printf("%s: cons %u frags %u rp %u, not enough frags\n", __func__, *cons, frags, rp); break; } /* * Note that m can be NULL, if rx->status < 0 or if * rx->offset + rx->status > PAGE_SIZE above. */ m_prev = m; rx = RING_GET_RESPONSE(&rxq->ring, *cons + frags); m = xn_get_rx_mbuf(rxq, *cons + frags); /* * m_prev == NULL can happen if rx->status < 0 or if * rx->offset + * rx->status > PAGE_SIZE above. 
*/ if (m_prev != NULL) m_prev->m_next = m; /* * m0 can be NULL if rx->status < 0 or if * rx->offset + * rx->status > PAGE_SIZE above. */ if (m0 == NULL) m0 = m; m->m_next = NULL; ref = xn_get_rx_ref(rxq, *cons + frags); frags++; } *list = m0; *cons += frags; return (err); } /** * Given an mbuf chain, make sure we have enough room and then push * it onto the transmit ring. */ static int xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head) { struct netfront_info *np = txq->info; - struct ifnet *ifp = np->xn_ifp; + if_t ifp = np->xn_ifp; int otherend_id, error, nfrags; bus_dma_segment_t *segs = txq->segs; struct mbuf_xennet *tag; bus_dmamap_t map; unsigned int i; KASSERT(!SLIST_EMPTY(&txq->tags), ("no tags available")); tag = SLIST_FIRST(&txq->tags); SLIST_REMOVE_HEAD(&txq->tags, next); KASSERT(tag->count == 0, ("tag already in-use")); map = tag->dma_map; error = bus_dmamap_load_mbuf_sg(np->dma_tag, map, m_head, segs, &nfrags, 0); if (error == EFBIG || nfrags > np->maxfrags) { struct mbuf *m; bus_dmamap_unload(np->dma_tag, map); m = m_defrag(m_head, M_NOWAIT); if (!m) { /* * Defrag failed, so free the mbuf and * therefore drop the packet. */ SLIST_INSERT_HEAD(&txq->tags, tag, next); m_freem(m_head); return (EMSGSIZE); } m_head = m; error = bus_dmamap_load_mbuf_sg(np->dma_tag, map, m_head, segs, &nfrags, 0); if (error != 0 || nfrags > np->maxfrags) { bus_dmamap_unload(np->dma_tag, map); SLIST_INSERT_HEAD(&txq->tags, tag, next); m_freem(m_head); return (error ?: EFBIG); } } else if (error != 0) { SLIST_INSERT_HEAD(&txq->tags, tag, next); m_freem(m_head); return (error); } /** * The FreeBSD TCP stack, with TSO enabled, can produce a chain * of mbufs longer than Linux can handle. Make sure we don't * pass a too-long chain over to the other side by dropping the * packet. It doesn't look like there is currently a way to * tell the TCP stack to generate a shorter chain of packets. */ if (nfrags > MAX_TX_REQ_FRAGS) { #ifdef DEBUG printf("%s: nfrags %d > MAX_TX_REQ_FRAGS %d, netback " "won't be able to handle it, dropping\n", __func__, nfrags, MAX_TX_REQ_FRAGS); #endif SLIST_INSERT_HEAD(&txq->tags, tag, next); bus_dmamap_unload(np->dma_tag, map); m_freem(m_head); return (EMSGSIZE); } /* * This check should be redundant. We've already verified that we * have enough slots in the ring to handle a packet of maximum * size, and that our packet is less than the maximum size. Keep * it in here as an assert for now just to make certain that * chain_cnt is accurate. */ KASSERT((txq->mbufs_cnt + nfrags) <= NET_TX_RING_SIZE, ("%s: chain_cnt (%d) + nfrags (%d) > NET_TX_RING_SIZE " "(%d)!", __func__, (int) txq->mbufs_cnt, (int) nfrags, (int) NET_TX_RING_SIZE)); /* * Start packing the mbufs in this chain into * the fragment pointers. Stop when we run out * of fragments or hit the end of the mbuf chain. */ otherend_id = xenbus_get_otherend_id(np->xbdev); m_tag_prepend(m_head, &tag->tag); for (i = 0; i < nfrags; i++) { netif_tx_request_t *tx; uintptr_t id; grant_ref_t ref; u_long mfn; /* XXX Wrong type? 
*/ tx = RING_GET_REQUEST(&txq->ring, txq->ring.req_prod_pvt); id = get_id_from_freelist(txq->mbufs); if (id == 0) panic("%s: was allocated the freelist head!\n", __func__); txq->mbufs_cnt++; if (txq->mbufs_cnt > NET_TX_RING_SIZE) panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n", __func__); mbuf_grab(m_head); txq->mbufs[id] = m_head; tx->id = id; ref = gnttab_claim_grant_reference(&txq->gref_head); KASSERT((short)ref >= 0, ("Negative ref")); mfn = atop(segs[i].ds_addr); gnttab_grant_foreign_access_ref(ref, otherend_id, mfn, GNTMAP_readonly); tx->gref = txq->grant_ref[id] = ref; tx->offset = segs[i].ds_addr & PAGE_MASK; KASSERT(tx->offset + segs[i].ds_len <= PAGE_SIZE, ("mbuf segment crosses a page boundary")); tx->flags = 0; if (i == 0) { /* * The first fragment has the entire packet * size, subsequent fragments have just the * fragment size. The backend works out the * true size of the first fragment by * subtracting the sizes of the other * fragments. */ tx->size = m_head->m_pkthdr.len; /* * The first fragment contains the checksum flags * and is optionally followed by extra data for * TSO etc. */ /** * CSUM_TSO requires checksum offloading. * Some versions of FreeBSD fail to * set CSUM_TCP in the CSUM_TSO case, * so we have to test for CSUM_TSO * explicitly. */ if (m_head->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_TSO)) { tx->flags |= (NETTXF_csum_blank | NETTXF_data_validated); } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { struct netif_extra_info *gso = (struct netif_extra_info *) RING_GET_REQUEST(&txq->ring, ++txq->ring.req_prod_pvt); tx->flags |= NETTXF_extra_info; gso->u.gso.size = m_head->m_pkthdr.tso_segsz; gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; gso->u.gso.pad = 0; gso->u.gso.features = 0; gso->type = XEN_NETIF_EXTRA_TYPE_GSO; gso->flags = 0; } } else { tx->size = segs[i].ds_len; } if (i != nfrags - 1) tx->flags |= NETTXF_more_data; txq->ring.req_prod_pvt++; } bus_dmamap_sync(np->dma_tag, map, BUS_DMASYNC_PREWRITE); BPF_MTAP(ifp, m_head); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, m_head->m_pkthdr.len); if (m_head->m_flags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); xn_txeof(txq); return (0); } /* equivalent of network_open() in Linux */ static void xn_ifinit_locked(struct netfront_info *np) { - struct ifnet *ifp; + if_t ifp; int i; struct netfront_rxq *rxq; XN_LOCK_ASSERT(np); ifp = np->xn_ifp; - if (ifp->if_drv_flags & IFF_DRV_RUNNING || !netfront_carrier_ok(np)) + if (if_getdrvflags(ifp) & IFF_DRV_RUNNING || !netfront_carrier_ok(np)) return; xn_stop(np); for (i = 0; i < np->num_queues; i++) { rxq = &np->rxq[i]; XN_RX_LOCK(rxq); xn_alloc_rx_buffers(rxq); rxq->ring.sring->rsp_event = rxq->ring.rsp_cons + 1; if (RING_HAS_UNCONSUMED_RESPONSES(&rxq->ring)) xn_rxeof(rxq); XN_RX_UNLOCK(rxq); } - ifp->if_drv_flags |= IFF_DRV_RUNNING; - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0); + if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_UP); } static void xn_ifinit(void *xsc) { struct netfront_info *sc = xsc; XN_LOCK(sc); xn_ifinit_locked(sc); XN_UNLOCK(sc); } static int -xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +xn_ioctl(if_t ifp, u_long cmd, caddr_t data) { - struct netfront_info *sc = ifp->if_softc; + struct netfront_info *sc = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *) data; device_t dev; #ifdef INET struct ifaddr *ifa = (struct ifaddr *)data; #endif int mask, error = 0, reinit; dev = sc->xbdev; switch(cmd) { case SIOCSIFADDR: 
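/* Assign an interface address; for INET, mark the interface up and seed ARP. */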
#ifdef INET XN_LOCK(sc); if (ifa->ifa_addr->sa_family == AF_INET) { - ifp->if_flags |= IFF_UP; - if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) + if_setflagbits(ifp, IFF_UP, 0); + if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) xn_ifinit_locked(sc); arp_ifinit(ifp, ifa); XN_UNLOCK(sc); } else { XN_UNLOCK(sc); #endif error = ether_ioctl(ifp, cmd, data); #ifdef INET } #endif break; case SIOCSIFMTU: - if (ifp->if_mtu == ifr->ifr_mtu) + if (if_getmtu(ifp) == ifr->ifr_mtu) break; - ifp->if_mtu = ifr->ifr_mtu; - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + if_setmtu(ifp, ifr->ifr_mtu); + if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); xn_ifinit(sc); break; case SIOCSIFFLAGS: XN_LOCK(sc); - if (ifp->if_flags & IFF_UP) { + if (if_getflags(ifp) & IFF_UP) { /* * If only the state of the PROMISC flag changed, * then just use the 'set promisc mode' command * instead of reinitializing the entire NIC. Doing * a full re-init means reloading the firmware and * waiting for it to start up, which may take a * second or two. */ xn_ifinit_locked(sc); } else { - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { xn_stop(sc); } } - sc->xn_if_flags = ifp->if_flags; + sc->xn_if_flags = if_getflags(ifp); XN_UNLOCK(sc); break; case SIOCSIFCAP: - mask = ifr->ifr_reqcap ^ ifp->if_capenable; + mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); reinit = 0; if (mask & IFCAP_TXCSUM) { - ifp->if_capenable ^= IFCAP_TXCSUM; - ifp->if_hwassist ^= XN_CSUM_FEATURES; + if_togglecapenable(ifp, IFCAP_TXCSUM); + if_togglehwassist(ifp, XN_CSUM_FEATURES); } if (mask & IFCAP_TSO4) { - ifp->if_capenable ^= IFCAP_TSO4; - ifp->if_hwassist ^= CSUM_TSO; + if_togglecapenable(ifp, IFCAP_TSO4); + if_togglehwassist(ifp, CSUM_TSO); } if (mask & (IFCAP_RXCSUM | IFCAP_LRO)) { /* These Rx features require us to renegotiate. */ reinit = 1; if (mask & IFCAP_RXCSUM) - ifp->if_capenable ^= IFCAP_RXCSUM; + if_togglecapenable(ifp, IFCAP_RXCSUM); if (mask & IFCAP_LRO) - ifp->if_capenable ^= IFCAP_LRO; + if_togglecapenable(ifp, IFCAP_LRO); } if (reinit == 0) break; /* * We must reset the interface so the backend picks up the * new features. */ device_printf(sc->xbdev, "performing interface reset due to feature change\n"); XN_LOCK(sc); netfront_carrier_off(sc); sc->xn_reset = true; /* * NB: the pending packet queue is not flushed, since * the interface should still support the old options. */ XN_UNLOCK(sc); /* * Delete the xenstore nodes that export features. * * NB: There's a xenbus state called * "XenbusStateReconfiguring", which is what we should set * here. Sadly none of the backends know how to handle it, * and simply disconnect from the frontend, so we will just * switch back to XenbusStateInitialising in order to force * a reconnection. */ xs_rm(XST_NIL, xenbus_get_node(dev), "feature-gso-tcpv4"); xs_rm(XST_NIL, xenbus_get_node(dev), "feature-no-csum-offload"); xenbus_set_state(dev, XenbusStateClosing); /* * Wait for the frontend to reconnect before returning * from the ioctl. 30s should be more than enough for any * sane backend to reconnect. 
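 * The sleep is ended by the wakeup(np) at the tail of xn_connect()
 * once the reconnection completes; on timeout tsleep() returns
 * EWOULDBLOCK, which is passed back to the caller.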
*/ error = tsleep(sc, 0, "xn_rst", 30*hz); break; case SIOCADDMULTI: case SIOCDELMULTI: break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); } return (error); } static void xn_stop(struct netfront_info *sc) { - struct ifnet *ifp; + if_t ifp; XN_LOCK_ASSERT(sc); ifp = sc->xn_ifp; - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); + if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); } static void xn_rebuild_rx_bufs(struct netfront_rxq *rxq) { int requeue_idx, i; grant_ref_t ref; netif_rx_request_t *req; for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { struct mbuf *m; u_long pfn; if (rxq->mbufs[i] == NULL) continue; m = rxq->mbufs[requeue_idx] = xn_get_rx_mbuf(rxq, i); ref = rxq->grant_ref[requeue_idx] = xn_get_rx_ref(rxq, i); req = RING_GET_REQUEST(&rxq->ring, requeue_idx); pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT; gnttab_grant_foreign_access_ref(ref, xenbus_get_otherend_id(rxq->info->xbdev), pfn, 0); req->gref = ref; req->id = requeue_idx; requeue_idx++; } rxq->ring.req_prod_pvt = requeue_idx; } /* START of Xenolinux helper functions adapted to FreeBSD */ static int xn_connect(struct netfront_info *np) { int i, error; u_int feature_rx_copy; struct netfront_rxq *rxq; struct netfront_txq *txq; error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-rx-copy", NULL, "%u", &feature_rx_copy); if (error != 0) feature_rx_copy = 0; /* We only support rx copy. */ if (!feature_rx_copy) return (EPROTONOSUPPORT); /* Recovery procedure: */ error = talk_to_backend(np->xbdev, np); if (error != 0) return (error); /* Step 1: Reinitialise variables. */ xn_query_features(np); xn_configure_features(np); /* Step 2: Release TX buffer */ for (i = 0; i < np->num_queues; i++) { txq = &np->txq[i]; xn_release_tx_bufs(txq); } /* Step 3: Rebuild the RX buffer freelist and the RX ring itself. */ for (i = 0; i < np->num_queues; i++) { rxq = &np->rxq[i]; xn_rebuild_rx_bufs(rxq); } /* Step 4: All public and private state should now be sane. Get * ready to start sending and receiving packets and give the driver * domain a kick because we've probably just requeued some * packets. */ netfront_carrier_on(np); wakeup(np); return (0); } static void xn_kick_rings(struct netfront_info *np) { struct netfront_rxq *rxq; struct netfront_txq *txq; int i; for (i = 0; i < np->num_queues; i++) { txq = &np->txq[i]; rxq = &np->rxq[i]; xen_intr_signal(txq->xen_intr_handle); XN_TX_LOCK(txq); xn_txeof(txq); XN_TX_UNLOCK(txq); XN_RX_LOCK(rxq); xn_alloc_rx_buffers(rxq); XN_RX_UNLOCK(rxq); } } static void xn_query_features(struct netfront_info *np) { int val; device_printf(np->xbdev, "backend features:"); if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-sg", NULL, "%d", &val) != 0) val = 0; np->maxfrags = 1; if (val) { np->maxfrags = MAX_TX_REQ_FRAGS; printf(" feature-sg"); } if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-gso-tcpv4", NULL, "%d", &val) != 0) val = 0; - np->xn_ifp->if_capabilities &= ~(IFCAP_TSO4|IFCAP_LRO); + if_setcapabilitiesbit(np->xn_ifp, 0, IFCAP_TSO4 | IFCAP_LRO); if (val) { - np->xn_ifp->if_capabilities |= IFCAP_TSO4|IFCAP_LRO; + if_setcapabilitiesbit(np->xn_ifp, IFCAP_TSO4 | IFCAP_LRO, 0); printf(" feature-gso-tcp4"); } /* * HW CSUM offload is assumed to be available unless * feature-no-csum-offload is set in xenstore. 
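 * In other words, checksum offload is opt-out: the backend advertises
 * its absence rather than its presence.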
 static int
 xn_configure_features(struct netfront_info *np)
 {
 	int err, cap_enabled;
 #if (defined(INET) || defined(INET6))
 	int i;
 #endif
-	struct ifnet *ifp;
+	if_t ifp;

 	ifp = np->xn_ifp;
 	err = 0;

-	if ((ifp->if_capenable & ifp->if_capabilities) == ifp->if_capenable) {
+	if ((if_getcapenable(ifp) & if_getcapabilities(ifp)) ==
+	    if_getcapenable(ifp)) {
 		/* Current options are available, no need to do anything. */
 		return (0);
 	}

 	/* Try to preserve as many options as possible. */
-	cap_enabled = ifp->if_capenable;
-	ifp->if_capenable = ifp->if_hwassist = 0;
+	cap_enabled = if_getcapenable(ifp);
+	if_setcapenable(ifp, 0);
+	if_sethwassist(ifp, 0);

 #if (defined(INET) || defined(INET6))
 	if ((cap_enabled & IFCAP_LRO) != 0)
 		for (i = 0; i < np->num_queues; i++)
 			tcp_lro_free(&np->rxq[i].lro);
 	if (xn_enable_lro &&
-	    (ifp->if_capabilities & cap_enabled & IFCAP_LRO) != 0) {
-		ifp->if_capenable |= IFCAP_LRO;
+	    (if_getcapabilities(ifp) & cap_enabled & IFCAP_LRO) != 0) {
+		if_setcapenablebit(ifp, IFCAP_LRO, 0);
 		for (i = 0; i < np->num_queues; i++) {
 			err = tcp_lro_init(&np->rxq[i].lro);
 			if (err != 0) {
 				device_printf(np->xbdev,
 				    "LRO initialization failed\n");
-				ifp->if_capenable &= ~IFCAP_LRO;
+				if_setcapenablebit(ifp, 0, IFCAP_LRO);
 				break;
 			}
 			np->rxq[i].lro.ifp = ifp;
 		}
 	}
-	if ((ifp->if_capabilities & cap_enabled & IFCAP_TSO4) != 0) {
-		ifp->if_capenable |= IFCAP_TSO4;
-		ifp->if_hwassist |= CSUM_TSO;
+	if ((if_getcapabilities(ifp) & cap_enabled & IFCAP_TSO4) != 0) {
+		if_setcapenablebit(ifp, IFCAP_TSO4, 0);
+		if_sethwassistbits(ifp, CSUM_TSO, 0);
 	}
 #endif
-	if ((ifp->if_capabilities & cap_enabled & IFCAP_TXCSUM) != 0) {
-		ifp->if_capenable |= IFCAP_TXCSUM;
-		ifp->if_hwassist |= XN_CSUM_FEATURES;
+	if ((if_getcapabilities(ifp) & cap_enabled & IFCAP_TXCSUM) != 0) {
+		if_setcapenablebit(ifp, IFCAP_TXCSUM, 0);
+		if_sethwassistbits(ifp, XN_CSUM_FEATURES, 0);
 	}
-	if ((ifp->if_capabilities & cap_enabled & IFCAP_RXCSUM) != 0)
-		ifp->if_capenable |= IFCAP_RXCSUM;
+	if ((if_getcapabilities(ifp) & cap_enabled & IFCAP_RXCSUM) != 0)
+		if_setcapenablebit(ifp, IFCAP_RXCSUM, 0);

 	return (err);
 }

 static int
 xn_txq_mq_start_locked(struct netfront_txq *txq, struct mbuf *m)
 {
 	struct netfront_info *np;
-	struct ifnet *ifp;
+	if_t ifp;
 	struct buf_ring *br;
 	int error, notify;

 	np = txq->info;
 	br = txq->br;
 	ifp = np->xn_ifp;
 	error = 0;

 	XN_TX_LOCK_ASSERT(txq);

-	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
 	    !netfront_carrier_ok(np)) {
 		if (m != NULL)
 			error = drbr_enqueue(ifp, br, m);
 		return (error);
 	}

 	if (m != NULL) {
 		error = drbr_enqueue(ifp, br, m);
 		if (error != 0)
 			return (error);
 	}

 	while ((m = drbr_peek(ifp, br)) != NULL) {
 		if (!xn_tx_slot_available(txq)) {
 			drbr_putback(ifp, br, m);
 			break;
 		}

 		error = xn_assemble_tx_request(txq, m);
 		/* xn_assemble_tx_request always consumes the mbuf. */
 		if (error != 0) {
 			drbr_advance(ifp, br);
 			break;
 		}

 		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&txq->ring, notify);
 		if (notify)
 			xen_intr_signal(txq->xen_intr_handle);

 		drbr_advance(ifp, br);
 	}

 	if (RING_FULL(&txq->ring))
 		txq->full = true;

 	return (0);
 }
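The locked start routine above follows the standard drbr(9) discipline: peek at the head of the ring, put the packet back if there is no room to post it, and only advance once ownership has passed on. Condensed to its skeleton (hypothetical helper, notification elided):

static void
drain_ring(if_t ifp, struct buf_ring *br, struct netfront_txq *txq)
{
	struct mbuf *m;

	while ((m = drbr_peek(ifp, br)) != NULL) {
		if (!xn_tx_slot_available(txq)) {
			/* No descriptors: leave the mbuf queued for later. */
			drbr_putback(ifp, br, m);
			break;
		}
		/* Post the request; the mbuf is consumed either way. */
		xn_assemble_tx_request(txq, m);
		drbr_advance(ifp, br);
	}
}

The peek/putback split matters: a dequeue followed by a failed re-enqueue could reorder or drop packets, while putback leaves the head of the ring untouched.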
 static int
-xn_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
+xn_txq_mq_start(if_t ifp, struct mbuf *m)
 {
 	struct netfront_info *np;
 	struct netfront_txq *txq;
 	int i, npairs, error;

-	np = ifp->if_softc;
+	np = if_getsoftc(ifp);
 	npairs = np->num_queues;

 	if (!netfront_carrier_ok(np))
 		return (ENOBUFS);

 	KASSERT(npairs != 0, ("called with 0 available queues"));

 	/* Check if the flowid is set. */
 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
 		i = m->m_pkthdr.flowid % npairs;
 	else
 		i = curcpu % npairs;

 	txq = &np->txq[i];

 	if (XN_TX_TRYLOCK(txq) != 0) {
 		error = xn_txq_mq_start_locked(txq, m);
 		XN_TX_UNLOCK(txq);
 	} else {
 		error = drbr_enqueue(ifp, txq->br, m);
 		taskqueue_enqueue(txq->tq, &txq->defrtask);
 	}

 	return (error);
 }

 static void
-xn_qflush(struct ifnet *ifp)
+xn_qflush(if_t ifp)
 {
 	struct netfront_info *np;
 	struct netfront_txq *txq;
 	struct mbuf *m;
 	int i;

-	np = ifp->if_softc;
+	np = if_getsoftc(ifp);

 	for (i = 0; i < np->num_queues; i++) {
 		txq = &np->txq[i];

 		XN_TX_LOCK(txq);
 		while ((m = buf_ring_dequeue_sc(txq->br)) != NULL)
 			m_freem(m);
 		XN_TX_UNLOCK(txq);
 	}

 	if_qflush(ifp);
 }
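Queue selection above keys off the mbuf's flowid when a valid hash type is present, falling back to the current CPU otherwise. For example, with four queue pairs (npairs == 4), a flow hashed to 0x2b lands on txq[0x2b % 4] == txq[3], so all packets of that flow stay ordered on one ring. A standalone sketch of the same selection (hypothetical function):

static u_int
pick_txq(const struct mbuf *m, u_int npairs)
{
	/*
	 * Flows carrying an RSS hash stick to one queue so their
	 * packets are never reordered; unhashed traffic spreads by CPU.
	 */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		return (m->m_pkthdr.flowid % npairs);
	return (curcpu % npairs);
}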
 /**
  * Create a network device.
  * @param dev  Newbus device representing this virtual NIC.
  */
 int
 create_netdev(device_t dev)
 {
 	struct netfront_info *np;
 	int err;
-	struct ifnet *ifp;
+	if_t ifp;

 	np = device_get_softc(dev);

 	np->xbdev = dev;

 	mtx_init(&np->sc_lock, "xnsc", "netfront softc lock", MTX_DEF);

 	ifmedia_init(&np->sc_media, 0, xn_ifmedia_upd, xn_ifmedia_sts);
 	ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL);
 	ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL);

 	err = xen_net_read_mac(dev, np->mac);
 	if (err != 0)
 		goto error;

 	/* Set up ifnet structure. */
 	ifp = np->xn_ifp = if_alloc(IFT_ETHER);
-	ifp->if_softc = np;
-	if_initname(ifp, "xn",  device_get_unit(dev));
-	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
-	ifp->if_ioctl = xn_ioctl;
+	if_setsoftc(ifp, np);
+	if_initname(ifp, "xn", device_get_unit(dev));
+	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
+	if_setioctlfn(ifp, xn_ioctl);

-	ifp->if_transmit = xn_txq_mq_start;
-	ifp->if_qflush = xn_qflush;
+	if_settransmitfn(ifp, xn_txq_mq_start);
+	if_setqflushfn(ifp, xn_qflush);

-	ifp->if_init = xn_ifinit;
+	if_setinitfn(ifp, xn_ifinit);

-	ifp->if_hwassist = XN_CSUM_FEATURES;
+	if_sethwassist(ifp, XN_CSUM_FEATURES);
 	/* Enable all supported features at device creation. */
-	ifp->if_capenable = ifp->if_capabilities =
-	    IFCAP_HWCSUM|IFCAP_TSO4|IFCAP_LRO;
-	ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
-	ifp->if_hw_tsomaxsegcount = MAX_TX_REQ_FRAGS;
-	ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
+	if_setcapabilities(ifp, IFCAP_HWCSUM|IFCAP_TSO4|IFCAP_LRO);
+	if_setcapenable(ifp, if_getcapabilities(ifp));
+
+	if_sethwtsomax(ifp, 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
+	if_sethwtsomaxsegcount(ifp, MAX_TX_REQ_FRAGS);
+	if_sethwtsomaxsegsize(ifp, PAGE_SIZE);

-	ether_ifattach(ifp, np->mac);
+	ether_ifattach(ifp, np->mac);
 	netfront_carrier_off(np);

 	err = bus_dma_tag_create(
 	    bus_get_dma_tag(dev),		/* parent */
 	    1, PAGE_SIZE,			/* algnmnt, boundary */
 	    BUS_SPACE_MAXADDR,			/* lowaddr */
 	    BUS_SPACE_MAXADDR,			/* highaddr */
 	    NULL, NULL,				/* filter, filterarg */
 	    PAGE_SIZE * MAX_TX_REQ_FRAGS,	/* max request size */
 	    MAX_TX_REQ_FRAGS,			/* max segments */
 	    PAGE_SIZE,				/* maxsegsize */
 	    BUS_DMA_ALLOCNOW,			/* flags */
 	    NULL, NULL,				/* lockfunc, lockarg */
 	    &np->dma_tag);

 	return (err);

 error:
 	KASSERT(err != 0, ("Error path with no error code specified"));
 	return (err);
 }

 static int
 netfront_detach(device_t dev)
 {
 	struct netfront_info *info = device_get_softc(dev);

 	DPRINTK("%s\n", xenbus_get_node(dev));

 	netif_free(info);

 	return (0);
 }

 static void
 netif_free(struct netfront_info *np)
 {

 	XN_LOCK(np);
 	xn_stop(np);
 	XN_UNLOCK(np);
 	netif_disconnect_backend(np);
 	ether_ifdetach(np->xn_ifp);
 	free(np->rxq, M_DEVBUF);
 	free(np->txq, M_DEVBUF);
 	if_free(np->xn_ifp);
 	np->xn_ifp = NULL;
 	ifmedia_removeall(&np->sc_media);
 	bus_dma_tag_destroy(np->dma_tag);
 }

 static void
 netif_disconnect_backend(struct netfront_info *np)
 {
 	u_int i;

 	for (i = 0; i < np->num_queues; i++) {
 		XN_RX_LOCK(&np->rxq[i]);
 		XN_TX_LOCK(&np->txq[i]);
 	}
 	netfront_carrier_off(np);
 	for (i = 0; i < np->num_queues; i++) {
 		XN_RX_UNLOCK(&np->rxq[i]);
 		XN_TX_UNLOCK(&np->txq[i]);
 	}

 	for (i = 0; i < np->num_queues; i++) {
 		disconnect_rxq(&np->rxq[i]);
 		disconnect_txq(&np->txq[i]);
 	}
 }

 static int
-xn_ifmedia_upd(struct ifnet *ifp)
+xn_ifmedia_upd(if_t ifp)
 {

 	return (0);
 }

 static void
-xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
+xn_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr)
 {

 	ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE;
 	ifmr->ifm_active = IFM_ETHER|IFM_MANUAL;
 }

 /* ** Driver registration ** */

 static device_method_t netfront_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		netfront_probe),
 	DEVMETHOD(device_attach,	netfront_attach),
 	DEVMETHOD(device_detach,	netfront_detach),
 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
 	DEVMETHOD(device_suspend,	netfront_suspend),
 	DEVMETHOD(device_resume,	netfront_resume),

 	/* Xenbus interface */
 	DEVMETHOD(xenbus_otherend_changed, netfront_backend_changed),

 	DEVMETHOD_END
 };

 static driver_t netfront_driver = {
 	"xn",
 	netfront_methods,
 	sizeof(struct netfront_info),
 };

 DRIVER_MODULE(xe, xenbusb_front, netfront_driver, NULL, NULL);
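The pattern this change applies throughout the file: code no longer dereferences struct ifnet members directly, but goes through the opaque if_t accessors from the ifnet KPI. A minimal before-and-after sketch of the idiom (the function is hypothetical; the old code is shown in comments):

static void
example_mark_running(if_t ifp)
{
	/*
	 * Before: ifp->if_drv_flags |= IFF_DRV_RUNNING;
	 *         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	 * After: a single set/clear accessor call; the struct ifnet
	 * layout stays private to the network stack.
	 */
	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
}

Keeping drivers off the raw structure is what lets struct ifnet change layout without every driver being rebuilt against it.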