Index: head/share/man/man9/mbpool.9 =================================================================== --- head/share/man/man9/mbpool.9 (revision 324445) +++ head/share/man/man9/mbpool.9 (revision 324446) @@ -1,264 +1,262 @@ .\" Copyright (c) 2003 .\" Fraunhofer Institute for Open Communication Systems (FhG Fokus). .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" Author: Hartmut Brandt .\" .\" $FreeBSD$ .\" -.Dd July 15, 2003 +.Dd September 27, 2017 .Dt MBPOOL 9 .Os .Sh NAME .Nm mbpool .Nd "buffer pools for network interfaces" .Sh SYNOPSIS .In sys/types.h .In machine/bus.h .In sys/mbpool.h .Vt struct mbpool ; .Ft int .Fo mbp_create .Fa "struct mbpool **mbp" "const char *name" "bus_dma_tag_t dmat" .Fa "u_int max_pages" "size_t page_size" "size_t chunk_size" .Fc .Ft void .Fn mbp_destroy "struct mbpool *mbp" .Ft "void *" .Fn mbp_alloc "struct mbpool *mbp" "bus_addr_t *pa" "uint32_t *hp" .Ft void .Fn mbp_free "struct mbpool *mbp" "void *p" .Ft void -.Fn mbp_ext_free "void *" "void *" +.Fn mbp_ext_free "struct mbuf *" .Ft void .Fn mbp_card_free "struct mbpool *mbp" .Ft void .Fn mbp_count "struct mbpool *mbp" "u_int *used" "u_int *card" "u_int *free" .Ft "void *" .Fn mbp_get "struct mbpool *mbp" "uint32_t h" .Ft "void *" .Fn mbp_get_keep "struct mbpool *mbp" "uint32_t h" .Ft void .Fo mbp_sync .Fa "struct mbpool *mbp" "uint32_t h" "bus_addr_t off" "bus_size_t len" .Fa "u_int op" .Fc .Pp .Fn MODULE_DEPEND "your_module" "libmbpool" 1 1 1 .Pp .Cd "options LIBMBPOOL" .Sh DESCRIPTION Mbuf pools are intended to help drivers for interface cards that need huge amounts of receive buffers, and they additionally provide a mapping between these buffers and 32-bit handles. .Pp Examples of these cards are the Fore/Marconi ForeRunnerHE cards. These employ up to 8 receive groups, each with two buffer pools, each of which can contain up to 8192 buffers. This gives a total maximum number of more than 100000 buffers. Even with a more moderate configuration the card eats several thousand buffers. Each of these buffers must be mapped for DMA. While this is not a problem for machines without an IOMMU and with less than 4 GByte of memory, on other machines it may quickly eat up all available IOMMU address space and/or bounce buffers.
On sparc64, the default I/O page size is 16k, so mapping a simple mbuf wastes 31/32 of the address space. .Pp Another problem with most of these cards is that they support putting a 32-bit handle into the buffer descriptor together with the physical address. This handle is reflected back to the driver when the buffer is filled, and assists the driver in finding the buffer in host memory. For 32-bit machines, the virtual address of the buffer is usually used as the handle. This does not work for 64-bit machines for obvious reasons, so a mapping is needed between these handles and the buffers. This mapping should be possible without searching lists and the like. .Pp An mbuf pool overcomes both problems by allocating DMA-able memory page-wise with a per-pool configurable page size. Each page is divided into a number of equally-sized chunks, the last .Dv MBPOOL_TRAILER_SIZE bytes (currently 4) of which are used by the pool code. The rest of each chunk is usable as a buffer. There is a per-pool limit on pages that will be allocated. .Pp Additionally, the code manages two flags for each buffer: .Dq on-card and .Dq used . A buffer may be in one of three states: .Bl -tag -width "on-card" .It free None of the flags is set. .It on-card Both flags are set. The buffer is assumed to be handed over to the card and waiting to be filled. .It used The buffer was returned by the card and is now travelling through the system. .El .Pp A pool is created with .Fn mbp_create . This call specifies a DMA tag .Fa dmat to be used to create and map the memory pages via .Xr bus_dmamem_alloc 9 . The .Fa chunk_size includes the pool overhead. This means that to get buffers for 5 ATM cells (240 bytes), a chunk size of 256 should be specified. This results in 12 unused bytes in each chunk, in addition to the pool overhead of four bytes. The total maximum number of buffers in a pool is .Fa max_pages * .Fa ( page_size / .Fa chunk_size ) . The maximum value for .Fa max_pages is 2^14-1 (16383) and the maximum of .Fa page_size / .Fa chunk_size is 2^9 (512). If the call is successful, a pointer to a newly allocated .Vt "struct mbpool" is set into the variable pointed to by .Fa mbp . .Pp A pool is destroyed with .Fn mbp_destroy . This frees all pages and the pool structure itself. If compiled with .Dv DIAGNOSTIC , the code checks that all buffers are free. If not, a warning message is issued to the console. .Pp A buffer is allocated with .Fn mbp_alloc . This returns the virtual address of the buffer and stores the physical address into the variable pointed to by .Fa pa . The handle is stored into the variable pointed to by .Fa hp . The two most significant bits and the 7 least significant bits of the handle are unused by the pool code and may be used by the caller. These are automatically stripped when passing a handle to one of the other functions. If a buffer cannot be allocated (either because the maximum number of pages has been reached, no memory is available, or the memory cannot be mapped), .Dv NULL is returned. If a buffer could be allocated, it is in the .Dq on-card state. .Pp When the buffer is returned by the card, the driver calls .Fn mbp_get with the handle. This function returns the virtual address of the buffer and clears the .Dq on-card bit. The buffer is now in the .Dq used state. The function .Fn mbp_get_keep differs from .Fn mbp_get in that it does not clear the .Dq on-card bit. This can be used for buffers that are returned .Dq partially by the card.
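.Pp The following fragment sketches how a driver might use these interfaces together; it is an illustration only, and the softc layout, the descriptor-programming step, and all mydev_* names are hypothetical rather than part of this API. .Bd -literal
#include <sys/types.h>
#include <sys/errno.h>
#include <machine/bus.h>
#include <sys/mbpool.h>

struct mydev_softc {
	struct mbpool	*sc_pool;	/* receive buffer pool */
	bus_dma_tag_t	 sc_dmat;	/* DMA tag created elsewhere */
};

static int
mydev_post_rxbuf(struct mydev_softc *sc)
{
	bus_addr_t pa;
	uint32_t handle;
	void *buf;
	int error;

	if (sc->sc_pool == NULL) {
		/* 16k pages cut into 256-byte chunks (252 usable bytes). */
		error = mbp_create(&sc->sc_pool, "mydev", sc->sc_dmat,
		    16383, 16 * 1024, 256);
		if (error != 0)
			return (error);
	}
	buf = mbp_alloc(sc->sc_pool, &pa, &handle);
	if (buf == NULL)
		return (ENOMEM);
	/*
	 * Program pa and handle into a receive descriptor here;
	 * the buffer is now in the "on-card" state.
	 */
	return (0);
}

static void *
mydev_rx_complete(struct mydev_softc *sc, uint32_t handle)
{
	/* The card echoes the handle back; this clears "on-card". */
	return (mbp_get(sc->sc_pool, handle));
}
.Ed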
.Pp A buffer is freed by calling .Fn mbp_free with the virtual address of the buffer. This clears the .Dq used bit, and puts the buffer on the free list of the pool. Note that free buffers are NOT returned to the system. The function .Fn mbp_ext_free can be given to .Fn m_extadd as the free function. -The user argument must be the pointer to -the pool. .Pp Before using the contents of a buffer returned by the card, the driver must call .Fn mbp_sync with the appropriate parameters. This results in a call to .Xr bus_dmamap_sync 9 for the buffer. .Pp All buffers in the pool that are currently in the .Dq on-card state can be freed with a call to .Fn mbp_card_free . This may be called by the driver when it stops the interface. Buffers in the .Dq used state are not freed by this call. .Pp For debugging it is possible to call .Fn mbp_count . This returns the number of buffers in the .Dq used and .Dq on-card states and the number of buffers on the free list. .Sh SEE ALSO .Xr mbuf 9 .Sh AUTHORS .An Harti Brandt Aq Mt harti@FreeBSD.org .Sh CAVEATS The function .Fn mbp_sync is currently a no-op because .Xr bus_dmamap_sync 9 is missing the offset and length parameters. Index: head/share/man/man9/mbuf.9 =================================================================== --- head/share/man/man9/mbuf.9 (revision 324445) +++ head/share/man/man9/mbuf.9 (revision 324446) @@ -1,1219 +1,1224 @@ .\" Copyright (c) 2000 FreeBSD Inc. .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" -.Dd October 10, 2016 +.Dd September 27, 2017 .Dt MBUF 9 .Os .\" .Sh NAME .Nm mbuf .Nd "memory management in the kernel IPC subsystem" .\" .Sh SYNOPSIS .In sys/param.h .In sys/systm.h .In sys/mbuf.h .\" .Ss Mbuf allocation macros .Fn MGET "struct mbuf *mbuf" "int how" "short type" .Fn MGETHDR "struct mbuf *mbuf" "int how" "short type" .Ft int .Fn MCLGET "struct mbuf *mbuf" "int how" .Fo MEXTADD .Fa "struct mbuf *mbuf" -.Fa "caddr_t buf" +.Fa "char *buf" .Fa "u_int size" -.Fa "void (*free)(void *opt_arg1, void *opt_arg2)" +.Fa "void (*free)(struct mbuf *)" .Fa "void *opt_arg1" .Fa "void *opt_arg2" -.Fa "short flags" +.Fa "int flags" .Fa "int type" .Fc .\" .Ss Mbuf utility macros .Fn mtod "struct mbuf *mbuf" "type" .Fn M_ALIGN "struct mbuf *mbuf" "u_int len" .Fn MH_ALIGN "struct mbuf *mbuf" "u_int len" .Ft int .Fn M_LEADINGSPACE "struct mbuf *mbuf" .Ft int .Fn M_TRAILINGSPACE "struct mbuf *mbuf" .Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from" .Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how" .Fn MCHTYPE "struct mbuf *mbuf" "short type" .Ft int .Fn M_WRITABLE "struct mbuf *mbuf" .\" .Ss Mbuf allocation functions .Ft struct mbuf * .Fn m_get "int how" "short type" .Ft struct mbuf * .Fn m_get2 "int size" "int how" "short type" "int flags" .Ft struct mbuf * .Fn m_getm "struct mbuf *orig" "int len" "int how" "short type" .Ft struct mbuf * .Fn m_getjcl "int how" "short type" "int flags" "int size" .Ft struct mbuf * .Fn m_getcl "int how" "short type" "int flags" .Ft struct mbuf * .Fn m_gethdr "int how" "short type" .Ft struct mbuf * .Fn m_free "struct mbuf *mbuf" .Ft void .Fn m_freem "struct mbuf *mbuf" .\" .Ss Mbuf utility functions .Ft void .Fn m_adj "struct mbuf *mbuf" "int len" .Ft void .Fn m_align "struct mbuf *mbuf" "int len" .Ft int .Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp" .Ft struct mbuf * .Fn m_prepend "struct mbuf *mbuf" "int len" "int how" .Ft struct mbuf * .Fn m_copyup "struct mbuf *mbuf" "int len" "int dstoff" .Ft struct mbuf * .Fn m_pullup "struct mbuf *mbuf" "int len" .Ft struct mbuf * .Fn m_pulldown "struct mbuf *mbuf" "int offset" "int len" "int *offsetp" .Ft struct mbuf * .Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how" .Ft struct mbuf * .Fn m_copypacket "struct mbuf *mbuf" "int how" .Ft struct mbuf * .Fn m_dup "const struct mbuf *mbuf" "int how" .Ft void .Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" .Ft void .Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" .Ft struct mbuf * .Fo m_devget .Fa "char *buf" .Fa "int len" .Fa "int offset" .Fa "struct ifnet *ifp" .Fa "void (*copy)(char *from, caddr_t to, u_int len)" .Fc .Ft void .Fn m_cat "struct mbuf *m" "struct mbuf *n" .Ft void .Fn m_catpkt "struct mbuf *m" "struct mbuf *n" .Ft u_int .Fn m_fixhdr "struct mbuf *mbuf" .Ft int .Fn m_dup_pkthdr "struct mbuf *to" "const struct mbuf *from" "int how" .Ft void .Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from" .Ft u_int .Fn m_length "struct mbuf *mbuf" "struct mbuf **last" .Ft struct mbuf * .Fn m_split "struct mbuf *mbuf" "int len" "int how" .Ft int .Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg" .Ft struct mbuf * .Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off" .Ft struct mbuf * .Fn m_defrag "struct mbuf *m0" "int how" .Ft struct mbuf * .Fn m_collapse "struct mbuf *m0" "int how" "int maxfrags" .Ft struct mbuf * .Fn m_unshare "struct mbuf *m0" "int how" .\" .Sh DESCRIPTION An .Vt mbuf is a basic unit of 
memory management in the kernel IPC subsystem. Network packets and socket buffers are stored in .Vt mbufs . A network packet may span multiple .Vt mbufs arranged into a .Vt mbuf chain (linked list), which allows adding or trimming network headers with little overhead. .Pp While a developer should not bother with .Vt mbuf internals without serious reason in order to avoid incompatibilities with future changes, it is useful to understand the general structure of an .Vt mbuf . .Pp An .Vt mbuf consists of a variable-sized header and a small internal buffer for data. The total size of an .Vt mbuf , .Dv MSIZE , is a constant defined in .In sys/param.h . The .Vt mbuf header includes: .Bl -tag -width "m_nextpkt" -offset indent .It Va m_next .Pq Vt struct mbuf * A pointer to the next .Vt mbuf in the .Vt mbuf chain . .It Va m_nextpkt .Pq Vt struct mbuf * A pointer to the next .Vt mbuf chain in the queue. .It Va m_data .Pq Vt caddr_t A pointer to data attached to this .Vt mbuf . .It Va m_len .Pq Vt int The length of the data. .It Va m_type .Pq Vt short The type of the data. .It Va m_flags .Pq Vt int The .Vt mbuf flags. .El .Pp The .Vt mbuf flag bits are defined as follows: .Bd -literal /* mbuf flags */ #define M_EXT 0x00000001 /* has associated external storage */ #define M_PKTHDR 0x00000002 /* start of record */ #define M_EOR 0x00000004 /* end of record */ #define M_RDONLY 0x00000008 /* associated data marked read-only */ #define M_PROTO1 0x00001000 /* protocol-specific */ #define M_PROTO2 0x00002000 /* protocol-specific */ #define M_PROTO3 0x00004000 /* protocol-specific */ #define M_PROTO4 0x00008000 /* protocol-specific */ #define M_PROTO5 0x00010000 /* protocol-specific */ #define M_PROTO6 0x00020000 /* protocol-specific */ #define M_PROTO7 0x00040000 /* protocol-specific */ #define M_PROTO8 0x00080000 /* protocol-specific */ #define M_PROTO9 0x00100000 /* protocol-specific */ #define M_PROTO10 0x00200000 /* protocol-specific */ #define M_PROTO11 0x00400000 /* protocol-specific */ #define M_PROTO12 0x00800000 /* protocol-specific */ /* mbuf pkthdr flags (also stored in m_flags) */ #define M_BCAST 0x00000010 /* send/received as link-level broadcast */ #define M_MCAST 0x00000020 /* send/received as link-level multicast */ .Ed .Pp The available .Vt mbuf types are defined as follows: .Bd -literal /* mbuf types */ #define MT_DATA 1 /* dynamic (data) allocation */ #define MT_HEADER MT_DATA /* packet header */ #define MT_SONAME 8 /* socket name */ #define MT_CONTROL 14 /* extra-data protocol message */ #define MT_OOBDATA 15 /* expedited data */ .Ed .Pp The available external buffer types are defined as follows: .Bd -literal /* external buffer types */ #define EXT_CLUSTER 1 /* mbuf cluster */ #define EXT_SFBUF 2 /* sendfile(2)'s sf_bufs */ #define EXT_JUMBOP 3 /* jumbo cluster 4096 bytes */ #define EXT_JUMBO9 4 /* jumbo cluster 9216 bytes */ #define EXT_JUMBO16 5 /* jumbo cluster 16184 bytes */ #define EXT_PACKET 6 /* mbuf+cluster from packet zone */ #define EXT_MBUF 7 /* external mbuf reference */ #define EXT_NET_DRV 252 /* custom ext_buf provided by net driver(s) */ #define EXT_MOD_TYPE 253 /* custom module's ext_buf type */ #define EXT_DISPOSABLE 254 /* can throw this buffer away w/page flipping */ #define EXT_EXTREF 255 /* has externally maintained ref_cnt ptr */ .Ed .Pp If the .Dv M_PKTHDR flag is set, a .Vt struct pkthdr Va m_pkthdr is added to the .Vt mbuf header. 
It contains a pointer to the interface the packet has been received from .Pq Vt struct ifnet Va *rcvif , and the total packet length .Pq Vt int Va len . Optionally, it may also contain an attached list of packet tags .Pq Vt "struct m_tag" . See .Xr mbuf_tags 9 for details. Fields used in offloading checksum calculation to the hardware are kept in .Va m_pkthdr as well. See .Sx HARDWARE-ASSISTED CHECKSUM CALCULATION for details. .Pp If small enough, data is stored in the internal data buffer of an .Vt mbuf . If the data is sufficiently large, another .Vt mbuf may be added to the .Vt mbuf chain , or external storage may be associated with the .Vt mbuf . .Dv MHLEN bytes of data can fit into an .Vt mbuf with the .Dv M_PKTHDR flag set, .Dv MLEN bytes can otherwise. .Pp If external storage is being associated with an .Vt mbuf , the .Va m_ext header is added at the cost of losing the internal data buffer. It includes a pointer to external storage, the size of the storage, a pointer to a function used for freeing the storage, a pointer to an optional argument that can be passed to the function, and a pointer to a reference counter. An .Vt mbuf using external storage has the .Dv M_EXT flag set. .Pp The system supplies a macro for allocating the desired external storage buffer, .Dv MEXTADD . .Pp The allocation and management of the reference counter is handled by the subsystem. .Pp The system also supplies a default type of external storage buffer called an .Vt mbuf cluster . .Vt Mbuf clusters can be allocated and configured with the use of the .Dv MCLGET macro. Each .Vt mbuf cluster is .Dv MCLBYTES in size, where MCLBYTES is a machine-dependent constant. The system defines an advisory macro .Dv MINCLSIZE , which is the smallest amount of data to put into an .Vt mbuf cluster . It is equal to .Dv MHLEN plus one. It is typically preferable to store data into the data region of an .Vt mbuf , if size permits, as opposed to allocating a separate .Vt mbuf cluster to hold the same data. .\" .Ss Macros and Functions There are numerous predefined macros and functions that provide the developer with common utilities. .\" .Bl -ohang -offset indent .It Fn mtod mbuf type Convert an .Fa mbuf pointer to a data pointer. The macro expands to the data pointer cast to the specified .Fa type . .Sy Note : It is advisable to ensure that there is enough contiguous data in .Fa mbuf . See .Fn m_pullup for details. .It Fn MGET mbuf how type Allocate an .Vt mbuf and initialize it to contain internal data. .Fa mbuf will point to the allocated .Vt mbuf on success, or be set to .Dv NULL on failure. The .Fa how argument is to be set to .Dv M_WAITOK or .Dv M_NOWAIT . It specifies whether the caller is willing to block if necessary. A number of other functions and macros related to .Vt mbufs have the same argument because they may at some point need to allocate new .Vt mbufs . .It Fn MGETHDR mbuf how type Allocate an .Vt mbuf and initialize it to contain a packet header and internal data. See .Fn MGET for details. .It Fn MEXTADD mbuf buf size free opt_arg1 opt_arg2 flags type Associate externally managed data with .Fa mbuf . Any internal data contained in the mbuf will be discarded, and the .Dv M_EXT flag will be set. The .Fa buf and .Fa size arguments are the address and length, respectively, of the data. The .Fa free argument points to a function which will be called to free the data when the mbuf is freed; it is only used if .Fa type is .Dv EXT_EXTREF . 
The .Fa opt_arg1 and .Fa opt_arg2 -arguments will be passed unmodified to -.Fa free . +arguments will be saved in +.Va ext_arg1 +and +.Va ext_arg2 +fields of the +.Va struct m_ext +of the mbuf. The .Fa flags argument specifies additional .Vt mbuf flags; it is not necessary to specify .Dv M_EXT . Finally, the .Fa type argument specifies the type of external data, which controls how it will be disposed of when the .Vt mbuf is freed. In most cases, the correct value is .Dv EXT_EXTREF . .It Fn MCLGET mbuf how Allocate and attach an .Vt mbuf cluster to .Fa mbuf . On success, a non-zero value is returned; otherwise, 0. Historically, consumers would check for success by testing the .Dv M_EXT flag on the mbuf, but this is now discouraged to avoid unnecessary awareness of the implementation of external storage in protocol stacks and device drivers. .It Fn M_ALIGN mbuf len Set the pointer .Fa mbuf->m_data to place an object of size .Fa len at the end of the internal data area of .Fa mbuf , long word aligned. Applicable only if .Fa mbuf is newly allocated with .Fn MGET or .Fn m_get . .It Fn MH_ALIGN mbuf len Serves the same purpose as .Fn M_ALIGN does, but only for .Fa mbuf newly allocated with .Fn MGETHDR or .Fn m_gethdr , or initialized by .Fn m_dup_pkthdr or .Fn m_move_pkthdr . .It Fn m_align mbuf len Serves the same purpose as .Fn M_ALIGN but handles any type of mbuf. .It Fn M_LEADINGSPACE mbuf Returns the number of bytes available before the beginning of data in .Fa mbuf . .It Fn M_TRAILINGSPACE mbuf Returns the number of bytes available after the end of data in .Fa mbuf . .It Fn M_PREPEND mbuf len how This macro operates on an .Vt mbuf chain . It is an optimized wrapper for .Fn m_prepend that can make use of possible empty space before data (e.g.\& left after trimming of a link-layer header). The new .Vt mbuf chain pointer or .Dv NULL is in .Fa mbuf after the call. .It Fn M_MOVE_PKTHDR to from Using this macro is equivalent to calling .Fn m_move_pkthdr to from . .It Fn M_WRITABLE mbuf This macro will evaluate true if .Fa mbuf is not marked .Dv M_RDONLY and if either .Fa mbuf does not contain external storage or, if it does, then if the reference count of the storage is not greater than 1. The .Dv M_RDONLY flag can be set in .Fa mbuf->m_flags . This can be achieved during setup of the external storage, by passing the .Dv M_RDONLY bit as a .Fa flags argument to the .Fn MEXTADD macro, or can be directly set in individual .Vt mbufs . .It Fn MCHTYPE mbuf type Change the type of .Fa mbuf to .Fa type . This is a relatively expensive operation and should be avoided. .El .Pp The functions are: .Bl -ohang -offset indent .It Fn m_get how type A function version of .Fn MGET for non-critical paths. .It Fn m_get2 size how type flags Allocate an .Vt mbuf with enough space to hold the specified amount of data. .It Fn m_getm orig len how type Allocate .Fa len bytes worth of .Vt mbufs and .Vt mbuf clusters if necessary and append the resulting allocated .Vt mbuf chain to the .Vt mbuf chain .Fa orig , if it is .No non- Ns Dv NULL . If the allocation fails at any point, free whatever was allocated and return .Dv NULL . If .Fa orig is .No non- Ns Dv NULL , it will not be freed. It is possible to use .Fn m_getm to either append .Fa len bytes to an existing .Vt mbuf or .Vt mbuf chain (for example, one which may be sitting in a pre-allocated ring) or to simply perform an all-or-nothing .Vt mbuf and .Vt mbuf cluster allocation. .It Fn m_gethdr how type A function version of .Fn MGETHDR for non-critical paths.
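.Pp To make the .Fn MEXTADD description above concrete, the following is a minimal sketch of attaching driver-owned storage with a free callback that, under the new convention, receives only the mbuf and finds its arguments in .Va ext_arg1 ; the mydrv_* names and the reference-counting scheme are hypothetical. .Bd -literal
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>

struct mydrv_buf {
	char	md_data[2048];		/* driver-owned storage */
	u_int	md_refcnt;		/* managed by the driver */
};

static void
mydrv_ext_free(struct mbuf *m)
{
	/* opt_arg1 passed to MEXTADD() was saved in ext_arg1. */
	struct mydrv_buf *db = m->m_ext.ext_arg1;

	if (--db->md_refcnt == 0)
		free(db, M_DEVBUF);
}

static struct mbuf *
mydrv_wrap(struct mydrv_buf *db, u_int len)
{
	struct mbuf *m;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	db->md_refcnt++;
	MEXTADD(m, db->md_data, sizeof(db->md_data), mydrv_ext_free,
	    db, NULL, 0, EXT_EXTREF);
	m->m_len = m->m_pkthdr.len = len;
	return (m);
}
.Ed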
.It Fn m_getcl how type flags Fetch an .Vt mbuf with an .Vt mbuf cluster attached to it. If one of the allocations fails, the entire allocation fails. This routine is the preferred way of fetching both the .Vt mbuf and .Vt mbuf cluster together, as it avoids having to unlock/relock between allocations. Returns .Dv NULL on failure. .It Fn m_getjcl how type flags size This is like .Fn m_getcl but the size of the cluster allocated will be large enough for .Fa size bytes. .It Fn m_free mbuf Frees .Vt mbuf . Returns .Va m_next of the freed .Vt mbuf . .El .Pp The functions below operate on .Vt mbuf chains . .Bl -ohang -offset indent .It Fn m_freem mbuf Free an entire .Vt mbuf chain , including any external storage. .\" .It Fn m_adj mbuf len Trim .Fa len bytes from the head of an .Vt mbuf chain if .Fa len is positive, from the tail otherwise. .\" .It Fn m_append mbuf len cp Append .Fa len bytes of data from .Fa cp to the .Vt mbuf chain . Extend the mbuf chain if the new data does not fit in existing space. .\" .It Fn m_prepend mbuf len how Allocate a new .Vt mbuf and prepend it to the .Vt mbuf chain , handling .Dv M_PKTHDR properly. .Sy Note : It does not allocate any .Vt mbuf clusters , so .Fa len must be less than .Dv MLEN or .Dv MHLEN , depending on the .Dv M_PKTHDR flag setting. .\" .It Fn m_copyup mbuf len dstoff Similar to .Fn m_pullup but copies .Fa len bytes of data into a new mbuf at .Fa dstoff bytes into the mbuf. The .Fa dstoff argument aligns the data and leaves room for a link layer header. Returns the new .Vt mbuf chain on success, and frees the .Vt mbuf chain and returns .Dv NULL on failure. .Sy Note : The function does not allocate .Vt mbuf clusters , so .Fa len + dstoff must be less than .Dv MHLEN . .\" .It Fn m_pullup mbuf len Arrange that the first .Fa len bytes of an .Vt mbuf chain are contiguous and lie in the data area of .Fa mbuf , so they are accessible with .Fn mtod mbuf type . It is important to remember that this may involve reallocating some mbufs and moving data, so all pointers referencing data within the old mbuf chain must be recalculated or made invalid. Return the new .Vt mbuf chain on success, .Dv NULL on failure (the .Vt mbuf chain is freed in this case). .Sy Note : It does not allocate any .Vt mbuf clusters , so .Fa len must be less than or equal to .Dv MHLEN . .\" .It Fn m_pulldown mbuf offset len offsetp Arrange that .Fa len bytes between .Fa offset and .Fa offset + len in the .Vt mbuf chain are contiguous and lie in the data area of .Fa mbuf , so they are accessible with .Fn mtod mbuf type . .Fa len must be smaller than, or equal to, the size of an .Vt mbuf cluster . Return a pointer to an intermediate .Vt mbuf in the chain containing the requested region; the offset in the data region of the .Vt mbuf chain to the data contained in the returned mbuf is stored in .Fa *offsetp . If .Fa offsetp is NULL, the region may be accessed using .Fn mtod mbuf type . If .Fa offsetp is non-NULL, the region may be accessed using .Fn mtod mbuf "uint8_t *" + *offsetp. The region of the mbuf chain between its beginning and .Fa offset is not modified, therefore it is safe to hold pointers to data within this region before calling .Fn m_pulldown . .\" .It Fn m_copym mbuf offset len how Make a copy of an .Vt mbuf chain starting .Fa offset bytes from the beginning, continuing for .Fa len bytes. If .Fa len is .Dv M_COPYALL , copy to the end of the .Vt mbuf chain . .Sy Note : The copy is read-only, because the .Vt mbuf clusters are not copied, only their reference counts are incremented.
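.Pp The .Fn m_pullup contract described above leads to a common idiom: check whether the header is already contiguous, pull it up if not, and treat the returned chain as the only valid pointer afterwards. A sketch (my_input is a hypothetical protocol input routine): .Bd -literal
#include <sys/param.h>
#include <sys/mbuf.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>

static struct mbuf *
my_input(struct mbuf *m)
{
	struct ip *ip;

	/* Ensure the IP header is contiguous before using mtod(). */
	if (m->m_len < sizeof(struct ip) &&
	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
		return (NULL);	/* the chain was freed by m_pullup() */
	ip = mtod(m, struct ip *);
	/* Any pointers into the old chain are now stale. */
	return (m);
}
.Ed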
.\" .It Fn m_copypacket mbuf how Copy an entire packet including header, which must be present. This is an optimized version of the common case .Fn m_copym mbuf 0 M_COPYALL how . .Sy Note : the copy is read-only, because the .Vt mbuf clusters are not copied, only their reference counts are incremented. .\" .It Fn m_dup mbuf how Copy a packet header .Vt mbuf chain into a completely new .Vt mbuf chain , including copying any .Vt mbuf clusters . Use this instead of .Fn m_copypacket when you need a writable copy of an .Vt mbuf chain . .\" .It Fn m_copydata mbuf offset len buf Copy data from an .Vt mbuf chain starting .Fa off bytes from the beginning, continuing for .Fa len bytes, into the indicated buffer .Fa buf . .\" .It Fn m_copyback mbuf offset len buf Copy .Fa len bytes from the buffer .Fa buf back into the indicated .Vt mbuf chain , starting at .Fa offset bytes from the beginning of the .Vt mbuf chain , extending the .Vt mbuf chain if necessary. .Sy Note : It does not allocate any .Vt mbuf clusters , just adds .Vt mbufs to the .Vt mbuf chain . It is safe to set .Fa offset beyond the current .Vt mbuf chain end: zeroed .Vt mbufs will be allocated to fill the space. .\" .It Fn m_length mbuf last Return the length of the .Vt mbuf chain , and optionally a pointer to the last .Vt mbuf . .\" .It Fn m_dup_pkthdr to from how Upon the function's completion, the .Vt mbuf .Fa to will contain an identical copy of .Fa from->m_pkthdr and the per-packet attributes found in the .Vt mbuf chain .Fa from . The .Vt mbuf .Fa from must have the flag .Dv M_PKTHDR initially set, and .Fa to must be empty on entry. .\" .It Fn m_move_pkthdr to from Move .Va m_pkthdr and the per-packet attributes from the .Vt mbuf chain .Fa from to the .Vt mbuf .Fa to . The .Vt mbuf .Fa from must have the flag .Dv M_PKTHDR initially set, and .Fa to must be empty on entry. Upon the function's completion, .Fa from will have the flag .Dv M_PKTHDR and the per-packet attributes cleared. .\" .It Fn m_fixhdr mbuf Set the packet-header length to the length of the .Vt mbuf chain . .\" .It Fn m_devget buf len offset ifp copy Copy data from a device local memory pointed to by .Fa buf to an .Vt mbuf chain . The copy is done using a specified copy routine .Fa copy , or .Fn bcopy if .Fa copy is .Dv NULL . .\" .It Fn m_cat m n Concatenate .Fa n to .Fa m . Both .Vt mbuf chains must be of the same type. .Fa n is not guaranteed to be valid after .Fn m_cat returns. .Fn m_cat does not update any packet header fields or free mbuf tags. .\" .It Fn m_catpkt m n A variant of .Fn m_cat that operates on packets. Both .Fa m and .Fa n must contain packet headers. .Fa n is not guaranteed to be valid after .Fn m_catpkt returns. .\" .It Fn m_split mbuf len how Partition an .Vt mbuf chain in two pieces, returning the tail: all but the first .Fa len bytes. In case of failure, it returns .Dv NULL and attempts to restore the .Vt mbuf chain to its original state. .\" .It Fn m_apply mbuf off len f arg Apply a function to an .Vt mbuf chain , at offset .Fa off , for length .Fa len bytes. Typically used to avoid calls to .Fn m_pullup which would otherwise be unnecessary or undesirable. .Fa arg is a convenience argument which is passed to the callback function .Fa f . .Pp Each time .Fn f is called, it will be passed .Fa arg , a pointer to the .Fa data in the current mbuf, and the length .Fa len of the data in this mbuf to which the function should be applied. 
.Pp The function should return zero to indicate success; otherwise, if an error is indicated, then .Fn m_apply will return the error and stop iterating through the .Vt mbuf chain . .\" .It Fn m_getptr mbuf loc off Return a pointer to the mbuf containing the data located at .Fa loc bytes from the beginning of the .Vt mbuf chain . The corresponding offset into the mbuf will be stored in .Fa *off . .It Fn m_defrag m0 how Defragment an mbuf chain, returning the shortest possible chain of mbufs and clusters. If allocation fails and this can not be completed, .Dv NULL will be returned and the original chain will be unchanged. Upon success, the original chain will be freed and the new chain will be returned. .Fa how should be either .Dv M_WAITOK or .Dv M_NOWAIT , depending on the caller's preference. .Pp This function is especially useful in network drivers, where certain long mbuf chains must be shortened before being added to TX descriptor lists. .It Fn m_collapse m0 how maxfrags Defragment an mbuf chain, returning a chain of at most .Fa maxfrags mbufs and clusters. If allocation fails or the chain cannot be collapsed as requested, .Dv NULL will be returned, with the original chain possibly modified. As with .Fn m_defrag , .Fa how should be one of .Dv M_WAITOK or .Dv M_NOWAIT . .It Fn m_unshare m0 how Create a version of the specified mbuf chain whose contents can be safely modified without affecting other users. If allocation fails and this operation can not be completed, .Dv NULL will be returned. The original mbuf chain is always reclaimed and the reference count of any shared mbuf clusters is decremented. .Fa how should be either .Dv M_WAITOK or .Dv M_NOWAIT , depending on the caller's preference. As a side-effect of this process the returned mbuf chain may be compacted. .Pp This function is especially useful in the transmit path of network code, when data must be encrypted or otherwise altered prior to transmission. .El .Sh HARDWARE-ASSISTED CHECKSUM CALCULATION This section currently applies to TCP/IP only. In order to save the host CPU resources, computing checksums is offloaded to the network interface hardware if possible. The .Va m_pkthdr member of the leading .Vt mbuf of a packet contains two fields used for that purpose, .Vt int Va csum_flags and .Vt int Va csum_data . The meaning of those fields depends on the direction a packet flows in, and on whether the packet is fragmented. Henceforth, .Va csum_flags or .Va csum_data of a packet will denote the corresponding field of the .Va m_pkthdr member of the leading .Vt mbuf in the .Vt mbuf chain containing the packet. .Pp On output, checksum offloading is attempted after the outgoing interface has been determined for a packet. The interface-specific field .Va ifnet.if_data.ifi_hwassist (see .Xr ifnet 9 ) is consulted for the capabilities of the interface to assist in computing checksums. The .Va csum_flags field of the packet header is set to indicate which actions the interface is supposed to perform on it. The actions unsupported by the network interface are done in the software prior to passing the packet down to the interface driver; such actions will never be requested through .Va csum_flags . .Pp The flags demanding a particular action from an interface are as follows: .Bl -tag -width ".Dv CSUM_TCP" -offset indent .It Dv CSUM_IP The IP header checksum is to be computed and stored in the corresponding field of the packet. The hardware is expected to know the format of an IP header to determine the offset of the IP checksum field. 
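.Pp As an illustration of this contract, a transmit routine typically translates the requested actions into device descriptor bits; the following sketch is hypothetical (MYDEV_TXD_IPCSUM and mydev_txd_flags are invented names). .Bd -literal
#include <sys/param.h>
#include <sys/mbuf.h>

#define	MYDEV_TXD_IPCSUM	0x0001	/* hypothetical descriptor bit */

static uint32_t
mydev_txd_flags(struct mbuf *m)
{
	uint32_t txflags = 0;

	/*
	 * The stack sets only those csum_flags bits that the interface
	 * advertised in if_hwassist, so unsupported actions never
	 * appear here.
	 */
	if (m->m_pkthdr.csum_flags & CSUM_IP)
		txflags |= MYDEV_TXD_IPCSUM;
	return (txflags);
}
.Ed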
.It Dv CSUM_TCP The TCP checksum is to be computed. (See below.) .It Dv CSUM_UDP The UDP checksum is to be computed. (See below.) .El .Pp Should a TCP or UDP checksum be offloaded to the hardware, the field .Va csum_data will contain the byte offset of the checksum field relative to the end of the IP header. In this case, the checksum field will be initially set by the TCP/IP module to the checksum of the pseudo header defined by the TCP and UDP specifications. .Pp On input, an interface indicates the actions it has performed on a packet by setting one or more of the following flags in .Va csum_flags associated with the packet: .Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent .It Dv CSUM_IP_CHECKED The IP header checksum has been computed. .It Dv CSUM_IP_VALID The IP header has a valid checksum. This flag can appear only in combination with .Dv CSUM_IP_CHECKED . .It Dv CSUM_DATA_VALID The checksum of the data portion of the IP packet has been computed and stored in the field .Va csum_data in network byte order. .It Dv CSUM_PSEUDO_HDR Can be set only along with .Dv CSUM_DATA_VALID to indicate that the IP data checksum found in .Va csum_data allows for the pseudo header defined by the TCP and UDP specifications. Otherwise the checksum of the pseudo header must be calculated by the host CPU and added to .Va csum_data to obtain the final checksum to be used for TCP or UDP validation purposes. .El .Pp If a particular network interface just indicates success or failure of TCP or UDP checksum validation without returning the exact value of the checksum to the host CPU, its driver can mark .Dv CSUM_DATA_VALID and .Dv CSUM_PSEUDO_HDR in .Va csum_flags , and set .Va csum_data to .Li 0xFFFF hexadecimal to indicate a valid checksum. It is a peculiarity of the algorithm used that the Internet checksum calculated over any valid packet will be .Li 0xFFFF as long as the original checksum field is included. .Sh STRESS TESTING When running a kernel compiled with the option .Dv MBUF_STRESS_TEST , the following .Xr sysctl 8 Ns -controlled options may be used to create various failure/extreme cases for testing of network drivers and other parts of the kernel that rely on .Vt mbufs . .Bl -tag -width ident .It Va net.inet.ip.mbuf_frag_size Causes .Fn ip_output to fragment outgoing .Vt mbuf chains into fragments of the specified size. Setting this variable to 1 is an excellent way to test the long .Vt mbuf chain handling ability of network drivers. .It Va kern.ipc.m_defragrandomfailures Causes the function .Fn m_defrag to randomly fail, returning .Dv NULL . Any piece of code which uses .Fn m_defrag should be tested with this feature. .El .Sh RETURN VALUES See above. .Sh SEE ALSO .Xr ifnet 9 , .Xr mbuf_tags 9 .Sh HISTORY .\" Please correct me if I'm wrong .Vt Mbufs appeared in an early version of .Bx . Besides being used for network packets, they were used to store various dynamic structures, such as routing table entries, interface addresses, protocol control blocks, etc. In more recent .Fx use of .Vt mbufs is almost entirely limited to packet storage, with .Xr uma 9 zones being used directly to store other network-related memory. .Pp Historically, the .Vt mbuf allocator has been a special-purpose memory allocator able to run in interrupt contexts and allocating from a special kernel address space map. 
As of .Fx 5.3 , the .Vt mbuf allocator is a wrapper around .Xr uma 9 , allowing caching of .Vt mbufs , clusters, and .Vt mbuf + cluster pairs in per-CPU caches, as well as bringing other benefits of slab allocation. .Sh AUTHORS The original .Nm manual page was written by .An Yar Tikhiy . The .Xr uma 9 .Vt mbuf allocator was written by .An Bosko Milekic . Index: head/sys/compat/ndis/kern_ndis.c =================================================================== --- head/sys/compat/ndis/kern_ndis.c (revision 324445) +++ head/sys/compat/ndis/kern_ndis.c (revision 324446) @@ -1,1444 +1,1447 @@ /*- * Copyright (c) 2003 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NDIS_DUMMY_PATH "\\\\some\\bogus\\path" #define NDIS_FLAG_RDONLY 1 static void ndis_status_func(ndis_handle, ndis_status, void *, uint32_t); static void ndis_statusdone_func(ndis_handle); static void ndis_setdone_func(ndis_handle, ndis_status); static void ndis_getdone_func(ndis_handle, ndis_status); static void ndis_resetdone_func(ndis_handle, ndis_status, uint8_t); static void ndis_sendrsrcavail_func(ndis_handle); static void ndis_intrsetup(kdpc *, device_object *, irp *, struct ndis_softc *); static void ndis_return(device_object *, void *); static image_patch_table kernndis_functbl[] = { IMPORT_SFUNC(ndis_status_func, 4), IMPORT_SFUNC(ndis_statusdone_func, 1), IMPORT_SFUNC(ndis_setdone_func, 2), IMPORT_SFUNC(ndis_getdone_func, 2), IMPORT_SFUNC(ndis_resetdone_func, 3), IMPORT_SFUNC(ndis_sendrsrcavail_func, 1), IMPORT_SFUNC(ndis_intrsetup, 4), IMPORT_SFUNC(ndis_return, 1), { NULL, NULL, NULL } }; static struct nd_head ndis_devhead; /* * This allows us to export our symbols to other modules. * Note that we call ourselves 'ndisapi' to avoid a namespace * collision with if_ndis.ko, which internally calls itself * 'ndis.' * * Note: some of the subsystems depend on each other, so the * order in which they're started is important. The order of * importance is: * * HAL - spinlocks and IRQL manipulation * ntoskrnl - DPC and workitem threads, object waiting * windrv - driver/device registration * * The HAL should also be the last thing shut down, since * the ntoskrnl subsystem will use spinlocks right up until * the DPC and workitem threads are terminated. 
*/ static int ndis_modevent(module_t mod, int cmd, void *arg) { int error = 0; image_patch_table *patch; switch (cmd) { case MOD_LOAD: /* Initialize subsystems */ hal_libinit(); ntoskrnl_libinit(); windrv_libinit(); ndis_libinit(); usbd_libinit(); patch = kernndis_functbl; while (patch->ipt_func != NULL) { windrv_wrap((funcptr)patch->ipt_func, (funcptr *)&patch->ipt_wrap, patch->ipt_argcnt, patch->ipt_ftype); patch++; } TAILQ_INIT(&ndis_devhead); break; case MOD_SHUTDOWN: if (TAILQ_FIRST(&ndis_devhead) == NULL) { /* Shut down subsystems */ ndis_libfini(); usbd_libfini(); windrv_libfini(); ntoskrnl_libfini(); hal_libfini(); patch = kernndis_functbl; while (patch->ipt_func != NULL) { windrv_unwrap(patch->ipt_wrap); patch++; } } break; case MOD_UNLOAD: /* Shut down subsystems */ ndis_libfini(); usbd_libfini(); windrv_libfini(); ntoskrnl_libfini(); hal_libfini(); patch = kernndis_functbl; while (patch->ipt_func != NULL) { windrv_unwrap(patch->ipt_wrap); patch++; } break; default: error = EINVAL; break; } return (error); } DEV_MODULE(ndisapi, ndis_modevent, NULL); MODULE_VERSION(ndisapi, 1); static void ndis_sendrsrcavail_func(adapter) ndis_handle adapter; { } static void ndis_status_func(adapter, status, sbuf, slen) ndis_handle adapter; ndis_status status; void *sbuf; uint32_t slen; { ndis_miniport_block *block; struct ndis_softc *sc; struct ifnet *ifp; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = NDISUSB_GET_IFNET(sc); if ( ifp && ifp->if_flags & IFF_DEBUG) device_printf(sc->ndis_dev, "status: %x\n", status); } static void ndis_statusdone_func(adapter) ndis_handle adapter; { ndis_miniport_block *block; struct ndis_softc *sc; struct ifnet *ifp; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = NDISUSB_GET_IFNET(sc); if (ifp && ifp->if_flags & IFF_DEBUG) device_printf(sc->ndis_dev, "status complete\n"); } static void ndis_setdone_func(adapter, status) ndis_handle adapter; ndis_status status; { ndis_miniport_block *block; block = adapter; block->nmb_setstat = status; KeSetEvent(&block->nmb_setevent, IO_NO_INCREMENT, FALSE); } static void ndis_getdone_func(adapter, status) ndis_handle adapter; ndis_status status; { ndis_miniport_block *block; block = adapter; block->nmb_getstat = status; KeSetEvent(&block->nmb_getevent, IO_NO_INCREMENT, FALSE); } static void ndis_resetdone_func(ndis_handle adapter, ndis_status status, uint8_t addressingreset) { ndis_miniport_block *block; struct ndis_softc *sc; struct ifnet *ifp; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = NDISUSB_GET_IFNET(sc); if (ifp && ifp->if_flags & IFF_DEBUG) device_printf(sc->ndis_dev, "reset done...\n"); KeSetEvent(&block->nmb_resetevent, IO_NO_INCREMENT, FALSE); } int ndis_create_sysctls(arg) void *arg; { struct ndis_softc *sc; ndis_cfg *vals; char buf[256]; struct sysctl_oid *oidp; struct sysctl_ctx_entry *e; if (arg == NULL) return (EINVAL); sc = arg; /* device_printf(sc->ndis_dev, "ndis_create_sysctls() sc=%p\n", sc); */ vals = sc->ndis_regvals; TAILQ_INIT(&sc->ndis_cfglist_head); /* Add the driver-specific registry keys. 
*/ while(1) { if (vals->nc_cfgkey == NULL) break; if (vals->nc_idx != sc->ndis_devidx) { vals++; continue; } /* See if we already have a sysctl with this name */ oidp = NULL; TAILQ_FOREACH(e, device_get_sysctl_ctx(sc->ndis_dev), link) { oidp = e->entry; if (strcasecmp(oidp->oid_name, vals->nc_cfgkey) == 0) break; oidp = NULL; } if (oidp != NULL) { vals++; continue; } ndis_add_sysctl(sc, vals->nc_cfgkey, vals->nc_cfgdesc, vals->nc_val, CTLFLAG_RW); vals++; } /* Now add a couple of builtin keys. */ /* * Environment can be either Windows (0) or WindowsNT (1). * We qualify as the latter. */ ndis_add_sysctl(sc, "Environment", "Windows environment", "1", NDIS_FLAG_RDONLY); /* NDIS version should be 5.1. */ ndis_add_sysctl(sc, "NdisVersion", "NDIS API Version", "0x00050001", NDIS_FLAG_RDONLY); /* * Some miniport drivers rely on the existence of the SlotNumber, * NetCfgInstanceId and DriverDesc keys. */ ndis_add_sysctl(sc, "SlotNumber", "Slot Numer", "01", NDIS_FLAG_RDONLY); ndis_add_sysctl(sc, "NetCfgInstanceId", "NetCfgInstanceId", "{12345678-1234-5678-CAFE0-123456789ABC}", NDIS_FLAG_RDONLY); ndis_add_sysctl(sc, "DriverDesc", "Driver Description", "NDIS Network Adapter", NDIS_FLAG_RDONLY); /* Bus type (PCI, PCMCIA, etc...) */ sprintf(buf, "%d", (int)sc->ndis_iftype); ndis_add_sysctl(sc, "BusType", "Bus Type", buf, NDIS_FLAG_RDONLY); if (sc->ndis_res_io != NULL) { sprintf(buf, "0x%jx", rman_get_start(sc->ndis_res_io)); ndis_add_sysctl(sc, "IOBaseAddress", "Base I/O Address", buf, NDIS_FLAG_RDONLY); } if (sc->ndis_irq != NULL) { sprintf(buf, "%ju", rman_get_start(sc->ndis_irq)); ndis_add_sysctl(sc, "InterruptNumber", "Interrupt Number", buf, NDIS_FLAG_RDONLY); } return (0); } int ndis_add_sysctl(arg, key, desc, val, flag_rdonly) void *arg; char *key; char *desc; char *val; int flag_rdonly; { struct ndis_softc *sc; struct ndis_cfglist *cfg; char descstr[256]; sc = arg; cfg = malloc(sizeof(struct ndis_cfglist), M_DEVBUF, M_NOWAIT|M_ZERO); if (cfg == NULL) { printf("failed for %s\n", key); return (ENOMEM); } cfg->ndis_cfg.nc_cfgkey = strdup(key, M_DEVBUF); if (desc == NULL) { snprintf(descstr, sizeof(descstr), "%s (dynamic)", key); cfg->ndis_cfg.nc_cfgdesc = strdup(descstr, M_DEVBUF); } else cfg->ndis_cfg.nc_cfgdesc = strdup(desc, M_DEVBUF); strcpy(cfg->ndis_cfg.nc_val, val); TAILQ_INSERT_TAIL(&sc->ndis_cfglist_head, cfg, link); if (flag_rdonly != 0) { cfg->ndis_oid = SYSCTL_ADD_STRING(device_get_sysctl_ctx(sc->ndis_dev), SYSCTL_CHILDREN(device_get_sysctl_tree(sc->ndis_dev)), OID_AUTO, cfg->ndis_cfg.nc_cfgkey, CTLFLAG_RD, cfg->ndis_cfg.nc_val, sizeof(cfg->ndis_cfg.nc_val), cfg->ndis_cfg.nc_cfgdesc); } else { cfg->ndis_oid = SYSCTL_ADD_STRING(device_get_sysctl_ctx(sc->ndis_dev), SYSCTL_CHILDREN(device_get_sysctl_tree(sc->ndis_dev)), OID_AUTO, cfg->ndis_cfg.nc_cfgkey, CTLFLAG_RW, cfg->ndis_cfg.nc_val, sizeof(cfg->ndis_cfg.nc_val), cfg->ndis_cfg.nc_cfgdesc); } return (0); } /* * Somewhere, somebody decided "hey, let's automatically create * a sysctl tree for each device instance as it's created -- it'll * make life so much easier!" Lies. Why must they turn the kernel * into a house of lies? 
*/ int ndis_flush_sysctls(arg) void *arg; { struct ndis_softc *sc; struct ndis_cfglist *cfg; struct sysctl_ctx_list *clist; sc = arg; clist = device_get_sysctl_ctx(sc->ndis_dev); while (!TAILQ_EMPTY(&sc->ndis_cfglist_head)) { cfg = TAILQ_FIRST(&sc->ndis_cfglist_head); TAILQ_REMOVE(&sc->ndis_cfglist_head, cfg, link); sysctl_ctx_entry_del(clist, cfg->ndis_oid); sysctl_remove_oid(cfg->ndis_oid, 1, 0); free(cfg->ndis_cfg.nc_cfgkey, M_DEVBUF); free(cfg->ndis_cfg.nc_cfgdesc, M_DEVBUF); free(cfg, M_DEVBUF); } return (0); } void * ndis_get_routine_address(functbl, name) struct image_patch_table *functbl; char *name; { int i; for (i = 0; functbl[i].ipt_name != NULL; i++) if (strcmp(name, functbl[i].ipt_name) == 0) return (functbl[i].ipt_wrap); return (NULL); } static void ndis_return(dobj, arg) device_object *dobj; void *arg; { ndis_miniport_block *block; ndis_miniport_characteristics *ch; ndis_return_handler returnfunc; ndis_handle adapter; ndis_packet *p; uint8_t irql; list_entry *l; block = arg; ch = IoGetDriverObjectExtension(dobj->do_drvobj, (void *)1); p = arg; adapter = block->nmb_miniportadapterctx; if (adapter == NULL) return; returnfunc = ch->nmc_return_packet_func; KeAcquireSpinLock(&block->nmb_returnlock, &irql); while (!IsListEmpty(&block->nmb_returnlist)) { l = RemoveHeadList((&block->nmb_returnlist)); p = CONTAINING_RECORD(l, ndis_packet, np_list); InitializeListHead((&p->np_list)); KeReleaseSpinLock(&block->nmb_returnlock, irql); MSCALL2(returnfunc, adapter, p); KeAcquireSpinLock(&block->nmb_returnlock, &irql); } KeReleaseSpinLock(&block->nmb_returnlock, irql); } +static void +ndis_ext_free(struct mbuf *m) +{ + + return (ndis_return_packet(m->m_ext.ext_arg1)); +} + void -ndis_return_packet(struct mbuf *m, void *buf, void *arg) +ndis_return_packet(ndis_packet *p) { - ndis_packet *p; ndis_miniport_block *block; - if (arg == NULL) + if (p == NULL) return; - p = arg; - /* Decrement refcount. */ p->np_refcnt--; /* Release packet when refcount hits zero, otherwise return. 
*/ if (p->np_refcnt) return; block = ((struct ndis_softc *)p->np_softc)->ndis_block; KeAcquireSpinLockAtDpcLevel(&block->nmb_returnlock); InitializeListHead((&p->np_list)); InsertHeadList((&block->nmb_returnlist), (&p->np_list)); KeReleaseSpinLockFromDpcLevel(&block->nmb_returnlock); IoQueueWorkItem(block->nmb_returnitem, (io_workitem_func)kernndis_functbl[7].ipt_wrap, WORKQUEUE_CRITICAL, block); } void ndis_free_bufs(b0) ndis_buffer *b0; { ndis_buffer *next; if (b0 == NULL) return; while(b0 != NULL) { next = b0->mdl_next; IoFreeMdl(b0); b0 = next; } } void ndis_free_packet(p) ndis_packet *p; { if (p == NULL) return; ndis_free_bufs(p->np_private.npp_head); NdisFreePacket(p); } int ndis_convert_res(arg) void *arg; { struct ndis_softc *sc; ndis_resource_list *rl = NULL; cm_partial_resource_desc *prd = NULL; ndis_miniport_block *block; device_t dev; struct resource_list *brl; struct resource_list_entry *brle; int error = 0; sc = arg; block = sc->ndis_block; dev = sc->ndis_dev; rl = malloc(sizeof(ndis_resource_list) + (sizeof(cm_partial_resource_desc) * (sc->ndis_rescnt - 1)), M_DEVBUF, M_NOWAIT|M_ZERO); if (rl == NULL) return (ENOMEM); rl->cprl_version = 5; rl->cprl_revision = 1; rl->cprl_count = sc->ndis_rescnt; prd = rl->cprl_partial_descs; brl = BUS_GET_RESOURCE_LIST(dev, dev); if (brl != NULL) { STAILQ_FOREACH(brle, brl, link) { switch (brle->type) { case SYS_RES_IOPORT: prd->cprd_type = CmResourceTypePort; prd->cprd_flags = CM_RESOURCE_PORT_IO; prd->cprd_sharedisp = CmResourceShareDeviceExclusive; prd->u.cprd_port.cprd_start.np_quad = brle->start; prd->u.cprd_port.cprd_len = brle->count; break; case SYS_RES_MEMORY: prd->cprd_type = CmResourceTypeMemory; prd->cprd_flags = CM_RESOURCE_MEMORY_READ_WRITE; prd->cprd_sharedisp = CmResourceShareDeviceExclusive; prd->u.cprd_mem.cprd_start.np_quad = brle->start; prd->u.cprd_mem.cprd_len = brle->count; break; case SYS_RES_IRQ: prd->cprd_type = CmResourceTypeInterrupt; prd->cprd_flags = 0; /* * Always mark interrupt resources as * shared, since in our implementation, * they will be. */ prd->cprd_sharedisp = CmResourceShareShared; prd->u.cprd_intr.cprd_level = brle->start; prd->u.cprd_intr.cprd_vector = brle->start; prd->u.cprd_intr.cprd_affinity = 0; break; default: break; } prd++; } } block->nmb_rlist = rl; return (error); } /* * Map an NDIS packet to an mbuf list. When an NDIS driver receives a * packet, it will hand it to us in the form of an ndis_packet, * which we need to convert to an mbuf that is then handed off * to the stack. Note: we configure the mbuf list so that it uses * the memory regions specified by the ndis_buffer structures in * the ndis_packet as external storage. In most cases, this will * point to a memory region allocated by the driver (either by * ndis_malloc_withtag() or ndis_alloc_sharedmem()). We expect * the driver to handle free()ing this region for us, so we set up * a dummy no-op free handler for it.
*/ int ndis_ptom(m0, p) struct mbuf **m0; ndis_packet *p; { struct mbuf *m = NULL, *prev = NULL; ndis_buffer *buf; ndis_packet_private *priv; uint32_t totlen = 0; struct ifnet *ifp; struct ether_header *eh; int diff; if (p == NULL || m0 == NULL) return (EINVAL); priv = &p->np_private; buf = priv->npp_head; p->np_refcnt = 0; for (buf = priv->npp_head; buf != NULL; buf = buf->mdl_next) { if (buf == priv->npp_head) m = m_gethdr(M_NOWAIT, MT_DATA); else m = m_get(M_NOWAIT, MT_DATA); if (m == NULL) { m_freem(*m0); *m0 = NULL; return (ENOBUFS); } m->m_len = MmGetMdlByteCount(buf); - m->m_data = MmGetMdlVirtualAddress(buf); - MEXTADD(m, m->m_data, m->m_len, ndis_return_packet, - m->m_data, p, 0, EXT_NDIS); + m_extadd(m, MmGetMdlVirtualAddress(buf), m->m_len, + ndis_ext_free, p, NULL, 0, EXT_NDIS); p->np_refcnt++; totlen += m->m_len; if (m->m_flags & M_PKTHDR) *m0 = m; else prev->m_next = m; prev = m; } /* * This is a hack to deal with the Marvell 8335 driver * which, when associated with an AP in WPA-PSK mode, * seems to overpad its frames by 8 bytes. I don't know * what the extra 8 bytes are for, and they're not there * in open mode, so for now clamp the frame size at 1514 * until I can figure out how to deal with this properly, * otherwise if_ethersubr() will spank us by discarding * the 'oversize' frames. */ eh = mtod((*m0), struct ether_header *); ifp = NDISUSB_GET_IFNET((struct ndis_softc *)p->np_softc); if (ifp && totlen > ETHER_MAX_FRAME(ifp, eh->ether_type, FALSE)) { diff = totlen - ETHER_MAX_FRAME(ifp, eh->ether_type, FALSE); totlen -= diff; m->m_len -= diff; } (*m0)->m_pkthdr.len = totlen; return (0); } /* * Create an NDIS packet from an mbuf chain. * This is used mainly when transmitting packets, where we need * to turn an mbuf off an interface's send queue and transform it * into an NDIS packet which will be fed into the NDIS driver's * send routine. * * NDIS packets consist of two parts: an ndis_packet structure, * which is vaguely analogous to the pkthdr portion of an mbuf, * and one or more ndis_buffer structures, which define the * actual memory segments in which the packet data resides. * We need to allocate one ndis_buffer for each mbuf in a chain, * plus one ndis_packet as the header.
*/ int ndis_mtop(m0, p) struct mbuf *m0; ndis_packet **p; { struct mbuf *m; ndis_buffer *buf = NULL, *prev = NULL; ndis_packet_private *priv; if (p == NULL || *p == NULL || m0 == NULL) return (EINVAL); priv = &(*p)->np_private; priv->npp_totlen = m0->m_pkthdr.len; for (m = m0; m != NULL; m = m->m_next) { if (m->m_len == 0) continue; buf = IoAllocateMdl(m->m_data, m->m_len, FALSE, FALSE, NULL); if (buf == NULL) { ndis_free_packet(*p); *p = NULL; return (ENOMEM); } MmBuildMdlForNonPagedPool(buf); if (priv->npp_head == NULL) priv->npp_head = buf; else prev->mdl_next = buf; prev = buf; } priv->npp_tail = buf; return (0); } int ndis_get_supported_oids(arg, oids, oidcnt) void *arg; ndis_oid **oids; int *oidcnt; { int len, rval; ndis_oid *o; if (arg == NULL || oids == NULL || oidcnt == NULL) return (EINVAL); len = 0; ndis_get_info(arg, OID_GEN_SUPPORTED_LIST, NULL, &len); o = malloc(len, M_DEVBUF, M_NOWAIT); if (o == NULL) return (ENOMEM); rval = ndis_get_info(arg, OID_GEN_SUPPORTED_LIST, o, &len); if (rval) { free(o, M_DEVBUF); return (rval); } *oids = o; *oidcnt = len / 4; return (0); } int ndis_set_info(arg, oid, buf, buflen) void *arg; ndis_oid oid; void *buf; int *buflen; { struct ndis_softc *sc; ndis_status rval; ndis_handle adapter; ndis_setinfo_handler setfunc; uint32_t byteswritten = 0, bytesneeded = 0; uint8_t irql; uint64_t duetime; /* * According to the NDIS spec, MiniportQueryInformation() * and MiniportSetInformation() requests are handled serially: * once one request has been issued, we must wait for it to * finish before allowing another request to proceed. */ sc = arg; KeResetEvent(&sc->ndis_block->nmb_setevent); KeAcquireSpinLock(&sc->ndis_block->nmb_lock, &irql); if (sc->ndis_block->nmb_pendingreq != NULL) { KeReleaseSpinLock(&sc->ndis_block->nmb_lock, irql); panic("ndis_set_info() called while other request pending"); } else sc->ndis_block->nmb_pendingreq = (ndis_request *)sc; setfunc = sc->ndis_chars->nmc_setinfo_func; adapter = sc->ndis_block->nmb_miniportadapterctx; if (adapter == NULL || setfunc == NULL || sc->ndis_block->nmb_devicectx == NULL) { sc->ndis_block->nmb_pendingreq = NULL; KeReleaseSpinLock(&sc->ndis_block->nmb_lock, irql); return (ENXIO); } rval = MSCALL6(setfunc, adapter, oid, buf, *buflen, &byteswritten, &bytesneeded); sc->ndis_block->nmb_pendingreq = NULL; KeReleaseSpinLock(&sc->ndis_block->nmb_lock, irql); if (rval == NDIS_STATUS_PENDING) { /* Wait up to 5 seconds. 
*/ duetime = (5 * 1000000) * -10; KeWaitForSingleObject(&sc->ndis_block->nmb_setevent, 0, 0, FALSE, &duetime); rval = sc->ndis_block->nmb_setstat; } if (byteswritten) *buflen = byteswritten; if (bytesneeded) *buflen = bytesneeded; if (rval == NDIS_STATUS_INVALID_LENGTH) return (ENOSPC); if (rval == NDIS_STATUS_INVALID_OID) return (EINVAL); if (rval == NDIS_STATUS_NOT_SUPPORTED || rval == NDIS_STATUS_NOT_ACCEPTED) return (ENOTSUP); if (rval != NDIS_STATUS_SUCCESS) return (ENODEV); return (0); } typedef void (*ndis_senddone_func)(ndis_handle, ndis_packet *, ndis_status); int ndis_send_packets(arg, packets, cnt) void *arg; ndis_packet **packets; int cnt; { struct ndis_softc *sc; ndis_handle adapter; ndis_sendmulti_handler sendfunc; ndis_senddone_func senddonefunc; int i; ndis_packet *p; uint8_t irql = 0; sc = arg; adapter = sc->ndis_block->nmb_miniportadapterctx; if (adapter == NULL) return (ENXIO); sendfunc = sc->ndis_chars->nmc_sendmulti_func; senddonefunc = sc->ndis_block->nmb_senddone_func; if (NDIS_SERIALIZED(sc->ndis_block)) KeAcquireSpinLock(&sc->ndis_block->nmb_lock, &irql); MSCALL3(sendfunc, adapter, packets, cnt); for (i = 0; i < cnt; i++) { p = packets[i]; /* * Either the driver already handed the packet to * ndis_txeof() due to a failure, or it wants to keep * it and release it asynchronously later. Skip to the * next one. */ if (p == NULL || p->np_oob.npo_status == NDIS_STATUS_PENDING) continue; MSCALL3(senddonefunc, sc->ndis_block, p, p->np_oob.npo_status); } if (NDIS_SERIALIZED(sc->ndis_block)) KeReleaseSpinLock(&sc->ndis_block->nmb_lock, irql); return (0); } int ndis_send_packet(arg, packet) void *arg; ndis_packet *packet; { struct ndis_softc *sc; ndis_handle adapter; ndis_status status; ndis_sendsingle_handler sendfunc; ndis_senddone_func senddonefunc; uint8_t irql = 0; sc = arg; adapter = sc->ndis_block->nmb_miniportadapterctx; if (adapter == NULL) return (ENXIO); sendfunc = sc->ndis_chars->nmc_sendsingle_func; senddonefunc = sc->ndis_block->nmb_senddone_func; if (NDIS_SERIALIZED(sc->ndis_block)) KeAcquireSpinLock(&sc->ndis_block->nmb_lock, &irql); status = MSCALL3(sendfunc, adapter, packet, packet->np_private.npp_flags); if (status == NDIS_STATUS_PENDING) { if (NDIS_SERIALIZED(sc->ndis_block)) KeReleaseSpinLock(&sc->ndis_block->nmb_lock, irql); return (0); } MSCALL3(senddonefunc, sc->ndis_block, packet, status); if (NDIS_SERIALIZED(sc->ndis_block)) KeReleaseSpinLock(&sc->ndis_block->nmb_lock, irql); return (0); } int ndis_init_dma(arg) void *arg; { struct ndis_softc *sc; int i, error; sc = arg; sc->ndis_tmaps = malloc(sizeof(bus_dmamap_t) * sc->ndis_maxpkts, M_DEVBUF, M_NOWAIT|M_ZERO); if (sc->ndis_tmaps == NULL) return (ENOMEM); for (i = 0; i < sc->ndis_maxpkts; i++) { error = bus_dmamap_create(sc->ndis_ttag, 0, &sc->ndis_tmaps[i]); if (error) { free(sc->ndis_tmaps, M_DEVBUF); return (ENODEV); } } return (0); } int ndis_destroy_dma(arg) void *arg; { struct ndis_softc *sc; struct mbuf *m; ndis_packet *p = NULL; int i; sc = arg; for (i = 0; i < sc->ndis_maxpkts; i++) { if (sc->ndis_txarray[i] != NULL) { p = sc->ndis_txarray[i]; m = (struct mbuf *)p->np_rsvd[1]; if (m != NULL) m_freem(m); ndis_free_packet(sc->ndis_txarray[i]); } bus_dmamap_destroy(sc->ndis_ttag, sc->ndis_tmaps[i]); } free(sc->ndis_tmaps, M_DEVBUF); bus_dma_tag_destroy(sc->ndis_ttag); return (0); } int ndis_reset_nic(arg) void *arg; { struct ndis_softc *sc; ndis_handle adapter; ndis_reset_handler resetfunc; uint8_t addressing_reset; int rval; uint8_t irql = 0; sc = arg; NDIS_LOCK(sc); adapter = 
sc->ndis_block->nmb_miniportadapterctx; resetfunc = sc->ndis_chars->nmc_reset_func; if (adapter == NULL || resetfunc == NULL || sc->ndis_block->nmb_devicectx == NULL) { NDIS_UNLOCK(sc); return (EIO); } NDIS_UNLOCK(sc); KeResetEvent(&sc->ndis_block->nmb_resetevent); if (NDIS_SERIALIZED(sc->ndis_block)) KeAcquireSpinLock(&sc->ndis_block->nmb_lock, &irql); rval = MSCALL2(resetfunc, &addressing_reset, adapter); if (NDIS_SERIALIZED(sc->ndis_block)) KeReleaseSpinLock(&sc->ndis_block->nmb_lock, irql); if (rval == NDIS_STATUS_PENDING) KeWaitForSingleObject(&sc->ndis_block->nmb_resetevent, 0, 0, FALSE, NULL); return (0); } int ndis_halt_nic(arg) void *arg; { struct ndis_softc *sc; ndis_handle adapter; ndis_halt_handler haltfunc; ndis_miniport_block *block; int empty = 0; uint8_t irql; sc = arg; block = sc->ndis_block; if (!cold) KeFlushQueuedDpcs(); /* * Wait for all packets to be returned. */ while (1) { KeAcquireSpinLock(&block->nmb_returnlock, &irql); empty = IsListEmpty(&block->nmb_returnlist); KeReleaseSpinLock(&block->nmb_returnlock, irql); if (empty) break; NdisMSleep(1000); } NDIS_LOCK(sc); adapter = sc->ndis_block->nmb_miniportadapterctx; if (adapter == NULL) { NDIS_UNLOCK(sc); return (EIO); } sc->ndis_block->nmb_devicectx = NULL; /* * The adapter context is only valid after the init * handler has been called, and is invalid once the * halt handler has been called. */ haltfunc = sc->ndis_chars->nmc_halt_func; NDIS_UNLOCK(sc); MSCALL1(haltfunc, adapter); NDIS_LOCK(sc); sc->ndis_block->nmb_miniportadapterctx = NULL; NDIS_UNLOCK(sc); return (0); } int ndis_shutdown_nic(arg) void *arg; { struct ndis_softc *sc; ndis_handle adapter; ndis_shutdown_handler shutdownfunc; sc = arg; NDIS_LOCK(sc); adapter = sc->ndis_block->nmb_miniportadapterctx; shutdownfunc = sc->ndis_chars->nmc_shutdown_handler; NDIS_UNLOCK(sc); if (adapter == NULL || shutdownfunc == NULL) return (EIO); if (sc->ndis_chars->nmc_rsvd0 == NULL) MSCALL1(shutdownfunc, adapter); else MSCALL1(shutdownfunc, sc->ndis_chars->nmc_rsvd0); TAILQ_REMOVE(&ndis_devhead, sc->ndis_block, link); return (0); } int ndis_pnpevent_nic(arg, type) void *arg; int type; { device_t dev; struct ndis_softc *sc; ndis_handle adapter; ndis_pnpevent_handler pnpeventfunc; dev = arg; sc = device_get_softc(arg); NDIS_LOCK(sc); adapter = sc->ndis_block->nmb_miniportadapterctx; pnpeventfunc = sc->ndis_chars->nmc_pnpevent_handler; NDIS_UNLOCK(sc); if (adapter == NULL || pnpeventfunc == NULL) return (EIO); if (sc->ndis_chars->nmc_rsvd0 == NULL) MSCALL4(pnpeventfunc, adapter, type, NULL, 0); else MSCALL4(pnpeventfunc, sc->ndis_chars->nmc_rsvd0, type, NULL, 0); return (0); } int ndis_init_nic(arg) void *arg; { struct ndis_softc *sc; ndis_miniport_block *block; ndis_init_handler initfunc; ndis_status status, openstatus = 0; ndis_medium mediumarray[NdisMediumMax]; uint32_t chosenmedium, i; if (arg == NULL) return (EINVAL); sc = arg; NDIS_LOCK(sc); block = sc->ndis_block; initfunc = sc->ndis_chars->nmc_init_func; NDIS_UNLOCK(sc); sc->ndis_block->nmb_timerlist = NULL; for (i = 0; i < NdisMediumMax; i++) mediumarray[i] = i; status = MSCALL6(initfunc, &openstatus, &chosenmedium, mediumarray, NdisMediumMax, block, block); /* * If the init fails, blow away the other exported routines * we obtained from the driver so we can't call them later. * If the init failed, none of these will work. 
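[Editor's note] Stepping back to the MSCALL6() call above: mediumarray offers every known medium and MiniportInitialize() answers with the index of the one it supports. A hypothetical driver-side counterpart, matching the ndis_init_handler typedef from ndis_var.h (unused parameters left as-is for brevity):

static ndis_status
example_miniport_init(ndis_status *openstatus, uint32_t *chosenidx,
    ndis_medium *array, uint32_t cnt, ndis_handle block, ndis_handle ctx)
{
        uint32_t i;

        for (i = 0; i < cnt; i++) {
                if (array[i] == NdisMedium802_3) {
                        *chosenidx = i;
                        return (NDIS_STATUS_SUCCESS);
                }
        }
        return (NDIS_STATUS_UNSUPPORTED_MEDIA);
}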
*/ if (status != NDIS_STATUS_SUCCESS) { NDIS_LOCK(sc); sc->ndis_block->nmb_miniportadapterctx = NULL; NDIS_UNLOCK(sc); return (ENXIO); } /* * This may look really goofy, but apparently it is possible * to halt a miniport too soon after it's been initialized. * After MiniportInitialize() finishes, pause for 1 second * to give the chip a chance to handle any short-lived timers * that were set in motion. If we call MiniportHalt() too soon, * some of the timers may not be cancelled, because the driver * expects them to fire before the halt is called. */ pause("ndwait", hz); NDIS_LOCK(sc); sc->ndis_block->nmb_devicectx = sc; NDIS_UNLOCK(sc); return (0); } static void ndis_intrsetup(dpc, dobj, ip, sc) kdpc *dpc; device_object *dobj; irp *ip; struct ndis_softc *sc; { ndis_miniport_interrupt *intr; intr = sc->ndis_block->nmb_interrupt; /* Sanity check. */ if (intr == NULL) return; KeAcquireSpinLockAtDpcLevel(&intr->ni_dpccountlock); KeResetEvent(&intr->ni_dpcevt); if (KeInsertQueueDpc(&intr->ni_dpc, NULL, NULL) == TRUE) intr->ni_dpccnt++; KeReleaseSpinLockFromDpcLevel(&intr->ni_dpccountlock); } int ndis_get_info(arg, oid, buf, buflen) void *arg; ndis_oid oid; void *buf; int *buflen; { struct ndis_softc *sc; ndis_status rval; ndis_handle adapter; ndis_queryinfo_handler queryfunc; uint32_t byteswritten = 0, bytesneeded = 0; uint8_t irql; uint64_t duetime; sc = arg; KeResetEvent(&sc->ndis_block->nmb_getevent); KeAcquireSpinLock(&sc->ndis_block->nmb_lock, &irql); if (sc->ndis_block->nmb_pendingreq != NULL) { KeReleaseSpinLock(&sc->ndis_block->nmb_lock, irql); panic("ndis_get_info() called while other request pending"); } else sc->ndis_block->nmb_pendingreq = (ndis_request *)sc; queryfunc = sc->ndis_chars->nmc_queryinfo_func; adapter = sc->ndis_block->nmb_miniportadapterctx; if (adapter == NULL || queryfunc == NULL || sc->ndis_block->nmb_devicectx == NULL) { sc->ndis_block->nmb_pendingreq = NULL; KeReleaseSpinLock(&sc->ndis_block->nmb_lock, irql); return (ENXIO); } rval = MSCALL6(queryfunc, adapter, oid, buf, *buflen, &byteswritten, &bytesneeded); sc->ndis_block->nmb_pendingreq = NULL; KeReleaseSpinLock(&sc->ndis_block->nmb_lock, irql); /* Wait for requests that block. */ if (rval == NDIS_STATUS_PENDING) { /* Wait up to 5 seconds. 
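[Editor's note] Once the wait below finishes, ndis_get_info(), like ndis_set_info() above, collapses the returned NDIS status into an errno. The mapping, gathered into one illustrative helper (both functions inline these tests; only the get path also accepts NDIS_STATUS_BUFFER_TOO_SHORT):

static int
example_status_to_errno(ndis_status status)
{

        switch (status) {
        case NDIS_STATUS_SUCCESS:
                return (0);
        case NDIS_STATUS_INVALID_LENGTH:
        case NDIS_STATUS_BUFFER_TOO_SHORT:
                return (ENOSPC);        /* caller's buffer is too small */
        case NDIS_STATUS_INVALID_OID:
                return (EINVAL);        /* OID not recognized */
        case NDIS_STATUS_NOT_SUPPORTED:
        case NDIS_STATUS_NOT_ACCEPTED:
                return (ENOTSUP);
        default:
                return (ENODEV);
        }
}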
*/ duetime = (5 * 1000000) * -10; KeWaitForSingleObject(&sc->ndis_block->nmb_getevent, 0, 0, FALSE, &duetime); rval = sc->ndis_block->nmb_getstat; } if (byteswritten) *buflen = byteswritten; if (bytesneeded) *buflen = bytesneeded; if (rval == NDIS_STATUS_INVALID_LENGTH || rval == NDIS_STATUS_BUFFER_TOO_SHORT) return (ENOSPC); if (rval == NDIS_STATUS_INVALID_OID) return (EINVAL); if (rval == NDIS_STATUS_NOT_SUPPORTED || rval == NDIS_STATUS_NOT_ACCEPTED) return (ENOTSUP); if (rval != NDIS_STATUS_SUCCESS) return (ENODEV); return (0); } uint32_t NdisAddDevice(drv, pdo) driver_object *drv; device_object *pdo; { device_object *fdo; ndis_miniport_block *block; struct ndis_softc *sc; uint32_t status; int error; sc = device_get_softc(pdo->do_devext); if (sc->ndis_iftype == PCMCIABus || sc->ndis_iftype == PCIBus) { error = bus_setup_intr(sc->ndis_dev, sc->ndis_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, ntoskrnl_intr, NULL, &sc->ndis_intrhand); if (error) return (NDIS_STATUS_FAILURE); } status = IoCreateDevice(drv, sizeof(ndis_miniport_block), NULL, FILE_DEVICE_UNKNOWN, 0, FALSE, &fdo); if (status != STATUS_SUCCESS) return (status); block = fdo->do_devext; block->nmb_filterdbs.nf_ethdb = block; block->nmb_deviceobj = fdo; block->nmb_physdeviceobj = pdo; block->nmb_nextdeviceobj = IoAttachDeviceToDeviceStack(fdo, pdo); KeInitializeSpinLock(&block->nmb_lock); KeInitializeSpinLock(&block->nmb_returnlock); KeInitializeEvent(&block->nmb_getevent, EVENT_TYPE_NOTIFY, TRUE); KeInitializeEvent(&block->nmb_setevent, EVENT_TYPE_NOTIFY, TRUE); KeInitializeEvent(&block->nmb_resetevent, EVENT_TYPE_NOTIFY, TRUE); InitializeListHead(&block->nmb_parmlist); InitializeListHead(&block->nmb_returnlist); block->nmb_returnitem = IoAllocateWorkItem(fdo); /* * Stash pointers to the miniport block and miniport * characteristics info in the if_ndis softc so the * UNIX wrapper driver can get to them later. */ sc->ndis_block = block; sc->ndis_chars = IoGetDriverObjectExtension(drv, (void *)1); /* * If the driver has a MiniportTransferData() function, * we should allocate a private RX packet pool. */ if (sc->ndis_chars->nmc_transferdata_func != NULL) { NdisAllocatePacketPool(&status, &block->nmb_rxpool, 32, PROTOCOL_RESERVED_SIZE_IN_PACKET); if (status != NDIS_STATUS_SUCCESS) { IoDetachDevice(block->nmb_nextdeviceobj); IoDeleteDevice(fdo); return (status); } InitializeListHead((&block->nmb_packetlist)); } /* Give interrupt handling priority over timers. */ IoInitializeDpcRequest(fdo, kernndis_functbl[6].ipt_wrap); KeSetImportanceDpc(&fdo->do_dpc, KDPC_IMPORTANCE_HIGH); /* Finish up BSD-specific setup. 
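[Editor's note] The private RX pool allocated above (only for miniports that provide MiniportTransferData()) is used through the standard NDIS pool calls. A hypothetical consumer, with error handling elided:

static void
example_rx_pool_use(ndis_miniport_block *block)
{
        ndis_status status;
        ndis_packet *p;

        NdisAllocatePacket(&status, &p, block->nmb_rxpool);
        if (status != NDIS_STATUS_SUCCESS)
                return;
        /* ... attach an ndis_buffer chain and indicate the packet up ... */
        NdisFreePacket(p);
}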
*/ block->nmb_signature = (void *)0xcafebabe; block->nmb_status_func = kernndis_functbl[0].ipt_wrap; block->nmb_statusdone_func = kernndis_functbl[1].ipt_wrap; block->nmb_setdone_func = kernndis_functbl[2].ipt_wrap; block->nmb_querydone_func = kernndis_functbl[3].ipt_wrap; block->nmb_resetdone_func = kernndis_functbl[4].ipt_wrap; block->nmb_sendrsrc_func = kernndis_functbl[5].ipt_wrap; block->nmb_pendingreq = NULL; TAILQ_INSERT_TAIL(&ndis_devhead, block, link); return (STATUS_SUCCESS); } int ndis_unload_driver(arg) void *arg; { struct ndis_softc *sc; device_object *fdo; sc = arg; if (sc->ndis_intrhand) bus_teardown_intr(sc->ndis_dev, sc->ndis_irq, sc->ndis_intrhand); if (sc->ndis_block->nmb_rlist != NULL) free(sc->ndis_block->nmb_rlist, M_DEVBUF); ndis_flush_sysctls(sc); TAILQ_REMOVE(&ndis_devhead, sc->ndis_block, link); if (sc->ndis_chars->nmc_transferdata_func != NULL) NdisFreePacketPool(sc->ndis_block->nmb_rxpool); fdo = sc->ndis_block->nmb_deviceobj; IoFreeWorkItem(sc->ndis_block->nmb_returnitem); IoDetachDevice(sc->ndis_block->nmb_nextdeviceobj); IoDeleteDevice(fdo); return (0); } Index: head/sys/compat/ndis/ndis_var.h =================================================================== --- head/sys/compat/ndis/ndis_var.h (revision 324445) +++ head/sys/compat/ndis/ndis_var.h (revision 324446) @@ -1,1767 +1,1767 @@ /*- * Copyright (c) 2003 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NDIS_VAR_H_ #define _NDIS_VAR_H_ /* Forward declarations */ struct ndis_miniport_block; struct ndis_mdriver_block; typedef struct ndis_miniport_block ndis_miniport_block; typedef struct ndis_mdriver_block ndis_mdriver_block; /* Base types */ typedef uint32_t ndis_status; typedef void *ndis_handle; typedef uint32_t ndis_oid; typedef uint32_t ndis_error_code; typedef register_t ndis_kspin_lock; typedef uint8_t ndis_kirql; /* * NDIS status codes (there are lots of them). 
The ones that * don't seem to fit the pattern are actually mapped to generic * NT status codes. */ #define NDIS_STATUS_SUCCESS 0 #define NDIS_STATUS_PENDING 0x00000103 #define NDIS_STATUS_NOT_RECOGNIZED 0x00010001 #define NDIS_STATUS_NOT_COPIED 0x00010002 #define NDIS_STATUS_NOT_ACCEPTED 0x00010003 #define NDIS_STATUS_CALL_ACTIVE 0x00010007 #define NDIS_STATUS_ONLINE 0x40010003 #define NDIS_STATUS_RESET_START 0x40010004 #define NDIS_STATUS_RESET_END 0x40010005 #define NDIS_STATUS_RING_STATUS 0x40010006 #define NDIS_STATUS_CLOSED 0x40010007 #define NDIS_STATUS_WAN_LINE_UP 0x40010008 #define NDIS_STATUS_WAN_LINE_DOWN 0x40010009 #define NDIS_STATUS_WAN_FRAGMENT 0x4001000A #define NDIS_STATUS_MEDIA_CONNECT 0x4001000B #define NDIS_STATUS_MEDIA_DISCONNECT 0x4001000C #define NDIS_STATUS_HARDWARE_LINE_UP 0x4001000D #define NDIS_STATUS_HARDWARE_LINE_DOWN 0x4001000E #define NDIS_STATUS_INTERFACE_UP 0x4001000F #define NDIS_STATUS_INTERFACE_DOWN 0x40010010 #define NDIS_STATUS_MEDIA_BUSY 0x40010011 #define NDIS_STATUS_MEDIA_SPECIFIC_INDICATION 0x40010012 #define NDIS_STATUS_WW_INDICATION NDIS_STATUS_MEDIA_SPECIFIC_INDICATION #define NDIS_STATUS_LINK_SPEED_CHANGE 0x40010013 #define NDIS_STATUS_WAN_GET_STATS 0x40010014 #define NDIS_STATUS_WAN_CO_FRAGMENT 0x40010015 #define NDIS_STATUS_WAN_CO_LINKPARAMS 0x40010016 #define NDIS_STATUS_NOT_RESETTABLE 0x80010001 #define NDIS_STATUS_SOFT_ERRORS 0x80010003 #define NDIS_STATUS_HARD_ERRORS 0x80010004 #define NDIS_STATUS_BUFFER_OVERFLOW 0x80000005 #define NDIS_STATUS_FAILURE 0xC0000001 #define NDIS_STATUS_RESOURCES 0xC000009A #define NDIS_STATUS_CLOSING 0xC0010002 #define NDIS_STATUS_BAD_VERSION 0xC0010004 #define NDIS_STATUS_BAD_CHARACTERISTICS 0xC0010005 #define NDIS_STATUS_ADAPTER_NOT_FOUND 0xC0010006 #define NDIS_STATUS_OPEN_FAILED 0xC0010007 #define NDIS_STATUS_DEVICE_FAILED 0xC0010008 #define NDIS_STATUS_MULTICAST_FULL 0xC0010009 #define NDIS_STATUS_MULTICAST_EXISTS 0xC001000A #define NDIS_STATUS_MULTICAST_NOT_FOUND 0xC001000B #define NDIS_STATUS_REQUEST_ABORTED 0xC001000C #define NDIS_STATUS_RESET_IN_PROGRESS 0xC001000D #define NDIS_STATUS_CLOSING_INDICATING 0xC001000E #define NDIS_STATUS_NOT_SUPPORTED 0xC00000BB #define NDIS_STATUS_INVALID_PACKET 0xC001000F #define NDIS_STATUS_OPEN_LIST_FULL 0xC0010010 #define NDIS_STATUS_ADAPTER_NOT_READY 0xC0010011 #define NDIS_STATUS_ADAPTER_NOT_OPEN 0xC0010012 #define NDIS_STATUS_NOT_INDICATING 0xC0010013 #define NDIS_STATUS_INVALID_LENGTH 0xC0010014 #define NDIS_STATUS_INVALID_DATA 0xC0010015 #define NDIS_STATUS_BUFFER_TOO_SHORT 0xC0010016 #define NDIS_STATUS_INVALID_OID 0xC0010017 #define NDIS_STATUS_ADAPTER_REMOVED 0xC0010018 #define NDIS_STATUS_UNSUPPORTED_MEDIA 0xC0010019 #define NDIS_STATUS_GROUP_ADDRESS_IN_USE 0xC001001A #define NDIS_STATUS_FILE_NOT_FOUND 0xC001001B #define NDIS_STATUS_ERROR_READING_FILE 0xC001001C #define NDIS_STATUS_ALREADY_MAPPED 0xC001001D #define NDIS_STATUS_RESOURCE_CONFLICT 0xC001001E #define NDIS_STATUS_NO_CABLE 0xC001001F #define NDIS_STATUS_INVALID_SAP 0xC0010020 #define NDIS_STATUS_SAP_IN_USE 0xC0010021 #define NDIS_STATUS_INVALID_ADDRESS 0xC0010022 #define NDIS_STATUS_VC_NOT_ACTIVATED 0xC0010023 #define NDIS_STATUS_DEST_OUT_OF_ORDER 0xC0010024 #define NDIS_STATUS_VC_NOT_AVAILABLE 0xC0010025 #define NDIS_STATUS_CELLRATE_NOT_AVAILABLE 0xC0010026 #define NDIS_STATUS_INCOMPATABLE_QOS 0xC0010027 #define NDIS_STATUS_AAL_PARAMS_UNSUPPORTED 0xC0010028 #define NDIS_STATUS_NO_ROUTE_TO_DESTINATION 0xC0010029 #define NDIS_STATUS_TOKEN_RING_OPEN_ERROR 0xC0011000 #define 
NDIS_STATUS_INVALID_DEVICE_REQUEST 0xC0000010 #define NDIS_STATUS_NETWORK_UNREACHABLE 0xC000023C /* * NDIS event codes. They are usually reported to NdisWriteErrorLogEntry(). */ #define EVENT_NDIS_RESOURCE_CONFLICT 0xC0001388 #define EVENT_NDIS_OUT_OF_RESOURCE 0xC0001389 #define EVENT_NDIS_HARDWARE_FAILURE 0xC000138A #define EVENT_NDIS_ADAPTER_NOT_FOUND 0xC000138B #define EVENT_NDIS_INTERRUPT_CONNECT 0xC000138C #define EVENT_NDIS_DRIVER_FAILURE 0xC000138D #define EVENT_NDIS_BAD_VERSION 0xC000138E #define EVENT_NDIS_TIMEOUT 0x8000138F #define EVENT_NDIS_NETWORK_ADDRESS 0xC0001390 #define EVENT_NDIS_UNSUPPORTED_CONFIGURATION 0xC0001391 #define EVENT_NDIS_INVALID_VALUE_FROM_ADAPTER 0xC0001392 #define EVENT_NDIS_MISSING_CONFIGURATION_PARAMETER 0xC0001393 #define EVENT_NDIS_BAD_IO_BASE_ADDRESS 0xC0001394 #define EVENT_NDIS_RECEIVE_SPACE_SMALL 0x40001395 #define EVENT_NDIS_ADAPTER_DISABLED 0x80001396 #define EVENT_NDIS_IO_PORT_CONFLICT 0x80001397 #define EVENT_NDIS_PORT_OR_DMA_CONFLICT 0x80001398 #define EVENT_NDIS_MEMORY_CONFLICT 0x80001399 #define EVENT_NDIS_INTERRUPT_CONFLICT 0x8000139A #define EVENT_NDIS_DMA_CONFLICT 0x8000139B #define EVENT_NDIS_INVALID_DOWNLOAD_FILE_ERROR 0xC000139C #define EVENT_NDIS_MAXRECEIVES_ERROR 0x8000139D #define EVENT_NDIS_MAXTRANSMITS_ERROR 0x8000139E #define EVENT_NDIS_MAXFRAMESIZE_ERROR 0x8000139F #define EVENT_NDIS_MAXINTERNALBUFS_ERROR 0x800013A0 #define EVENT_NDIS_MAXMULTICAST_ERROR 0x800013A1 #define EVENT_NDIS_PRODUCTID_ERROR 0x800013A2 #define EVENT_NDIS_LOBE_FAILUE_ERROR 0x800013A3 #define EVENT_NDIS_SIGNAL_LOSS_ERROR 0x800013A4 #define EVENT_NDIS_REMOVE_RECEIVED_ERROR 0x800013A5 #define EVENT_NDIS_TOKEN_RING_CORRECTION 0x400013A6 #define EVENT_NDIS_ADAPTER_CHECK_ERROR 0xC00013A7 #define EVENT_NDIS_RESET_FAILURE_ERROR 0x800013A8 #define EVENT_NDIS_CABLE_DISCONNECTED_ERROR 0x800013A9 #define EVENT_NDIS_RESET_FAILURE_CORRECTION 0x800013AA /* * NDIS OIDs used by the queryinfo/setinfo routines. * Some are required by all NDIS drivers, some are specific to * a particular type of device, and some are purely optional. * Unfortunately, one of the purely optional OIDs is the one * that lets us set the MAC address of the device. */ /* Required OIDs */ #define OID_GEN_SUPPORTED_LIST 0x00010101 #define OID_GEN_HARDWARE_STATUS 0x00010102 #define OID_GEN_MEDIA_SUPPORTED 0x00010103 #define OID_GEN_MEDIA_IN_USE 0x00010104 #define OID_GEN_MAXIMUM_LOOKAHEAD 0x00010105 #define OID_GEN_MAXIMUM_FRAME_SIZE 0x00010106 #define OID_GEN_LINK_SPEED 0x00010107 #define OID_GEN_TRANSMIT_BUFFER_SPACE 0x00010108 #define OID_GEN_RECEIVE_BUFFER_SPACE 0x00010109 #define OID_GEN_TRANSMIT_BLOCK_SIZE 0x0001010A #define OID_GEN_RECEIVE_BLOCK_SIZE 0x0001010B #define OID_GEN_VENDOR_ID 0x0001010C #define OID_GEN_VENDOR_DESCRIPTION 0x0001010D #define OID_GEN_CURRENT_PACKET_FILTER 0x0001010E #define OID_GEN_CURRENT_LOOKAHEAD 0x0001010F #define OID_GEN_DRIVER_VERSION 0x00010110 #define OID_GEN_MAXIMUM_TOTAL_SIZE 0x00010111 #define OID_GEN_PROTOCOL_OPTIONS 0x00010112 #define OID_GEN_MAC_OPTIONS 0x00010113 #define OID_GEN_MEDIA_CONNECT_STATUS 0x00010114 #define OID_GEN_MAXIMUM_SEND_PACKETS 0x00010115 #define OID_GEN_VENDOR_DRIVER_VERSION 0x00010116 #define OID_GEN_SUPPORTED_GUIDS 0x00010117 #define OID_GEN_NETWORK_LAYER_ADDRESSES 0x00010118 /* Set only */ #define OID_GEN_TRANSPORT_HEADER_OFFSET 0x00010119 /* Set only */ #define OID_GEN_MACHINE_NAME 0x0001021A #define OID_GEN_RNDIS_CONFIG_PARAMETER 0x0001021B /* Set only */ #define OID_GEN_VLAN_ID 0x0001021C /* Optional OIDs. 
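[Editor's note] These OIDs are driven through the ndis_get_info()/ndis_set_info() wrappers from kern_ndis.c above. For instance, a hypothetical link-speed query; OID_GEN_LINK_SPEED reports the speed in units of 100 bps:

static int
example_query_link_speed(void *sc, uint32_t *speed)
{
        int len = sizeof(*speed);

        return (ndis_get_info(sc, OID_GEN_LINK_SPEED, speed, &len));
}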
*/ #define OID_GEN_MEDIA_CAPABILITIES 0x00010201 #define OID_GEN_PHYSICAL_MEDIUM 0x00010202 /* Required statistics OIDs. */ #define OID_GEN_XMIT_OK 0x00020101 #define OID_GEN_RCV_OK 0x00020102 #define OID_GEN_XMIT_ERROR 0x00020103 #define OID_GEN_RCV_ERROR 0x00020104 #define OID_GEN_RCV_NO_BUFFER 0x00020105 /* Optional OID statistics */ #define OID_GEN_DIRECTED_BYTES_XMIT 0x00020201 #define OID_GEN_DIRECTED_FRAMES_XMIT 0x00020202 #define OID_GEN_MULTICAST_BYTES_XMIT 0x00020203 #define OID_GEN_MULTICAST_FRAMES_XMIT 0x00020204 #define OID_GEN_BROADCAST_BYTES_XMIT 0x00020205 #define OID_GEN_BROADCAST_FRAMES_XMIT 0x00020206 #define OID_GEN_DIRECTED_BYTES_RCV 0x00020207 #define OID_GEN_DIRECTED_FRAMES_RCV 0x00020208 #define OID_GEN_MULTICAST_BYTES_RCV 0x00020209 #define OID_GEN_MULTICAST_FRAMES_RCV 0x0002020A #define OID_GEN_BROADCAST_BYTES_RCV 0x0002020B #define OID_GEN_BROADCAST_FRAMES_RCV 0x0002020C #define OID_GEN_RCV_CRC_ERROR 0x0002020D #define OID_GEN_TRANSMIT_QUEUE_LENGTH 0x0002020E #define OID_GEN_GET_TIME_CAPS 0x0002020F #define OID_GEN_GET_NETCARD_TIME 0x00020210 #define OID_GEN_NETCARD_LOAD 0x00020211 #define OID_GEN_DEVICE_PROFILE 0x00020212 /* 802.3 (ethernet) OIDs */ #define OID_802_3_PERMANENT_ADDRESS 0x01010101 #define OID_802_3_CURRENT_ADDRESS 0x01010102 #define OID_802_3_MULTICAST_LIST 0x01010103 #define OID_802_3_MAXIMUM_LIST_SIZE 0x01010104 #define OID_802_3_MAC_OPTIONS 0x01010105 #define NDIS_802_3_MAC_OPTION_PRIORITY 0x00000001 #define OID_802_3_RCV_ERROR_ALIGNMENT 0x01020101 #define OID_802_3_XMIT_ONE_COLLISION 0x01020102 #define OID_802_3_XMIT_MORE_COLLISIONS 0x01020103 #define OID_802_3_XMIT_DEFERRED 0x01020201 #define OID_802_3_XMIT_MAX_COLLISIONS 0x01020202 #define OID_802_3_RCV_OVERRUN 0x01020203 #define OID_802_3_XMIT_UNDERRUN 0x01020204 #define OID_802_3_XMIT_HEARTBEAT_FAILURE 0x01020205 #define OID_802_3_XMIT_TIMES_CRS_LOST 0x01020206 #define OID_802_3_XMIT_LATE_COLLISIONS 0x01020207 /* PnP and power management OIDs */ #define OID_PNP_CAPABILITIES 0xFD010100 #define OID_PNP_SET_POWER 0xFD010101 #define OID_PNP_QUERY_POWER 0xFD010102 #define OID_PNP_ADD_WAKE_UP_PATTERN 0xFD010103 #define OID_PNP_REMOVE_WAKE_UP_PATTERN 0xFD010104 #define OID_PNP_WAKE_UP_PATTERN_LIST 0xFD010105 #define OID_PNP_ENABLE_WAKE_UP 0xFD010106 /* * These are the possible power states for * OID_PNP_SET_POWER and OID_PNP_QUERY_POWER. */ #define NDIS_POWERSTATE_UNSPEC 0 #define NDIS_POWERSTATE_D0 1 #define NDIS_POWERSTATE_D1 2 #define NDIS_POWERSTATE_D2 3 #define NDIS_POWERSTATE_D3 4 /* * These are used with the MiniportPnpEventNotify() method. */ #define NDIS_POWERPROFILE_BATTERY 0 #define NDIS_POWERPROFILE_ACONLINE 1 #define NDIS_PNP_EVENT_QUERY_REMOVED 0 #define NDIS_PNP_EVENT_REMOVED 1 #define NDIS_PNP_EVENT_SURPRISE_REMOVED 2 #define NDIS_PNP_EVENT_QUERY_STOPPED 3 #define NDIS_PNP_EVENT_STOPPED 4 #define NDIS_PNP_EVENT_PROFILECHANGED 5 /* PnP/PM Statistics (Optional). 
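[Editor's note] A hypothetical use of the power-state constants above: drop the device to D3 via OID_PNP_SET_POWER ahead of a suspend.

static int
example_power_down(void *sc)
{
        uint32_t state = NDIS_POWERSTATE_D3;
        int len = sizeof(state);

        return (ndis_set_info(sc, OID_PNP_SET_POWER, &state, &len));
}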
*/ #define OID_PNP_WAKE_UP_OK 0xFD020200 #define OID_PNP_WAKE_UP_ERROR 0xFD020201 /* The following bits are defined for OID_PNP_ENABLE_WAKE_UP */ #define NDIS_PNP_WAKE_UP_MAGIC_PACKET 0x00000001 #define NDIS_PNP_WAKE_UP_PATTERN_MATCH 0x00000002 #define NDIS_PNP_WAKE_UP_LINK_CHANGE 0x00000004 /* 802.11 OIDs */ #define OID_802_11_BSSID 0x0D010101 #define OID_802_11_SSID 0x0D010102 #define OID_802_11_NETWORK_TYPES_SUPPORTED 0x0D010203 #define OID_802_11_NETWORK_TYPE_IN_USE 0x0D010204 #define OID_802_11_TX_POWER_LEVEL 0x0D010205 #define OID_802_11_RSSI 0x0D010206 #define OID_802_11_RSSI_TRIGGER 0x0D010207 #define OID_802_11_INFRASTRUCTURE_MODE 0x0D010108 #define OID_802_11_FRAGMENTATION_THRESHOLD 0x0D010209 #define OID_802_11_RTS_THRESHOLD 0x0D01020A #define OID_802_11_NUMBER_OF_ANTENNAS 0x0D01020B #define OID_802_11_RX_ANTENNA_SELECTED 0x0D01020C #define OID_802_11_TX_ANTENNA_SELECTED 0x0D01020D #define OID_802_11_SUPPORTED_RATES 0x0D01020E #define OID_802_11_DESIRED_RATES 0x0D010210 #define OID_802_11_CONFIGURATION 0x0D010211 #define OID_802_11_STATISTICS 0x0D020212 #define OID_802_11_ADD_WEP 0x0D010113 #define OID_802_11_REMOVE_WEP 0x0D010114 #define OID_802_11_DISASSOCIATE 0x0D010115 #define OID_802_11_POWER_MODE 0x0D010216 #define OID_802_11_BSSID_LIST 0x0D010217 #define OID_802_11_AUTHENTICATION_MODE 0x0D010118 #define OID_802_11_PRIVACY_FILTER 0x0D010119 #define OID_802_11_BSSID_LIST_SCAN 0x0D01011A #define OID_802_11_WEP_STATUS 0x0D01011B #define OID_802_11_ENCRYPTION_STATUS OID_802_11_WEP_STATUS #define OID_802_11_RELOAD_DEFAULTS 0x0D01011C #define OID_802_11_ADD_KEY 0x0D01011D #define OID_802_11_REMOVE_KEY 0x0D01011E #define OID_802_11_ASSOCIATION_INFORMATION 0x0D01011F #define OID_802_11_TEST 0x0D010120 #define OID_802_11_CAPABILITY 0x0D010122 #define OID_802_11_PMKID 0x0D010123 /* structures/definitions for 802.11 */ #define NDIS_80211_NETTYPE_11FH 0x00000000 #define NDIS_80211_NETTYPE_11DS 0x00000001 #define NDIS_80211_NETTYPE_11OFDM5 0x00000002 #define NDIS_80211_NETTYPE_11OFDM24 0x00000003 #define NDIS_80211_NETTYPE_AUTO 0x00000004 struct ndis_80211_nettype_list { uint32_t ntl_items; uint32_t ntl_type[1]; }; #define NDIS_80211_POWERMODE_CAM 0x00000000 #define NDIS_80211_POWERMODE_MAX_PSP 0x00000001 #define NDIS_80211_POWERMODE_FAST_PSP 0x00000002 typedef uint32_t ndis_80211_power; /* Power in milliwatts */ typedef uint32_t ndis_80211_rssi; /* Signal strength in dBm */ struct ndis_80211_config_fh { uint32_t ncf_length; uint32_t ncf_hoppatterh; uint32_t ncf_hopset; uint32_t ncf_dwelltime; }; typedef struct ndis_80211_config_fh ndis_80211_config_fh; struct ndis_80211_config { uint32_t nc_length; uint32_t nc_beaconperiod; uint32_t nc_atimwin; uint32_t nc_dsconfig; ndis_80211_config_fh nc_fhconfig; }; typedef struct ndis_80211_config ndis_80211_config; struct ndis_80211_stats { uint32_t ns_length; uint64_t ns_txfragcnt; uint64_t ns_txmcastcnt; uint64_t ns_failedcnt; uint64_t ns_retrycnt; uint64_t ns_multiretrycnt; uint64_t ns_rtssuccesscnt; uint64_t ns_rtsfailcnt; uint64_t ns_ackfailcnt; uint64_t ns_dupeframecnt; uint64_t ns_rxfragcnt; uint64_t ns_rxmcastcnt; uint64_t ns_fcserrcnt; }; typedef struct ndis_80211_stats ndis_80211_stats; typedef uint32_t ndis_80211_key_idx; struct ndis_80211_wep { uint32_t nw_length; uint32_t nw_keyidx; uint32_t nw_keylen; uint8_t nw_keydata[256]; }; typedef struct ndis_80211_wep ndis_80211_wep; #define NDIS_80211_WEPKEY_TX 0x80000000 #define NDIS_80211_WEPKEY_PERCLIENT 0x40000000 #define NDIS_80211_NET_INFRA_IBSS 0x00000000 #define 
NDIS_80211_NET_INFRA_BSS 0x00000001 #define NDIS_80211_NET_INFRA_AUTO 0x00000002 #define NDIS_80211_AUTHMODE_OPEN 0x00000000 #define NDIS_80211_AUTHMODE_SHARED 0x00000001 #define NDIS_80211_AUTHMODE_AUTO 0x00000002 #define NDIS_80211_AUTHMODE_WPA 0x00000003 #define NDIS_80211_AUTHMODE_WPAPSK 0x00000004 #define NDIS_80211_AUTHMODE_WPANONE 0x00000005 #define NDIS_80211_AUTHMODE_WPA2 0x00000006 #define NDIS_80211_AUTHMODE_WPA2PSK 0x00000007 typedef uint8_t ndis_80211_rates[8]; typedef uint8_t ndis_80211_rates_ex[16]; typedef uint8_t ndis_80211_macaddr[6]; struct ndis_80211_ssid { uint32_t ns_ssidlen; uint8_t ns_ssid[32]; }; typedef struct ndis_80211_ssid ndis_80211_ssid; struct ndis_wlan_bssid { uint32_t nwb_length; ndis_80211_macaddr nwb_macaddr; uint8_t nwb_rsvd[2]; ndis_80211_ssid nwb_ssid; uint32_t nwb_privacy; ndis_80211_rssi nwb_rssi; uint32_t nwb_nettype; ndis_80211_config nwb_config; uint32_t nwb_netinfra; ndis_80211_rates nwb_supportedrates; }; typedef struct ndis_wlan_bssid ndis_wlan_bssid; struct ndis_80211_bssid_list { uint32_t nbl_items; ndis_wlan_bssid nbl_bssid[1]; }; typedef struct ndis_80211_bssid_list ndis_80211_bssid_list; struct ndis_wlan_bssid_ex { uint32_t nwbx_len; ndis_80211_macaddr nwbx_macaddr; uint8_t nwbx_rsvd[2]; ndis_80211_ssid nwbx_ssid; uint32_t nwbx_privacy; ndis_80211_rssi nwbx_rssi; uint32_t nwbx_nettype; ndis_80211_config nwbx_config; uint32_t nwbx_netinfra; ndis_80211_rates_ex nwbx_supportedrates; uint32_t nwbx_ielen; uint8_t nwbx_ies[1]; }; typedef struct ndis_wlan_bssid_ex ndis_wlan_bssid_ex; struct ndis_80211_bssid_list_ex { uint32_t nblx_items; ndis_wlan_bssid_ex nblx_bssid[1]; }; typedef struct ndis_80211_bssid_list_ex ndis_80211_bssid_list_ex; struct ndis_80211_fixed_ies { uint8_t nfi_tstamp[8]; uint16_t nfi_beaconint; uint16_t nfi_caps; }; struct ndis_80211_variable_ies { uint8_t nvi_elemid; uint8_t nvi_len; uint8_t nvi_data[1]; }; typedef uint32_t ndis_80211_fragthresh; typedef uint32_t ndis_80211_rtsthresh; typedef uint32_t ndis_80211_antenna; #define NDIS_80211_PRIVFILT_ACCEPTALL 0x00000000 #define NDIS_80211_PRIVFILT_8021XWEP 0x00000001 #define NDIS_80211_WEPSTAT_ENABLED 0x00000000 #define NDIS_80211_WEPSTAT_ENC1ENABLED NDIS_80211_WEPSTAT_ENABLED #define NDIS_80211_WEPSTAT_DISABLED 0x00000001 #define NDIS_80211_WEPSTAT_ENCDISABLED NDIS_80211_WEPSTAT_DISABLED #define NDIS_80211_WEPSTAT_KEYABSENT 0x00000002 #define NDIS_80211_WEPSTAT_ENC1KEYABSENT NDIS_80211_WEPSTAT_KEYABSENT #define NDIS_80211_WEPSTAT_NOTSUPPORTED 0x00000003 #define NDIS_80211_WEPSTAT_ENCNOTSUPPORTED NDIS_80211_WEPSTAT_NOTSUPPORTED #define NDIS_80211_WEPSTAT_ENC2ENABLED 0x00000004 #define NDIS_80211_WEPSTAT_ENC2KEYABSENT 0x00000005 #define NDIS_80211_WEPSTAT_ENC3ENABLED 0x00000006 #define NDIS_80211_WEPSTAT_ENC3KEYABSENT 0x00000007 #define NDIS_80211_RELOADDEFAULT_WEP 0x00000000 #define NDIS_80211_STATUSTYPE_AUTH 0x00000000 #define NDIS_80211_STATUSTYPE_PMKIDLIST 0x00000001 struct ndis_80211_status_indication { uint32_t nsi_type; }; typedef struct ndis_80211_status_indication ndis_80211_status_indication; #define NDIS_802_11_AUTH_REQUEST_REAUTH 0x01 #define NDIS_802_11_AUTH_REQUEST_KEYUPDATE 0x02 #define NDIS_802_11_AUTH_REQUEST_PAIRWISE_ERROR 0x06 #define NDIS_802_11_AUTH_REQUEST_GROUP_ERROR 0x0E struct ndis_80211_auth_request { uint32_t nar_len; ndis_80211_macaddr nar_bssid; uint32_t nar_flags; }; typedef struct ndis_80211_auth_request ndis_80211_auth_request; struct ndis_80211_key { uint32_t nk_len; uint32_t nk_keyidx; uint32_t nk_keylen; ndis_80211_macaddr nk_bssid; uint8_t 
nk_pad[6]; uint64_t nk_keyrsc; uint8_t nk_keydata[32]; }; typedef struct ndis_80211_key ndis_80211_key; struct ndis_80211_remove_key { uint32_t nk_len; uint32_t nk_keyidx; ndis_80211_macaddr nk_bssid; }; typedef struct ndis_80211_remove_key ndis_80211_remove_key; #define NDIS_80211_AI_REQFI_CAPABILITIES 0x00000001 #define NDIS_80211_AI_REQFI_LISTENINTERVAL 0x00000002 #define NDIS_80211_AI_REQFI_CURRENTAPADDRESS 0x00000004 #define NDIS_80211_AI_RESFI_CAPABILITIES 0x00000001 #define NDIS_80211_AI_RESFI_STATUSCODE 0x00000002 #define NDIS_80211_AI_RESFI_ASSOCIATIONID 0x00000004 struct ndis_80211_ai_reqfi { uint16_t naq_caps; uint16_t naq_listentint; ndis_80211_macaddr naq_currentapaddr; }; typedef struct ndis_80211_ai_reqfi ndis_80211_ai_reqfi; struct ndis_80211_ai_resfi { uint16_t nas_caps; uint16_t nas_statuscode; uint16_t nas_associd; }; typedef struct ndis_80211_ai_resfi ndis_80211_ai_resfi; struct ndis_80211_assoc_info { uint32_t nai_len; uint16_t nai_avail_req_fixed_ies; ndis_80211_ai_reqfi nai_req_fixed_ies; uint32_t nai_req_ielen; uint32_t nai_offset_req_ies; uint16_t nai_avail_resp_fixed_ies; ndis_80211_ai_resfi nai_resp_fixed_iex; uint32_t nai_resp_ielen; uint32_t nai_offset_resp_ies; }; typedef struct ndis_80211_assoc_info ndis_80211_assoc_info; struct ndis_80211_auth_event { ndis_80211_status_indication nae_status; ndis_80211_auth_request nae_request[1]; }; typedef struct ndis_80211_auth_event ndis_80211_auth_event; struct ndis_80211_test { uint32_t nt_len; uint32_t nt_type; union { ndis_80211_auth_event nt_authevent; uint32_t nt_rssitrigger; } u; }; typedef struct ndis_80211_test ndis_80211_test; struct ndis_80211_auth_encrypt { uint32_t ne_authmode; uint32_t ne_cryptstat; }; typedef struct ndis_80211_auth_encrypt ndis_80211_auth_encrypt; struct ndis_80211_caps { uint32_t nc_len; uint32_t nc_ver; uint32_t nc_numpmkids; ndis_80211_auth_encrypt nc_authencs[1]; }; typedef struct ndis_80211_caps ndis_80211_caps; struct ndis_80211_bssidinfo { ndis_80211_macaddr nb_bssid; uint8_t nb_pmkid[16]; }; typedef struct ndis_80211_bssidinfo ndis_80211_bssidinfo; struct ndis_80211_pmkid { uint32_t np_len; uint32_t np_bssidcnt; ndis_80211_bssidinfo np_bssidinfo[1]; }; typedef struct ndis_80211_pmkid ndis_80211_pmkid; struct ndis_80211_pmkid_cand { ndis_80211_macaddr npc_bssid; uint32_t npc_flags; }; typedef struct ndis_80211_pmkid_cand ndis_80211_pmkid_cand; #define NDIS_802_11_PMKID_CANDIDATE_PREAUTH_ENABLED (0x01) struct ndis_80211_pmkid_candidate_list { uint32_t npcl_version; uint32_t npcl_numcandidates; ndis_80211_pmkid_cand npcl_candidatelist[1]; }; typedef struct ndis_80211_pmkid_candidate_list ndis_80211_pmkid_candidate_list; struct ndis_80211_enc_indication { uint32_t nei_statustype; ndis_80211_pmkid_candidate_list nei_pmkidlist; }; typedef struct ndis_80211_enc_indication ndis_80211_enc_indication; /* TCP OIDs. 
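[Editor's note] The 802.11 scan structures above are variable-length records: each entry carries its own size, so consumers advance by nwbx_len rather than by sizeof(). A sketch of walking a buffer returned for OID_802_11_BSSID_LIST:

static void
example_walk_scan_results(ndis_80211_bssid_list_ex *bl)
{
        ndis_wlan_bssid_ex *wb;
        uint32_t i;

        wb = &bl->nblx_bssid[0];
        for (i = 0; i < bl->nblx_items; i++) {
                /* ... consume wb->nwbx_ssid, wb->nwbx_rssi, the IEs ... */
                wb = (ndis_wlan_bssid_ex *)((char *)wb + wb->nwbx_len);
        }
}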
*/ #define OID_TCP_TASK_OFFLOAD 0xFC010201 #define OID_TCP_TASK_IPSEC_ADD_SA 0xFC010202 #define OID_TCP_TASK_IPSEC_DELETE_SA 0xFC010203 #define OID_TCP_SAN_SUPPORT 0xFC010204 #define NDIS_TASK_OFFLOAD_VERSION 1 #define NDIS_TASK_TCPIP_CSUM 0x00000000 #define NDIS_TASK_IPSEC 0x00000001 #define NDIS_TASK_TCP_LARGESEND 0x00000002 #define NDIS_ENCAP_UNSPEC 0x00000000 #define NDIS_ENCAP_NULL 0x00000001 #define NDIS_ENCAP_IEEE802_3 0x00000002 #define NDIS_ENCAP_IEEE802_5 0x00000003 #define NDIS_ENCAP_SNAP_ROUTED 0x00000004 #define NDIS_ENCAP_SNAP_BRIDGED 0x00000005 #define NDIS_ENCAPFLAG_FIXEDHDRLEN 0x00000001 struct ndis_encap_fmt { uint32_t nef_encap; uint32_t nef_flags; uint32_t nef_encaphdrlen; }; typedef struct ndis_encap_fmt ndis_encap_fmt; struct ndis_task_offload_hdr { uint32_t ntoh_vers; uint32_t ntoh_len; uint32_t ntoh_rsvd; uint32_t ntoh_offset_firsttask; ndis_encap_fmt ntoh_encapfmt; }; typedef struct ndis_task_offload_hdr ndis_task_offload_hdr; struct ndis_task_offload { uint32_t nto_vers; uint32_t nto_len; uint32_t nto_task; uint32_t nto_offset_nexttask; uint32_t nto_taskbuflen; uint8_t nto_taskbuf[1]; }; typedef struct ndis_task_offload ndis_task_offload; #define NDIS_TCPSUM_FLAGS_IP_OPTS 0x00000001 #define NDIS_TCPSUM_FLAGS_TCP_OPTS 0x00000002 #define NDIS_TCPSUM_FLAGS_TCP_CSUM 0x00000004 #define NDIS_TCPSUM_FLAGS_UDP_CSUM 0x00000008 #define NDIS_TCPSUM_FLAGS_IP_CSUM 0x00000010 struct ndis_task_tcpip_csum { uint32_t nttc_v4tx; uint32_t nttc_v4rx; uint32_t nttc_v6tx; uint32_t nttc_v6rx; }; typedef struct ndis_task_tcpip_csum ndis_task_tcpip_csum; struct ndis_task_tcp_largesend { uint32_t nttl_vers; uint32_t nttl_maxofflen; uint32_t nttl_minsegcnt; uint8_t nttl_tcpopt; uint8_t nttl_ipopt; }; typedef struct ndis_task_tcp_largesend ndis_task_tcp_largesend; #define NDIS_IPSEC_AH_MD5 0x00000001 #define NDIS_IPSEC_AH_SHA1 0x00000002 #define NDIS_IPSEC_AH_TRANSPORT 0x00000004 #define NDIS_IPSEC_AH_TUNNEL 0x00000008 #define NDIS_IPSEC_AH_SEND 0x00000010 #define NDIS_IPSEC_AH_RECEIVE 0x00000020 #define NDIS_IPSEC_ESP_DES 0x00000001 #define NDIS_IPSEC_ESP_RSVD 0x00000002 #define NDIS_IPSEC_ESP_3DES 0x00000004 #define NDIS_IPSEC_ESP_NULL 0x00000008 #define NDIS_IPSEC_ESP_TRANSPORT 0x00000010 #define NDIS_IPSEC_ESP_TUNNEL 0x00000020 #define NDIS_IPSEC_ESP_SEND 0x00000040 #define NDIS_IPSEC_ESP_RECEIVE 0x00000080 struct ndis_task_ipsec { uint32_t nti_ah_esp_combined; uint32_t nti_ah_transport_tunnel_combined; uint32_t nti_v4_options; uint32_t nti_rsvd; uint32_t nti_v4ah; uint32_t nti_v4esp; }; typedef struct ndis_task_ipsec ndis_task_ipsec; /* * Attribures of NDIS drivers. Not all drivers support * all attributes. */ #define NDIS_ATTRIBUTE_IGNORE_PACKET_TIMEOUT 0x00000001 #define NDIS_ATTRIBUTE_IGNORE_REQUEST_TIMEOUT 0x00000002 #define NDIS_ATTRIBUTE_IGNORE_TOKEN_RING_ERRORS 0x00000004 #define NDIS_ATTRIBUTE_BUS_MASTER 0x00000008 #define NDIS_ATTRIBUTE_INTERMEDIATE_DRIVER 0x00000010 #define NDIS_ATTRIBUTE_DESERIALIZE 0x00000020 #define NDIS_ATTRIBUTE_NO_HALT_ON_SUSPEND 0x00000040 #define NDIS_ATTRIBUTE_SURPRISE_REMOVE_OK 0x00000080 #define NDIS_ATTRIBUTE_NOT_CO_NDIS 0x00000100 #define NDIS_ATTRIBUTE_USES_SAFE_BUFFER_APIS 0x00000200 #define NDIS_SERIALIZED(block) \ (((block)->nmb_flags & NDIS_ATTRIBUTE_DESERIALIZE) == 0) enum ndis_media_state { nmc_connected, nmc_disconnected }; typedef enum ndis_media_state ndis_media_state; /* Ndis Packet Filter Bits (OID_GEN_CURRENT_PACKET_FILTER). 
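[Editor's note] A hypothetical example of programming the packet filter bits defined just below, through the OID from the required list above:

static int
example_set_rx_filter(void *sc)
{
        uint32_t filter;
        int len = sizeof(filter);

        filter = NDIS_PACKET_TYPE_DIRECTED | NDIS_PACKET_TYPE_BROADCAST;
        return (ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER,
            &filter, &len));
}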
*/ #define NDIS_PACKET_TYPE_DIRECTED 0x00000001 #define NDIS_PACKET_TYPE_MULTICAST 0x00000002 #define NDIS_PACKET_TYPE_ALL_MULTICAST 0x00000004 #define NDIS_PACKET_TYPE_BROADCAST 0x00000008 #define NDIS_PACKET_TYPE_SOURCE_ROUTING 0x00000010 #define NDIS_PACKET_TYPE_PROMISCUOUS 0x00000020 #define NDIS_PACKET_TYPE_SMT 0x00000040 #define NDIS_PACKET_TYPE_ALL_LOCAL 0x00000080 #define NDIS_PACKET_TYPE_GROUP 0x00001000 #define NDIS_PACKET_TYPE_ALL_FUNCTIONAL 0x00002000 #define NDIS_PACKET_TYPE_FUNCTIONAL 0x00004000 #define NDIS_PACKET_TYPE_MAC_FRAME 0x00008000 /* Ndis MAC option bits (OID_GEN_MAC_OPTIONS). */ #define NDIS_MAC_OPTION_COPY_LOOKAHEAD_DATA 0x00000001 #define NDIS_MAC_OPTION_RECEIVE_SERIALIZED 0x00000002 #define NDIS_MAC_OPTION_TRANSFERS_NOT_PEND 0x00000004 #define NDIS_MAC_OPTION_NO_LOOPBACK 0x00000008 #define NDIS_MAC_OPTION_FULL_DUPLEX 0x00000010 #define NDIS_MAC_OPTION_EOTX_INDICATION 0x00000020 #define NDIS_MAC_OPTION_8021P_PRIORITY 0x00000040 #define NDIS_MAC_OPTION_SUPPORTS_MAC_ADDRESS_OVERWRITE 0x00000080 #define NDIS_MAC_OPTION_RECEIVE_AT_DPC 0x00000100 #define NDIS_MAC_OPTION_8021Q_VLAN 0x00000200 #define NDIS_MAC_OPTION_RESERVED 0x80000000 #define NDIS_DMA_24BITS 0x00 #define NDIS_DMA_32BITS 0x01 #define NDIS_DMA_64BITS 0x02 /* struct ndis_physaddr { #ifdef __i386__ uint64_t np_quad; #endif #ifdef __amd64__ uint32_t np_low; uint32_t np_high; #define np_quad np_low #endif #ifdef notdef uint32_t np_low; uint32_t np_high; #endif }; */ typedef struct physaddr ndis_physaddr; struct ndis_ansi_string { uint16_t nas_len; uint16_t nas_maxlen; char *nas_buf; }; typedef struct ndis_ansi_string ndis_ansi_string; #ifdef notdef /* * nus_buf is really a wchar_t *, but it's inconvenient to include * all the necessary header goop needed to define it, and it's a * pointer anyway, so for now, just make it a uint16_t *. */ struct ndis_unicode_string { uint16_t nus_len; uint16_t nus_maxlen; uint16_t *nus_buf; }; typedef struct ndis_unicode_string ndis_unicode_string; #endif typedef unicode_string ndis_unicode_string; enum ndis_parm_type { ndis_parm_int, ndis_parm_hexint, ndis_parm_string, ndis_parm_multistring, ndis_parm_binary }; typedef enum ndis_parm_type ndis_parm_type; struct ndis_binary_data { uint16_t nbd_len; void *nbd_buf; }; typedef struct ndis_binary_data ndis_binary_data; struct ndis_config_parm { ndis_parm_type ncp_type; union { uint32_t ncp_intdata; ndis_unicode_string ncp_stringdata; ndis_binary_data ncp_binarydata; } ncp_parmdata; }; /* * Not part of Windows NDIS spec; we uses this to keep a * list of ndis_config_parm structures that we've allocated. 
*/ typedef struct ndis_config_parm ndis_config_parm; struct ndis_parmlist_entry { list_entry np_list; ndis_config_parm np_parm; }; typedef struct ndis_parmlist_entry ndis_parmlist_entry; #ifdef notdef struct ndis_list_entry { struct ndis_list_entry *nle_flink; struct ndis_list_entry *nle_blink; }; typedef struct ndis_list_entry ndis_list_entry; #endif struct ndis_bind_paths { uint32_t nbp_number; ndis_unicode_string nbp_paths[1]; }; typedef struct ndis_bind_paths ndis_bind_paths; #ifdef notdef struct dispatch_header { uint8_t dh_type; uint8_t dh_abs; uint8_t dh_size; uint8_t dh_inserted; uint32_t dh_sigstate; list_entry dh_waitlisthead; }; #endif #define dispatch_header nt_dispatch_header struct ndis_ktimer { struct dispatch_header nk_header; uint64_t nk_duetime; list_entry nk_timerlistentry; void *nk_dpc; uint32_t nk_period; }; struct ndis_kevent { struct dispatch_header nk_header; }; struct ndis_event { struct nt_kevent ne_event; }; typedef struct ndis_event ndis_event; /* Kernel defered procedure call (i.e. timer callback) */ struct ndis_kdpc; typedef void (*ndis_kdpc_func)(struct ndis_kdpc *, void *, void *, void *); struct ndis_kdpc { uint16_t nk_type; uint8_t nk_num; uint8_t nk_importance; list_entry nk_dpclistentry; ndis_kdpc_func nk_deferedfunc; void *nk_deferredctx; void *nk_sysarg1; void *nk_sysarg2; uint32_t *nk_lock; }; struct ndis_timer { struct ktimer nt_ktimer; struct kdpc nt_kdpc; }; typedef struct ndis_timer ndis_timer; typedef void (*ndis_timer_function)(void *, void *, void *, void *); struct ndis_miniport_timer { struct ktimer nmt_ktimer; struct kdpc nmt_kdpc; ndis_timer_function nmt_timerfunc; void *nmt_timerctx; ndis_miniport_block *nmt_block; struct ndis_miniport_timer *nmt_nexttimer; }; typedef struct ndis_miniport_timer ndis_miniport_timer; struct ndis_spin_lock { ndis_kspin_lock nsl_spinlock; ndis_kirql nsl_kirql; }; typedef struct ndis_spin_lock ndis_spin_lock; struct ndis_rw_lock { union { kspin_lock nrl_spinlock; void *nrl_ctx; } u; uint8_t nrl_rsvd[16]; }; #define nrl_spinlock u.nrl_spinlock #define nrl_ctx u.nrl_ctx; typedef struct ndis_rw_lock ndis_rw_lock; struct ndis_lock_state { uint16_t nls_lockstate; ndis_kirql nls_oldirql; }; typedef struct ndis_lock_state ndis_lock_state; struct ndis_request { uint8_t nr_macreserved[4*sizeof(void *)]; uint32_t nr_requesttype; union _ndis_data { struct _ndis_query_information { ndis_oid nr_oid; void *nr_infobuf; uint32_t nr_infobuflen; uint32_t nr_byteswritten; uint32_t nr_bytesneeded; } ndis_query_information; struct _ndis_set_information { ndis_oid nr_oid; void *nr_infobuf; uint32_t nr_infobuflen; uint32_t nr_byteswritten; uint32_t nr_bytesneeded; } ndis_set_information; } ndis_data; /* NDIS 5.0 extensions */ uint8_t nr_ndis_rsvd[9 * sizeof(void *)]; union { uint8_t nr_callmgr_rsvd[2 * sizeof(void *)]; uint8_t nr_protocol_rsvd[2 * sizeof(void *)]; } u; uint8_t nr_miniport_rsvd[2 * sizeof(void *)]; }; typedef struct ndis_request ndis_request; /* * Filler, not used. 
*/ struct ndis_miniport_interrupt { kinterrupt *ni_introbj; ndis_kspin_lock ni_dpccountlock; void *ni_rsvd; void *ni_isrfunc; void *ni_dpcfunc; kdpc ni_dpc; ndis_miniport_block *ni_block; uint8_t ni_dpccnt; uint8_t ni_filler1; struct nt_kevent ni_dpcevt; uint8_t ni_shared; uint8_t ni_isrreq; }; typedef struct ndis_miniport_interrupt ndis_miniport_interrupt; enum ndis_interrupt_mode { nim_level, nim_latched }; typedef enum ndis_interrupt_mode ndis_interrupt_mode; #define NUMBER_OF_SINGLE_WORK_ITEMS 6 struct ndis_work_item; typedef void (*ndis_proc)(struct ndis_work_item *, void *); struct ndis_work_item { void *nwi_ctx; ndis_proc nwi_func; uint8_t nwi_wraprsvd[sizeof(void *) * 8]; }; typedef struct ndis_work_item ndis_work_item; #define NdisInitializeWorkItem(w, f, c) \ do { \ (w)->nwi_ctx = c; \ (w)->nwi_func = f; \ } while (0) #ifdef notdef struct ndis_buffer { struct ndis_buffer *nb_next; uint16_t nb_size; uint16_t nb_flags; void *nb_process; void *nb_mappedsystemva; void *nb_startva; uint32_t nb_bytecount; uint32_t nb_byteoffset; }; typedef struct ndis_buffer ndis_buffer; #endif struct ndis_sc_element { ndis_physaddr nse_addr; uint32_t nse_len; uint32_t *nse_rsvd; }; typedef struct ndis_sc_element ndis_sc_element; #define NDIS_MAXSEG 32 #define NDIS_BUS_SPACE_SHARED_MAXADDR 0x3E7FFFFF struct ndis_sc_list { uint32_t nsl_frags; uint32_t *nsl_rsvd; ndis_sc_element nsl_elements[NDIS_MAXSEG]; }; typedef struct ndis_sc_list ndis_sc_list; struct ndis_tcpip_csum { union { uint32_t ntc_txflags; uint32_t ntc_rxflags; uint32_t ntc_val; } u; }; typedef struct ndis_tcpip_csum ndis_tcpip_csum; #define NDIS_TXCSUM_DO_IPV4 0x00000001 #define NDIS_TXCSUM_DO_IPV6 0x00000002 #define NDIS_TXCSUM_DO_TCP 0x00000004 #define NDIS_TXCSUM_DO_UDP 0x00000008 #define NDIS_TXCSUM_DO_IP 0x00000010 #define NDIS_RXCSUM_TCP_FAILED 0x00000001 #define NDIS_RXCSUM_UDP_FAILED 0x00000002 #define NDIS_RXCSUM_IP_FAILED 0x00000004 #define NDIS_RXCSUM_TCP_PASSED 0x00000008 #define NDIS_RXCSUM_UDP_PASSED 0x00000010 #define NDIS_RXCSUM_IP_PASSED 0x00000020 #define NDIS_RXCSUM_LOOPBACK 0x00000040 struct ndis_vlan { union { struct { uint32_t nvt_userprio:3; uint32_t nvt_canformatid:1; uint32_t nvt_vlanid:12; uint32_t nvt_rsvd:16; } nv_taghdr; } u; }; typedef struct ndis_vlan ndis_vlan; enum ndis_perpkt_info { ndis_tcpipcsum_info, ndis_ipsec_info, ndis_largesend_info, ndis_classhandle_info, ndis_rsvd, ndis_sclist_info, ndis_ieee8021q_info, ndis_originalpkt_info, ndis_packetcancelid, ndis_maxpkt_info }; typedef enum ndis_perpkt_info ndis_perpkt_info; struct ndis_packet_extension { void *npe_info[ndis_maxpkt_info]; }; typedef struct ndis_packet_extension ndis_packet_extension; struct ndis_packet_private { uint32_t npp_physcnt; uint32_t npp_totlen; ndis_buffer *npp_head; ndis_buffer *npp_tail; void *npp_pool; uint32_t npp_count; uint32_t npp_flags; uint8_t npp_validcounts; uint8_t npp_ndispktflags; uint16_t npp_packetooboffset; }; #define NDIS_FLAGS_PROTOCOL_ID_MASK 0x0000000F #define NDIS_FLAGS_MULTICAST_PACKET 0x00000010 #define NDIS_FLAGS_RESERVED2 0x00000020 #define NDIS_FLAGS_RESERVED3 0x00000040 #define NDIS_FLAGS_DONT_LOOPBACK 0x00000080 #define NDIS_FLAGS_IS_LOOPBACK_PACKET 0x00000100 #define NDIS_FLAGS_LOOPBACK_ONLY 0x00000200 #define NDIS_FLAGS_RESERVED4 0x00000400 #define NDIS_FLAGS_DOUBLE_BUFFERED 0x00000800 #define NDIS_FLAGS_SENT_AT_DPC 0x00001000 #define NDIS_FLAGS_USES_SG_BUFFER_LIST 0x00002000 #define NDIS_PACKET_WRAPPER_RESERVED 0x3F #define NDIS_PACKET_CONTAINS_MEDIA_SPECIFIC_INFO 0x40 #define 
NDIS_PACKET_ALLOCATED_BY_NDIS 0x80 #define NDIS_PROTOCOL_ID_DEFAULT 0x00 #define NDIS_PROTOCOL_ID_TCP_IP 0x02 #define NDIS_PROTOCOL_ID_IPX 0x06 #define NDIS_PROTOCOL_ID_NBF 0x07 #define NDIS_PROTOCOL_ID_MAX 0x0F #define NDIS_PROTOCOL_ID_MASK 0x0F typedef struct ndis_packet_private ndis_packet_private; enum ndis_classid { ndis_class_802_3prio, ndis_class_wirelesswan_mbx, ndis_class_irda_packetinfo, ndis_class_atm_aainfo }; typedef enum ndis_classid ndis_classid; struct ndis_mediaspecific_info { uint32_t nmi_nextentoffset; ndis_classid nmi_classid; uint32_t nmi_size; uint8_t nmi_classinfo[1]; }; typedef struct ndis_mediaspecific_info ndis_mediaspecific_info; struct ndis_packet_oob { union { uint64_t npo_timetotx; uint64_t npo_timetxed; } u; uint64_t npo_timerxed; uint32_t npo_hdrlen; uint32_t npo_mediaspecific_len; void *npo_mediaspecific; ndis_status npo_status; }; typedef struct ndis_packet_oob ndis_packet_oob; /* * Our protocol private region for handling ethernet. * We need this to stash some of the things returned * by NdisMEthIndicateReceive(). */ struct ndis_ethpriv { void *nep_ctx; /* packet context */ long nep_offset; /* residual data to transfer */ void *nep_pad[2]; }; typedef struct ndis_ethpriv ndis_ethpriv; #define PROTOCOL_RESERVED_SIZE_IN_PACKET (4 * sizeof(void *)) struct ndis_packet { ndis_packet_private np_private; union { /* For connectionless miniports. */ struct { uint8_t np_miniport_rsvd[2 * sizeof(void *)]; uint8_t np_wrapper_rsvd[2 * sizeof(void *)]; } np_clrsvd; /* For de-serialized miniports */ struct { uint8_t np_miniport_rsvdex[3 * sizeof(void *)]; uint8_t np_wrapper_rsvdex[sizeof(void *)]; } np_dsrsvd; struct { uint8_t np_mac_rsvd[4 * sizeof(void *)]; } np_macrsvd; } u; uint32_t *np_rsvd[2]; uint8_t np_protocolreserved[PROTOCOL_RESERVED_SIZE_IN_PACKET]; /* * This next part is probably wrong, but we need some place * to put the out of band data structure... */ ndis_packet_oob np_oob; ndis_packet_extension np_ext; ndis_sc_list np_sclist; /* BSD-specific stuff which should be invisible to drivers. 
*/ uint32_t np_refcnt; void *np_softc; void *np_m0; int np_txidx; list_entry np_list; }; typedef struct ndis_packet ndis_packet; struct ndis_packet_pool { slist_header np_head; int np_dead; nt_kevent np_event; kspin_lock np_lock; int np_cnt; int np_len; int np_protrsvd; void *np_pktmem; }; typedef struct ndis_packet_pool ndis_packet_pool; /* mbuf ext type for NDIS */ #define EXT_NDIS EXT_NET_DRV /* mtx type for NDIS */ #define MTX_NDIS_LOCK "NDIS lock" struct ndis_filterdbs { union { void *nf_ethdb; void *nf_nulldb; } u; void *nf_trdb; void *nf_fddidb; void *nf_arcdb; }; typedef struct ndis_filterdbs ndis_filterdbs; #define nf_ethdb u.nf_ethdb enum ndis_medium { NdisMedium802_3, NdisMedium802_5, NdisMediumFddi, NdisMediumWan, NdisMediumLocalTalk, NdisMediumDix, /* defined for convenience, not a real medium */ NdisMediumArcnetRaw, NdisMediumArcnet878_2, NdisMediumAtm, NdisMediumWirelessWan, NdisMediumIrda, NdisMediumBpc, NdisMediumCoWan, NdisMedium1394, NdisMediumMax }; typedef enum ndis_medium ndis_medium; /* enum interface_type { InterfaceTypeUndefined = -1, Internal, Isa, Eisa, MicroChannel, TurboChannel, PCIBus, VMEBus, NuBus, PCMCIABus, CBus, MPIBus, MPSABus, ProcessorInternal, InternalPowerBus, PNPISABus, PNPBus, MaximumInterfaceType }; */ enum ndis_interface_type { NdisInterfaceInternal = Internal, NdisInterfaceIsa = Isa, NdisInterfaceEisa = Eisa, NdisInterfaceMca = MicroChannel, NdisInterfaceTurboChannel = TurboChannel, NdisInterfacePci = PCIBus, NdisInterfacePcMcia = PCMCIABus }; typedef enum ndis_interface_type ndis_interface_type; struct ndis_paddr_unit { ndis_physaddr npu_physaddr; uint32_t npu_len; }; typedef struct ndis_paddr_unit ndis_paddr_unit; struct ndis_map_arg { ndis_paddr_unit *nma_fraglist; int nma_cnt; int nma_max; }; /* * Miniport characteristics were originally defined in the NDIS 3.0 * spec and then extended twice, in NDIS 4.0 and 5.0. 
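[Editor's note] Because the characteristics structure that follows grew with each spec revision, code touching the 4.0/5.0/5.1 extension slots should gate on the advertised version. A hypothetical check against the version fields defined just below:

static int
example_has_ndis5_exts(ndis_miniport_characteristics *ch)
{

        return (ch->nmc_version_major >= 5);
}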
*/ struct ndis_miniport_characteristics { /* NDIS 3.0 */ uint8_t nmc_version_major; uint8_t nmc_version_minor; uint16_t nmc_pad; uint32_t nmc_rsvd; void * nmc_checkhang_func; void * nmc_disable_interrupts_func; void * nmc_enable_interrupts_func; void * nmc_halt_func; void * nmc_interrupt_func; void * nmc_init_func; void * nmc_isr_func; void * nmc_queryinfo_func; void * nmc_reconfig_func; void * nmc_reset_func; void * nmc_sendsingle_func; void * nmc_setinfo_func; void * nmc_transferdata_func; /* NDIS 4.0 extensions */ void * nmc_return_packet_func; void * nmc_sendmulti_func; void * nmc_allocate_complete_func; /* NDIS 5.0 extensions */ void * nmc_cocreatevc_func; void * nmc_codeletevc_func; void * nmc_coactivatevc_func; void * nmc_codeactivatevc_func; void * nmc_comultisend_func; void * nmc_corequest_func; /* NDIS 5.1 extensions */ void * nmc_canceltxpkts_handler; void * nmc_pnpevent_handler; void * nmc_shutdown_handler; void * nmc_rsvd0; void * nmc_rsvd1; void * nmc_rsvd2; void * nmc_rsvd3; }; typedef struct ndis_miniport_characteristics ndis_miniport_characteristics; struct ndis_driver_object { char *ndo_ifname; void *ndo_softc; ndis_miniport_characteristics ndo_chars; }; typedef struct ndis_driver_object ndis_driver_object; struct ndis_reference { ndis_kspin_lock nr_spinlock; uint16_t nr_refcnt; uint8_t nr_closing; }; typedef struct ndis_reference ndis_reference; struct ndis_timer_entry { struct callout nte_ch; ndis_miniport_timer *nte_timer; TAILQ_ENTRY(ndis_timer_entry) link; }; TAILQ_HEAD(nte_head, ndis_timer_entry); #define NDIS_FH_TYPE_VFS 0 #define NDIS_FH_TYPE_MODULE 1 struct ndis_fh { int nf_type; char *nf_name; void *nf_vp; void *nf_map; uint32_t nf_maplen; }; typedef struct ndis_fh ndis_fh; /* * The miniport block is basically the internal NDIS handle. We need * to define this because, unfortunately, it is not entirely opaque * to NDIS drivers. For one thing, it contains the function pointer * to the NDIS packet receive handler, which is invoked out of the * NDIS block via a macro rather than a function pointer. (The * NdisMIndicateReceivePacket() routine is a macro rather than * a function.) For another, the driver maintains a pointer to the * miniport block and passes it as a handle to various NDIS functions. * (The driver never really knows this because it's hidden behind * an ndis_handle though.) * * The miniport block has two parts: the first part contains fields * that must never change, since they are referenced by driver * binaries through macros. The second part is ignored by the driver, * but contains various things used internaly by NDIS.SYS. In our * case, we define the first 'immutable' part exactly as it appears * in Windows, but don't bother duplicating the Windows definitions * for the second part. Instead, we replace them with a few BSD-specific * things. */ struct ndis_miniport_block { /* * Windows-specific portion -- DO NOT MODIFY OR NDIS * DRIVERS WILL NOT WORK. 
*/ void *nmb_signature; /* magic number */ ndis_miniport_block *nmb_nextminiport; ndis_mdriver_block *nmb_driverhandle; ndis_handle nmb_miniportadapterctx; ndis_unicode_string nmb_name; ndis_bind_paths *nmb_bindpaths; ndis_handle nmb_openqueue; ndis_reference nmb_ref; ndis_handle nmb_devicectx; uint8_t nmb_padding; uint8_t nmb_lockacquired; uint8_t nmb_pmodeopens; uint8_t nmb_assignedcpu; ndis_kspin_lock nmb_lock; ndis_request *nmb_mediarequest; ndis_miniport_interrupt *nmb_interrupt; uint32_t nmb_flags; uint32_t nmb_pnpflags; list_entry nmb_packetlist; ndis_packet *nmb_firstpendingtxpacket; ndis_packet *nmb_returnpacketqueue; uint32_t nmb_requestbuffer; void *nmb_setmcastbuf; ndis_miniport_block *nmb_primaryminiport; void *nmb_wrapperctx; void *nmb_busdatactx; uint32_t nmb_pnpcaps; cm_resource_list *nmb_resources; ndis_timer nmb_wkupdpctimer; ndis_unicode_string nmb_basename; ndis_unicode_string nmb_symlinkname; uint32_t nmb_checkforhangsecs; uint16_t nmb_cfhticks; uint16_t nmb_cfhcurrticks; ndis_status nmb_resetstatus; ndis_handle nmb_resetopen; ndis_filterdbs nmb_filterdbs; void *nmb_pktind_func; void *nmb_senddone_func; void *nmb_sendrsrc_func; void *nmb_resetdone_func; ndis_medium nmb_medium; uint32_t nmb_busnum; uint32_t nmb_bustype; uint32_t nmb_adaptertype; device_object *nmb_deviceobj; /* Functional device */ device_object *nmb_physdeviceobj; /* Physical device */ device_object *nmb_nextdeviceobj; /* Next dev in stack */ void *nmb_mapreg; void *nmb_callmgraflist; void *nmb_miniportthread; void *nmb_setinfobuf; uint16_t nmb_setinfobuflen; uint16_t nmb_maxsendpkts; ndis_status nmb_fakestatus; void *nmb_lockhandler; ndis_unicode_string *nmb_adapterinstancename; void *nmb_timerqueue; uint32_t nmb_mactoptions; ndis_request *nmb_pendingreq; uint32_t nmb_maxlongaddrs; uint32_t nmb_maxshortaddrs; uint32_t nmb_currlookahead; uint32_t nmb_maxlookahead; void *nmb_interrupt_func; void *nmb_disableintr_func; void *nmb_enableintr_func; void *nmb_sendpkts_func; void *nmb_deferredsend_func; void *nmb_ethrxindicate_func; void *nmb_txrxindicate_func; void *nmb_fddirxindicate_func; void *nmb_ethrxdone_func; void *nmb_txrxdone_func; void *nmb_fddirxcond_func; void *nmb_status_func; void *nmb_statusdone_func; void *nmb_tdcond_func; void *nmb_querydone_func; void *nmb_setdone_func; void *nmb_wantxdone_func; void *nmb_wanrx_func; void *nmb_wanrxdone_func; /* * End of windows-specific portion of miniport block. Everything * below is BSD-specific. 
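[Editor's note] To make the layout constraint concrete: driver binaries reach into the block through macros with compiled-in field offsets, which is why nothing above this point may move. An illustrative shape only, not the actual Windows definition:

typedef void (*example_pktind_func)(ndis_handle, ndis_packet **, uint32_t);

#define EXAMPLE_INDICATE_RECEIVE(h, pkts, cnt)                          \
        ((example_pktind_func)                                          \
            ((ndis_miniport_block *)(h))->nmb_pktind_func)((h), (pkts), (cnt))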
*/ list_entry nmb_parmlist; ndis_resource_list *nmb_rlist; ndis_status nmb_getstat; nt_kevent nmb_getevent; ndis_status nmb_setstat; nt_kevent nmb_setevent; nt_kevent nmb_resetevent; io_workitem *nmb_returnitem; ndis_miniport_timer *nmb_timerlist; ndis_handle nmb_rxpool; list_entry nmb_returnlist; kspin_lock nmb_returnlock; TAILQ_ENTRY(ndis_miniport_block) link; }; TAILQ_HEAD(nd_head, ndis_miniport_block); typedef ndis_status (*ndis_init_handler)(ndis_status *, uint32_t *, ndis_medium *, uint32_t, ndis_handle, ndis_handle); typedef ndis_status (*ndis_queryinfo_handler)(ndis_handle, ndis_oid, void *, uint32_t, uint32_t *, uint32_t *); typedef ndis_status (*ndis_setinfo_handler)(ndis_handle, ndis_oid, void *, uint32_t, uint32_t *, uint32_t *); typedef ndis_status (*ndis_sendsingle_handler)(ndis_handle, ndis_packet *, uint32_t); typedef ndis_status (*ndis_sendmulti_handler)(ndis_handle, ndis_packet **, uint32_t); typedef void (*ndis_isr_handler)(uint8_t *, uint8_t *, ndis_handle); typedef void (*ndis_interrupt_handler)(ndis_handle); typedef int (*ndis_reset_handler)(uint8_t *, ndis_handle); typedef void (*ndis_halt_handler)(ndis_handle); typedef void (*ndis_return_handler)(ndis_handle, ndis_packet *); typedef void (*ndis_enable_interrupts_handler)(ndis_handle); typedef void (*ndis_disable_interrupts_handler)(ndis_handle); typedef void (*ndis_shutdown_handler)(void *); typedef void (*ndis_pnpevent_handler)(void *, int, void *, uint32_t); typedef void (*ndis_allocdone_handler)(ndis_handle, void *, ndis_physaddr *, uint32_t, void *); typedef uint8_t (*ndis_checkforhang_handler)(ndis_handle); typedef ndis_status (*driver_entry)(void *, unicode_string *); extern image_patch_table ndis_functbl[]; #define NDIS_TASKQUEUE 1 #define NDIS_SWI 2 #define NDIS_PSTATE_RUNNING 1 #define NDIS_PSTATE_SLEEPING 2 #define NdisQueryPacket(p, pbufcnt, bufcnt, firstbuf, plen) \ do { \ if ((firstbuf) != NULL) { \ ndis_buffer **_first; \ _first = firstbuf; \ *(_first) = (p)->np_private.npp_head; \ } \ if ((plen) || (bufcnt) || (pbufcnt)) { \ if ((p)->np_private.npp_validcounts == FALSE) { \ ndis_buffer *tmp; \ unsigned int tlen = 0, pcnt = 0; \ unsigned int add = 0; \ unsigned int pktlen, off; \ \ tmp = (p)->np_private.npp_head; \ while (tmp != NULL) { \ off = MmGetMdlByteOffset(tmp); \ pktlen = MmGetMdlByteCount(tmp);\ tlen += pktlen; \ pcnt += \ NDIS_BUFFER_TO_SPAN_PAGES(tmp); \ add++; \ tmp = tmp->mdl_next; \ } \ (p)->np_private.npp_count = add; \ (p)->np_private.npp_totlen = tlen; \ (p)->np_private.npp_physcnt = pcnt; \ (p)->np_private.npp_validcounts = TRUE; \ } \ if (pbufcnt) { \ unsigned int *_pbufcnt; \ _pbufcnt = (pbufcnt); \ *(_pbufcnt) = (p)->np_private.npp_physcnt; \ } \ if (bufcnt) { \ unsigned int *_bufcnt; \ _bufcnt = (bufcnt); \ *(_bufcnt) = (p)->np_private.npp_count; \ } \ if (plen) { \ unsigned int *_plen; \ _plen = (plen); \ *(_plen) = (p)->np_private.npp_totlen; \ } \ } \ } while (0) __BEGIN_DECLS extern int ndis_libinit(void); extern int ndis_libfini(void); extern int ndis_load_driver(vm_offset_t, void *); extern int ndis_unload_driver(void *); extern int ndis_mtop(struct mbuf *, ndis_packet **); extern int ndis_ptom(struct mbuf **, ndis_packet *); extern int ndis_get_info(void *, ndis_oid, void *, int *); extern int ndis_set_info(void *, ndis_oid, void *, int *); extern void *ndis_get_routine_address(struct image_patch_table *, char *); extern int ndis_get_supported_oids(void *, ndis_oid **, int *); extern int ndis_send_packets(void *, ndis_packet **, int); extern int ndis_send_packet(void *, 
ndis_packet *); extern int ndis_convert_res(void *); extern int ndis_alloc_amem(void *); extern void ndis_free_amem(void *); extern void ndis_free_packet(ndis_packet *); extern void ndis_free_bufs(ndis_buffer *); extern int ndis_reset_nic(void *); extern int ndis_halt_nic(void *); extern int ndis_shutdown_nic(void *); extern int ndis_pnpevent_nic(void *, int); extern int ndis_init_nic(void *); -extern void ndis_return_packet(struct mbuf *, void *, void *); +extern void ndis_return_packet(ndis_packet *); extern int ndis_init_dma(void *); extern int ndis_destroy_dma(void *); extern int ndis_create_sysctls(void *); extern int ndis_add_sysctl(void *, char *, char *, char *, int); extern int ndis_flush_sysctls(void *); extern uint32_t NdisAddDevice(driver_object *, device_object *); extern void NdisAllocatePacketPool(ndis_status *, ndis_handle *, uint32_t, uint32_t); extern void NdisAllocatePacketPoolEx(ndis_status *, ndis_handle *, uint32_t, uint32_t, uint32_t); extern uint32_t NdisPacketPoolUsage(ndis_handle); extern void NdisFreePacketPool(ndis_handle); extern void NdisAllocatePacket(ndis_status *, ndis_packet **, ndis_handle); extern void NdisFreePacket(ndis_packet *); extern ndis_status NdisScheduleWorkItem(ndis_work_item *); extern void NdisMSleep(uint32_t); __END_DECLS #endif /* _NDIS_VAR_H_ */ Index: head/sys/dev/cas/if_cas.c =================================================================== --- head/sys/dev/cas/if_cas.c (revision 324445) +++ head/sys/dev/cas/if_cas.c (revision 324446) @@ -1,2945 +1,2919 @@ /*- * Copyright (C) 2001 Eduardo Horvath. * Copyright (c) 2001-2003 Thomas Moestl * Copyright (c) 2007-2009 Marius Strobl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: NetBSD: gem.c,v 1.21 2002/06/01 23:50:58 lukem Exp * from: FreeBSD: if_gem.c 182060 2008-08-23 15:03:26Z marius */ #include __FBSDID("$FreeBSD$"); /* * driver for Sun Cassini/Cassini+ and National Semiconductor DP83065 * Saturn Gigabit Ethernet controllers */ #if 0 #define CAS_DEBUG #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__powerpc__) || defined(__sparc64__) #include #include #include #endif #include #include #include #include #include #include #include #include "miibus_if.h" #define RINGASSERT(n , min, max) \ CTASSERT(powerof2(n) && (n) >= (min) && (n) <= (max)) RINGASSERT(CAS_NRXCOMP, 128, 32768); RINGASSERT(CAS_NRXDESC, 32, 8192); RINGASSERT(CAS_NRXDESC2, 32, 8192); RINGASSERT(CAS_NTXDESC, 32, 8192); #undef RINGASSERT #define CCDASSERT(m, a) \ CTASSERT((offsetof(struct cas_control_data, m) & ((a) - 1)) == 0) CCDASSERT(ccd_rxcomps, CAS_RX_COMP_ALIGN); CCDASSERT(ccd_rxdescs, CAS_RX_DESC_ALIGN); CCDASSERT(ccd_rxdescs2, CAS_RX_DESC_ALIGN); #undef CCDASSERT #define CAS_TRIES 10000 /* * According to documentation, the hardware has support for basic TCP * checksum offloading only, in practice this can be also used for UDP * however (i.e. the problem of previous Sun NICs that a checksum of 0x0 * is not converted to 0xffff no longer exists). */ #define CAS_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) static inline void cas_add_rxdesc(struct cas_softc *sc, u_int idx); static int cas_attach(struct cas_softc *sc); static int cas_bitwait(struct cas_softc *sc, bus_addr_t r, uint32_t clr, uint32_t set); static void cas_cddma_callback(void *xsc, bus_dma_segment_t *segs, int nsegs, int error); static void cas_detach(struct cas_softc *sc); static int cas_disable_rx(struct cas_softc *sc); static int cas_disable_tx(struct cas_softc *sc); static void cas_eint(struct cas_softc *sc, u_int status); -static void cas_free(struct mbuf *m, void *arg1, void* arg2); +static void cas_free(struct mbuf *m); static void cas_init(void *xsc); static void cas_init_locked(struct cas_softc *sc); static void cas_init_regs(struct cas_softc *sc); static int cas_intr(void *v); static void cas_intr_task(void *arg, int pending __unused); static int cas_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int cas_load_txmbuf(struct cas_softc *sc, struct mbuf **m_head); static int cas_mediachange(struct ifnet *ifp); static void cas_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr); static void cas_meminit(struct cas_softc *sc); static void cas_mifinit(struct cas_softc *sc); static int cas_mii_readreg(device_t dev, int phy, int reg); static void cas_mii_statchg(device_t dev); static int cas_mii_writereg(device_t dev, int phy, int reg, int val); static void cas_reset(struct cas_softc *sc); static int cas_reset_rx(struct cas_softc *sc); static int cas_reset_tx(struct cas_softc *sc); static void cas_resume(struct cas_softc *sc); static u_int cas_descsize(u_int sz); static void cas_rint(struct cas_softc *sc); static void cas_rint_timeout(void *arg); static inline void cas_rxcksum(struct mbuf *m, uint16_t cksum); static inline void cas_rxcompinit(struct cas_rx_comp *rxcomp); static u_int cas_rxcompsize(u_int sz); static void cas_rxdma_callback(void *xsc, bus_dma_segment_t *segs, int nsegs, int error); static void cas_setladrf(struct cas_softc *sc); static void 
cas_start(struct ifnet *ifp); static void cas_stop(struct ifnet *ifp); static void cas_suspend(struct cas_softc *sc); static void cas_tick(void *arg); static void cas_tint(struct cas_softc *sc); static void cas_tx_task(void *arg, int pending __unused); static inline void cas_txkick(struct cas_softc *sc); static void cas_watchdog(struct cas_softc *sc); static devclass_t cas_devclass; MODULE_DEPEND(cas, ether, 1, 1, 1); MODULE_DEPEND(cas, miibus, 1, 1, 1); #ifdef CAS_DEBUG #include #define KTR_CAS KTR_SPARE2 #endif static int cas_attach(struct cas_softc *sc) { struct cas_txsoft *txs; struct ifnet *ifp; int error, i; uint32_t v; /* Set up ifnet structure. */ ifp = sc->sc_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) return (ENOSPC); ifp->if_softc = sc; if_initname(ifp, device_get_name(sc->sc_dev), device_get_unit(sc->sc_dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_start = cas_start; ifp->if_ioctl = cas_ioctl; ifp->if_init = cas_init; IFQ_SET_MAXLEN(&ifp->if_snd, CAS_TXQUEUELEN); ifp->if_snd.ifq_drv_maxlen = CAS_TXQUEUELEN; IFQ_SET_READY(&ifp->if_snd); callout_init_mtx(&sc->sc_tick_ch, &sc->sc_mtx, 0); callout_init_mtx(&sc->sc_rx_ch, &sc->sc_mtx, 0); /* Create local taskq. */ TASK_INIT(&sc->sc_intr_task, 0, cas_intr_task, sc); TASK_INIT(&sc->sc_tx_task, 1, cas_tx_task, ifp); sc->sc_tq = taskqueue_create_fast("cas_taskq", M_WAITOK, taskqueue_thread_enqueue, &sc->sc_tq); if (sc->sc_tq == NULL) { device_printf(sc->sc_dev, "could not create taskqueue\n"); error = ENXIO; goto fail_ifnet; } error = taskqueue_start_threads(&sc->sc_tq, 1, PI_NET, "%s taskq", device_get_nameunit(sc->sc_dev)); if (error != 0) { device_printf(sc->sc_dev, "could not start threads\n"); goto fail_taskq; } /* Make sure the chip is stopped. */ cas_reset(sc); error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, 0, BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_pdmatag); if (error != 0) goto fail_taskq; error = bus_dma_tag_create(sc->sc_pdmatag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, CAS_PAGE_SIZE, 1, CAS_PAGE_SIZE, 0, NULL, NULL, &sc->sc_rdmatag); if (error != 0) goto fail_ptag; error = bus_dma_tag_create(sc->sc_pdmatag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES * CAS_NTXSEGS, CAS_NTXSEGS, MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->sc_tdmatag); if (error != 0) goto fail_rtag; error = bus_dma_tag_create(sc->sc_pdmatag, CAS_TX_DESC_ALIGN, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, sizeof(struct cas_control_data), 1, sizeof(struct cas_control_data), 0, NULL, NULL, &sc->sc_cdmatag); if (error != 0) goto fail_ttag; /* * Allocate the control data structures, create and load the * DMA map for it. */ if ((error = bus_dmamem_alloc(sc->sc_cdmatag, (void **)&sc->sc_control_data, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->sc_cddmamap)) != 0) { device_printf(sc->sc_dev, "unable to allocate control data, error = %d\n", error); goto fail_ctag; } sc->sc_cddma = 0; if ((error = bus_dmamap_load(sc->sc_cdmatag, sc->sc_cddmamap, sc->sc_control_data, sizeof(struct cas_control_data), cas_cddma_callback, sc, 0)) != 0 || sc->sc_cddma == 0) { device_printf(sc->sc_dev, "unable to load control data DMA map, error = %d\n", error); goto fail_cmem; } /* * Initialize the transmit job descriptors. */ STAILQ_INIT(&sc->sc_txfreeq); STAILQ_INIT(&sc->sc_txdirtyq); /* * Create the transmit buffer DMA maps. 
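 * One map is created per TX job up front (CAS_TXQUEUELEN in total),
 * so cas_load_txmbuf() never has to allocate a map on the transmit
 * path.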
*/ error = ENOMEM; for (i = 0; i < CAS_TXQUEUELEN; i++) { txs = &sc->sc_txsoft[i]; txs->txs_mbuf = NULL; txs->txs_ndescs = 0; if ((error = bus_dmamap_create(sc->sc_tdmatag, 0, &txs->txs_dmamap)) != 0) { device_printf(sc->sc_dev, "unable to create TX DMA map %d, error = %d\n", i, error); goto fail_txd; } STAILQ_INSERT_TAIL(&sc->sc_txfreeq, txs, txs_q); } /* * Allocate the receive buffers, create and load the DMA maps * for them. */ for (i = 0; i < CAS_NRXDESC; i++) { if ((error = bus_dmamem_alloc(sc->sc_rdmatag, &sc->sc_rxdsoft[i].rxds_buf, BUS_DMA_WAITOK, &sc->sc_rxdsoft[i].rxds_dmamap)) != 0) { device_printf(sc->sc_dev, "unable to allocate RX buffer %d, error = %d\n", i, error); goto fail_rxmem; } sc->sc_rxdptr = i; sc->sc_rxdsoft[i].rxds_paddr = 0; if ((error = bus_dmamap_load(sc->sc_rdmatag, sc->sc_rxdsoft[i].rxds_dmamap, sc->sc_rxdsoft[i].rxds_buf, CAS_PAGE_SIZE, cas_rxdma_callback, sc, 0)) != 0 || sc->sc_rxdsoft[i].rxds_paddr == 0) { device_printf(sc->sc_dev, "unable to load RX DMA map %d, error = %d\n", i, error); goto fail_rxmap; } } if ((sc->sc_flags & CAS_SERDES) == 0) { CAS_WRITE_4(sc, CAS_PCS_DATAPATH, CAS_PCS_DATAPATH_MII); CAS_BARRIER(sc, CAS_PCS_DATAPATH, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); cas_mifinit(sc); /* * Look for an external PHY. */ error = ENXIO; v = CAS_READ_4(sc, CAS_MIF_CONF); if ((v & CAS_MIF_CONF_MDI1) != 0) { v |= CAS_MIF_CONF_PHY_SELECT; CAS_WRITE_4(sc, CAS_MIF_CONF, v); CAS_BARRIER(sc, CAS_MIF_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); /* Enable/unfreeze the GMII pins of Saturn. */ if (sc->sc_variant == CAS_SATURN) { CAS_WRITE_4(sc, CAS_SATURN_PCFG, CAS_READ_4(sc, CAS_SATURN_PCFG) & ~CAS_SATURN_PCFG_FSI); CAS_BARRIER(sc, CAS_SATURN_PCFG, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); DELAY(10000); } error = mii_attach(sc->sc_dev, &sc->sc_miibus, ifp, cas_mediachange, cas_mediastatus, BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, MIIF_DOPAUSE); } /* * Fall back on an internal PHY if no external PHY was found. */ if (error != 0 && (v & CAS_MIF_CONF_MDI0) != 0) { v &= ~CAS_MIF_CONF_PHY_SELECT; CAS_WRITE_4(sc, CAS_MIF_CONF, v); CAS_BARRIER(sc, CAS_MIF_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); /* Freeze the GMII pins of Saturn for saving power. */ if (sc->sc_variant == CAS_SATURN) { CAS_WRITE_4(sc, CAS_SATURN_PCFG, CAS_READ_4(sc, CAS_SATURN_PCFG) | CAS_SATURN_PCFG_FSI); CAS_BARRIER(sc, CAS_SATURN_PCFG, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); DELAY(10000); } error = mii_attach(sc->sc_dev, &sc->sc_miibus, ifp, cas_mediachange, cas_mediastatus, BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, MIIF_DOPAUSE); } } else { /* * Use the external PCS SERDES. */ CAS_WRITE_4(sc, CAS_PCS_DATAPATH, CAS_PCS_DATAPATH_SERDES); CAS_BARRIER(sc, CAS_PCS_DATAPATH, 4, BUS_SPACE_BARRIER_WRITE); /* Enable/unfreeze the SERDES pins of Saturn. 
*/ if (sc->sc_variant == CAS_SATURN) { CAS_WRITE_4(sc, CAS_SATURN_PCFG, 0); CAS_BARRIER(sc, CAS_SATURN_PCFG, 4, BUS_SPACE_BARRIER_WRITE); } CAS_WRITE_4(sc, CAS_PCS_SERDES_CTRL, CAS_PCS_SERDES_CTRL_ESD); CAS_BARRIER(sc, CAS_PCS_SERDES_CTRL, 4, BUS_SPACE_BARRIER_WRITE); CAS_WRITE_4(sc, CAS_PCS_CONF, CAS_PCS_CONF_EN); CAS_BARRIER(sc, CAS_PCS_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); error = mii_attach(sc->sc_dev, &sc->sc_miibus, ifp, cas_mediachange, cas_mediastatus, BMSR_DEFCAPMASK, CAS_PHYAD_EXTERNAL, MII_OFFSET_ANY, MIIF_DOPAUSE); } if (error != 0) { device_printf(sc->sc_dev, "attaching PHYs failed\n"); goto fail_rxmap; } sc->sc_mii = device_get_softc(sc->sc_miibus); /* * From this point forward, the attachment cannot fail. A failure * before this point releases all resources that may have been * allocated. */ /* Announce FIFO sizes. */ v = CAS_READ_4(sc, CAS_TX_FIFO_SIZE); device_printf(sc->sc_dev, "%ukB RX FIFO, %ukB TX FIFO\n", CAS_RX_FIFO_SIZE / 1024, v / 16); /* Attach the interface. */ ether_ifattach(ifp, sc->sc_enaddr); /* * Tell the upper layer(s) we support long frames/checksum offloads. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities = IFCAP_VLAN_MTU; if ((sc->sc_flags & CAS_NO_CSUM) == 0) { ifp->if_capabilities |= IFCAP_HWCSUM; ifp->if_hwassist = CAS_CSUM_FEATURES; } ifp->if_capenable = ifp->if_capabilities; return (0); /* * Free any resources we've allocated during the failed attach * attempt. Do this in reverse order and fall through. */ fail_rxmap: for (i = 0; i < CAS_NRXDESC; i++) if (sc->sc_rxdsoft[i].rxds_paddr != 0) bus_dmamap_unload(sc->sc_rdmatag, sc->sc_rxdsoft[i].rxds_dmamap); fail_rxmem: for (i = 0; i < CAS_NRXDESC; i++) if (sc->sc_rxdsoft[i].rxds_buf != NULL) bus_dmamem_free(sc->sc_rdmatag, sc->sc_rxdsoft[i].rxds_buf, sc->sc_rxdsoft[i].rxds_dmamap); fail_txd: for (i = 0; i < CAS_TXQUEUELEN; i++) if (sc->sc_txsoft[i].txs_dmamap != NULL) bus_dmamap_destroy(sc->sc_tdmatag, sc->sc_txsoft[i].txs_dmamap); bus_dmamap_unload(sc->sc_cdmatag, sc->sc_cddmamap); fail_cmem: bus_dmamem_free(sc->sc_cdmatag, sc->sc_control_data, sc->sc_cddmamap); fail_ctag: bus_dma_tag_destroy(sc->sc_cdmatag); fail_ttag: bus_dma_tag_destroy(sc->sc_tdmatag); fail_rtag: bus_dma_tag_destroy(sc->sc_rdmatag); fail_ptag: bus_dma_tag_destroy(sc->sc_pdmatag); fail_taskq: taskqueue_free(sc->sc_tq); fail_ifnet: if_free(ifp); return (error); } static void cas_detach(struct cas_softc *sc) { struct ifnet *ifp = sc->sc_ifp; int i; ether_ifdetach(ifp); CAS_LOCK(sc); cas_stop(ifp); CAS_UNLOCK(sc); callout_drain(&sc->sc_tick_ch); callout_drain(&sc->sc_rx_ch); taskqueue_drain(sc->sc_tq, &sc->sc_intr_task); taskqueue_drain(sc->sc_tq, &sc->sc_tx_task); if_free(ifp); taskqueue_free(sc->sc_tq); device_delete_child(sc->sc_dev, sc->sc_miibus); for (i = 0; i < CAS_NRXDESC; i++) if (sc->sc_rxdsoft[i].rxds_dmamap != NULL) bus_dmamap_sync(sc->sc_rdmatag, sc->sc_rxdsoft[i].rxds_dmamap, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (i = 0; i < CAS_NRXDESC; i++) if (sc->sc_rxdsoft[i].rxds_paddr != 0) bus_dmamap_unload(sc->sc_rdmatag, sc->sc_rxdsoft[i].rxds_dmamap); for (i = 0; i < CAS_NRXDESC; i++) if (sc->sc_rxdsoft[i].rxds_buf != NULL) bus_dmamem_free(sc->sc_rdmatag, sc->sc_rxdsoft[i].rxds_buf, sc->sc_rxdsoft[i].rxds_dmamap); for (i = 0; i < CAS_TXQUEUELEN; i++) if (sc->sc_txsoft[i].txs_dmamap != NULL) bus_dmamap_destroy(sc->sc_tdmatag, sc->sc_txsoft[i].txs_dmamap); CAS_CDSYNC(sc, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_cdmatag, sc->sc_cddmamap); 
bus_dmamem_free(sc->sc_cdmatag, sc->sc_control_data, sc->sc_cddmamap); bus_dma_tag_destroy(sc->sc_cdmatag); bus_dma_tag_destroy(sc->sc_tdmatag); bus_dma_tag_destroy(sc->sc_rdmatag); bus_dma_tag_destroy(sc->sc_pdmatag); } static void cas_suspend(struct cas_softc *sc) { struct ifnet *ifp = sc->sc_ifp; CAS_LOCK(sc); cas_stop(ifp); CAS_UNLOCK(sc); } static void cas_resume(struct cas_softc *sc) { struct ifnet *ifp = sc->sc_ifp; CAS_LOCK(sc); /* * On resume all registers have to be initialized again like * after power-on. */ sc->sc_flags &= ~CAS_INITED; if (ifp->if_flags & IFF_UP) cas_init_locked(sc); CAS_UNLOCK(sc); } static inline void cas_rxcksum(struct mbuf *m, uint16_t cksum) { struct ether_header *eh; struct ip *ip; struct udphdr *uh; uint16_t *opts; int32_t hlen, len, pktlen; uint32_t temp32; pktlen = m->m_pkthdr.len; if (pktlen < sizeof(struct ether_header) + sizeof(struct ip)) return; eh = mtod(m, struct ether_header *); if (eh->ether_type != htons(ETHERTYPE_IP)) return; ip = (struct ip *)(eh + 1); if (ip->ip_v != IPVERSION) return; hlen = ip->ip_hl << 2; pktlen -= sizeof(struct ether_header); if (hlen < sizeof(struct ip)) return; if (ntohs(ip->ip_len) < hlen) return; if (ntohs(ip->ip_len) != pktlen) return; if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) return; /* Cannot handle fragmented packet. */ switch (ip->ip_p) { case IPPROTO_TCP: if (pktlen < (hlen + sizeof(struct tcphdr))) return; break; case IPPROTO_UDP: if (pktlen < (hlen + sizeof(struct udphdr))) return; uh = (struct udphdr *)((uint8_t *)ip + hlen); if (uh->uh_sum == 0) return; /* no checksum */ break; default: return; } cksum = ~cksum; /* checksum fixup for IP options */ len = hlen - sizeof(struct ip); if (len > 0) { opts = (uint16_t *)(ip + 1); for (; len > 0; len -= sizeof(uint16_t), opts++) { temp32 = cksum - *opts; temp32 = (temp32 >> 16) + (temp32 & 65535); cksum = temp32 & 65535; } } m->m_pkthdr.csum_flags |= CSUM_DATA_VALID; m->m_pkthdr.csum_data = cksum; } static void cas_cddma_callback(void *xsc, bus_dma_segment_t *segs, int nsegs, int error) { struct cas_softc *sc = xsc; if (error != 0) return; if (nsegs != 1) panic("%s: bad control buffer segment count", __func__); sc->sc_cddma = segs[0].ds_addr; } static void cas_rxdma_callback(void *xsc, bus_dma_segment_t *segs, int nsegs, int error) { struct cas_softc *sc = xsc; if (error != 0) return; if (nsegs != 1) panic("%s: bad RX buffer segment count", __func__); sc->sc_rxdsoft[sc->sc_rxdptr].rxds_paddr = segs[0].ds_addr; } static void cas_tick(void *arg) { struct cas_softc *sc = arg; struct ifnet *ifp = sc->sc_ifp; uint32_t v; CAS_LOCK_ASSERT(sc, MA_OWNED); /* * Unload collision and error counters. */ if_inc_counter(ifp, IFCOUNTER_COLLISIONS, CAS_READ_4(sc, CAS_MAC_NORM_COLL_CNT) + CAS_READ_4(sc, CAS_MAC_FIRST_COLL_CNT)); v = CAS_READ_4(sc, CAS_MAC_EXCESS_COLL_CNT) + CAS_READ_4(sc, CAS_MAC_LATE_COLL_CNT); if_inc_counter(ifp, IFCOUNTER_COLLISIONS, v); if_inc_counter(ifp, IFCOUNTER_OERRORS, v); if_inc_counter(ifp, IFCOUNTER_IERRORS, CAS_READ_4(sc, CAS_MAC_RX_LEN_ERR_CNT) + CAS_READ_4(sc, CAS_MAC_RX_ALIGN_ERR) + CAS_READ_4(sc, CAS_MAC_RX_CRC_ERR_CNT) + CAS_READ_4(sc, CAS_MAC_RX_CODE_VIOL)); /* * Then clear the hardware counters. 
*/ CAS_WRITE_4(sc, CAS_MAC_NORM_COLL_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_FIRST_COLL_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_EXCESS_COLL_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_LATE_COLL_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_RX_LEN_ERR_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_RX_ALIGN_ERR, 0); CAS_WRITE_4(sc, CAS_MAC_RX_CRC_ERR_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_RX_CODE_VIOL, 0); mii_tick(sc->sc_mii); if (sc->sc_txfree != CAS_MAXTXFREE) cas_tint(sc); cas_watchdog(sc); callout_reset(&sc->sc_tick_ch, hz, cas_tick, sc); } static int cas_bitwait(struct cas_softc *sc, bus_addr_t r, uint32_t clr, uint32_t set) { int i; uint32_t reg; for (i = CAS_TRIES; i--; DELAY(100)) { reg = CAS_READ_4(sc, r); if ((reg & clr) == 0 && (reg & set) == set) return (1); } return (0); } static void cas_reset(struct cas_softc *sc) { #ifdef CAS_DEBUG CTR2(KTR_CAS, "%s: %s", device_get_name(sc->sc_dev), __func__); #endif /* Disable all interrupts in order to avoid spurious ones. */ CAS_WRITE_4(sc, CAS_INTMASK, 0xffffffff); cas_reset_rx(sc); cas_reset_tx(sc); /* * Do a full reset modulo the result of the last auto-negotiation * when using the SERDES. */ CAS_WRITE_4(sc, CAS_RESET, CAS_RESET_RX | CAS_RESET_TX | ((sc->sc_flags & CAS_SERDES) != 0 ? CAS_RESET_PCS_DIS : 0)); CAS_BARRIER(sc, CAS_RESET, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); DELAY(3000); if (!cas_bitwait(sc, CAS_RESET, CAS_RESET_RX | CAS_RESET_TX, 0)) device_printf(sc->sc_dev, "cannot reset device\n"); } static void cas_stop(struct ifnet *ifp) { struct cas_softc *sc = ifp->if_softc; struct cas_txsoft *txs; #ifdef CAS_DEBUG CTR2(KTR_CAS, "%s: %s", device_get_name(sc->sc_dev), __func__); #endif callout_stop(&sc->sc_tick_ch); callout_stop(&sc->sc_rx_ch); /* Disable all interrupts in order to avoid spurious ones. */ CAS_WRITE_4(sc, CAS_INTMASK, 0xffffffff); cas_reset_tx(sc); cas_reset_rx(sc); /* * Release any queued transmit buffers. */ while ((txs = STAILQ_FIRST(&sc->sc_txdirtyq)) != NULL) { STAILQ_REMOVE_HEAD(&sc->sc_txdirtyq, txs_q); if (txs->txs_ndescs != 0) { bus_dmamap_sync(sc->sc_tdmatag, txs->txs_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_tdmatag, txs->txs_dmamap); if (txs->txs_mbuf != NULL) { m_freem(txs->txs_mbuf); txs->txs_mbuf = NULL; } } STAILQ_INSERT_TAIL(&sc->sc_txfreeq, txs, txs_q); } /* * Mark the interface down and cancel the watchdog timer. */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); sc->sc_flags &= ~CAS_LINK; sc->sc_wdog_timer = 0; } static int cas_reset_rx(struct cas_softc *sc) { /* * Resetting while DMA is in progress can cause a bus hang, so we * disable DMA first. */ (void)cas_disable_rx(sc); CAS_WRITE_4(sc, CAS_RX_CONF, 0); CAS_BARRIER(sc, CAS_RX_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!cas_bitwait(sc, CAS_RX_CONF, CAS_RX_CONF_RXDMA_EN, 0)) device_printf(sc->sc_dev, "cannot disable RX DMA\n"); /* Finally, reset the ERX. */ CAS_WRITE_4(sc, CAS_RESET, CAS_RESET_RX | ((sc->sc_flags & CAS_SERDES) != 0 ? CAS_RESET_PCS_DIS : 0)); CAS_BARRIER(sc, CAS_RESET, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!cas_bitwait(sc, CAS_RESET, CAS_RESET_RX, 0)) { device_printf(sc->sc_dev, "cannot reset receiver\n"); return (1); } return (0); } static int cas_reset_tx(struct cas_softc *sc) { /* * Resetting while DMA is in progress can cause a bus hang, so we * disable DMA first. 
*/ (void)cas_disable_tx(sc); CAS_WRITE_4(sc, CAS_TX_CONF, 0); CAS_BARRIER(sc, CAS_TX_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!cas_bitwait(sc, CAS_TX_CONF, CAS_TX_CONF_TXDMA_EN, 0)) device_printf(sc->sc_dev, "cannot disable TX DMA\n"); /* Finally, reset the ETX. */ CAS_WRITE_4(sc, CAS_RESET, CAS_RESET_TX | ((sc->sc_flags & CAS_SERDES) != 0 ? CAS_RESET_PCS_DIS : 0)); CAS_BARRIER(sc, CAS_RESET, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!cas_bitwait(sc, CAS_RESET, CAS_RESET_TX, 0)) { device_printf(sc->sc_dev, "cannot reset transmitter\n"); return (1); } return (0); } static int cas_disable_rx(struct cas_softc *sc) { CAS_WRITE_4(sc, CAS_MAC_RX_CONF, CAS_READ_4(sc, CAS_MAC_RX_CONF) & ~CAS_MAC_RX_CONF_EN); CAS_BARRIER(sc, CAS_MAC_RX_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (cas_bitwait(sc, CAS_MAC_RX_CONF, CAS_MAC_RX_CONF_EN, 0)) return (1); if (bootverbose) device_printf(sc->sc_dev, "cannot disable RX MAC\n"); return (0); } static int cas_disable_tx(struct cas_softc *sc) { CAS_WRITE_4(sc, CAS_MAC_TX_CONF, CAS_READ_4(sc, CAS_MAC_TX_CONF) & ~CAS_MAC_TX_CONF_EN); CAS_BARRIER(sc, CAS_MAC_TX_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (cas_bitwait(sc, CAS_MAC_TX_CONF, CAS_MAC_TX_CONF_EN, 0)) return (1); if (bootverbose) device_printf(sc->sc_dev, "cannot disable TX MAC\n"); return (0); } static inline void cas_rxcompinit(struct cas_rx_comp *rxcomp) { rxcomp->crc_word1 = 0; rxcomp->crc_word2 = 0; rxcomp->crc_word3 = htole64(CAS_SET(ETHER_HDR_LEN + sizeof(struct ip), CAS_RC3_CSO)); rxcomp->crc_word4 = htole64(CAS_RC4_ZERO); } static void cas_meminit(struct cas_softc *sc) { int i; CAS_LOCK_ASSERT(sc, MA_OWNED); /* * Initialize the transmit descriptor ring. */ for (i = 0; i < CAS_NTXDESC; i++) { sc->sc_txdescs[i].cd_flags = 0; sc->sc_txdescs[i].cd_buf_ptr = 0; } sc->sc_txfree = CAS_MAXTXFREE; sc->sc_txnext = 0; sc->sc_txwin = 0; /* * Initialize the receive completion ring. */ for (i = 0; i < CAS_NRXCOMP; i++) cas_rxcompinit(&sc->sc_rxcomps[i]); sc->sc_rxcptr = 0; /* * Initialize the first receive descriptor ring. We leave * the second one zeroed as we don't actually use it. */ for (i = 0; i < CAS_NRXDESC; i++) CAS_INIT_RXDESC(sc, i, i); sc->sc_rxdptr = 0; CAS_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } static u_int cas_descsize(u_int sz) { switch (sz) { case 32: return (CAS_DESC_32); case 64: return (CAS_DESC_64); case 128: return (CAS_DESC_128); case 256: return (CAS_DESC_256); case 512: return (CAS_DESC_512); case 1024: return (CAS_DESC_1K); case 2048: return (CAS_DESC_2K); case 4096: return (CAS_DESC_4K); case 8192: return (CAS_DESC_8K); default: printf("%s: invalid descriptor ring size %d\n", __func__, sz); return (CAS_DESC_32); } } static u_int cas_rxcompsize(u_int sz) { switch (sz) { case 128: return (CAS_RX_CONF_COMP_128); case 256: return (CAS_RX_CONF_COMP_256); case 512: return (CAS_RX_CONF_COMP_512); case 1024: return (CAS_RX_CONF_COMP_1K); case 2048: return (CAS_RX_CONF_COMP_2K); case 4096: return (CAS_RX_CONF_COMP_4K); case 8192: return (CAS_RX_CONF_COMP_8K); case 16384: return (CAS_RX_CONF_COMP_16K); case 32768: return (CAS_RX_CONF_COMP_32K); default: printf("%s: invalid completion ring size %d\n", __func__, sz); return (CAS_RX_CONF_COMP_128); } } static void cas_init(void *xsc) { struct cas_softc *sc = xsc; CAS_LOCK(sc); cas_init_locked(sc); CAS_UNLOCK(sc); } /* * Initialization of interface; set up initialization block * and transmit/receive descriptor rings.
*/ static void cas_init_locked(struct cas_softc *sc) { struct ifnet *ifp = sc->sc_ifp; uint32_t v; CAS_LOCK_ASSERT(sc, MA_OWNED); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) return; #ifdef CAS_DEBUG CTR2(KTR_CAS, "%s: %s: calling stop", device_get_name(sc->sc_dev), __func__); #endif /* * Initialization sequence. The numbered steps below correspond * to the sequence outlined in section 6.3.5.1 in the Ethernet * Channel Engine manual (part of the PCIO manual). * See also the STP2002-STQ document from Sun Microsystems. */ /* step 1 & 2. Reset the Ethernet Channel. */ cas_stop(ifp); cas_reset(sc); #ifdef CAS_DEBUG CTR2(KTR_CAS, "%s: %s: restarting", device_get_name(sc->sc_dev), __func__); #endif if ((sc->sc_flags & CAS_SERDES) == 0) /* Re-initialize the MIF. */ cas_mifinit(sc); /* step 3. Setup data structures in host memory. */ cas_meminit(sc); /* step 4. TX MAC registers & counters */ cas_init_regs(sc); /* step 5. RX MAC registers & counters */ /* step 6 & 7. Program Ring Base Addresses. */ CAS_WRITE_4(sc, CAS_TX_DESC3_BASE_HI, (((uint64_t)CAS_CDTXDADDR(sc, 0)) >> 32)); CAS_WRITE_4(sc, CAS_TX_DESC3_BASE_LO, CAS_CDTXDADDR(sc, 0) & 0xffffffff); CAS_WRITE_4(sc, CAS_RX_COMP_BASE_HI, (((uint64_t)CAS_CDRXCADDR(sc, 0)) >> 32)); CAS_WRITE_4(sc, CAS_RX_COMP_BASE_LO, CAS_CDRXCADDR(sc, 0) & 0xffffffff); CAS_WRITE_4(sc, CAS_RX_DESC_BASE_HI, (((uint64_t)CAS_CDRXDADDR(sc, 0)) >> 32)); CAS_WRITE_4(sc, CAS_RX_DESC_BASE_LO, CAS_CDRXDADDR(sc, 0) & 0xffffffff); if ((sc->sc_flags & CAS_REG_PLUS) != 0) { CAS_WRITE_4(sc, CAS_RX_DESC2_BASE_HI, (((uint64_t)CAS_CDRXD2ADDR(sc, 0)) >> 32)); CAS_WRITE_4(sc, CAS_RX_DESC2_BASE_LO, CAS_CDRXD2ADDR(sc, 0) & 0xffffffff); } #ifdef CAS_DEBUG CTR5(KTR_CAS, "loading TXDR %lx, RXCR %lx, RXDR %lx, RXD2R %lx, cddma %lx", CAS_CDTXDADDR(sc, 0), CAS_CDRXCADDR(sc, 0), CAS_CDRXDADDR(sc, 0), CAS_CDRXD2ADDR(sc, 0), sc->sc_cddma); #endif /* step 8. Global Configuration & Interrupt Masks */ /* Disable weighted round robin. */ CAS_WRITE_4(sc, CAS_CAW, CAS_CAW_RR_DIS); /* * Enable infinite bursts for revisions without PCI issues if * applicable. Doing so greatly improves the TX performance on * !__sparc64__ (on sparc64, setting CAS_INF_BURST improves TX * performance only marginally but hurts RX throughput quite a bit). */ CAS_WRITE_4(sc, CAS_INF_BURST, #if !defined(__sparc64__) (sc->sc_flags & CAS_TABORT) == 0 ? CAS_INF_BURST_EN : #endif 0); /* Set up interrupts. */ CAS_WRITE_4(sc, CAS_INTMASK, ~(CAS_INTR_TX_INT_ME | CAS_INTR_TX_TAG_ERR | CAS_INTR_RX_DONE | CAS_INTR_RX_BUF_NA | CAS_INTR_RX_TAG_ERR | CAS_INTR_RX_COMP_FULL | CAS_INTR_RX_BUF_AEMPTY | CAS_INTR_RX_COMP_AFULL | CAS_INTR_RX_LEN_MMATCH | CAS_INTR_PCI_ERROR_INT #ifdef CAS_DEBUG | CAS_INTR_PCS_INT | CAS_INTR_MIF #endif )); /* Don't clear top level interrupts when CAS_STATUS_ALIAS is read. */ CAS_WRITE_4(sc, CAS_CLEAR_ALIAS, 0); CAS_WRITE_4(sc, CAS_MAC_RX_MASK, ~CAS_MAC_RX_OVERFLOW); CAS_WRITE_4(sc, CAS_MAC_TX_MASK, ~(CAS_MAC_TX_UNDERRUN | CAS_MAC_TX_MAX_PKT_ERR)); #ifdef CAS_DEBUG CAS_WRITE_4(sc, CAS_MAC_CTRL_MASK, ~(CAS_MAC_CTRL_PAUSE_RCVD | CAS_MAC_CTRL_PAUSE | CAS_MAC_CTRL_NON_PAUSE)); #else CAS_WRITE_4(sc, CAS_MAC_CTRL_MASK, CAS_MAC_CTRL_PAUSE_RCVD | CAS_MAC_CTRL_PAUSE | CAS_MAC_CTRL_NON_PAUSE); #endif /* Enable PCI error interrupts. */ CAS_WRITE_4(sc, CAS_ERROR_MASK, ~(CAS_ERROR_DTRTO | CAS_ERROR_OTHER | CAS_ERROR_DMAW_ZERO | CAS_ERROR_DMAR_ZERO | CAS_ERROR_RTRTO)); /* Enable PCI error interrupts in BIM configuration. 
*/ CAS_WRITE_4(sc, CAS_BIM_CONF, CAS_BIM_CONF_DPAR_EN | CAS_BIM_CONF_RMA_EN | CAS_BIM_CONF_RTA_EN); /* * step 9. ETX Configuration: encode transmit descriptor ring size, * enable DMA and disable pre-interrupt writeback completion. */ v = cas_descsize(CAS_NTXDESC) << CAS_TX_CONF_DESC3_SHFT; CAS_WRITE_4(sc, CAS_TX_CONF, v | CAS_TX_CONF_TXDMA_EN | CAS_TX_CONF_RDPP_DIS | CAS_TX_CONF_PICWB_DIS); /* step 10. ERX Configuration */ /* * Encode receive completion and descriptor ring sizes, set the * swivel offset. */ v = cas_rxcompsize(CAS_NRXCOMP) << CAS_RX_CONF_COMP_SHFT; v |= cas_descsize(CAS_NRXDESC) << CAS_RX_CONF_DESC_SHFT; if ((sc->sc_flags & CAS_REG_PLUS) != 0) v |= cas_descsize(CAS_NRXDESC2) << CAS_RX_CONF_DESC2_SHFT; CAS_WRITE_4(sc, CAS_RX_CONF, v | (ETHER_ALIGN << CAS_RX_CONF_SOFF_SHFT)); /* Set the PAUSE thresholds. We use the maximum OFF threshold. */ CAS_WRITE_4(sc, CAS_RX_PTHRS, (111 << CAS_RX_PTHRS_XOFF_SHFT) | (15 << CAS_RX_PTHRS_XON_SHFT)); /* RX blanking */ CAS_WRITE_4(sc, CAS_RX_BLANK, (15 << CAS_RX_BLANK_TIME_SHFT) | (5 << CAS_RX_BLANK_PKTS_SHFT)); /* Set RX_COMP_AFULL threshold to half of the RX completions. */ CAS_WRITE_4(sc, CAS_RX_AEMPTY_THRS, (CAS_NRXCOMP / 2) << CAS_RX_AEMPTY_COMP_SHFT); /* Initialize the RX page size register as appropriate for 8k. */ CAS_WRITE_4(sc, CAS_RX_PSZ, (CAS_RX_PSZ_8K << CAS_RX_PSZ_SHFT) | (4 << CAS_RX_PSZ_MB_CNT_SHFT) | (CAS_RX_PSZ_MB_STRD_2K << CAS_RX_PSZ_MB_STRD_SHFT) | (CAS_RX_PSZ_MB_OFF_64 << CAS_RX_PSZ_MB_OFF_SHFT)); /* Disable RX random early detection. */ CAS_WRITE_4(sc, CAS_RX_RED, 0); /* Zero the RX reassembly DMA table. */ for (v = 0; v <= CAS_RX_REAS_DMA_ADDR_LC; v++) { CAS_WRITE_4(sc, CAS_RX_REAS_DMA_ADDR, v); CAS_WRITE_4(sc, CAS_RX_REAS_DMA_DATA_LO, 0); CAS_WRITE_4(sc, CAS_RX_REAS_DMA_DATA_MD, 0); CAS_WRITE_4(sc, CAS_RX_REAS_DMA_DATA_HI, 0); } /* Ensure the RX control FIFO and RX IPP FIFO addresses are zero. */ CAS_WRITE_4(sc, CAS_RX_CTRL_FIFO, 0); CAS_WRITE_4(sc, CAS_RX_IPP_ADDR, 0); /* Finally, enable RX DMA. */ CAS_WRITE_4(sc, CAS_RX_CONF, CAS_READ_4(sc, CAS_RX_CONF) | CAS_RX_CONF_RXDMA_EN); /* step 11. Configure Media. */ /* step 12. RX_MAC Configuration Register */ v = CAS_READ_4(sc, CAS_MAC_RX_CONF); v &= ~(CAS_MAC_RX_CONF_STRPPAD | CAS_MAC_RX_CONF_EN); v |= CAS_MAC_RX_CONF_STRPFCS; sc->sc_mac_rxcfg = v; /* * Clear the RX filter and reprogram it. This will also set the * current RX MAC configuration and enable it. */ cas_setladrf(sc); /* step 13. TX_MAC Configuration Register */ v = CAS_READ_4(sc, CAS_MAC_TX_CONF); v |= CAS_MAC_TX_CONF_EN; (void)cas_disable_tx(sc); CAS_WRITE_4(sc, CAS_MAC_TX_CONF, v); /* step 14. Issue Transmit Pending command. */ /* step 15. Give the receiver a swift kick. */ CAS_WRITE_4(sc, CAS_RX_KICK, CAS_NRXDESC - 4); CAS_WRITE_4(sc, CAS_RX_COMP_TAIL, 0); if ((sc->sc_flags & CAS_REG_PLUS) != 0) CAS_WRITE_4(sc, CAS_RX_KICK2, CAS_NRXDESC2 - 4); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; mii_mediachg(sc->sc_mii); /* Start the one second timer. */ sc->sc_wdog_timer = 0; callout_reset(&sc->sc_tick_ch, hz, cas_tick, sc); } static int cas_load_txmbuf(struct cas_softc *sc, struct mbuf **m_head) { bus_dma_segment_t txsegs[CAS_NTXSEGS]; struct cas_txsoft *txs; struct ip *ip; struct mbuf *m; uint64_t cflags; int error, nexttx, nsegs, offset, seg; CAS_LOCK_ASSERT(sc, MA_OWNED); /* Get a work queue entry. */ if ((txs = STAILQ_FIRST(&sc->sc_txfreeq)) == NULL) { /* Ran out of descriptors.
*/ return (ENOBUFS); } cflags = 0; if (((*m_head)->m_pkthdr.csum_flags & CAS_CSUM_FEATURES) != 0) { if (M_WRITABLE(*m_head) == 0) { m = m_dup(*m_head, M_NOWAIT); m_freem(*m_head); *m_head = m; if (m == NULL) return (ENOBUFS); } offset = sizeof(struct ether_header); m = m_pullup(*m_head, offset + sizeof(struct ip)); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } ip = (struct ip *)(mtod(m, caddr_t) + offset); offset += (ip->ip_hl << 2); cflags = (offset << CAS_TD_CKSUM_START_SHFT) | ((offset + m->m_pkthdr.csum_data) << CAS_TD_CKSUM_STUFF_SHFT) | CAS_TD_CKSUM_EN; *m_head = m; } error = bus_dmamap_load_mbuf_sg(sc->sc_tdmatag, txs->txs_dmamap, *m_head, txsegs, &nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { m = m_collapse(*m_head, M_NOWAIT, CAS_NTXSEGS); if (m == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } *m_head = m; error = bus_dmamap_load_mbuf_sg(sc->sc_tdmatag, txs->txs_dmamap, *m_head, txsegs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(*m_head); *m_head = NULL; return (error); } } else if (error != 0) return (error); /* If nsegs is wrong then the stack is corrupt. */ KASSERT(nsegs <= CAS_NTXSEGS, ("%s: too many DMA segments (%d)", __func__, nsegs)); if (nsegs == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } /* * Ensure we have enough descriptors free to describe * the packet. Note, we always reserve one descriptor * at the end of the ring as a termination point, in * order to prevent wrap-around. */ if (nsegs > sc->sc_txfree - 1) { txs->txs_ndescs = 0; bus_dmamap_unload(sc->sc_tdmatag, txs->txs_dmamap); return (ENOBUFS); } txs->txs_ndescs = nsegs; txs->txs_firstdesc = sc->sc_txnext; nexttx = txs->txs_firstdesc; for (seg = 0; seg < nsegs; seg++, nexttx = CAS_NEXTTX(nexttx)) { #ifdef CAS_DEBUG CTR6(KTR_CAS, "%s: mapping seg %d (txd %d), len %lx, addr %#lx (%#lx)", __func__, seg, nexttx, txsegs[seg].ds_len, txsegs[seg].ds_addr, htole64(txsegs[seg].ds_addr)); #endif sc->sc_txdescs[nexttx].cd_buf_ptr = htole64(txsegs[seg].ds_addr); KASSERT(txsegs[seg].ds_len < CAS_TD_BUF_LEN_MASK >> CAS_TD_BUF_LEN_SHFT, ("%s: segment size too large!", __func__)); sc->sc_txdescs[nexttx].cd_flags = htole64(txsegs[seg].ds_len << CAS_TD_BUF_LEN_SHFT); txs->txs_lastdesc = nexttx; } /* Set EOF on the last descriptor. */ #ifdef CAS_DEBUG CTR3(KTR_CAS, "%s: end of frame at segment %d, TX %d", __func__, seg, nexttx); #endif sc->sc_txdescs[txs->txs_lastdesc].cd_flags |= htole64(CAS_TD_END_OF_FRAME); /* Lastly set SOF on the first descriptor. */ #ifdef CAS_DEBUG CTR3(KTR_CAS, "%s: start of frame at segment %d, TX %d", __func__, seg, nexttx); #endif if ((sc->sc_txwin += nsegs) > CAS_MAXTXFREE * 2 / 3) { sc->sc_txwin = 0; sc->sc_txdescs[txs->txs_firstdesc].cd_flags |= htole64(cflags | CAS_TD_START_OF_FRAME | CAS_TD_INT_ME); } else sc->sc_txdescs[txs->txs_firstdesc].cd_flags |= htole64(cflags | CAS_TD_START_OF_FRAME); /* Sync the DMA map. */ bus_dmamap_sync(sc->sc_tdmatag, txs->txs_dmamap, BUS_DMASYNC_PREWRITE); #ifdef CAS_DEBUG CTR4(KTR_CAS, "%s: setting firstdesc=%d, lastdesc=%d, ndescs=%d", __func__, txs->txs_firstdesc, txs->txs_lastdesc, txs->txs_ndescs); #endif STAILQ_REMOVE_HEAD(&sc->sc_txfreeq, txs_q); STAILQ_INSERT_TAIL(&sc->sc_txdirtyq, txs, txs_q); txs->txs_mbuf = *m_head; sc->sc_txnext = CAS_NEXTTX(txs->txs_lastdesc); sc->sc_txfree -= txs->txs_ndescs; return (0); } static void cas_init_regs(struct cas_softc *sc) { int i; const u_char *laddr = IF_LLADDR(sc->sc_ifp); CAS_LOCK_ASSERT(sc, MA_OWNED); /* These registers are not cleared on reset.
*/ if ((sc->sc_flags & CAS_INITED) == 0) { /* magic values */ CAS_WRITE_4(sc, CAS_MAC_IPG0, 0); CAS_WRITE_4(sc, CAS_MAC_IPG1, 8); CAS_WRITE_4(sc, CAS_MAC_IPG2, 4); /* min frame length */ CAS_WRITE_4(sc, CAS_MAC_MIN_FRAME, ETHER_MIN_LEN); /* max frame length and max burst size */ CAS_WRITE_4(sc, CAS_MAC_MAX_BF, ((ETHER_MAX_LEN_JUMBO + ETHER_VLAN_ENCAP_LEN) << CAS_MAC_MAX_BF_FRM_SHFT) | (0x2000 << CAS_MAC_MAX_BF_BST_SHFT)); /* more magic values */ CAS_WRITE_4(sc, CAS_MAC_PREAMBLE_LEN, 0x7); CAS_WRITE_4(sc, CAS_MAC_JAM_SIZE, 0x4); CAS_WRITE_4(sc, CAS_MAC_ATTEMPT_LIMIT, 0x10); CAS_WRITE_4(sc, CAS_MAC_CTRL_TYPE, 0x8808); /* random number seed */ CAS_WRITE_4(sc, CAS_MAC_RANDOM_SEED, ((laddr[5] << 8) | laddr[4]) & 0x3ff); /* secondary MAC addresses: 0:0:0:0:0:0 */ for (i = CAS_MAC_ADDR3; i <= CAS_MAC_ADDR41; i += CAS_MAC_ADDR4 - CAS_MAC_ADDR3) CAS_WRITE_4(sc, i, 0); /* MAC control address: 01:80:c2:00:00:01 */ CAS_WRITE_4(sc, CAS_MAC_ADDR42, 0x0001); CAS_WRITE_4(sc, CAS_MAC_ADDR43, 0xc200); CAS_WRITE_4(sc, CAS_MAC_ADDR44, 0x0180); /* MAC filter address: 0:0:0:0:0:0 */ CAS_WRITE_4(sc, CAS_MAC_AFILTER0, 0); CAS_WRITE_4(sc, CAS_MAC_AFILTER1, 0); CAS_WRITE_4(sc, CAS_MAC_AFILTER2, 0); CAS_WRITE_4(sc, CAS_MAC_AFILTER_MASK1_2, 0); CAS_WRITE_4(sc, CAS_MAC_AFILTER_MASK0, 0); /* Zero the hash table. */ for (i = CAS_MAC_HASH0; i <= CAS_MAC_HASH15; i += CAS_MAC_HASH1 - CAS_MAC_HASH0) CAS_WRITE_4(sc, i, 0); sc->sc_flags |= CAS_INITED; } /* Counters need to be zeroed. */ CAS_WRITE_4(sc, CAS_MAC_NORM_COLL_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_FIRST_COLL_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_EXCESS_COLL_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_LATE_COLL_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_DEFER_TMR_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_PEAK_ATTEMPTS, 0); CAS_WRITE_4(sc, CAS_MAC_RX_FRAME_COUNT, 0); CAS_WRITE_4(sc, CAS_MAC_RX_LEN_ERR_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_RX_ALIGN_ERR, 0); CAS_WRITE_4(sc, CAS_MAC_RX_CRC_ERR_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_RX_CODE_VIOL, 0); /* Set XOFF PAUSE time. */ CAS_WRITE_4(sc, CAS_MAC_SPC, 0x1BF0 << CAS_MAC_SPC_TIME_SHFT); /* Set the station address. */ CAS_WRITE_4(sc, CAS_MAC_ADDR0, (laddr[4] << 8) | laddr[5]); CAS_WRITE_4(sc, CAS_MAC_ADDR1, (laddr[2] << 8) | laddr[3]); CAS_WRITE_4(sc, CAS_MAC_ADDR2, (laddr[0] << 8) | laddr[1]); /* Enable MII outputs. */ CAS_WRITE_4(sc, CAS_MAC_XIF_CONF, CAS_MAC_XIF_CONF_TX_OE); } static void cas_tx_task(void *arg, int pending __unused) { struct ifnet *ifp; ifp = (struct ifnet *)arg; cas_start(ifp); } static inline void cas_txkick(struct cas_softc *sc) { /* * Update the TX kick register. This register has to point to the * descriptor after the last valid one and for optimum performance * should be incremented in multiples of 4 (the DMA engine fetches/ * updates descriptors in batches of 4). 
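 *
 * Worked example (illustrative numbers): if sc_txnext is 12 after
 * queueing a frame, the kick write below hands descriptors up to
 * index 11 to the chip; at 13, 14 or 15 cas_start() defers the kick
 * (see its sc_txnext % 4 test) so the engine always fetches full
 * batches.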
*/ #ifdef CAS_DEBUG CTR3(KTR_CAS, "%s: %s: kicking TX %d", device_get_name(sc->sc_dev), __func__, sc->sc_txnext); #endif CAS_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); CAS_WRITE_4(sc, CAS_TX_KICK3, sc->sc_txnext); } static void cas_start(struct ifnet *ifp) { struct cas_softc *sc = ifp->if_softc; struct mbuf *m; int kicked, ntx; CAS_LOCK(sc); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || (sc->sc_flags & CAS_LINK) == 0) { CAS_UNLOCK(sc); return; } if (sc->sc_txfree < CAS_MAXTXFREE / 4) cas_tint(sc); #ifdef CAS_DEBUG CTR4(KTR_CAS, "%s: %s: txfree %d, txnext %d", device_get_name(sc->sc_dev), __func__, sc->sc_txfree, sc->sc_txnext); #endif ntx = 0; kicked = 0; for (; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) && sc->sc_txfree > 1;) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m); if (m == NULL) break; if (cas_load_txmbuf(sc, &m) != 0) { if (m == NULL) break; ifp->if_drv_flags |= IFF_DRV_OACTIVE; IFQ_DRV_PREPEND(&ifp->if_snd, m); break; } if ((sc->sc_txnext % 4) == 0) { cas_txkick(sc); kicked = 1; } else kicked = 0; ntx++; BPF_MTAP(ifp, m); } if (ntx > 0) { if (kicked == 0) cas_txkick(sc); #ifdef CAS_DEBUG CTR2(KTR_CAS, "%s: packets enqueued, OWN on %d", device_get_name(sc->sc_dev), sc->sc_txnext); #endif /* Set a watchdog timer in case the chip flakes out. */ sc->sc_wdog_timer = 5; #ifdef CAS_DEBUG CTR3(KTR_CAS, "%s: %s: watchdog %d", device_get_name(sc->sc_dev), __func__, sc->sc_wdog_timer); #endif } CAS_UNLOCK(sc); } static void cas_tint(struct cas_softc *sc) { struct ifnet *ifp = sc->sc_ifp; struct cas_txsoft *txs; int progress; uint32_t txlast; #ifdef CAS_DEBUG int i; CAS_LOCK_ASSERT(sc, MA_OWNED); CTR2(KTR_CAS, "%s: %s", device_get_name(sc->sc_dev), __func__); #endif /* * Go through our TX list and free mbufs for those * frames that have been transmitted. */ progress = 0; CAS_CDSYNC(sc, BUS_DMASYNC_POSTREAD); while ((txs = STAILQ_FIRST(&sc->sc_txdirtyq)) != NULL) { #ifdef CAS_DEBUG if ((ifp->if_flags & IFF_DEBUG) != 0) { printf(" txsoft %p transmit chain:\n", txs); for (i = txs->txs_firstdesc;; i = CAS_NEXTTX(i)) { printf("descriptor %d: ", i); printf("cd_flags: 0x%016llx\t", (long long)le64toh( sc->sc_txdescs[i].cd_flags)); printf("cd_buf_ptr: 0x%016llx\n", (long long)le64toh( sc->sc_txdescs[i].cd_buf_ptr)); if (i == txs->txs_lastdesc) break; } } #endif /* * In theory, we could harvest some descriptors before * the ring is empty, but that's a bit complicated. * * CAS_TX_COMPn points to the last descriptor * processed + 1. */ txlast = CAS_READ_4(sc, CAS_TX_COMP3); #ifdef CAS_DEBUG CTR4(KTR_CAS, "%s: txs->txs_firstdesc = %d, " "txs->txs_lastdesc = %d, txlast = %d", __func__, txs->txs_firstdesc, txs->txs_lastdesc, txlast); #endif if (txs->txs_firstdesc <= txs->txs_lastdesc) { if ((txlast >= txs->txs_firstdesc) && (txlast <= txs->txs_lastdesc)) break; } else { /* Ick -- this command wraps. 
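 *
 * Worked example (illustrative ring size of 64): a frame may occupy
 * firstdesc = 62 .. lastdesc = 1.  It is still in flight while
 * txlast, i.e. last processed + 1, lies in 62..63 or 0..1 -- exactly
 * the two-sided test below.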
*/ if ((txlast >= txs->txs_firstdesc) || (txlast <= txs->txs_lastdesc)) break; } #ifdef CAS_DEBUG CTR1(KTR_CAS, "%s: releasing a descriptor", __func__); #endif STAILQ_REMOVE_HEAD(&sc->sc_txdirtyq, txs_q); sc->sc_txfree += txs->txs_ndescs; bus_dmamap_sync(sc->sc_tdmatag, txs->txs_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_tdmatag, txs->txs_dmamap); if (txs->txs_mbuf != NULL) { m_freem(txs->txs_mbuf); txs->txs_mbuf = NULL; } STAILQ_INSERT_TAIL(&sc->sc_txfreeq, txs, txs_q); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); progress = 1; } #ifdef CAS_DEBUG CTR5(KTR_CAS, "%s: CAS_TX_SM1 %x CAS_TX_SM2 %x CAS_TX_DESC_BASE %llx " "CAS_TX_COMP3 %x", __func__, CAS_READ_4(sc, CAS_TX_SM1), CAS_READ_4(sc, CAS_TX_SM2), ((long long)CAS_READ_4(sc, CAS_TX_DESC3_BASE_HI) << 32) | CAS_READ_4(sc, CAS_TX_DESC3_BASE_LO), CAS_READ_4(sc, CAS_TX_COMP3)); #endif if (progress) { /* We freed some descriptors, so reset IFF_DRV_OACTIVE. */ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if (STAILQ_EMPTY(&sc->sc_txdirtyq)) sc->sc_wdog_timer = 0; } #ifdef CAS_DEBUG CTR3(KTR_CAS, "%s: %s: watchdog %d", device_get_name(sc->sc_dev), __func__, sc->sc_wdog_timer); #endif } static void cas_rint_timeout(void *arg) { struct cas_softc *sc = arg; CAS_LOCK_ASSERT(sc, MA_OWNED); cas_rint(sc); } static void cas_rint(struct cas_softc *sc) { struct cas_rxdsoft *rxds, *rxds2; struct ifnet *ifp = sc->sc_ifp; struct mbuf *m, *m2; uint64_t word1, word2, word3, word4; uint32_t rxhead; u_int idx, idx2, len, off, skip; CAS_LOCK_ASSERT(sc, MA_OWNED); callout_stop(&sc->sc_rx_ch); #ifdef CAS_DEBUG CTR2(KTR_CAS, "%s: %s", device_get_name(sc->sc_dev), __func__); #endif #define PRINTWORD(n, delimiter) \ printf("word ## n: 0x%016llx%c", (long long)word ## n, delimiter) #define SKIPASSERT(n) \ KASSERT(sc->sc_rxcomps[sc->sc_rxcptr].crc_word ## n == 0, \ ("%s: word ## n not 0", __func__)) #define WORDTOH(n) \ word ## n = le64toh(sc->sc_rxcomps[sc->sc_rxcptr].crc_word ## n) /* * Read the completion head register once. This limits * how long the following loop can execute. */ rxhead = CAS_READ_4(sc, CAS_RX_COMP_HEAD); #ifdef CAS_DEBUG CTR4(KTR_CAS, "%s: sc->sc_rxcptr %d, sc->sc_rxdptr %d, head %d", __func__, sc->sc_rxcptr, sc->sc_rxdptr, rxhead); #endif skip = 0; CAS_CDSYNC(sc, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (; sc->sc_rxcptr != rxhead; sc->sc_rxcptr = CAS_NEXTRXCOMP(sc->sc_rxcptr)) { if (skip != 0) { SKIPASSERT(1); SKIPASSERT(2); SKIPASSERT(3); --skip; goto skip; } WORDTOH(1); WORDTOH(2); WORDTOH(3); WORDTOH(4); #ifdef CAS_DEBUG if ((ifp->if_flags & IFF_DEBUG) != 0) { printf(" completion %d: ", sc->sc_rxcptr); PRINTWORD(1, '\t'); PRINTWORD(2, '\t'); PRINTWORD(3, '\t'); PRINTWORD(4, '\n'); } #endif if (__predict_false( (word1 & CAS_RC1_TYPE_MASK) == CAS_RC1_TYPE_HW || (word4 & CAS_RC4_ZERO) != 0)) { /* * The descriptor is still marked as owned, although * it is supposed to have completed. This has been * observed on some machines. Just exiting here * might leave the packet sitting around until another * one arrives to trigger a new interrupt, which is * generally undesirable, so set up a timeout. 
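 * CAS_RXOWN_TICKS below bounds how long such a stuck completion can
 * linger before cas_rint_timeout() re-runs this scan.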
*/ callout_reset(&sc->sc_rx_ch, CAS_RXOWN_TICKS, cas_rint_timeout, sc); break; } if (__predict_false( (word4 & (CAS_RC4_BAD | CAS_RC4_LEN_MMATCH)) != 0)) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); device_printf(sc->sc_dev, "receive error: CRC error\n"); continue; } KASSERT(CAS_GET(word1, CAS_RC1_DATA_SIZE) == 0 || CAS_GET(word2, CAS_RC2_HDR_SIZE) == 0, ("%s: data and header present", __func__)); KASSERT((word1 & CAS_RC1_SPLIT_PKT) == 0 || CAS_GET(word2, CAS_RC2_HDR_SIZE) == 0, ("%s: split and header present", __func__)); KASSERT(CAS_GET(word1, CAS_RC1_DATA_SIZE) == 0 || (word1 & CAS_RC1_RELEASE_HDR) == 0, ("%s: data present but header release", __func__)); KASSERT(CAS_GET(word2, CAS_RC2_HDR_SIZE) == 0 || (word1 & CAS_RC1_RELEASE_DATA) == 0, ("%s: header present but data release", __func__)); if ((len = CAS_GET(word2, CAS_RC2_HDR_SIZE)) != 0) { idx = CAS_GET(word2, CAS_RC2_HDR_INDEX); off = CAS_GET(word2, CAS_RC2_HDR_OFF); #ifdef CAS_DEBUG CTR4(KTR_CAS, "%s: hdr at idx %d, off %d, len %d", __func__, idx, off, len); #endif rxds = &sc->sc_rxdsoft[idx]; MGETHDR(m, M_NOWAIT, MT_DATA); if (m != NULL) { refcount_acquire(&rxds->rxds_refcount); bus_dmamap_sync(sc->sc_rdmatag, rxds->rxds_dmamap, BUS_DMASYNC_POSTREAD); -#if __FreeBSD_version < 800016 - MEXTADD(m, (caddr_t)rxds->rxds_buf + + m_extadd(m, (char *)rxds->rxds_buf + off * 256 + ETHER_ALIGN, len, cas_free, - rxds, M_RDONLY, EXT_NET_DRV); -#else - MEXTADD(m, (caddr_t)rxds->rxds_buf + - off * 256 + ETHER_ALIGN, len, cas_free, sc, (void *)(uintptr_t)idx, M_RDONLY, EXT_NET_DRV); -#endif if ((m->m_flags & M_EXT) == 0) { m_freem(m); m = NULL; } } if (m != NULL) { m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = len; if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) cas_rxcksum(m, CAS_GET(word4, CAS_RC4_TCP_CSUM)); /* Pass it on. 
*/ CAS_UNLOCK(sc); (*ifp->if_input)(ifp, m); CAS_LOCK(sc); } else if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); if ((word1 & CAS_RC1_RELEASE_HDR) != 0 && refcount_release(&rxds->rxds_refcount) != 0) cas_add_rxdesc(sc, idx); } else if ((len = CAS_GET(word1, CAS_RC1_DATA_SIZE)) != 0) { idx = CAS_GET(word1, CAS_RC1_DATA_INDEX); off = CAS_GET(word1, CAS_RC1_DATA_OFF); #ifdef CAS_DEBUG CTR4(KTR_CAS, "%s: data at idx %d, off %d, len %d", __func__, idx, off, len); #endif rxds = &sc->sc_rxdsoft[idx]; MGETHDR(m, M_NOWAIT, MT_DATA); if (m != NULL) { refcount_acquire(&rxds->rxds_refcount); off += ETHER_ALIGN; m->m_len = min(CAS_PAGE_SIZE - off, len); bus_dmamap_sync(sc->sc_rdmatag, rxds->rxds_dmamap, BUS_DMASYNC_POSTREAD); -#if __FreeBSD_version < 800016 - MEXTADD(m, (caddr_t)rxds->rxds_buf + off, - m->m_len, cas_free, rxds, M_RDONLY, - EXT_NET_DRV); -#else - MEXTADD(m, (caddr_t)rxds->rxds_buf + off, + m_extadd(m, (char *)rxds->rxds_buf + off, m->m_len, cas_free, sc, (void *)(uintptr_t)idx, M_RDONLY, EXT_NET_DRV); -#endif if ((m->m_flags & M_EXT) == 0) { m_freem(m); m = NULL; } } idx2 = 0; m2 = NULL; rxds2 = NULL; if ((word1 & CAS_RC1_SPLIT_PKT) != 0) { KASSERT((word1 & CAS_RC1_RELEASE_NEXT) != 0, ("%s: split but no release next", __func__)); idx2 = CAS_GET(word2, CAS_RC2_NEXT_INDEX); #ifdef CAS_DEBUG CTR2(KTR_CAS, "%s: split at idx %d", __func__, idx2); #endif rxds2 = &sc->sc_rxdsoft[idx2]; if (m != NULL) { MGET(m2, M_NOWAIT, MT_DATA); if (m2 != NULL) { refcount_acquire( &rxds2->rxds_refcount); m2->m_len = len - m->m_len; bus_dmamap_sync( sc->sc_rdmatag, rxds2->rxds_dmamap, BUS_DMASYNC_POSTREAD); -#if __FreeBSD_version < 800016 - MEXTADD(m2, - (caddr_t)rxds2->rxds_buf, - m2->m_len, cas_free, - rxds2, M_RDONLY, - EXT_NET_DRV); -#else - MEXTADD(m2, - (caddr_t)rxds2->rxds_buf, + m_extadd(m2, + (char *)rxds2->rxds_buf, m2->m_len, cas_free, sc, (void *)(uintptr_t)idx2, M_RDONLY, EXT_NET_DRV); -#endif if ((m2->m_flags & M_EXT) == 0) { m_freem(m2); m2 = NULL; } } } if (m2 != NULL) m->m_next = m2; else if (m != NULL) { m_freem(m); m = NULL; } } if (m != NULL) { m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = len; if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) cas_rxcksum(m, CAS_GET(word4, CAS_RC4_TCP_CSUM)); /* Pass it on. 
*/ CAS_UNLOCK(sc); (*ifp->if_input)(ifp, m); CAS_LOCK(sc); } else if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); if ((word1 & CAS_RC1_RELEASE_DATA) != 0 && refcount_release(&rxds->rxds_refcount) != 0) cas_add_rxdesc(sc, idx); if ((word1 & CAS_RC1_SPLIT_PKT) != 0 && refcount_release(&rxds2->rxds_refcount) != 0) cas_add_rxdesc(sc, idx2); } skip = CAS_GET(word1, CAS_RC1_SKIP); skip: cas_rxcompinit(&sc->sc_rxcomps[sc->sc_rxcptr]); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; } CAS_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); CAS_WRITE_4(sc, CAS_RX_COMP_TAIL, sc->sc_rxcptr); #undef PRINTWORD #undef SKIPASSERT #undef WORDTOH #ifdef CAS_DEBUG CTR4(KTR_CAS, "%s: done sc->sc_rxcptr %d, sc->sc_rxdptr %d, head %d", __func__, sc->sc_rxcptr, sc->sc_rxdptr, CAS_READ_4(sc, CAS_RX_COMP_HEAD)); #endif } static void -cas_free(struct mbuf *m, void *arg1, void *arg2) +cas_free(struct mbuf *m) { struct cas_rxdsoft *rxds; struct cas_softc *sc; u_int idx, locked; -#if __FreeBSD_version < 800016 - rxds = arg2; - sc = rxds->rxds_sc; - idx = rxds->rxds_idx; -#else - sc = arg1; - idx = (uintptr_t)arg2; + sc = m->m_ext.ext_arg1; + idx = (uintptr_t)m->m_ext.ext_arg2; rxds = &sc->sc_rxdsoft[idx]; -#endif if (refcount_release(&rxds->rxds_refcount) == 0) return; /* * NB: this function can be called via m_freem(9) within * this driver! */ if ((locked = CAS_LOCK_OWNED(sc)) == 0) CAS_LOCK(sc); cas_add_rxdesc(sc, idx); if (locked == 0) CAS_UNLOCK(sc); } static inline void cas_add_rxdesc(struct cas_softc *sc, u_int idx) { CAS_LOCK_ASSERT(sc, MA_OWNED); bus_dmamap_sync(sc->sc_rdmatag, sc->sc_rxdsoft[idx].rxds_dmamap, BUS_DMASYNC_PREREAD); CAS_UPDATE_RXDESC(sc, sc->sc_rxdptr, idx); sc->sc_rxdptr = CAS_NEXTRXDESC(sc->sc_rxdptr); /* * Update the RX kick register. This register has to point to the * descriptor after the last valid one (before the current batch) * and for optimum performance should be incremented in multiples * of 4 (the DMA engine fetches/updates descriptors in batches of 4). */ if ((sc->sc_rxdptr % 4) == 0) { CAS_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); CAS_WRITE_4(sc, CAS_RX_KICK, (sc->sc_rxdptr + CAS_NRXDESC - 4) & CAS_NRXDESC_MASK); } } static void cas_eint(struct cas_softc *sc, u_int status) { struct ifnet *ifp = sc->sc_ifp; CAS_LOCK_ASSERT(sc, MA_OWNED); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); device_printf(sc->sc_dev, "%s: status 0x%x", __func__, status); if ((status & CAS_INTR_PCI_ERROR_INT) != 0) { status = CAS_READ_4(sc, CAS_ERROR_STATUS); printf(", PCI bus error 0x%x", status); if ((status & CAS_ERROR_OTHER) != 0) { status = pci_read_config(sc->sc_dev, PCIR_STATUS, 2); printf(", PCI status 0x%x", status); pci_write_config(sc->sc_dev, PCIR_STATUS, status, 2); } } printf("\n"); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; cas_init_locked(sc); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) taskqueue_enqueue(sc->sc_tq, &sc->sc_tx_task); } static int cas_intr(void *v) { struct cas_softc *sc = v; if (__predict_false((CAS_READ_4(sc, CAS_STATUS_ALIAS) & CAS_INTR_SUMMARY) == 0)) return (FILTER_STRAY); /* Disable interrupts. 
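 * Writing all-ones to CAS_INTMASK masks every source; the actual
 * processing is deferred to cas_intr_task(), which re-enables the
 * interesting sources once it is done.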
*/ CAS_WRITE_4(sc, CAS_INTMASK, 0xffffffff); taskqueue_enqueue(sc->sc_tq, &sc->sc_intr_task); return (FILTER_HANDLED); } static void cas_intr_task(void *arg, int pending __unused) { struct cas_softc *sc = arg; struct ifnet *ifp = sc->sc_ifp; uint32_t status, status2; CAS_LOCK_ASSERT(sc, MA_NOTOWNED); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; status = CAS_READ_4(sc, CAS_STATUS); if (__predict_false((status & CAS_INTR_SUMMARY) == 0)) goto done; CAS_LOCK(sc); #ifdef CAS_DEBUG CTR4(KTR_CAS, "%s: %s: cplt %x, status %x", device_get_name(sc->sc_dev), __func__, (status >> CAS_STATUS_TX_COMP3_SHFT), (u_int)status); /* * PCS interrupts must be cleared, otherwise no traffic is passed! */ if ((status & CAS_INTR_PCS_INT) != 0) { status2 = CAS_READ_4(sc, CAS_PCS_INTR_STATUS) | CAS_READ_4(sc, CAS_PCS_INTR_STATUS); if ((status2 & CAS_PCS_INTR_LINK) != 0) device_printf(sc->sc_dev, "%s: PCS link status changed\n", __func__); } if ((status & CAS_MAC_CTRL_STATUS) != 0) { status2 = CAS_READ_4(sc, CAS_MAC_CTRL_STATUS); if ((status2 & CAS_MAC_CTRL_PAUSE) != 0) device_printf(sc->sc_dev, "%s: PAUSE received (PAUSE time %d slots)\n", __func__, (status2 & CAS_MAC_CTRL_STATUS_PT_MASK) >> CAS_MAC_CTRL_STATUS_PT_SHFT); if ((status2 & CAS_MAC_CTRL_PAUSE) != 0) device_printf(sc->sc_dev, "%s: transited to PAUSE state\n", __func__); if ((status2 & CAS_MAC_CTRL_NON_PAUSE) != 0) device_printf(sc->sc_dev, "%s: transited to non-PAUSE state\n", __func__); } if ((status & CAS_INTR_MIF) != 0) device_printf(sc->sc_dev, "%s: MIF interrupt\n", __func__); #endif if (__predict_false((status & (CAS_INTR_TX_TAG_ERR | CAS_INTR_RX_TAG_ERR | CAS_INTR_RX_LEN_MMATCH | CAS_INTR_PCI_ERROR_INT)) != 0)) { cas_eint(sc, status); CAS_UNLOCK(sc); return; } if (__predict_false(status & CAS_INTR_TX_MAC_INT)) { status2 = CAS_READ_4(sc, CAS_MAC_TX_STATUS); if ((status2 & (CAS_MAC_TX_UNDERRUN | CAS_MAC_TX_MAX_PKT_ERR)) != 0) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); else if ((status2 & ~CAS_MAC_TX_FRAME_XMTD) != 0) device_printf(sc->sc_dev, "MAC TX fault, status %x\n", status2); } if (__predict_false(status & CAS_INTR_RX_MAC_INT)) { status2 = CAS_READ_4(sc, CAS_MAC_RX_STATUS); if ((status2 & CAS_MAC_RX_OVERFLOW) != 0) if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); else if ((status2 & ~CAS_MAC_RX_FRAME_RCVD) != 0) device_printf(sc->sc_dev, "MAC RX fault, status %x\n", status2); } if ((status & (CAS_INTR_RX_DONE | CAS_INTR_RX_BUF_NA | CAS_INTR_RX_COMP_FULL | CAS_INTR_RX_BUF_AEMPTY | CAS_INTR_RX_COMP_AFULL)) != 0) { cas_rint(sc); #ifdef CAS_DEBUG if (__predict_false((status & (CAS_INTR_RX_BUF_NA | CAS_INTR_RX_COMP_FULL | CAS_INTR_RX_BUF_AEMPTY | CAS_INTR_RX_COMP_AFULL)) != 0)) device_printf(sc->sc_dev, "RX fault, status %x\n", status); #endif } if ((status & (CAS_INTR_TX_INT_ME | CAS_INTR_TX_ALL | CAS_INTR_TX_DONE)) != 0) cas_tint(sc); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { CAS_UNLOCK(sc); return; } else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) taskqueue_enqueue(sc->sc_tq, &sc->sc_tx_task); CAS_UNLOCK(sc); status = CAS_READ_4(sc, CAS_STATUS_ALIAS); if (__predict_false((status & CAS_INTR_SUMMARY) != 0)) { taskqueue_enqueue(sc->sc_tq, &sc->sc_intr_task); return; } done: /* Re-enable interrupts. 
*/ CAS_WRITE_4(sc, CAS_INTMASK, ~(CAS_INTR_TX_INT_ME | CAS_INTR_TX_TAG_ERR | CAS_INTR_RX_DONE | CAS_INTR_RX_BUF_NA | CAS_INTR_RX_TAG_ERR | CAS_INTR_RX_COMP_FULL | CAS_INTR_RX_BUF_AEMPTY | CAS_INTR_RX_COMP_AFULL | CAS_INTR_RX_LEN_MMATCH | CAS_INTR_PCI_ERROR_INT #ifdef CAS_DEBUG | CAS_INTR_PCS_INT | CAS_INTR_MIF #endif )); } static void cas_watchdog(struct cas_softc *sc) { struct ifnet *ifp = sc->sc_ifp; CAS_LOCK_ASSERT(sc, MA_OWNED); #ifdef CAS_DEBUG CTR4(KTR_CAS, "%s: CAS_RX_CONF %x CAS_MAC_RX_STATUS %x CAS_MAC_RX_CONF %x", __func__, CAS_READ_4(sc, CAS_RX_CONF), CAS_READ_4(sc, CAS_MAC_RX_STATUS), CAS_READ_4(sc, CAS_MAC_RX_CONF)); CTR4(KTR_CAS, "%s: CAS_TX_CONF %x CAS_MAC_TX_STATUS %x CAS_MAC_TX_CONF %x", __func__, CAS_READ_4(sc, CAS_TX_CONF), CAS_READ_4(sc, CAS_MAC_TX_STATUS), CAS_READ_4(sc, CAS_MAC_TX_CONF)); #endif if (sc->sc_wdog_timer == 0 || --sc->sc_wdog_timer != 0) return; if ((sc->sc_flags & CAS_LINK) != 0) device_printf(sc->sc_dev, "device timeout\n"); else if (bootverbose) device_printf(sc->sc_dev, "device timeout (no link)\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); /* Try to get more packets going. */ ifp->if_drv_flags &= ~IFF_DRV_RUNNING; cas_init_locked(sc); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) taskqueue_enqueue(sc->sc_tq, &sc->sc_tx_task); } static void cas_mifinit(struct cas_softc *sc) { /* Configure the MIF in frame mode. */ CAS_WRITE_4(sc, CAS_MIF_CONF, CAS_READ_4(sc, CAS_MIF_CONF) & ~CAS_MIF_CONF_BB_MODE); CAS_BARRIER(sc, CAS_MIF_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); } /* * MII interface * * The MII interface supports at least three different operating modes: * * Bitbang mode is implemented using data, clock and output enable registers. * * Frame mode is implemented by loading a complete frame into the frame * register and polling the valid bit for completion. * * Polling mode uses the frame register but completion is indicated by * an interrupt. * */ static int cas_mii_readreg(device_t dev, int phy, int reg) { struct cas_softc *sc; int n; uint32_t v; #ifdef CAS_DEBUG_PHY printf("%s: phy %d reg %d\n", __func__, phy, reg); #endif sc = device_get_softc(dev); if ((sc->sc_flags & CAS_SERDES) != 0) { switch (reg) { case MII_BMCR: reg = CAS_PCS_CTRL; break; case MII_BMSR: reg = CAS_PCS_STATUS; break; case MII_PHYIDR1: case MII_PHYIDR2: return (0); case MII_ANAR: reg = CAS_PCS_ANAR; break; case MII_ANLPAR: reg = CAS_PCS_ANLPAR; break; case MII_EXTSR: return (EXTSR_1000XFDX | EXTSR_1000XHDX); default: device_printf(sc->sc_dev, "%s: unhandled register %d\n", __func__, reg); return (0); } return (CAS_READ_4(sc, reg)); } /* Construct the frame command. 
*/ v = CAS_MIF_FRAME_READ | (phy << CAS_MIF_FRAME_PHY_SHFT) | (reg << CAS_MIF_FRAME_REG_SHFT); CAS_WRITE_4(sc, CAS_MIF_FRAME, v); CAS_BARRIER(sc, CAS_MIF_FRAME, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); for (n = 0; n < 100; n++) { DELAY(1); v = CAS_READ_4(sc, CAS_MIF_FRAME); if (v & CAS_MIF_FRAME_TA_LSB) return (v & CAS_MIF_FRAME_DATA); } device_printf(sc->sc_dev, "%s: timed out\n", __func__); return (0); } static int cas_mii_writereg(device_t dev, int phy, int reg, int val) { struct cas_softc *sc; int n; uint32_t v; #ifdef CAS_DEBUG_PHY printf("%s: phy %d reg %d val %x\n", __func__, phy, reg, val); #endif sc = device_get_softc(dev); if ((sc->sc_flags & CAS_SERDES) != 0) { switch (reg) { case MII_BMSR: reg = CAS_PCS_STATUS; break; case MII_BMCR: reg = CAS_PCS_CTRL; if ((val & CAS_PCS_CTRL_RESET) == 0) break; CAS_WRITE_4(sc, CAS_PCS_CTRL, val); CAS_BARRIER(sc, CAS_PCS_CTRL, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!cas_bitwait(sc, CAS_PCS_CTRL, CAS_PCS_CTRL_RESET, 0)) device_printf(sc->sc_dev, "cannot reset PCS\n"); /* FALLTHROUGH */ case MII_ANAR: CAS_WRITE_4(sc, CAS_PCS_CONF, 0); CAS_BARRIER(sc, CAS_PCS_CONF, 4, BUS_SPACE_BARRIER_WRITE); CAS_WRITE_4(sc, CAS_PCS_ANAR, val); CAS_BARRIER(sc, CAS_PCS_ANAR, 4, BUS_SPACE_BARRIER_WRITE); CAS_WRITE_4(sc, CAS_PCS_SERDES_CTRL, CAS_PCS_SERDES_CTRL_ESD); CAS_BARRIER(sc, CAS_PCS_CONF, 4, BUS_SPACE_BARRIER_WRITE); CAS_WRITE_4(sc, CAS_PCS_CONF, CAS_PCS_CONF_EN); CAS_BARRIER(sc, CAS_PCS_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (0); case MII_ANLPAR: reg = CAS_PCS_ANLPAR; break; default: device_printf(sc->sc_dev, "%s: unhandled register %d\n", __func__, reg); return (0); } CAS_WRITE_4(sc, reg, val); CAS_BARRIER(sc, reg, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (0); } /* Construct the frame command. */ v = CAS_MIF_FRAME_WRITE | (phy << CAS_MIF_FRAME_PHY_SHFT) | (reg << CAS_MIF_FRAME_REG_SHFT) | (val & CAS_MIF_FRAME_DATA); CAS_WRITE_4(sc, CAS_MIF_FRAME, v); CAS_BARRIER(sc, CAS_MIF_FRAME, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); for (n = 0; n < 100; n++) { DELAY(1); v = CAS_READ_4(sc, CAS_MIF_FRAME); if (v & CAS_MIF_FRAME_TA_LSB) return (1); } device_printf(sc->sc_dev, "%s: timed out\n", __func__); return (0); } static void cas_mii_statchg(device_t dev) { struct cas_softc *sc; struct ifnet *ifp; int gigabit; uint32_t rxcfg, txcfg, v; sc = device_get_softc(dev); ifp = sc->sc_ifp; CAS_LOCK_ASSERT(sc, MA_OWNED); #ifdef CAS_DEBUG if ((ifp->if_flags & IFF_DEBUG) != 0) device_printf(sc->sc_dev, "%s: status change\n", __func__); #endif if ((sc->sc_mii->mii_media_status & IFM_ACTIVE) != 0 && IFM_SUBTYPE(sc->sc_mii->mii_media_active) != IFM_NONE) sc->sc_flags |= CAS_LINK; else sc->sc_flags &= ~CAS_LINK; switch (IFM_SUBTYPE(sc->sc_mii->mii_media_active)) { case IFM_1000_SX: case IFM_1000_LX: case IFM_1000_CX: case IFM_1000_T: gigabit = 1; break; default: gigabit = 0; } /* * The configuration done here corresponds to the steps F) and * G) and as far as enabling of RX and TX MAC goes also step H) * of the initialization sequence outlined in section 11.2.1 of * the Cassini+ ASIC Specification.
*/ rxcfg = sc->sc_mac_rxcfg; rxcfg &= ~CAS_MAC_RX_CONF_CARR; txcfg = CAS_MAC_TX_CONF_EN_IPG0 | CAS_MAC_TX_CONF_NGU | CAS_MAC_TX_CONF_NGUL; if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) != 0) txcfg |= CAS_MAC_TX_CONF_ICARR | CAS_MAC_TX_CONF_ICOLLIS; else if (gigabit != 0) { rxcfg |= CAS_MAC_RX_CONF_CARR; txcfg |= CAS_MAC_TX_CONF_CARR; } (void)cas_disable_tx(sc); CAS_WRITE_4(sc, CAS_MAC_TX_CONF, txcfg); (void)cas_disable_rx(sc); CAS_WRITE_4(sc, CAS_MAC_RX_CONF, rxcfg); v = CAS_READ_4(sc, CAS_MAC_CTRL_CONF) & ~(CAS_MAC_CTRL_CONF_TXP | CAS_MAC_CTRL_CONF_RXP); if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_ETH_RXPAUSE) != 0) v |= CAS_MAC_CTRL_CONF_RXP; if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_ETH_TXPAUSE) != 0) v |= CAS_MAC_CTRL_CONF_TXP; CAS_WRITE_4(sc, CAS_MAC_CTRL_CONF, v); /* * All supported chips have a bug causing incorrect checksum * to be calculated when letting them strip the FCS in half- * duplex mode. In theory we could disable FCS stripping and * manually adjust the checksum accordingly. It seems to make * more sense to optimize for the common case and just disable * hardware checksumming in half-duplex mode though. */ if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) == 0) { ifp->if_capenable &= ~IFCAP_HWCSUM; ifp->if_hwassist = 0; } else if ((sc->sc_flags & CAS_NO_CSUM) == 0) { ifp->if_capenable = ifp->if_capabilities; ifp->if_hwassist = CAS_CSUM_FEATURES; } if (sc->sc_variant == CAS_SATURN) { if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) == 0) /* silicon bug workaround */ CAS_WRITE_4(sc, CAS_MAC_PREAMBLE_LEN, 0x41); else CAS_WRITE_4(sc, CAS_MAC_PREAMBLE_LEN, 0x7); } if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) == 0 && gigabit != 0) CAS_WRITE_4(sc, CAS_MAC_SLOT_TIME, CAS_MAC_SLOT_TIME_CARR); else CAS_WRITE_4(sc, CAS_MAC_SLOT_TIME, CAS_MAC_SLOT_TIME_NORM); /* XIF Configuration */ v = CAS_MAC_XIF_CONF_TX_OE | CAS_MAC_XIF_CONF_LNKLED; if ((sc->sc_flags & CAS_SERDES) == 0) { if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) == 0) v |= CAS_MAC_XIF_CONF_NOECHO; v |= CAS_MAC_XIF_CONF_BUF_OE; } if (gigabit != 0) v |= CAS_MAC_XIF_CONF_GMII; if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) != 0) v |= CAS_MAC_XIF_CONF_FDXLED; CAS_WRITE_4(sc, CAS_MAC_XIF_CONF, v); sc->sc_mac_rxcfg = rxcfg; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 && (sc->sc_flags & CAS_LINK) != 0) { CAS_WRITE_4(sc, CAS_MAC_TX_CONF, txcfg | CAS_MAC_TX_CONF_EN); CAS_WRITE_4(sc, CAS_MAC_RX_CONF, rxcfg | CAS_MAC_RX_CONF_EN); } } static int cas_mediachange(struct ifnet *ifp) { struct cas_softc *sc = ifp->if_softc; int error; /* XXX add support for serial media.
*/ CAS_LOCK(sc); error = mii_mediachg(sc->sc_mii); CAS_UNLOCK(sc); return (error); } static void cas_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr) { struct cas_softc *sc = ifp->if_softc; CAS_LOCK(sc); if ((ifp->if_flags & IFF_UP) == 0) { CAS_UNLOCK(sc); return; } mii_pollstat(sc->sc_mii); ifmr->ifm_active = sc->sc_mii->mii_media_active; ifmr->ifm_status = sc->sc_mii->mii_media_status; CAS_UNLOCK(sc); } static int cas_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct cas_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; int error; error = 0; switch (cmd) { case SIOCSIFFLAGS: CAS_LOCK(sc); if ((ifp->if_flags & IFF_UP) != 0) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 && ((ifp->if_flags ^ sc->sc_ifflags) & (IFF_ALLMULTI | IFF_PROMISC)) != 0) cas_setladrf(sc); else cas_init_locked(sc); } else if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) cas_stop(ifp); sc->sc_ifflags = ifp->if_flags; CAS_UNLOCK(sc); break; case SIOCSIFCAP: CAS_LOCK(sc); if ((sc->sc_flags & CAS_NO_CSUM) != 0) { error = EINVAL; CAS_UNLOCK(sc); break; } ifp->if_capenable = ifr->ifr_reqcap; if ((ifp->if_capenable & IFCAP_TXCSUM) != 0) ifp->if_hwassist = CAS_CSUM_FEATURES; else ifp->if_hwassist = 0; CAS_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: CAS_LOCK(sc); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) cas_setladrf(sc); CAS_UNLOCK(sc); break; case SIOCSIFMTU: if ((ifr->ifr_mtu < ETHERMIN) || (ifr->ifr_mtu > ETHERMTU_JUMBO)) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->sc_mii->mii_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } static void cas_setladrf(struct cas_softc *sc) { struct ifnet *ifp = sc->sc_ifp; struct ifmultiaddr *inm; int i; uint32_t hash[16]; uint32_t crc, v; CAS_LOCK_ASSERT(sc, MA_OWNED); /* * Turn off the RX MAC and the hash filter as required by the Sun * Cassini programming restrictions. */ v = sc->sc_mac_rxcfg & ~(CAS_MAC_RX_CONF_HFILTER | CAS_MAC_RX_CONF_EN); CAS_WRITE_4(sc, CAS_MAC_RX_CONF, v); CAS_BARRIER(sc, CAS_MAC_RX_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!cas_bitwait(sc, CAS_MAC_RX_CONF, CAS_MAC_RX_CONF_HFILTER | CAS_MAC_RX_CONF_EN, 0)) device_printf(sc->sc_dev, "cannot disable RX MAC or hash filter\n"); v &= ~(CAS_MAC_RX_CONF_PROMISC | CAS_MAC_RX_CONF_PGRP); if ((ifp->if_flags & IFF_PROMISC) != 0) { v |= CAS_MAC_RX_CONF_PROMISC; goto chipit; } if ((ifp->if_flags & IFF_ALLMULTI) != 0) { v |= CAS_MAC_RX_CONF_PGRP; goto chipit; } /* * Set up multicast address filter by passing all multicast * addresses through a crc generator, and then using the high * order 8 bits as an index into the 256 bit logical address * filter. The high order 4 bits selects the word, while the * other 4 bits select the bit within the word (where bit 0 * is the MSB). */ /* Clear the hash table. */ memset(hash, 0, sizeof(hash)); if_maddr_rlock(ifp); TAILQ_FOREACH(inm, &ifp->if_multiaddrs, ifma_link) { if (inm->ifma_addr->sa_family != AF_LINK) continue; crc = ether_crc32_le(LLADDR((struct sockaddr_dl *) inm->ifma_addr), ETHER_ADDR_LEN); /* We just want the 8 most significant bits. */ crc >>= 24; /* Set the corresponding bit in the filter. */ hash[crc >> 4] |= 1 << (15 - (crc & 15)); } if_maddr_runlock(ifp); v |= CAS_MAC_RX_CONF_HFILTER; /* Now load the hash table into the chip (if we are using it). 
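 */

/*
 * Editor's illustration (mirrors the statements above, not new driver
 * logic): the filter-bit placement extracted into a stand-alone
 * helper.  The 8 most significant CRC bits select one of 256 filter
 * bits; the high nibble picks one of the 16 words, the low nibble
 * picks the bit within the word, counted from the MSB:
 *
 *	static void
 *	laf_set_bit(uint32_t crc, uint32_t hash[16])
 *	{
 *		u_int b = crc >> 24;
 *
 *		hash[b >> 4] |= 1 << (15 - (b & 15));
 *	}
 */

/* Load the 16 hash words into the chip.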
*/ for (i = 0; i < 16; i++) CAS_WRITE_4(sc, CAS_MAC_HASH0 + i * (CAS_MAC_HASH1 - CAS_MAC_HASH0), hash[i]); chipit: sc->sc_mac_rxcfg = v; CAS_WRITE_4(sc, CAS_MAC_RX_CONF, v | CAS_MAC_RX_CONF_EN); } static int cas_pci_attach(device_t dev); static int cas_pci_detach(device_t dev); static int cas_pci_probe(device_t dev); static int cas_pci_resume(device_t dev); static int cas_pci_suspend(device_t dev); static device_method_t cas_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, cas_pci_probe), DEVMETHOD(device_attach, cas_pci_attach), DEVMETHOD(device_detach, cas_pci_detach), DEVMETHOD(device_suspend, cas_pci_suspend), DEVMETHOD(device_resume, cas_pci_resume), /* Use the suspend handler here, it is all that is required. */ DEVMETHOD(device_shutdown, cas_pci_suspend), /* MII interface */ DEVMETHOD(miibus_readreg, cas_mii_readreg), DEVMETHOD(miibus_writereg, cas_mii_writereg), DEVMETHOD(miibus_statchg, cas_mii_statchg), DEVMETHOD_END }; static driver_t cas_pci_driver = { "cas", cas_pci_methods, sizeof(struct cas_softc) }; DRIVER_MODULE(cas, pci, cas_pci_driver, cas_devclass, 0, 0); DRIVER_MODULE(miibus, cas, miibus_driver, miibus_devclass, 0, 0); MODULE_DEPEND(cas, pci, 1, 1, 1); static const struct cas_pci_dev { uint32_t cpd_devid; uint8_t cpd_revid; int cpd_variant; const char *cpd_desc; } cas_pci_devlist[] = { { 0x0035100b, 0x0, CAS_SATURN, "NS DP83065 Saturn Gigabit Ethernet" }, { 0xabba108e, 0x10, CAS_CASPLUS, "Sun Cassini+ Gigabit Ethernet" }, { 0xabba108e, 0x0, CAS_CAS, "Sun Cassini Gigabit Ethernet" }, { 0, 0, 0, NULL } }; static int cas_pci_probe(device_t dev) { int i; for (i = 0; cas_pci_devlist[i].cpd_desc != NULL; i++) { if (pci_get_devid(dev) == cas_pci_devlist[i].cpd_devid && pci_get_revid(dev) >= cas_pci_devlist[i].cpd_revid) { device_set_desc(dev, cas_pci_devlist[i].cpd_desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static struct resource_spec cas_pci_res_spec[] = { { SYS_RES_IRQ, 0, RF_SHAREABLE | RF_ACTIVE }, /* CAS_RES_INTR */ { SYS_RES_MEMORY, PCIR_BAR(0), RF_ACTIVE }, /* CAS_RES_MEM */ { -1, 0 } }; #define CAS_LOCAL_MAC_ADDRESS "local-mac-address" #define CAS_PHY_INTERFACE "phy-interface" #define CAS_PHY_TYPE "phy-type" #define CAS_PHY_TYPE_PCS "pcs" static int cas_pci_attach(device_t dev) { char buf[sizeof(CAS_LOCAL_MAC_ADDRESS)]; struct cas_softc *sc; int i; #if !(defined(__powerpc__) || defined(__sparc64__)) u_char enaddr[4][ETHER_ADDR_LEN]; u_int j, k, lma, pcs[4], phy; #endif sc = device_get_softc(dev); sc->sc_variant = CAS_UNKNOWN; for (i = 0; cas_pci_devlist[i].cpd_desc != NULL; i++) { if (pci_get_devid(dev) == cas_pci_devlist[i].cpd_devid && pci_get_revid(dev) >= cas_pci_devlist[i].cpd_revid) { sc->sc_variant = cas_pci_devlist[i].cpd_variant; break; } } if (sc->sc_variant == CAS_UNKNOWN) { device_printf(dev, "unknown adaptor\n"); return (ENXIO); } /* PCI configuration */ pci_write_config(dev, PCIR_COMMAND, pci_read_config(dev, PCIR_COMMAND, 2) | PCIM_CMD_BUSMASTEREN | PCIM_CMD_MWRICEN | PCIM_CMD_PERRESPEN | PCIM_CMD_SERRESPEN, 2); sc->sc_dev = dev; if (sc->sc_variant == CAS_CAS && pci_get_devid(dev) < 0x02) /* Hardware checksumming may hang TX. 
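 */

/*
 * Editor's note: cas_pci_devlist above relies on its ordering.  Both
 * Sun entries share devid 0xabba108e, so the Cassini+ entry (minimum
 * revid 0x10) must precede the plain Cassini entry (revid 0x0) for
 * the ">= cpd_revid" scan in cas_pci_probe() to classify a Cassini+
 * correctly.
 */

/* Hardware checksumming may hang TX on early Cassini revisions.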
*/ sc->sc_flags |= CAS_NO_CSUM; if (sc->sc_variant == CAS_CASPLUS || sc->sc_variant == CAS_SATURN) sc->sc_flags |= CAS_REG_PLUS; if (sc->sc_variant == CAS_CAS || (sc->sc_variant == CAS_CASPLUS && pci_get_revid(dev) < 0x11)) sc->sc_flags |= CAS_TABORT; if (bootverbose) device_printf(dev, "flags=0x%x\n", sc->sc_flags); if (bus_alloc_resources(dev, cas_pci_res_spec, sc->sc_res)) { device_printf(dev, "failed to allocate resources\n"); bus_release_resources(dev, cas_pci_res_spec, sc->sc_res); return (ENXIO); } CAS_LOCK_INIT(sc, device_get_nameunit(dev)); #if defined(__powerpc__) || defined(__sparc64__) OF_getetheraddr(dev, sc->sc_enaddr); if (OF_getprop(ofw_bus_get_node(dev), CAS_PHY_INTERFACE, buf, sizeof(buf)) > 0 || OF_getprop(ofw_bus_get_node(dev), CAS_PHY_TYPE, buf, sizeof(buf)) > 0) { buf[sizeof(buf) - 1] = '\0'; if (strcmp(buf, CAS_PHY_TYPE_PCS) == 0) sc->sc_flags |= CAS_SERDES; } #else /* * Dig out VPD (vital product data) and read the MAC address as well * as the PHY type. The VPD resides in the PCI Expansion ROM (PCI * FCode) and can't be accessed via the PCI capability pointer. * SUNW,pci-ce and SUNW,pci-qge use the Enhanced VPD format described * in the free US Patent 7149820. */ #define PCI_ROMHDR_SIZE 0x1c #define PCI_ROMHDR_SIG 0x00 #define PCI_ROMHDR_SIG_MAGIC 0xaa55 /* little endian */ #define PCI_ROMHDR_PTR_DATA 0x18 #define PCI_ROM_SIZE 0x18 #define PCI_ROM_SIG 0x00 #define PCI_ROM_SIG_MAGIC 0x52494350 /* "PCIR", endian */ /* reversed */ #define PCI_ROM_VENDOR 0x04 #define PCI_ROM_DEVICE 0x06 #define PCI_ROM_PTR_VPD 0x08 #define PCI_VPDRES_BYTE0 0x00 #define PCI_VPDRES_ISLARGE(x) ((x) & 0x80) #define PCI_VPDRES_LARGE_NAME(x) ((x) & 0x7f) #define PCI_VPDRES_LARGE_LEN_LSB 0x01 #define PCI_VPDRES_LARGE_LEN_MSB 0x02 #define PCI_VPDRES_LARGE_SIZE 0x03 #define PCI_VPDRES_TYPE_ID_STRING 0x02 /* large */ #define PCI_VPDRES_TYPE_VPD 0x10 /* large */ #define PCI_VPD_KEY0 0x00 #define PCI_VPD_KEY1 0x01 #define PCI_VPD_LEN 0x02 #define PCI_VPD_SIZE 0x03 #define CAS_ROM_READ_1(sc, offs) \ CAS_READ_1((sc), CAS_PCI_ROM_OFFSET + (offs)) #define CAS_ROM_READ_2(sc, offs) \ CAS_READ_2((sc), CAS_PCI_ROM_OFFSET + (offs)) #define CAS_ROM_READ_4(sc, offs) \ CAS_READ_4((sc), CAS_PCI_ROM_OFFSET + (offs)) lma = phy = 0; memset(enaddr, 0, sizeof(enaddr)); memset(pcs, 0, sizeof(pcs)); /* Enable PCI Expansion ROM access. */ CAS_WRITE_4(sc, CAS_BIM_LDEV_OEN, CAS_BIM_LDEV_OEN_PAD | CAS_BIM_LDEV_OEN_PROM); /* Read PCI Expansion ROM header. */ if (CAS_ROM_READ_2(sc, PCI_ROMHDR_SIG) != PCI_ROMHDR_SIG_MAGIC || (i = CAS_ROM_READ_2(sc, PCI_ROMHDR_PTR_DATA)) < PCI_ROMHDR_SIZE) { device_printf(dev, "unexpected PCI Expansion ROM header\n"); goto fail_prom; } /* Read PCI Expansion ROM data. */ if (CAS_ROM_READ_4(sc, i + PCI_ROM_SIG) != PCI_ROM_SIG_MAGIC || CAS_ROM_READ_2(sc, i + PCI_ROM_VENDOR) != pci_get_vendor(dev) || CAS_ROM_READ_2(sc, i + PCI_ROM_DEVICE) != pci_get_device(dev) || (j = CAS_ROM_READ_2(sc, i + PCI_ROM_PTR_VPD)) < i + PCI_ROM_SIZE) { device_printf(dev, "unexpected PCI Expansion ROM data\n"); goto fail_prom; } /* Read PCI VPD. */ next: if (PCI_VPDRES_ISLARGE(CAS_ROM_READ_1(sc, j + PCI_VPDRES_BYTE0)) == 0) { device_printf(dev, "no large PCI VPD\n"); goto fail_prom; } i = (CAS_ROM_READ_1(sc, j + PCI_VPDRES_LARGE_LEN_MSB) << 8) | CAS_ROM_READ_1(sc, j + PCI_VPDRES_LARGE_LEN_LSB); switch (PCI_VPDRES_LARGE_NAME(CAS_ROM_READ_1(sc, j + PCI_VPDRES_BYTE0))) { case PCI_VPDRES_TYPE_ID_STRING: /* Skip identifier string. 
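 */

/*
 * Editor's aside, reconstructed from the parser below (not an
 * authoritative VPD reference): the Enhanced VPD instance properties
 * consumed here touch these offsets relative to the keyword header at
 * j:
 *
 *	+0	key0, must be 'Z' (Enhanced VPD)
 *	+3	'I' marks an instance property
 *	+6	data type: 'B' (byte array) or 'S' (string)
 *	+7	payload length
 *	+8	NUL-terminated property name, then the value, e.g.
 *		"local-mac-address" followed by six address bytes
 */

/* An identifier string resource is skipped wholesale.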
*/ j += PCI_VPDRES_LARGE_SIZE + i; goto next; case PCI_VPDRES_TYPE_VPD: for (j += PCI_VPDRES_LARGE_SIZE; i > 0; i -= PCI_VPD_SIZE + CAS_ROM_READ_1(sc, j + PCI_VPD_LEN), j += PCI_VPD_SIZE + CAS_ROM_READ_1(sc, j + PCI_VPD_LEN)) { if (CAS_ROM_READ_1(sc, j + PCI_VPD_KEY0) != 'Z') /* no Enhanced VPD */ continue; if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE) != 'I') /* no instance property */ continue; if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE + 3) == 'B') { /* byte array */ if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE + 4) != ETHER_ADDR_LEN) continue; bus_read_region_1(sc->sc_res[CAS_RES_MEM], CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE + 5, buf, sizeof(buf)); buf[sizeof(buf) - 1] = '\0'; if (strcmp(buf, CAS_LOCAL_MAC_ADDRESS) != 0) continue; bus_read_region_1(sc->sc_res[CAS_RES_MEM], CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE + 5 + sizeof(CAS_LOCAL_MAC_ADDRESS), enaddr[lma], sizeof(enaddr[lma])); lma++; if (lma == 4 && phy == 4) break; } else if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE + 3) == 'S') { /* string */ if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE + 4) != sizeof(CAS_PHY_TYPE_PCS)) continue; bus_read_region_1(sc->sc_res[CAS_RES_MEM], CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE + 5, buf, sizeof(buf)); buf[sizeof(buf) - 1] = '\0'; if (strcmp(buf, CAS_PHY_INTERFACE) == 0) k = sizeof(CAS_PHY_INTERFACE); else if (strcmp(buf, CAS_PHY_TYPE) == 0) k = sizeof(CAS_PHY_TYPE); else continue; bus_read_region_1(sc->sc_res[CAS_RES_MEM], CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE + 5 + k, buf, sizeof(buf)); buf[sizeof(buf) - 1] = '\0'; if (strcmp(buf, CAS_PHY_TYPE_PCS) == 0) pcs[phy] = 1; phy++; if (lma == 4 && phy == 4) break; } } break; default: device_printf(dev, "unexpected PCI VPD\n"); goto fail_prom; } fail_prom: CAS_WRITE_4(sc, CAS_BIM_LDEV_OEN, 0); if (lma == 0) { device_printf(dev, "could not determine Ethernet address\n"); goto fail; } i = 0; if (lma > 1 && pci_get_slot(dev) < nitems(enaddr)) i = pci_get_slot(dev); memcpy(sc->sc_enaddr, enaddr[i], ETHER_ADDR_LEN); if (phy == 0) { device_printf(dev, "could not determine PHY type\n"); goto fail; } i = 0; if (phy > 1 && pci_get_slot(dev) < nitems(pcs)) i = pci_get_slot(dev); if (pcs[i] != 0) sc->sc_flags |= CAS_SERDES; #endif if (cas_attach(sc) != 0) { device_printf(dev, "could not be attached\n"); goto fail; } if (bus_setup_intr(dev, sc->sc_res[CAS_RES_INTR], INTR_TYPE_NET | INTR_MPSAFE, cas_intr, NULL, sc, &sc->sc_ih) != 0) { device_printf(dev, "failed to set up interrupt\n"); cas_detach(sc); goto fail; } return (0); fail: CAS_LOCK_DESTROY(sc); bus_release_resources(dev, cas_pci_res_spec, sc->sc_res); return (ENXIO); } static int cas_pci_detach(device_t dev) { struct cas_softc *sc; sc = device_get_softc(dev); bus_teardown_intr(dev, sc->sc_res[CAS_RES_INTR], sc->sc_ih); cas_detach(sc); CAS_LOCK_DESTROY(sc); bus_release_resources(dev, cas_pci_res_spec, sc->sc_res); return (0); } static int cas_pci_suspend(device_t dev) { cas_suspend(device_get_softc(dev)); return (0); } static int cas_pci_resume(device_t dev) { cas_resume(device_get_softc(dev)); return (0); } Index: head/sys/dev/cas/if_casvar.h =================================================================== --- head/sys/dev/cas/if_casvar.h (revision 324445) +++ head/sys/dev/cas/if_casvar.h (revision 324446) @@ -1,263 +1,248 @@ /*- * Copyright (C) 2001 Eduardo Horvath. * Copyright (c) 2008 Marius Strobl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: NetBSD: gemvar.h,v 1.8 2002/05/15 02:36:12 matt Exp * from: FreeBSD: if_gemvar.h 177560 2008-03-24 17:23:53Z marius * * $FreeBSD$ */ #ifndef _IF_CASVAR_H #define _IF_CASVAR_H /* * The page size is configurable, but needs to be at least 8k (the * default) in order to also support jumbo buffers. */ #define CAS_PAGE_SIZE 8192 /* * Transmit descriptor ring size - this is arbitrary, but allocate * enough descriptors for 64 pending transmissions and 16 segments * per packet. This limit is not actually enforced (packets with * more segments can be sent, depending on the busdma backend); it * is however used as an estimate for the TX window size. */ #define CAS_NTXSEGS 16 #define CAS_TXQUEUELEN 64 #define CAS_NTXDESC (CAS_TXQUEUELEN * CAS_NTXSEGS) #define CAS_MAXTXFREE (CAS_NTXDESC - 1) #define CAS_NTXDESC_MASK (CAS_NTXDESC - 1) #define CAS_NEXTTX(x) ((x + 1) & CAS_NTXDESC_MASK) /* * Receive completion ring size - we have one completion per * incoming packet (though the opposite isn't necessarily true), * so this logic is a little simpler. */ #define CAS_NRXCOMP 4096 #define CAS_NRXCOMP_MASK (CAS_NRXCOMP - 1) #define CAS_NEXTRXCOMP(x) ((x + 1) & CAS_NRXCOMP_MASK) /* * Receive descriptor ring sizes - for Cassini+ and Saturn both * rings must at least be initialized. */ #define CAS_NRXDESC 1024 #define CAS_NRXDESC_MASK (CAS_NRXDESC - 1) #define CAS_NEXTRXDESC(x) ((x + 1) & CAS_NRXDESC_MASK) #define CAS_NRXDESC2 32 #define CAS_NRXDESC2_MASK (CAS_NRXDESC2 - 1) #define CAS_NEXTRXDESC2(x) ((x + 1) & CAS_NRXDESC2_MASK) /* * How many ticks to wait before retrying on an RX descriptor that is * still owned by the hardware. */ #define CAS_RXOWN_TICKS (hz / 50) /* * Control structures are DMA'd to the chip. We allocate them * in a single clump that maps to a single DMA segment to make * several things easier.
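 */

/*
 * Editor's sketch (illustration only, not used by the driver): every
 * ring macro above depends on the ring size being a power of two, so
 * that advancing an index is a masked increment instead of a modulo:
 */
#define CAS_RING_NEXT(x, size)	(((x) + 1) & ((size) - 1))
/* e.g. CAS_RING_NEXT(1023, 1024) == 0: the index wraps without a division. */

/* The control data clump, DMA'd to the chip as one segment.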
*/ struct cas_control_data { struct cas_desc ccd_txdescs[CAS_NTXDESC]; /* TX descriptors */ struct cas_rx_comp ccd_rxcomps[CAS_NRXCOMP]; /* RX completions */ struct cas_desc ccd_rxdescs[CAS_NRXDESC]; /* RX descriptors */ struct cas_desc ccd_rxdescs2[CAS_NRXDESC2]; /* RX descriptors 2 */ }; #define CAS_CDOFF(x) offsetof(struct cas_control_data, x) #define CAS_CDTXDOFF(x) CAS_CDOFF(ccd_txdescs[(x)]) #define CAS_CDRXCOFF(x) CAS_CDOFF(ccd_rxcomps[(x)]) #define CAS_CDRXDOFF(x) CAS_CDOFF(ccd_rxdescs[(x)]) #define CAS_CDRXD2OFF(x) CAS_CDOFF(ccd_rxdescs2[(x)]) /* * software state for transmit job mbufs (may be elements of mbuf chains) */ struct cas_txsoft { struct mbuf *txs_mbuf; /* head of our mbuf chain */ bus_dmamap_t txs_dmamap; /* our DMA map */ u_int txs_firstdesc; /* first descriptor in packet */ u_int txs_lastdesc; /* last descriptor in packet */ u_int txs_ndescs; /* number of descriptors */ STAILQ_ENTRY(cas_txsoft) txs_q; }; STAILQ_HEAD(cas_txsq, cas_txsoft); /* * software state for receive descriptors */ struct cas_rxdsoft { void *rxds_buf; /* receive buffer */ bus_dmamap_t rxds_dmamap; /* our DMA map */ bus_addr_t rxds_paddr; /* physical address of the segment */ -#if __FreeBSD_version < 800016 - struct cas_softc *rxds_sc; /* softc pointer */ - u_int rxds_idx; /* our index */ -#endif u_int rxds_refcount; /* hardware + mbuf references */ }; /* * software state per device */ struct cas_softc { struct ifnet *sc_ifp; struct mtx sc_mtx; device_t sc_miibus; struct mii_data *sc_mii; /* MII media control */ device_t sc_dev; /* generic device information */ u_char sc_enaddr[ETHER_ADDR_LEN]; struct callout sc_tick_ch; /* tick callout */ struct callout sc_rx_ch; /* delayed RX callout */ struct task sc_intr_task; struct task sc_tx_task; struct taskqueue *sc_tq; u_int sc_wdog_timer; /* watchdog timer */ void *sc_ih; struct resource *sc_res[2]; #define CAS_RES_INTR 0 #define CAS_RES_MEM 1 bus_dma_tag_t sc_pdmatag; /* parent bus DMA tag */ bus_dma_tag_t sc_rdmatag; /* RX bus DMA tag */ bus_dma_tag_t sc_tdmatag; /* TX bus DMA tag */ bus_dma_tag_t sc_cdmatag; /* control data bus DMA tag */ bus_dmamap_t sc_dmamap; /* bus DMA handle */ u_int sc_variant; #define CAS_UNKNOWN 0 /* don't know */ #define CAS_CAS 1 /* Sun Cassini */ #define CAS_CASPLUS 2 /* Sun Cassini+ */ #define CAS_SATURN 3 /* National Semiconductor Saturn */ u_int sc_flags; #define CAS_INITED (1 << 0) /* reset persistent regs init'ed */ #define CAS_NO_CSUM (1 << 1) /* don't use hardware checksumming */ #define CAS_LINK (1 << 2) /* link is up */ #define CAS_REG_PLUS (1 << 3) /* has Cassini+ registers */ #define CAS_SERDES (1 << 4) /* use the SERDES */ #define CAS_TABORT (1 << 5) /* has target abort issues */ bus_dmamap_t sc_cddmamap; /* control data DMA map */ bus_addr_t sc_cddma; /* * software state for transmit and receive descriptors */ struct cas_txsoft sc_txsoft[CAS_TXQUEUELEN]; struct cas_rxdsoft sc_rxdsoft[CAS_NRXDESC]; /* * control data structures */ struct cas_control_data *sc_control_data; #define sc_txdescs sc_control_data->ccd_txdescs #define sc_rxcomps sc_control_data->ccd_rxcomps #define sc_rxdescs sc_control_data->ccd_rxdescs #define sc_rxdescs2 sc_control_data->ccd_rxdescs2 u_int sc_txfree; /* number of free TX descriptors */ u_int sc_txnext; /* next ready TX descriptor */ u_int sc_txwin; /* TX desc. since last TX intr. 
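 */

/*
 * Editor's note (illustrative arithmetic, not new driver code):
 * because every ring lives in the single cas_control_data clump, the
 * bus address of any element is the clump's base DMA address plus a
 * plain offsetof(), which is all CAS_CDOFF() and the CAS_CD*ADDR()
 * macros compute, e.g.:
 *
 *	bus_addr_t txd0 = sc->sc_cddma + CAS_CDTXDOFF(0);
 *
 * One bus_dmamem_alloc() and one map therefore cover all four rings.
 */

/* Software TX queues.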
*/ struct cas_txsq sc_txfreeq; /* free software TX descriptors */ struct cas_txsq sc_txdirtyq; /* dirty software TX descriptors */ u_int sc_rxcptr; /* next ready RX completion */ u_int sc_rxdptr; /* next ready RX descriptor */ uint32_t sc_mac_rxcfg; /* RX MAC conf. % CAS_MAC_RX_CONF_EN */ int sc_ifflags; }; #define CAS_BARRIER(sc, offs, len, flags) \ bus_barrier((sc)->sc_res[CAS_RES_MEM], (offs), (len), (flags)) #define CAS_READ_N(n, sc, offs) \ bus_read_ ## n((sc)->sc_res[CAS_RES_MEM], (offs)) #define CAS_READ_1(sc, offs) CAS_READ_N(1, (sc), (offs)) #define CAS_READ_2(sc, offs) CAS_READ_N(2, (sc), (offs)) #define CAS_READ_4(sc, offs) CAS_READ_N(4, (sc), (offs)) #define CAS_WRITE_N(n, sc, offs, v) \ bus_write_ ## n((sc)->sc_res[CAS_RES_MEM], (offs), (v)) #define CAS_WRITE_1(sc, offs, v) CAS_WRITE_N(1, (sc), (offs), (v)) #define CAS_WRITE_2(sc, offs, v) CAS_WRITE_N(2, (sc), (offs), (v)) #define CAS_WRITE_4(sc, offs, v) CAS_WRITE_N(4, (sc), (offs), (v)) #define CAS_CDTXDADDR(sc, x) ((sc)->sc_cddma + CAS_CDTXDOFF((x))) #define CAS_CDRXCADDR(sc, x) ((sc)->sc_cddma + CAS_CDRXCOFF((x))) #define CAS_CDRXDADDR(sc, x) ((sc)->sc_cddma + CAS_CDRXDOFF((x))) #define CAS_CDRXD2ADDR(sc, x) ((sc)->sc_cddma + CAS_CDRXD2OFF((x))) #define CAS_CDSYNC(sc, ops) \ bus_dmamap_sync((sc)->sc_cdmatag, (sc)->sc_cddmamap, (ops)); #define __CAS_UPDATE_RXDESC(rxd, rxds, s) \ do { \ \ refcount_init(&(rxds)->rxds_refcount, 1); \ (rxd)->cd_buf_ptr = htole64((rxds)->rxds_paddr); \ KASSERT((s) < CAS_RD_BUF_INDEX_MASK >> CAS_RD_BUF_INDEX_SHFT, \ ("%s: RX buffer index too large!", __func__)); \ (rxd)->cd_flags = \ htole64((uint64_t)((s) << CAS_RD_BUF_INDEX_SHFT)); \ } while (0) #define CAS_UPDATE_RXDESC(sc, d, s) \ __CAS_UPDATE_RXDESC(&(sc)->sc_rxdescs[(d)], \ &(sc)->sc_rxdsoft[(s)], (s)) -#if __FreeBSD_version < 800016 -#define CAS_INIT_RXDESC(sc, d, s) \ -do { \ - struct cas_rxdsoft *__rxds = &(sc)->sc_rxdsoft[(s)]; \ - \ - __rxds->rxds_sc = (sc); \ - __rxds->rxds_idx = (s); \ - __CAS_UPDATE_RXDESC(&(sc)->sc_rxdescs[(d)], __rxds, (s)); \ -} while (0) -#else #define CAS_INIT_RXDESC(sc, d, s) CAS_UPDATE_RXDESC(sc, d, s) -#endif #define CAS_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->sc_mtx, _name, MTX_NETWORK_LOCK, MTX_DEF) #define CAS_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) #define CAS_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) #define CAS_LOCK_ASSERT(_sc, _what) mtx_assert(&(_sc)->sc_mtx, (_what)) #define CAS_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->sc_mtx) #define CAS_LOCK_OWNED(_sc) mtx_owned(&(_sc)->sc_mtx) #endif Index: head/sys/dev/cxgbe/t4_sge.c =================================================================== --- head/sys/dev/cxgbe/t4_sge.c (revision 324445) +++ head/sys/dev/cxgbe/t4_sge.c (revision 324446) @@ -1,5360 +1,5360 @@ /*- * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_NETMAP #include #include #include #include #include #endif #include "common/common.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "common/t4_msg.h" #include "t4_l2t.h" #include "t4_mp_ring.h" #ifdef T4_PKT_TIMESTAMP #define RX_COPY_THRESHOLD (MINCLSIZE - 8) #else #define RX_COPY_THRESHOLD MINCLSIZE #endif /* * Ethernet frames are DMA'd at this byte offset into the freelist buffer. * 0-7 are valid values. */ static int fl_pktshift = 2; TUNABLE_INT("hw.cxgbe.fl_pktshift", &fl_pktshift); /* * Pad ethernet payload up to this boundary. * -1: driver should figure out a good value. * 0: disable padding. * Any power of 2 from 32 to 4096 (both inclusive) is also a valid value. */ int fl_pad = -1; TUNABLE_INT("hw.cxgbe.fl_pad", &fl_pad); /* * Status page length. * -1: driver should figure out a good value. * 64 or 128 are the only other valid values. */ static int spg_len = -1; TUNABLE_INT("hw.cxgbe.spg_len", &spg_len); /* * Congestion drops. * -1: no congestion feedback (not recommended). * 0: backpressure the channel instead of dropping packets right away. * 1: no backpressure, drop packets for the congested queue immediately. */ static int cong_drop = 0; TUNABLE_INT("hw.cxgbe.cong_drop", &cong_drop); /* * Deliver multiple frames in the same free list buffer if they fit. * -1: let the driver decide whether to enable buffer packing or not. * 0: disable buffer packing. * 1: enable buffer packing. */ static int buffer_packing = -1; TUNABLE_INT("hw.cxgbe.buffer_packing", &buffer_packing); /* * Start next frame in a packed buffer at this boundary. * -1: driver should figure out a good value. * T4: driver will ignore this and use the same value as fl_pad above. * T5: 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value. */ static int fl_pack = -1; TUNABLE_INT("hw.cxgbe.fl_pack", &fl_pack); /* * Allow the driver to create mbuf(s) in a cluster allocated for rx. * 0: never; always allocate mbufs from the zone_mbuf UMA zone. * 1: ok to create mbuf(s) within a cluster if there is room. */ static int allow_mbufs_in_cluster = 1; TUNABLE_INT("hw.cxgbe.allow_mbufs_in_cluster", &allow_mbufs_in_cluster); /* * Largest rx cluster size that the driver is allowed to allocate. */ static int largest_rx_cluster = MJUM16BYTES; TUNABLE_INT("hw.cxgbe.largest_rx_cluster", &largest_rx_cluster); /* * Size of cluster allocation that's most likely to succeed. 
The driver will * fall back to this size if it fails to allocate clusters larger than this. */ static int safest_rx_cluster = PAGE_SIZE; TUNABLE_INT("hw.cxgbe.safest_rx_cluster", &safest_rx_cluster); /* * The interrupt holdoff timers are multiplied by this value on T6+. * 1 and 3-17 (both inclusive) are legal values. */ static int tscale = 1; TUNABLE_INT("hw.cxgbe.tscale", &tscale); /* * Number of LRO entries in the lro_ctrl structure per rx queue. */ static int lro_entries = TCP_LRO_ENTRIES; TUNABLE_INT("hw.cxgbe.lro_entries", &lro_entries); /* * This enables presorting of frames before they're fed into tcp_lro_rx. */ static int lro_mbufs = 0; TUNABLE_INT("hw.cxgbe.lro_mbufs", &lro_mbufs); struct txpkts { u_int wr_type; /* type 0 or type 1 */ u_int npkt; /* # of packets in this work request */ u_int plen; /* total payload (sum of all packets) */ u_int len16; /* # of 16B pieces used by this work request */ }; /* A packet's SGL. This + m_pkthdr has all info needed for tx */ struct sgl { struct sglist sg; struct sglist_seg seg[TX_SGL_SEGS]; }; static int service_iq(struct sge_iq *, int); static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t); static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *); static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int); static inline void init_fl(struct adapter *, struct sge_fl *, int, int, char *); static inline void init_eq(struct adapter *, struct sge_eq *, int, int, uint8_t, uint16_t, char *); static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *, bus_addr_t *, void **); static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t, void *); static int alloc_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *, int, int); static int free_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *); static void add_fl_sysctls(struct adapter *, struct sysctl_ctx_list *, struct sysctl_oid *, struct sge_fl *); static int alloc_fwq(struct adapter *); static int free_fwq(struct adapter *); static int alloc_mgmtq(struct adapter *); static int free_mgmtq(struct adapter *); static int alloc_rxq(struct vi_info *, struct sge_rxq *, int, int, struct sysctl_oid *); static int free_rxq(struct vi_info *, struct sge_rxq *); #ifdef TCP_OFFLOAD static int alloc_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *, int, int, struct sysctl_oid *); static int free_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *); #endif #ifdef DEV_NETMAP static int alloc_nm_rxq(struct vi_info *, struct sge_nm_rxq *, int, int, struct sysctl_oid *); static int free_nm_rxq(struct vi_info *, struct sge_nm_rxq *); static int alloc_nm_txq(struct vi_info *, struct sge_nm_txq *, int, int, struct sysctl_oid *); static int free_nm_txq(struct vi_info *, struct sge_nm_txq *); #endif static int ctrl_eq_alloc(struct adapter *, struct sge_eq *); static int eth_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); #ifdef TCP_OFFLOAD static int ofld_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); #endif static int alloc_eq(struct adapter *, struct vi_info *, struct sge_eq *); static int free_eq(struct adapter *, struct sge_eq *); static int alloc_wrq(struct adapter *, struct vi_info *, struct sge_wrq *, struct sysctl_oid *); static int free_wrq(struct adapter *, struct sge_wrq *); static int alloc_txq(struct vi_info *, struct sge_txq *, int, struct sysctl_oid *); static int free_txq(struct vi_info *, struct sge_txq *); static void oneseg_dma_callback(void *, 
bus_dma_segment_t *, int, int); static inline void ring_fl_db(struct adapter *, struct sge_fl *); static int refill_fl(struct adapter *, struct sge_fl *, int); static void refill_sfl(void *); static int alloc_fl_sdesc(struct sge_fl *); static void free_fl_sdesc(struct adapter *, struct sge_fl *); static void find_best_refill_source(struct adapter *, struct sge_fl *, int); static void find_safe_refill_source(struct adapter *, struct sge_fl *); static void add_fl_to_sfl(struct adapter *, struct sge_fl *); static inline void get_pkt_gl(struct mbuf *, struct sglist *); static inline u_int txpkt_len16(u_int, u_int); static inline u_int txpkt_vm_len16(u_int, u_int); static inline u_int txpkts0_len16(u_int); static inline u_int txpkts1_len16(void); static u_int write_txpkt_wr(struct sge_txq *, struct fw_eth_tx_pkt_wr *, struct mbuf *, u_int); static u_int write_txpkt_vm_wr(struct adapter *, struct sge_txq *, struct fw_eth_tx_pkt_vm_wr *, struct mbuf *, u_int); static int try_txpkts(struct mbuf *, struct mbuf *, struct txpkts *, u_int); static int add_to_txpkts(struct mbuf *, struct txpkts *, u_int); static u_int write_txpkts_wr(struct sge_txq *, struct fw_eth_tx_pkts_wr *, struct mbuf *, const struct txpkts *, u_int); static void write_gl_to_txd(struct sge_txq *, struct mbuf *, caddr_t *, int); static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int); static inline void ring_eq_db(struct adapter *, struct sge_eq *, u_int); static inline uint16_t read_hw_cidx(struct sge_eq *); static inline u_int reclaimable_tx_desc(struct sge_eq *); static inline u_int total_available_tx_desc(struct sge_eq *); static u_int reclaim_tx_descs(struct sge_txq *, u_int); static void tx_reclaim(void *, int); static __be64 get_flit(struct sglist_seg *, int, int); static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *, struct mbuf *); static int handle_fw_msg(struct sge_iq *, const struct rss_header *, struct mbuf *); static int t4_handle_wrerr_rpl(struct adapter *, const __be64 *); static void wrq_tx_drain(void *, int); static void drain_wrq_wr_list(struct adapter *, struct sge_wrq *); static int sysctl_uint16(SYSCTL_HANDLER_ARGS); static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS); static int sysctl_tc(SYSCTL_HANDLER_ARGS); static counter_u64_t extfree_refs; static counter_u64_t extfree_rels; an_handler_t t4_an_handler; fw_msg_handler_t t4_fw_msg_handler[NUM_FW6_TYPES]; cpl_handler_t t4_cpl_handler[NUM_CPL_CMDS]; static int an_not_handled(struct sge_iq *iq, const struct rsp_ctrl *ctrl) { #ifdef INVARIANTS panic("%s: async notification on iq %p (ctrl %p)", __func__, iq, ctrl); #else log(LOG_ERR, "%s: async notification on iq %p (ctrl %p)\n", __func__, iq, ctrl); #endif return (EDOOFUS); } int t4_register_an_handler(an_handler_t h) { uintptr_t *loc, new; new = h ? (uintptr_t)h : (uintptr_t)an_not_handled; loc = (uintptr_t *) &t4_an_handler; atomic_store_rel_ptr(loc, new); return (0); } static int fw_msg_not_handled(struct adapter *sc, const __be64 *rpl) { const struct cpl_fw6_msg *cpl = __containerof(rpl, struct cpl_fw6_msg, data[0]); #ifdef INVARIANTS panic("%s: fw_msg type %d", __func__, cpl->type); #else log(LOG_ERR, "%s: fw_msg type %d\n", __func__, cpl->type); #endif return (EDOOFUS); } int t4_register_fw_msg_handler(int type, fw_msg_handler_t h) { uintptr_t *loc, new; if (type >= nitems(t4_fw_msg_handler)) return (EINVAL); /* * These are dispatched by the handler for FW{4|6}_CPL_MSG using the CPL * handler dispatch table. Reject any attempt to install a handler for * this subtype. 
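 */

/*
 * Editor's sketch (hypothetical consumer, not part of this commit): a
 * module hooks a CPL opcode by registering a function with the
 * cpl_handler_t signature used by cpl_not_handled() above; passing
 * NULL restores the default handler:
 *
 *	static int
 *	my_rx_handler(struct sge_iq *iq, const struct rss_header *rss,
 *	    struct mbuf *m)
 *	{
 *		... consume m ...
 *		return (0);
 *	}
 *
 *	t4_register_cpl_handler(CPL_RX_PKT, my_rx_handler);
 */

/* Reject the FW{4|6}_CPL_MSG subtypes, as explained above.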
*/ if (type == FW_TYPE_RSSCPL || type == FW6_TYPE_RSSCPL) return (EINVAL); new = h ? (uintptr_t)h : (uintptr_t)fw_msg_not_handled; loc = (uintptr_t *) &t4_fw_msg_handler[type]; atomic_store_rel_ptr(loc, new); return (0); } static int cpl_not_handled(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { #ifdef INVARIANTS panic("%s: opcode 0x%02x on iq %p with payload %p", __func__, rss->opcode, iq, m); #else log(LOG_ERR, "%s: opcode 0x%02x on iq %p with payload %p\n", __func__, rss->opcode, iq, m); m_freem(m); #endif return (EDOOFUS); } int t4_register_cpl_handler(int opcode, cpl_handler_t h) { uintptr_t *loc, new; if (opcode >= nitems(t4_cpl_handler)) return (EINVAL); new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled; loc = (uintptr_t *) &t4_cpl_handler[opcode]; atomic_store_rel_ptr(loc, new); return (0); } /* * Called on MOD_LOAD. Validates and calculates the SGE tunables. */ void t4_sge_modload(void) { int i; if (fl_pktshift < 0 || fl_pktshift > 7) { printf("Invalid hw.cxgbe.fl_pktshift value (%d)," " using 2 instead.\n", fl_pktshift); fl_pktshift = 2; } if (spg_len != 64 && spg_len != 128) { int len; #if defined(__i386__) || defined(__amd64__) len = cpu_clflush_line_size > 64 ? 128 : 64; #else len = 64; #endif if (spg_len != -1) { printf("Invalid hw.cxgbe.spg_len value (%d)," " using %d instead.\n", spg_len, len); } spg_len = len; } if (cong_drop < -1 || cong_drop > 1) { printf("Invalid hw.cxgbe.cong_drop value (%d)," " using 0 instead.\n", cong_drop); cong_drop = 0; } if (tscale != 1 && (tscale < 3 || tscale > 17)) { printf("Invalid hw.cxgbe.tscale value (%d)," " using 1 instead.\n", tscale); tscale = 1; } extfree_refs = counter_u64_alloc(M_WAITOK); extfree_rels = counter_u64_alloc(M_WAITOK); counter_u64_zero(extfree_refs); counter_u64_zero(extfree_rels); t4_an_handler = an_not_handled; for (i = 0; i < nitems(t4_fw_msg_handler); i++) t4_fw_msg_handler[i] = fw_msg_not_handled; for (i = 0; i < nitems(t4_cpl_handler); i++) t4_cpl_handler[i] = cpl_not_handled; t4_register_cpl_handler(CPL_FW4_MSG, handle_fw_msg); t4_register_cpl_handler(CPL_FW6_MSG, handle_fw_msg); t4_register_cpl_handler(CPL_SGE_EGR_UPDATE, handle_sge_egr_update); t4_register_cpl_handler(CPL_RX_PKT, t4_eth_rx); t4_register_fw_msg_handler(FW6_TYPE_CMD_RPL, t4_handle_fw_rpl); t4_register_fw_msg_handler(FW6_TYPE_WRERR_RPL, t4_handle_wrerr_rpl); } void t4_sge_modunload(void) { counter_u64_free(extfree_refs); counter_u64_free(extfree_rels); } uint64_t t4_sge_extfree_refs(void) { uint64_t refs, rels; rels = counter_u64_fetch(extfree_rels); refs = counter_u64_fetch(extfree_refs); return (refs - rels); } static inline void setup_pad_and_pack_boundaries(struct adapter *sc) { uint32_t v, m; int pad, pack, pad_shift; pad_shift = chip_id(sc) > CHELSIO_T5 ? X_T6_INGPADBOUNDARY_SHIFT : X_INGPADBOUNDARY_SHIFT; pad = fl_pad; if (fl_pad < (1 << pad_shift) || fl_pad > (1 << (pad_shift + M_INGPADBOUNDARY)) || !powerof2(fl_pad)) { /* * If there is any chance that we might use buffer packing and * the chip is a T4, then pick 64 as the pad/pack boundary. Set * it to the minimum allowed in all other cases. */ pad = is_t4(sc) && buffer_packing ? 64 : 1 << pad_shift; /* * For fl_pad = 0 we'll still write a reasonable value to the * register but all the freelists will opt out of padding. * We'll complain here only if the user tried to set it to a * value greater than 0 that was invalid. 
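 */

/*
 * Editor's worked example (the shift value of 5 is assumed from the
 * pre-T6 X_INGPADBOUNDARY_SHIFT, not stated in this file): the pad
 * boundary is written log2-encoded, v = V_INGPADBOUNDARY(ilog2(pad) -
 * pad_shift), so a 64-byte boundary encodes as ilog2(64) - 5 = 1 and
 * the smallest legal boundary is 1 << 5 = 32 bytes.
 */

/* Complain only about invalid non-zero requests.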
*/ if (fl_pad > 0) { device_printf(sc->dev, "Invalid hw.cxgbe.fl_pad value" " (%d), using %d instead.\n", fl_pad, pad); } } m = V_INGPADBOUNDARY(M_INGPADBOUNDARY); v = V_INGPADBOUNDARY(ilog2(pad) - pad_shift); t4_set_reg_field(sc, A_SGE_CONTROL, m, v); if (is_t4(sc)) { if (fl_pack != -1 && fl_pack != pad) { /* Complain but carry on. */ device_printf(sc->dev, "hw.cxgbe.fl_pack (%d) ignored," " using %d instead.\n", fl_pack, pad); } return; } pack = fl_pack; if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 || !powerof2(fl_pack)) { pack = max(sc->params.pci.mps, CACHE_LINE_SIZE); MPASS(powerof2(pack)); if (pack < 16) pack = 16; if (pack == 32) pack = 64; if (pack > 4096) pack = 4096; if (fl_pack != -1) { device_printf(sc->dev, "Invalid hw.cxgbe.fl_pack value" " (%d), using %d instead.\n", fl_pack, pack); } } m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY); if (pack == 16) v = V_INGPACKBOUNDARY(0); else v = V_INGPACKBOUNDARY(ilog2(pack) - 5); MPASS(!is_t4(sc)); /* T4 doesn't have SGE_CONTROL2 */ t4_set_reg_field(sc, A_SGE_CONTROL2, m, v); } /* * adap->params.vpd.cclk must be set up before this is called. */ void t4_tweak_chip_settings(struct adapter *sc) { int i; uint32_t v, m; int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200}; int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk; int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */ uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); static int sge_flbuf_sizes[] = { MCLBYTES, #if MJUMPAGESIZE != MCLBYTES MJUMPAGESIZE, MJUMPAGESIZE - CL_METADATA_SIZE, MJUMPAGESIZE - 2 * MSIZE - CL_METADATA_SIZE, #endif MJUM9BYTES, MJUM16BYTES, MCLBYTES - MSIZE - CL_METADATA_SIZE, MJUM9BYTES - CL_METADATA_SIZE, MJUM16BYTES - CL_METADATA_SIZE, }; KASSERT(sc->flags & MASTER_PF, ("%s: trying to change chip settings when not master.", __func__)); m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE; v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE | V_EGRSTATUSPAGESIZE(spg_len == 128); t4_set_reg_field(sc, A_SGE_CONTROL, m, v); setup_pad_and_pack_boundaries(sc); v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10); t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v); KASSERT(nitems(sge_flbuf_sizes) <= SGE_FLBUF_SIZES, ("%s: hw buffer size table too big", __func__)); for (i = 0; i < min(nitems(sge_flbuf_sizes), SGE_FLBUF_SIZES); i++) { t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i), sge_flbuf_sizes[i]); } v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) | V_THRESHOLD_2(intr_pktcount[2]) | V_THRESHOLD_3(intr_pktcount[3]); t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, v); KASSERT(intr_timer[0] <= timer_max, ("%s: not a single usable timer (%d, %d)", __func__, intr_timer[0], timer_max)); for (i = 1; i < nitems(intr_timer); i++) { KASSERT(intr_timer[i] >= intr_timer[i - 1], ("%s: timers not listed in increasing order (%d)", __func__, i)); while (intr_timer[i] > timer_max) { if (i == nitems(intr_timer) - 1) { intr_timer[i] = timer_max; break; } intr_timer[i] += intr_timer[i - 1]; intr_timer[i] /= 2; } } v = V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) | V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1])); t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, v); v = V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) | V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3])); t4_write_reg(sc, 
A_SGE_TIMER_VALUE_2_AND_3, v); v = V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) | V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5])); t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, v); if (chip_id(sc) >= CHELSIO_T6) { m = V_TSCALE(M_TSCALE); if (tscale == 1) v = 0; else v = V_TSCALE(tscale - 2); t4_set_reg_field(sc, A_SGE_ITP_CONTROL, m, v); if (sc->debug_flags & DF_DISABLE_TCB_CACHE) { m = V_RDTHRESHOLD(M_RDTHRESHOLD) | F_WRTHRTHRESHEN | V_WRTHRTHRESH(M_WRTHRTHRESH); t4_tp_pio_read(sc, &v, 1, A_TP_CMM_CONFIG, 1); v &= ~m; v |= V_RDTHRESHOLD(1) | F_WRTHRTHRESHEN | V_WRTHRTHRESH(16); t4_tp_pio_write(sc, &v, 1, A_TP_CMM_CONFIG, 1); } } /* 4K, 16K, 64K, 256K DDP "page sizes" for TDDP */ v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6); t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, v); /* * 4K, 8K, 16K, 64K DDP "page sizes" for iSCSI DDP. These have been * chosen with MAXPHYS = 128K in mind. The largest DDP buffer that we * may have to deal with is MAXPHYS + 1 page. */ v = V_HPZ0(0) | V_HPZ1(1) | V_HPZ2(2) | V_HPZ3(4); t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, v); /* We use multiple DDP page sizes both in plain-TOE and ISCSI modes. */ m = v = F_TDDPTAGTCB | F_ISCSITAGTCB; t4_set_reg_field(sc, A_ULP_RX_CTL, m, v); m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; t4_set_reg_field(sc, A_TP_PARA_REG5, m, v); } /* * SGE wants the buffer to be at least 64B and then a multiple of 16. If * padding is in use, the buffer's start and end need to be aligned to the pad * boundary as well. We'll just make sure that the size is a multiple of the * boundary here, it is up to the buffer allocation code to make sure the start * of the buffer is aligned as well. */ static inline int hwsz_ok(struct adapter *sc, int hwsz) { int mask = fl_pad ? sc->params.sge.pad_boundary - 1 : 16 - 1; return (hwsz >= 64 && (hwsz & mask) == 0); } /* * XXX: driver really should be able to deal with unexpected settings. */ int t4_read_chip_settings(struct adapter *sc) { struct sge *s = &sc->sge; struct sge_params *sp = &sc->params.sge; int i, j, n, rc = 0; uint32_t m, v, r; uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); static int sw_buf_sizes[] = { /* Sorted by size */ MCLBYTES, #if MJUMPAGESIZE != MCLBYTES MJUMPAGESIZE, #endif MJUM9BYTES, MJUM16BYTES }; struct sw_zone_info *swz, *safe_swz; struct hw_buf_info *hwb; m = F_RXPKTCPLMODE; v = F_RXPKTCPLMODE; r = sc->params.sge.sge_control; if ((r & m) != v) { device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r); rc = EINVAL; } /* * If this changes then every single use of PAGE_SHIFT in the driver * needs to be carefully reviewed for PAGE_SHIFT vs sp->page_shift. */ if (sp->page_shift != PAGE_SHIFT) { device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", r); rc = EINVAL; } /* Filter out unusable hw buffer sizes entirely (mark with -2). */ hwb = &s->hw_buf_info[0]; for (i = 0; i < nitems(s->hw_buf_info); i++, hwb++) { r = sc->params.sge.sge_fl_buffer_size[i]; hwb->size = r; hwb->zidx = hwsz_ok(sc, r) ? -1 : -2; hwb->next = -1; } /* * Create a sorted list in decreasing order of hw buffer sizes (and so * increasing order of spare area) for each software zone. * * If padding is enabled then the start and end of the buffer must align * to the pad boundary; if packing is enabled then they must align with * the pack boundary as well. Allocations from the cluster zones are * aligned to min(size, 4K), so the buffer starts at that alignment and * ends at hwb->size alignment. 
If mbuf inlining is allowed the * starting alignment will be reduced to MSIZE and the driver will * exercise appropriate caution when deciding on the best buffer layout * to use. */ n = 0; /* no usable buffer size to begin with */ swz = &s->sw_zone_info[0]; safe_swz = NULL; for (i = 0; i < SW_ZONE_SIZES; i++, swz++) { int8_t head = -1, tail = -1; swz->size = sw_buf_sizes[i]; swz->zone = m_getzone(swz->size); swz->type = m_gettype(swz->size); if (swz->size < PAGE_SIZE) { MPASS(powerof2(swz->size)); if (fl_pad && (swz->size % sp->pad_boundary != 0)) continue; } if (swz->size == safest_rx_cluster) safe_swz = swz; hwb = &s->hw_buf_info[0]; for (j = 0; j < SGE_FLBUF_SIZES; j++, hwb++) { if (hwb->zidx != -1 || hwb->size > swz->size) continue; #ifdef INVARIANTS if (fl_pad) MPASS(hwb->size % sp->pad_boundary == 0); #endif hwb->zidx = i; if (head == -1) head = tail = j; else if (hwb->size < s->hw_buf_info[tail].size) { s->hw_buf_info[tail].next = j; tail = j; } else { int8_t *cur; struct hw_buf_info *t; for (cur = &head; *cur != -1; cur = &t->next) { t = &s->hw_buf_info[*cur]; if (hwb->size == t->size) { hwb->zidx = -2; break; } if (hwb->size > t->size) { hwb->next = *cur; *cur = j; break; } } } } swz->head_hwidx = head; swz->tail_hwidx = tail; if (tail != -1) { n++; if (swz->size - s->hw_buf_info[tail].size >= CL_METADATA_SIZE) sc->flags |= BUF_PACKING_OK; } } if (n == 0) { device_printf(sc->dev, "no usable SGE FL buffer size.\n"); rc = EINVAL; } s->safe_hwidx1 = -1; s->safe_hwidx2 = -1; if (safe_swz != NULL) { s->safe_hwidx1 = safe_swz->head_hwidx; for (i = safe_swz->head_hwidx; i != -1; i = hwb->next) { int spare; hwb = &s->hw_buf_info[i]; #ifdef INVARIANTS if (fl_pad) MPASS(hwb->size % sp->pad_boundary == 0); #endif spare = safe_swz->size - hwb->size; if (spare >= CL_METADATA_SIZE) { s->safe_hwidx2 = i; break; } } } if (sc->flags & IS_VF) return (0); v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6); r = t4_read_reg(sc, A_ULP_RX_TDDP_PSZ); if (r != v) { device_printf(sc->dev, "invalid ULP_RX_TDDP_PSZ(0x%x)\n", r); rc = EINVAL; } m = v = F_TDDPTAGTCB; r = t4_read_reg(sc, A_ULP_RX_CTL); if ((r & m) != v) { device_printf(sc->dev, "invalid ULP_RX_CTL(0x%x)\n", r); rc = EINVAL; } m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; r = t4_read_reg(sc, A_TP_PARA_REG5); if ((r & m) != v) { device_printf(sc->dev, "invalid TP_PARA_REG5(0x%x)\n", r); rc = EINVAL; } t4_init_tp_params(sc, 1); t4_read_mtu_tbl(sc, sc->params.mtus, NULL); t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd); return (rc); } int t4_create_dma_tag(struct adapter *sc) { int rc; rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->dmat); if (rc != 0) { device_printf(sc->dev, "failed to create main DMA tag: %d\n", rc); } return (rc); } void t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *children) { struct sge_params *sp = &sc->params.sge; SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes", CTLTYPE_STRING | CTLFLAG_RD, &sc->sge, 0, sysctl_bufsizes, "A", "freelist buffer sizes"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD, NULL, sp->fl_pktshift, "payload DMA offset in rx buffer (bytes)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD, NULL, sp->pad_boundary, "payload pad boundary (bytes)"); 
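/*
 * Editor's usage note (the exact OID path depends on the nexus device
 * name; "t4nex0" is an assumption): the read-only values registered
 * here surface under the adapter's sysctl tree, while the TUNABLE_INT
 * knobs near the top of this file are set at boot, e.g.:
 *
 *	sysctl dev.t4nex.0.fl_pktshift
 *	echo hw.cxgbe.fl_pad=0 >> /boot/loader.conf
 */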
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD, NULL, sp->spg_len, "status page size (bytes)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD, NULL, cong_drop, "congestion drop setting"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD, NULL, sp->pack_boundary, "payload pack boundary (bytes)"); } int t4_destroy_dma_tag(struct adapter *sc) { if (sc->dmat) bus_dma_tag_destroy(sc->dmat); return (0); } /* * Allocate and initialize the firmware event queue and the management queue. * * Returns errno on failure. Resources allocated up to that point may still be * allocated. Caller is responsible for cleanup in case this function fails. */ int t4_setup_adapter_queues(struct adapter *sc) { int rc; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); sysctl_ctx_init(&sc->ctx); sc->flags |= ADAP_SYSCTL_CTX; /* * Firmware event queue */ rc = alloc_fwq(sc); if (rc != 0) return (rc); /* * Management queue. This is just a control queue that uses the fwq as * its associated iq. */ if (!(sc->flags & IS_VF)) rc = alloc_mgmtq(sc); return (rc); } /* * Idempotent */ int t4_teardown_adapter_queues(struct adapter *sc) { ADAPTER_LOCK_ASSERT_NOTOWNED(sc); /* Do this before freeing the queue */ if (sc->flags & ADAP_SYSCTL_CTX) { sysctl_ctx_free(&sc->ctx); sc->flags &= ~ADAP_SYSCTL_CTX; } free_mgmtq(sc); free_fwq(sc); return (0); } static inline int first_vector(struct vi_info *vi) { struct adapter *sc = vi->pi->adapter; if (sc->intr_count == 1) return (0); return (vi->first_intr); } /* * Given an arbitrary "index," come up with an iq that can be used by other * queues (of this VI) for interrupt forwarding, SGE egress updates, etc. * The iq returned is guaranteed to be something that takes direct interrupts. */ static struct sge_iq * vi_intr_iq(struct vi_info *vi, int idx) { struct adapter *sc = vi->pi->adapter; struct sge *s = &sc->sge; struct sge_iq *iq = NULL; int nintr, i; if (sc->intr_count == 1) return (&sc->sge.fwq); nintr = vi->nintr; #ifdef DEV_NETMAP /* Do not consider any netmap-only interrupts */ if (vi->flags & INTR_RXQ && vi->nnmrxq > vi->nrxq) nintr -= vi->nnmrxq - vi->nrxq; #endif KASSERT(nintr != 0, ("%s: vi %p has no exclusive interrupts, total interrupts = %d", __func__, vi, sc->intr_count)); i = idx % nintr; if (vi->flags & INTR_RXQ) { if (i < vi->nrxq) { iq = &s->rxq[vi->first_rxq + i].iq; goto done; } i -= vi->nrxq; } #ifdef TCP_OFFLOAD if (vi->flags & INTR_OFLD_RXQ) { if (i < vi->nofldrxq) { iq = &s->ofld_rxq[vi->first_ofld_rxq + i].iq; goto done; } i -= vi->nofldrxq; } #endif panic("%s: vi %p, intr_flags 0x%lx, idx %d, total intr %d\n", __func__, vi, vi->flags & INTR_ALL, idx, nintr); done: MPASS(iq != NULL); KASSERT(iq->flags & IQ_INTR, ("%s: iq %p (vi %p, intr_flags 0x%lx, idx %d)", __func__, iq, vi, vi->flags & INTR_ALL, idx)); return (iq); } /* Maximum payload that can be delivered with a single iq descriptor */ static inline int mtu_to_max_payload(struct adapter *sc, int mtu, const int toe) { int payload; #ifdef TCP_OFFLOAD if (toe) { payload = sc->tt.rx_coalesce ? 
G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2)) : mtu; } else { #endif /* large enough even when hw VLAN extraction is disabled */ payload = sc->params.sge.fl_pktshift + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + mtu; #ifdef TCP_OFFLOAD } #endif return (payload); } int t4_setup_vi_queues(struct vi_info *vi) { int rc = 0, i, j, intr_idx, iqid; struct sge_rxq *rxq; struct sge_txq *txq; struct sge_wrq *ctrlq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; struct sge_wrq *ofld_txq; #endif #ifdef DEV_NETMAP int saved_idx; struct sge_nm_rxq *nm_rxq; struct sge_nm_txq *nm_txq; #endif char name[16]; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifnet *ifp = vi->ifp; struct sysctl_oid *oid = device_get_sysctl_tree(vi->dev); struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); int maxp, mtu = ifp->if_mtu; /* Interrupt vector to start from (when using multiple vectors) */ intr_idx = first_vector(vi); #ifdef DEV_NETMAP saved_idx = intr_idx; if (ifp->if_capabilities & IFCAP_NETMAP) { /* netmap is supported with direct interrupts only. */ MPASS(vi->flags & INTR_RXQ); /* * We don't have buffers to back the netmap rx queues * right now so we create the queues in a way that * doesn't set off any congestion signal in the chip. */ oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_rxq", CTLFLAG_RD, NULL, "rx queues"); for_each_nm_rxq(vi, i, nm_rxq) { rc = alloc_nm_rxq(vi, nm_rxq, intr_idx, i, oid); if (rc != 0) goto done; intr_idx++; } oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_txq", CTLFLAG_RD, NULL, "tx queues"); for_each_nm_txq(vi, i, nm_txq) { iqid = vi->first_nm_rxq + (i % vi->nnmrxq); rc = alloc_nm_txq(vi, nm_txq, iqid, i, oid); if (rc != 0) goto done; } } /* Normal rx queues and netmap rx queues share the same interrupts. */ intr_idx = saved_idx; #endif /* * First pass over all NIC and TOE rx queues: * a) initialize iq and fl * b) allocate queue iff it will take direct interrupts. */ maxp = mtu_to_max_payload(sc, mtu, 0); if (vi->flags & INTR_RXQ) { oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD, NULL, "rx queues"); } for_each_rxq(vi, i, rxq) { init_iq(&rxq->iq, sc, vi->tmr_idx, vi->pktc_idx, vi->qsize_rxq); snprintf(name, sizeof(name), "%s rxq%d-fl", device_get_nameunit(vi->dev), i); init_fl(sc, &rxq->fl, vi->qsize_rxq / 8, maxp, name); if (vi->flags & INTR_RXQ) { rxq->iq.flags |= IQ_INTR; rc = alloc_rxq(vi, rxq, intr_idx, i, oid); if (rc != 0) goto done; intr_idx++; } } #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) intr_idx = saved_idx + max(vi->nrxq, vi->nnmrxq); #endif #ifdef TCP_OFFLOAD maxp = mtu_to_max_payload(sc, mtu, 1); if (vi->flags & INTR_OFLD_RXQ) { oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_rxq", CTLFLAG_RD, NULL, "rx queues for offloaded TCP connections"); } for_each_ofld_rxq(vi, i, ofld_rxq) { init_iq(&ofld_rxq->iq, sc, vi->ofld_tmr_idx, vi->ofld_pktc_idx, vi->qsize_rxq); snprintf(name, sizeof(name), "%s ofld_rxq%d-fl", device_get_nameunit(vi->dev), i); init_fl(sc, &ofld_rxq->fl, vi->qsize_rxq / 8, maxp, name); if (vi->flags & INTR_OFLD_RXQ) { ofld_rxq->iq.flags |= IQ_INTR; rc = alloc_ofld_rxq(vi, ofld_rxq, intr_idx, i, oid); if (rc != 0) goto done; intr_idx++; } } #endif /* * Second pass over all NIC and TOE rx queues. The queues forwarding * their interrupts are allocated now. 
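 * Instead of a vector number, each of these queues gets the abs_id of
 * an interrupt-capable iq (picked by vi_intr_iq()) to which its
 * interrupts will be forwarded.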
*/ j = 0; if (!(vi->flags & INTR_RXQ)) { oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD, NULL, "rx queues"); for_each_rxq(vi, i, rxq) { MPASS(!(rxq->iq.flags & IQ_INTR)); intr_idx = vi_intr_iq(vi, j)->abs_id; rc = alloc_rxq(vi, rxq, intr_idx, i, oid); if (rc != 0) goto done; j++; } } #ifdef TCP_OFFLOAD if (vi->nofldrxq != 0 && !(vi->flags & INTR_OFLD_RXQ)) { oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_rxq", CTLFLAG_RD, NULL, "rx queues for offloaded TCP connections"); for_each_ofld_rxq(vi, i, ofld_rxq) { MPASS(!(ofld_rxq->iq.flags & IQ_INTR)); intr_idx = vi_intr_iq(vi, j)->abs_id; rc = alloc_ofld_rxq(vi, ofld_rxq, intr_idx, i, oid); if (rc != 0) goto done; j++; } } #endif /* * Now the tx queues. Only one pass needed. */ oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD, NULL, "tx queues"); j = 0; for_each_txq(vi, i, txq) { iqid = vi_intr_iq(vi, j)->cntxt_id; snprintf(name, sizeof(name), "%s txq%d", device_get_nameunit(vi->dev), i); init_eq(sc, &txq->eq, EQ_ETH, vi->qsize_txq, pi->tx_chan, iqid, name); rc = alloc_txq(vi, txq, i, oid); if (rc != 0) goto done; j++; } #ifdef TCP_OFFLOAD oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_txq", CTLFLAG_RD, NULL, "tx queues for offloaded TCP connections"); for_each_ofld_txq(vi, i, ofld_txq) { struct sysctl_oid *oid2; iqid = vi_intr_iq(vi, j)->cntxt_id; snprintf(name, sizeof(name), "%s ofld_txq%d", device_get_nameunit(vi->dev), i); init_eq(sc, &ofld_txq->eq, EQ_OFLD, vi->qsize_txq, pi->tx_chan, iqid, name); snprintf(name, sizeof(name), "%d", i); oid2 = SYSCTL_ADD_NODE(&vi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, name, CTLFLAG_RD, NULL, "offload tx queue"); rc = alloc_wrq(sc, vi, ofld_txq, oid2); if (rc != 0) goto done; j++; } #endif /* * Finally, the control queue. */ if (!IS_MAIN_VI(vi) || sc->flags & IS_VF) goto done; oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD, NULL, "ctrl queue"); ctrlq = &sc->sge.ctrlq[pi->port_id]; iqid = vi_intr_iq(vi, 0)->cntxt_id; snprintf(name, sizeof(name), "%s ctrlq", device_get_nameunit(vi->dev)); init_eq(sc, &ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, iqid, name); rc = alloc_wrq(sc, vi, ctrlq, oid); done: if (rc) t4_teardown_vi_queues(vi); return (rc); } /* * Idempotent */ int t4_teardown_vi_queues(struct vi_info *vi) { int i; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct sge_rxq *rxq; struct sge_txq *txq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; struct sge_wrq *ofld_txq; #endif #ifdef DEV_NETMAP struct sge_nm_rxq *nm_rxq; struct sge_nm_txq *nm_txq; #endif /* Do this before freeing the queues */ if (vi->flags & VI_SYSCTL_CTX) { sysctl_ctx_free(&vi->ctx); vi->flags &= ~VI_SYSCTL_CTX; } #ifdef DEV_NETMAP if (vi->ifp->if_capabilities & IFCAP_NETMAP) { for_each_nm_txq(vi, i, nm_txq) { free_nm_txq(vi, nm_txq); } for_each_nm_rxq(vi, i, nm_rxq) { free_nm_rxq(vi, nm_rxq); } } #endif /* * Take down all the tx queues first, as they reference the rx queues * (for egress updates, etc.). */ if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF)) free_wrq(sc, &sc->sge.ctrlq[pi->port_id]); for_each_txq(vi, i, txq) { free_txq(vi, txq); } #ifdef TCP_OFFLOAD for_each_ofld_txq(vi, i, ofld_txq) { free_wrq(sc, ofld_txq); } #endif /* * Then take down the rx queues that forward their interrupts, as they * reference other rx queues. 
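 * The interrupt-capable rx queues themselves are freed after this, in
 * the last pair of loops, once nothing forwards to them any more.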
*/ for_each_rxq(vi, i, rxq) { if ((rxq->iq.flags & IQ_INTR) == 0) free_rxq(vi, rxq); } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, i, ofld_rxq) { if ((ofld_rxq->iq.flags & IQ_INTR) == 0) free_ofld_rxq(vi, ofld_rxq); } #endif /* * Then take down the rx queues that take direct interrupts. */ for_each_rxq(vi, i, rxq) { if (rxq->iq.flags & IQ_INTR) free_rxq(vi, rxq); } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, i, ofld_rxq) { if (ofld_rxq->iq.flags & IQ_INTR) free_ofld_rxq(vi, ofld_rxq); } #endif return (0); } /* * Deals with errors and the firmware event queue. All data rx queues forward * their interrupt to the firmware event queue. */ void t4_intr_all(void *arg) { struct adapter *sc = arg; struct sge_iq *fwq = &sc->sge.fwq; t4_intr_err(arg); if (atomic_cmpset_int(&fwq->state, IQS_IDLE, IQS_BUSY)) { service_iq(fwq, 0); atomic_cmpset_int(&fwq->state, IQS_BUSY, IQS_IDLE); } } /* Deals with error interrupts */ void t4_intr_err(void *arg) { struct adapter *sc = arg; t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0); t4_slow_intr_handler(sc); } void t4_intr_evt(void *arg) { struct sge_iq *iq = arg; if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { service_iq(iq, 0); atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); } } void t4_intr(void *arg) { struct sge_iq *iq = arg; if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { service_iq(iq, 0); atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); } } void t4_vi_intr(void *arg) { struct irq *irq = arg; #ifdef DEV_NETMAP if (atomic_cmpset_int(&irq->nm_state, NM_ON, NM_BUSY)) { t4_nm_intr(irq->nm_rxq); atomic_cmpset_int(&irq->nm_state, NM_BUSY, NM_ON); } #endif if (irq->rxq != NULL) t4_intr(irq->rxq); } static inline int sort_before_lro(struct lro_ctrl *lro) { return (lro->lro_mbuf_max != 0); } /* * Deals with anything and everything on the given ingress queue. */ static int service_iq(struct sge_iq *iq, int budget) { struct sge_iq *q; struct sge_rxq *rxq = iq_to_rxq(iq); /* Use iff iq is part of rxq */ struct sge_fl *fl; /* Use iff IQ_HAS_FL */ struct adapter *sc = iq->adapter; struct iq_desc *d = &iq->desc[iq->cidx]; int ndescs = 0, limit; int rsp_type, refill; uint32_t lq; uint16_t fl_hw_cidx; struct mbuf *m0; STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql); #if defined(INET) || defined(INET6) const struct timeval lro_timeout = {0, sc->lro_timeout}; struct lro_ctrl *lro = &rxq->lro; #endif KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq)); limit = budget ? budget : iq->qsize / 16; if (iq->flags & IQ_HAS_FL) { fl = &rxq->fl; fl_hw_cidx = fl->hw_cidx; /* stable snapshot */ } else { fl = NULL; fl_hw_cidx = 0; /* to silence gcc warning */ } #if defined(INET) || defined(INET6) if (iq->flags & IQ_ADJ_CREDIT) { MPASS(sort_before_lro(lro)); iq->flags &= ~IQ_ADJ_CREDIT; if ((d->rsp.u.type_gen & F_RSPD_GEN) != iq->gen) { tcp_lro_flush_all(lro); t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(1) | V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params)); return (0); } ndescs = 1; } #else MPASS((iq->flags & IQ_ADJ_CREDIT) == 0); #endif /* * We always come back and check the descriptor ring for new indirect * interrupts and other responses after running a single handler. 
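 * A budget of 0 services the queue to completion; a nonzero budget
 * caps the descriptors handled in one call, after which the function
 * returns EINPROGRESS and expects to be called again.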
*/ for (;;) { while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) { rmb(); refill = 0; m0 = NULL; rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen); lq = be32toh(d->rsp.pldbuflen_qid); switch (rsp_type) { case X_RSPD_TYPE_FLBUF: KASSERT(iq->flags & IQ_HAS_FL, ("%s: data for an iq (%p) with no freelist", __func__, iq)); m0 = get_fl_payload(sc, fl, lq); if (__predict_false(m0 == NULL)) goto process_iql; refill = IDXDIFF(fl->hw_cidx, fl_hw_cidx, fl->sidx) > 2; #ifdef T4_PKT_TIMESTAMP /* * 60 bit timestamp for the payload is * *(uint64_t *)m0->m_pktdat. Note that it is * in the leading free-space in the mbuf. The * kernel can clobber it during a pullup, * m_copymdata, etc. You need to make sure that * the mbuf reaches you unmolested if you care * about the timestamp. */ *(uint64_t *)m0->m_pktdat = be64toh(d->rsp.u.last_flit) & 0xfffffffffffffff; #endif /* fall through */ case X_RSPD_TYPE_CPL: KASSERT(d->rss.opcode < NUM_CPL_CMDS, ("%s: bad opcode %02x.", __func__, d->rss.opcode)); t4_cpl_handler[d->rss.opcode](iq, &d->rss, m0); break; case X_RSPD_TYPE_INTR: /* * Interrupts should be forwarded only to queues * that are not forwarding their interrupts. * This means service_iq can recurse but only 1 * level deep. */ KASSERT(budget == 0, ("%s: budget %u, rsp_type %u", __func__, budget, rsp_type)); /* * There are 1K interrupt-capable queues (qids 0 * through 1023). A response type indicating a * forwarded interrupt with a qid >= 1K is an * iWARP async notification. */ if (lq >= 1024) { t4_an_handler(iq, &d->rsp); break; } q = sc->sge.iqmap[lq - sc->sge.iq_start - sc->sge.iq_base]; if (atomic_cmpset_int(&q->state, IQS_IDLE, IQS_BUSY)) { if (service_iq(q, q->qsize / 16) == 0) { atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE); } else { STAILQ_INSERT_TAIL(&iql, q, link); } } break; default: KASSERT(0, ("%s: illegal response type %d on iq %p", __func__, rsp_type, iq)); log(LOG_ERR, "%s: illegal response type %d on iq %p", device_get_nameunit(sc->dev), rsp_type, iq); break; } d++; if (__predict_false(++iq->cidx == iq->sidx)) { iq->cidx = 0; iq->gen ^= F_RSPD_GEN; d = &iq->desc[0]; } if (__predict_false(++ndescs == limit)) { t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | V_INGRESSQID(iq->cntxt_id) | V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); ndescs = 0; #if defined(INET) || defined(INET6) if (iq->flags & IQ_LRO_ENABLED && !sort_before_lro(lro) && sc->lro_timeout != 0) { tcp_lro_flush_inactive(lro, &lro_timeout); } #endif if (budget) { if (iq->flags & IQ_HAS_FL) { FL_LOCK(fl); refill_fl(sc, fl, 32); FL_UNLOCK(fl); } return (EINPROGRESS); } } if (refill) { FL_LOCK(fl); refill_fl(sc, fl, 32); FL_UNLOCK(fl); fl_hw_cidx = fl->hw_cidx; } } process_iql: if (STAILQ_EMPTY(&iql)) break; /* * Process the head only, and send it to the back of the list if * it's still not done.
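 * (A nonzero return from the recursive service_iq() call means that
 * queue hit its budget with work still pending.)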
*/ q = STAILQ_FIRST(&iql); STAILQ_REMOVE_HEAD(&iql, link); if (service_iq(q, q->qsize / 8) == 0) atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE); else STAILQ_INSERT_TAIL(&iql, q, link); } #if defined(INET) || defined(INET6) if (iq->flags & IQ_LRO_ENABLED) { if (ndescs > 0 && lro->lro_mbuf_count > 8) { MPASS(sort_before_lro(lro)); /* hold back one credit and don't flush LRO state */ iq->flags |= IQ_ADJ_CREDIT; ndescs--; } else { tcp_lro_flush_all(lro); } } #endif t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params)); if (iq->flags & IQ_HAS_FL) { int starved; FL_LOCK(fl); starved = refill_fl(sc, fl, 64); FL_UNLOCK(fl); if (__predict_false(starved != 0)) add_fl_to_sfl(sc, fl); } return (0); } static inline int cl_has_metadata(struct sge_fl *fl, struct cluster_layout *cll) { int rc = fl->flags & FL_BUF_PACKING || cll->region1 > 0; if (rc) MPASS(cll->region3 >= CL_METADATA_SIZE); return (rc); } static inline struct cluster_metadata * cl_metadata(struct adapter *sc, struct sge_fl *fl, struct cluster_layout *cll, caddr_t cl) { if (cl_has_metadata(fl, cll)) { struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx]; return ((struct cluster_metadata *)(cl + swz->size) - 1); } return (NULL); } static void -rxb_free(struct mbuf *m, void *arg1, void *arg2) +rxb_free(struct mbuf *m) { - uma_zone_t zone = arg1; - caddr_t cl = arg2; + uma_zone_t zone = m->m_ext.ext_arg1; + void *cl = m->m_ext.ext_arg2; uma_zfree(zone, cl); counter_u64_add(extfree_rels, 1); } /* * The mbuf returned by this function could be allocated from zone_mbuf or * constructed in spare room in the cluster. * * The mbuf carries the payload in one of these ways * a) frame inside the mbuf (mbuf from zone_mbuf) * b) m_cljset (for clusters without metadata) zone_mbuf * c) m_extaddref (cluster with metadata) inline mbuf * d) m_extaddref (cluster with metadata) zone_mbuf */ static struct mbuf * get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset, int remaining) { struct mbuf *m; struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; struct cluster_layout *cll = &sd->cll; struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx]; struct hw_buf_info *hwb = &sc->sge.hw_buf_info[cll->hwidx]; struct cluster_metadata *clm = cl_metadata(sc, fl, cll, sd->cl); int len, blen; caddr_t payload; blen = hwb->size - fl->rx_offset; /* max possible in this buf */ len = min(remaining, blen); payload = sd->cl + cll->region1 + fl->rx_offset; if (fl->flags & FL_BUF_PACKING) { const u_int l = fr_offset + len; const u_int pad = roundup2(l, fl->buf_boundary) - l; if (fl->rx_offset + len + pad < hwb->size) blen = len + pad; MPASS(fl->rx_offset + blen <= hwb->size); } else { MPASS(fl->rx_offset == 0); /* not packing */ } if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) { /* * Copy payload into a freshly allocated mbuf. */ m = fr_offset == 0 ? m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA); if (m == NULL) return (NULL); fl->mbuf_allocated++; #ifdef T4_PKT_TIMESTAMP /* Leave room for a timestamp */ m->m_data += 8; #endif /* copy data to mbuf */ bcopy(payload, mtod(m, caddr_t), len); } else if (sd->nmbuf * MSIZE < cll->region1) { /* * There's spare room in the cluster for an mbuf. Create one * and associate it with the payload that's in the cluster. */ MPASS(clm != NULL); m = (struct mbuf *)(sd->cl + sd->nmbuf * MSIZE); /* No bzero required */ if (m_init(m, M_NOWAIT, MT_DATA, fr_offset == 0 ? 
M_PKTHDR | M_NOFREE : M_NOFREE)) return (NULL); fl->mbuf_inlined++; m_extaddref(m, payload, blen, &clm->refcount, rxb_free, swz->zone, sd->cl); if (sd->nmbuf++ == 0) counter_u64_add(extfree_refs, 1); } else { /* * Grab an mbuf from zone_mbuf and associate it with the * payload in the cluster. */ m = fr_offset == 0 ? m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA); if (m == NULL) return (NULL); fl->mbuf_allocated++; if (clm != NULL) { m_extaddref(m, payload, blen, &clm->refcount, rxb_free, swz->zone, sd->cl); if (sd->nmbuf++ == 0) counter_u64_add(extfree_refs, 1); } else { m_cljset(m, sd->cl, swz->type); sd->cl = NULL; /* consumed, not a recycle candidate */ } } if (fr_offset == 0) m->m_pkthdr.len = remaining; m->m_len = len; if (fl->flags & FL_BUF_PACKING) { fl->rx_offset += blen; MPASS(fl->rx_offset <= hwb->size); if (fl->rx_offset < hwb->size) return (m); /* without advancing the cidx */ } if (__predict_false(++fl->cidx % 8 == 0)) { uint16_t cidx = fl->cidx / 8; if (__predict_false(cidx == fl->sidx)) fl->cidx = cidx = 0; fl->hw_cidx = cidx; } fl->rx_offset = 0; return (m); } static struct mbuf * get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf) { struct mbuf *m0, *m, **pnext; u_int remaining; const u_int total = G_RSPD_LEN(len_newbuf); if (__predict_false(fl->flags & FL_BUF_RESUME)) { M_ASSERTPKTHDR(fl->m0); MPASS(fl->m0->m_pkthdr.len == total); MPASS(fl->remaining < total); m0 = fl->m0; pnext = fl->pnext; remaining = fl->remaining; fl->flags &= ~FL_BUF_RESUME; goto get_segment; } if (fl->rx_offset > 0 && len_newbuf & F_RSPD_NEWBUF) { fl->rx_offset = 0; if (__predict_false(++fl->cidx % 8 == 0)) { uint16_t cidx = fl->cidx / 8; if (__predict_false(cidx == fl->sidx)) fl->cidx = cidx = 0; fl->hw_cidx = cidx; } } /* * Payload starts at rx_offset in the current hw buffer. Its length is * 'len' and it may span multiple hw buffers. 
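 * Each call to get_scatter_segment() below consumes at most one hw
 * buffer and the resulting mbufs are chained via m_next until all
 * 'total' bytes of the frame have been claimed.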
*/ m0 = get_scatter_segment(sc, fl, 0, total); if (m0 == NULL) return (NULL); remaining = total - m0->m_len; pnext = &m0->m_next; while (remaining > 0) { get_segment: MPASS(fl->rx_offset == 0); m = get_scatter_segment(sc, fl, total - remaining, remaining); if (__predict_false(m == NULL)) { fl->m0 = m0; fl->pnext = pnext; fl->remaining = remaining; fl->flags |= FL_BUF_RESUME; return (NULL); } *pnext = m; pnext = &m->m_next; remaining -= m->m_len; } *pnext = NULL; M_ASSERTPKTHDR(m0); return (m0); } static int t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0) { struct sge_rxq *rxq = iq_to_rxq(iq); struct ifnet *ifp = rxq->ifp; struct adapter *sc = iq->adapter; const struct cpl_rx_pkt *cpl = (const void *)(rss + 1); #if defined(INET) || defined(INET6) struct lro_ctrl *lro = &rxq->lro; #endif static const int sw_hashtype[4][2] = { {M_HASHTYPE_NONE, M_HASHTYPE_NONE}, {M_HASHTYPE_RSS_IPV4, M_HASHTYPE_RSS_IPV6}, {M_HASHTYPE_RSS_TCP_IPV4, M_HASHTYPE_RSS_TCP_IPV6}, {M_HASHTYPE_RSS_UDP_IPV4, M_HASHTYPE_RSS_UDP_IPV6}, }; KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__, rss->opcode)); m0->m_pkthdr.len -= sc->params.sge.fl_pktshift; m0->m_len -= sc->params.sge.fl_pktshift; m0->m_data += sc->params.sge.fl_pktshift; m0->m_pkthdr.rcvif = ifp; M_HASHTYPE_SET(m0, sw_hashtype[rss->hash_type][rss->ipv6]); m0->m_pkthdr.flowid = be32toh(rss->hash_val); if (cpl->csum_calc && !(cpl->err_vec & sc->params.tp.err_vec_mask)) { if (ifp->if_capenable & IFCAP_RXCSUM && cpl->l2info & htobe32(F_RXF_IP)) { m0->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); rxq->rxcsum++; } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 && cpl->l2info & htobe32(F_RXF_IP6)) { m0->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR); rxq->rxcsum++; } if (__predict_false(cpl->ip_frag)) m0->m_pkthdr.csum_data = be16toh(cpl->csum); else m0->m_pkthdr.csum_data = 0xffff; } if (cpl->vlan_ex) { m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan); m0->m_flags |= M_VLANTAG; rxq->vlan_extraction++; } #if defined(INET) || defined(INET6) if (iq->flags & IQ_LRO_ENABLED) { if (sort_before_lro(lro)) { tcp_lro_queue_mbuf(lro, m0); return (0); /* queued for sort, then LRO */ } if (tcp_lro_rx(lro, m0, 0) == 0) return (0); /* queued for LRO */ } #endif ifp->if_input(ifp, m0); return (0); } /* * Must drain the wrq or make sure that someone else will. */ static void wrq_tx_drain(void *arg, int n) { struct sge_wrq *wrq = arg; struct sge_eq *eq = &wrq->eq; EQ_LOCK(eq); if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) drain_wrq_wr_list(wrq->adapter, wrq); EQ_UNLOCK(eq); } static void drain_wrq_wr_list(struct adapter *sc, struct sge_wrq *wrq) { struct sge_eq *eq = &wrq->eq; u_int available, dbdiff; /* # of hardware descriptors */ u_int n; struct wrqe *wr; struct fw_eth_tx_pkt_wr *dst; /* any fw WR struct will do */ EQ_LOCK_ASSERT_OWNED(eq); MPASS(TAILQ_EMPTY(&wrq->incomplete_wrs)); wr = STAILQ_FIRST(&wrq->wr_list); MPASS(wr != NULL); /* Must be called with something useful to do */ MPASS(eq->pidx == eq->dbidx); dbdiff = 0; do { eq->cidx = read_hw_cidx(eq); if (eq->pidx == eq->cidx) available = eq->sidx - 1; else available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; MPASS(wr->wrq == wrq); n = howmany(wr->wr_len, EQ_ESIZE); if (available < n) break; dst = (void *)&eq->desc[eq->pidx]; if (__predict_true(eq->sidx - eq->pidx > n)) { /* Won't wrap, won't end exactly at the status page. 
*/ bcopy(&wr->wr[0], dst, wr->wr_len); eq->pidx += n; } else { int first_portion = (eq->sidx - eq->pidx) * EQ_ESIZE; bcopy(&wr->wr[0], dst, first_portion); if (wr->wr_len > first_portion) { bcopy(&wr->wr[first_portion], &eq->desc[0], wr->wr_len - first_portion); } eq->pidx = n - (eq->sidx - eq->pidx); } wrq->tx_wrs_copied++; if (available < eq->sidx / 4 && atomic_cmpset_int(&eq->equiq, 0, 1)) { dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | F_FW_WR_EQUEQ); eq->equeqidx = eq->pidx; } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) { dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); eq->equeqidx = eq->pidx; } dbdiff += n; if (dbdiff >= 16) { ring_eq_db(sc, eq, dbdiff); dbdiff = 0; } STAILQ_REMOVE_HEAD(&wrq->wr_list, link); free_wrqe(wr); MPASS(wrq->nwr_pending > 0); wrq->nwr_pending--; MPASS(wrq->ndesc_needed >= n); wrq->ndesc_needed -= n; } while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL); if (dbdiff) ring_eq_db(sc, eq, dbdiff); } /* * Doesn't fail. Holds on to work requests it can't send right away. */ void t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr) { #ifdef INVARIANTS struct sge_eq *eq = &wrq->eq; #endif EQ_LOCK_ASSERT_OWNED(eq); MPASS(wr != NULL); MPASS(wr->wr_len > 0 && wr->wr_len <= SGE_MAX_WR_LEN); MPASS((wr->wr_len & 0x7) == 0); STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link); wrq->nwr_pending++; wrq->ndesc_needed += howmany(wr->wr_len, EQ_ESIZE); if (!TAILQ_EMPTY(&wrq->incomplete_wrs)) return; /* commit_wrq_wr will drain wr_list as well. */ drain_wrq_wr_list(sc, wrq); /* Doorbell must have caught up to the pidx. */ MPASS(eq->pidx == eq->dbidx); } void t4_update_fl_bufsize(struct ifnet *ifp) { struct vi_info *vi = ifp->if_softc; struct adapter *sc = vi->pi->adapter; struct sge_rxq *rxq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; #endif struct sge_fl *fl; int i, maxp, mtu = ifp->if_mtu; maxp = mtu_to_max_payload(sc, mtu, 0); for_each_rxq(vi, i, rxq) { fl = &rxq->fl; FL_LOCK(fl); find_best_refill_source(sc, fl, maxp); FL_UNLOCK(fl); } #ifdef TCP_OFFLOAD maxp = mtu_to_max_payload(sc, mtu, 1); for_each_ofld_rxq(vi, i, ofld_rxq) { fl = &ofld_rxq->fl; FL_LOCK(fl); find_best_refill_source(sc, fl, maxp); FL_UNLOCK(fl); } #endif } static inline int mbuf_nsegs(struct mbuf *m) { M_ASSERTPKTHDR(m); KASSERT(m->m_pkthdr.l5hlen > 0, ("%s: mbuf %p missing information on # of segments.", __func__, m)); return (m->m_pkthdr.l5hlen); } static inline void set_mbuf_nsegs(struct mbuf *m, uint8_t nsegs) { M_ASSERTPKTHDR(m); m->m_pkthdr.l5hlen = nsegs; } static inline int mbuf_len16(struct mbuf *m) { int n; M_ASSERTPKTHDR(m); n = m->m_pkthdr.PH_loc.eight[0]; MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); return (n); } static inline void set_mbuf_len16(struct mbuf *m, uint8_t len16) { M_ASSERTPKTHDR(m); m->m_pkthdr.PH_loc.eight[0] = len16; } static inline int needs_tso(struct mbuf *m) { M_ASSERTPKTHDR(m); if (m->m_pkthdr.csum_flags & CSUM_TSO) { KASSERT(m->m_pkthdr.tso_segsz > 0, ("%s: TSO requested in mbuf %p but MSS not provided", __func__, m)); return (1); } return (0); } static inline int needs_l3_csum(struct mbuf *m) { M_ASSERTPKTHDR(m); if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) return (1); return (0); } static inline int needs_l4_csum(struct mbuf *m) { M_ASSERTPKTHDR(m); if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) return (1); return (0); } static inline int needs_vlan_insertion(struct mbuf *m) { M_ASSERTPKTHDR(m); if (m->m_flags & M_VLANTAG) { KASSERT(m->m_pkthdr.ether_vtag != 0, ("%s: HWVLAN requested in 
mbuf %p but tag not provided", __func__, m)); return (1); } return (0); } static void * m_advance(struct mbuf **pm, int *poffset, int len) { struct mbuf *m = *pm; int offset = *poffset; uintptr_t p = 0; MPASS(len > 0); for (;;) { if (offset + len < m->m_len) { offset += len; p = mtod(m, uintptr_t) + offset; break; } len -= m->m_len - offset; m = m->m_next; offset = 0; MPASS(m != NULL); } *poffset = offset; *pm = m; return ((void *)p); } /* * Can deal with empty mbufs in the chain that have m_len = 0, but the chain * must have at least one mbuf that's not empty. */ static inline int count_mbuf_nsegs(struct mbuf *m) { vm_paddr_t lastb, next; vm_offset_t va; int len, nsegs; MPASS(m != NULL); nsegs = 0; lastb = 0; for (; m; m = m->m_next) { len = m->m_len; if (__predict_false(len == 0)) continue; va = mtod(m, vm_offset_t); next = pmap_kextract(va); nsegs += sglist_count(m->m_data, len); if (lastb + 1 == next) nsegs--; lastb = pmap_kextract(va + len - 1); } MPASS(nsegs > 0); return (nsegs); } /* * Analyze the mbuf to determine its tx needs. The mbuf passed in may change: * a) caller can assume it's been freed if this function returns with an error. * b) it may get defragged up if the gather list is too long for the hardware. */ int parse_pkt(struct adapter *sc, struct mbuf **mp) { struct mbuf *m0 = *mp, *m; int rc, nsegs, defragged = 0, offset; struct ether_header *eh; void *l3hdr; #if defined(INET) || defined(INET6) struct tcphdr *tcp; #endif uint16_t eh_type; M_ASSERTPKTHDR(m0); if (__predict_false(m0->m_pkthdr.len < ETHER_HDR_LEN)) { rc = EINVAL; fail: m_freem(m0); *mp = NULL; return (rc); } restart: /* * First count the number of gather list segments in the payload. * Defrag the mbuf if nsegs exceeds the hardware limit. */ M_ASSERTPKTHDR(m0); MPASS(m0->m_pkthdr.len > 0); nsegs = count_mbuf_nsegs(m0); if (nsegs > (needs_tso(m0) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)) { if (defragged++ > 0 || (m = m_defrag(m0, M_NOWAIT)) == NULL) { rc = EFBIG; goto fail; } *mp = m0 = m; /* update caller's copy after defrag */ goto restart; } if (__predict_false(nsegs > 2 && m0->m_pkthdr.len <= MHLEN)) { m0 = m_pullup(m0, m0->m_pkthdr.len); if (m0 == NULL) { /* Should have left well enough alone. */ rc = EFBIG; goto fail; } *mp = m0; /* update caller's copy after pullup */ goto restart; } set_mbuf_nsegs(m0, nsegs); if (sc->flags & IS_VF) set_mbuf_len16(m0, txpkt_vm_len16(nsegs, needs_tso(m0))); else set_mbuf_len16(m0, txpkt_len16(nsegs, needs_tso(m0))); if (!needs_tso(m0) && !(sc->flags & IS_VF && (needs_l3_csum(m0) || needs_l4_csum(m0)))) return (0); m = m0; eh = mtod(m, struct ether_header *); eh_type = ntohs(eh->ether_type); if (eh_type == ETHERTYPE_VLAN) { struct ether_vlan_header *evh = (void *)eh; eh_type = ntohs(evh->evl_proto); m0->m_pkthdr.l2hlen = sizeof(*evh); } else m0->m_pkthdr.l2hlen = sizeof(*eh); offset = 0; l3hdr = m_advance(&m, &offset, m0->m_pkthdr.l2hlen); switch (eh_type) { #ifdef INET6 case ETHERTYPE_IPV6: { struct ip6_hdr *ip6 = l3hdr; MPASS(!needs_tso(m0) || ip6->ip6_nxt == IPPROTO_TCP); m0->m_pkthdr.l3hlen = sizeof(*ip6); break; } #endif #ifdef INET case ETHERTYPE_IP: { struct ip *ip = l3hdr; m0->m_pkthdr.l3hlen = ip->ip_hl * 4; break; } #endif default: panic("%s: ethertype 0x%04x unknown. 
if_cxgbe must be compiled" " with the same INET/INET6 options as the kernel.", __func__, eh_type); } #if defined(INET) || defined(INET6) if (needs_tso(m0)) { tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen); m0->m_pkthdr.l4hlen = tcp->th_off * 4; } #endif MPASS(m0 == *mp); return (0); } void * start_wrq_wr(struct sge_wrq *wrq, int len16, struct wrq_cookie *cookie) { struct sge_eq *eq = &wrq->eq; struct adapter *sc = wrq->adapter; int ndesc, available; struct wrqe *wr; void *w; MPASS(len16 > 0); ndesc = howmany(len16, EQ_ESIZE / 16); MPASS(ndesc > 0 && ndesc <= SGE_MAX_WR_NDESC); EQ_LOCK(eq); if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) drain_wrq_wr_list(sc, wrq); if (!STAILQ_EMPTY(&wrq->wr_list)) { slowpath: EQ_UNLOCK(eq); wr = alloc_wrqe(len16 * 16, wrq); if (__predict_false(wr == NULL)) return (NULL); cookie->pidx = -1; cookie->ndesc = ndesc; return (&wr->wr); } eq->cidx = read_hw_cidx(eq); if (eq->pidx == eq->cidx) available = eq->sidx - 1; else available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; if (available < ndesc) goto slowpath; cookie->pidx = eq->pidx; cookie->ndesc = ndesc; TAILQ_INSERT_TAIL(&wrq->incomplete_wrs, cookie, link); w = &eq->desc[eq->pidx]; IDXINCR(eq->pidx, ndesc, eq->sidx); if (__predict_false(cookie->pidx + ndesc > eq->sidx)) { w = &wrq->ss[0]; wrq->ss_pidx = cookie->pidx; wrq->ss_len = len16 * 16; } EQ_UNLOCK(eq); return (w); } void commit_wrq_wr(struct sge_wrq *wrq, void *w, struct wrq_cookie *cookie) { struct sge_eq *eq = &wrq->eq; struct adapter *sc = wrq->adapter; int ndesc, pidx; struct wrq_cookie *prev, *next; if (cookie->pidx == -1) { struct wrqe *wr = __containerof(w, struct wrqe, wr); t4_wrq_tx(sc, wr); return; } if (__predict_false(w == &wrq->ss[0])) { int n = (eq->sidx - wrq->ss_pidx) * EQ_ESIZE; MPASS(wrq->ss_len > n); /* WR had better wrap around. */ bcopy(&wrq->ss[0], &eq->desc[wrq->ss_pidx], n); bcopy(&wrq->ss[n], &eq->desc[0], wrq->ss_len - n); wrq->tx_wrs_ss++; } else wrq->tx_wrs_direct++; EQ_LOCK(eq); ndesc = cookie->ndesc; /* Can be more than SGE_MAX_WR_NDESC here. */ pidx = cookie->pidx; MPASS(pidx >= 0 && pidx < eq->sidx); prev = TAILQ_PREV(cookie, wrq_incomplete_wrs, link); next = TAILQ_NEXT(cookie, link); if (prev == NULL) { MPASS(pidx == eq->dbidx); if (next == NULL || ndesc >= 16) ring_eq_db(wrq->adapter, eq, ndesc); else { MPASS(IDXDIFF(next->pidx, pidx, eq->sidx) == ndesc); next->pidx = pidx; next->ndesc += ndesc; } } else { MPASS(IDXDIFF(pidx, prev->pidx, eq->sidx) == prev->ndesc); prev->ndesc += ndesc; } TAILQ_REMOVE(&wrq->incomplete_wrs, cookie, link); if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) drain_wrq_wr_list(sc, wrq); #ifdef INVARIANTS if (TAILQ_EMPTY(&wrq->incomplete_wrs)) { /* Doorbell must have caught up to the pidx. */ MPASS(wrq->eq.pidx == wrq->eq.dbidx); } #endif EQ_UNLOCK(eq); } static u_int can_resume_eth_tx(struct mp_ring *r) { struct sge_eq *eq = r->cookie; return (total_available_tx_desc(eq) > eq->sidx / 8); } static inline int cannot_use_txpkts(struct mbuf *m) { /* maybe put a GL limit too, to avoid silliness? */ return (needs_tso(m)); } static inline int discard_tx(struct sge_eq *eq) { return ((eq->flags & (EQ_ENABLED | EQ_QFLUSH)) != EQ_ENABLED); } /* * r->items[cidx] to r->items[pidx], with a wraparound at r->size, are ready to * be consumed. Return the actual number consumed. 0 indicates a stall. 
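 * (can_resume_eth_tx() is what tells mp_ring when a stalled txq is
 * worth draining again.)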
*/ static u_int eth_tx(struct mp_ring *r, u_int cidx, u_int pidx) { struct sge_txq *txq = r->cookie; struct sge_eq *eq = &txq->eq; struct ifnet *ifp = txq->ifp; struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; u_int total, remaining; /* # of packets */ u_int available, dbdiff; /* # of hardware descriptors */ u_int n, next_cidx; struct mbuf *m0, *tail; struct txpkts txp; struct fw_eth_tx_pkts_wr *wr; /* any fw WR struct will do */ remaining = IDXDIFF(pidx, cidx, r->size); MPASS(remaining > 0); /* Must not be called without work to do. */ total = 0; TXQ_LOCK(txq); if (__predict_false(discard_tx(eq))) { while (cidx != pidx) { m0 = r->items[cidx]; m_freem(m0); if (++cidx == r->size) cidx = 0; } reclaim_tx_descs(txq, 2048); total = remaining; goto done; } /* How many hardware descriptors do we have readily available. */ if (eq->pidx == eq->cidx) available = eq->sidx - 1; else available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; dbdiff = IDXDIFF(eq->pidx, eq->dbidx, eq->sidx); while (remaining > 0) { m0 = r->items[cidx]; M_ASSERTPKTHDR(m0); MPASS(m0->m_nextpkt == NULL); if (available < SGE_MAX_WR_NDESC) { available += reclaim_tx_descs(txq, 64); if (available < howmany(mbuf_len16(m0), EQ_ESIZE / 16)) break; /* out of descriptors */ } next_cidx = cidx + 1; if (__predict_false(next_cidx == r->size)) next_cidx = 0; wr = (void *)&eq->desc[eq->pidx]; if (sc->flags & IS_VF) { total++; remaining--; ETHER_BPF_MTAP(ifp, m0); n = write_txpkt_vm_wr(sc, txq, (void *)wr, m0, available); } else if (remaining > 1 && try_txpkts(m0, r->items[next_cidx], &txp, available) == 0) { /* pkts at cidx, next_cidx should both be in txp. */ MPASS(txp.npkt == 2); tail = r->items[next_cidx]; MPASS(tail->m_nextpkt == NULL); ETHER_BPF_MTAP(ifp, m0); ETHER_BPF_MTAP(ifp, tail); m0->m_nextpkt = tail; if (__predict_false(++next_cidx == r->size)) next_cidx = 0; while (next_cidx != pidx) { if (add_to_txpkts(r->items[next_cidx], &txp, available) != 0) break; tail->m_nextpkt = r->items[next_cidx]; tail = tail->m_nextpkt; ETHER_BPF_MTAP(ifp, tail); if (__predict_false(++next_cidx == r->size)) next_cidx = 0; } n = write_txpkts_wr(txq, wr, m0, &txp, available); total += txp.npkt; remaining -= txp.npkt; } else { total++; remaining--; ETHER_BPF_MTAP(ifp, m0); n = write_txpkt_wr(txq, (void *)wr, m0, available); } MPASS(n >= 1 && n <= available && n <= SGE_MAX_WR_NDESC); available -= n; dbdiff += n; IDXINCR(eq->pidx, n, eq->sidx); if (total_available_tx_desc(eq) < eq->sidx / 4 && atomic_cmpset_int(&eq->equiq, 0, 1)) { wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | F_FW_WR_EQUEQ); eq->equeqidx = eq->pidx; } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) { wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); eq->equeqidx = eq->pidx; } if (dbdiff >= 16 && remaining >= 4) { ring_eq_db(sc, eq, dbdiff); available += reclaim_tx_descs(txq, 4 * dbdiff); dbdiff = 0; } cidx = next_cidx; } if (dbdiff != 0) { ring_eq_db(sc, eq, dbdiff); reclaim_tx_descs(txq, 32); } done: TXQ_UNLOCK(txq); return (total); } static inline void init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx, int qsize) { KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS, ("%s: bad tmr_idx %d", __func__, tmr_idx)); KASSERT(pktc_idx < SGE_NCOUNTERS, /* -ve is ok, means don't use */ ("%s: bad pktc_idx %d", __func__, pktc_idx)); iq->flags = 0; iq->adapter = sc; iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx); iq->intr_pktc_idx = SGE_NCOUNTERS - 1; if (pktc_idx >= 0) { iq->intr_params |= F_QINTR_CNT_EN; iq->intr_pktc_idx = 
pktc_idx; } iq->qsize = roundup2(qsize, 16); /* See FW_IQ_CMD/iqsize */ iq->sidx = iq->qsize - sc->params.sge.spg_len / IQ_ESIZE; } static inline void init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, char *name) { fl->qsize = qsize; fl->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; strlcpy(fl->lockname, name, sizeof(fl->lockname)); if (sc->flags & BUF_PACKING_OK && ((!is_t4(sc) && buffer_packing) || /* T5+: enabled unless 0 */ (is_t4(sc) && buffer_packing == 1)))/* T4: disabled unless 1 */ fl->flags |= FL_BUF_PACKING; find_best_refill_source(sc, fl, maxp); find_safe_refill_source(sc, fl); } static inline void init_eq(struct adapter *sc, struct sge_eq *eq, int eqtype, int qsize, uint8_t tx_chan, uint16_t iqid, char *name) { KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype)); eq->flags = eqtype & EQ_TYPEMASK; eq->tx_chan = tx_chan; eq->iqid = iqid; eq->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; strlcpy(eq->lockname, name, sizeof(eq->lockname)); } static int alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag, bus_dmamap_t *map, bus_addr_t *pa, void **va) { int rc; rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag); if (rc != 0) { device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc); goto done; } rc = bus_dmamem_alloc(*tag, va, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map); if (rc != 0) { device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc); goto done; } rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0); if (rc != 0) { device_printf(sc->dev, "cannot load DMA map: %d\n", rc); goto done; } done: if (rc) free_ring(sc, *tag, *map, *pa, *va); return (rc); } static int free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map, bus_addr_t pa, void *va) { if (pa) bus_dmamap_unload(tag, map); if (va) bus_dmamem_free(tag, va, map); if (tag) bus_dma_tag_destroy(tag); return (0); } /* * Allocates the ring for an ingress queue and an optional freelist. If the * freelist is specified it will be allocated and then associated with the * ingress queue. * * Returns errno on failure. Resources allocated up to that point may still be * allocated. Caller is responsible for cleanup in case this function fails. * * If the ingress queue will take interrupts directly (iq->flags & IQ_INTR) then * the intr_idx specifies the vector, starting from 0. Otherwise it specifies * the abs_id of the ingress queue to which its interrupts should be forwarded. 
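 * A negative 'cong' leaves congestion feedback disabled for this
 * iq/fl pair; cong >= 0 is used as a map of the channels on which
 * congestion should be signalled (FL0CNGCHMAP below).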
*/ static int alloc_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl, int intr_idx, int cong) { int rc, i, cntxt_id; size_t len; struct fw_iq_cmd c; struct port_info *pi = vi->pi; struct adapter *sc = iq->adapter; struct sge_params *sp = &sc->params.sge; __be32 v = 0; len = iq->qsize * IQ_ESIZE; rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba, (void **)&iq->desc); if (rc != 0) return (rc); bzero(&c, sizeof(c)); c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) | V_FW_IQ_CMD_VFN(0)); c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | FW_LEN16(c)); /* Special handling for firmware event queue */ if (iq == &sc->sge.fwq) v |= F_FW_IQ_CMD_IQASYNCH; if (iq->flags & IQ_INTR) { KASSERT(intr_idx < sc->intr_count, ("%s: invalid direct intr_idx %d", __func__, intr_idx)); } else v |= F_FW_IQ_CMD_IQANDST; v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx); c.type_to_iqandstindex = htobe32(v | V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | V_FW_IQ_CMD_VIID(vi->viid) | V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) | F_FW_IQ_CMD_IQGTSMODE | V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) | V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4)); c.iqsize = htobe16(iq->qsize); c.iqaddr = htobe64(iq->ba); if (cong >= 0) c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN); if (fl) { mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF); len = fl->qsize * EQ_ESIZE; rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map, &fl->ba, (void **)&fl->desc); if (rc) return (rc); /* Allocate space for one software descriptor per buffer. */ rc = alloc_fl_sdesc(fl); if (rc != 0) { device_printf(sc->dev, "failed to setup fl software descriptors: %d\n", rc); return (rc); } if (fl->flags & FL_BUF_PACKING) { fl->lowat = roundup2(sp->fl_starve_threshold2, 8); fl->buf_boundary = sp->pack_boundary; } else { fl->lowat = roundup2(sp->fl_starve_threshold, 8); fl->buf_boundary = 16; } if (fl_pad && fl->buf_boundary < sp->pad_boundary) fl->buf_boundary = sp->pad_boundary; c.iqns_to_fl0congen |= htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO | (fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) | (fl->flags & FL_BUF_PACKING ? F_FW_IQ_CMD_FL0PACKEN : 0)); if (cong >= 0) { c.iqns_to_fl0congen |= htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) | F_FW_IQ_CMD_FL0CONGCIF | F_FW_IQ_CMD_FL0CONGEN); } c.fl0dcaen_to_fl0cidxfthresh = htobe16(V_FW_IQ_CMD_FL0FBMIN(chip_id(sc) <= CHELSIO_T5 ? X_FETCHBURSTMIN_128B : X_FETCHBURSTMIN_64B) | V_FW_IQ_CMD_FL0FBMAX(chip_id(sc) <= CHELSIO_T5 ? 
X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B)); c.fl0size = htobe16(fl->qsize); c.fl0addr = htobe64(fl->ba); } rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); if (rc != 0) { device_printf(sc->dev, "failed to create ingress queue: %d\n", rc); return (rc); } iq->cidx = 0; iq->gen = F_RSPD_GEN; iq->intr_next = iq->intr_params; iq->cntxt_id = be16toh(c.iqid); iq->abs_id = be16toh(c.physiqid); iq->flags |= IQ_ALLOCATED; cntxt_id = iq->cntxt_id - sc->sge.iq_start; if (cntxt_id >= sc->sge.niq) { panic ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.niq - 1); } sc->sge.iqmap[cntxt_id] = iq; if (fl) { u_int qid; iq->flags |= IQ_HAS_FL; fl->cntxt_id = be16toh(c.fl0id); fl->pidx = fl->cidx = 0; cntxt_id = fl->cntxt_id - sc->sge.eq_start; if (cntxt_id >= sc->sge.neq) { panic("%s: fl->cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.neq - 1); } sc->sge.eqmap[cntxt_id] = (void *)fl; qid = fl->cntxt_id; if (isset(&sc->doorbells, DOORBELL_UDB)) { uint32_t s_qpp = sc->params.sge.eq_s_qpp; uint32_t mask = (1 << s_qpp) - 1; volatile uint8_t *udb; udb = sc->udbs_base + UDBS_DB_OFFSET; udb += (qid >> s_qpp) << PAGE_SHIFT; qid &= mask; if (qid < PAGE_SIZE / UDBS_SEG_SIZE) { udb += qid << UDBS_SEG_SHIFT; qid = 0; } fl->udb = (volatile void *)udb; } fl->dbval = V_QID(qid) | sc->chip_params->sge_fl_db; FL_LOCK(fl); /* Enough to make sure the SGE doesn't think it's starved */ refill_fl(sc, fl, fl->lowat); FL_UNLOCK(fl); } if (chip_id(sc) >= CHELSIO_T5 && !(sc->flags & IS_VF) && cong >= 0) { uint32_t param, val; param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | V_FW_PARAMS_PARAM_YZ(iq->cntxt_id); if (cong == 0) val = 1 << 19; else { val = 2 << 19; for (i = 0; i < 4; i++) { if (cong & (1 << i)) val |= 1 << (i << 2); } } rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); if (rc != 0) { /* report error but carry on */ device_printf(sc->dev, "failed to set congestion manager context for " "ingress queue %d: %d\n", iq->cntxt_id, rc); } } /* Enable IQ interrupts */ atomic_store_rel_int(&iq->state, IQS_IDLE); t4_write_reg(sc, sc->sge_gts_reg, V_SEINTARM(iq->intr_params) | V_INGRESSQID(iq->cntxt_id)); return (0); } static int free_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl) { int rc; struct adapter *sc = iq->adapter; device_t dev; if (sc == NULL) return (0); /* nothing to do */ dev = vi ? vi->dev : sc->dev; if (iq->flags & IQ_ALLOCATED) { rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id, fl ? 
fl->cntxt_id : 0xffff, 0xffff); if (rc != 0) { device_printf(dev, "failed to free queue %p: %d\n", iq, rc); return (rc); } iq->flags &= ~IQ_ALLOCATED; } free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc); bzero(iq, sizeof(*iq)); if (fl) { free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba, fl->desc); if (fl->sdesc) free_fl_sdesc(sc, fl); if (mtx_initialized(&fl->fl_lock)) mtx_destroy(&fl->fl_lock); bzero(fl, sizeof(*fl)); } return (0); } static void add_fl_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, struct sge_fl *fl) { struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL, "freelist"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &fl->ba, "bus address of descriptor ring"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, fl->sidx * EQ_ESIZE + sc->params.sge.spg_len, "desc ring size in bytes"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", CTLTYPE_INT | CTLFLAG_RD, &fl->cntxt_id, 0, sysctl_uint16, "I", "SGE context id of the freelist"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "padding", CTLFLAG_RD, NULL, fl_pad ? 1 : 0, "padding enabled"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "packing", CTLFLAG_RD, NULL, fl->flags & FL_BUF_PACKING ? 1 : 0, "packing enabled"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx, 0, "consumer index"); if (fl->flags & FL_BUF_PACKING) { SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_offset", CTLFLAG_RD, &fl->rx_offset, 0, "packing rx offset"); } SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &fl->pidx, 0, "producer index"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_allocated", CTLFLAG_RD, &fl->mbuf_allocated, "# of mbuf allocated"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_inlined", CTLFLAG_RD, &fl->mbuf_inlined, "# of mbuf inlined in clusters"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_allocated", CTLFLAG_RD, &fl->cl_allocated, "# of clusters allocated"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_recycled", CTLFLAG_RD, &fl->cl_recycled, "# of clusters recycled"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_fast_recycled", CTLFLAG_RD, &fl->cl_fast_recycled, "# of clusters recycled (fast)"); } static int alloc_fwq(struct adapter *sc) { int rc, intr_idx; struct sge_iq *fwq = &sc->sge.fwq; struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE); fwq->flags |= IQ_INTR; /* always */ if (sc->flags & IS_VF) intr_idx = 0; else { intr_idx = sc->intr_count > 1 ? 
1 : 0; fwq->set_tcb_rpl = t4_filter_rpl; fwq->l2t_write_rpl = do_l2t_write_rpl; } rc = alloc_iq_fl(&sc->port[0]->vi[0], fwq, NULL, intr_idx, -1); if (rc != 0) { device_printf(sc->dev, "failed to create firmware event queue: %d\n", rc); return (rc); } oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", CTLFLAG_RD, NULL, "firmware event queue"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UAUTO(&sc->ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &fwq->ba, "bus address of descriptor ring"); SYSCTL_ADD_INT(&sc->ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, fwq->qsize * IQ_ESIZE, "descriptor ring size in bytes"); SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "abs_id", CTLTYPE_INT | CTLFLAG_RD, &fwq->abs_id, 0, sysctl_uint16, "I", "absolute id of the queue"); SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cntxt_id", CTLTYPE_INT | CTLFLAG_RD, &fwq->cntxt_id, 0, sysctl_uint16, "I", "SGE context id of the queue"); SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx", CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I", "consumer index"); return (0); } static int free_fwq(struct adapter *sc) { return free_iq_fl(NULL, &sc->sge.fwq, NULL); } static int alloc_mgmtq(struct adapter *sc) { int rc; struct sge_wrq *mgmtq = &sc->sge.mgmtq; char name[16]; struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "mgmtq", CTLFLAG_RD, NULL, "management queue"); snprintf(name, sizeof(name), "%s mgmtq", device_get_nameunit(sc->dev)); init_eq(sc, &mgmtq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[0]->tx_chan, sc->sge.fwq.cntxt_id, name); rc = alloc_wrq(sc, NULL, mgmtq, oid); if (rc != 0) { device_printf(sc->dev, "failed to create management queue: %d\n", rc); return (rc); } return (0); } static int free_mgmtq(struct adapter *sc) { return free_wrq(sc, &sc->sge.mgmtq); } int tnl_cong(struct port_info *pi, int drop) { if (drop == -1) return (-1); else if (drop == 1) return (0); else return (pi->rx_chan_map); } static int alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int intr_idx, int idx, struct sysctl_oid *oid) { int rc; struct adapter *sc = vi->pi->adapter; struct sysctl_oid_list *children; char name[16]; rc = alloc_iq_fl(vi, &rxq->iq, &rxq->fl, intr_idx, tnl_cong(vi->pi, cong_drop)); if (rc != 0) return (rc); if (idx == 0) sc->sge.iq_base = rxq->iq.abs_id - rxq->iq.cntxt_id; else KASSERT(rxq->iq.cntxt_id + sc->sge.iq_base == rxq->iq.abs_id, ("iq_base mismatch")); KASSERT(sc->sge.iq_base == 0 || sc->flags & IS_VF, ("PF with non-zero iq_base")); /* * The freelist is just barely above the starvation threshold right now, * fill it up a bit more. 
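 * (alloc_iq_fl() stopped at fl->lowat, which is only enough to keep
 * the SGE from declaring the freelist starved.)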
*/ FL_LOCK(&rxq->fl); refill_fl(sc, &rxq->fl, 128); FL_UNLOCK(&rxq->fl); #if defined(INET) || defined(INET6) rc = tcp_lro_init_args(&rxq->lro, vi->ifp, lro_entries, lro_mbufs); if (rc != 0) return (rc); MPASS(rxq->lro.ifp == vi->ifp); /* also indicates LRO init'ed */ if (vi->ifp->if_capenable & IFCAP_LRO) rxq->iq.flags |= IQ_LRO_ENABLED; #endif rxq->ifp = vi->ifp; children = SYSCTL_CHILDREN(oid); snprintf(name, sizeof(name), "%d", idx); oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "rx queue"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UAUTO(&vi->ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &rxq->iq.ba, "bus address of descriptor ring"); SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, rxq->iq.qsize * IQ_ESIZE, "descriptor ring size in bytes"); SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "abs_id", CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_uint16, "I", "absolute id of the queue"); SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cntxt_id, 0, sysctl_uint16, "I", "SGE context id of the queue"); SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cidx, 0, sysctl_uint16, "I", "consumer index"); #if defined(INET) || defined(INET6) SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, &rxq->lro.lro_queued, 0, NULL); SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, &rxq->lro.lro_flushed, 0, NULL); #endif SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD, &rxq->rxcsum, "# of times hardware assisted with checksum"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_extraction", CTLFLAG_RD, &rxq->vlan_extraction, "# of times hardware extracted 802.1Q tag"); add_fl_sysctls(sc, &vi->ctx, oid, &rxq->fl); return (rc); } static int free_rxq(struct vi_info *vi, struct sge_rxq *rxq) { int rc; #if defined(INET) || defined(INET6) if (rxq->lro.ifp) { tcp_lro_free(&rxq->lro); rxq->lro.ifp = NULL; } #endif rc = free_iq_fl(vi, &rxq->iq, &rxq->fl); if (rc == 0) bzero(rxq, sizeof(*rxq)); return (rc); } #ifdef TCP_OFFLOAD static int alloc_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq, int intr_idx, int idx, struct sysctl_oid *oid) { struct port_info *pi = vi->pi; int rc; struct sysctl_oid_list *children; char name[16]; rc = alloc_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx, pi->rx_chan_map); if (rc != 0) return (rc); children = SYSCTL_CHILDREN(oid); snprintf(name, sizeof(name), "%d", idx); oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "rx queue"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UAUTO(&vi->ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &ofld_rxq->iq.ba, "bus address of descriptor ring"); SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, ofld_rxq->iq.qsize * IQ_ESIZE, "descriptor ring size in bytes"); SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "abs_id", CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.abs_id, 0, sysctl_uint16, "I", "absolute id of the queue"); SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cntxt_id, 0, sysctl_uint16, "I", "SGE context id of the queue"); SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cidx, 0, sysctl_uint16, "I", "consumer index"); add_fl_sysctls(pi->adapter, &vi->ctx, oid, &ofld_rxq->fl); return (rc); } static int free_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq) { int rc; rc = free_iq_fl(vi, 
&ofld_rxq->iq, &ofld_rxq->fl); if (rc == 0) bzero(ofld_rxq, sizeof(*ofld_rxq)); return (rc); } #endif #ifdef DEV_NETMAP static int alloc_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int intr_idx, int idx, struct sysctl_oid *oid) { int rc; struct sysctl_oid_list *children; struct sysctl_ctx_list *ctx; char name[16]; size_t len; struct adapter *sc = vi->pi->adapter; struct netmap_adapter *na = NA(vi->ifp); MPASS(na != NULL); len = vi->qsize_rxq * IQ_ESIZE; rc = alloc_ring(sc, len, &nm_rxq->iq_desc_tag, &nm_rxq->iq_desc_map, &nm_rxq->iq_ba, (void **)&nm_rxq->iq_desc); if (rc != 0) return (rc); len = na->num_rx_desc * EQ_ESIZE + sc->params.sge.spg_len; rc = alloc_ring(sc, len, &nm_rxq->fl_desc_tag, &nm_rxq->fl_desc_map, &nm_rxq->fl_ba, (void **)&nm_rxq->fl_desc); if (rc != 0) return (rc); nm_rxq->vi = vi; nm_rxq->nid = idx; nm_rxq->iq_cidx = 0; nm_rxq->iq_sidx = vi->qsize_rxq - sc->params.sge.spg_len / IQ_ESIZE; nm_rxq->iq_gen = F_RSPD_GEN; nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0; nm_rxq->fl_sidx = na->num_rx_desc; nm_rxq->intr_idx = intr_idx; nm_rxq->iq_cntxt_id = INVALID_NM_RXQ_CNTXT_ID; ctx = &vi->ctx; children = SYSCTL_CHILDREN(oid); snprintf(name, sizeof(name), "%d", idx); oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "rx queue"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "abs_id", CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_abs_id, 0, sysctl_uint16, "I", "absolute id of the queue"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_cntxt_id, 0, sysctl_uint16, "I", "SGE context id of the queue"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_cidx, 0, sysctl_uint16, "I", "consumer index"); children = SYSCTL_CHILDREN(oid); oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL, "freelist"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->fl_cntxt_id, 0, sysctl_uint16, "I", "SGE context id of the freelist"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &nm_rxq->fl_cidx, 0, "consumer index"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &nm_rxq->fl_pidx, 0, "producer index"); return (rc); } static int free_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq) { struct adapter *sc = vi->pi->adapter; if (vi->flags & VI_INIT_DONE) MPASS(nm_rxq->iq_cntxt_id == INVALID_NM_RXQ_CNTXT_ID); else MPASS(nm_rxq->iq_cntxt_id == 0); free_ring(sc, nm_rxq->iq_desc_tag, nm_rxq->iq_desc_map, nm_rxq->iq_ba, nm_rxq->iq_desc); free_ring(sc, nm_rxq->fl_desc_tag, nm_rxq->fl_desc_map, nm_rxq->fl_ba, nm_rxq->fl_desc); return (0); } static int alloc_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq, int iqidx, int idx, struct sysctl_oid *oid) { int rc; size_t len; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct netmap_adapter *na = NA(vi->ifp); char name[16]; struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); len = na->num_tx_desc * EQ_ESIZE + sc->params.sge.spg_len; rc = alloc_ring(sc, len, &nm_txq->desc_tag, &nm_txq->desc_map, &nm_txq->ba, (void **)&nm_txq->desc); if (rc) return (rc); nm_txq->pidx = nm_txq->cidx = 0; nm_txq->sidx = na->num_tx_desc; nm_txq->nid = idx; nm_txq->iqidx = iqidx; nm_txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(G_FW_VIID_PFN(vi->viid)) | V_TXPKT_VF(G_FW_VIID_VIN(vi->viid)) | V_TXPKT_VF_VLD(G_FW_VIID_VIVLD(vi->viid))); nm_txq->cntxt_id = 
INVALID_NM_TXQ_CNTXT_ID; snprintf(name, sizeof(name), "%d", idx); oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "netmap tx queue"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, &nm_txq->cntxt_id, 0, "SGE context id of the queue"); SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", CTLTYPE_INT | CTLFLAG_RD, &nm_txq->cidx, 0, sysctl_uint16, "I", "consumer index"); SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx", CTLTYPE_INT | CTLFLAG_RD, &nm_txq->pidx, 0, sysctl_uint16, "I", "producer index"); return (rc); } static int free_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq) { struct adapter *sc = vi->pi->adapter; if (vi->flags & VI_INIT_DONE) MPASS(nm_txq->cntxt_id == INVALID_NM_TXQ_CNTXT_ID); else MPASS(nm_txq->cntxt_id == 0); free_ring(sc, nm_txq->desc_tag, nm_txq->desc_map, nm_txq->ba, nm_txq->desc); return (0); } #endif static int ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq) { int rc, cntxt_id; struct fw_eq_ctrl_cmd c; int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; bzero(&c, sizeof(c)); c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) | V_FW_EQ_CTRL_CMD_VFN(0)); c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC | F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c)); c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); c.physeqid_pkd = htobe32(0); c.fetchszm_to_iqid = htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid)); c.dcaen_to_eqsize = htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) | V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) | V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) | V_FW_EQ_CTRL_CMD_EQSIZE(qsize)); c.eqaddr = htobe64(eq->ba); rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); if (rc != 0) { device_printf(sc->dev, "failed to create control queue %d: %d\n", eq->tx_chan, rc); return (rc); } eq->flags |= EQ_ALLOCATED; eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid)); cntxt_id = eq->cntxt_id - sc->sge.eq_start; if (cntxt_id >= sc->sge.neq) panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.neq - 1); sc->sge.eqmap[cntxt_id] = eq; return (rc); } static int eth_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) { int rc, cntxt_id; struct fw_eq_eth_cmd c; int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; bzero(&c, sizeof(c)); c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | V_FW_EQ_ETH_CMD_VFN(0)); c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE | F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid)); c.fetchszm_to_iqid = htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | V_FW_EQ_ETH_CMD_IQID(eq->iqid)); c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) | V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | V_FW_EQ_ETH_CMD_EQSIZE(qsize)); c.eqaddr = htobe64(eq->ba); rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); if (rc != 0) { device_printf(vi->dev, "failed to create Ethernet egress queue: %d\n", rc); return (rc); } eq->flags |= EQ_ALLOCATED; eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); eq->abs_id = 
G_FW_EQ_ETH_CMD_PHYSEQID(be32toh(c.physeqid_pkd)); cntxt_id = eq->cntxt_id - sc->sge.eq_start; if (cntxt_id >= sc->sge.neq) panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.neq - 1); sc->sge.eqmap[cntxt_id] = eq; return (rc); } #ifdef TCP_OFFLOAD static int ofld_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) { int rc, cntxt_id; struct fw_eq_ofld_cmd c; int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; bzero(&c, sizeof(c)); c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) | V_FW_EQ_OFLD_CMD_VFN(0)); c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC | F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c)); c.fetchszm_to_iqid = htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid)); c.dcaen_to_eqsize = htobe32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) | V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) | V_FW_EQ_OFLD_CMD_EQSIZE(qsize)); c.eqaddr = htobe64(eq->ba); rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); if (rc != 0) { device_printf(vi->dev, "failed to create egress queue for TCP offload: %d\n", rc); return (rc); } eq->flags |= EQ_ALLOCATED; eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd)); cntxt_id = eq->cntxt_id - sc->sge.eq_start; if (cntxt_id >= sc->sge.neq) panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.neq - 1); sc->sge.eqmap[cntxt_id] = eq; return (rc); } #endif static int alloc_eq(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) { int rc, qsize; size_t len; mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF); qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; len = qsize * EQ_ESIZE; rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, &eq->ba, (void **)&eq->desc); if (rc) return (rc); eq->pidx = eq->cidx = 0; eq->equeqidx = eq->dbidx = 0; eq->doorbells = sc->doorbells; switch (eq->flags & EQ_TYPEMASK) { case EQ_CTRL: rc = ctrl_eq_alloc(sc, eq); break; case EQ_ETH: rc = eth_eq_alloc(sc, vi, eq); break; #ifdef TCP_OFFLOAD case EQ_OFLD: rc = ofld_eq_alloc(sc, vi, eq); break; #endif default: panic("%s: invalid eq type %d.", __func__, eq->flags & EQ_TYPEMASK); } if (rc != 0) { device_printf(sc->dev, "failed to allocate egress queue(%d): %d\n", eq->flags & EQ_TYPEMASK, rc); } if (isset(&eq->doorbells, DOORBELL_UDB) || isset(&eq->doorbells, DOORBELL_UDBWC) || isset(&eq->doorbells, DOORBELL_WCWR)) { uint32_t s_qpp = sc->params.sge.eq_s_qpp; uint32_t mask = (1 << s_qpp) - 1; volatile uint8_t *udb; udb = sc->udbs_base + UDBS_DB_OFFSET; udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT; /* pg offset */ eq->udb_qid = eq->cntxt_id & mask; /* id in page */ if (eq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE) clrbit(&eq->doorbells, DOORBELL_WCWR); else { udb += eq->udb_qid << UDBS_SEG_SHIFT; /* seg offset */ eq->udb_qid = 0; } eq->udb = (volatile void *)udb; } return (rc); } static int free_eq(struct adapter *sc, struct sge_eq *eq) { int rc; if (eq->flags & EQ_ALLOCATED) { switch (eq->flags & EQ_TYPEMASK) { case EQ_CTRL: rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); break; case EQ_ETH: rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); break; #ifdef TCP_OFFLOAD case EQ_OFLD: rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); break; #endif default: panic("%s: invalid eq type %d.", __func__, eq->flags & EQ_TYPEMASK); } if (rc != 0) { device_printf(sc->dev, "failed to free egress queue 
(%d): %d\n", eq->flags & EQ_TYPEMASK, rc); return (rc); } eq->flags &= ~EQ_ALLOCATED; } free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc); if (mtx_initialized(&eq->eq_lock)) mtx_destroy(&eq->eq_lock); bzero(eq, sizeof(*eq)); return (0); } static int alloc_wrq(struct adapter *sc, struct vi_info *vi, struct sge_wrq *wrq, struct sysctl_oid *oid) { int rc; struct sysctl_ctx_list *ctx = vi ? &vi->ctx : &sc->ctx; struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); rc = alloc_eq(sc, vi, &wrq->eq); if (rc) return (rc); wrq->adapter = sc; TASK_INIT(&wrq->wrq_tx_task, 0, wrq_tx_drain, wrq); TAILQ_INIT(&wrq->incomplete_wrs); STAILQ_INIT(&wrq->wr_list); wrq->nwr_pending = 0; wrq->ndesc_needed = 0; SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &wrq->eq.ba, "bus address of descriptor ring"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, wrq->eq.sidx * EQ_ESIZE + sc->params.sge.spg_len, "desc ring size in bytes"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, &wrq->eq.cntxt_id, 0, "SGE context id of the queue"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.cidx, 0, sysctl_uint16, "I", "consumer index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx", CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I", "producer index"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sidx", CTLFLAG_RD, NULL, wrq->eq.sidx, "status page index"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_direct", CTLFLAG_RD, &wrq->tx_wrs_direct, "# of work requests (direct)"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_copied", CTLFLAG_RD, &wrq->tx_wrs_copied, "# of work requests (copied)"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_sspace", CTLFLAG_RD, &wrq->tx_wrs_ss, "# of work requests (copied from scratch space)"); return (rc); } static int free_wrq(struct adapter *sc, struct sge_wrq *wrq) { int rc; rc = free_eq(sc, &wrq->eq); if (rc) return (rc); bzero(wrq, sizeof(*wrq)); return (0); } static int alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx, struct sysctl_oid *oid) { int rc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct sge_eq *eq = &txq->eq; char name[16]; struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); rc = mp_ring_alloc(&txq->r, eq->sidx, txq, eth_tx, can_resume_eth_tx, M_CXGBE, M_WAITOK); if (rc != 0) { device_printf(sc->dev, "failed to allocate mp_ring: %d\n", rc); return (rc); } rc = alloc_eq(sc, vi, eq); if (rc != 0) { mp_ring_free(txq->r); txq->r = NULL; return (rc); } /* Can't fail after this point. 
*/ if (idx == 0) sc->sge.eq_base = eq->abs_id - eq->cntxt_id; else KASSERT(eq->cntxt_id + sc->sge.eq_base == eq->abs_id, ("eq_base mismatch")); KASSERT(sc->sge.eq_base == 0 || sc->flags & IS_VF, ("PF with non-zero eq_base")); TASK_INIT(&txq->tx_reclaim_task, 0, tx_reclaim, eq); txq->ifp = vi->ifp; txq->gl = sglist_alloc(TX_SGL_SEGS, M_WAITOK); if (sc->flags & IS_VF) txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | V_TXPKT_INTF(pi->tx_chan)); else txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(G_FW_VIID_PFN(vi->viid)) | V_TXPKT_VF(G_FW_VIID_VIN(vi->viid)) | V_TXPKT_VF_VLD(G_FW_VIID_VIVLD(vi->viid))); txq->tc_idx = -1; txq->sdesc = malloc(eq->sidx * sizeof(struct tx_sdesc), M_CXGBE, M_ZERO | M_WAITOK); snprintf(name, sizeof(name), "%d", idx); oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "tx queue"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UAUTO(&vi->ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &eq->ba, "bus address of descriptor ring"); SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, eq->sidx * EQ_ESIZE + sc->params.sge.spg_len, "desc ring size in bytes"); SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "abs_id", CTLFLAG_RD, &eq->abs_id, 0, "absolute id of the queue"); SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, &eq->cntxt_id, 0, "SGE context id of the queue"); SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I", "consumer index"); SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx", CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I", "producer index"); SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "sidx", CTLFLAG_RD, NULL, eq->sidx, "status page index"); SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "tc", CTLTYPE_INT | CTLFLAG_RW, vi, idx, sysctl_tc, "I", "traffic class (-1 means none)"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD, &txq->txcsum, "# of times hardware assisted with checksum"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_insertion", CTLFLAG_RD, &txq->vlan_insertion, "# of times hardware inserted 802.1Q tag"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD, &txq->tso_wrs, "# of TSO work requests"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD, &txq->imm_wrs, "# of work requests with immediate data"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD, &txq->sgl_wrs, "# of work requests with direct SGL"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD, &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_wrs", CTLFLAG_RD, &txq->txpkts0_wrs, "# of txpkts (type 0) work requests"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_wrs", CTLFLAG_RD, &txq->txpkts1_wrs, "# of txpkts (type 1) work requests"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_pkts", CTLFLAG_RD, &txq->txpkts0_pkts, "# of frames tx'd using type0 txpkts work requests"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_pkts", CTLFLAG_RD, &txq->txpkts1_pkts, "# of frames tx'd using type1 txpkts work requests"); SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_enqueues", CTLFLAG_RD, &txq->r->enqueues, "# of enqueues to the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_drops", CTLFLAG_RD, &txq->r->drops, "# of drops in the mp_ring for this queue"); 
SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_starts", CTLFLAG_RD, &txq->r->starts, "# of normal consumer starts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_stalls", CTLFLAG_RD, &txq->r->stalls, "# of consumer stalls in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_restarts", CTLFLAG_RD, &txq->r->restarts, "# of consumer restarts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_abdications", CTLFLAG_RD, &txq->r->abdications, "# of consumer abdications in the mp_ring for this queue"); return (0); } static int free_txq(struct vi_info *vi, struct sge_txq *txq) { int rc; struct adapter *sc = vi->pi->adapter; struct sge_eq *eq = &txq->eq; rc = free_eq(sc, eq); if (rc) return (rc); sglist_free(txq->gl); free(txq->sdesc, M_CXGBE); mp_ring_free(txq->r); bzero(txq, sizeof(*txq)); return (0); } static void oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *ba = arg; KASSERT(nseg == 1, ("%s meant for single segment mappings only.", __func__)); *ba = error ? 0 : segs->ds_addr; } static inline void ring_fl_db(struct adapter *sc, struct sge_fl *fl) { uint32_t n, v; n = IDXDIFF(fl->pidx / 8, fl->dbidx, fl->sidx); MPASS(n > 0); wmb(); v = fl->dbval | V_PIDX(n); if (fl->udb) *fl->udb = htole32(v); else t4_write_reg(sc, sc->sge_kdoorbell_reg, v); IDXINCR(fl->dbidx, n, fl->sidx); } /* * Fills up the freelist by allocating up to 'n' buffers. Buffers that are * recycled do not count towards this allocation budget. * * Returns non-zero to indicate that this freelist should be added to the list * of starving freelists. */ static int refill_fl(struct adapter *sc, struct sge_fl *fl, int n) { __be64 *d; struct fl_sdesc *sd; uintptr_t pa; caddr_t cl; struct cluster_layout *cll; struct sw_zone_info *swz; struct cluster_metadata *clm; uint16_t max_pidx; uint16_t hw_cidx = fl->hw_cidx; /* stable snapshot */ FL_LOCK_ASSERT_OWNED(fl); /* * We always stop at the beginning of the hardware descriptor that's just * before the one with the hw cidx. This is to avoid hw pidx = hw cidx, * which would mean an empty freelist to the chip. */ max_pidx = __predict_false(hw_cidx == 0) ? fl->sidx - 1 : hw_cidx - 1; if (fl->pidx == max_pidx * 8) return (0); d = &fl->desc[fl->pidx]; sd = &fl->sdesc[fl->pidx]; cll = &fl->cll_def; /* default layout */ swz = &sc->sge.sw_zone_info[cll->zidx]; while (n > 0) { if (sd->cl != NULL) { if (sd->nmbuf == 0) { /* * Fast recycle without involving any atomics on * the cluster's metadata (if the cluster has * metadata). This happens when all frames * received in the cluster were small enough to * fit within a single mbuf each. */ fl->cl_fast_recycled++; #ifdef INVARIANTS clm = cl_metadata(sc, fl, &sd->cll, sd->cl); if (clm != NULL) MPASS(clm->refcount == 1); #endif goto recycled_fast; } /* * Cluster is guaranteed to have metadata. Clusters * without metadata always take the fast recycle path * when they're recycled. 
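The recycle test just below hinges on the usual fetch-and-add idiom: atomic_fetchadd_int() returns the counter's value before the decrement, so seeing 1 means the caller dropped the last reference and may reuse the cluster directly. A minimal userspace sketch of the same test, using C11 atomics in place of the kernel's atomic(9) primitives (the struct and function names here are illustrative, not the driver's):

    #include <stdatomic.h>
    #include <stdbool.h>

    struct clm_sketch {
        atomic_int refcount;    /* one count per mbuf still using the cluster */
    };

    /* True if the caller held the last reference and now owns the cluster. */
    static bool
    drop_ref_and_try_recycle(struct clm_sketch *clm)
    {
        /* atomic_fetch_add() returns the value *before* the decrement. */
        return (atomic_fetch_add(&clm->refcount, -1) == 1);
    }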
*/ clm = cl_metadata(sc, fl, &sd->cll, sd->cl); MPASS(clm != NULL); if (atomic_fetchadd_int(&clm->refcount, -1) == 1) { fl->cl_recycled++; counter_u64_add(extfree_rels, 1); goto recycled; } sd->cl = NULL; /* gave up my reference */ } MPASS(sd->cl == NULL); alloc: cl = uma_zalloc(swz->zone, M_NOWAIT); if (__predict_false(cl == NULL)) { if (cll == &fl->cll_alt || fl->cll_alt.zidx == -1 || fl->cll_def.zidx == fl->cll_alt.zidx) break; /* fall back to the safe zone */ cll = &fl->cll_alt; swz = &sc->sge.sw_zone_info[cll->zidx]; goto alloc; } fl->cl_allocated++; n--; pa = pmap_kextract((vm_offset_t)cl); pa += cll->region1; sd->cl = cl; sd->cll = *cll; *d = htobe64(pa | cll->hwidx); clm = cl_metadata(sc, fl, cll, cl); if (clm != NULL) { recycled: #ifdef INVARIANTS clm->sd = sd; #endif clm->refcount = 1; } sd->nmbuf = 0; recycled_fast: d++; sd++; if (__predict_false(++fl->pidx % 8 == 0)) { uint16_t pidx = fl->pidx / 8; if (__predict_false(pidx == fl->sidx)) { fl->pidx = 0; pidx = 0; sd = fl->sdesc; d = fl->desc; } if (pidx == max_pidx) break; if (IDXDIFF(pidx, fl->dbidx, fl->sidx) >= 4) ring_fl_db(sc, fl); } } if (fl->pidx / 8 != fl->dbidx) ring_fl_db(sc, fl); return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING)); } /* * Attempt to refill all starving freelists. */ static void refill_sfl(void *arg) { struct adapter *sc = arg; struct sge_fl *fl, *fl_temp; mtx_assert(&sc->sfl_lock, MA_OWNED); TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) { FL_LOCK(fl); refill_fl(sc, fl, 64); if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) { TAILQ_REMOVE(&sc->sfl, fl, link); fl->flags &= ~FL_STARVING; } FL_UNLOCK(fl); } if (!TAILQ_EMPTY(&sc->sfl)) callout_schedule(&sc->sfl_callout, hz / 5); } static int alloc_fl_sdesc(struct sge_fl *fl) { fl->sdesc = malloc(fl->sidx * 8 * sizeof(struct fl_sdesc), M_CXGBE, M_ZERO | M_WAITOK); return (0); } static void free_fl_sdesc(struct adapter *sc, struct sge_fl *fl) { struct fl_sdesc *sd; struct cluster_metadata *clm; struct cluster_layout *cll; int i; sd = fl->sdesc; for (i = 0; i < fl->sidx * 8; i++, sd++) { if (sd->cl == NULL) continue; cll = &sd->cll; clm = cl_metadata(sc, fl, cll, sd->cl); if (sd->nmbuf == 0) uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl); else if (clm && atomic_fetchadd_int(&clm->refcount, -1) == 1) { uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl); counter_u64_add(extfree_rels, 1); } sd->cl = NULL; } free(fl->sdesc, M_CXGBE); fl->sdesc = NULL; } static inline void get_pkt_gl(struct mbuf *m, struct sglist *gl) { int rc; M_ASSERTPKTHDR(m); sglist_reset(gl); rc = sglist_append_mbuf(gl, m); if (__predict_false(rc != 0)) { panic("%s: mbuf %p (%d segs) was vetted earlier but now fails " "with %d.", __func__, m, mbuf_nsegs(m), rc); } KASSERT(gl->sg_nseg == mbuf_nsegs(m), ("%s: nsegs changed for mbuf %p from %d to %d", __func__, m, mbuf_nsegs(m), gl->sg_nseg)); KASSERT(gl->sg_nseg > 0 && gl->sg_nseg <= (needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS), ("%s: %d segments, should have been 1 <= nsegs <= %d", __func__, gl->sg_nseg, needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)); } /* * len16 for a txpkt WR with a GL. Includes the firmware work request header. */ static inline u_int txpkt_len16(u_int nsegs, u_int tso) { u_int n; MPASS(nsegs > 0); nsegs--; /* first segment is part of ulptx_sgl */ n = sizeof(struct fw_eth_tx_pkt_wr) + sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); if (tso) n += sizeof(struct cpl_tx_pkt_lso_core); return (howmany(n, 16)); } /* * len16 for a txpkt_vm WR with a GL. 
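txpkt_len16() above turns a segment count into 16-byte IMMDLEN units: the first segment rides in the ulptx_sgl header itself, and every further pair of segments costs one 24-byte ulptx_sge_pair (three 8-byte flits), which is what the 8 * ((3 * nsegs) / 2 + (nsegs & 1)) term computes. A standalone sketch of the same arithmetic, with the header struct sizes replaced by an illustrative 48-byte constant and EQ_ESIZE assumed to be the 64-byte hardware descriptor:

    #include <stdio.h>

    #define HOWMANY(x, y)   (((x) + (y) - 1) / (y))
    #define EQ_ESIZE_SK     64      /* assumed hardware descriptor size */
    #define HDRS_SK         48      /* assumed wr + cpl + ulptx_sgl headers */

    static unsigned int
    sketch_txpkt_len16(unsigned int nsegs)
    {
        nsegs--;        /* first segment is part of the ulptx_sgl */
        return (HOWMANY(HDRS_SK + 8 * ((3 * nsegs) / 2 + (nsegs & 1)), 16));
    }

    int
    main(void)
    {
        unsigned int nsegs, len16;

        for (nsegs = 1; nsegs <= 4; nsegs++) {
            len16 = sketch_txpkt_len16(nsegs);
            /* nsegs 1..4 -> len16 3, 4, 5, 6 -> ndesc 1, 1, 2, 2 */
            printf("nsegs %u: len16 %u ndesc %u\n", nsegs, len16,
                HOWMANY(len16, EQ_ESIZE_SK / 16));
        }
        return (0);
    }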
Includes the firmware work * request header. */ static inline u_int txpkt_vm_len16(u_int nsegs, u_int tso) { u_int n; MPASS(nsegs > 0); nsegs--; /* first segment is part of ulptx_sgl */ n = sizeof(struct fw_eth_tx_pkt_vm_wr) + sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); if (tso) n += sizeof(struct cpl_tx_pkt_lso_core); return (howmany(n, 16)); } /* * len16 for a txpkts type 0 WR with a GL. Does not include the firmware work * request header. */ static inline u_int txpkts0_len16(u_int nsegs) { u_int n; MPASS(nsegs > 0); nsegs--; /* first segment is part of ulptx_sgl */ n = sizeof(struct ulp_txpkt) + sizeof(struct ulptx_idata) + sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); return (howmany(n, 16)); } /* * len16 for a txpkts type 1 WR with a GL. Does not include the firmware work * request header. */ static inline u_int txpkts1_len16(void) { u_int n; n = sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl); return (howmany(n, 16)); } static inline u_int imm_payload(u_int ndesc) { u_int n; n = ndesc * EQ_ESIZE - sizeof(struct fw_eth_tx_pkt_wr) - sizeof(struct cpl_tx_pkt_core); return (n); } /* * Write a VM txpkt WR for this packet to the hardware descriptors, update the * software descriptor, and advance the pidx. It is guaranteed that enough * descriptors are available. * * The return value is the # of hardware descriptors used. */ static u_int write_txpkt_vm_wr(struct adapter *sc, struct sge_txq *txq, struct fw_eth_tx_pkt_vm_wr *wr, struct mbuf *m0, u_int available) { struct sge_eq *eq = &txq->eq; struct tx_sdesc *txsd; struct cpl_tx_pkt_core *cpl; uint32_t ctrl; /* used in many unrelated places */ uint64_t ctrl1; int csum_type, len16, ndesc, pktlen, nsegs; caddr_t dst; TXQ_LOCK_ASSERT_OWNED(txq); M_ASSERTPKTHDR(m0); MPASS(available > 0 && available < eq->sidx); len16 = mbuf_len16(m0); nsegs = mbuf_nsegs(m0); pktlen = m0->m_pkthdr.len; ctrl = sizeof(struct cpl_tx_pkt_core); if (needs_tso(m0)) ctrl += sizeof(struct cpl_tx_pkt_lso_core); ndesc = howmany(len16, EQ_ESIZE / 16); MPASS(ndesc <= available); /* Firmware work request header */ MPASS(wr == (void *)&eq->desc[eq->pidx]); wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_VM_WR) | V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); ctrl = V_FW_WR_LEN16(len16); wr->equiq_to_len16 = htobe32(ctrl); wr->r3[0] = 0; wr->r3[1] = 0; /* * Copy over ethmacdst, ethmacsrc, ethtype, and vlantci. * vlantci is ignored unless the ethtype is 0x8100, so it's * simpler to always copy it rather than making it * conditional. Also, it seems that we do not have to set * vlantci or fake the ethtype when doing VLAN tag insertion. 
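The single m_copydata() that follows grabs sizeof(struct ether_header) + 2 = 16 bytes from the head of the frame: both MAC addresses, the ethertype, and the two bytes that hold the 802.1Q TCI whenever the ethertype is 0x8100. A sketch of the layout those 16 bytes correspond to (field names illustrative):

    #include <stdint.h>

    struct vm_wr_hdr_sketch {
        uint8_t  ethmacdst[6];  /* bytes  0..5  */
        uint8_t  ethmacsrc[6];  /* bytes  6..11 */
        uint16_t ethtype;       /* bytes 12..13 */
        uint16_t vlantci;       /* bytes 14..15, meaningful only for 0x8100 */
    };

    _Static_assert(sizeof(struct vm_wr_hdr_sketch) == 16,
        "ether_header (14 bytes) plus 2 bytes of TCI");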
*/ m_copydata(m0, 0, sizeof(struct ether_header) + 2, wr->ethmacdst); csum_type = -1; if (needs_tso(m0)) { struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && m0->m_pkthdr.l4hlen > 0, ("%s: mbuf %p needs TSO but missing header lengths", __func__, m0)); ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE | V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) | V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header)) ctrl |= V_LSO_ETHHDR_LEN(1); if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) ctrl |= F_LSO_IPV6; lso->lso_ctrl = htobe32(ctrl); lso->ipid_ofst = htobe16(0); lso->mss = htobe16(m0->m_pkthdr.tso_segsz); lso->seqno_offset = htobe32(0); lso->len = htobe32(pktlen); if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) csum_type = TX_CSUM_TCPIP6; else csum_type = TX_CSUM_TCPIP; cpl = (void *)(lso + 1); txq->tso_wrs++; } else { if (m0->m_pkthdr.csum_flags & CSUM_IP_TCP) csum_type = TX_CSUM_TCPIP; else if (m0->m_pkthdr.csum_flags & CSUM_IP_UDP) csum_type = TX_CSUM_UDPIP; else if (m0->m_pkthdr.csum_flags & CSUM_IP6_TCP) csum_type = TX_CSUM_TCPIP6; else if (m0->m_pkthdr.csum_flags & CSUM_IP6_UDP) csum_type = TX_CSUM_UDPIP6; #if defined(INET) else if (m0->m_pkthdr.csum_flags & CSUM_IP) { /* * XXX: The firmware appears to stomp on the * fragment/flags field of the IP header when * using TX_CSUM_IP. Fall back to doing * software checksums. */ u_short *sump; struct mbuf *m; int offset; m = m0; offset = 0; sump = m_advance(&m, &offset, m0->m_pkthdr.l2hlen + offsetof(struct ip, ip_sum)); *sump = in_cksum_skip(m0, m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen, m0->m_pkthdr.l2hlen); m0->m_pkthdr.csum_flags &= ~CSUM_IP; } #endif cpl = (void *)(wr + 1); } /* Checksum offload */ ctrl1 = 0; if (needs_l3_csum(m0) == 0) ctrl1 |= F_TXPKT_IPCSUM_DIS; if (csum_type >= 0) { KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0, ("%s: mbuf %p needs checksum offload but missing header lengths", __func__, m0)); if (chip_id(sc) <= CHELSIO_T5) { ctrl1 |= V_TXPKT_ETHHDR_LEN(m0->m_pkthdr.l2hlen - ETHER_HDR_LEN); } else { ctrl1 |= V_T6_TXPKT_ETHHDR_LEN(m0->m_pkthdr.l2hlen - ETHER_HDR_LEN); } ctrl1 |= V_TXPKT_IPHDR_LEN(m0->m_pkthdr.l3hlen); ctrl1 |= V_TXPKT_CSUM_TYPE(csum_type); } else ctrl1 |= F_TXPKT_L4CSUM_DIS; if (m0->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) txq->txcsum++; /* some hardware assistance provided */ /* VLAN tag insertion */ if (needs_vlan_insertion(m0)) { ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); txq->vlan_insertion++; } /* CPL header */ cpl->ctrl0 = txq->cpl_ctrl0; cpl->pack = 0; cpl->len = htobe16(pktlen); cpl->ctrl1 = htobe64(ctrl1); /* SGL */ dst = (void *)(cpl + 1); /* * A packet using TSO will use up an entire descriptor for the * firmware work request header, LSO CPL, and TX_PKT_XT CPL. * If this descriptor is the last descriptor in the ring, wrap * around to the front of the ring explicitly for the start of * the sgl. */ if (dst == (void *)&eq->desc[eq->sidx]) { dst = (void *)&eq->desc[0]; write_gl_to_txd(txq, m0, &dst, 0); } else write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); txq->sgl_wrs++; txq->txpkt_wrs++; txsd = &txq->sdesc[eq->pidx]; txsd->m = m0; txsd->desc_used = ndesc; return (ndesc); } /* * Write a txpkt WR for this packet to the hardware descriptors, update the * software descriptor, and advance the pidx. It is guaranteed that enough * descriptors are available. 
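The CSUM_IP branch above falls back to a software IPv4 header checksum because, per the comment, the firmware stomps on the fragment/flags field when asked to do it. The driver itself uses in_cksum_skip() over the mbuf chain; for reference, a self-contained sketch of the same RFC 1071 ones-complement sum over a contiguous, 2-byte-aligned header:

    #include <stddef.h>
    #include <stdint.h>

    static uint16_t
    ipv4_hdr_cksum_sketch(const void *hdr, size_t len)  /* len must be even */
    {
        const uint16_t *p = hdr;    /* checksum field zeroed by the caller */
        uint32_t sum = 0;

        for (; len >= 2; len -= 2)
            sum += *p++;
        while (sum > 0xffff)
            sum = (sum & 0xffff) + (sum >> 16);     /* fold the carries */
        return (~sum & 0xffff);
    }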
* * The return value is the # of hardware descriptors used. */ static u_int write_txpkt_wr(struct sge_txq *txq, struct fw_eth_tx_pkt_wr *wr, struct mbuf *m0, u_int available) { struct sge_eq *eq = &txq->eq; struct tx_sdesc *txsd; struct cpl_tx_pkt_core *cpl; uint32_t ctrl; /* used in many unrelated places */ uint64_t ctrl1; int len16, ndesc, pktlen, nsegs; caddr_t dst; TXQ_LOCK_ASSERT_OWNED(txq); M_ASSERTPKTHDR(m0); MPASS(available > 0 && available < eq->sidx); len16 = mbuf_len16(m0); nsegs = mbuf_nsegs(m0); pktlen = m0->m_pkthdr.len; ctrl = sizeof(struct cpl_tx_pkt_core); if (needs_tso(m0)) ctrl += sizeof(struct cpl_tx_pkt_lso_core); else if (pktlen <= imm_payload(2) && available >= 2) { /* Immediate data. Recalculate len16 and set nsegs to 0. */ ctrl += pktlen; len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + sizeof(struct cpl_tx_pkt_core) + pktlen, 16); nsegs = 0; } ndesc = howmany(len16, EQ_ESIZE / 16); MPASS(ndesc <= available); /* Firmware work request header */ MPASS(wr == (void *)&eq->desc[eq->pidx]); wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); ctrl = V_FW_WR_LEN16(len16); wr->equiq_to_len16 = htobe32(ctrl); wr->r3 = 0; if (needs_tso(m0)) { struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && m0->m_pkthdr.l4hlen > 0, ("%s: mbuf %p needs TSO but missing header lengths", __func__, m0)); ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE | V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) | V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header)) ctrl |= V_LSO_ETHHDR_LEN(1); if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) ctrl |= F_LSO_IPV6; lso->lso_ctrl = htobe32(ctrl); lso->ipid_ofst = htobe16(0); lso->mss = htobe16(m0->m_pkthdr.tso_segsz); lso->seqno_offset = htobe32(0); lso->len = htobe32(pktlen); cpl = (void *)(lso + 1); txq->tso_wrs++; } else cpl = (void *)(wr + 1); /* Checksum offload */ ctrl1 = 0; if (needs_l3_csum(m0) == 0) ctrl1 |= F_TXPKT_IPCSUM_DIS; if (needs_l4_csum(m0) == 0) ctrl1 |= F_TXPKT_L4CSUM_DIS; if (m0->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) txq->txcsum++; /* some hardware assistance provided */ /* VLAN tag insertion */ if (needs_vlan_insertion(m0)) { ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); txq->vlan_insertion++; } /* CPL header */ cpl->ctrl0 = txq->cpl_ctrl0; cpl->pack = 0; cpl->len = htobe16(pktlen); cpl->ctrl1 = htobe64(ctrl1); /* SGL */ dst = (void *)(cpl + 1); if (nsegs > 0) { write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); txq->sgl_wrs++; } else { struct mbuf *m; for (m = m0; m != NULL; m = m->m_next) { copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); #ifdef INVARIANTS pktlen -= m->m_len; #endif } #ifdef INVARIANTS KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen)); #endif txq->imm_wrs++; } txq->txpkt_wrs++; txsd = &txq->sdesc[eq->pidx]; txsd->m = m0; txsd->desc_used = ndesc; return (ndesc); } static int try_txpkts(struct mbuf *m, struct mbuf *n, struct txpkts *txp, u_int available) { u_int needed, nsegs1, nsegs2, l1, l2; if (cannot_use_txpkts(m) || cannot_use_txpkts(n)) return (1); nsegs1 = mbuf_nsegs(m); nsegs2 = mbuf_nsegs(n); if (nsegs1 + nsegs2 == 2) { txp->wr_type = 1; l1 = l2 = txpkts1_len16(); } else { txp->wr_type = 0; l1 = txpkts0_len16(nsegs1); l2 = txpkts0_len16(nsegs2); } txp->len16 = howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) + l1 + l2; needed = 
howmany(txp->len16, EQ_ESIZE / 16); if (needed > SGE_MAX_WR_NDESC || needed > available) return (1); txp->plen = m->m_pkthdr.len + n->m_pkthdr.len; if (txp->plen > 65535) return (1); txp->npkt = 2; set_mbuf_len16(m, l1); set_mbuf_len16(n, l2); return (0); } static int add_to_txpkts(struct mbuf *m, struct txpkts *txp, u_int available) { u_int plen, len16, needed, nsegs; MPASS(txp->wr_type == 0 || txp->wr_type == 1); nsegs = mbuf_nsegs(m); if (needs_tso(m) || (txp->wr_type == 1 && nsegs != 1)) return (1); plen = txp->plen + m->m_pkthdr.len; if (plen > 65535) return (1); if (txp->wr_type == 0) len16 = txpkts0_len16(nsegs); else len16 = txpkts1_len16(); needed = howmany(txp->len16 + len16, EQ_ESIZE / 16); if (needed > SGE_MAX_WR_NDESC || needed > available) return (1); txp->npkt++; txp->plen = plen; txp->len16 += len16; set_mbuf_len16(m, len16); return (0); } /* * Write a txpkts WR for the packets in txp to the hardware descriptors, update * the software descriptor, and advance the pidx. It is guaranteed that enough * descriptors are available. * * The return value is the # of hardware descriptors used. */ static u_int write_txpkts_wr(struct sge_txq *txq, struct fw_eth_tx_pkts_wr *wr, struct mbuf *m0, const struct txpkts *txp, u_int available) { struct sge_eq *eq = &txq->eq; struct tx_sdesc *txsd; struct cpl_tx_pkt_core *cpl; uint32_t ctrl; uint64_t ctrl1; int ndesc, checkwrap; struct mbuf *m; void *flitp; TXQ_LOCK_ASSERT_OWNED(txq); MPASS(txp->npkt > 0); MPASS(txp->plen < 65536); MPASS(m0 != NULL); MPASS(m0->m_nextpkt != NULL); MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16)); MPASS(available > 0 && available < eq->sidx); ndesc = howmany(txp->len16, EQ_ESIZE / 16); MPASS(ndesc <= available); MPASS(wr == (void *)&eq->desc[eq->pidx]); wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)); ctrl = V_FW_WR_LEN16(txp->len16); wr->equiq_to_len16 = htobe32(ctrl); wr->plen = htobe16(txp->plen); wr->npkt = txp->npkt; wr->r3 = 0; wr->type = txp->wr_type; flitp = wr + 1; /* * At this point we are 16B into a hardware descriptor. If checkwrap is * set then we know the WR is going to wrap around somewhere. We'll * check for that at appropriate points. 
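The checkwrap test that follows, eq->sidx - ndesc < eq->pidx, is simply "pidx + ndesc runs past the end of the ring" with the comparison rearranged. A worked sketch, assuming a 1024-descriptor ring:

    #include <stdbool.h>

    /* Does a WR of 'ndesc' descriptors starting at 'pidx' cross the ring end? */
    static bool
    wr_wraps_sketch(unsigned int sidx, unsigned int pidx, unsigned int ndesc)
    {
        return (sidx - ndesc < pidx);   /* same as pidx + ndesc > sidx */
    }

    /*
     * wr_wraps_sketch(1024, 1022, 4) -> true  (descriptors 1022, 1023, 0, 1)
     * wr_wraps_sketch(1024, 1020, 4) -> false (1020..1023 fit exactly)
     */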
*/ checkwrap = eq->sidx - ndesc < eq->pidx; for (m = m0; m != NULL; m = m->m_nextpkt) { if (txp->wr_type == 0) { struct ulp_txpkt *ulpmc; struct ulptx_idata *ulpsc; /* ULP master command */ ulpmc = flitp; ulpmc->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0) | V_ULP_TXPKT_FID(eq->iqid)); ulpmc->len = htobe32(mbuf_len16(m)); /* ULP subcommand */ ulpsc = (void *)(ulpmc + 1); ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) | F_ULP_TX_SC_MORE); ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core)); cpl = (void *)(ulpsc + 1); if (checkwrap && (uintptr_t)cpl == (uintptr_t)&eq->desc[eq->sidx]) cpl = (void *)&eq->desc[0]; } else { cpl = flitp; } /* Checksum offload */ ctrl1 = 0; if (needs_l3_csum(m) == 0) ctrl1 |= F_TXPKT_IPCSUM_DIS; if (needs_l4_csum(m) == 0) ctrl1 |= F_TXPKT_L4CSUM_DIS; if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) txq->txcsum++; /* some hardware assistance provided */ /* VLAN tag insertion */ if (needs_vlan_insertion(m)) { ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); txq->vlan_insertion++; } /* CPL header */ cpl->ctrl0 = txq->cpl_ctrl0; cpl->pack = 0; cpl->len = htobe16(m->m_pkthdr.len); cpl->ctrl1 = htobe64(ctrl1); flitp = cpl + 1; if (checkwrap && (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx]) flitp = (void *)&eq->desc[0]; write_gl_to_txd(txq, m, (caddr_t *)(&flitp), checkwrap); } if (txp->wr_type == 0) { txq->txpkts0_pkts += txp->npkt; txq->txpkts0_wrs++; } else { txq->txpkts1_pkts += txp->npkt; txq->txpkts1_wrs++; } txsd = &txq->sdesc[eq->pidx]; txsd->m = m0; txsd->desc_used = ndesc; return (ndesc); } /* * If the SGL ends on an address that is not 16 byte aligned, this function will * add a 0 filled flit at the end. */ static void write_gl_to_txd(struct sge_txq *txq, struct mbuf *m, caddr_t *to, int checkwrap) { struct sge_eq *eq = &txq->eq; struct sglist *gl = txq->gl; struct sglist_seg *seg; __be64 *flitp, *wrap; struct ulptx_sgl *usgl; int i, nflits, nsegs; KASSERT(((uintptr_t)(*to) & 0xf) == 0, ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to)); MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); get_pkt_gl(m, gl); nsegs = gl->sg_nseg; MPASS(nsegs > 0); nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2; flitp = (__be64 *)(*to); wrap = (__be64 *)(&eq->desc[eq->sidx]); seg = &gl->sg_segs[0]; usgl = (void *)flitp; /* * We start at a 16 byte boundary somewhere inside the tx descriptor * ring, so we're at least 16 bytes away from the status page. There is * no chance of a wrap around in the middle of usgl (which is 16 bytes). 
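When the SGL may wrap (the else branch below), the tail is emitted one 8-byte flit at a time via get_flit(), defined further down, which regenerates the ulptx_sge_pair layout on the fly: for each pair of trailing segments, one flit packs both lengths and the next two carry the addresses. A sketch of that 3-flits-per-2-segments pattern, byte swapping omitted and names illustrative:

    #include <stdint.h>

    struct seg_sketch {
        uint32_t len;
        uint64_t paddr;
    };

    /* Flit 'idx' of the trailing SGL (segments after the first). */
    static uint64_t
    trailing_sgl_flit_sketch(const struct seg_sketch *segs, int nsegs, int idx)
    {
        int i = (idx / 3) * 2;          /* pair idx/3 covers segs i and i+1 */
        uint64_t flit;

        switch (idx % 3) {
        case 0:                         /* both lengths share one flit */
            flit = segs[i].len;
            if (i + 1 < nsegs)
                flit |= (uint64_t)segs[i + 1].len << 32;
            return (flit);
        case 1:
            return (segs[i].paddr);
        default:                        /* case 2 */
            return (segs[i + 1].paddr);
        }
    }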
*/ usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | V_ULPTX_NSGE(nsegs)); usgl->len0 = htobe32(seg->ss_len); usgl->addr0 = htobe64(seg->ss_paddr); seg++; if (checkwrap == 0 || (uintptr_t)(flitp + nflits) <= (uintptr_t)wrap) { /* Won't wrap around at all */ for (i = 0; i < nsegs - 1; i++, seg++) { usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len); usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr); } if (i & 1) usgl->sge[i / 2].len[1] = htobe32(0); flitp += nflits; } else { /* Will wrap somewhere in the rest of the SGL */ /* 2 flits already written, write the rest flit by flit */ flitp = (void *)(usgl + 1); for (i = 0; i < nflits - 2; i++) { if (flitp == wrap) flitp = (void *)eq->desc; *flitp++ = get_flit(seg, nsegs - 1, i); } } if (nflits & 1) { MPASS(((uintptr_t)flitp) & 0xf); *flitp++ = 0; } MPASS((((uintptr_t)flitp) & 0xf) == 0); if (__predict_false(flitp == wrap)) *to = (void *)eq->desc; else *to = (void *)flitp; } static inline void copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len) { MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); if (__predict_true((uintptr_t)(*to) + len <= (uintptr_t)&eq->desc[eq->sidx])) { bcopy(from, *to, len); (*to) += len; } else { int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to); bcopy(from, *to, portion); from += portion; portion = len - portion; /* remaining */ bcopy(from, (void *)eq->desc, portion); (*to) = (caddr_t)eq->desc + portion; } } static inline void ring_eq_db(struct adapter *sc, struct sge_eq *eq, u_int n) { u_int db; MPASS(n > 0); db = eq->doorbells; if (n > 1) clrbit(&db, DOORBELL_WCWR); wmb(); switch (ffs(db) - 1) { case DOORBELL_UDB: *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); break; case DOORBELL_WCWR: { volatile uint64_t *dst, *src; int i; /* * Queues whose 128B doorbell segment fits in the page do not * use relative qid (udb_qid is always 0). Only queues with * doorbell segments can do WCWR. */ KASSERT(eq->udb_qid == 0 && n == 1, ("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p", __func__, eq->doorbells, n, eq->dbidx, eq)); dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET - UDBS_DB_OFFSET); i = eq->dbidx; src = (void *)&eq->desc[i]; while (src != (void *)&eq->desc[i + 1]) *dst++ = *src++; wmb(); break; } case DOORBELL_UDBWC: *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); wmb(); break; case DOORBELL_KDB: t4_write_reg(sc, sc->sge_kdoorbell_reg, V_QID(eq->cntxt_id) | V_PIDX(n)); break; } IDXINCR(eq->dbidx, n, eq->sidx); } static inline u_int reclaimable_tx_desc(struct sge_eq *eq) { uint16_t hw_cidx; hw_cidx = read_hw_cidx(eq); return (IDXDIFF(hw_cidx, eq->cidx, eq->sidx)); } static inline u_int total_available_tx_desc(struct sge_eq *eq) { uint16_t hw_cidx, pidx; hw_cidx = read_hw_cidx(eq); pidx = eq->pidx; if (pidx == hw_cidx) return (eq->sidx - 1); else return (IDXDIFF(hw_cidx, pidx, eq->sidx) - 1); } static inline uint16_t read_hw_cidx(struct sge_eq *eq) { struct sge_qstat *spg = (void *)&eq->desc[eq->sidx]; uint16_t cidx = spg->cidx; /* stable snapshot */ return (be16toh(cidx)); } /* * Reclaim 'n' descriptors approximately. 
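reclaim_tx_descs() below hands descriptors back only in whole-WR units, which is why its 'n' is approximate: the loop continues while anything is reclaimable and the running total is still under n, so it can overshoot by up to one WR's worth. A reduced sketch of that loop shape (ring and mbuf bookkeeping elided):

    static unsigned int
    reclaim_sketch(unsigned int can_reclaim, const unsigned int *wr_ndesc,
        unsigned int nwrs, unsigned int n)
    {
        unsigned int i, reclaimed = 0;

        for (i = 0; i < nwrs && can_reclaim && reclaimed < n; i++) {
            /* hardware never returns partial credit, so no underflow here */
            reclaimed += wr_ndesc[i];
            can_reclaim -= wr_ndesc[i];
        }
        return (reclaimed);
    }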
*/ static u_int reclaim_tx_descs(struct sge_txq *txq, u_int n) { struct tx_sdesc *txsd; struct sge_eq *eq = &txq->eq; u_int can_reclaim, reclaimed; TXQ_LOCK_ASSERT_OWNED(txq); MPASS(n > 0); reclaimed = 0; can_reclaim = reclaimable_tx_desc(eq); while (can_reclaim && reclaimed < n) { int ndesc; struct mbuf *m, *nextpkt; txsd = &txq->sdesc[eq->cidx]; ndesc = txsd->desc_used; /* Firmware doesn't return "partial" credits. */ KASSERT(can_reclaim >= ndesc, ("%s: unexpected number of credits: %d, %d", __func__, can_reclaim, ndesc)); for (m = txsd->m; m != NULL; m = nextpkt) { nextpkt = m->m_nextpkt; m->m_nextpkt = NULL; m_freem(m); } reclaimed += ndesc; can_reclaim -= ndesc; IDXINCR(eq->cidx, ndesc, eq->sidx); } return (reclaimed); } static void tx_reclaim(void *arg, int n) { struct sge_txq *txq = arg; struct sge_eq *eq = &txq->eq; do { if (TXQ_TRYLOCK(txq) == 0) break; n = reclaim_tx_descs(txq, 32); if (eq->cidx == eq->pidx) eq->equeqidx = eq->pidx; TXQ_UNLOCK(txq); } while (n > 0); } static __be64 get_flit(struct sglist_seg *segs, int nsegs, int idx) { int i = (idx / 3) * 2; switch (idx % 3) { case 0: { __be64 rc; rc = htobe32(segs[i].ss_len); if (i + 1 < nsegs) rc |= (uint64_t)htobe32(segs[i + 1].ss_len) << 32; return (rc); } case 1: return (htobe64(segs[i].ss_paddr)); case 2: return (htobe64(segs[i + 1].ss_paddr)); } return (0); } static void find_best_refill_source(struct adapter *sc, struct sge_fl *fl, int maxp) { int8_t zidx, hwidx, idx; uint16_t region1, region3; int spare, spare_needed, n; struct sw_zone_info *swz; struct hw_buf_info *hwb, *hwb_list = &sc->sge.hw_buf_info[0]; /* * Buffer Packing: Look for PAGE_SIZE or larger zone which has a bufsize * large enough for the max payload and cluster metadata. Otherwise * settle for the largest bufsize that leaves enough room in the cluster * for metadata. * * Without buffer packing: Look for the smallest zone which has a * bufsize large enough for the max payload. Settle for the largest * bufsize available if there's nothing big enough for max payload. */ spare_needed = fl->flags & FL_BUF_PACKING ? CL_METADATA_SIZE : 0; swz = &sc->sge.sw_zone_info[0]; hwidx = -1; for (zidx = 0; zidx < SW_ZONE_SIZES; zidx++, swz++) { if (swz->size > largest_rx_cluster) { if (__predict_true(hwidx != -1)) break; /* * This is a misconfiguration. largest_rx_cluster is * preventing us from finding a refill source. See * dev.t5nex.<n>.buffer_sizes to figure out why. */ device_printf(sc->dev, "largest_rx_cluster=%u leaves no" " refill source for fl %p (dma %u). Ignored.\n", largest_rx_cluster, fl, maxp); } for (idx = swz->head_hwidx; idx != -1; idx = hwb->next) { hwb = &hwb_list[idx]; spare = swz->size - hwb->size; if (spare < spare_needed) continue; hwidx = idx; /* best option so far */ if (hwb->size >= maxp) { if ((fl->flags & FL_BUF_PACKING) == 0) goto done; /* stop looking (not packing) */ if (swz->size >= safest_rx_cluster) goto done; /* stop looking (packing) */ } break; /* keep looking, next zone */ } } done: /* A usable hwidx has been located. */ MPASS(hwidx != -1); hwb = &hwb_list[hwidx]; zidx = hwb->zidx; swz = &sc->sge.sw_zone_info[zidx]; region1 = 0; region3 = swz->size - hwb->size; /* * Stay within this zone and see if there is a better match when mbuf * inlining is allowed. Remember that the hwidx's are sorted in * decreasing order of size (so in increasing order of spare area). 
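The loop that follows recomputes the region1 (inlined mbufs) / region3 (spare tail) split whenever a smaller hardware buffer leaves usable slack in the same cluster. A worked example with illustrative sizes only (a 4 KB zone, a 2 KB hardware buffer, an MSIZE of 256, and a 24-byte metadata struct; the real numbers come from sw_zone_info and hw_buf_info):

    #include <stdio.h>

    int
    main(void)
    {
        unsigned int cl = 4096, payload = 2048;     /* zone / hw buffer sizes */
        unsigned int msize = 256, meta = 24;        /* assumed MSIZE, metadata */
        unsigned int spare = cl - payload;
        unsigned int n = (spare - meta) / msize;    /* inlinable mbufs: 7 */
        unsigned int region1 = n * msize;           /* 1792, packing case */
        unsigned int region3 = spare - region1;     /* 256 >= metadata size */

        printf("cl %u = r1 %u + payload %u + r3 %u (%u mbufs)\n",
            cl, region1, payload, region3, n);
        return (0);
    }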
*/ for (idx = hwidx; idx != -1; idx = hwb->next) { hwb = &hwb_list[idx]; spare = swz->size - hwb->size; if (allow_mbufs_in_cluster == 0 || hwb->size < maxp) break; /* * Do not inline mbufs if doing so would violate the pad/pack * boundary alignment requirement. */ if (fl_pad && (MSIZE % sc->params.sge.pad_boundary) != 0) continue; if (fl->flags & FL_BUF_PACKING && (MSIZE % sc->params.sge.pack_boundary) != 0) continue; if (spare < CL_METADATA_SIZE + MSIZE) continue; n = (spare - CL_METADATA_SIZE) / MSIZE; if (n > howmany(hwb->size, maxp)) break; hwidx = idx; if (fl->flags & FL_BUF_PACKING) { region1 = n * MSIZE; region3 = spare - region1; } else { region1 = MSIZE; region3 = spare - region1; break; } } KASSERT(zidx >= 0 && zidx < SW_ZONE_SIZES, ("%s: bad zone %d for fl %p, maxp %d", __func__, zidx, fl, maxp)); KASSERT(hwidx >= 0 && hwidx <= SGE_FLBUF_SIZES, ("%s: bad hwidx %d for fl %p, maxp %d", __func__, hwidx, fl, maxp)); KASSERT(region1 + sc->sge.hw_buf_info[hwidx].size + region3 == sc->sge.sw_zone_info[zidx].size, ("%s: bad buffer layout for fl %p, maxp %d. " "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, sc->sge.sw_zone_info[zidx].size, region1, sc->sge.hw_buf_info[hwidx].size, region3)); if (fl->flags & FL_BUF_PACKING || region1 > 0) { KASSERT(region3 >= CL_METADATA_SIZE, ("%s: no room for metadata. fl %p, maxp %d; " "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, sc->sge.sw_zone_info[zidx].size, region1, sc->sge.hw_buf_info[hwidx].size, region3)); KASSERT(region1 % MSIZE == 0, ("%s: bad mbuf region for fl %p, maxp %d. " "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, sc->sge.sw_zone_info[zidx].size, region1, sc->sge.hw_buf_info[hwidx].size, region3)); } fl->cll_def.zidx = zidx; fl->cll_def.hwidx = hwidx; fl->cll_def.region1 = region1; fl->cll_def.region3 = region3; } static void find_safe_refill_source(struct adapter *sc, struct sge_fl *fl) { struct sge *s = &sc->sge; struct hw_buf_info *hwb; struct sw_zone_info *swz; int spare; int8_t hwidx; if (fl->flags & FL_BUF_PACKING) hwidx = s->safe_hwidx2; /* with room for metadata */ else if (allow_mbufs_in_cluster && s->safe_hwidx2 != -1) { hwidx = s->safe_hwidx2; hwb = &s->hw_buf_info[hwidx]; swz = &s->sw_zone_info[hwb->zidx]; spare = swz->size - hwb->size; /* no good if there isn't room for an mbuf as well */ if (spare < CL_METADATA_SIZE + MSIZE) hwidx = s->safe_hwidx1; } else hwidx = s->safe_hwidx1; if (hwidx == -1) { /* No fallback source */ fl->cll_alt.hwidx = -1; fl->cll_alt.zidx = -1; return; } hwb = &s->hw_buf_info[hwidx]; swz = &s->sw_zone_info[hwb->zidx]; spare = swz->size - hwb->size; fl->cll_alt.hwidx = hwidx; fl->cll_alt.zidx = hwb->zidx; if (allow_mbufs_in_cluster && (fl_pad == 0 || (MSIZE % sc->params.sge.pad_boundary) == 0)) fl->cll_alt.region1 = ((spare - CL_METADATA_SIZE) / MSIZE) * MSIZE; else fl->cll_alt.region1 = 0; fl->cll_alt.region3 = spare - fl->cll_alt.region1; } static void add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl) { mtx_lock(&sc->sfl_lock); FL_LOCK(fl); if ((fl->flags & FL_DOOMED) == 0) { fl->flags |= FL_STARVING; TAILQ_INSERT_TAIL(&sc->sfl, fl, link); callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc); } FL_UNLOCK(fl); mtx_unlock(&sc->sfl_lock); } static void handle_wrq_egr_update(struct adapter *sc, struct sge_eq *eq) { struct sge_wrq *wrq = (void *)eq; atomic_readandclear_int(&eq->equiq); taskqueue_enqueue(sc->tq[eq->tx_chan], &wrq->wrq_tx_task); } static void handle_eth_egr_update(struct adapter *sc, struct sge_eq *eq) { struct sge_txq *txq = (void *)eq; MPASS((eq->flags 
& EQ_TYPEMASK) == EQ_ETH); atomic_readandclear_int(&eq->equiq); mp_ring_check_drainage(txq->r, 0); taskqueue_enqueue(sc->tq[eq->tx_chan], &txq->tx_reclaim_task); } static int handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1); unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid)); struct adapter *sc = iq->adapter; struct sge *s = &sc->sge; struct sge_eq *eq; static void (*h[])(struct adapter *, struct sge_eq *) = {NULL, &handle_wrq_egr_update, &handle_eth_egr_update, &handle_wrq_egr_update}; KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, rss->opcode)); eq = s->eqmap[qid - s->eq_start - s->eq_base]; (*h[eq->flags & EQ_TYPEMASK])(sc, eq); return (0); } /* handle_fw_msg works for both fw4_msg and fw6_msg because this is valid */ CTASSERT(offsetof(struct cpl_fw4_msg, data) == \ offsetof(struct cpl_fw6_msg, data)); static int handle_fw_msg(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_fw6_msg *cpl = (const void *)(rss + 1); KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, rss->opcode)); if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) { const struct rss_header *rss2; rss2 = (const struct rss_header *)&cpl->data[0]; return (t4_cpl_handler[rss2->opcode](iq, rss2, m)); } return (t4_fw_msg_handler[cpl->type](sc, &cpl->data[0])); } /** * t4_handle_wrerr_rpl - process a FW work request error message * @adap: the adapter * @rpl: start of the FW message */ static int t4_handle_wrerr_rpl(struct adapter *adap, const __be64 *rpl) { u8 opcode = *(const u8 *)rpl; const struct fw_error_cmd *e = (const void *)rpl; unsigned int i; if (opcode != FW_ERROR_CMD) { log(LOG_ERR, "%s: Received WRERR_RPL message with opcode %#x\n", device_get_nameunit(adap->dev), opcode); return (EINVAL); } log(LOG_ERR, "%s: FW_ERROR (%s) ", device_get_nameunit(adap->dev), G_FW_ERROR_CMD_FATAL(be32toh(e->op_to_type)) ? "fatal" : "non-fatal"); switch (G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))) { case FW_ERROR_TYPE_EXCEPTION: log(LOG_ERR, "exception info:\n"); for (i = 0; i < nitems(e->u.exception.info); i++) log(LOG_ERR, "%s%08x", i == 0 ? "\t" : " ", be32toh(e->u.exception.info[i])); log(LOG_ERR, "\n"); break; case FW_ERROR_TYPE_HWMODULE: log(LOG_ERR, "HW module regaddr %08x regval %08x\n", be32toh(e->u.hwmodule.regaddr), be32toh(e->u.hwmodule.regval)); break; case FW_ERROR_TYPE_WR: log(LOG_ERR, "WR cidx %d PF %d VF %d eqid %d hdr:\n", be16toh(e->u.wr.cidx), G_FW_ERROR_CMD_PFN(be16toh(e->u.wr.pfn_vfn)), G_FW_ERROR_CMD_VFN(be16toh(e->u.wr.pfn_vfn)), be32toh(e->u.wr.eqid)); for (i = 0; i < nitems(e->u.wr.wrhdr); i++) log(LOG_ERR, "%s%02x", i == 0 ? "\t" : " ", e->u.wr.wrhdr[i]); log(LOG_ERR, "\n"); break; case FW_ERROR_TYPE_ACL: log(LOG_ERR, "ACL cidx %d PF %d VF %d eqid %d %s", be16toh(e->u.acl.cidx), G_FW_ERROR_CMD_PFN(be16toh(e->u.acl.pfn_vfn)), G_FW_ERROR_CMD_VFN(be16toh(e->u.acl.pfn_vfn)), be32toh(e->u.acl.eqid), G_FW_ERROR_CMD_MV(be16toh(e->u.acl.mv_pkd)) ? 
"vlanid" : "MAC"); for (i = 0; i < nitems(e->u.acl.val); i++) log(LOG_ERR, " %02x", e->u.acl.val[i]); log(LOG_ERR, "\n"); break; default: log(LOG_ERR, "type %#x\n", G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))); return (EINVAL); } return (0); } static int sysctl_uint16(SYSCTL_HANDLER_ARGS) { uint16_t *id = arg1; int i = *id; return sysctl_handle_int(oidp, &i, 0, req); } static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS) { struct sge *s = arg1; struct hw_buf_info *hwb = &s->hw_buf_info[0]; struct sw_zone_info *swz = &s->sw_zone_info[0]; int i, rc; struct sbuf sb; char c; sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND); for (i = 0; i < SGE_FLBUF_SIZES; i++, hwb++) { if (hwb->zidx >= 0 && swz[hwb->zidx].size <= largest_rx_cluster) c = '*'; else c = '\0'; sbuf_printf(&sb, "%u%c ", hwb->size, c); } sbuf_trim(&sb); sbuf_finish(&sb); rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); sbuf_delete(&sb); return (rc); } static int sysctl_tc(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct port_info *pi; struct adapter *sc; struct sge_txq *txq; struct tx_cl_rl_params *tc; int qidx = arg2, rc, tc_idx; uint32_t fw_queue, fw_class; MPASS(qidx >= 0 && qidx < vi->ntxq); pi = vi->pi; sc = pi->adapter; txq = &sc->sge.txq[vi->first_txq + qidx]; tc_idx = txq->tc_idx; rc = sysctl_handle_int(oidp, &tc_idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (sc->flags & IS_VF) return (EPERM); /* Note that -1 is legitimate input (it means unbind). */ if (tc_idx < -1 || tc_idx >= sc->chip_params->nsched_cls) return (EINVAL); mtx_lock(&sc->tc_lock); if (tc_idx == txq->tc_idx) { rc = 0; /* No change, nothing to do. */ goto done; } fw_queue = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH) | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id); if (tc_idx == -1) fw_class = 0xffffffff; /* Unbind. */ else { /* * Bind to a different class. */ tc = &pi->sched_params->cl_rl[tc_idx]; if (tc->flags & TX_CLRL_ERROR) { /* Previous attempt to set the cl-rl params failed. */ rc = EIO; goto done; } else { /* * Ok to proceed. Place a reference on the new class * while still holding on to the reference on the * previous class, if any. */ fw_class = tc_idx; tc->refcount++; } } mtx_unlock(&sc->tc_lock); rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4stc"); if (rc) return (rc); rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue, &fw_class); end_synchronized_op(sc, 0); mtx_lock(&sc->tc_lock); if (rc == 0) { if (txq->tc_idx != -1) { tc = &pi->sched_params->cl_rl[txq->tc_idx]; MPASS(tc->refcount > 0); tc->refcount--; } txq->tc_idx = tc_idx; } else if (tc_idx != -1) { tc = &pi->sched_params->cl_rl[tc_idx]; MPASS(tc->refcount > 0); tc->refcount--; } done: mtx_unlock(&sc->tc_lock); return (rc); } Index: head/sys/dev/cxgbe/tom/t4_cpl_io.c =================================================================== --- head/sys/dev/cxgbe/tom/t4_cpl_io.c (revision 324445) +++ head/sys/dev/cxgbe/tom/t4_cpl_io.c (revision 324446) @@ -1,2333 +1,2333 @@ /*- * Copyright (c) 2012, 2015 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ratelimit.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include #include #include #include #include #include #include #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_tcb.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" VNET_DECLARE(int, tcp_do_autosndbuf); #define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf) VNET_DECLARE(int, tcp_autosndbuf_inc); #define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc) VNET_DECLARE(int, tcp_autosndbuf_max); #define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max) VNET_DECLARE(int, tcp_do_autorcvbuf); #define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf) VNET_DECLARE(int, tcp_autorcvbuf_inc); #define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc) VNET_DECLARE(int, tcp_autorcvbuf_max); #define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max) #define IS_AIOTX_MBUF(m) \ ((m)->m_flags & M_EXT && (m)->m_ext.ext_flags & EXT_FLAG_AIOTX) static void t4_aiotx_cancel(struct kaiocb *job); static void t4_aiotx_queue_toep(struct toepcb *toep); static size_t aiotx_mbuf_pgoff(struct mbuf *m) { struct aiotx_buffer *ab; MPASS(IS_AIOTX_MBUF(m)); ab = m->m_ext.ext_arg1; return ((ab->ps.offset + (uintptr_t)m->m_ext.ext_arg2) % PAGE_SIZE); } static vm_page_t * aiotx_mbuf_pages(struct mbuf *m) { struct aiotx_buffer *ab; int npages; MPASS(IS_AIOTX_MBUF(m)); ab = m->m_ext.ext_arg1; npages = (ab->ps.offset + (uintptr_t)m->m_ext.ext_arg2) / PAGE_SIZE; return (ab->ps.pages + npages); } void send_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp) { struct wrqe *wr; struct fw_flowc_wr *flowc; unsigned int nparams = ftxp ? 
8 : 6, flowclen; struct vi_info *vi = toep->vi; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; unsigned int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN; struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT), ("%s: flowc for tid %u sent already", __func__, toep->tid)); flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } flowc = wrtod(wr); memset(flowc, 0, wr->wr_len); flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | V_FW_FLOWC_WR_NPARAMS(nparams)); flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) | V_FW_WR_FLOWID(toep->tid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; flowc->mnemval[0].val = htobe32(pfvf); flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; flowc->mnemval[1].val = htobe32(pi->tx_chan); flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; flowc->mnemval[2].val = htobe32(pi->tx_chan); flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; flowc->mnemval[3].val = htobe32(toep->ofld_rxq->iq.abs_id); if (ftxp) { uint32_t sndbuf = min(ftxp->snd_space, sc->tt.sndbuf); flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT; flowc->mnemval[4].val = htobe32(ftxp->snd_nxt); flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT; flowc->mnemval[5].val = htobe32(ftxp->rcv_nxt); flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF; flowc->mnemval[6].val = htobe32(sndbuf); flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; flowc->mnemval[7].val = htobe32(ftxp->mss); CTR6(KTR_CXGBE, "%s: tid %u, mss %u, sndbuf %u, snd_nxt 0x%x, rcv_nxt 0x%x", __func__, toep->tid, ftxp->mss, sndbuf, ftxp->snd_nxt, ftxp->rcv_nxt); } else { flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF; flowc->mnemval[4].val = htobe32(512); flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS; flowc->mnemval[5].val = htobe32(512); CTR2(KTR_CXGBE, "%s: tid %u", __func__, toep->tid); } txsd->tx_credits = howmany(flowclen, 16); txsd->plen = 0; KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0, ("%s: not enough credits (%d)", __func__, toep->tx_credits)); toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; toep->flags |= TPF_FLOWC_WR_SENT; t4_wrq_tx(sc, wr); } #ifdef RATELIMIT /* * Input is Bytes/second (so_max_pacing_rate), chip counts in Kilobits/second. 
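The conversion named in that comment is plain unit arithmetic, bytes per second to kilobits per second; doing the multiply in 64 bits keeps rates past roughly 4 Gb/s from overflowing a 32-bit intermediate. For example:

    #include <stdint.h>

    /* 125,000,000 B/s (a saturated 1 Gb/s stream) -> 1,000,000 Kb/s. */
    static unsigned int
    bps_to_kbps_sketch(unsigned int Bps)
    {
        return ((uint64_t)Bps * 8 / 1000);
    }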
*/ static int update_tx_rate_limit(struct adapter *sc, struct toepcb *toep, u_int Bps) { int tc_idx, rc; const u_int kbps = (u_int) (uint64_t)Bps * 8ULL / 1000; const int port_id = toep->vi->pi->port_id; CTR3(KTR_CXGBE, "%s: tid %u, rate %uKbps", __func__, toep->tid, kbps); if (kbps == 0) { /* unbind */ tc_idx = -1; } else { rc = t4_reserve_cl_rl_kbps(sc, port_id, kbps, &tc_idx); if (rc != 0) return (rc); MPASS(tc_idx >= 0 && tc_idx < sc->chip_params->nsched_cls); } if (toep->tc_idx != tc_idx) { struct wrqe *wr; struct fw_flowc_wr *flowc; int nparams = 1, flowclen, flowclen16; struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); flowclen16 = howmany(flowclen, 16); if (toep->tx_credits < flowclen16 || toep->txsd_avail == 0 || (wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq)) == NULL) { if (tc_idx >= 0) t4_release_cl_rl_kbps(sc, port_id, tc_idx); return (ENOMEM); } flowc = wrtod(wr); memset(flowc, 0, wr->wr_len); flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | V_FW_FLOWC_WR_NPARAMS(nparams)); flowc->flowid_len16 = htonl(V_FW_WR_LEN16(flowclen16) | V_FW_WR_FLOWID(toep->tid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; if (tc_idx == -1) flowc->mnemval[0].val = htobe32(0xff); else flowc->mnemval[0].val = htobe32(tc_idx); txsd->tx_credits = flowclen16; txsd->plen = 0; toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; t4_wrq_tx(sc, wr); } if (toep->tc_idx >= 0) t4_release_cl_rl_kbps(sc, port_id, toep->tc_idx); toep->tc_idx = tc_idx; return (0); } #endif void send_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt) { struct wrqe *wr; struct cpl_abort_req *req; int tid = toep->tid; struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); /* don't use if INP_DROPPED */ INP_WLOCK_ASSERT(inp); CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x%s", __func__, toep->tid, inp->inp_flags & INP_DROPPED ? "inp dropped" : tcpstates[tp->t_state], toep->flags, inp->inp_flags, toep->flags & TPF_ABORT_SHUTDOWN ? " (abort already in progress)" : ""); if (toep->flags & TPF_ABORT_SHUTDOWN) return; /* abort already in progress */ toep->flags |= TPF_ABORT_SHUTDOWN; KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc_wr not sent for tid %d.", __func__, tid)); wr = alloc_wrqe(sizeof(*req), toep->ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, tid); if (inp->inp_flags & INP_DROPPED) req->rsvd0 = htobe32(snd_nxt); else req->rsvd0 = htobe32(tp->snd_nxt); req->rsvd1 = !(toep->flags & TPF_TX_DATA_SENT); req->cmd = CPL_ABORT_SEND_RST; /* * XXX: What's the correct way to tell that the inp hasn't been detached * from its socket? Should I even be flushing the snd buffer here? */ if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { struct socket *so = inp->inp_socket; if (so != NULL) /* because I'm not sure. See comment above */ sbflush(&so->so_snd); } t4_l2t_send(sc, wr, toep->l2te); } /* * Called when a connection is established to translate the TCP options * reported by HW to FreeBSD's native format. 
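assign_rxopt() below recovers t_maxseg from the negotiated MTU entry by subtracting fixed header overhead: 40 bytes for IPv4 plus TCP (60 for IPv6), and TCPOLEN_TSTAMP_APPA (12 bytes) more when RFC 1323 timestamps are enabled. Worked numbers for a 1500-byte MTU table entry:

    #include <stdio.h>

    int
    main(void)
    {
        unsigned int mtu = 1500;        /* sc->params.mtus[] entry */
        unsigned int v4 = 20 + 20;      /* struct ip + struct tcphdr */
        unsigned int v6 = 40 + 20;      /* struct ip6_hdr + struct tcphdr */

        /* 1460 plain IPv4, 1448 with timestamps, 1440 plain IPv6 */
        printf("%u %u %u\n", mtu - v4, mtu - v4 - 12, mtu - v6);
        return (0);
    }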
*/ static void assign_rxopt(struct tcpcb *tp, unsigned int opt) { struct toepcb *toep = tp->t_toe; struct inpcb *inp = tp->t_inpcb; struct adapter *sc = td_adapter(toep->td); int n; INP_LOCK_ASSERT(inp); if (inp->inp_inc.inc_flags & INC_ISIPV6) n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); else n = sizeof(struct ip) + sizeof(struct tcphdr); if (V_tcp_do_rfc1323) n += TCPOLEN_TSTAMP_APPA; tp->t_maxseg = sc->params.mtus[G_TCPOPT_MSS(opt)] - n; CTR4(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u)", __func__, toep->tid, G_TCPOPT_MSS(opt), sc->params.mtus[G_TCPOPT_MSS(opt)]); if (G_TCPOPT_TSTAMP(opt)) { tp->t_flags |= TF_RCVD_TSTMP; /* timestamps ok */ tp->ts_recent = 0; /* hmmm */ tp->ts_recent_age = tcp_ts_getticks(); } if (G_TCPOPT_SACK(opt)) tp->t_flags |= TF_SACK_PERMIT; /* should already be set */ else tp->t_flags &= ~TF_SACK_PERMIT; /* sack disallowed by peer */ if (G_TCPOPT_WSCALE_OK(opt)) tp->t_flags |= TF_RCVD_SCALE; /* Doing window scaling? */ if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) == (TF_RCVD_SCALE | TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; tp->snd_scale = G_TCPOPT_SND_WSCALE(opt); } } /* * Completes some final bits of initialization for just established connections * and changes their state to TCPS_ESTABLISHED. * * The ISNs are from after the exchange of SYNs. i.e., the true ISN + 1. */ void make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn, uint16_t opt) { struct inpcb *inp = toep->inp; struct socket *so = inp->inp_socket; struct tcpcb *tp = intotcpcb(inp); long bufsize; uint32_t iss = be32toh(snd_isn) - 1; /* true ISS */ uint32_t irs = be32toh(rcv_isn) - 1; /* true IRS */ uint16_t tcpopt = be16toh(opt); struct flowc_tx_params ftxp; INP_WLOCK_ASSERT(inp); KASSERT(tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED, ("%s: TCP state %s", __func__, tcpstates[tp->t_state])); CTR6(KTR_CXGBE, "%s: tid %d, so %p, inp %p, tp %p, toep %p", __func__, toep->tid, so, inp, tp, toep); tp->t_state = TCPS_ESTABLISHED; tp->t_starttime = ticks; TCPSTAT_INC(tcps_connects); tp->irs = irs; tcp_rcvseqinit(tp); tp->rcv_wnd = toep->rx_credits << 10; tp->rcv_adv += tp->rcv_wnd; tp->last_ack_sent = tp->rcv_nxt; /* * If we were unable to send all rx credits via opt0, save the remainder * in rx_credits so that they can be handed over with the next credit * update. 
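The bookkeeping in that comment is straightforward arithmetic: rcv_wnd was set from the credits already advertised through opt0 (toep->rx_credits << 10, i.e. 1 KB units), and whatever the socket buffer can take beyond that is parked in rx_credits, in bytes, for the next CPL_RX_DATA_ACK. A sketch with illustrative numbers:

    #include <stdio.h>

    int
    main(void)
    {
        unsigned int opt0_credits = 64;             /* 1 KB units, via opt0 */
        unsigned int rcv_wnd = opt0_credits << 10;  /* 65536 bytes */
        unsigned int bufsize = 96 * 1024;           /* select_rcv_wnd() result */

        /* 32768 bytes to be handed over with the next credit update */
        printf("leftover: %u bytes\n", bufsize - rcv_wnd);
        return (0);
    }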
*/ SOCKBUF_LOCK(&so->so_rcv); bufsize = select_rcv_wnd(so); SOCKBUF_UNLOCK(&so->so_rcv); toep->rx_credits = bufsize - tp->rcv_wnd; tp->iss = iss; tcp_sendseqinit(tp); tp->snd_una = iss + 1; tp->snd_nxt = iss + 1; tp->snd_max = iss + 1; assign_rxopt(tp, tcpopt); SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf) bufsize = V_tcp_autosndbuf_max; else bufsize = sbspace(&so->so_snd); SOCKBUF_UNLOCK(&so->so_snd); ftxp.snd_nxt = tp->snd_nxt; ftxp.rcv_nxt = tp->rcv_nxt; ftxp.snd_space = bufsize; ftxp.mss = tp->t_maxseg; send_flowc_wr(toep, &ftxp); soisconnected(so); } static int send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits) { struct wrqe *wr; struct cpl_rx_data_ack *req; uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); KASSERT(credits >= 0, ("%s: %d credits", __func__, credits)); wr = alloc_wrqe(sizeof(*req), toep->ctrlq); if (wr == NULL) return (0); req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid); req->credit_dack = htobe32(dack | V_RX_CREDITS(credits)); t4_wrq_tx(sc, wr); return (credits); } void t4_rcvd_locked(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; struct sockbuf *sb = &so->so_rcv; struct toepcb *toep = tp->t_toe; int credits; INP_WLOCK_ASSERT(inp); SOCKBUF_LOCK_ASSERT(sb); KASSERT(toep->sb_cc >= sbused(sb), ("%s: sb %p has more data (%d) than last time (%d).", __func__, sb, sbused(sb), toep->sb_cc)); toep->rx_credits += toep->sb_cc - sbused(sb); toep->sb_cc = sbused(sb); if (toep->rx_credits > 0 && (tp->rcv_wnd <= 32 * 1024 || toep->rx_credits >= 64 * 1024 || (toep->rx_credits >= 16 * 1024 && tp->rcv_wnd <= 128 * 1024) || toep->sb_cc + tp->rcv_wnd < sb->sb_lowat)) { credits = send_rx_credits(sc, toep, toep->rx_credits); toep->rx_credits -= credits; tp->rcv_wnd += credits; tp->rcv_adv += credits; } } void t4_rcvd(struct toedev *tod, struct tcpcb *tp) { struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; struct sockbuf *sb = &so->so_rcv; SOCKBUF_LOCK(sb); t4_rcvd_locked(tod, tp); SOCKBUF_UNLOCK(sb); } /* * Close a connection by sending a CPL_CLOSE_CON_REQ message. */ static int close_conn(struct adapter *sc, struct toepcb *toep) { struct wrqe *wr; struct cpl_close_con_req *req; unsigned int tid = toep->tid; CTR3(KTR_CXGBE, "%s: tid %u%s", __func__, toep->tid, toep->flags & TPF_FIN_SENT ? ", IGNORED" : ""); if (toep->flags & TPF_FIN_SENT) return (0); KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc_wr not sent for tid %u.", __func__, tid)); wr = alloc_wrqe(sizeof(*req), toep->ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } req = wrtod(wr); req->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | V_FW_WR_IMMDLEN(sizeof(*req) - sizeof(req->wr))); req->wr.wr_mid = htonl(V_FW_WR_LEN16(howmany(sizeof(*req), 16)) | V_FW_WR_FLOWID(tid)); req->wr.wr_lo = cpu_to_be64(0); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); req->rsvd = 0; toep->flags |= TPF_FIN_SENT; toep->flags &= ~TPF_SEND_FIN; t4_l2t_send(sc, wr, toep->l2te); return (0); } #define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16) #define MIN_OFLD_TX_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16)) /* Maximum amount of immediate data we could stuff in a WR */ static inline int max_imm_payload(int tx_credits) { const int n = 2; /* Use only up to 2 desc for imm. 
data WR */ KASSERT(tx_credits >= 0 && tx_credits <= MAX_OFLD_TX_CREDITS, ("%s: %d credits", __func__, tx_credits)); if (tx_credits < MIN_OFLD_TX_CREDITS) return (0); if (tx_credits >= (n * EQ_ESIZE) / 16) return ((n * EQ_ESIZE) - sizeof(struct fw_ofld_tx_data_wr)); else return (tx_credits * 16 - sizeof(struct fw_ofld_tx_data_wr)); } /* Maximum number of SGL entries we could stuff in a WR */ static inline int max_dsgl_nsegs(int tx_credits) { int nseg = 1; /* ulptx_sgl has room for 1, rest ulp_tx_sge_pair */ int sge_pair_credits = tx_credits - MIN_OFLD_TX_CREDITS; KASSERT(tx_credits >= 0 && tx_credits <= MAX_OFLD_TX_CREDITS, ("%s: %d credits", __func__, tx_credits)); if (tx_credits < MIN_OFLD_TX_CREDITS) return (0); nseg += 2 * (sge_pair_credits * 16 / 24); if ((sge_pair_credits * 16) % 24 == 16) nseg++; return (nseg); } static inline void write_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen, unsigned int plen, uint8_t credits, int shove, int ulp_submode, int txalign) { struct fw_ofld_tx_data_wr *txwr = dst; txwr->op_to_immdlen = htobe32(V_WR_OP(FW_OFLD_TX_DATA_WR) | V_FW_WR_IMMDLEN(immdlen)); txwr->flowid_len16 = htobe32(V_FW_WR_FLOWID(toep->tid) | V_FW_WR_LEN16(credits)); txwr->lsodisable_to_flags = htobe32(V_TX_ULP_MODE(toep->ulp_mode) | V_TX_ULP_SUBMODE(ulp_submode) | V_TX_URG(0) | V_TX_SHOVE(shove)); txwr->plen = htobe32(plen); if (txalign > 0) { struct tcpcb *tp = intotcpcb(toep->inp); if (plen < 2 * tp->t_maxseg || is_10G_port(toep->vi->pi)) txwr->lsodisable_to_flags |= htobe32(F_FW_OFLD_TX_DATA_WR_LSODISABLE); else txwr->lsodisable_to_flags |= htobe32(F_FW_OFLD_TX_DATA_WR_ALIGNPLD | (tp->t_flags & TF_NODELAY ? 0 : F_FW_OFLD_TX_DATA_WR_ALIGNPLDSHOVE)); } } /* * Generate a DSGL from a starting mbuf. The total number of segments and the * maximum segments in any one mbuf are provided. */ static void write_tx_sgl(void *dst, struct mbuf *start, struct mbuf *stop, int nsegs, int n) { struct mbuf *m; struct ulptx_sgl *usgl = dst; int i, j, rc; struct sglist sg; struct sglist_seg segs[n]; KASSERT(nsegs > 0, ("%s: nsegs 0", __func__)); sglist_init(&sg, n, segs); usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | V_ULPTX_NSGE(nsegs)); i = -1; for (m = start; m != stop; m = m->m_next) { if (IS_AIOTX_MBUF(m)) rc = sglist_append_vmpages(&sg, aiotx_mbuf_pages(m), aiotx_mbuf_pgoff(m), m->m_len); else rc = sglist_append(&sg, mtod(m, void *), m->m_len); if (__predict_false(rc != 0)) panic("%s: sglist_append %d", __func__, rc); for (j = 0; j < sg.sg_nseg; i++, j++) { if (i < 0) { usgl->len0 = htobe32(segs[j].ss_len); usgl->addr0 = htobe64(segs[j].ss_paddr); } else { usgl->sge[i / 2].len[i & 1] = htobe32(segs[j].ss_len); usgl->sge[i / 2].addr[i & 1] = htobe64(segs[j].ss_paddr); } #ifdef INVARIANTS nsegs--; #endif } sglist_reset(&sg); } if (i & 1) usgl->sge[i / 2].len[1] = htobe32(0); KASSERT(nsegs == 0, ("%s: nsegs %d, start %p, stop %p", __func__, nsegs, start, stop)); } /* * Max number of SGL entries an offload tx work request can have. This is 41 * (1 + 40) for a full 512B work request. * fw_ofld_tx_data_wr(16B) + ulptx_sgl(16B, 1) + ulptx_sge_pair(480B, 40) */ #define OFLD_SGL_LEN (41) /* * Send data and/or a FIN to the peer. * * The socket's so_snd buffer consists of a stream of data starting with sb_mb * and linked together with m_next. sb_sndptr, if set, is the last mbuf that * was transmitted. * * drop indicates the number of bytes that should be dropped from the head of * the send buffer. 
It is an optimization that lets do_fw4_ack avoid creating * contention on the send buffer lock (before this change it used to do * sowwakeup and then t4_push_frames right after that when recovering from tx * stalls). When drop is set this function MUST drop the bytes and wake up any * writers. */ void t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop) { struct mbuf *sndptr, *m, *sb_sndptr; struct fw_ofld_tx_data_wr *txwr; struct wrqe *wr; u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf; struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); struct socket *so = inp->inp_socket; struct sockbuf *sb = &so->so_snd; int tx_credits, shove, compl, sowwakeup; struct ofld_tx_sdesc *txsd; bool aiotx_mbuf_seen; INP_WLOCK_ASSERT(inp); KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); KASSERT(toep->ulp_mode == ULP_MODE_NONE || toep->ulp_mode == ULP_MODE_TCPDDP || toep->ulp_mode == ULP_MODE_RDMA, ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep)); #ifdef VERBOSE_TRACES CTR5(KTR_CXGBE, "%s: tid %d toep flags %#x tp flags %#x drop %d", __func__, toep->tid, toep->flags, tp->t_flags, drop); #endif if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) return; #ifdef RATELIMIT if (__predict_false(inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) && (update_tx_rate_limit(sc, toep, so->so_max_pacing_rate) == 0)) { inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED; } #endif /* * This function doesn't resume by itself. Someone else must clear the * flag and call this function. */ if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) { KASSERT(drop == 0, ("%s: drop (%d) != 0 but tx is suspended", __func__, drop)); return; } txsd = &toep->txsd[toep->txsd_pidx]; do { tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); max_imm = max_imm_payload(tx_credits); max_nsegs = max_dsgl_nsegs(tx_credits); SOCKBUF_LOCK(sb); sowwakeup = drop; if (drop) { sbdrop_locked(sb, drop); drop = 0; } sb_sndptr = sb->sb_sndptr; sndptr = sb_sndptr ?
sb_sndptr->m_next : sb->sb_mb; plen = 0; nsegs = 0; max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ aiotx_mbuf_seen = false; for (m = sndptr; m != NULL; m = m->m_next) { int n; if (IS_AIOTX_MBUF(m)) n = sglist_count_vmpages(aiotx_mbuf_pages(m), aiotx_mbuf_pgoff(m), m->m_len); else n = sglist_count(mtod(m, void *), m->m_len); nsegs += n; plen += m->m_len; /* This mbuf sent us _over_ the nsegs limit, back out */ if (plen > max_imm && nsegs > max_nsegs) { nsegs -= n; plen -= m->m_len; if (plen == 0) { /* Too few credits */ toep->flags |= TPF_TX_SUSPENDED; if (sowwakeup) { if (!TAILQ_EMPTY( &toep->aiotx_jobq)) t4_aiotx_queue_toep( toep); sowwakeup_locked(so); } else SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK_ASSERT(sb); return; } break; } if (IS_AIOTX_MBUF(m)) aiotx_mbuf_seen = true; if (max_nsegs_1mbuf < n) max_nsegs_1mbuf = n; sb_sndptr = m; /* new sb->sb_sndptr if all goes well */ /* This mbuf put us right at the max_nsegs limit */ if (plen > max_imm && nsegs == max_nsegs) { m = m->m_next; break; } } if (sbused(sb) > sb->sb_hiwat * 5 / 8 && toep->plen_nocompl + plen >= sb->sb_hiwat / 4) compl = 1; else compl = 0; if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf && sb->sb_hiwat < V_tcp_autosndbuf_max && sbused(sb) >= sb->sb_hiwat * 7 / 8) { int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc, V_tcp_autosndbuf_max); if (!sbreserve_locked(sb, newsize, so, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; else sowwakeup = 1; /* room available */ } if (sowwakeup) { if (!TAILQ_EMPTY(&toep->aiotx_jobq)) t4_aiotx_queue_toep(toep); sowwakeup_locked(so); } else SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK_ASSERT(sb); /* nothing to send */ if (plen == 0) { KASSERT(m == NULL, ("%s: nothing to send, but m != NULL", __func__)); break; } if (__predict_false(toep->flags & TPF_FIN_SENT)) panic("%s: excess tx.", __func__); shove = m == NULL && !(tp->t_flags & TF_MORETOCOME); if (plen <= max_imm && !aiotx_mbuf_seen) { /* Immediate data tx */ wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16), toep->ofld_txq); if (wr == NULL) { /* XXX: how will we recover from this? */ toep->flags |= TPF_TX_SUSPENDED; return; } txwr = wrtod(wr); credits = howmany(wr->wr_len, 16); write_tx_wr(txwr, toep, plen, plen, credits, shove, 0, sc->tt.tx_align); m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); nsegs = 0; } else { int wr_len; /* DSGL tx */ wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq); if (wr == NULL) { /* XXX: how will we recover from this? 
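 * (Hedged note: the TPF_TX_SUSPENDED set below is cleared again by
 * do_fw4_ack() once enough tx credits return, and the push is retried
 * then. For the wr_len math above, with an invented nsegs of 5:
 * 16B txwr + 16B ulptx_sgl + (3 * 4 / 2) * 8 = 48B of sge pairs,
 * giving wr_len = 80 before roundup2().)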
*/ toep->flags |= TPF_TX_SUSPENDED; return; } txwr = wrtod(wr); credits = howmany(wr_len, 16); write_tx_wr(txwr, toep, 0, plen, credits, shove, 0, sc->tt.tx_align); write_tx_sgl(txwr + 1, sndptr, m, nsegs, max_nsegs_1mbuf); if (wr_len & 0xf) { uint64_t *pad = (uint64_t *) ((uintptr_t)txwr + wr_len); *pad = 0; } } KASSERT(toep->tx_credits >= credits, ("%s: not enough credits", __func__)); toep->tx_credits -= credits; toep->tx_nocompl += credits; toep->plen_nocompl += plen; if (toep->tx_credits <= toep->tx_total * 3 / 8 && toep->tx_nocompl >= toep->tx_total / 4) compl = 1; if (compl || toep->ulp_mode == ULP_MODE_RDMA) { txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL); toep->tx_nocompl = 0; toep->plen_nocompl = 0; } tp->snd_nxt += plen; tp->snd_max += plen; SOCKBUF_LOCK(sb); KASSERT(sb_sndptr, ("%s: sb_sndptr is NULL", __func__)); sb->sb_sndptr = sb_sndptr; SOCKBUF_UNLOCK(sb); toep->flags |= TPF_TX_DATA_SENT; if (toep->tx_credits < MIN_OFLD_TX_CREDITS) toep->flags |= TPF_TX_SUSPENDED; KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); txsd->plen = plen; txsd->tx_credits = credits; txsd++; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { toep->txsd_pidx = 0; txsd = &toep->txsd[0]; } toep->txsd_avail--; t4_l2t_send(sc, wr, toep->l2te); } while (m != NULL); /* Send a FIN if requested, but only if there's no more data to send */ if (m == NULL && toep->flags & TPF_SEND_FIN) close_conn(sc, toep); } static inline void rqdrop_locked(struct mbufq *q, int plen) { struct mbuf *m; while (plen > 0) { m = mbufq_dequeue(q); /* Too many credits. */ MPASS(m != NULL); M_ASSERTPKTHDR(m); /* Partial credits. */ MPASS(plen >= m->m_pkthdr.len); plen -= m->m_pkthdr.len; m_freem(m); } } void t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop) { struct mbuf *sndptr, *m; struct fw_ofld_tx_data_wr *txwr; struct wrqe *wr; u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf; u_int adjusted_plen, ulp_submode; struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); int tx_credits, shove; struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; struct mbufq *pduq = &toep->ulp_pduq; static const u_int ulp_extra_len[] = {0, 4, 4, 8}; INP_WLOCK_ASSERT(inp); KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); KASSERT(toep->ulp_mode == ULP_MODE_ISCSI, ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep)); if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) return; /* * This function doesn't resume by itself. Someone else must clear the * flag and call this function. */ if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) { KASSERT(drop == 0, ("%s: drop (%d) != 0 but tx is suspended", __func__, drop)); return; } if (drop) rqdrop_locked(&toep->ulp_pdu_reclaimq, drop); while ((sndptr = mbufq_first(pduq)) != NULL) { M_ASSERTPKTHDR(sndptr); tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); max_imm = max_imm_payload(tx_credits); max_nsegs = max_dsgl_nsegs(tx_credits); plen = 0; nsegs = 0; max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ for (m = sndptr; m != NULL; m = m->m_next) { int n = sglist_count(mtod(m, void *), m->m_len); nsegs += n; plen += m->m_len; /* * This mbuf would send us _over_ the nsegs limit. * Suspend tx because the PDU can't be sent out. 
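 * (Hedged note: unlike t4_push_frames(), there is no partial-send
 * fallback here; the whole PDU must go out in a single work request,
 * as the MPASS checks below this loop assume.)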
*/ if (plen > max_imm && nsegs > max_nsegs) { toep->flags |= TPF_TX_SUSPENDED; return; } if (max_nsegs_1mbuf < n) max_nsegs_1mbuf = n; } if (__predict_false(toep->flags & TPF_FIN_SENT)) panic("%s: excess tx.", __func__); /* * We have a PDU to send. All of it goes out in one WR so 'm' * is NULL. A PDU's length is always a multiple of 4. */ MPASS(m == NULL); MPASS((plen & 3) == 0); MPASS(sndptr->m_pkthdr.len == plen); shove = !(tp->t_flags & TF_MORETOCOME); ulp_submode = mbuf_ulp_submode(sndptr); MPASS(ulp_submode < nitems(ulp_extra_len)); /* * plen doesn't include header and data digests, which are * generated and inserted in the right places by the TOE, but * they do occupy TCP sequence space and need to be accounted * for. */ adjusted_plen = plen + ulp_extra_len[ulp_submode]; if (plen <= max_imm) { /* Immediate data tx */ wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16), toep->ofld_txq); if (wr == NULL) { /* XXX: how will we recover from this? */ toep->flags |= TPF_TX_SUSPENDED; return; } txwr = wrtod(wr); credits = howmany(wr->wr_len, 16); write_tx_wr(txwr, toep, plen, adjusted_plen, credits, shove, ulp_submode, sc->tt.tx_align); m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); nsegs = 0; } else { int wr_len; /* DSGL tx */ wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq); if (wr == NULL) { /* XXX: how will we recover from this? */ toep->flags |= TPF_TX_SUSPENDED; return; } txwr = wrtod(wr); credits = howmany(wr_len, 16); write_tx_wr(txwr, toep, 0, adjusted_plen, credits, shove, ulp_submode, sc->tt.tx_align); write_tx_sgl(txwr + 1, sndptr, m, nsegs, max_nsegs_1mbuf); if (wr_len & 0xf) { uint64_t *pad = (uint64_t *) ((uintptr_t)txwr + wr_len); *pad = 0; } } KASSERT(toep->tx_credits >= credits, ("%s: not enough credits", __func__)); m = mbufq_dequeue(pduq); MPASS(m == sndptr); mbufq_enqueue(&toep->ulp_pdu_reclaimq, m); toep->tx_credits -= credits; toep->tx_nocompl += credits; toep->plen_nocompl += plen; if (toep->tx_credits <= toep->tx_total * 3 / 8 && toep->tx_nocompl >= toep->tx_total / 4) { txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL); toep->tx_nocompl = 0; toep->plen_nocompl = 0; } tp->snd_nxt += adjusted_plen; tp->snd_max += adjusted_plen; toep->flags |= TPF_TX_DATA_SENT; if (toep->tx_credits < MIN_OFLD_TX_CREDITS) toep->flags |= TPF_TX_SUSPENDED; KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); txsd->plen = plen; txsd->tx_credits = credits; txsd++; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { toep->txsd_pidx = 0; txsd = &toep->txsd[0]; } toep->txsd_avail--; t4_l2t_send(sc, wr, toep->l2te); } /* Send a FIN if requested, but only if there are no more PDUs to send */ if (mbufq_first(pduq) == NULL && toep->flags & TPF_SEND_FIN) close_conn(sc, toep); } int t4_tod_output(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; #ifdef INVARIANTS struct inpcb *inp = tp->t_inpcb; #endif struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("%s: inp %p dropped.", __func__, inp)); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); if (toep->ulp_mode == ULP_MODE_ISCSI) t4_push_pdus(sc, toep, 0); else t4_push_frames(sc, toep, 0); return (0); } int t4_send_fin(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; #ifdef INVARIANTS struct inpcb *inp = tp->t_inpcb; #endif struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); KASSERT((inp->inp_flags & INP_DROPPED) == 0, 
("%s: inp %p dropped.", __func__, inp)); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); toep->flags |= TPF_SEND_FIN; if (tp->t_state >= TCPS_ESTABLISHED) { if (toep->ulp_mode == ULP_MODE_ISCSI) t4_push_pdus(sc, toep, 0); else t4_push_frames(sc, toep, 0); } return (0); } int t4_send_rst(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; #if defined(INVARIANTS) struct inpcb *inp = tp->t_inpcb; #endif struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("%s: inp %p dropped.", __func__, inp)); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); /* hmmmm */ KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc for tid %u [%s] not sent already", __func__, toep->tid, tcpstates[tp->t_state])); send_reset(sc, toep, 0); return (0); } /* * Peer has sent us a FIN. */ static int do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_peer_close *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp = NULL; struct socket *so; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_PEER_CLOSE, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); if (__predict_false(toep->flags & TPF_SYNQE)) { #ifdef INVARIANTS struct synq_entry *synqe = (void *)toep; INP_WLOCK(synqe->lctx->inp); if (synqe->flags & TPF_SYNQE_HAS_L2TE) { KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, ("%s: listen socket closed but tid %u not aborted.", __func__, tid)); } else { /* * do_pass_accept_req is still running and will * eventually take care of this tid. */ } INP_WUNLOCK(synqe->lctx->inp); #endif CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, toep, toep->flags); return (0); } KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); CURVNET_SET(toep->vnet); INP_INFO_RLOCK(&V_tcbinfo); INP_WLOCK(inp); tp = intotcpcb(inp); CTR5(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp); if (toep->flags & TPF_ABORT_SHUTDOWN) goto done; tp->rcv_nxt++; /* FIN */ so = inp->inp_socket; if (toep->ulp_mode == ULP_MODE_TCPDDP) { DDP_LOCK(toep); if (__predict_false(toep->ddp_flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE))) handle_ddp_close(toep, tp, cpl->rcv_nxt); DDP_UNLOCK(toep); } socantrcvmore(so); if (toep->ulp_mode != ULP_MODE_RDMA) { KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt), ("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt, be32toh(cpl->rcv_nxt))); } switch (tp->t_state) { case TCPS_SYN_RECEIVED: tp->t_starttime = ticks; /* FALLTHROUGH */ case TCPS_ESTABLISHED: tp->t_state = TCPS_CLOSE_WAIT; break; case TCPS_FIN_WAIT_1: tp->t_state = TCPS_CLOSING; break; case TCPS_FIN_WAIT_2: tcp_twstart(tp); INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); INP_WLOCK(inp); final_cpl_received(toep); return (0); default: log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n", __func__, tid, tp->t_state); } done: INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return (0); } /* * Peer has ACK'd our FIN. 
*/ static int do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_close_con_rpl *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp = NULL; struct socket *so = NULL; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_CLOSE_CON_RPL, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); CURVNET_SET(toep->vnet); INP_INFO_RLOCK(&V_tcbinfo); INP_WLOCK(inp); tp = intotcpcb(inp); CTR4(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x", __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags); if (toep->flags & TPF_ABORT_SHUTDOWN) goto done; so = inp->inp_socket; tp->snd_una = be32toh(cpl->snd_nxt) - 1; /* exclude FIN */ switch (tp->t_state) { case TCPS_CLOSING: /* see TCPS_FIN_WAIT_2 in do_peer_close too */ tcp_twstart(tp); release: INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); INP_WLOCK(inp); final_cpl_received(toep); /* no more CPLs expected */ return (0); case TCPS_LAST_ACK: if (tcp_close(tp)) INP_WUNLOCK(inp); goto release; case TCPS_FIN_WAIT_1: if (so->so_rcv.sb_state & SBS_CANTRCVMORE) soisdisconnected(so); tp->t_state = TCPS_FIN_WAIT_2; break; default: log(LOG_ERR, "%s: TID %u received CPL_CLOSE_CON_RPL in state %s\n", __func__, tid, tcpstates[tp->t_state]); } done: INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return (0); } void send_abort_rpl(struct adapter *sc, struct sge_wrq *ofld_txq, int tid, int rst_status) { struct wrqe *wr; struct cpl_abort_rpl *cpl; wr = alloc_wrqe(sizeof(*cpl), ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } cpl = wrtod(wr); INIT_TP_WR_MIT_CPL(cpl, CPL_ABORT_RPL, tid); cpl->cmd = rst_status; t4_wrq_tx(sc, wr); } static int abort_status_to_errno(struct tcpcb *tp, unsigned int abort_reason) { switch (abort_reason) { case CPL_ERR_BAD_SYN: case CPL_ERR_CONN_RESET: return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET); case CPL_ERR_XMIT_TIMEDOUT: case CPL_ERR_PERSIST_TIMEDOUT: case CPL_ERR_FINWAIT2_TIMEDOUT: case CPL_ERR_KEEPALIVE_TIMEDOUT: return (ETIMEDOUT); default: return (EIO); } } /* * TCP RST from the peer, timeout, or some other such critical error. 
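 *
 * (Hedged note: the errno handed to the socket comes from
 * abort_status_to_errno() above, e.g. the *_TIMEDOUT reasons map to
 * ETIMEDOUT and CPL_ERR_CONN_RESET to ECONNRESET, or EPIPE if the
 * connection had already reached CLOSE_WAIT.)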
*/ static int do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct sge_wrq *ofld_txq = toep->ofld_txq; struct inpcb *inp; struct tcpcb *tp; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_ABORT_REQ_RSS, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); if (toep->flags & TPF_SYNQE) return (do_abort_req_synqe(iq, rss, m)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); if (negative_advice(cpl->status)) { CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)", __func__, cpl->status, tid, toep->flags); return (0); /* Ignore negative advice */ } inp = toep->inp; CURVNET_SET(toep->vnet); INP_INFO_RLOCK(&V_tcbinfo); /* for tcp_close */ INP_WLOCK(inp); tp = intotcpcb(inp); CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d", __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp->inp_flags, cpl->status); /* * If we'd initiated an abort earlier the reply to it is responsible for * cleaning up resources. Otherwise we tear everything down right here * right now. We owe the T4 a CPL_ABORT_RPL no matter what. */ if (toep->flags & TPF_ABORT_SHUTDOWN) { INP_WUNLOCK(inp); goto done; } toep->flags |= TPF_ABORT_SHUTDOWN; if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { struct socket *so = inp->inp_socket; if (so != NULL) so_error_set(so, abort_status_to_errno(tp, cpl->status)); tp = tcp_close(tp); if (tp == NULL) INP_WLOCK(inp); /* re-acquire */ } final_cpl_received(toep); done: INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST); return (0); } /* * Reply to the CPL_ABORT_REQ (send_reset) */ static int do_abort_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_ABORT_RPL_RSS, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); if (toep->flags & TPF_SYNQE) return (do_abort_rpl_synqe(iq, rss, m)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); CTR5(KTR_CXGBE, "%s: tid %u, toep %p, inp %p, status %d", __func__, tid, toep, inp, cpl->status); KASSERT(toep->flags & TPF_ABORT_SHUTDOWN, ("%s: wasn't expecting abort reply", __func__)); INP_WLOCK(inp); final_cpl_received(toep); return (0); } static int do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_rx_data *cpl = mtod(m, const void *); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp; struct socket *so; struct sockbuf *sb; int len; uint32_t ddp_placed = 0; if (__predict_false(toep->flags & TPF_SYNQE)) { #ifdef INVARIANTS struct synq_entry *synqe = (void *)toep; INP_WLOCK(synqe->lctx->inp); if (synqe->flags & TPF_SYNQE_HAS_L2TE) { KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, ("%s: listen socket closed but tid %u not aborted.", __func__, tid)); } else { /* * 
do_pass_accept_req is still running and will * eventually take care of this tid. */ } INP_WUNLOCK(synqe->lctx->inp); #endif CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, toep, toep->flags); m_freem(m); return (0); } KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); /* strip off CPL header */ m_adj(m, sizeof(*cpl)); len = m->m_pkthdr.len; INP_WLOCK(inp); if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) { CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", __func__, tid, len, inp->inp_flags); INP_WUNLOCK(inp); m_freem(m); return (0); } tp = intotcpcb(inp); if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq))) ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt; tp->rcv_nxt += len; if (tp->rcv_wnd < len) { KASSERT(toep->ulp_mode == ULP_MODE_RDMA, ("%s: negative window size", __func__)); } tp->rcv_wnd -= len; tp->t_rcvtime = ticks; if (toep->ulp_mode == ULP_MODE_TCPDDP) DDP_LOCK(toep); so = inp_inpcbtosocket(inp); sb = &so->so_rcv; SOCKBUF_LOCK(sb); if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) { CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)", __func__, tid, len); m_freem(m); SOCKBUF_UNLOCK(sb); if (toep->ulp_mode == ULP_MODE_TCPDDP) DDP_UNLOCK(toep); INP_WUNLOCK(inp); CURVNET_SET(toep->vnet); INP_INFO_RLOCK(&V_tcbinfo); INP_WLOCK(inp); tp = tcp_drop(tp, ECONNRESET); if (tp) INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return (0); } /* receive buffer autosize */ MPASS(toep->vnet == so->so_vnet); CURVNET_SET(toep->vnet); if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autorcvbuf && sb->sb_hiwat < V_tcp_autorcvbuf_max && len > (sbspace(sb) / 8 * 7)) { unsigned int hiwat = sb->sb_hiwat; unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc, V_tcp_autorcvbuf_max); if (!sbreserve_locked(sb, newsize, so, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; else toep->rx_credits += newsize - hiwat; } if (toep->ddp_waiting_count != 0 || toep->ddp_active_count != 0) CTR3(KTR_CXGBE, "%s: tid %u, non-ddp rx (%d bytes)", __func__, tid, len); if (toep->ulp_mode == ULP_MODE_TCPDDP) { int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off; if (changed) { if (toep->ddp_flags & DDP_SC_REQ) toep->ddp_flags ^= DDP_ON | DDP_SC_REQ; else { KASSERT(cpl->ddp_off == 1, ("%s: DDP switched on by itself.", __func__)); /* Fell out of DDP mode */ toep->ddp_flags &= ~DDP_ON; CTR1(KTR_CXGBE, "%s: fell out of DDP mode", __func__); insert_ddp_data(toep, ddp_placed); } } if (toep->ddp_flags & DDP_ON) { /* * CPL_RX_DATA with DDP on can only be an indicate. * Start posting queued AIO requests via DDP. The * payload that arrived in this indicate is appended * to the socket buffer as usual. 
*/ handle_ddp_indicate(toep); } } KASSERT(toep->sb_cc >= sbused(sb), ("%s: sb %p has more data (%d) than last time (%d).", __func__, sb, sbused(sb), toep->sb_cc)); toep->rx_credits += toep->sb_cc - sbused(sb); sbappendstream_locked(sb, m, 0); toep->sb_cc = sbused(sb); if (toep->rx_credits > 0 && toep->sb_cc + tp->rcv_wnd < sb->sb_lowat) { int credits; credits = send_rx_credits(sc, toep, toep->rx_credits); toep->rx_credits -= credits; tp->rcv_wnd += credits; tp->rcv_adv += credits; } if (toep->ddp_waiting_count > 0 && sbavail(sb) != 0) { CTR2(KTR_CXGBE, "%s: tid %u queueing AIO task", __func__, tid); ddp_queue_toep(toep); } sorwakeup_locked(so); SOCKBUF_UNLOCK_ASSERT(sb); if (toep->ulp_mode == ULP_MODE_TCPDDP) DDP_UNLOCK(toep); INP_WUNLOCK(inp); CURVNET_RESTORE(); return (0); } #define S_CPL_FW4_ACK_OPCODE 24 #define M_CPL_FW4_ACK_OPCODE 0xff #define V_CPL_FW4_ACK_OPCODE(x) ((x) << S_CPL_FW4_ACK_OPCODE) #define G_CPL_FW4_ACK_OPCODE(x) \ (((x) >> S_CPL_FW4_ACK_OPCODE) & M_CPL_FW4_ACK_OPCODE) #define S_CPL_FW4_ACK_FLOWID 0 #define M_CPL_FW4_ACK_FLOWID 0xffffff #define V_CPL_FW4_ACK_FLOWID(x) ((x) << S_CPL_FW4_ACK_FLOWID) #define G_CPL_FW4_ACK_FLOWID(x) \ (((x) >> S_CPL_FW4_ACK_FLOWID) & M_CPL_FW4_ACK_FLOWID) #define S_CPL_FW4_ACK_CR 24 #define M_CPL_FW4_ACK_CR 0xff #define V_CPL_FW4_ACK_CR(x) ((x) << S_CPL_FW4_ACK_CR) #define G_CPL_FW4_ACK_CR(x) (((x) >> S_CPL_FW4_ACK_CR) & M_CPL_FW4_ACK_CR) #define S_CPL_FW4_ACK_SEQVAL 0 #define M_CPL_FW4_ACK_SEQVAL 0x1 #define V_CPL_FW4_ACK_SEQVAL(x) ((x) << S_CPL_FW4_ACK_SEQVAL) #define G_CPL_FW4_ACK_SEQVAL(x) \ (((x) >> S_CPL_FW4_ACK_SEQVAL) & M_CPL_FW4_ACK_SEQVAL) #define F_CPL_FW4_ACK_SEQVAL V_CPL_FW4_ACK_SEQVAL(1U) static int do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp; struct tcpcb *tp; struct socket *so; uint8_t credits = cpl->credits; struct ofld_tx_sdesc *txsd; int plen; #ifdef INVARIANTS unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl))); #endif /* * Very unusual case: we'd sent a flowc + abort_req for a synq entry and * now this comes back carrying the credits for the flowc. 
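 *
 * (Hedged worked example of the accessor macros defined above, value
 * invented: an OPCODE_TID word of 0xc3000421 decodes to opcode 0xc3
 * via G_CPL_FW4_ACK_OPCODE() and flowid/tid 0x421 via
 * G_CPL_FW4_ACK_FLOWID().)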
*/ if (__predict_false(toep->flags & TPF_SYNQE)) { KASSERT(toep->flags & TPF_ABORT_SHUTDOWN, ("%s: credits for a synq entry %p", __func__, toep)); return (0); } inp = toep->inp; KASSERT(opcode == CPL_FW4_ACK, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); INP_WLOCK(inp); if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) { INP_WUNLOCK(inp); return (0); } KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0, ("%s: inp_flags 0x%x", __func__, inp->inp_flags)); tp = intotcpcb(inp); if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) { tcp_seq snd_una = be32toh(cpl->snd_una); #ifdef INVARIANTS if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) { log(LOG_ERR, "%s: unexpected seq# %x for TID %u, snd_una %x\n", __func__, snd_una, toep->tid, tp->snd_una); } #endif if (tp->snd_una != snd_una) { tp->snd_una = snd_una; tp->ts_recent_age = tcp_ts_getticks(); } } #ifdef VERBOSE_TRACES CTR3(KTR_CXGBE, "%s: tid %d credits %u", __func__, tid, credits); #endif so = inp->inp_socket; txsd = &toep->txsd[toep->txsd_cidx]; plen = 0; while (credits) { KASSERT(credits >= txsd->tx_credits, ("%s: too many (or partial) credits", __func__)); credits -= txsd->tx_credits; toep->tx_credits += txsd->tx_credits; plen += txsd->plen; txsd++; toep->txsd_avail++; KASSERT(toep->txsd_avail <= toep->txsd_total, ("%s: txsd avail > total", __func__)); if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) { txsd = &toep->txsd[0]; toep->txsd_cidx = 0; } } if (toep->tx_credits == toep->tx_total) { toep->tx_nocompl = 0; toep->plen_nocompl = 0; } if (toep->flags & TPF_TX_SUSPENDED && toep->tx_credits >= toep->tx_total / 4) { #ifdef VERBOSE_TRACES CTR2(KTR_CXGBE, "%s: tid %d calling t4_push_frames", __func__, tid); #endif toep->flags &= ~TPF_TX_SUSPENDED; CURVNET_SET(toep->vnet); if (toep->ulp_mode == ULP_MODE_ISCSI) t4_push_pdus(sc, toep, plen); else t4_push_frames(sc, toep, plen); CURVNET_RESTORE(); } else if (plen > 0) { struct sockbuf *sb = &so->so_snd; int sbu; SOCKBUF_LOCK(sb); sbu = sbused(sb); if (toep->ulp_mode == ULP_MODE_ISCSI) { if (__predict_false(sbu > 0)) { /* * The data transmitted before the tid's ULP mode * changed to ISCSI is still in so_snd. * Incoming credits should account for so_snd * first. */ sbdrop_locked(sb, min(sbu, plen)); plen -= min(sbu, plen); } sowwakeup_locked(so); /* unlocks so_snd */ rqdrop_locked(&toep->ulp_pdu_reclaimq, plen); } else { #ifdef VERBOSE_TRACES CTR3(KTR_CXGBE, "%s: tid %d dropped %d bytes", __func__, tid, plen); #endif sbdrop_locked(sb, plen); if (!TAILQ_EMPTY(&toep->aiotx_jobq)) t4_aiotx_queue_toep(toep); sowwakeup_locked(so); /* unlocks so_snd */ } SOCKBUF_UNLOCK_ASSERT(sb); } INP_WUNLOCK(inp); return (0); } int do_set_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_SET_TCB_RPL, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); MPASS(iq != &sc->sge.fwq); toep = lookup_tid(sc, tid); if (toep->ulp_mode == ULP_MODE_TCPDDP) { handle_ddp_tcb_rpl(toep, cpl); return (0); } /* * TOM and/or other ULPs don't request replies for CPL_SET_TCB or * CPL_SET_TCB_FIELD requests.
This can easily change and when it does * the dispatch code will go here. */ #ifdef INVARIANTS panic("%s: Unexpected CPL_SET_TCB_RPL for tid %u on iq %p", __func__, tid, iq); #else log(LOG_ERR, "%s: Unexpected CPL_SET_TCB_RPL for tid %u on iq %p\n", __func__, tid, iq); #endif return (0); } void t4_set_tcb_field(struct adapter *sc, struct sge_wrq *wrq, int tid, uint16_t word, uint64_t mask, uint64_t val, int reply, int cookie, int iqid) { struct wrqe *wr; struct cpl_set_tcb_field *req; MPASS((cookie & ~M_COOKIE) == 0); MPASS((iqid & ~M_QUEUENO) == 0); wr = alloc_wrqe(sizeof(*req), wrq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, tid); req->reply_ctrl = htobe16(V_QUEUENO(iqid)); if (reply == 0) req->reply_ctrl |= htobe16(F_NO_REPLY); req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(cookie)); req->mask = htobe64(mask); req->val = htobe64(val); t4_wrq_tx(sc, wr); } void t4_init_cpl_io_handlers(void) { t4_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close); t4_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl); t4_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req); t4_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl); t4_register_cpl_handler(CPL_RX_DATA, do_rx_data); t4_register_cpl_handler(CPL_FW4_ACK, do_fw4_ack); } void t4_uninit_cpl_io_handlers(void) { t4_register_cpl_handler(CPL_PEER_CLOSE, NULL); t4_register_cpl_handler(CPL_CLOSE_CON_RPL, NULL); t4_register_cpl_handler(CPL_ABORT_REQ_RSS, NULL); t4_register_cpl_handler(CPL_ABORT_RPL_RSS, NULL); t4_register_cpl_handler(CPL_RX_DATA, NULL); t4_register_cpl_handler(CPL_FW4_ACK, NULL); } /* * Use the 'backend3' field in AIO jobs to store the amount of data * sent by the AIO job so far and the 'backend4' field to hold an * error that should be reported when the job is completed. */ #define aio_sent backend3 #define aio_error backend4 #define jobtotid(job) \ (((struct toepcb *)(so_sototcpcb((job)->fd_file->f_data)->t_toe))->tid) static void free_aiotx_buffer(struct aiotx_buffer *ab) { struct kaiocb *job; long status; int error; if (refcount_release(&ab->refcount) == 0) return; job = ab->job; error = job->aio_error; status = job->aio_sent; vm_page_unhold_pages(ab->ps.pages, ab->ps.npages); free(ab, M_CXGBE); #ifdef VERBOSE_TRACES CTR5(KTR_CXGBE, "%s: tid %d completed %p len %ld, error %d", __func__, jobtotid(job), job, status, error); #endif if (error == ECANCELED && status != 0) error = 0; if (error == ECANCELED) aio_cancel(job); else if (error) aio_complete(job, -1, error); else aio_complete(job, status, 0); } static void -t4_aiotx_mbuf_free(struct mbuf *m, void *buffer, void *arg) +t4_aiotx_mbuf_free(struct mbuf *m) { - struct aiotx_buffer *ab = buffer; + struct aiotx_buffer *ab = m->m_ext.ext_arg1; #ifdef VERBOSE_TRACES CTR3(KTR_CXGBE, "%s: completed %d bytes for tid %d", __func__, m->m_len, jobtotid(ab->job)); #endif free_aiotx_buffer(ab); } /* * Hold the buffer backing an AIO request and return an AIO transmit * buffer. */ static int hold_aio(struct kaiocb *job) { struct aiotx_buffer *ab; struct vmspace *vm; vm_map_t map; vm_offset_t start, end, pgoff; int n; MPASS(job->backend1 == NULL); /* * The AIO subsystem will cancel and drain all requests before * permitting a process to exit or exec, so p_vmspace should * be stable here. 
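 *
 * Hedged worked example of the page math below (assumes 4KB pages,
 * addresses invented): aio_buf = 0x801234 and aio_nbytes = 10000 give
 * pgoff = 0x234, start = trunc_page = 0x801000, end = round_page =
 * 0x804000, so n = 3 pages are held by vm_fault_quick_hold_pages().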
*/ vm = job->userproc->p_vmspace; map = &vm->vm_map; start = (uintptr_t)job->uaiocb.aio_buf; pgoff = start & PAGE_MASK; end = round_page(start + job->uaiocb.aio_nbytes); start = trunc_page(start); n = atop(end - start); ab = malloc(sizeof(*ab) + n * sizeof(vm_page_t), M_CXGBE, M_WAITOK | M_ZERO); refcount_init(&ab->refcount, 1); ab->ps.pages = (vm_page_t *)(ab + 1); ab->ps.npages = vm_fault_quick_hold_pages(map, start, end - start, VM_PROT_WRITE, ab->ps.pages, n); if (ab->ps.npages < 0) { free(ab, M_CXGBE); return (EFAULT); } KASSERT(ab->ps.npages == n, ("hold_aio: page count mismatch: %d vs %d", ab->ps.npages, n)); ab->ps.offset = pgoff; ab->ps.len = job->uaiocb.aio_nbytes; ab->job = job; job->backend1 = ab; #ifdef VERBOSE_TRACES CTR5(KTR_CXGBE, "%s: tid %d, new pageset %p for job %p, npages %d", __func__, jobtotid(job), &ab->ps, job, ab->ps.npages); #endif return (0); } static void t4_aiotx_process_job(struct toepcb *toep, struct socket *so, struct kaiocb *job) { struct adapter *sc; struct sockbuf *sb; struct file *fp; struct aiotx_buffer *ab; struct inpcb *inp; struct tcpcb *tp; struct mbuf *m; int error; bool moretocome, sendmore; sc = td_adapter(toep->td); sb = &so->so_snd; SOCKBUF_UNLOCK(sb); fp = job->fd_file; ab = job->backend1; m = NULL; #ifdef MAC error = mac_socket_check_send(fp->f_cred, so); if (error != 0) goto out; #endif if (ab == NULL) { error = hold_aio(job); if (error != 0) goto out; ab = job->backend1; } /* Inline sosend_generic(). */ job->msgsnd = 1; error = sblock(sb, SBL_WAIT); MPASS(error == 0); sendanother: m = m_get(M_WAITOK, MT_DATA); SOCKBUF_LOCK(sb); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { SOCKBUF_UNLOCK(sb); sbunlock(sb); if ((so->so_options & SO_NOSIGPIPE) == 0) { PROC_LOCK(job->userproc); kern_psignal(job->userproc, SIGPIPE); PROC_UNLOCK(job->userproc); } error = EPIPE; goto out; } if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(sb); sbunlock(sb); goto out; } if ((so->so_state & SS_ISCONNECTED) == 0) { SOCKBUF_UNLOCK(sb); sbunlock(sb); error = ENOTCONN; goto out; } if (sbspace(sb) < sb->sb_lowat) { MPASS(job->aio_sent == 0 || !(so->so_state & SS_NBIO)); /* * Don't block if there is too little room in the socket * buffer. Instead, requeue the request. */ if (!aio_set_cancel_function(job, t4_aiotx_cancel)) { SOCKBUF_UNLOCK(sb); sbunlock(sb); error = ECANCELED; goto out; } TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list); SOCKBUF_UNLOCK(sb); sbunlock(sb); goto out; } /* * Write as much data as the socket permits, but no more than a * single sndbuf at a time. */ m->m_len = sbspace(sb); if (m->m_len > ab->ps.len - job->aio_sent) { m->m_len = ab->ps.len - job->aio_sent; moretocome = false; } else moretocome = true; if (m->m_len > sc->tt.sndbuf) { m->m_len = sc->tt.sndbuf; sendmore = true; } else sendmore = false; if (!TAILQ_EMPTY(&toep->aiotx_jobq)) moretocome = true; SOCKBUF_UNLOCK(sb); MPASS(m->m_len != 0); /* Inlined tcp_usr_send().
*/ inp = toep->inp; INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { INP_WUNLOCK(inp); sbunlock(sb); error = ECONNRESET; goto out; } refcount_acquire(&ab->refcount); m_extadd(m, NULL, ab->ps.len, t4_aiotx_mbuf_free, ab, (void *)(uintptr_t)job->aio_sent, 0, EXT_NET_DRV); m->m_ext.ext_flags |= EXT_FLAG_AIOTX; job->aio_sent += m->m_len; sbappendstream(sb, m, 0); m = NULL; if (!(inp->inp_flags & INP_DROPPED)) { tp = intotcpcb(inp); if (moretocome) tp->t_flags |= TF_MORETOCOME; error = tp->t_fb->tfb_tcp_output(tp); if (moretocome) tp->t_flags &= ~TF_MORETOCOME; } INP_WUNLOCK(inp); if (sendmore) goto sendanother; sbunlock(sb); if (error) goto out; /* * If this is a non-blocking socket and the request has not * been fully completed, requeue it until the socket is ready * again. */ if (job->aio_sent < job->uaiocb.aio_nbytes && !(so->so_state & SS_NBIO)) { SOCKBUF_LOCK(sb); if (!aio_set_cancel_function(job, t4_aiotx_cancel)) { SOCKBUF_UNLOCK(sb); error = ECANCELED; goto out; } TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list); return; } /* * If the request will not be requeued, drop a reference on * the aiotx buffer. Any mbufs in flight should still * contain a reference, but this drops the reference that the * job owns while it is waiting to queue mbufs to the socket. */ free_aiotx_buffer(ab); out: if (error) { if (ab != NULL) { job->aio_error = error; free_aiotx_buffer(ab); } else { MPASS(job->aio_sent == 0); aio_complete(job, -1, error); } } if (m != NULL) m_free(m); SOCKBUF_LOCK(sb); } static void t4_aiotx_task(void *context, int pending) { struct toepcb *toep = context; struct inpcb *inp = toep->inp; struct socket *so = inp->inp_socket; struct kaiocb *job; CURVNET_SET(toep->vnet); SOCKBUF_LOCK(&so->so_snd); while (!TAILQ_EMPTY(&toep->aiotx_jobq) && sowriteable(so)) { job = TAILQ_FIRST(&toep->aiotx_jobq); TAILQ_REMOVE(&toep->aiotx_jobq, job, list); if (!aio_clear_cancel_function(job)) continue; t4_aiotx_process_job(toep, so, job); } toep->aiotx_task_active = false; SOCKBUF_UNLOCK(&so->so_snd); CURVNET_RESTORE(); free_toepcb(toep); } static void t4_aiotx_queue_toep(struct toepcb *toep) { SOCKBUF_LOCK_ASSERT(&toep->inp->inp_socket->so_snd); #ifdef VERBOSE_TRACES CTR3(KTR_CXGBE, "%s: queueing aiotx task for tid %d, active = %s", __func__, toep->tid, toep->aiotx_task_active ? "true" : "false"); #endif if (toep->aiotx_task_active) return; toep->aiotx_task_active = true; hold_toepcb(toep); soaio_enqueue(&toep->aiotx_task); } static void t4_aiotx_cancel(struct kaiocb *job) { struct aiotx_buffer *ab; struct socket *so; struct sockbuf *sb; struct tcpcb *tp; struct toepcb *toep; so = job->fd_file->f_data; tp = so_sototcpcb(so); toep = tp->t_toe; MPASS(job->uaiocb.aio_lio_opcode == LIO_WRITE); sb = &so->so_snd; SOCKBUF_LOCK(sb); if (!aio_cancel_cleared(job)) TAILQ_REMOVE(&toep->aiotx_jobq, job, list); SOCKBUF_UNLOCK(sb); ab = job->backend1; if (ab != NULL) free_aiotx_buffer(ab); else aio_cancel(job); } int t4_aio_queue_aiotx(struct socket *so, struct kaiocb *job) { struct tcpcb *tp = so_sototcpcb(so); struct toepcb *toep = tp->t_toe; struct adapter *sc = td_adapter(toep->td); /* This only handles writes. 
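 * (Hedged note: returning EOPNOTSUPP below is what lets non-write jobs,
 * or adapters with tx_zcopy disabled, fall back to the regular socket
 * AIO path.)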
*/ if (job->uaiocb.aio_lio_opcode != LIO_WRITE) return (EOPNOTSUPP); if (!sc->tt.tx_zcopy) return (EOPNOTSUPP); SOCKBUF_LOCK(&so->so_snd); #ifdef VERBOSE_TRACES CTR2(KTR_CXGBE, "%s: queueing %p", __func__, job); #endif if (!aio_set_cancel_function(job, t4_aiotx_cancel)) panic("new job was cancelled"); TAILQ_INSERT_TAIL(&toep->aiotx_jobq, job, list); if (sowriteable(so)) t4_aiotx_queue_toep(toep); SOCKBUF_UNLOCK(&so->so_snd); return (0); } void aiotx_init_toep(struct toepcb *toep) { TAILQ_INIT(&toep->aiotx_jobq); TASK_INIT(&toep->aiotx_task, 0, t4_aiotx_task, toep); } #endif Index: head/sys/dev/dpaa/if_dtsec_rm.c =================================================================== --- head/sys/dev/dpaa/if_dtsec_rm.c (revision 324445) +++ head/sys/dev/dpaa/if_dtsec_rm.c (revision 324446) @@ -1,654 +1,656 @@ /*- * Copyright (c) 2012 Semihalf. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "miibus_if.h" #include #include #include #include "fman.h" #include "bman.h" #include "qman.h" #include "if_dtsec.h" #include "if_dtsec_rm.h" /** * @group dTSEC RM private defines. * @{ */ #define DTSEC_BPOOLS_USED (1) #define DTSEC_MAX_TX_QUEUE_LEN 256 struct dtsec_rm_frame_info { struct mbuf *fi_mbuf; t_DpaaSGTE fi_sgt[DPAA_NUM_OF_SG_TABLE_ENTRY]; }; enum dtsec_rm_pool_params { DTSEC_RM_POOL_RX_LOW_MARK = 16, DTSEC_RM_POOL_RX_HIGH_MARK = 64, DTSEC_RM_POOL_RX_MAX_SIZE = 256, DTSEC_RM_POOL_FI_LOW_MARK = 16, DTSEC_RM_POOL_FI_HIGH_MARK = 64, DTSEC_RM_POOL_FI_MAX_SIZE = 256, }; enum dtsec_rm_fqr_params { DTSEC_RM_FQR_RX_CHANNEL = e_QM_FQ_CHANNEL_POOL1, DTSEC_RM_FQR_RX_WQ = 1, DTSEC_RM_FQR_TX_CONF_CHANNEL = e_QM_FQ_CHANNEL_SWPORTAL0, DTSEC_RM_FQR_TX_WQ = 1, DTSEC_RM_FQR_TX_CONF_WQ = 1 }; /** @} */ /** * @group dTSEC Frame Info routines. 
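 * Each frame handed to the hardware gets a dtsec_rm_frame_info that
 * carries the mbuf pointer and the frame's scatter-gather table; the
 * UMA zone created below recycles these structures.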
* @{ */ void dtsec_rm_fi_pool_free(struct dtsec_softc *sc) { if (sc->sc_fi_zone != NULL) uma_zdestroy(sc->sc_fi_zone); } int dtsec_rm_fi_pool_init(struct dtsec_softc *sc) { snprintf(sc->sc_fi_zname, sizeof(sc->sc_fi_zname), "%s: Frame Info", device_get_nameunit(sc->sc_dev)); sc->sc_fi_zone = uma_zcreate(sc->sc_fi_zname, sizeof(struct dtsec_rm_frame_info), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); if (sc->sc_fi_zone == NULL) return (EIO); return (0); } static struct dtsec_rm_frame_info * dtsec_rm_fi_alloc(struct dtsec_softc *sc) { struct dtsec_rm_frame_info *fi; fi = uma_zalloc(sc->sc_fi_zone, M_NOWAIT); return (fi); } static void dtsec_rm_fi_free(struct dtsec_softc *sc, struct dtsec_rm_frame_info *fi) { uma_zfree(sc->sc_fi_zone, fi); } /** @} */ /** * @group dTSEC FMan PORT routines. * @{ */ int dtsec_rm_fm_port_rx_init(struct dtsec_softc *sc, int unit) { t_FmPortParams params; t_FmPortRxParams *rx_params; t_FmPortExtPools *pool_params; t_Error error; memset(&params, 0, sizeof(params)); params.baseAddr = sc->sc_fm_base + sc->sc_port_rx_hw_id; params.h_Fm = sc->sc_fmh; params.portType = dtsec_fm_port_rx_type(sc->sc_eth_dev_type); params.portId = sc->sc_eth_id; params.independentModeEnable = FALSE; params.liodnBase = FM_PORT_LIODN_BASE; params.f_Exception = dtsec_fm_port_rx_exception_callback; params.h_App = sc; rx_params = &params.specificParams.rxParams; rx_params->errFqid = sc->sc_rx_fqid; rx_params->dfltFqid = sc->sc_rx_fqid; rx_params->liodnOffset = 0; pool_params = &rx_params->extBufPools; pool_params->numOfPoolsUsed = DTSEC_BPOOLS_USED; pool_params->extBufPool->id = sc->sc_rx_bpid; pool_params->extBufPool->size = FM_PORT_BUFFER_SIZE; sc->sc_rxph = FM_PORT_Config(&params); if (sc->sc_rxph == NULL) { device_printf(sc->sc_dev, "couldn't configure FM Port RX.\n"); return (ENXIO); } error = FM_PORT_Init(sc->sc_rxph); if (error != E_OK) { device_printf(sc->sc_dev, "couldn't initialize FM Port RX.\n"); FM_PORT_Free(sc->sc_rxph); return (ENXIO); } if (bootverbose) device_printf(sc->sc_dev, "RX hw port 0x%02x initialized.\n", sc->sc_port_rx_hw_id); return (0); } int dtsec_rm_fm_port_tx_init(struct dtsec_softc *sc, int unit) { t_FmPortParams params; t_FmPortNonRxParams *tx_params; t_Error error; memset(&params, 0, sizeof(params)); params.baseAddr = sc->sc_fm_base + sc->sc_port_tx_hw_id; params.h_Fm = sc->sc_fmh; params.portType = dtsec_fm_port_tx_type(sc->sc_eth_dev_type); params.portId = sc->sc_eth_id; params.independentModeEnable = FALSE; params.liodnBase = FM_PORT_LIODN_BASE; params.f_Exception = dtsec_fm_port_tx_exception_callback; params.h_App = sc; tx_params = &params.specificParams.nonRxParams; tx_params->errFqid = sc->sc_tx_conf_fqid; tx_params->dfltFqid = sc->sc_tx_conf_fqid; tx_params->qmChannel = sc->sc_port_tx_qman_chan; #ifdef FM_OP_PARTITION_ERRATA_FMANx8 tx_params->opLiodnOffset = 0; #endif sc->sc_txph = FM_PORT_Config(&params); if (sc->sc_txph == NULL) { device_printf(sc->sc_dev, "couldn't configure FM Port TX.\n"); return (ENXIO); } error = FM_PORT_Init(sc->sc_txph); if (error != E_OK) { device_printf(sc->sc_dev, "couldn't initialize FM Port TX.\n"); FM_PORT_Free(sc->sc_txph); return (ENXIO); } if (bootverbose) device_printf(sc->sc_dev, "TX hw port 0x%02x initialized.\n", sc->sc_port_tx_hw_id); return (0); } /** @} */ /** * @group dTSEC buffer pools routines.
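 * The bman pool created in dtsec_rm_pool_rx_init() is backed by a UMA
 * zone: the get/put callbacks translate between the two, and the
 * depletion callback refills the pool up to DTSEC_RM_POOL_RX_HIGH_MARK.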
* @{ */ static t_Error dtsec_rm_pool_rx_put_buffer(t_Handle h_BufferPool, uint8_t *buffer, t_Handle context) { struct dtsec_softc *sc; sc = h_BufferPool; uma_zfree(sc->sc_rx_zone, buffer); return (E_OK); } static uint8_t * dtsec_rm_pool_rx_get_buffer(t_Handle h_BufferPool, t_Handle *context) { struct dtsec_softc *sc; uint8_t *buffer; sc = h_BufferPool; buffer = uma_zalloc(sc->sc_rx_zone, M_NOWAIT); return (buffer); } static void dtsec_rm_pool_rx_depleted(t_Handle h_App, bool in) { struct dtsec_softc *sc; unsigned int count; sc = h_App; if (!in) return; while (1) { count = bman_count(sc->sc_rx_pool); if (count > DTSEC_RM_POOL_RX_HIGH_MARK) return; bman_pool_fill(sc->sc_rx_pool, DTSEC_RM_POOL_RX_HIGH_MARK); } } void dtsec_rm_pool_rx_free(struct dtsec_softc *sc) { if (sc->sc_rx_pool != NULL) bman_pool_destroy(sc->sc_rx_pool); if (sc->sc_rx_zone != NULL) uma_zdestroy(sc->sc_rx_zone); } int dtsec_rm_pool_rx_init(struct dtsec_softc *sc) { /* FM_PORT_BUFFER_SIZE must be less than PAGE_SIZE */ CTASSERT(FM_PORT_BUFFER_SIZE < PAGE_SIZE); snprintf(sc->sc_rx_zname, sizeof(sc->sc_rx_zname), "%s: RX Buffers", device_get_nameunit(sc->sc_dev)); sc->sc_rx_zone = uma_zcreate(sc->sc_rx_zname, FM_PORT_BUFFER_SIZE, NULL, NULL, NULL, NULL, FM_PORT_BUFFER_SIZE - 1, 0); if (sc->sc_rx_zone == NULL) return (EIO); sc->sc_rx_pool = bman_pool_create(&sc->sc_rx_bpid, FM_PORT_BUFFER_SIZE, 0, 0, DTSEC_RM_POOL_RX_MAX_SIZE, dtsec_rm_pool_rx_get_buffer, dtsec_rm_pool_rx_put_buffer, DTSEC_RM_POOL_RX_LOW_MARK, DTSEC_RM_POOL_RX_HIGH_MARK, 0, 0, dtsec_rm_pool_rx_depleted, sc, NULL, NULL); if (sc->sc_rx_pool == NULL) { device_printf(sc->sc_dev, "NULL rx pool somehow\n"); dtsec_rm_pool_rx_free(sc); return (EIO); } return (0); } /** @} */ /** * @group dTSEC Frame Queue Range routines. * @{ */ static void -dtsec_rm_fqr_mext_free(struct mbuf *m, void *buffer, void *arg) +dtsec_rm_fqr_mext_free(struct mbuf *m) { struct dtsec_softc *sc; + void *buffer; - sc = arg; + buffer = m->m_ext.ext_arg1; + sc = m->m_ext.ext_arg2; if (bman_count(sc->sc_rx_pool) <= DTSEC_RM_POOL_RX_MAX_SIZE) bman_put_buffer(sc->sc_rx_pool, buffer); else - dtsec_rm_pool_rx_put_buffer(arg, buffer, NULL); + dtsec_rm_pool_rx_put_buffer(sc, buffer, NULL); } static e_RxStoreResponse dtsec_rm_fqr_rx_callback(t_Handle app, t_Handle fqr, t_Handle portal, uint32_t fqid_off, t_DpaaFD *frame) { struct dtsec_softc *sc; struct mbuf *m; m = NULL; sc = app; KASSERT(DPAA_FD_GET_FORMAT(frame) == e_DPAA_FD_FORMAT_TYPE_SHORT_SBSF, ("%s(): Got unsupported frame format 0x%02X!", __func__, DPAA_FD_GET_FORMAT(frame))); KASSERT(DPAA_FD_GET_OFFSET(frame) == 0, ("%s(): Only offset 0 is supported!", __func__)); if (DPAA_FD_GET_STATUS(frame) != 0) { device_printf(sc->sc_dev, "RX error: 0x%08X\n", DPAA_FD_GET_STATUS(frame)); goto err; } m = m_gethdr(M_NOWAIT, MT_HEADER); if (m == NULL) goto err; m_extadd(m, DPAA_FD_GET_ADDR(frame), FM_PORT_BUFFER_SIZE, dtsec_rm_fqr_mext_free, DPAA_FD_GET_ADDR(frame), sc, 0, EXT_NET_DRV); m->m_pkthdr.rcvif = sc->sc_ifnet; m->m_len = DPAA_FD_GET_LENGTH(frame); m_fixhdr(m); (*sc->sc_ifnet->if_input)(sc->sc_ifnet, m); return (e_RX_STORE_RESPONSE_CONTINUE); err: bman_put_buffer(sc->sc_rx_pool, DPAA_FD_GET_ADDR(frame)); if (m != NULL) m_freem(m); return (e_RX_STORE_RESPONSE_CONTINUE); } static e_RxStoreResponse dtsec_rm_fqr_tx_confirm_callback(t_Handle app, t_Handle fqr, t_Handle portal, uint32_t fqid_off, t_DpaaFD *frame) { struct dtsec_rm_frame_info *fi; struct dtsec_softc *sc; unsigned int qlen; t_DpaaSGTE *sgt0; sc = app; if (DPAA_FD_GET_STATUS(frame) != 0) device_printf(sc->sc_dev, "TX error: 0x%08X\n",
DPAA_FD_GET_STATUS(frame)); /* * We are storing struct dtsec_rm_frame_info in first entry * of scatter-gather table. */ sgt0 = DPAA_FD_GET_ADDR(frame); fi = DPAA_SGTE_GET_ADDR(sgt0); /* Free transmitted frame */ m_freem(fi->fi_mbuf); dtsec_rm_fi_free(sc, fi); qlen = qman_fqr_get_counter(sc->sc_tx_conf_fqr, 0, e_QM_FQR_COUNTERS_FRAME); if (qlen == 0) { DTSEC_LOCK(sc); if (sc->sc_tx_fqr_full) { sc->sc_tx_fqr_full = 0; dtsec_rm_if_start_locked(sc); } DTSEC_UNLOCK(sc); } return (e_RX_STORE_RESPONSE_CONTINUE); } void dtsec_rm_fqr_rx_free(struct dtsec_softc *sc) { if (sc->sc_rx_fqr) qman_fqr_free(sc->sc_rx_fqr); } int dtsec_rm_fqr_rx_init(struct dtsec_softc *sc) { t_Error error; t_Handle fqr; /* Default Frame Queue */ fqr = qman_fqr_create(1, DTSEC_RM_FQR_RX_CHANNEL, DTSEC_RM_FQR_RX_WQ, FALSE, 0, FALSE, FALSE, TRUE, FALSE, 0, 0, 0); if (fqr == NULL) { device_printf(sc->sc_dev, "could not create default RX queue" "\n"); return (EIO); } sc->sc_rx_fqr = fqr; sc->sc_rx_fqid = qman_fqr_get_base_fqid(fqr); error = qman_fqr_register_cb(fqr, dtsec_rm_fqr_rx_callback, sc); if (error != E_OK) { device_printf(sc->sc_dev, "could not register RX callback\n"); dtsec_rm_fqr_rx_free(sc); return (EIO); } return (0); } void dtsec_rm_fqr_tx_free(struct dtsec_softc *sc) { if (sc->sc_tx_fqr) qman_fqr_free(sc->sc_tx_fqr); if (sc->sc_tx_conf_fqr) qman_fqr_free(sc->sc_tx_conf_fqr); } int dtsec_rm_fqr_tx_init(struct dtsec_softc *sc) { t_Error error; t_Handle fqr; /* TX Frame Queue */ fqr = qman_fqr_create(1, sc->sc_port_tx_qman_chan, DTSEC_RM_FQR_TX_WQ, FALSE, 0, FALSE, FALSE, TRUE, FALSE, 0, 0, 0); if (fqr == NULL) { device_printf(sc->sc_dev, "could not create default TX queue" "\n"); return (EIO); } sc->sc_tx_fqr = fqr; /* TX Confirmation Frame Queue */ fqr = qman_fqr_create(1, DTSEC_RM_FQR_TX_CONF_CHANNEL, DTSEC_RM_FQR_TX_CONF_WQ, FALSE, 0, FALSE, FALSE, TRUE, FALSE, 0, 0, 0); if (fqr == NULL) { device_printf(sc->sc_dev, "could not create TX confirmation " "queue\n"); dtsec_rm_fqr_tx_free(sc); return (EIO); } sc->sc_tx_conf_fqr = fqr; sc->sc_tx_conf_fqid = qman_fqr_get_base_fqid(fqr); error = qman_fqr_register_cb(fqr, dtsec_rm_fqr_tx_confirm_callback, sc); if (error != E_OK) { device_printf(sc->sc_dev, "could not register TX confirmation " "callback\n"); dtsec_rm_fqr_tx_free(sc); return (EIO); } return (0); } /** @} */ /** * @group dTSEC IFnet routines. * @{ */ void dtsec_rm_if_start_locked(struct dtsec_softc *sc) { vm_size_t dsize, psize, ssize; struct dtsec_rm_frame_info *fi; unsigned int qlen, i; struct mbuf *m0, *m; vm_offset_t vaddr; vm_paddr_t paddr; t_DpaaFD fd; DTSEC_LOCK_ASSERT(sc); /* TODO: IFF_DRV_OACTIVE */ if ((sc->sc_mii->mii_media_status & IFM_ACTIVE) == 0) return; if ((sc->sc_ifnet->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) return; while (!IFQ_DRV_IS_EMPTY(&sc->sc_ifnet->if_snd)) { /* Check length of the TX queue */ qlen = qman_fqr_get_counter(sc->sc_tx_fqr, 0, e_QM_FQR_COUNTERS_FRAME); if (qlen >= DTSEC_MAX_TX_QUEUE_LEN) { sc->sc_tx_fqr_full = 1; return; } fi = dtsec_rm_fi_alloc(sc); if (fi == NULL) return; IFQ_DRV_DEQUEUE(&sc->sc_ifnet->if_snd, m0); if (m0 == NULL) { dtsec_rm_fi_free(sc, fi); return; } i = 0; m = m0; psize = 0; dsize = 0; fi->fi_mbuf = m0; while (m && i < DPAA_NUM_OF_SG_TABLE_ENTRY) { if (m->m_len == 0) continue; /* * First entry in scatter-gather table is used to keep * pointer to frame info structure. 
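 *
 * (Hedged sketch of the resulting layout: a zero-length entry pointing
 * at fi for bookkeeping, then the mbuf data split at page boundaries
 * with the FINAL bit set on the last entry; on TX confirmation,
 * dtsec_rm_fqr_tx_confirm_callback() recovers fi from the first entry.)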
*/ DPAA_SGTE_SET_ADDR(&fi->fi_sgt[i], (void *)fi); DPAA_SGTE_SET_LENGTH(&fi->fi_sgt[i], 0); DPAA_SGTE_SET_EXTENSION(&fi->fi_sgt[i], 0); DPAA_SGTE_SET_FINAL(&fi->fi_sgt[i], 0); DPAA_SGTE_SET_BPID(&fi->fi_sgt[i], 0); DPAA_SGTE_SET_OFFSET(&fi->fi_sgt[i], 0); i++; dsize = m->m_len; vaddr = (vm_offset_t)m->m_data; while (dsize > 0 && i < DPAA_NUM_OF_SG_TABLE_ENTRY) { paddr = XX_VirtToPhys((void *)vaddr); ssize = PAGE_SIZE - (paddr & PAGE_MASK); if (m->m_len < ssize) ssize = m->m_len; DPAA_SGTE_SET_ADDR(&fi->fi_sgt[i], (void *)vaddr); DPAA_SGTE_SET_LENGTH(&fi->fi_sgt[i], ssize); DPAA_SGTE_SET_EXTENSION(&fi->fi_sgt[i], 0); DPAA_SGTE_SET_FINAL(&fi->fi_sgt[i], 0); DPAA_SGTE_SET_BPID(&fi->fi_sgt[i], 0); DPAA_SGTE_SET_OFFSET(&fi->fi_sgt[i], 0); dsize -= ssize; vaddr += ssize; psize += ssize; i++; } if (dsize > 0) break; m = m->m_next; } /* Check if SG table was constructed properly */ if (m != NULL || dsize != 0) { dtsec_rm_fi_free(sc, fi); m_freem(m0); continue; } DPAA_SGTE_SET_FINAL(&fi->fi_sgt[i-1], 1); DPAA_FD_SET_ADDR(&fd, fi->fi_sgt); DPAA_FD_SET_LENGTH(&fd, psize); DPAA_FD_SET_FORMAT(&fd, e_DPAA_FD_FORMAT_TYPE_SHORT_MBSF); DPAA_FD_SET_DD(&fd, 0); DPAA_FD_SET_PID(&fd, 0); DPAA_FD_SET_BPID(&fd, 0); DPAA_FD_SET_OFFSET(&fd, 0); DPAA_FD_SET_STATUS(&fd, 0); DTSEC_UNLOCK(sc); if (qman_fqr_enqueue(sc->sc_tx_fqr, 0, &fd) != E_OK) { dtsec_rm_fi_free(sc, fi); m_freem(m0); } DTSEC_LOCK(sc); } } /** @} */ Index: head/sys/dev/if_ndis/if_ndis.c =================================================================== --- head/sys/dev/if_ndis/if_ndis.c (revision 324445) +++ head/sys/dev/if_ndis/if_ndis.c (revision 324446) @@ -1,3424 +1,3424 @@ /*- * Copyright (c) 2003 * Bill Paul <wpaul@windriver.com>. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. * * WPA support originally contributed by Arvind Srinivasan <arvind@celar.us> * then hacked upon mercilessly by me.
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NDIS_DEBUG #ifdef NDIS_DEBUG #define DPRINTF(x) do { if (ndis_debug > 0) printf x; } while (0) int ndis_debug = 0; SYSCTL_INT(_debug, OID_AUTO, ndis, CTLFLAG_RW, &ndis_debug, 0, "if_ndis debug level"); #else #define DPRINTF(x) #endif SYSCTL_DECL(_hw_ndisusb); int ndisusb_halt = 1; SYSCTL_INT(_hw_ndisusb, OID_AUTO, halt, CTLFLAG_RW, &ndisusb_halt, 0, "Halt NDIS USB driver when it's attached"); /* 0 - 30 dBm to mW conversion table */ static const uint16_t dBm2mW[] = { 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 25, 28, 32, 35, 40, 45, 50, 56, 63, 71, 79, 89, 100, 112, 126, 141, 158, 178, 200, 224, 251, 282, 316, 355, 398, 447, 501, 562, 631, 708, 794, 891, 1000 }; MODULE_DEPEND(ndis, ether, 1, 1, 1); MODULE_DEPEND(ndis, wlan, 1, 1, 1); MODULE_DEPEND(ndis, ndisapi, 1, 1, 1); MODULE_VERSION(ndis, 1); int ndis_attach (device_t); int ndis_detach (device_t); int ndis_suspend (device_t); int ndis_resume (device_t); void ndis_shutdown (device_t); int ndisdrv_modevent (module_t, int, void *); static void ndis_txeof (ndis_handle, ndis_packet *, ndis_status); static void ndis_rxeof (ndis_handle, ndis_packet **, uint32_t); static void ndis_rxeof_eth (ndis_handle, ndis_handle, char *, void *, uint32_t, void *, uint32_t, uint32_t); static void ndis_rxeof_done (ndis_handle); static void ndis_rxeof_xfr (kdpc *, ndis_handle, void *, void *); static void ndis_rxeof_xfr_done (ndis_handle, ndis_packet *, uint32_t, uint32_t); static void ndis_linksts (ndis_handle, ndis_status, void *, uint32_t); static void ndis_linksts_done (ndis_handle); /* We need to wrap these functions for amd64. 
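 * Windows miniport code expects Microsoft's amd64 calling convention,
 * while the kernel calls these handlers with the native SysV one, so
 * every FreeBSD function handed to the miniport as a callback first
 * goes through a thunk created by windrv_wrap().
 *
 * A minimal sketch of the wrap/unwrap pattern used by ndisdrv_modevent()
 * below; the example_* names are hypothetical, while windrv_wrap() and
 * windrv_unwrap() are the helpers this file really uses:
 */
#if 0	/* illustrative sketch only */
static void example_cb(ndis_handle adapter);	/* hypothetical callback */
static funcptr example_cb_wrap;

static void
example_wrap_init(void)
{
	/* 1 = number of arguments example_cb() takes */
	windrv_wrap((funcptr)example_cb, &example_cb_wrap, 1,
	    WINDRV_WRAP_STDCALL);
}

static void
example_wrap_fini(void)
{
	windrv_unwrap(example_cb_wrap);
}
#endif
/* The wrapper pointers themselves: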
*/ static funcptr ndis_txeof_wrap; static funcptr ndis_rxeof_wrap; static funcptr ndis_rxeof_eth_wrap; static funcptr ndis_rxeof_done_wrap; static funcptr ndis_rxeof_xfr_wrap; static funcptr ndis_rxeof_xfr_done_wrap; static funcptr ndis_linksts_wrap; static funcptr ndis_linksts_done_wrap; static funcptr ndis_ticktask_wrap; static funcptr ndis_ifstarttask_wrap; static funcptr ndis_resettask_wrap; static funcptr ndis_inputtask_wrap; static struct ieee80211vap *ndis_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void ndis_vap_delete (struct ieee80211vap *); static void ndis_tick (void *); static void ndis_ticktask (device_object *, void *); static int ndis_raw_xmit (struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); static void ndis_update_mcast (struct ieee80211com *); static void ndis_update_promisc (struct ieee80211com *); static void ndis_ifstart (struct ifnet *); static void ndis_ifstarttask (device_object *, void *); static void ndis_resettask (device_object *, void *); static void ndis_inputtask (device_object *, void *); static int ndis_ifioctl (struct ifnet *, u_long, caddr_t); static int ndis_newstate (struct ieee80211vap *, enum ieee80211_state, int); static int ndis_nettype_chan (uint32_t); static int ndis_nettype_mode (uint32_t); static void ndis_scan (void *); static void ndis_scan_results (struct ndis_softc *); static void ndis_scan_start (struct ieee80211com *); static void ndis_scan_end (struct ieee80211com *); static void ndis_set_channel (struct ieee80211com *); static void ndis_scan_curchan (struct ieee80211_scan_state *, unsigned long); static void ndis_scan_mindwell (struct ieee80211_scan_state *); static void ndis_init (void *); static void ndis_stop (struct ndis_softc *); static int ndis_ifmedia_upd (struct ifnet *); static void ndis_ifmedia_sts (struct ifnet *, struct ifmediareq *); static int ndis_get_bssid_list (struct ndis_softc *, ndis_80211_bssid_list_ex **); static int ndis_get_assoc (struct ndis_softc *, ndis_wlan_bssid_ex **); static int ndis_probe_offload (struct ndis_softc *); static int ndis_set_offload (struct ndis_softc *); static void ndis_getstate_80211 (struct ndis_softc *); static void ndis_setstate_80211 (struct ndis_softc *); static void ndis_auth_and_assoc (struct ndis_softc *, struct ieee80211vap *); static void ndis_media_status (struct ifnet *, struct ifmediareq *); static int ndis_set_cipher (struct ndis_softc *, int); static int ndis_set_wpa (struct ndis_softc *, void *, int); static int ndis_add_key (struct ieee80211vap *, const struct ieee80211_key *); static int ndis_del_key (struct ieee80211vap *, const struct ieee80211_key *); static void ndis_setmulti (struct ndis_softc *); static void ndis_map_sclist (void *, bus_dma_segment_t *, int, bus_size_t, int); static int ndis_ifattach(struct ndis_softc *); static int ndis_80211attach(struct ndis_softc *); static int ndis_80211ioctl(struct ieee80211com *, u_long , void *); static int ndis_80211transmit(struct ieee80211com *, struct mbuf *); static void ndis_80211parent(struct ieee80211com *); static int ndisdrv_loaded = 0; /* * This routine should call windrv_load() once for each driver * image. This will do the relocation and dynalinking for the * image, and create a Windows driver object which will be * saved in our driver database. 
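 * ndisdrv_modevent() below is that entry point: the windrv_wrap() calls
 * for the callbacks above happen once on MOD_LOAD, and the matching
 * windrv_unwrap() calls run on the final MOD_UNLOAD or on MOD_SHUTDOWN.
 *
 * For orientation, the module event handler shape it follows, reduced
 * to a skeleton (sketch only; the example_* names are hypothetical):
 */
#if 0	/* illustrative sketch only */
static int
example_modevent(module_t mod, int cmd, void *arg)
{
	int error = 0;

	switch (cmd) {
	case MOD_LOAD:
		/* one-time setup, e.g. wrapping callbacks */
		break;
	case MOD_UNLOAD:
		/* undo whatever MOD_LOAD set up */
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}
#endif
/* The real handler: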
*/ int ndisdrv_modevent(mod, cmd, arg) module_t mod; int cmd; void *arg; { int error = 0; switch (cmd) { case MOD_LOAD: ndisdrv_loaded++; if (ndisdrv_loaded > 1) break; windrv_wrap((funcptr)ndis_rxeof, &ndis_rxeof_wrap, 3, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_rxeof_eth, &ndis_rxeof_eth_wrap, 8, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_rxeof_done, &ndis_rxeof_done_wrap, 1, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_rxeof_xfr, &ndis_rxeof_xfr_wrap, 4, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_rxeof_xfr_done, &ndis_rxeof_xfr_done_wrap, 4, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_txeof, &ndis_txeof_wrap, 3, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_linksts, &ndis_linksts_wrap, 4, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_linksts_done, &ndis_linksts_done_wrap, 1, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_ticktask, &ndis_ticktask_wrap, 2, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_ifstarttask, &ndis_ifstarttask_wrap, 2, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_resettask, &ndis_resettask_wrap, 2, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_inputtask, &ndis_inputtask_wrap, 2, WINDRV_WRAP_STDCALL); break; case MOD_UNLOAD: ndisdrv_loaded--; if (ndisdrv_loaded > 0) break; /* fallthrough */ case MOD_SHUTDOWN: windrv_unwrap(ndis_rxeof_wrap); windrv_unwrap(ndis_rxeof_eth_wrap); windrv_unwrap(ndis_rxeof_done_wrap); windrv_unwrap(ndis_rxeof_xfr_wrap); windrv_unwrap(ndis_rxeof_xfr_done_wrap); windrv_unwrap(ndis_txeof_wrap); windrv_unwrap(ndis_linksts_wrap); windrv_unwrap(ndis_linksts_done_wrap); windrv_unwrap(ndis_ticktask_wrap); windrv_unwrap(ndis_ifstarttask_wrap); windrv_unwrap(ndis_resettask_wrap); windrv_unwrap(ndis_inputtask_wrap); break; default: error = EINVAL; break; } return (error); } /* * Program the 64-bit multicast hash filter. 
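 * (Either by enabling NDIS_PACKET_TYPE_ALL_MULTICAST, or by handing the
 * card an explicit address list through OID_802_3_MULTICAST_LIST.)
 *
 * ndis_setmulti() and the offload routines after it all drive the card
 * through the ndis_get_info()/ndis_set_info() accessors, whose length
 * argument is in/out: querying with a too-small buffer fails with
 * ENOSPC and reports the size required. A sketch of that
 * probe-then-fetch idiom (example_query_oid() is hypothetical):
 */
#if 0	/* illustrative sketch only */
static int
example_query_oid(struct ndis_softc *sc, uint32_t oid, void **bufp)
{
	int error, len;

	len = 0;
	error = ndis_get_info(sc, oid, NULL, &len);
	if (error != ENOSPC)
		return (error);		/* no data or hard failure */
	*bufp = malloc(len, M_TEMP, M_NOWAIT | M_ZERO);
	if (*bufp == NULL)
		return (ENOMEM);
	error = ndis_get_info(sc, oid, *bufp, &len);
	if (error != 0)
		free(*bufp, M_TEMP);
	return (error);
}
#endif
/* ndis_setmulti() itself: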
*/ static void ndis_setmulti(sc) struct ndis_softc *sc; { struct ifnet *ifp; struct ifmultiaddr *ifma; int len, mclistsz, error; uint8_t *mclist; if (!NDIS_INITIALIZED(sc)) return; if (sc->ndis_80211) return; ifp = sc->ifp; if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) { sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; len = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &len); if (error) device_printf(sc->ndis_dev, "set allmulti failed: %d\n", error); return; } if (TAILQ_EMPTY(&ifp->if_multiaddrs)) return; len = sizeof(mclistsz); ndis_get_info(sc, OID_802_3_MAXIMUM_LIST_SIZE, &mclistsz, &len); mclist = malloc(ETHER_ADDR_LEN * mclistsz, M_TEMP, M_NOWAIT|M_ZERO); if (mclist == NULL) { sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; goto out; } sc->ndis_filter |= NDIS_PACKET_TYPE_MULTICAST; len = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), mclist + (ETHER_ADDR_LEN * len), ETHER_ADDR_LEN); len++; if (len > mclistsz) { if_maddr_runlock(ifp); sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; sc->ndis_filter &= ~NDIS_PACKET_TYPE_MULTICAST; goto out; } } if_maddr_runlock(ifp); len = len * ETHER_ADDR_LEN; error = ndis_set_info(sc, OID_802_3_MULTICAST_LIST, mclist, &len); if (error) { device_printf(sc->ndis_dev, "set mclist failed: %d\n", error); sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; sc->ndis_filter &= ~NDIS_PACKET_TYPE_MULTICAST; } out: free(mclist, M_TEMP); len = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &len); if (error) device_printf(sc->ndis_dev, "set multi failed: %d\n", error); } static int ndis_set_offload(sc) struct ndis_softc *sc; { ndis_task_offload *nto; ndis_task_offload_hdr *ntoh; ndis_task_tcpip_csum *nttc; struct ifnet *ifp; int len, error; if (!NDIS_INITIALIZED(sc)) return (EINVAL); if (sc->ndis_80211) return (EINVAL); /* See if there's anything to set. 
*/ ifp = sc->ifp; error = ndis_probe_offload(sc); if (error) return (error); if (sc->ndis_hwassist == 0 && ifp->if_capabilities == 0) return (0); len = sizeof(ndis_task_offload_hdr) + sizeof(ndis_task_offload) + sizeof(ndis_task_tcpip_csum); ntoh = malloc(len, M_TEMP, M_NOWAIT|M_ZERO); if (ntoh == NULL) return (ENOMEM); ntoh->ntoh_vers = NDIS_TASK_OFFLOAD_VERSION; ntoh->ntoh_len = sizeof(ndis_task_offload_hdr); ntoh->ntoh_offset_firsttask = sizeof(ndis_task_offload_hdr); ntoh->ntoh_encapfmt.nef_encaphdrlen = sizeof(struct ether_header); ntoh->ntoh_encapfmt.nef_encap = NDIS_ENCAP_IEEE802_3; ntoh->ntoh_encapfmt.nef_flags = NDIS_ENCAPFLAG_FIXEDHDRLEN; nto = (ndis_task_offload *)((char *)ntoh + ntoh->ntoh_offset_firsttask); nto->nto_vers = NDIS_TASK_OFFLOAD_VERSION; nto->nto_len = sizeof(ndis_task_offload); nto->nto_task = NDIS_TASK_TCPIP_CSUM; nto->nto_offset_nexttask = 0; nto->nto_taskbuflen = sizeof(ndis_task_tcpip_csum); nttc = (ndis_task_tcpip_csum *)nto->nto_taskbuf; if (ifp->if_capenable & IFCAP_TXCSUM) nttc->nttc_v4tx = sc->ndis_v4tx; if (ifp->if_capenable & IFCAP_RXCSUM) nttc->nttc_v4rx = sc->ndis_v4rx; error = ndis_set_info(sc, OID_TCP_TASK_OFFLOAD, ntoh, &len); free(ntoh, M_TEMP); return (error); } static int ndis_probe_offload(sc) struct ndis_softc *sc; { ndis_task_offload *nto; ndis_task_offload_hdr *ntoh; ndis_task_tcpip_csum *nttc = NULL; struct ifnet *ifp; int len, error, dummy; ifp = sc->ifp; len = sizeof(dummy); error = ndis_get_info(sc, OID_TCP_TASK_OFFLOAD, &dummy, &len); if (error != ENOSPC) return (error); ntoh = malloc(len, M_TEMP, M_NOWAIT|M_ZERO); if (ntoh == NULL) return (ENOMEM); ntoh->ntoh_vers = NDIS_TASK_OFFLOAD_VERSION; ntoh->ntoh_len = sizeof(ndis_task_offload_hdr); ntoh->ntoh_encapfmt.nef_encaphdrlen = sizeof(struct ether_header); ntoh->ntoh_encapfmt.nef_encap = NDIS_ENCAP_IEEE802_3; ntoh->ntoh_encapfmt.nef_flags = NDIS_ENCAPFLAG_FIXEDHDRLEN; error = ndis_get_info(sc, OID_TCP_TASK_OFFLOAD, ntoh, &len); if (error) { free(ntoh, M_TEMP); return (error); } if (ntoh->ntoh_vers != NDIS_TASK_OFFLOAD_VERSION) { free(ntoh, M_TEMP); return (EINVAL); } nto = (ndis_task_offload *)((char *)ntoh + ntoh->ntoh_offset_firsttask); while (1) { switch (nto->nto_task) { case NDIS_TASK_TCPIP_CSUM: nttc = (ndis_task_tcpip_csum *)nto->nto_taskbuf; break; /* Don't handle these yet. 
*/ case NDIS_TASK_IPSEC: case NDIS_TASK_TCP_LARGESEND: default: break; } if (nto->nto_offset_nexttask == 0) break; nto = (ndis_task_offload *)((char *)nto + nto->nto_offset_nexttask); } if (nttc == NULL) { free(ntoh, M_TEMP); return (ENOENT); } sc->ndis_v4tx = nttc->nttc_v4tx; sc->ndis_v4rx = nttc->nttc_v4rx; if (nttc->nttc_v4tx & NDIS_TCPSUM_FLAGS_IP_CSUM) sc->ndis_hwassist |= CSUM_IP; if (nttc->nttc_v4tx & NDIS_TCPSUM_FLAGS_TCP_CSUM) sc->ndis_hwassist |= CSUM_TCP; if (nttc->nttc_v4tx & NDIS_TCPSUM_FLAGS_UDP_CSUM) sc->ndis_hwassist |= CSUM_UDP; if (sc->ndis_hwassist) ifp->if_capabilities |= IFCAP_TXCSUM; if (nttc->nttc_v4rx & NDIS_TCPSUM_FLAGS_IP_CSUM) ifp->if_capabilities |= IFCAP_RXCSUM; if (nttc->nttc_v4rx & NDIS_TCPSUM_FLAGS_TCP_CSUM) ifp->if_capabilities |= IFCAP_RXCSUM; if (nttc->nttc_v4rx & NDIS_TCPSUM_FLAGS_UDP_CSUM) ifp->if_capabilities |= IFCAP_RXCSUM; free(ntoh, M_TEMP); return (0); } static int ndis_nettype_chan(uint32_t type) { switch (type) { case NDIS_80211_NETTYPE_11FH: return (IEEE80211_CHAN_FHSS); case NDIS_80211_NETTYPE_11DS: return (IEEE80211_CHAN_B); case NDIS_80211_NETTYPE_11OFDM5: return (IEEE80211_CHAN_A); case NDIS_80211_NETTYPE_11OFDM24: return (IEEE80211_CHAN_G); } DPRINTF(("unknown channel nettype %d\n", type)); return (IEEE80211_CHAN_B); /* Default to 11B chan */ } static int ndis_nettype_mode(uint32_t type) { switch (type) { case NDIS_80211_NETTYPE_11FH: return (IEEE80211_MODE_FH); case NDIS_80211_NETTYPE_11DS: return (IEEE80211_MODE_11B); case NDIS_80211_NETTYPE_11OFDM5: return (IEEE80211_MODE_11A); case NDIS_80211_NETTYPE_11OFDM24: return (IEEE80211_MODE_11G); } DPRINTF(("unknown mode nettype %d\n", type)); return (IEEE80211_MODE_AUTO); } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ int ndis_attach(device_t dev) { struct ndis_softc *sc; driver_object *pdrv; device_object *pdo; int error = 0, len; int i; sc = device_get_softc(dev); mtx_init(&sc->ndis_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); KeInitializeSpinLock(&sc->ndis_rxlock); KeInitializeSpinLock(&sc->ndisusb_tasklock); KeInitializeSpinLock(&sc->ndisusb_xferdonelock); InitializeListHead(&sc->ndis_shlist); InitializeListHead(&sc->ndisusb_tasklist); InitializeListHead(&sc->ndisusb_xferdonelist); callout_init(&sc->ndis_stat_callout, 1); mbufq_init(&sc->ndis_rxqueue, INT_MAX); /* XXXGL: sane maximum */ if (sc->ndis_iftype == PCMCIABus) { error = ndis_alloc_amem(sc); if (error) { device_printf(dev, "failed to allocate " "attribute memory\n"); goto fail; } } /* Create sysctl registry nodes */ ndis_create_sysctls(sc); /* Find the PDO for this device instance. */ if (sc->ndis_iftype == PCIBus) pdrv = windrv_lookup(0, "PCI Bus"); else if (sc->ndis_iftype == PCMCIABus) pdrv = windrv_lookup(0, "PCCARD Bus"); else pdrv = windrv_lookup(0, "USB Bus"); pdo = windrv_find_pdo(pdrv, dev); /* * Create a new functional device object for this * device. This is what creates the miniport block * for this device instance. */ if (NdisAddDevice(sc->ndis_dobj, pdo) != STATUS_SUCCESS) { device_printf(dev, "failed to create FDO!\n"); error = ENXIO; goto fail; } /* Tell the user what version of the API the driver is using. */ device_printf(dev, "NDIS API version: %d.%d\n", sc->ndis_chars->nmc_version_major, sc->ndis_chars->nmc_version_minor); /* Do resource conversion. */ if (sc->ndis_iftype == PCMCIABus || sc->ndis_iftype == PCIBus) ndis_convert_res(sc); else sc->ndis_block->nmb_rlist = NULL; /* Install our RX and TX interrupt handlers. 
*/ sc->ndis_block->nmb_senddone_func = ndis_txeof_wrap; sc->ndis_block->nmb_pktind_func = ndis_rxeof_wrap; sc->ndis_block->nmb_ethrxindicate_func = ndis_rxeof_eth_wrap; sc->ndis_block->nmb_ethrxdone_func = ndis_rxeof_done_wrap; sc->ndis_block->nmb_tdcond_func = ndis_rxeof_xfr_done_wrap; /* Override the status handler so we can detect link changes. */ sc->ndis_block->nmb_status_func = ndis_linksts_wrap; sc->ndis_block->nmb_statusdone_func = ndis_linksts_done_wrap; /* Set up work item handlers. */ sc->ndis_tickitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndis_startitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndis_resetitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndis_inputitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndisusb_xferdoneitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndisusb_taskitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); KeInitializeDpc(&sc->ndis_rxdpc, ndis_rxeof_xfr_wrap, sc->ndis_block); /* Call driver's init routine. */ if (ndis_init_nic(sc)) { device_printf(dev, "init handler failed\n"); error = ENXIO; goto fail; } /* * Figure out how big to make the TX buffer pool. */ len = sizeof(sc->ndis_maxpkts); if (ndis_get_info(sc, OID_GEN_MAXIMUM_SEND_PACKETS, &sc->ndis_maxpkts, &len)) { device_printf(dev, "failed to get max TX packets\n"); error = ENXIO; goto fail; } /* * If this is a deserialized miniport, we don't have * to honor the OID_GEN_MAXIMUM_SEND_PACKETS result. */ if (!NDIS_SERIALIZED(sc->ndis_block)) sc->ndis_maxpkts = NDIS_TXPKTS; /* Enforce some sanity, just in case. */ if (sc->ndis_maxpkts == 0) sc->ndis_maxpkts = 10; sc->ndis_txarray = malloc(sizeof(ndis_packet *) * sc->ndis_maxpkts, M_DEVBUF, M_NOWAIT|M_ZERO); /* Allocate a pool of ndis_packets for TX encapsulation. */ NdisAllocatePacketPool(&i, &sc->ndis_txpool, sc->ndis_maxpkts, PROTOCOL_RESERVED_SIZE_IN_PACKET); if (i != NDIS_STATUS_SUCCESS) { sc->ndis_txpool = NULL; device_printf(dev, "failed to allocate TX packet pool"); error = ENOMEM; goto fail; } sc->ndis_txpending = sc->ndis_maxpkts; sc->ndis_oidcnt = 0; /* Get supported oid list. */ ndis_get_supported_oids(sc, &sc->ndis_oids, &sc->ndis_oidcnt); /* If the NDIS module requested scatter/gather, init maps. */ if (sc->ndis_sc) ndis_init_dma(sc); /* * See if the OID_802_11_CONFIGURATION OID is * supported by this driver. If it is, then this an 802.11 * wireless driver, and we should set up media for wireless. */ for (i = 0; i < sc->ndis_oidcnt; i++) if (sc->ndis_oids[i] == OID_802_11_CONFIGURATION) { sc->ndis_80211 = 1; break; } if (sc->ndis_80211) error = ndis_80211attach(sc); else error = ndis_ifattach(sc); fail: if (error) { ndis_detach(dev); return (error); } if (sc->ndis_iftype == PNPBus && ndisusb_halt == 0) return (error); DPRINTF(("attach done.\n")); /* We're done talking to the NIC for now; halt it. 
*/ ndis_halt_nic(sc); DPRINTF(("halting done.\n")); return (error); } static int ndis_80211attach(struct ndis_softc *sc) { struct ieee80211com *ic = &sc->ndis_ic; ndis_80211_rates_ex rates; struct ndis_80211_nettype_list *ntl; uint32_t arg; int mode, i, r, len, nonettypes = 1; uint8_t bands[IEEE80211_MODE_BYTES] = { 0 }; callout_init(&sc->ndis_scan_callout, 1); ic->ic_softc = sc; ic->ic_ioctl = ndis_80211ioctl; ic->ic_name = device_get_nameunit(sc->ndis_dev); ic->ic_opmode = IEEE80211_M_STA; ic->ic_phytype = IEEE80211_T_DS; ic->ic_caps = IEEE80211_C_8023ENCAP | IEEE80211_C_STA | IEEE80211_C_IBSS; setbit(ic->ic_modecaps, IEEE80211_MODE_AUTO); len = 0; r = ndis_get_info(sc, OID_802_11_NETWORK_TYPES_SUPPORTED, NULL, &len); if (r != ENOSPC) goto nonettypes; ntl = malloc(len, M_DEVBUF, M_WAITOK | M_ZERO); r = ndis_get_info(sc, OID_802_11_NETWORK_TYPES_SUPPORTED, ntl, &len); if (r != 0) { free(ntl, M_DEVBUF); goto nonettypes; } for (i = 0; i < ntl->ntl_items; i++) { mode = ndis_nettype_mode(ntl->ntl_type[i]); if (mode) { nonettypes = 0; setbit(ic->ic_modecaps, mode); setbit(bands, mode); } else device_printf(sc->ndis_dev, "Unknown nettype %d\n", ntl->ntl_type[i]); } free(ntl, M_DEVBUF); nonettypes: /* Default to 11b channels if the card did not supply any */ if (nonettypes) { setbit(ic->ic_modecaps, IEEE80211_MODE_11B); setbit(bands, IEEE80211_MODE_11B); } len = sizeof(rates); bzero((char *)&rates, len); r = ndis_get_info(sc, OID_802_11_SUPPORTED_RATES, (void *)rates, &len); if (r != 0) device_printf(sc->ndis_dev, "get rates failed: 0x%x\n", r); /* * Since the supported rates only up to 8 can be supported, * if this is not 802.11b we're just going to be faking it * all up to heck. */ #define TESTSETRATE(x, y) \ do { \ int i; \ for (i = 0; i < ic->ic_sup_rates[x].rs_nrates; i++) { \ if (ic->ic_sup_rates[x].rs_rates[i] == (y)) \ break; \ } \ if (i == ic->ic_sup_rates[x].rs_nrates) { \ ic->ic_sup_rates[x].rs_rates[i] = (y); \ ic->ic_sup_rates[x].rs_nrates++; \ } \ } while (0) #define SETRATE(x, y) \ ic->ic_sup_rates[x].rs_rates[ic->ic_sup_rates[x].rs_nrates] = (y) #define INCRATE(x) \ ic->ic_sup_rates[x].rs_nrates++ ic->ic_curmode = IEEE80211_MODE_AUTO; if (isset(ic->ic_modecaps, IEEE80211_MODE_11A)) ic->ic_sup_rates[IEEE80211_MODE_11A].rs_nrates = 0; if (isset(ic->ic_modecaps, IEEE80211_MODE_11B)) ic->ic_sup_rates[IEEE80211_MODE_11B].rs_nrates = 0; if (isset(ic->ic_modecaps, IEEE80211_MODE_11G)) ic->ic_sup_rates[IEEE80211_MODE_11G].rs_nrates = 0; for (i = 0; i < len; i++) { switch (rates[i] & IEEE80211_RATE_VAL) { case 2: case 4: case 11: case 10: case 22: if (isclr(ic->ic_modecaps, IEEE80211_MODE_11B)) { /* Lazy-init 802.11b. */ setbit(ic->ic_modecaps, IEEE80211_MODE_11B); ic->ic_sup_rates[IEEE80211_MODE_11B]. rs_nrates = 0; } SETRATE(IEEE80211_MODE_11B, rates[i]); INCRATE(IEEE80211_MODE_11B); break; default: if (isset(ic->ic_modecaps, IEEE80211_MODE_11A)) { SETRATE(IEEE80211_MODE_11A, rates[i]); INCRATE(IEEE80211_MODE_11A); } if (isset(ic->ic_modecaps, IEEE80211_MODE_11G)) { SETRATE(IEEE80211_MODE_11G, rates[i]); INCRATE(IEEE80211_MODE_11G); } break; } } /* * If the hardware supports 802.11g, it most * likely supports 802.11b and all of the * 802.11b and 802.11g speeds, so maybe we can * just cheat here. Just how in the heck do * we detect turbo modes, though? 
*/ if (isset(ic->ic_modecaps, IEEE80211_MODE_11B)) { TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|2); TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|4); TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|11); TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|22); } if (isset(ic->ic_modecaps, IEEE80211_MODE_11G)) { TESTSETRATE(IEEE80211_MODE_11G, 48); TESTSETRATE(IEEE80211_MODE_11G, 72); TESTSETRATE(IEEE80211_MODE_11G, 96); TESTSETRATE(IEEE80211_MODE_11G, 108); } if (isset(ic->ic_modecaps, IEEE80211_MODE_11A)) { TESTSETRATE(IEEE80211_MODE_11A, 48); TESTSETRATE(IEEE80211_MODE_11A, 72); TESTSETRATE(IEEE80211_MODE_11A, 96); TESTSETRATE(IEEE80211_MODE_11A, 108); } #undef SETRATE #undef INCRATE #undef TESTSETRATE ieee80211_init_channels(ic, NULL, bands); /* * To test for WPA support, we need to see if we can * set AUTHENTICATION_MODE to WPA and read it back * successfully. */ i = sizeof(arg); arg = NDIS_80211_AUTHMODE_WPA; r = ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i); if (r == 0) { r = ndis_get_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i); if (r == 0 && arg == NDIS_80211_AUTHMODE_WPA) ic->ic_caps |= IEEE80211_C_WPA; } /* * To test for supported ciphers, we set each * available encryption type in descending order. * If ENC3 works, then we have WEP, TKIP and AES. * If only ENC2 works, then we have WEP and TKIP. * If only ENC1 works, then we have just WEP. */ i = sizeof(arg); arg = NDIS_80211_WEPSTAT_ENC3ENABLED; r = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &i); if (r == 0) { ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP | IEEE80211_CRYPTO_TKIP | IEEE80211_CRYPTO_AES_CCM; goto got_crypto; } arg = NDIS_80211_WEPSTAT_ENC2ENABLED; r = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &i); if (r == 0) { ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP | IEEE80211_CRYPTO_TKIP; goto got_crypto; } arg = NDIS_80211_WEPSTAT_ENC1ENABLED; r = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &i); if (r == 0) ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP; got_crypto: i = sizeof(arg); r = ndis_get_info(sc, OID_802_11_POWER_MODE, &arg, &i); if (r == 0) ic->ic_caps |= IEEE80211_C_PMGT; r = ndis_get_info(sc, OID_802_11_TX_POWER_LEVEL, &arg, &i); if (r == 0) ic->ic_caps |= IEEE80211_C_TXPMGT; /* * Get station address from the driver. */ len = sizeof(ic->ic_macaddr); ndis_get_info(sc, OID_802_3_CURRENT_ADDRESS, &ic->ic_macaddr, &len); ieee80211_ifattach(ic); ic->ic_raw_xmit = ndis_raw_xmit; ic->ic_scan_start = ndis_scan_start; ic->ic_scan_end = ndis_scan_end; ic->ic_set_channel = ndis_set_channel; ic->ic_scan_curchan = ndis_scan_curchan; ic->ic_scan_mindwell = ndis_scan_mindwell; ic->ic_bsschan = IEEE80211_CHAN_ANYC; ic->ic_vap_create = ndis_vap_create; ic->ic_vap_delete = ndis_vap_delete; ic->ic_update_mcast = ndis_update_mcast; ic->ic_update_promisc = ndis_update_promisc; ic->ic_transmit = ndis_80211transmit; ic->ic_parent = ndis_80211parent; if (bootverbose) ieee80211_announce(ic); return (0); } static int ndis_ifattach(struct ndis_softc *sc) { struct ifnet *ifp; u_char eaddr[ETHER_ADDR_LEN]; int len; ifp = if_alloc(IFT_ETHER); if (ifp == NULL) return (ENOSPC); sc->ifp = ifp; ifp->if_softc = sc; /* Check for task offload support. */ ndis_probe_offload(sc); /* * Get station address from the driver. 
*/ len = sizeof(eaddr); ndis_get_info(sc, OID_802_3_CURRENT_ADDRESS, eaddr, &len); if_initname(ifp, device_get_name(sc->ndis_dev), device_get_unit(sc->ndis_dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ndis_ifioctl; ifp->if_start = ndis_ifstart; ifp->if_init = ndis_init; ifp->if_baudrate = 10000000; IFQ_SET_MAXLEN(&ifp->if_snd, 50); ifp->if_snd.ifq_drv_maxlen = 25; IFQ_SET_READY(&ifp->if_snd); ifp->if_capenable = ifp->if_capabilities; ifp->if_hwassist = sc->ndis_hwassist; ifmedia_init(&sc->ifmedia, IFM_IMASK, ndis_ifmedia_upd, ndis_ifmedia_sts); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T|IFM_FDX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_100_TX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_100_TX|IFM_FDX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_AUTO, 0, NULL); ifmedia_set(&sc->ifmedia, IFM_ETHER|IFM_AUTO); ether_ifattach(ifp, eaddr); return (0); } static struct ieee80211vap * ndis_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac[IEEE80211_ADDR_LEN]) { struct ndis_vap *nvp; struct ieee80211vap *vap; if (!TAILQ_EMPTY(&ic->ic_vaps)) /* only one at a time */ return NULL; nvp = malloc(sizeof(struct ndis_vap), M_80211_VAP, M_WAITOK | M_ZERO); vap = &nvp->vap; ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); /* override with driver methods */ nvp->newstate = vap->iv_newstate; vap->iv_newstate = ndis_newstate; /* complete setup */ ieee80211_vap_attach(vap, ieee80211_media_change, ndis_media_status, mac); ic->ic_opmode = opmode; /* install key handing routines */ vap->iv_key_set = ndis_add_key; vap->iv_key_delete = ndis_del_key; return vap; } static void ndis_vap_delete(struct ieee80211vap *vap) { struct ndis_vap *nvp = NDIS_VAP(vap); struct ieee80211com *ic = vap->iv_ic; struct ndis_softc *sc = ic->ic_softc; ndis_stop(sc); callout_drain(&sc->ndis_scan_callout); ieee80211_vap_detach(vap); free(nvp, M_80211_VAP); } /* * Shutdown hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. 
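 * The convention that makes this safe is that the softc comes back from
 * device_get_softc() zeroed, so every teardown step below is guarded by
 * a check that the resource was actually set up. Condensed to a shape
 * (sketch only; struct example_softc and its fields are placeholders,
 * not the real ndis softc layout):
 */
#if 0	/* illustrative sketch only */
struct example_softc {
	device_t	 ex_dev;
	struct resource	*ex_irq;
	char		*ex_buf;
};

static void
example_teardown(struct example_softc *sc)
{
	/* Release in reverse order of allocation, skipping what is unset. */
	if (sc->ex_irq != NULL)
		bus_release_resource(sc->ex_dev, SYS_RES_IRQ, 0, sc->ex_irq);
	if (sc->ex_buf != NULL)
		free(sc->ex_buf, M_DEVBUF);
}
#endif
/* ndis_detach() itself: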
*/ int ndis_detach(device_t dev) { struct ifnet *ifp; struct ndis_softc *sc; driver_object *drv; sc = device_get_softc(dev); NDIS_LOCK(sc); if (!sc->ndis_80211) ifp = sc->ifp; else ifp = NULL; if (ifp != NULL) ifp->if_flags &= ~IFF_UP; if (device_is_attached(dev)) { NDIS_UNLOCK(sc); ndis_stop(sc); if (sc->ndis_80211) ieee80211_ifdetach(&sc->ndis_ic); else if (ifp != NULL) ether_ifdetach(ifp); } else NDIS_UNLOCK(sc); if (sc->ndis_tickitem != NULL) IoFreeWorkItem(sc->ndis_tickitem); if (sc->ndis_startitem != NULL) IoFreeWorkItem(sc->ndis_startitem); if (sc->ndis_resetitem != NULL) IoFreeWorkItem(sc->ndis_resetitem); if (sc->ndis_inputitem != NULL) IoFreeWorkItem(sc->ndis_inputitem); if (sc->ndisusb_xferdoneitem != NULL) IoFreeWorkItem(sc->ndisusb_xferdoneitem); if (sc->ndisusb_taskitem != NULL) IoFreeWorkItem(sc->ndisusb_taskitem); bus_generic_detach(dev); ndis_unload_driver(sc); if (sc->ndis_irq) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->ndis_irq); if (sc->ndis_res_io) bus_release_resource(dev, SYS_RES_IOPORT, sc->ndis_io_rid, sc->ndis_res_io); if (sc->ndis_res_mem) bus_release_resource(dev, SYS_RES_MEMORY, sc->ndis_mem_rid, sc->ndis_res_mem); if (sc->ndis_res_altmem) bus_release_resource(dev, SYS_RES_MEMORY, sc->ndis_altmem_rid, sc->ndis_res_altmem); if (ifp != NULL) if_free(ifp); if (sc->ndis_iftype == PCMCIABus) ndis_free_amem(sc); if (sc->ndis_sc) ndis_destroy_dma(sc); if (sc->ndis_txarray) free(sc->ndis_txarray, M_DEVBUF); if (!sc->ndis_80211) ifmedia_removeall(&sc->ifmedia); if (sc->ndis_txpool != NULL) NdisFreePacketPool(sc->ndis_txpool); /* Destroy the PDO for this device. */ if (sc->ndis_iftype == PCIBus) drv = windrv_lookup(0, "PCI Bus"); else if (sc->ndis_iftype == PCMCIABus) drv = windrv_lookup(0, "PCCARD Bus"); else drv = windrv_lookup(0, "USB Bus"); if (drv == NULL) panic("couldn't find driver object"); windrv_destroy_pdo(drv, dev); if (sc->ndis_iftype == PCIBus) bus_dma_tag_destroy(sc->ndis_parent_tag); return (0); } int ndis_suspend(dev) device_t dev; { struct ndis_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->ifp; #ifdef notdef if (NDIS_INITIALIZED(sc)) ndis_stop(sc); #endif return (0); } int ndis_resume(dev) device_t dev; { struct ndis_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->ifp; if (NDIS_INITIALIZED(sc)) ndis_init(sc); return (0); } /* * The following bunch of routines are here to support drivers that * use the NdisMEthIndicateReceive()/MiniportTransferData() mechanism. * The NdisMEthIndicateReceive() handler runs at DISPATCH_LEVEL for * serialized miniports, or IRQL <= DISPATCH_LEVEL for deserialized * miniports. */ static void ndis_rxeof_eth(adapter, ctx, addr, hdr, hdrlen, lookahead, lookaheadlen, pktlen) ndis_handle adapter; ndis_handle ctx; char *addr; void *hdr; uint32_t hdrlen; void *lookahead; uint32_t lookaheadlen; uint32_t pktlen; { ndis_miniport_block *block; uint8_t irql = 0; uint32_t status; ndis_buffer *b; ndis_packet *p; struct mbuf *m; ndis_ethpriv *priv; block = adapter; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return; /* Save the data provided to us so far. 
*/ m->m_len = lookaheadlen + hdrlen; m->m_pkthdr.len = pktlen + hdrlen; m->m_next = NULL; m_copyback(m, 0, hdrlen, hdr); m_copyback(m, hdrlen, lookaheadlen, lookahead); /* Now create a fake NDIS_PACKET to hold the data */ NdisAllocatePacket(&status, &p, block->nmb_rxpool); if (status != NDIS_STATUS_SUCCESS) { m_freem(m); return; } p->np_m0 = m; b = IoAllocateMdl(m->m_data, m->m_pkthdr.len, FALSE, FALSE, NULL); if (b == NULL) { NdisFreePacket(p); m_freem(m); return; } p->np_private.npp_head = p->np_private.npp_tail = b; p->np_private.npp_totlen = m->m_pkthdr.len; /* Save the packet RX context somewhere. */ priv = (ndis_ethpriv *)&p->np_protocolreserved; priv->nep_ctx = ctx; if (!NDIS_SERIALIZED(block)) KeAcquireSpinLock(&block->nmb_lock, &irql); InsertTailList((&block->nmb_packetlist), (&p->np_list)); if (!NDIS_SERIALIZED(block)) KeReleaseSpinLock(&block->nmb_lock, irql); } /* * NdisMEthIndicateReceiveComplete() handler, runs at DISPATCH_LEVEL * for serialized miniports, or IRQL <= DISPATCH_LEVEL for deserialized * miniports. */ static void ndis_rxeof_done(adapter) ndis_handle adapter; { struct ndis_softc *sc; ndis_miniport_block *block; block = adapter; /* Schedule transfer/RX of queued packets. */ sc = device_get_softc(block->nmb_physdeviceobj->do_devext); KeInsertQueueDpc(&sc->ndis_rxdpc, NULL, NULL); } /* * MiniportTransferData() handler, runs at DISPATCH_LEVEL. */ static void ndis_rxeof_xfr(dpc, adapter, sysarg1, sysarg2) kdpc *dpc; ndis_handle adapter; void *sysarg1; void *sysarg2; { ndis_miniport_block *block; struct ndis_softc *sc; ndis_packet *p; list_entry *l; uint32_t status; ndis_ethpriv *priv; struct ifnet *ifp; struct mbuf *m; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; KeAcquireSpinLockAtDpcLevel(&block->nmb_lock); l = block->nmb_packetlist.nle_flink; while(!IsListEmpty(&block->nmb_packetlist)) { l = RemoveHeadList((&block->nmb_packetlist)); p = CONTAINING_RECORD(l, ndis_packet, np_list); InitializeListHead((&p->np_list)); priv = (ndis_ethpriv *)&p->np_protocolreserved; m = p->np_m0; p->np_softc = sc; p->np_m0 = NULL; KeReleaseSpinLockFromDpcLevel(&block->nmb_lock); status = MSCALL6(sc->ndis_chars->nmc_transferdata_func, p, &p->np_private.npp_totlen, block, priv->nep_ctx, m->m_len, m->m_pkthdr.len - m->m_len); KeAcquireSpinLockAtDpcLevel(&block->nmb_lock); /* * If status is NDIS_STATUS_PENDING, do nothing and * wait for a callback to the ndis_rxeof_xfr_done() * handler. */ m->m_len = m->m_pkthdr.len; m->m_pkthdr.rcvif = ifp; if (status == NDIS_STATUS_SUCCESS) { IoFreeMdl(p->np_private.npp_head); NdisFreePacket(p); KeAcquireSpinLockAtDpcLevel(&sc->ndis_rxlock); mbufq_enqueue(&sc->ndis_rxqueue, m); KeReleaseSpinLockFromDpcLevel(&sc->ndis_rxlock); IoQueueWorkItem(sc->ndis_inputitem, (io_workitem_func)ndis_inputtask_wrap, WORKQUEUE_CRITICAL, sc); } if (status == NDIS_STATUS_FAILURE) m_freem(m); /* Advance to next packet */ l = block->nmb_packetlist.nle_flink; } KeReleaseSpinLockFromDpcLevel(&block->nmb_lock); } /* * NdisMTransferDataComplete() handler, runs at DISPATCH_LEVEL. 
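 * Like the indication handlers above, it runs too high (DISPATCH_LEVEL)
 * to call into the network stack directly, so the finished mbuf is
 * queued under ndis_rxlock and ndis_inputtask() is scheduled to deliver
 * it later at PASSIVE_LEVEL.
 *
 * The producer half of that hand-off, reduced to its core (sketch only;
 * example_rx_handoff() is hypothetical, the calls inside it are the
 * ones the real handlers use):
 */
#if 0	/* illustrative sketch only */
static void
example_rx_handoff(struct ndis_softc *sc, struct mbuf *m)
{
	KeAcquireSpinLockAtDpcLevel(&sc->ndis_rxlock);
	mbufq_enqueue(&sc->ndis_rxqueue, m);
	KeReleaseSpinLockFromDpcLevel(&sc->ndis_rxlock);
	IoQueueWorkItem(sc->ndis_inputitem,
	    (io_workitem_func)ndis_inputtask_wrap,
	    WORKQUEUE_CRITICAL, sc);
}
#endif
/* The completion handler: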
*/ static void ndis_rxeof_xfr_done(adapter, packet, status, len) ndis_handle adapter; ndis_packet *packet; uint32_t status; uint32_t len; { ndis_miniport_block *block; struct ndis_softc *sc; struct ifnet *ifp; struct mbuf *m; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; m = packet->np_m0; IoFreeMdl(packet->np_private.npp_head); NdisFreePacket(packet); if (status != NDIS_STATUS_SUCCESS) { m_freem(m); return; } m->m_len = m->m_pkthdr.len; m->m_pkthdr.rcvif = ifp; KeAcquireSpinLockAtDpcLevel(&sc->ndis_rxlock); mbufq_enqueue(&sc->ndis_rxqueue, m); KeReleaseSpinLockFromDpcLevel(&sc->ndis_rxlock); IoQueueWorkItem(sc->ndis_inputitem, (io_workitem_func)ndis_inputtask_wrap, WORKQUEUE_CRITICAL, sc); } /* * A frame has been uploaded: pass the resulting mbuf chain up to * the higher level protocols. * * When handling received NDIS packets, the 'status' field in the * out-of-band portion of the ndis_packet has special meaning. In the * most common case, the underlying NDIS driver will set this field * to NDIS_STATUS_SUCCESS, which indicates that it's ok for us to * take possession of it. We then change the status field to * NDIS_STATUS_PENDING to tell the driver that we now own the packet, * and that we will return it at some point in the future via the * return packet handler. * * If the driver hands us a packet with a status of NDIS_STATUS_RESOURCES, * this means the driver is running out of packet/buffer resources and * wants to maintain ownership of the packet. In this case, we have to * copy the packet data into local storage and let the driver keep the * packet. */ static void ndis_rxeof(adapter, packets, pktcnt) ndis_handle adapter; ndis_packet **packets; uint32_t pktcnt; { struct ndis_softc *sc; ndis_miniport_block *block; ndis_packet *p; uint32_t s; ndis_tcpip_csum *csum; struct ifnet *ifp; struct mbuf *m0, *m; int i; block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; /* * There's a slim chance the driver may indicate some packets * before we're completely ready to handle them. If we detect this, * we need to return them to the miniport and ignore them. */ if (!sc->ndis_running) { for (i = 0; i < pktcnt; i++) { p = packets[i]; if (p->np_oob.npo_status == NDIS_STATUS_SUCCESS) { p->np_refcnt++; - (void)ndis_return_packet(NULL ,p, block); + ndis_return_packet(p); } } return; } for (i = 0; i < pktcnt; i++) { p = packets[i]; /* Stash the softc here so ptom can use it. */ p->np_softc = sc; if (ndis_ptom(&m0, p)) { device_printf(sc->ndis_dev, "ptom failed\n"); if (p->np_oob.npo_status == NDIS_STATUS_SUCCESS) - (void)ndis_return_packet(NULL, p, block); + ndis_return_packet(p); } else { #ifdef notdef if (p->np_oob.npo_status == NDIS_STATUS_RESOURCES) { m = m_dup(m0, M_NOWAIT); /* * NOTE: we want to destroy the mbuf here, but * we don't actually want to return it to the * driver via the return packet handler. By * bumping np_refcnt, we can prevent the * ndis_return_packet() routine from actually * doing anything. */ p->np_refcnt++; m_freem(m0); if (m == NULL) if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); else m0 = m; } else p->np_oob.npo_status = NDIS_STATUS_PENDING; #endif m = m_dup(m0, M_NOWAIT); if (p->np_oob.npo_status == NDIS_STATUS_RESOURCES) p->np_refcnt++; else p->np_oob.npo_status = NDIS_STATUS_PENDING; m_freem(m0); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); continue; } m0 = m; m0->m_pkthdr.rcvif = ifp; /* Deal with checksum offload. 
*/ if (ifp->if_capenable & IFCAP_RXCSUM && p->np_ext.npe_info[ndis_tcpipcsum_info] != NULL) { s = (uintptr_t) p->np_ext.npe_info[ndis_tcpipcsum_info]; csum = (ndis_tcpip_csum *)&s; if (csum->u.ntc_rxflags & NDIS_RXCSUM_IP_PASSED) m0->m_pkthdr.csum_flags |= CSUM_IP_CHECKED|CSUM_IP_VALID; if (csum->u.ntc_rxflags & (NDIS_RXCSUM_TCP_PASSED | NDIS_RXCSUM_UDP_PASSED)) { m0->m_pkthdr.csum_flags |= CSUM_DATA_VALID|CSUM_PSEUDO_HDR; m0->m_pkthdr.csum_data = 0xFFFF; } } KeAcquireSpinLockAtDpcLevel(&sc->ndis_rxlock); mbufq_enqueue(&sc->ndis_rxqueue, m0); KeReleaseSpinLockFromDpcLevel(&sc->ndis_rxlock); IoQueueWorkItem(sc->ndis_inputitem, (io_workitem_func)ndis_inputtask_wrap, WORKQUEUE_CRITICAL, sc); } } } /* * This routine is run at PASSIVE_LEVEL. We use this routine to pass * packets into the stack in order to avoid calling (*ifp->if_input)() * with any locks held (at DISPATCH_LEVEL, we'll be holding the * 'dispatch level' per-cpu sleep lock). */ static void ndis_inputtask(device_object *dobj, void *arg) { ndis_miniport_block *block; struct ndis_softc *sc = arg; struct mbuf *m; uint8_t irql; block = dobj->do_devext; KeAcquireSpinLock(&sc->ndis_rxlock, &irql); while ((m = mbufq_dequeue(&sc->ndis_rxqueue)) != NULL) { KeReleaseSpinLock(&sc->ndis_rxlock, irql); if ((sc->ndis_80211 != 0)) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); if (vap != NULL) vap->iv_deliver_data(vap, vap->iv_bss, m); } else { struct ifnet *ifp = sc->ifp; (*ifp->if_input)(ifp, m); } KeAcquireSpinLock(&sc->ndis_rxlock, &irql); } KeReleaseSpinLock(&sc->ndis_rxlock, irql); } /* * A frame was downloaded to the chip. It's safe for us to clean up * the list buffers. */ static void ndis_txeof(adapter, packet, status) ndis_handle adapter; ndis_packet *packet; ndis_status status; { struct ndis_softc *sc; ndis_miniport_block *block; struct ifnet *ifp; int idx; struct mbuf *m; block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; m = packet->np_m0; idx = packet->np_txidx; if (sc->ndis_sc) bus_dmamap_unload(sc->ndis_ttag, sc->ndis_tmaps[idx]); ndis_free_packet(packet); m_freem(m); NDIS_LOCK(sc); sc->ndis_txarray[idx] = NULL; sc->ndis_txpending++; if (!sc->ndis_80211) { struct ifnet *ifp = sc->ifp; if (status == NDIS_STATUS_SUCCESS) if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); else if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } sc->ndis_tx_timer = 0; NDIS_UNLOCK(sc); if (!sc->ndis_80211) IoQueueWorkItem(sc->ndis_startitem, (io_workitem_func)ndis_ifstarttask_wrap, WORKQUEUE_CRITICAL, sc); DPRINTF(("%s: ndis_ifstarttask_wrap sc=%p\n", __func__, sc)); } static void ndis_linksts(adapter, status, sbuf, slen) ndis_handle adapter; ndis_status status; void *sbuf; uint32_t slen; { ndis_miniport_block *block; struct ndis_softc *sc; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); sc->ndis_sts = status; /* Event list is all full up, drop this one. */ NDIS_LOCK(sc); if (sc->ndis_evt[sc->ndis_evtpidx].ne_sts) { NDIS_UNLOCK(sc); return; } /* Cache the event. 
*/ if (slen) { sc->ndis_evt[sc->ndis_evtpidx].ne_buf = malloc(slen, M_TEMP, M_NOWAIT); if (sc->ndis_evt[sc->ndis_evtpidx].ne_buf == NULL) { NDIS_UNLOCK(sc); return; } bcopy((char *)sbuf, sc->ndis_evt[sc->ndis_evtpidx].ne_buf, slen); } sc->ndis_evt[sc->ndis_evtpidx].ne_sts = status; sc->ndis_evt[sc->ndis_evtpidx].ne_len = slen; NDIS_EVTINC(sc->ndis_evtpidx); NDIS_UNLOCK(sc); } static void ndis_linksts_done(adapter) ndis_handle adapter; { ndis_miniport_block *block; struct ndis_softc *sc; struct ifnet *ifp; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; if (!NDIS_INITIALIZED(sc)) return; switch (sc->ndis_sts) { case NDIS_STATUS_MEDIA_CONNECT: IoQueueWorkItem(sc->ndis_tickitem, (io_workitem_func)ndis_ticktask_wrap, WORKQUEUE_CRITICAL, sc); if (!sc->ndis_80211) IoQueueWorkItem(sc->ndis_startitem, (io_workitem_func)ndis_ifstarttask_wrap, WORKQUEUE_CRITICAL, sc); break; case NDIS_STATUS_MEDIA_DISCONNECT: if (sc->ndis_link) IoQueueWorkItem(sc->ndis_tickitem, (io_workitem_func)ndis_ticktask_wrap, WORKQUEUE_CRITICAL, sc); break; default: break; } } static void ndis_tick(xsc) void *xsc; { struct ndis_softc *sc; sc = xsc; if (sc->ndis_hang_timer && --sc->ndis_hang_timer == 0) { IoQueueWorkItem(sc->ndis_tickitem, (io_workitem_func)ndis_ticktask_wrap, WORKQUEUE_CRITICAL, sc); sc->ndis_hang_timer = sc->ndis_block->nmb_checkforhangsecs; } if (sc->ndis_tx_timer && --sc->ndis_tx_timer == 0) { if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1); device_printf(sc->ndis_dev, "watchdog timeout\n"); IoQueueWorkItem(sc->ndis_resetitem, (io_workitem_func)ndis_resettask_wrap, WORKQUEUE_CRITICAL, sc); if (!sc->ndis_80211) IoQueueWorkItem(sc->ndis_startitem, (io_workitem_func)ndis_ifstarttask_wrap, WORKQUEUE_CRITICAL, sc); } callout_reset(&sc->ndis_stat_callout, hz, ndis_tick, sc); } static void ndis_ticktask(device_object *d, void *xsc) { struct ndis_softc *sc = xsc; ndis_checkforhang_handler hangfunc; uint8_t rval; NDIS_LOCK(sc); if (!NDIS_INITIALIZED(sc)) { NDIS_UNLOCK(sc); return; } NDIS_UNLOCK(sc); hangfunc = sc->ndis_chars->nmc_checkhang_func; if (hangfunc != NULL) { rval = MSCALL1(hangfunc, sc->ndis_block->nmb_miniportadapterctx); if (rval == TRUE) { ndis_reset_nic(sc); return; } } NDIS_LOCK(sc); if (sc->ndis_link == 0 && sc->ndis_sts == NDIS_STATUS_MEDIA_CONNECT) { sc->ndis_link = 1; if (sc->ndis_80211 != 0) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); if (vap != NULL) { NDIS_UNLOCK(sc); ndis_getstate_80211(sc); ieee80211_new_state(vap, IEEE80211_S_RUN, -1); NDIS_LOCK(sc); if_link_state_change(vap->iv_ifp, LINK_STATE_UP); } } else if_link_state_change(sc->ifp, LINK_STATE_UP); } if (sc->ndis_link == 1 && sc->ndis_sts == NDIS_STATUS_MEDIA_DISCONNECT) { sc->ndis_link = 0; if (sc->ndis_80211 != 0) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); if (vap != NULL) { NDIS_UNLOCK(sc); ieee80211_new_state(vap, IEEE80211_S_SCAN, 0); NDIS_LOCK(sc); if_link_state_change(vap->iv_ifp, LINK_STATE_DOWN); } } else if_link_state_change(sc->ifp, LINK_STATE_DOWN); } NDIS_UNLOCK(sc); } static void ndis_map_sclist(arg, segs, nseg, mapsize, error) void *arg; bus_dma_segment_t *segs; int nseg; bus_size_t mapsize; int error; { struct ndis_sc_list *sclist; int i; if (error || arg == NULL) return; sclist = arg; sclist->nsl_frags = nseg; for (i = 0; i < nseg; i++) { sclist->nsl_elements[i].nse_addr.np_quad = segs[i].ds_addr; sclist->nsl_elements[i].nse_len = segs[i].ds_len; } } static int 
ndis_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) { /* no support; just discard */ m_freem(m); ieee80211_free_node(ni); return (0); } static void ndis_update_mcast(struct ieee80211com *ic) { struct ndis_softc *sc = ic->ic_softc; ndis_setmulti(sc); } static void ndis_update_promisc(struct ieee80211com *ic) { /* not supported */ } static void ndis_ifstarttask(device_object *d, void *arg) { struct ndis_softc *sc = arg; DPRINTF(("%s: sc=%p, ifp=%p\n", __func__, sc, sc->ifp)); if (sc->ndis_80211) return; struct ifnet *ifp = sc->ifp; if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) ndis_ifstart(ifp); } /* * Main transmit routine. To make NDIS drivers happy, we need to * transform mbuf chains into NDIS packets and feed them to the * send packet routines. Most drivers allow you to send several * packets at once (up to the maxpkts limit). Unfortunately, rather * that accepting them in the form of a linked list, they expect * a contiguous array of pointers to packets. * * For those drivers which use the NDIS scatter/gather DMA mechanism, * we need to perform busdma work here. Those that use map registers * will do the mapping themselves on a buffer by buffer basis. */ static void ndis_ifstart(struct ifnet *ifp) { struct ndis_softc *sc; struct mbuf *m = NULL; ndis_packet **p0 = NULL, *p = NULL; ndis_tcpip_csum *csum; int pcnt = 0, status; sc = ifp->if_softc; NDIS_LOCK(sc); if (!sc->ndis_link || ifp->if_drv_flags & IFF_DRV_OACTIVE) { NDIS_UNLOCK(sc); return; } p0 = &sc->ndis_txarray[sc->ndis_txidx]; while(sc->ndis_txpending) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m); if (m == NULL) break; NdisAllocatePacket(&status, &sc->ndis_txarray[sc->ndis_txidx], sc->ndis_txpool); if (status != NDIS_STATUS_SUCCESS) break; if (ndis_mtop(m, &sc->ndis_txarray[sc->ndis_txidx])) { IFQ_DRV_PREPEND(&ifp->if_snd, m); NDIS_UNLOCK(sc); return; } /* * Save pointer to original mbuf * so we can free it later. */ p = sc->ndis_txarray[sc->ndis_txidx]; p->np_txidx = sc->ndis_txidx; p->np_m0 = m; p->np_oob.npo_status = NDIS_STATUS_PENDING; /* * Do scatter/gather processing, if driver requested it. */ if (sc->ndis_sc) { bus_dmamap_load_mbuf(sc->ndis_ttag, sc->ndis_tmaps[sc->ndis_txidx], m, ndis_map_sclist, &p->np_sclist, BUS_DMA_NOWAIT); bus_dmamap_sync(sc->ndis_ttag, sc->ndis_tmaps[sc->ndis_txidx], BUS_DMASYNC_PREREAD); p->np_ext.npe_info[ndis_sclist_info] = &p->np_sclist; } /* Handle checksum offload. */ if (ifp->if_capenable & IFCAP_TXCSUM && m->m_pkthdr.csum_flags) { csum = (ndis_tcpip_csum *) &p->np_ext.npe_info[ndis_tcpipcsum_info]; csum->u.ntc_txflags = NDIS_TXCSUM_DO_IPV4; if (m->m_pkthdr.csum_flags & CSUM_IP) csum->u.ntc_txflags |= NDIS_TXCSUM_DO_IP; if (m->m_pkthdr.csum_flags & CSUM_TCP) csum->u.ntc_txflags |= NDIS_TXCSUM_DO_TCP; if (m->m_pkthdr.csum_flags & CSUM_UDP) csum->u.ntc_txflags |= NDIS_TXCSUM_DO_UDP; p->np_private.npp_flags = NDIS_PROTOCOL_ID_TCP_IP; } NDIS_INC(sc); sc->ndis_txpending--; pcnt++; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ if (!sc->ndis_80211) /* XXX handle 80211 */ BPF_MTAP(ifp, m); /* * The array that p0 points to must appear contiguous, * so we must not wrap past the end of sc->ndis_txarray[]. * If it looks like we're about to wrap, break out here * so the this batch of packets can be transmitted, then * wait for txeof to ask us to send the rest. 
*/ if (sc->ndis_txidx == 0) break; } if (pcnt == 0) { NDIS_UNLOCK(sc); return; } if (sc->ndis_txpending == 0) ifp->if_drv_flags |= IFF_DRV_OACTIVE; /* * Set a timeout in case the chip goes out to lunch. */ sc->ndis_tx_timer = 5; NDIS_UNLOCK(sc); /* * According to NDIS documentation, if a driver exports * a MiniportSendPackets() routine, we prefer that over * a MiniportSend() routine (which sends just a single * packet). */ if (sc->ndis_chars->nmc_sendmulti_func != NULL) ndis_send_packets(sc, p0, pcnt); else ndis_send_packet(sc, p); return; } static int ndis_80211transmit(struct ieee80211com *ic, struct mbuf *m) { struct ndis_softc *sc = ic->ic_softc; ndis_packet **p0 = NULL, *p = NULL; int status; NDIS_LOCK(sc); if (!sc->ndis_link || !sc->ndis_running) { NDIS_UNLOCK(sc); return (ENXIO); } if (sc->ndis_txpending == 0) { NDIS_UNLOCK(sc); return (ENOBUFS); } p0 = &sc->ndis_txarray[sc->ndis_txidx]; NdisAllocatePacket(&status, &sc->ndis_txarray[sc->ndis_txidx], sc->ndis_txpool); if (status != NDIS_STATUS_SUCCESS) { NDIS_UNLOCK(sc); return (ENOBUFS); } if (ndis_mtop(m, &sc->ndis_txarray[sc->ndis_txidx])) { NDIS_UNLOCK(sc); return (ENOBUFS); } /* * Save pointer to original mbuf * so we can free it later. */ p = sc->ndis_txarray[sc->ndis_txidx]; p->np_txidx = sc->ndis_txidx; p->np_m0 = m; p->np_oob.npo_status = NDIS_STATUS_PENDING; /* * Do scatter/gather processing, if driver requested it. */ if (sc->ndis_sc) { bus_dmamap_load_mbuf(sc->ndis_ttag, sc->ndis_tmaps[sc->ndis_txidx], m, ndis_map_sclist, &p->np_sclist, BUS_DMA_NOWAIT); bus_dmamap_sync(sc->ndis_ttag, sc->ndis_tmaps[sc->ndis_txidx], BUS_DMASYNC_PREREAD); p->np_ext.npe_info[ndis_sclist_info] = &p->np_sclist; } NDIS_INC(sc); sc->ndis_txpending--; /* * Set a timeout in case the chip goes out to lunch. */ sc->ndis_tx_timer = 5; NDIS_UNLOCK(sc); /* * According to NDIS documentation, if a driver exports * a MiniportSendPackets() routine, we prefer that over * a MiniportSend() routine (which sends just a single * packet). */ if (sc->ndis_chars->nmc_sendmulti_func != NULL) ndis_send_packets(sc, p0, 1); else ndis_send_packet(sc, p); return (0); } static void ndis_80211parent(struct ieee80211com *ic) { struct ndis_softc *sc = ic->ic_softc; /*NDIS_LOCK(sc);*/ if (ic->ic_nrunning > 0) { if (!sc->ndis_running) ndis_init(sc); } else if (sc->ndis_running) ndis_stop(sc); /*NDIS_UNLOCK(sc);*/ } static void ndis_init(void *xsc) { struct ndis_softc *sc = xsc; int i, len, error; /* * Avoid reintializing the link unnecessarily. * This should be dealt with in a better way by * fixing the upper layer modules so they don't * call ifp->if_init() quite as often. */ if (sc->ndis_link) return; /* * Cancel pending I/O and free all RX/TX buffers. */ ndis_stop(sc); if (!(sc->ndis_iftype == PNPBus && ndisusb_halt == 0)) { error = ndis_init_nic(sc); if (error != 0) { device_printf(sc->ndis_dev, "failed to initialize the device: %d\n", error); return; } } /* Program the packet filter */ sc->ndis_filter = NDIS_PACKET_TYPE_DIRECTED | NDIS_PACKET_TYPE_BROADCAST; if (sc->ndis_80211) { struct ieee80211com *ic = &sc->ndis_ic; if (ic->ic_promisc > 0) sc->ndis_filter |= NDIS_PACKET_TYPE_PROMISCUOUS; } else { struct ifnet *ifp = sc->ifp; if (ifp->if_flags & IFF_PROMISC) sc->ndis_filter |= NDIS_PACKET_TYPE_PROMISCUOUS; } len = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &len); if (error) device_printf(sc->ndis_dev, "set filter failed: %d\n", error); /* * Set lookahead. 
*/ if (sc->ndis_80211) i = ETHERMTU; else i = sc->ifp->if_mtu; len = sizeof(i); ndis_set_info(sc, OID_GEN_CURRENT_LOOKAHEAD, &i, &len); /* * Program the multicast filter, if necessary. */ ndis_setmulti(sc); /* Setup task offload. */ ndis_set_offload(sc); NDIS_LOCK(sc); sc->ndis_txidx = 0; sc->ndis_txpending = sc->ndis_maxpkts; sc->ndis_link = 0; if (!sc->ndis_80211) { if_link_state_change(sc->ifp, LINK_STATE_UNKNOWN); sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } sc->ndis_tx_timer = 0; /* * Some drivers don't set this value. The NDIS spec says * the default checkforhang timeout is "approximately 2 * seconds." We use 3 seconds, because it seems for some * drivers, exactly 2 seconds is too fast. */ if (sc->ndis_block->nmb_checkforhangsecs == 0) sc->ndis_block->nmb_checkforhangsecs = 3; sc->ndis_hang_timer = sc->ndis_block->nmb_checkforhangsecs; callout_reset(&sc->ndis_stat_callout, hz, ndis_tick, sc); sc->ndis_running = 1; NDIS_UNLOCK(sc); /* XXX force handling */ if (sc->ndis_80211) ieee80211_start_all(&sc->ndis_ic); /* start all vap's */ } /* * Set media options. */ static int ndis_ifmedia_upd(ifp) struct ifnet *ifp; { struct ndis_softc *sc; sc = ifp->if_softc; if (NDIS_INITIALIZED(sc)) ndis_init(sc); return (0); } /* * Report current media status. */ static void ndis_ifmedia_sts(ifp, ifmr) struct ifnet *ifp; struct ifmediareq *ifmr; { struct ndis_softc *sc; uint32_t media_info; ndis_media_state linkstate; int len; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; sc = ifp->if_softc; if (!NDIS_INITIALIZED(sc)) return; len = sizeof(linkstate); ndis_get_info(sc, OID_GEN_MEDIA_CONNECT_STATUS, (void *)&linkstate, &len); len = sizeof(media_info); ndis_get_info(sc, OID_GEN_LINK_SPEED, (void *)&media_info, &len); if (linkstate == nmc_connected) ifmr->ifm_status |= IFM_ACTIVE; switch (media_info) { case 100000: ifmr->ifm_active |= IFM_10_T; break; case 1000000: ifmr->ifm_active |= IFM_100_TX; break; case 10000000: ifmr->ifm_active |= IFM_1000_T; break; default: device_printf(sc->ndis_dev, "unknown speed: %d\n", media_info); break; } } static int ndis_set_cipher(struct ndis_softc *sc, int cipher) { struct ieee80211com *ic = &sc->ndis_ic; int rval = 0, len; uint32_t arg, save; len = sizeof(arg); if (cipher == WPA_CSE_WEP40 || cipher == WPA_CSE_WEP104) { if (!(ic->ic_cryptocaps & IEEE80211_CRYPTO_WEP)) return (ENOTSUP); arg = NDIS_80211_WEPSTAT_ENC1ENABLED; } if (cipher == WPA_CSE_TKIP) { if (!(ic->ic_cryptocaps & IEEE80211_CRYPTO_TKIP)) return (ENOTSUP); arg = NDIS_80211_WEPSTAT_ENC2ENABLED; } if (cipher == WPA_CSE_CCMP) { if (!(ic->ic_cryptocaps & IEEE80211_CRYPTO_AES_CCM)) return (ENOTSUP); arg = NDIS_80211_WEPSTAT_ENC3ENABLED; } DPRINTF(("Setting cipher to %d\n", arg)); save = arg; rval = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &len); if (rval) return (rval); /* Check that the cipher was set correctly. */ len = sizeof(save); rval = ndis_get_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &len); if (rval != 0 || arg != save) return (ENODEV); return (0); } /* * WPA is hairy to set up. Do the work in a separate routine * so we don't clutter the setstate function too much. * Important yet undocumented fact: first we have to set the * authentication mode, _then_ we enable the ciphers. If one * of the WPA authentication modes isn't enabled, the driver * might not permit the TKIP or AES ciphers to be selected. 
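 * In OID terms: OID_802_11_AUTHENTICATION_MODE must be set before
 * OID_802_11_ENCRYPTION_STATUS, or the cipher selection may be refused.
 *
 * Boiled down to the two calls in order (sketch only; WPA-PSK with the
 * TKIP cipher is an assumed example configuration):
 */
#if 0	/* illustrative sketch only */
static void
example_wpa_order(struct ndis_softc *sc)
{
	uint32_t arg;
	int len;

	len = sizeof(arg);
	arg = NDIS_80211_AUTHMODE_WPAPSK;	/* 1: authentication mode */
	ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &len);

	len = sizeof(arg);
	arg = NDIS_80211_WEPSTAT_ENC2ENABLED;	/* 2: then the cipher */
	ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &len);
}
#endif
/* ndis_set_wpa() itself: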
*/ static int ndis_set_wpa(sc, ie, ielen) struct ndis_softc *sc; void *ie; int ielen; { struct ieee80211_ie_wpa *w; struct ndis_ie *n; char *pos; uint32_t arg; int i; /* * Apparently, the only way for us to know what ciphers * and key management/authentication mode to use is for * us to inspect the optional information element (IE) * stored in the 802.11 state machine. This IE should be * supplied by the WPA supplicant. */ w = (struct ieee80211_ie_wpa *)ie; /* Check for the right kind of IE. */ if (w->wpa_id != IEEE80211_ELEMID_VENDOR) { DPRINTF(("Incorrect IE type %d\n", w->wpa_id)); return (EINVAL); } /* Skip over the ucast cipher OIDs. */ pos = (char *)&w->wpa_uciphers[0]; pos += w->wpa_uciphercnt * sizeof(struct ndis_ie); /* Skip over the authmode count. */ pos += sizeof(u_int16_t); /* * Check for the authentication modes. I'm * pretty sure there's only supposed to be one. */ n = (struct ndis_ie *)pos; if (n->ni_val == WPA_ASE_NONE) arg = NDIS_80211_AUTHMODE_WPANONE; if (n->ni_val == WPA_ASE_8021X_UNSPEC) arg = NDIS_80211_AUTHMODE_WPA; if (n->ni_val == WPA_ASE_8021X_PSK) arg = NDIS_80211_AUTHMODE_WPAPSK; DPRINTF(("Setting WPA auth mode to %d\n", arg)); i = sizeof(arg); if (ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i)) return (ENOTSUP); i = sizeof(arg); ndis_get_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i); /* Now configure the desired ciphers. */ /* First, set up the multicast group cipher. */ n = (struct ndis_ie *)&w->wpa_mcipher[0]; if (ndis_set_cipher(sc, n->ni_val)) return (ENOTSUP); /* Now start looking around for the unicast ciphers. */ pos = (char *)&w->wpa_uciphers[0]; n = (struct ndis_ie *)pos; for (i = 0; i < w->wpa_uciphercnt; i++) { if (ndis_set_cipher(sc, n->ni_val)) return (ENOTSUP); n++; } return (0); } static void ndis_media_status(struct ifnet *ifp, struct ifmediareq *imr) { struct ieee80211vap *vap = ifp->if_softc; struct ndis_softc *sc = vap->iv_ic->ic_softc; uint32_t txrate; int len; if (!NDIS_INITIALIZED(sc)) return; len = sizeof(txrate); if (ndis_get_info(sc, OID_GEN_LINK_SPEED, &txrate, &len) == 0) vap->iv_bss->ni_txrate = txrate / 5000; ieee80211_media_status(ifp, imr); } static void ndis_setstate_80211(struct ndis_softc *sc) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); ndis_80211_macaddr bssid; ndis_80211_config config; int rval = 0, len; uint32_t arg; if (!NDIS_INITIALIZED(sc)) { DPRINTF(("%s: NDIS not initialized\n", __func__)); return; } /* Disassociate and turn off radio. */ len = sizeof(arg); arg = 1; ndis_set_info(sc, OID_802_11_DISASSOCIATE, &arg, &len); /* Set network infrastructure mode. */ len = sizeof(arg); if (ic->ic_opmode == IEEE80211_M_IBSS) arg = NDIS_80211_NET_INFRA_IBSS; else arg = NDIS_80211_NET_INFRA_BSS; rval = ndis_set_info(sc, OID_802_11_INFRASTRUCTURE_MODE, &arg, &len); if (rval) device_printf (sc->ndis_dev, "set infra failed: %d\n", rval); /* Set power management */ len = sizeof(arg); if (vap->iv_flags & IEEE80211_F_PMGTON) arg = NDIS_80211_POWERMODE_FAST_PSP; else arg = NDIS_80211_POWERMODE_CAM; ndis_set_info(sc, OID_802_11_POWER_MODE, &arg, &len); /* Set TX power */ if ((ic->ic_caps & IEEE80211_C_TXPMGT) && ic->ic_txpowlimit < nitems(dBm2mW)) { arg = dBm2mW[ic->ic_txpowlimit]; len = sizeof(arg); ndis_set_info(sc, OID_802_11_TX_POWER_LEVEL, &arg, &len); } /* * Default encryption mode to off, authentication * to open and privacy to 'accept everything.' 
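ndis_set_wpa() above walks the WPA information element by hand: past the fixed header and multicast cipher, over wpa_uciphercnt unicast cipher entries, then past a 16-bit authentication-selector count to reach the authentication mode. A sketch of just that pointer arithmetic, assuming one OUI-plus-selector pair per struct ndis_ie as the code above does:

static const struct ndis_ie *
wpa_ie_authmode(const struct ieee80211_ie_wpa *w)
{
	const char *pos;

	/* skip the unicast cipher list ... */
	pos = (const char *)&w->wpa_uciphers[0];
	pos += w->wpa_uciphercnt * sizeof(struct ndis_ie);
	/* ... and the 16-bit auth-selector count behind it */
	pos += sizeof(uint16_t);
	return ((const struct ndis_ie *)pos);
}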
*/ len = sizeof(arg); arg = NDIS_80211_WEPSTAT_DISABLED; ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &len); len = sizeof(arg); arg = NDIS_80211_AUTHMODE_OPEN; ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &len); /* * Note that OID_802_11_PRIVACY_FILTER is optional: * not all drivers implement it. */ len = sizeof(arg); arg = NDIS_80211_PRIVFILT_8021XWEP; ndis_set_info(sc, OID_802_11_PRIVACY_FILTER, &arg, &len); len = sizeof(config); bzero((char *)&config, len); config.nc_length = len; config.nc_fhconfig.ncf_length = sizeof(ndis_80211_config_fh); rval = ndis_get_info(sc, OID_802_11_CONFIGURATION, &config, &len); /* * Some drivers expect us to initialize these values, so * provide some defaults. */ if (config.nc_beaconperiod == 0) config.nc_beaconperiod = 100; if (config.nc_atimwin == 0) config.nc_atimwin = 100; if (config.nc_fhconfig.ncf_dwelltime == 0) config.nc_fhconfig.ncf_dwelltime = 200; if (rval == 0 && ic->ic_bsschan != IEEE80211_CHAN_ANYC) { int chan, chanflag; chan = ieee80211_chan2ieee(ic, ic->ic_bsschan); chanflag = config.nc_dsconfig > 2500000 ? IEEE80211_CHAN_2GHZ : IEEE80211_CHAN_5GHZ; if (chan != ieee80211_mhz2ieee(config.nc_dsconfig / 1000, 0)) { config.nc_dsconfig = ic->ic_bsschan->ic_freq * 1000; len = sizeof(config); config.nc_length = len; config.nc_fhconfig.ncf_length = sizeof(ndis_80211_config_fh); DPRINTF(("Setting channel to %ukHz\n", config.nc_dsconfig)); rval = ndis_set_info(sc, OID_802_11_CONFIGURATION, &config, &len); if (rval) device_printf(sc->ndis_dev, "couldn't change " "DS config to %ukHz: %d\n", config.nc_dsconfig, rval); } } else if (rval) device_printf(sc->ndis_dev, "couldn't retrieve " "channel info: %d\n", rval); /* Set the BSSID to our value so the driver doesn't associate */ len = IEEE80211_ADDR_LEN; bcopy(vap->iv_myaddr, bssid, len); DPRINTF(("Setting BSSID to %6D\n", (uint8_t *)&bssid, ":")); rval = ndis_set_info(sc, OID_802_11_BSSID, &bssid, &len); if (rval) device_printf(sc->ndis_dev, "setting BSSID failed: %d\n", rval); } static void ndis_auth_and_assoc(struct ndis_softc *sc, struct ieee80211vap *vap) { struct ieee80211_node *ni = vap->iv_bss; ndis_80211_ssid ssid; ndis_80211_macaddr bssid; ndis_80211_wep wep; int i, rval = 0, len, error; uint32_t arg; if (!NDIS_INITIALIZED(sc)) { DPRINTF(("%s: NDIS not initialized\n", __func__)); return; } /* Initial setup */ ndis_setstate_80211(sc); /* Set network infrastructure mode. 
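OID_802_11_CONFIGURATION carries the DS channel frequency (nc_dsconfig) in kHz, while net80211 thinks in MHz and channel numbers; that is all the divisions and multiplications by 1000 above are doing. A hedged sketch of the decode direction (the 4 GHz cutoff for choosing a band flag is this sketch's assumption, not the driver's):

static int
dsconfig_to_chan(uint32_t dsconfig_khz)
{
	u_int mhz = dsconfig_khz / 1000;
	/* frequencies beyond roughly 4 GHz belong to the 5 GHz band */
	int flag = (mhz > 4000) ? IEEE80211_CHAN_5GHZ : IEEE80211_CHAN_2GHZ;

	return (ieee80211_mhz2ieee(mhz, flag));
}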
*/ len = sizeof(arg); if (vap->iv_opmode == IEEE80211_M_IBSS) arg = NDIS_80211_NET_INFRA_IBSS; else arg = NDIS_80211_NET_INFRA_BSS; rval = ndis_set_info(sc, OID_802_11_INFRASTRUCTURE_MODE, &arg, &len); if (rval) device_printf (sc->ndis_dev, "set infra failed: %d\n", rval); /* Set RTS threshold */ len = sizeof(arg); arg = vap->iv_rtsthreshold; ndis_set_info(sc, OID_802_11_RTS_THRESHOLD, &arg, &len); /* Set fragmentation threshold */ len = sizeof(arg); arg = vap->iv_fragthreshold; ndis_set_info(sc, OID_802_11_FRAGMENTATION_THRESHOLD, &arg, &len); /* Set WEP */ if (vap->iv_flags & IEEE80211_F_PRIVACY && !(vap->iv_flags & IEEE80211_F_WPA)) { int keys_set = 0; if (ni->ni_authmode == IEEE80211_AUTH_SHARED) { len = sizeof(arg); arg = NDIS_80211_AUTHMODE_SHARED; DPRINTF(("Setting shared auth\n")); ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &len); } for (i = 0; i < IEEE80211_WEP_NKID; i++) { if (vap->iv_nw_keys[i].wk_keylen) { if (vap->iv_nw_keys[i].wk_cipher->ic_cipher != IEEE80211_CIPHER_WEP) continue; bzero((char *)&wep, sizeof(wep)); wep.nw_keylen = vap->iv_nw_keys[i].wk_keylen; /* * 5, 13 and 16 are the only valid * key lengths. Anything in between * will be zero padded out to the * next highest boundary. */ if (vap->iv_nw_keys[i].wk_keylen < 5) wep.nw_keylen = 5; else if (vap->iv_nw_keys[i].wk_keylen > 5 && vap->iv_nw_keys[i].wk_keylen < 13) wep.nw_keylen = 13; else if (vap->iv_nw_keys[i].wk_keylen > 13 && vap->iv_nw_keys[i].wk_keylen < 16) wep.nw_keylen = 16; wep.nw_keyidx = i; wep.nw_length = (sizeof(uint32_t) * 3) + wep.nw_keylen; if (i == vap->iv_def_txkey) wep.nw_keyidx |= NDIS_80211_WEPKEY_TX; bcopy(vap->iv_nw_keys[i].wk_key, wep.nw_keydata, wep.nw_length); len = sizeof(wep); DPRINTF(("Setting WEP key %d\n", i)); rval = ndis_set_info(sc, OID_802_11_ADD_WEP, &wep, &len); if (rval) device_printf(sc->ndis_dev, "set wepkey failed: %d\n", rval); keys_set++; } } if (keys_set) { DPRINTF(("Setting WEP on\n")); arg = NDIS_80211_WEPSTAT_ENABLED; len = sizeof(arg); rval = ndis_set_info(sc, OID_802_11_WEP_STATUS, &arg, &len); if (rval) device_printf(sc->ndis_dev, "enable WEP failed: %d\n", rval); if (vap->iv_flags & IEEE80211_F_DROPUNENC) arg = NDIS_80211_PRIVFILT_8021XWEP; else arg = NDIS_80211_PRIVFILT_ACCEPTALL; len = sizeof(arg); ndis_set_info(sc, OID_802_11_PRIVACY_FILTER, &arg, &len); } } /* Set up WPA. */ if ((vap->iv_flags & IEEE80211_F_WPA) && vap->iv_appie_assocreq != NULL) { struct ieee80211_appie *ie = vap->iv_appie_assocreq; error = ndis_set_wpa(sc, ie->ie_data, ie->ie_len); if (error != 0) device_printf(sc->ndis_dev, "WPA setup failed\n"); } #ifdef notyet /* Set network type. */ arg = 0; switch (vap->iv_curmode) { case IEEE80211_MODE_11A: arg = NDIS_80211_NETTYPE_11OFDM5; break; case IEEE80211_MODE_11B: arg = NDIS_80211_NETTYPE_11DS; break; case IEEE80211_MODE_11G: arg = NDIS_80211_NETTYPE_11OFDM24; break; default: device_printf(sc->ndis_dev, "unknown mode: %d\n", vap->iv_curmode); } if (arg) { DPRINTF(("Setting network type to %d\n", arg)); len = sizeof(arg); rval = ndis_set_info(sc, OID_802_11_NETWORK_TYPE_IN_USE, &arg, &len); if (rval) device_printf(sc->ndis_dev, "set nettype failed: %d\n", rval); } #endif /* * If the user selected a specific BSSID, try * to use that one. This is useful in the case where * there are several APs in range with the same network * name. To delete the BSSID, we use the broadcast * address as the BSSID. 
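The ladder of length checks above enforces the rule from its comment: NDIS accepts only 5-, 13- and 16-byte WEP keys, and anything in between is zero-padded up to the next boundary (the zero fill comes from the earlier bzero of the wep structure). The same rule as a hypothetical helper, clamping oversized keys at 16:

static int
ndis_wep_keylen(int keylen)
{
	if (keylen <= 5)
		return (5);	/* 40-bit WEP */
	if (keylen <= 13)
		return (13);	/* 104-bit WEP */
	return (16);		/* 128-bit WEP */
}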
* Note that some drivers seem to allow setting a BSSID * in ad-hoc mode, which has the effect of forcing the * NIC to create an ad-hoc cell with a specific BSSID, * instead of a randomly chosen one. However, the net80211 * code makes the assumption that the BSSID setting is invalid * when you're in ad-hoc mode, so we don't allow that here. */ len = IEEE80211_ADDR_LEN; if (vap->iv_flags & IEEE80211_F_DESBSSID && vap->iv_opmode != IEEE80211_M_IBSS) bcopy(ni->ni_bssid, bssid, len); else bcopy(ieee80211broadcastaddr, bssid, len); DPRINTF(("Setting BSSID to %6D\n", (uint8_t *)&bssid, ":")); rval = ndis_set_info(sc, OID_802_11_BSSID, &bssid, &len); if (rval) device_printf(sc->ndis_dev, "setting BSSID failed: %d\n", rval); /* Set SSID -- always do this last. */ #ifdef NDIS_DEBUG if (ndis_debug > 0) { printf("Setting ESSID to "); ieee80211_print_essid(ni->ni_essid, ni->ni_esslen); printf("\n"); } #endif len = sizeof(ssid); bzero((char *)&ssid, len); ssid.ns_ssidlen = ni->ni_esslen; if (ssid.ns_ssidlen == 0) { ssid.ns_ssidlen = 1; } else bcopy(ni->ni_essid, ssid.ns_ssid, ssid.ns_ssidlen); rval = ndis_set_info(sc, OID_802_11_SSID, &ssid, &len); if (rval) device_printf (sc->ndis_dev, "set ssid failed: %d\n", rval); return; } static int ndis_get_bssid_list(sc, bl) struct ndis_softc *sc; ndis_80211_bssid_list_ex **bl; { int len, error; len = sizeof(uint32_t) + (sizeof(ndis_wlan_bssid_ex) * 16); *bl = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO); if (*bl == NULL) return (ENOMEM); error = ndis_get_info(sc, OID_802_11_BSSID_LIST, *bl, &len); if (error == ENOSPC) { free(*bl, M_DEVBUF); *bl = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO); if (*bl == NULL) return (ENOMEM); error = ndis_get_info(sc, OID_802_11_BSSID_LIST, *bl, &len); } if (error) { DPRINTF(("%s: failed to read\n", __func__)); free(*bl, M_DEVBUF); return (error); } return (0); } static int ndis_get_assoc(struct ndis_softc *sc, ndis_wlan_bssid_ex **assoc) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap; struct ieee80211_node *ni; ndis_80211_bssid_list_ex *bl; ndis_wlan_bssid_ex *bs; ndis_80211_macaddr bssid; int i, len, error; if (!sc->ndis_link) return (ENOENT); len = sizeof(bssid); error = ndis_get_info(sc, OID_802_11_BSSID, &bssid, &len); if (error) { device_printf(sc->ndis_dev, "failed to get bssid\n"); return (ENOENT); } vap = TAILQ_FIRST(&ic->ic_vaps); ni = vap->iv_bss; error = ndis_get_bssid_list(sc, &bl); if (error) return (error); bs = (ndis_wlan_bssid_ex *)&bl->nblx_bssid[0]; for (i = 0; i < bl->nblx_items; i++) { if (bcmp(bs->nwbx_macaddr, bssid, sizeof(bssid)) == 0) { *assoc = malloc(bs->nwbx_len, M_TEMP, M_NOWAIT); if (*assoc == NULL) { free(bl, M_DEVBUF); return (ENOMEM); } bcopy((char *)bs, (char *)*assoc, bs->nwbx_len); free(bl, M_DEVBUF); if (ic->ic_opmode == IEEE80211_M_STA) ni->ni_associd = 1 | 0xc000; /* fake associd */ return (0); } bs = (ndis_wlan_bssid_ex *)((char *)bs + bs->nwbx_len); } free(bl, M_DEVBUF); return (ENOENT); } static void ndis_getstate_80211(struct ndis_softc *sc) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct ieee80211_node *ni = vap->iv_bss; ndis_wlan_bssid_ex *bs; int rval, len, i = 0; int chanflag; uint32_t arg; if (!NDIS_INITIALIZED(sc)) return; if ((rval = ndis_get_assoc(sc, &bs)) != 0) return; /* We're associated, retrieve info on the current bssid.
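ndis_get_bssid_list() above uses a grow-and-retry query: the first OID call is sized from a guess, and if the driver answers ENOSPC it also rewrites len with the size it really needs, so one reallocation suffices. The pattern in isolation, as a hypothetical helper (the uint32_t OID type is an assumption of this sketch):

static int
oid_query_grow(struct ndis_softc *sc, uint32_t oid, void **buf, int *len)
{
	int error;

	*buf = malloc(*len, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (*buf == NULL)
		return (ENOMEM);
	error = ndis_get_info(sc, oid, *buf, len);
	if (error == ENOSPC) {
		/* *len now holds the size the driver asked for */
		free(*buf, M_DEVBUF);
		*buf = malloc(*len, M_DEVBUF, M_NOWAIT | M_ZERO);
		if (*buf == NULL)
			return (ENOMEM);
		error = ndis_get_info(sc, oid, *buf, len);
	}
	if (error) {
		free(*buf, M_DEVBUF);
		*buf = NULL;
	}
	return (error);
}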
*/ ic->ic_curmode = ndis_nettype_mode(bs->nwbx_nettype); chanflag = ndis_nettype_chan(bs->nwbx_nettype); IEEE80211_ADDR_COPY(ni->ni_bssid, bs->nwbx_macaddr); /* Get SSID from current association info. */ bcopy(bs->nwbx_ssid.ns_ssid, ni->ni_essid, bs->nwbx_ssid.ns_ssidlen); ni->ni_esslen = bs->nwbx_ssid.ns_ssidlen; if (ic->ic_caps & IEEE80211_C_PMGT) { len = sizeof(arg); rval = ndis_get_info(sc, OID_802_11_POWER_MODE, &arg, &len); if (rval) device_printf(sc->ndis_dev, "get power mode failed: %d\n", rval); if (arg == NDIS_80211_POWERMODE_CAM) vap->iv_flags &= ~IEEE80211_F_PMGTON; else vap->iv_flags |= IEEE80211_F_PMGTON; } /* Get TX power */ if (ic->ic_caps & IEEE80211_C_TXPMGT) { len = sizeof(arg); ndis_get_info(sc, OID_802_11_TX_POWER_LEVEL, &arg, &len); for (i = 0; i < nitems(dBm2mW); i++) if (dBm2mW[i] >= arg) break; ic->ic_txpowlimit = i; } /* * Use the current association information to reflect * what channel we're on. */ ic->ic_curchan = ieee80211_find_channel(ic, bs->nwbx_config.nc_dsconfig / 1000, chanflag); if (ic->ic_curchan == NULL) ic->ic_curchan = &ic->ic_channels[0]; ni->ni_chan = ic->ic_curchan; ic->ic_bsschan = ic->ic_curchan; free(bs, M_TEMP); /* * Determine current authentication mode. */ len = sizeof(arg); rval = ndis_get_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &len); if (rval) device_printf(sc->ndis_dev, "get authmode status failed: %d\n", rval); else { vap->iv_flags &= ~IEEE80211_F_WPA; switch (arg) { case NDIS_80211_AUTHMODE_OPEN: ni->ni_authmode = IEEE80211_AUTH_OPEN; break; case NDIS_80211_AUTHMODE_SHARED: ni->ni_authmode = IEEE80211_AUTH_SHARED; break; case NDIS_80211_AUTHMODE_AUTO: ni->ni_authmode = IEEE80211_AUTH_AUTO; break; case NDIS_80211_AUTHMODE_WPA: case NDIS_80211_AUTHMODE_WPAPSK: case NDIS_80211_AUTHMODE_WPANONE: ni->ni_authmode = IEEE80211_AUTH_WPA; vap->iv_flags |= IEEE80211_F_WPA1; break; case NDIS_80211_AUTHMODE_WPA2: case NDIS_80211_AUTHMODE_WPA2PSK: ni->ni_authmode = IEEE80211_AUTH_WPA; vap->iv_flags |= IEEE80211_F_WPA2; break; default: ni->ni_authmode = IEEE80211_AUTH_NONE; break; } } len = sizeof(arg); rval = ndis_get_info(sc, OID_802_11_WEP_STATUS, &arg, &len); if (rval) device_printf(sc->ndis_dev, "get wep status failed: %d\n", rval); if (arg == NDIS_80211_WEPSTAT_ENABLED) vap->iv_flags |= IEEE80211_F_PRIVACY|IEEE80211_F_DROPUNENC; else vap->iv_flags &= ~(IEEE80211_F_PRIVACY|IEEE80211_F_DROPUNENC); } static int ndis_ifioctl(ifp, command, data) struct ifnet *ifp; u_long command; caddr_t data; { struct ndis_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; int i, error = 0; /*NDIS_LOCK(sc);*/ switch (command) { case SIOCSIFFLAGS: if (ifp->if_flags & IFF_UP) { if (sc->ndis_running && ifp->if_flags & IFF_PROMISC && !(sc->ndis_if_flags & IFF_PROMISC)) { sc->ndis_filter |= NDIS_PACKET_TYPE_PROMISCUOUS; i = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &i); } else if (sc->ndis_running && !(ifp->if_flags & IFF_PROMISC) && sc->ndis_if_flags & IFF_PROMISC) { sc->ndis_filter &= ~NDIS_PACKET_TYPE_PROMISCUOUS; i = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &i); } else ndis_init(sc); } else { if (sc->ndis_running) ndis_stop(sc); } sc->ndis_if_flags = ifp->if_flags; error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: ndis_setmulti(sc); error = 0; break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->ifmedia, command); break; case SIOCSIFCAP: ifp->if_capenable = ifr->ifr_reqcap; if 
(ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist = sc->ndis_hwassist; else ifp->if_hwassist = 0; ndis_set_offload(sc); break; default: error = ether_ioctl(ifp, command, data); break; } /*NDIS_UNLOCK(sc);*/ return(error); } static int ndis_80211ioctl(struct ieee80211com *ic, u_long cmd, void *data) { struct ndis_softc *sc = ic->ic_softc; struct ifreq *ifr = data; struct ndis_oid_data oid; struct ndis_evt evt; void *oidbuf = NULL; int error = 0; if ((error = priv_check(curthread, PRIV_DRIVER)) != 0) return (error); switch (cmd) { case SIOCGDRVSPEC: case SIOCSDRVSPEC: error = copyin(ifr->ifr_data, &oid, sizeof(oid)); if (error) break; oidbuf = malloc(oid.len, M_TEMP, M_WAITOK | M_ZERO); error = copyin(ifr->ifr_data + sizeof(oid), oidbuf, oid.len); } if (error) { free(oidbuf, M_TEMP); return (error); } switch (cmd) { case SIOCGDRVSPEC: error = ndis_get_info(sc, oid.oid, oidbuf, &oid.len); break; case SIOCSDRVSPEC: error = ndis_set_info(sc, oid.oid, oidbuf, &oid.len); break; case SIOCGPRIVATE_0: NDIS_LOCK(sc); if (sc->ndis_evt[sc->ndis_evtcidx].ne_sts == 0) { error = ENOENT; NDIS_UNLOCK(sc); break; } error = copyin(ifr->ifr_data, &evt, sizeof(evt)); if (error) { NDIS_UNLOCK(sc); break; } if (evt.ne_len < sc->ndis_evt[sc->ndis_evtcidx].ne_len) { error = ENOSPC; NDIS_UNLOCK(sc); break; } error = copyout(&sc->ndis_evt[sc->ndis_evtcidx], ifr->ifr_data, sizeof(uint32_t) * 2); if (error) { NDIS_UNLOCK(sc); break; } if (sc->ndis_evt[sc->ndis_evtcidx].ne_len) { error = copyout(sc->ndis_evt[sc->ndis_evtcidx].ne_buf, ifr->ifr_data + (sizeof(uint32_t) * 2), sc->ndis_evt[sc->ndis_evtcidx].ne_len); if (error) { NDIS_UNLOCK(sc); break; } free(sc->ndis_evt[sc->ndis_evtcidx].ne_buf, M_TEMP); sc->ndis_evt[sc->ndis_evtcidx].ne_buf = NULL; } sc->ndis_evt[sc->ndis_evtcidx].ne_len = 0; sc->ndis_evt[sc->ndis_evtcidx].ne_sts = 0; NDIS_EVTINC(sc->ndis_evtcidx); NDIS_UNLOCK(sc); break; default: error = ENOTTY; break; } switch (cmd) { case SIOCGDRVSPEC: case SIOCSDRVSPEC: error = copyout(&oid, ifr->ifr_data, sizeof(oid)); if (error) break; error = copyout(oidbuf, ifr->ifr_data + sizeof(oid), oid.len); } free(oidbuf, M_TEMP); return (error); } int ndis_del_key(struct ieee80211vap *vap, const struct ieee80211_key *key) { struct ndis_softc *sc = vap->iv_ic->ic_softc; ndis_80211_key rkey; int len, error = 0; bzero((char *)&rkey, sizeof(rkey)); len = sizeof(rkey); rkey.nk_len = len; rkey.nk_keyidx = key->wk_keyix; bcopy(vap->iv_ifp->if_broadcastaddr, rkey.nk_bssid, IEEE80211_ADDR_LEN); error = ndis_set_info(sc, OID_802_11_REMOVE_KEY, &rkey, &len); if (error) return (0); return (1); } /* * In theory this could be called for any key, but we'll * only use it for WPA TKIP or AES keys. These need to be * set after initial authentication with the AP. 
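The SIOCGDRVSPEC/SIOCSDRVSPEC cases above shuttle a fixed header plus a variable payload through ifr_data: a struct ndis_oid_data giving the OID and payload length, followed by that many bytes. A condensed sketch of the GET round trip, split out as a hypothetical helper:

static int
drvspec_get(struct ndis_softc *sc, struct ifreq *ifr)
{
	struct ndis_oid_data oid;
	void *buf;
	int error;

	/* header first: which OID, and how big the payload is */
	error = copyin(ifr->ifr_data, &oid, sizeof(oid));
	if (error)
		return (error);
	buf = malloc(oid.len, M_TEMP, M_WAITOK | M_ZERO);
	error = ndis_get_info(sc, oid.oid, buf, &oid.len);
	if (error == 0) {
		/* header back out (len may have changed), then payload */
		error = copyout(&oid, ifr->ifr_data, sizeof(oid));
		if (error == 0)
			error = copyout(buf, ifr->ifr_data + sizeof(oid),
			    oid.len);
	}
	free(buf, M_TEMP);
	return (error);
}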
*/ static int ndis_add_key(struct ieee80211vap *vap, const struct ieee80211_key *key) { struct ndis_softc *sc = vap->iv_ic->ic_softc; ndis_80211_key rkey; int len, error = 0; switch (key->wk_cipher->ic_cipher) { case IEEE80211_CIPHER_TKIP: len = sizeof(ndis_80211_key); bzero((char *)&rkey, sizeof(rkey)); rkey.nk_len = len; rkey.nk_keylen = key->wk_keylen; if (key->wk_flags & IEEE80211_KEY_SWMIC) rkey.nk_keylen += 16; /* key index - gets weird in NDIS */ if (key->wk_keyix != IEEE80211_KEYIX_NONE) rkey.nk_keyidx = key->wk_keyix; else rkey.nk_keyidx = 0; if (key->wk_flags & IEEE80211_KEY_XMIT) rkey.nk_keyidx |= 1 << 31; if (key->wk_flags & IEEE80211_KEY_GROUP) { bcopy(ieee80211broadcastaddr, rkey.nk_bssid, IEEE80211_ADDR_LEN); } else { bcopy(vap->iv_bss->ni_bssid, rkey.nk_bssid, IEEE80211_ADDR_LEN); /* pairwise key */ rkey.nk_keyidx |= 1 << 30; } /* need to set bit 29 based on keyrsc */ rkey.nk_keyrsc = key->wk_keyrsc[0]; /* XXX need tid */ if (rkey.nk_keyrsc) rkey.nk_keyidx |= 1 << 29; if (key->wk_flags & IEEE80211_KEY_SWMIC) { bcopy(key->wk_key, rkey.nk_keydata, 16); bcopy(key->wk_key + 24, rkey.nk_keydata + 16, 8); bcopy(key->wk_key + 16, rkey.nk_keydata + 24, 8); } else bcopy(key->wk_key, rkey.nk_keydata, key->wk_keylen); error = ndis_set_info(sc, OID_802_11_ADD_KEY, &rkey, &len); break; case IEEE80211_CIPHER_WEP: error = 0; break; /* * I don't know how to set up keys for the AES * cipher yet. Is it the same as TKIP? */ case IEEE80211_CIPHER_AES_CCM: default: error = ENOTTY; break; } /* We need to return 1 for success, 0 for failure. */ if (error) return (0); return (1); } static void ndis_resettask(d, arg) device_object *d; void *arg; { struct ndis_softc *sc; sc = arg; ndis_reset_nic(sc); } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. */ static void ndis_stop(struct ndis_softc *sc) { int i; callout_drain(&sc->ndis_stat_callout); NDIS_LOCK(sc); sc->ndis_tx_timer = 0; sc->ndis_link = 0; if (!sc->ndis_80211) sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); sc->ndis_running = 0; NDIS_UNLOCK(sc); if (sc->ndis_iftype != PNPBus || (sc->ndis_iftype == PNPBus && !(sc->ndisusb_status & NDISUSB_STATUS_DETACH) && ndisusb_halt != 0)) ndis_halt_nic(sc); NDIS_LOCK(sc); for (i = 0; i < NDIS_EVENTS; i++) { if (sc->ndis_evt[i].ne_sts && sc->ndis_evt[i].ne_buf != NULL) { free(sc->ndis_evt[i].ne_buf, M_TEMP); sc->ndis_evt[i].ne_buf = NULL; } sc->ndis_evt[i].ne_sts = 0; sc->ndis_evt[i].ne_len = 0; } sc->ndis_evtcidx = 0; sc->ndis_evtpidx = 0; NDIS_UNLOCK(sc); } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. 
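ndis_add_key() above packs three flag bits into the top of the 32-bit NDIS key index: bit 31 marks a transmit key, bit 30 a pairwise (rather than group) key, and bit 29 says the key RSC field is valid. Isolated as a hypothetical helper:

static uint32_t
ndis_pack_keyidx(uint32_t slot, int is_tx, int is_pairwise, int has_rsc)
{
	uint32_t idx = slot;		/* low bits: the key slot */

	if (is_tx)
		idx |= 1U << 31;	/* used for transmit */
	if (is_pairwise)
		idx |= 1U << 30;	/* pairwise, not group */
	if (has_rsc)
		idx |= 1U << 29;	/* key RSC supplied */
	return (idx);
}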
*/ void ndis_shutdown(dev) device_t dev; { struct ndis_softc *sc; sc = device_get_softc(dev); ndis_stop(sc); } static int ndis_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct ndis_vap *nvp = NDIS_VAP(vap); struct ieee80211com *ic = vap->iv_ic; struct ndis_softc *sc = ic->ic_softc; enum ieee80211_state ostate; DPRINTF(("%s: %s -> %s\n", __func__, ieee80211_state_name[vap->iv_state], ieee80211_state_name[nstate])); ostate = vap->iv_state; vap->iv_state = nstate; switch (nstate) { /* pass on to net80211 */ case IEEE80211_S_INIT: case IEEE80211_S_SCAN: return nvp->newstate(vap, nstate, arg); case IEEE80211_S_ASSOC: if (ostate != IEEE80211_S_AUTH) { IEEE80211_UNLOCK(ic); ndis_auth_and_assoc(sc, vap); IEEE80211_LOCK(ic); } break; case IEEE80211_S_AUTH: IEEE80211_UNLOCK(ic); ndis_auth_and_assoc(sc, vap); if (vap->iv_state == IEEE80211_S_AUTH) /* XXX */ ieee80211_new_state(vap, IEEE80211_S_ASSOC, 0); IEEE80211_LOCK(ic); break; default: break; } return (0); } static void ndis_scan(void *arg) { struct ieee80211vap *vap = arg; ieee80211_scan_done(vap); } static void ndis_scan_results(struct ndis_softc *sc) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); ndis_80211_bssid_list_ex *bl; ndis_wlan_bssid_ex *wb; struct ieee80211_scanparams sp; struct ieee80211_frame wh; struct ieee80211_channel *saved_chan; int i, j; int rssi, noise, freq, chanflag; uint8_t ssid[2+IEEE80211_NWID_LEN]; uint8_t rates[2+IEEE80211_RATE_MAXSIZE]; uint8_t *frm, *efrm; saved_chan = ic->ic_curchan; noise = -96; if (ndis_get_bssid_list(sc, &bl)) return; DPRINTF(("%s: %d results\n", __func__, bl->nblx_items)); wb = &bl->nblx_bssid[0]; for (i = 0; i < bl->nblx_items; i++) { memset(&sp, 0, sizeof(sp)); memcpy(wh.i_addr2, wb->nwbx_macaddr, sizeof(wh.i_addr2)); memcpy(wh.i_addr3, wb->nwbx_macaddr, sizeof(wh.i_addr3)); rssi = 100 * (wb->nwbx_rssi - noise) / (-32 - noise); rssi = max(0, min(rssi, 100)); /* limit 0 <= rssi <= 100 */ if (wb->nwbx_privacy) sp.capinfo |= IEEE80211_CAPINFO_PRIVACY; sp.bintval = wb->nwbx_config.nc_beaconperiod; switch (wb->nwbx_netinfra) { case NDIS_80211_NET_INFRA_IBSS: sp.capinfo |= IEEE80211_CAPINFO_IBSS; break; case NDIS_80211_NET_INFRA_BSS: sp.capinfo |= IEEE80211_CAPINFO_ESS; break; } sp.rates = &rates[0]; for (j = 0; j < IEEE80211_RATE_MAXSIZE; j++) { /* XXX - check units */ if (wb->nwbx_supportedrates[j] == 0) break; rates[2 + j] = wb->nwbx_supportedrates[j] & 0x7f; } rates[1] = j; sp.ssid = (uint8_t *)&ssid[0]; memcpy(sp.ssid + 2, &wb->nwbx_ssid.ns_ssid, wb->nwbx_ssid.ns_ssidlen); sp.ssid[1] = wb->nwbx_ssid.ns_ssidlen; chanflag = ndis_nettype_chan(wb->nwbx_nettype); freq = wb->nwbx_config.nc_dsconfig / 1000; sp.chan = sp.bchan = ieee80211_mhz2ieee(freq, chanflag); /* Hack ic->ic_curchan to be in sync with the scan result */ ic->ic_curchan = ieee80211_find_channel(ic, freq, chanflag); if (ic->ic_curchan == NULL) ic->ic_curchan = &ic->ic_channels[0]; /* Process extended info from AP */ if (wb->nwbx_len > sizeof(ndis_wlan_bssid)) { frm = (uint8_t *)&wb->nwbx_ies; efrm = frm + wb->nwbx_ielen; if (efrm - frm < 12) goto done; sp.tstamp = frm; frm += 8; sp.bintval = le16toh(*(uint16_t *)frm); frm += 2; sp.capinfo = le16toh(*(uint16_t *)frm); frm += 2; sp.ies = frm; sp.ies_len = efrm - frm; } done: DPRINTF(("scan: bssid %s chan %dMHz (%d/%d) rssi %d\n", ether_sprintf(wb->nwbx_macaddr), freq, sp.bchan, chanflag, rssi)); ieee80211_add_scan(vap, ic->ic_curchan, &sp, &wh, 0, rssi, noise); wb = (ndis_wlan_bssid_ex *)((char *)wb + 
wb->nwbx_len); } free(bl, M_DEVBUF); /* Restore the channel after messing with it */ ic->ic_curchan = saved_chan; } static void ndis_scan_start(struct ieee80211com *ic) { struct ndis_softc *sc = ic->ic_softc; struct ieee80211vap *vap; struct ieee80211_scan_state *ss; ndis_80211_ssid ssid; int error, len; ss = ic->ic_scan; vap = TAILQ_FIRST(&ic->ic_vaps); if (!NDIS_INITIALIZED(sc)) { DPRINTF(("%s: scan aborted\n", __func__)); ieee80211_cancel_scan(vap); return; } len = sizeof(ssid); bzero((char *)&ssid, len); if (ss->ss_nssid == 0) ssid.ns_ssidlen = 1; else { /* Perform a directed scan */ ssid.ns_ssidlen = ss->ss_ssid[0].len; bcopy(ss->ss_ssid[0].ssid, ssid.ns_ssid, ssid.ns_ssidlen); } error = ndis_set_info(sc, OID_802_11_SSID, &ssid, &len); if (error) DPRINTF(("%s: set ESSID failed\n", __func__)); len = 0; error = ndis_set_info(sc, OID_802_11_BSSID_LIST_SCAN, NULL, &len); if (error) { DPRINTF(("%s: scan command failed\n", __func__)); ieee80211_cancel_scan(vap); return; } /* Set a timer to collect the results */ callout_reset(&sc->ndis_scan_callout, hz * 3, ndis_scan, vap); } static void ndis_set_channel(struct ieee80211com *ic) { /* ignore */ } static void ndis_scan_curchan(struct ieee80211_scan_state *ss, unsigned long maxdwell) { /* ignore */ } static void ndis_scan_mindwell(struct ieee80211_scan_state *ss) { /* NB: don't try to abort scan; wait for firmware to finish */ } static void ndis_scan_end(struct ieee80211com *ic) { struct ndis_softc *sc = ic->ic_softc; ndis_scan_results(sc); } Index: head/sys/dev/iscsi_initiator/isc_soc.c =================================================================== --- head/sys/dev/iscsi_initiator/isc_soc.c (revision 324445) +++ head/sys/dev/iscsi_initiator/isc_soc.c (revision 324446) @@ -1,701 +1,699 @@ /*- * Copyright (c) 2005-2010 Daniel Braniss * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ /* | $Id: isc_soc.c 998 2009-12-20 10:32:45Z danny $ */ #include __FBSDID("$FreeBSD$"); #include "opt_iscsi_initiator.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef NO_USE_MBUF #define USE_MBUF #endif #ifdef USE_MBUF static int ou_refcnt = 0; /* | function for freeing external storage for mbuf */ static void -ext_free(struct mbuf *m, void *a, void *b) +ext_free(struct mbuf *m) { - pduq_t *pq = b; + pduq_t *pq = m->m_ext.ext_arg1; if(pq->buf != NULL) { - debug(3, "ou_refcnt=%d a=%p b=%p", ou_refcnt, a, pq->buf); + debug(3, "ou_refcnt=%d a=%p b=%p", + ou_refcnt, m->m_ext.ext_buf, pq->buf); free(pq->buf, M_ISCSIBUF); pq->buf = NULL; } } int isc_sendPDU(isc_session_t *sp, pduq_t *pq) { struct mbuf *mh, **mp; pdu_t *pp = &pq->pdu; int len, error; debug_called(8); /* | mbuf for the iSCSI header */ MGETHDR(mh, M_WAITOK, MT_DATA); mh->m_pkthdr.rcvif = NULL; mh->m_next = NULL; mh->m_len = sizeof(union ipdu_u); if(ISOK2DIG(sp->hdrDigest, pp)) { pp->hdr_dig = sp->hdrDigest(&pp->ipdu, sizeof(union ipdu_u), 0); mh->m_len += sizeof(pp->hdr_dig); if(pp->ahs_len) { debug(2, "ahs_len=%d", pp->ahs_len); pp->hdr_dig = sp->hdrDigest(&pp->ahs_addr, pp->ahs_len, pp->hdr_dig); } debug(3, "pp->hdr_dig=%04x", htonl(pp->hdr_dig)); } if(pp->ahs_len) { /* | Add any AHS to the iSCSI hdr mbuf */ if((mh->m_len + pp->ahs_len) < MHLEN) { M_ALIGN(mh, mh->m_len + pp->ahs_len); bcopy(&pp->ipdu, mh->m_data, mh->m_len); bcopy(pp->ahs_addr, mh->m_data + mh->m_len, pp->ahs_len); mh->m_len += pp->ahs_len; } else panic("len AHS=%d too big, not impleneted yet", pp->ahs_len); } else { M_ALIGN(mh, mh->m_len); bcopy(&pp->ipdu, mh->m_data, mh->m_len); } mh->m_pkthdr.len = mh->m_len; mp = &mh->m_next; if(pp->ds_len && pq->pdu.ds_addr) { struct mbuf *md; int off = 0; len = pp->ds_len; while(len > 0) { int l; MGET(md, M_WAITOK, MT_DATA); md->m_ext.ext_cnt = &ou_refcnt; l = min(MCLBYTES, len); debug(4, "setting ext_free(arg=%p len/l=%d/%d)", pq->buf, len, l); - MEXTADD(md, pp->ds_addr + off, l, ext_free, -#if __FreeBSD_version >= 800000 - pp->ds_addr + off, -#endif - pq, 0, EXT_EXTREF); + m_extadd(md, pp->ds_addr + off, l, ext_free, pq, NULL, 0, + EXT_EXTREF); md->m_len = l; md->m_next = NULL; mh->m_pkthdr.len += l; *mp = md; mp = &md->m_next; len -= l; off += l; } if(((pp->ds_len & 03) != 0) || ISOK2DIG(sp->dataDigest, pp)) { MGET(md, M_WAITOK, MT_DATA); if(pp->ds_len & 03) len = 4 - (pp->ds_len & 03); else len = 0; md->m_len = len; if(ISOK2DIG(sp->dataDigest, pp)) md->m_len += sizeof(pp->ds_dig); M_ALIGN(md, md->m_len); if(ISOK2DIG(sp->dataDigest, pp)) { pp->ds_dig = sp->dataDigest(pp->ds_addr, pp->ds_len, 0); if(len) { bzero(md->m_data, len); // RFC says SHOULD be 0 pp->ds_dig = sp->dataDigest(md->m_data, len, pp->ds_dig); } bcopy(&pp->ds_dig, md->m_data+len, sizeof(pp->ds_dig)); } md->m_next = NULL; mh->m_pkthdr.len += md->m_len; *mp = md; } } if((error = sosend(sp->soc, NULL, NULL, mh, 0, 0, sp->td)) != 0) { sdebug(2, "error=%d", error); return error; } sp->stats.nsent++; getbintime(&sp->stats.t_sent); return 0; } #else /* NO_USE_MBUF */ int isc_sendPDU(isc_session_t *sp, pduq_t *pq) { struct uio *uio = &pq->uio; struct iovec *iv; pdu_t *pp = &pq->pdu; int len, error; debug_called(8); bzero(uio, sizeof(struct uio)); uio->uio_rw = UIO_WRITE; uio->uio_segflg = UIO_SYSSPACE; uio->uio_td = sp->td; uio->uio_iov = iv = pq->iov; iv->iov_base = 
&pp->ipdu; iv->iov_len = sizeof(union ipdu_u); uio->uio_resid = iv->iov_len; iv++; if(ISOK2DIG(sp->hdrDigest, pp)) pq->pdu.hdr_dig = sp->hdrDigest(&pp->ipdu, sizeof(union ipdu_u), 0); if(pp->ahs_len) { iv->iov_base = pp->ahs_addr; iv->iov_len = pp->ahs_len; uio->uio_resid += iv->iov_len; iv++; if(ISOK2DIG(sp->hdrDigest, pp)) pp->hdr_dig = sp->hdrDigest(&pp->ahs_addr, pp->ahs_len, pp->hdr_dig); } if(ISOK2DIG(sp->hdrDigest, pp)) { debug(3, "hdr_dig=%04x", htonl(pp->hdr_dig)); iv->iov_base = &pp->hdr_dig; iv->iov_len = sizeof(int); uio->uio_resid += iv->iov_len ; iv++; } if(pq->pdu.ds_addr && pp->ds_len) { iv->iov_base = pp->ds_addr; iv->iov_len = pp->ds_len; while(iv->iov_len & 03) // the specs say it must be int aligned iv->iov_len++; uio->uio_resid += iv->iov_len ; iv++; if(ISOK2DIG(sp->dataDigest, pp)) { pp->ds_dig = sp->dataDigest(pp->ds, pp->ds_len, 0); iv->iov_base = &pp->ds_dig; iv->iov_len = sizeof(pp->ds_dig); uio->uio_resid += iv->iov_len ; iv++; } } uio->uio_iovcnt = iv - pq->iov; sdebug(4, "pq->len=%d uio->uio_resid=%d uio->uio_iovcnt=%d", pq->len, uio->uio_resid, uio->uio_iovcnt); sdebug(4, "opcode=%x iovcnt=%d uio_resid=%d itt=%x", pp->ipdu.bhs.opcode, uio->uio_iovcnt, uio->uio_resid, ntohl(pp->ipdu.bhs.itt)); sdebug(5, "sp=%p sp->soc=%p uio=%p sp->td=%p", sp, sp->soc, uio, sp->td); do { len = uio->uio_resid; error = sosend(sp->soc, NULL, uio, 0, 0, 0, sp->td); if(uio->uio_resid == 0 || error || len == uio->uio_resid) { if(uio->uio_resid) { sdebug(2, "uio->uio_resid=%d uio->uio_iovcnt=%d error=%d len=%d", uio->uio_resid, uio->uio_iovcnt, error, len); if(error == 0) error = EAGAIN; // 35 } break; } /* | XXX: untested code */ sdebug(1, "uio->uio_resid=%d uio->uio_iovcnt=%d", uio->uio_resid, uio->uio_iovcnt); iv = uio->uio_iov; len -= uio->uio_resid; while(uio->uio_iovcnt > 0) { if(iv->iov_len > len) { caddr_t bp = (caddr_t)iv->iov_base; iv->iov_len -= len; iv->iov_base = (void *)&bp[len]; break; } len -= iv->iov_len; uio->uio_iovcnt--; uio->uio_iov++; iv++; } } while(uio->uio_resid); if(error == 0) { sp->stats.nsent++; getbintime(&sp->stats.t_sent); } return error; } #endif /* USE_MBUF */ /* | wait till a PDU header is received | from the socket. */ /* The format of the BHS is: Byte/ 0 | 1 | 2 | 3 | / | | | | |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| +---------------+---------------+---------------+---------------+ 0|.|I| Opcode |F| Opcode-specific fields | +---------------+---------------+---------------+---------------+ 4|TotalAHSLength | DataSegmentLength | +---------------+---------------+---------------+---------------+ 8| LUN or Opcode-specific fields | + + 12| | +---------------+---------------+---------------+---------------+ 16| Initiator Task Tag | +---------------+---------------+---------------+---------------+ 20/ Opcode-specific fields / +/ / +---------------+---------------+---------------+---------------+ 48 */ static __inline int so_getbhs(isc_session_t *sp) { bhs_t *bhs = &sp->bhs; struct uio *uio = &sp->uio; struct iovec *iov = &sp->iov; int error, flags; debug_called(8); iov->iov_base = bhs; iov->iov_len = sizeof(bhs_t); uio->uio_iov = iov; uio->uio_iovcnt = 1; uio->uio_rw = UIO_READ; uio->uio_segflg = UIO_SYSSPACE; uio->uio_td = curthread; // why ... 
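The ext_free()/m_extadd() hunk earlier in this file is the isc_soc.c half of this commit: the external-free callback now takes only the mbuf, and recovers its context from m_ext.ext_arg1 (stored there by m_extadd()) instead of receiving it as extra parameters. A minimal sketch of the new contract, with illustrative names that are not driver code:

static void
my_ext_free(struct mbuf *m)
{
	void *ctx = m->m_ext.ext_arg1;	/* stashed by m_extadd() below */

	/* ... return m->m_ext.ext_buf to its owner using ctx ... */
	(void)ctx;
}

static void
attach_storage(struct mbuf *m, char *buf, u_int size, void *ctx)
{
	/* ctx rides in ext_arg1; ext_arg2 (here NULL) is also available */
	m_extadd(m, buf, size, my_ext_free, ctx, NULL, 0, EXT_EXTREF);
}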
uio->uio_resid = sizeof(bhs_t); flags = MSG_WAITALL; error = soreceive(sp->soc, NULL, uio, 0, 0, &flags); if(error) debug(2, #if __FreeBSD_version > 800000 "error=%d so_error=%d uio->uio_resid=%zd iov.iov_len=%zd", #else "error=%d so_error=%d uio->uio_resid=%d iov.iov_len=%zd", #endif error, sp->soc->so_error, uio->uio_resid, iov->iov_len); if(!error && (uio->uio_resid > 0)) { error = EPIPE; // was EAGAIN debug(2, #if __FreeBSD_version > 800000 "error=%d so_error=%d uio->uio_resid=%zd iov.iov_len=%zd so_state=%x", #else "error=%d so_error=%d uio->uio_resid=%d iov.iov_len=%zd so_state=%x", #endif error, sp->soc->so_error, uio->uio_resid, iov->iov_len, sp->soc->so_state); } return error; } /* | so_recv gets called when | an iSCSI header has been received. | Note: the designers had no intentions | in making programmer's life easy. */ static int so_recv(isc_session_t *sp, pduq_t *pq) { sn_t *sn = &sp->sn; struct uio *uio = &pq->uio; pdu_t *pp = &pq->pdu; bhs_t *bhs = &pp->ipdu.bhs; struct iovec *iov = pq->iov; int error; u_int len; u_int max, exp; int flags = MSG_WAITALL; debug_called(8); /* | now calculate how much data should be in the buffer */ uio->uio_iov = iov; uio->uio_iovcnt = 0; len = 0; if(bhs->AHSLength) { debug(2, "bhs->AHSLength=%d", bhs->AHSLength); pp->ahs_len = bhs->AHSLength * 4; len += pp->ahs_len; pp->ahs_addr = malloc(pp->ahs_len, M_TEMP, M_WAITOK); // XXX: could get stuck here iov->iov_base = pp->ahs_addr; iov->iov_len = pp->ahs_len; uio->uio_iovcnt++; iov++; } if(ISOK2DIG(sp->hdrDigest, pp)) { len += sizeof(pp->hdr_dig); iov->iov_base = &pp->hdr_dig; iov->iov_len = sizeof(pp->hdr_dig); uio->uio_iovcnt++; } if(len) { uio->uio_rw = UIO_READ; uio->uio_segflg = UIO_SYSSPACE; uio->uio_resid = len; uio->uio_td = sp->td; // why ... error = soreceive(sp->soc, NULL, uio, NULL, NULL, &flags); //if(error == EAGAIN) // XXX: this needs work! it hangs iscontrol if(error || uio->uio_resid) { debug(2, #if __FreeBSD_version > 800000 "len=%d error=%d uio->uio_resid=%zd", #else "len=%d error=%d uio->uio_resid=%d", #endif len, error, uio->uio_resid); goto out; } if(ISOK2DIG(sp->hdrDigest, pp)) { bhs_t *bhs; u_int digest; bhs = (bhs_t *)&pp->ipdu; digest = sp->hdrDigest(bhs, sizeof(bhs_t), 0); if(pp->ahs_len) digest = sp->hdrDigest(pp->ahs_addr, pp->ahs_len, digest); if(pp->hdr_dig != digest) { debug(2, "bad header digest: received=%x calculated=%x", pp->hdr_dig, digest); // XXX: now what? error = EIO; goto out; } } if(pp->ahs_len) { debug(2, "ahs len=%x type=%x spec=%x", pp->ahs_addr->len, pp->ahs_addr->type, pp->ahs_addr->spec); // XXX: till I figure out what to do with this free(pp->ahs_addr, M_TEMP); } pq->len += len; // XXX: who needs this? bzero(uio, sizeof(struct uio)); len = 0; } if(bhs->DSLength) { len = bhs->DSLength; #if BYTE_ORDER == LITTLE_ENDIAN len = ((len & 0x00ff0000) >> 16) | (len & 0x0000ff00) | ((len & 0x000000ff) << 16); #endif pp->ds_len = len; if((sp->opt.maxRecvDataSegmentLength > 0) && (len > sp->opt.maxRecvDataSegmentLength)) { xdebug("impossible PDU length(%d) opt.maxRecvDataSegmentLength=%d", len, sp->opt.maxRecvDataSegmentLength); log(LOG_ERR, "so_recv: impossible PDU length(%d) from iSCSI %s/%s\n", len, sp->opt.targetAddress, sp->opt.targetName); /* | XXX: this will really screwup the stream. | should clear up the buffer till a valid header | is found, or just close connection ... | should read the RFC. */ error = E2BIG; goto out; } while(len & 03) len++; if(ISOK2DIG(sp->dataDigest, pp)) len += 4; uio->uio_resid = len; uio->uio_td = sp->td; // why ... 
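The byte swap above exists because the BHS stores DataSegmentLength as a 24-bit big-endian field inside a 32-bit word, so little-endian hosts must reverse the three low-order bytes by hand; iSCSI then pads the data segment to a 4-byte boundary, which is what the while (len & 03) loop adds. The decode in isolation, returning the padded on-wire length:

static u_int
bhs_dslen_decode(u_int raw)
{
#if BYTE_ORDER == LITTLE_ENDIAN
	raw = ((raw & 0x00ff0000) >> 16) |
	    (raw & 0x0000ff00) |
	    ((raw & 0x000000ff) << 16);
#endif
	return ((raw + 3) & ~3u);	/* padded to a 4-byte boundary */
}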
pq->len += len; // XXX: do we need this? error = soreceive(sp->soc, NULL, uio, &pq->mp, NULL, &flags); //if(error == EAGAIN) // XXX: this needs work! it hangs iscontrol if(error || uio->uio_resid) goto out; if(ISOK2DIG(sp->dataDigest, pp)) { struct mbuf *m; u_int digest, ds_len, cnt; // get the received digest m_copydata(pq->mp, len - sizeof(pp->ds_dig), sizeof(pp->ds_dig), (caddr_t)&pp->ds_dig); // calculate all mbufs digest = 0; ds_len = len - sizeof(pp->ds_dig); for(m = pq->mp; m != NULL; m = m->m_next) { cnt = MIN(ds_len, m->m_len); digest = sp->dataDigest(mtod(m, char *), cnt, digest); ds_len -= cnt; if(ds_len == 0) break; } if(digest != pp->ds_dig) { sdebug(1, "bad data digest: received=%x calculated=%x", pp->ds_dig, digest); error = EIO; // XXX: find a better error goto out; } KASSERT(ds_len == 0, ("ds_len not zero")); } } sdebug(6, "len=%d] opcode=0x%x ahs_len=0x%x ds_len=0x%x", pq->len, bhs->opcode, pp->ahs_len, pp->ds_len); max = ntohl(bhs->MaxCmdSN); exp = ntohl(bhs->ExpStSN); if(max < exp - 1 && max > exp - _MAXINCR) { sdebug(2, "bad cmd window size"); error = EIO; // XXX: for now; goto out; // error } if(SNA_GT(max, sn->maxCmd)) sn->maxCmd = max; if(SNA_GT(exp, sn->expCmd)) sn->expCmd = exp; /* | remove from the holding queue packets | that have been acked and don't need | further processing. */ i_acked_hld(sp, NULL); sp->cws = sn->maxCmd - sn->expCmd + 1; return 0; out: // XXX: need some work here if(pp->ahs_len) { // XXX: till I figure out what to do with this free(pp->ahs_addr, M_TEMP); } xdebug("have a problem, error=%d", error); pdu_free(sp->isc, pq); if(!error && uio->uio_resid > 0) error = EPIPE; return error; } /* | wait for something to arrive. | and if the pdu is without errors, process it. */ static int so_input(isc_session_t *sp) { pduq_t *pq; int error; debug_called(8); /* | first read in the iSCSI header */ error = so_getbhs(sp); if(error == 0) { /* | now read the rest. */ pq = pdu_alloc(sp->isc, M_NOWAIT); if(pq == NULL) { // XXX: might cause a deadlock ... debug(2, "out of pdus, wait"); pq = pdu_alloc(sp->isc, M_WAITOK); // OK to WAIT } pq->pdu.ipdu.bhs = sp->bhs; pq->len = sizeof(bhs_t); // so far only the header was read error = so_recv(sp, pq); if(error != 0) { error += 0x800; // XXX: just to see the error. // terminal error // XXX: close connection and exit } else { sp->stats.nrecv++; getbintime(&sp->stats.t_recv); ism_recv(sp, pq); } } return error; } /* | one per active (connected) session. | this thread is responsible for reading | in packets from the target. */ static void isc_in(void *vp) { isc_session_t *sp = (isc_session_t *)vp; struct socket *so = sp->soc; int error; debug_called(8); sp->flags |= ISC_CON_RUNNING; error = 0; while((sp->flags & (ISC_CON_RUN | ISC_LINK_UP)) == (ISC_CON_RUN | ISC_LINK_UP)) { // XXX: hunting ... if(sp->soc == NULL || !(so->so_state & SS_ISCONNECTED)) { debug(2, "sp->soc=%p", sp->soc); break; } error = so_input(sp); if(error == 0) { mtx_lock(&sp->io_mtx); if(sp->flags & ISC_OWAITING) { wakeup(&sp->flags); } mtx_unlock(&sp->io_mtx); } else if(error == EPIPE) { break; } else if(error == EAGAIN) { if(so->so_state & SS_ISCONNECTED) // there seems to be a problem in 6.0 ... 
tsleep(sp, PRIBIO, "isc_soc", 2*hz); } } sdebug(2, "terminated, flags=%x so_count=%d so_state=%x error=%d proc=%p", sp->flags, so->so_count, so->so_state, error, sp->proc); if((sp->proc != NULL) && sp->signal) { PROC_LOCK(sp->proc); kern_psignal(sp->proc, sp->signal); PROC_UNLOCK(sp->proc); sp->flags |= ISC_SIGNALED; sdebug(2, "pid=%d signaled(%d)", sp->proc->p_pid, sp->signal); } else { // we have to do something ourselves // like closing this session ... } /* | we've been terminated */ // do we need this mutex ...? mtx_lock(&sp->io_mtx); sp->flags &= ~(ISC_CON_RUNNING | ISC_LINK_UP); wakeup(&sp->soc); mtx_unlock(&sp->io_mtx); sdebug(2, "dropped ISC_CON_RUNNING"); #if __FreeBSD_version >= 800000 kproc_exit(0); #else kthread_exit(0); #endif } void isc_stop_receiver(isc_session_t *sp) { int n; debug_called(8); sdebug(3, "sp=%p sp->soc=%p", sp, sp? sp->soc: 0); mtx_lock(&sp->io_mtx); sp->flags &= ~ISC_LINK_UP; msleep(&sp->soc, &sp->io_mtx, PRIBIO|PDROP, "isc_stpc", 5*hz); soshutdown(sp->soc, SHUT_RD); mtx_lock(&sp->io_mtx); sdebug(3, "soshutdown"); sp->flags &= ~ISC_CON_RUN; n = 2; while(n-- && (sp->flags & ISC_CON_RUNNING)) { sdebug(3, "waiting n=%d... flags=%x", n, sp->flags); msleep(&sp->soc, &sp->io_mtx, PRIBIO, "isc_stpc", 5*hz); } mtx_unlock(&sp->io_mtx); if(sp->fp != NULL) fdrop(sp->fp, sp->td); sp->soc = NULL; sp->fp = NULL; sdebug(3, "done"); } void isc_start_receiver(isc_session_t *sp) { debug_called(8); sp->flags |= ISC_CON_RUN | ISC_LINK_UP; #if __FreeBSD_version >= 800000 kproc_create #else kthread_create #endif (isc_in, sp, &sp->soc_proc, 0, 0, "isc_in %d", sp->sid); } Index: head/sys/dev/lge/if_lge.c =================================================================== --- head/sys/dev/lge/if_lge.c (revision 324445) +++ head/sys/dev/lge/if_lge.c (revision 324446) @@ -1,1588 +1,1587 @@ /*- * Copyright (c) 2001 Wind River Systems * Copyright (c) 1997, 1998, 1999, 2000, 2001 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Level 1 LXT1001 gigabit ethernet driver for FreeBSD. Public * documentation not available, but ask me nicely. * * The Level 1 chip is used on some D-Link, SMC and Addtron NICs. * It's a 64-bit PCI part that supports TCP/IP checksum offload, * VLAN tagging/insertion, GMII and TBI (1000baseX) ports. There * are three supported methods for data transfer between host and * NIC: programmed I/O, traditional scatter/gather DMA and Packet * Propulsion Technology (tm) DMA. The latter mechanism is a form * of double buffer DMA where the packet data is copied to a * pre-allocated DMA buffer who's physical address has been loaded * into a table at device initialization time. The rationale is that * the virtual to physical address translation needed for normal * scatter/gather DMA is more expensive than the data copy needed * for double buffering. This may be true in Windows NT and the like, * but it isn't true for us, at least on the x86 arch. This driver * uses the scatter/gather I/O method for both TX and RX. * * The LXT1001 only supports TCP/IP checksum offload on receive. * Also, the VLAN tagging is done using a 16-entry table which allows * the chip to perform hardware filtering based on VLAN tags. Sadly, * our vlan support doesn't currently play well with this kind of * hardware support. * * Special thanks to: * - Jeff James at Intel, for arranging to have the LXT1001 manual * released (at long last) * - Beny Chen at D-Link, for actually sending it to me * - Brad Short and Keith Alexis at SMC, for sending me sample * SMC9462SX and SMC9462TX adapters for testing * - Paul Saab at Y!, for not killing me (though it remains to be seen * if in fact he did me much of a favor) */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for vtophys */ #include /* for vtophys */ #include #include #include #include #include #include #include #include #define LGE_USEIOSPACE #include /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" /* * Various supported device vendors/types and their names. 
*/ static const struct lge_type lge_devs[] = { { LGE_VENDORID, LGE_DEVICEID, "Level 1 Gigabit Ethernet" }, { 0, 0, NULL } }; static int lge_probe(device_t); static int lge_attach(device_t); static int lge_detach(device_t); static int lge_alloc_jumbo_mem(struct lge_softc *); static void lge_free_jumbo_mem(struct lge_softc *); static void *lge_jalloc(struct lge_softc *); -static void lge_jfree(struct mbuf *, void *, void *); +static void lge_jfree(struct mbuf *); static int lge_newbuf(struct lge_softc *, struct lge_rx_desc *, struct mbuf *); static int lge_encap(struct lge_softc *, struct mbuf *, u_int32_t *); static void lge_rxeof(struct lge_softc *, int); static void lge_rxeoc(struct lge_softc *); static void lge_txeof(struct lge_softc *); static void lge_intr(void *); static void lge_tick(void *); static void lge_start(struct ifnet *); static void lge_start_locked(struct ifnet *); static int lge_ioctl(struct ifnet *, u_long, caddr_t); static void lge_init(void *); static void lge_init_locked(struct lge_softc *); static void lge_stop(struct lge_softc *); static void lge_watchdog(struct lge_softc *); static int lge_shutdown(device_t); static int lge_ifmedia_upd(struct ifnet *); static void lge_ifmedia_upd_locked(struct ifnet *); static void lge_ifmedia_sts(struct ifnet *, struct ifmediareq *); static void lge_eeprom_getword(struct lge_softc *, int, u_int16_t *); static void lge_read_eeprom(struct lge_softc *, caddr_t, int, int, int); static int lge_miibus_readreg(device_t, int, int); static int lge_miibus_writereg(device_t, int, int, int); static void lge_miibus_statchg(device_t); static void lge_setmulti(struct lge_softc *); static void lge_reset(struct lge_softc *); static int lge_list_rx_init(struct lge_softc *); static int lge_list_tx_init(struct lge_softc *); #ifdef LGE_USEIOSPACE #define LGE_RES SYS_RES_IOPORT #define LGE_RID LGE_PCI_LOIO #else #define LGE_RES SYS_RES_MEMORY #define LGE_RID LGE_PCI_LOMEM #endif static device_method_t lge_methods[] = { /* Device interface */ DEVMETHOD(device_probe, lge_probe), DEVMETHOD(device_attach, lge_attach), DEVMETHOD(device_detach, lge_detach), DEVMETHOD(device_shutdown, lge_shutdown), /* MII interface */ DEVMETHOD(miibus_readreg, lge_miibus_readreg), DEVMETHOD(miibus_writereg, lge_miibus_writereg), DEVMETHOD(miibus_statchg, lge_miibus_statchg), DEVMETHOD_END }; static driver_t lge_driver = { "lge", lge_methods, sizeof(struct lge_softc) }; static devclass_t lge_devclass; DRIVER_MODULE(lge, pci, lge_driver, lge_devclass, 0, 0); DRIVER_MODULE(miibus, lge, miibus_driver, miibus_devclass, 0, 0); MODULE_DEPEND(lge, pci, 1, 1, 1); MODULE_DEPEND(lge, ether, 1, 1, 1); MODULE_DEPEND(lge, miibus, 1, 1, 1); #define LGE_SETBIT(sc, reg, x) \ CSR_WRITE_4(sc, reg, \ CSR_READ_4(sc, reg) | (x)) #define LGE_CLRBIT(sc, reg, x) \ CSR_WRITE_4(sc, reg, \ CSR_READ_4(sc, reg) & ~(x)) #define SIO_SET(x) \ CSR_WRITE_4(sc, LGE_MEAR, CSR_READ_4(sc, LGE_MEAR) | x) #define SIO_CLR(x) \ CSR_WRITE_4(sc, LGE_MEAR, CSR_READ_4(sc, LGE_MEAR) & ~x) /* * Read a word of data stored in the EEPROM at address 'addr.' 
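lge_eeprom_getword() below talks to the EEPROM with a classic command-and-poll sequence: write a read command for a 32-bit word, spin until the command bit clears, then take whichever 16-bit half the low address bit selects. The same pattern as a hypothetical variant that returns a timeout instead of only printing one:

static int
eeprom_word(struct lge_softc *sc, int addr, uint16_t *dest)
{
	uint32_t val;
	int i;

	CSR_WRITE_4(sc, LGE_EECTL, LGE_EECTL_CMD_READ |
	    LGE_EECTL_SINGLEACCESS | ((addr >> 1) << 8));
	for (i = 0; i < LGE_TIMEOUT; i++)
		if (!(CSR_READ_4(sc, LGE_EECTL) & LGE_EECTL_CMD_READ))
			break;		/* command bit cleared: done */
	if (i == LGE_TIMEOUT)
		return (ETIMEDOUT);
	val = CSR_READ_4(sc, LGE_EEDATA);
	/* odd addresses live in the upper half of the data register */
	*dest = (addr & 1) ? (val >> 16) & 0xFFFF : val & 0xFFFF;
	return (0);
}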
*/ static void lge_eeprom_getword(sc, addr, dest) struct lge_softc *sc; int addr; u_int16_t *dest; { int i; u_int32_t val; CSR_WRITE_4(sc, LGE_EECTL, LGE_EECTL_CMD_READ| LGE_EECTL_SINGLEACCESS|((addr >> 1) << 8)); for (i = 0; i < LGE_TIMEOUT; i++) if (!(CSR_READ_4(sc, LGE_EECTL) & LGE_EECTL_CMD_READ)) break; if (i == LGE_TIMEOUT) { device_printf(sc->lge_dev, "EEPROM read timed out\n"); return; } val = CSR_READ_4(sc, LGE_EEDATA); if (addr & 1) *dest = (val >> 16) & 0xFFFF; else *dest = val & 0xFFFF; return; } /* * Read a sequence of words from the EEPROM. */ static void lge_read_eeprom(sc, dest, off, cnt, swap) struct lge_softc *sc; caddr_t dest; int off; int cnt; int swap; { int i; u_int16_t word = 0, *ptr; for (i = 0; i < cnt; i++) { lge_eeprom_getword(sc, off + i, &word); ptr = (u_int16_t *)(dest + (i * 2)); if (swap) *ptr = ntohs(word); else *ptr = word; } return; } static int lge_miibus_readreg(dev, phy, reg) device_t dev; int phy, reg; { struct lge_softc *sc; int i; sc = device_get_softc(dev); /* * If we have a non-PCS PHY, pretend that the internal * autoneg stuff at PHY address 0 isn't there so that * the miibus code will find only the GMII PHY. */ if (sc->lge_pcs == 0 && phy == 0) return(0); CSR_WRITE_4(sc, LGE_GMIICTL, (phy << 8) | reg | LGE_GMIICMD_READ); for (i = 0; i < LGE_TIMEOUT; i++) if (!(CSR_READ_4(sc, LGE_GMIICTL) & LGE_GMIICTL_CMDBUSY)) break; if (i == LGE_TIMEOUT) { device_printf(sc->lge_dev, "PHY read timed out\n"); return(0); } return(CSR_READ_4(sc, LGE_GMIICTL) >> 16); } static int lge_miibus_writereg(dev, phy, reg, data) device_t dev; int phy, reg, data; { struct lge_softc *sc; int i; sc = device_get_softc(dev); CSR_WRITE_4(sc, LGE_GMIICTL, (data << 16) | (phy << 8) | reg | LGE_GMIICMD_WRITE); for (i = 0; i < LGE_TIMEOUT; i++) if (!(CSR_READ_4(sc, LGE_GMIICTL) & LGE_GMIICTL_CMDBUSY)) break; if (i == LGE_TIMEOUT) { device_printf(sc->lge_dev, "PHY write timed out\n"); return(0); } return(0); } static void lge_miibus_statchg(dev) device_t dev; { struct lge_softc *sc; struct mii_data *mii; sc = device_get_softc(dev); mii = device_get_softc(sc->lge_miibus); LGE_CLRBIT(sc, LGE_GMIIMODE, LGE_GMIIMODE_SPEED); switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_1000_T: case IFM_1000_SX: LGE_SETBIT(sc, LGE_GMIIMODE, LGE_SPEED_1000); break; case IFM_100_TX: LGE_SETBIT(sc, LGE_GMIIMODE, LGE_SPEED_100); break; case IFM_10_T: LGE_SETBIT(sc, LGE_GMIIMODE, LGE_SPEED_10); break; default: /* * Choose something, even if it's wrong. Clearing * all the bits will hose autoneg on the internal * PHY. */ LGE_SETBIT(sc, LGE_GMIIMODE, LGE_SPEED_1000); break; } if ((mii->mii_media_active & IFM_GMASK) == IFM_FDX) { LGE_SETBIT(sc, LGE_GMIIMODE, LGE_GMIIMODE_FDX); } else { LGE_CLRBIT(sc, LGE_GMIIMODE, LGE_GMIIMODE_FDX); } return; } static void lge_setmulti(sc) struct lge_softc *sc; { struct ifnet *ifp; struct ifmultiaddr *ifma; u_int32_t h = 0, hashes[2] = { 0, 0 }; ifp = sc->lge_ifp; LGE_LOCK_ASSERT(sc); /* Make sure multicast hash table is enabled. 
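The multicast filter programmed just below hashes each address to one of 64 bits: the top six bits of the big-endian CRC32 select the bit, split across the two 32-bit LGE_MAR registers. That selection as a hypothetical helper:

static void
lge_hash_maddr(uint32_t hashes[2], const uint8_t *lladdr)
{
	uint32_t h;

	/* top 6 bits of the big-endian CRC32 -> bit index 0..63 */
	h = ether_crc32_be(lladdr, ETHER_ADDR_LEN) >> 26;
	if (h < 32)
		hashes[0] |= 1U << h;
	else
		hashes[1] |= 1U << (h - 32);
}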
*/ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL1|LGE_MODE1_RX_MCAST); if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) { CSR_WRITE_4(sc, LGE_MAR0, 0xFFFFFFFF); CSR_WRITE_4(sc, LGE_MAR1, 0xFFFFFFFF); return; } /* first, zot all the existing hash bits */ CSR_WRITE_4(sc, LGE_MAR0, 0); CSR_WRITE_4(sc, LGE_MAR1, 0); /* now program new ones */ if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; h = ether_crc32_be(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), ETHER_ADDR_LEN) >> 26; if (h < 32) hashes[0] |= (1 << h); else hashes[1] |= (1 << (h - 32)); } if_maddr_runlock(ifp); CSR_WRITE_4(sc, LGE_MAR0, hashes[0]); CSR_WRITE_4(sc, LGE_MAR1, hashes[1]); return; } static void lge_reset(sc) struct lge_softc *sc; { int i; LGE_SETBIT(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL0|LGE_MODE1_SOFTRST); for (i = 0; i < LGE_TIMEOUT; i++) { if (!(CSR_READ_4(sc, LGE_MODE1) & LGE_MODE1_SOFTRST)) break; } if (i == LGE_TIMEOUT) device_printf(sc->lge_dev, "reset never completed\n"); /* Wait a little while for the chip to get its brains in order. */ DELAY(1000); return; } /* * Probe for a Level 1 chip. Check the PCI vendor and device * IDs against our list and return a device name if we find a match. */ static int lge_probe(dev) device_t dev; { const struct lge_type *t; t = lge_devs; while(t->lge_name != NULL) { if ((pci_get_vendor(dev) == t->lge_vid) && (pci_get_device(dev) == t->lge_did)) { device_set_desc(dev, t->lge_name); return(BUS_PROBE_DEFAULT); } t++; } return(ENXIO); } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ static int lge_attach(dev) device_t dev; { u_char eaddr[ETHER_ADDR_LEN]; struct lge_softc *sc; struct ifnet *ifp = NULL; int error = 0, rid; sc = device_get_softc(dev); sc->lge_dev = dev; mtx_init(&sc->lge_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->lge_stat_callout, &sc->lge_mtx, 0); /* * Map control/status registers. */ pci_enable_busmaster(dev); rid = LGE_RID; sc->lge_res = bus_alloc_resource_any(dev, LGE_RES, &rid, RF_ACTIVE); if (sc->lge_res == NULL) { device_printf(dev, "couldn't map ports/memory\n"); error = ENXIO; goto fail; } sc->lge_btag = rman_get_bustag(sc->lge_res); sc->lge_bhandle = rman_get_bushandle(sc->lge_res); /* Allocate interrupt */ rid = 0; sc->lge_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->lge_irq == NULL) { device_printf(dev, "couldn't map interrupt\n"); error = ENXIO; goto fail; } /* Reset the adapter. */ lge_reset(sc); /* * Get station address from the EEPROM. */ lge_read_eeprom(sc, (caddr_t)&eaddr[0], LGE_EE_NODEADDR_0, 1, 0); lge_read_eeprom(sc, (caddr_t)&eaddr[2], LGE_EE_NODEADDR_1, 1, 0); lge_read_eeprom(sc, (caddr_t)&eaddr[4], LGE_EE_NODEADDR_2, 1, 0); sc->lge_ldata = contigmalloc(sizeof(struct lge_list_data), M_DEVBUF, M_NOWAIT | M_ZERO, 0, 0xffffffff, PAGE_SIZE, 0); if (sc->lge_ldata == NULL) { device_printf(dev, "no memory for list buffers!\n"); error = ENXIO; goto fail; } /* Try to allocate memory for jumbo buffers. 
*/ if (lge_alloc_jumbo_mem(sc)) { device_printf(dev, "jumbo buffer allocation failed\n"); error = ENXIO; goto fail; } ifp = sc->lge_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not if_alloc()\n"); error = ENOSPC; goto fail; } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = lge_ioctl; ifp->if_start = lge_start; ifp->if_init = lge_init; ifp->if_snd.ifq_maxlen = LGE_TX_LIST_CNT - 1; ifp->if_capabilities = IFCAP_RXCSUM; ifp->if_capenable = ifp->if_capabilities; if (CSR_READ_4(sc, LGE_GMIIMODE) & LGE_GMIIMODE_PCSENH) sc->lge_pcs = 1; else sc->lge_pcs = 0; /* * Do MII setup. */ error = mii_attach(dev, &sc->lge_miibus, ifp, lge_ifmedia_upd, lge_ifmedia_sts, BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, 0); if (error != 0) { device_printf(dev, "attaching PHYs failed\n"); goto fail; } /* * Call MI attach routine. */ ether_ifattach(ifp, eaddr); error = bus_setup_intr(dev, sc->lge_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, lge_intr, sc, &sc->lge_intrhand); if (error) { ether_ifdetach(ifp); device_printf(dev, "couldn't set up irq\n"); goto fail; } return (0); fail: lge_free_jumbo_mem(sc); if (sc->lge_ldata) contigfree(sc->lge_ldata, sizeof(struct lge_list_data), M_DEVBUF); if (ifp) if_free(ifp); if (sc->lge_irq) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->lge_irq); if (sc->lge_res) bus_release_resource(dev, LGE_RES, LGE_RID, sc->lge_res); mtx_destroy(&sc->lge_mtx); return(error); } static int lge_detach(dev) device_t dev; { struct lge_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->lge_ifp; LGE_LOCK(sc); lge_reset(sc); lge_stop(sc); LGE_UNLOCK(sc); callout_drain(&sc->lge_stat_callout); ether_ifdetach(ifp); bus_generic_detach(dev); device_delete_child(dev, sc->lge_miibus); bus_teardown_intr(dev, sc->lge_irq, sc->lge_intrhand); bus_release_resource(dev, SYS_RES_IRQ, 0, sc->lge_irq); bus_release_resource(dev, LGE_RES, LGE_RID, sc->lge_res); contigfree(sc->lge_ldata, sizeof(struct lge_list_data), M_DEVBUF); if_free(ifp); lge_free_jumbo_mem(sc); mtx_destroy(&sc->lge_mtx); return(0); } /* * Initialize the transmit descriptors. */ static int lge_list_tx_init(sc) struct lge_softc *sc; { struct lge_list_data *ld; struct lge_ring_data *cd; int i; cd = &sc->lge_cdata; ld = sc->lge_ldata; for (i = 0; i < LGE_TX_LIST_CNT; i++) { ld->lge_tx_list[i].lge_mbuf = NULL; ld->lge_tx_list[i].lge_ctl = 0; } cd->lge_tx_prod = cd->lge_tx_cons = 0; return(0); } /* * Initialize the RX descriptors and allocate mbufs for them. Note that * we arrange the descriptors in a closed ring, so that the last descriptor * points back to the first. */ static int lge_list_rx_init(sc) struct lge_softc *sc; { struct lge_list_data *ld; struct lge_ring_data *cd; int i; ld = sc->lge_ldata; cd = &sc->lge_cdata; cd->lge_rx_prod = cd->lge_rx_cons = 0; CSR_WRITE_4(sc, LGE_RXDESC_ADDR_HI, 0); for (i = 0; i < LGE_RX_LIST_CNT; i++) { if (CSR_READ_1(sc, LGE_RXCMDFREE_8BIT) == 0) break; if (lge_newbuf(sc, &ld->lge_rx_list[i], NULL) == ENOBUFS) return(ENOBUFS); } /* Clear possible 'rx command queue empty' interrupt. */ CSR_READ_4(sc, LGE_ISR); return(0); } /* * Initialize an RX descriptor and attach an MBUF cluster.
*/ static int lge_newbuf(sc, c, m) struct lge_softc *sc; struct lge_rx_desc *c; struct mbuf *m; { struct mbuf *m_new = NULL; - caddr_t *buf = NULL; + char *buf = NULL; if (m == NULL) { MGETHDR(m_new, M_NOWAIT, MT_DATA); if (m_new == NULL) { device_printf(sc->lge_dev, "no memory for rx list " "-- packet dropped!\n"); return(ENOBUFS); } /* Allocate the jumbo buffer */ buf = lge_jalloc(sc); if (buf == NULL) { #ifdef LGE_VERBOSE device_printf(sc->lge_dev, "jumbo allocation failed " "-- packet dropped!\n"); #endif m_freem(m_new); return(ENOBUFS); } /* Attach the buffer to the mbuf */ - m_new->m_data = (void *)buf; m_new->m_len = m_new->m_pkthdr.len = LGE_JUMBO_FRAMELEN; - MEXTADD(m_new, buf, LGE_JUMBO_FRAMELEN, lge_jfree, - buf, (struct lge_softc *)sc, 0, EXT_NET_DRV); + m_extadd(m_new, buf, LGE_JUMBO_FRAMELEN, lge_jfree, sc, NULL, + 0, EXT_NET_DRV); } else { m_new = m; m_new->m_len = m_new->m_pkthdr.len = LGE_JUMBO_FRAMELEN; m_new->m_data = m_new->m_ext.ext_buf; } /* * Adjust alignment so packet payload begins on a * longword boundary. Mandatory for Alpha, useful on * x86 too. */ m_adj(m_new, ETHER_ALIGN); c->lge_mbuf = m_new; c->lge_fragptr_hi = 0; c->lge_fragptr_lo = vtophys(mtod(m_new, caddr_t)); c->lge_fraglen = m_new->m_len; c->lge_ctl = m_new->m_len | LGE_RXCTL_WANTINTR | LGE_FRAGCNT(1); c->lge_sts = 0; /* * Put this buffer in the RX command FIFO. To do this, * we just write the physical address of the descriptor * into the RX descriptor address registers. Note that * there are two registers, one high DWORD and one low * DWORD, which lets us specify a 64-bit address if * desired. We only use a 32-bit address for now. * Writing to the low DWORD register is what actually * causes the command to be issued, so we do that * last. */ CSR_WRITE_4(sc, LGE_RXDESC_ADDR_LO, vtophys(c)); LGE_INC(sc->lge_cdata.lge_rx_prod, LGE_RX_LIST_CNT); return(0); } static int lge_alloc_jumbo_mem(sc) struct lge_softc *sc; { caddr_t ptr; int i; struct lge_jpool_entry *entry; /* Grab a big chunk o' storage. */ sc->lge_cdata.lge_jumbo_buf = contigmalloc(LGE_JMEM, M_DEVBUF, M_NOWAIT, 0, 0xffffffff, PAGE_SIZE, 0); if (sc->lge_cdata.lge_jumbo_buf == NULL) { device_printf(sc->lge_dev, "no memory for jumbo buffers!\n"); return(ENOBUFS); } SLIST_INIT(&sc->lge_jfree_listhead); SLIST_INIT(&sc->lge_jinuse_listhead); /* * Now divide it up into 9K pieces and save the addresses * in an array. */ ptr = sc->lge_cdata.lge_jumbo_buf; for (i = 0; i < LGE_JSLOTS; i++) { sc->lge_cdata.lge_jslots[i] = ptr; ptr += LGE_JLEN; entry = malloc(sizeof(struct lge_jpool_entry), M_DEVBUF, M_NOWAIT); if (entry == NULL) { device_printf(sc->lge_dev, "no memory for jumbo " "buffer queue!\n"); return(ENOBUFS); } entry->slot = i; SLIST_INSERT_HEAD(&sc->lge_jfree_listhead, entry, jpool_entries); } return(0); } static void lge_free_jumbo_mem(sc) struct lge_softc *sc; { struct lge_jpool_entry *entry; if (sc->lge_cdata.lge_jumbo_buf == NULL) return; while ((entry = SLIST_FIRST(&sc->lge_jinuse_listhead))) { device_printf(sc->lge_dev, "asked to free buffer that is in use!\n"); SLIST_REMOVE_HEAD(&sc->lge_jinuse_listhead, jpool_entries); SLIST_INSERT_HEAD(&sc->lge_jfree_listhead, entry, jpool_entries); } while (!SLIST_EMPTY(&sc->lge_jfree_listhead)) { entry = SLIST_FIRST(&sc->lge_jfree_listhead); SLIST_REMOVE_HEAD(&sc->lge_jfree_listhead, jpool_entries); free(entry, M_DEVBUF); } contigfree(sc->lge_cdata.lge_jumbo_buf, LGE_JMEM, M_DEVBUF); return; } /* * Allocate a jumbo buffer. 
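The descriptor-posting comment in lge_newbuf() above describes the ordering rule for the paired address registers: the high DWORD is written first and the low DWORD last, because the low write is what actually issues the command. The driver only ever programs 32-bit addresses, but here is a sketch of what a full 64-bit post would look like under that rule (this helper is editorial, not part of the driver):

/*
 * Editorial sketch: post a 64-bit descriptor address where the
 * low-DWORD write triggers the command, so it must come last.
 */
static __inline void
lge_post_rxdesc64(struct lge_softc *sc, uint64_t pa)
{

	CSR_WRITE_4(sc, LGE_RXDESC_ADDR_HI, (u_int32_t)(pa >> 32));
	CSR_WRITE_4(sc, LGE_RXDESC_ADDR_LO, (u_int32_t)pa); /* issues command */
}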
*/ static void * lge_jalloc(sc) struct lge_softc *sc; { struct lge_jpool_entry *entry; entry = SLIST_FIRST(&sc->lge_jfree_listhead); if (entry == NULL) { #ifdef LGE_VERBOSE device_printf(sc->lge_dev, "no free jumbo buffers\n"); #endif return(NULL); } SLIST_REMOVE_HEAD(&sc->lge_jfree_listhead, jpool_entries); SLIST_INSERT_HEAD(&sc->lge_jinuse_listhead, entry, jpool_entries); return(sc->lge_cdata.lge_jslots[entry->slot]); } /* * Release a jumbo buffer. */ static void -lge_jfree(struct mbuf *m, void *buf, void *args) +lge_jfree(struct mbuf *m) { struct lge_softc *sc; int i; struct lge_jpool_entry *entry; /* Extract the softc struct pointer. */ - sc = args; + sc = m->m_ext.ext_arg1; if (sc == NULL) panic("lge_jfree: can't find softc pointer!"); /* calculate the slot this buffer belongs to */ - i = ((vm_offset_t)buf + i = ((vm_offset_t)m->m_ext.ext_buf - (vm_offset_t)sc->lge_cdata.lge_jumbo_buf) / LGE_JLEN; if ((i < 0) || (i >= LGE_JSLOTS)) panic("lge_jfree: asked to free buffer that we don't manage!"); entry = SLIST_FIRST(&sc->lge_jinuse_listhead); if (entry == NULL) panic("lge_jfree: buffer not in use!"); entry->slot = i; SLIST_REMOVE_HEAD(&sc->lge_jinuse_listhead, jpool_entries); SLIST_INSERT_HEAD(&sc->lge_jfree_listhead, entry, jpool_entries); } /* * A frame has been uploaded: pass the resulting mbuf chain up to * the higher level protocols. */ static void lge_rxeof(sc, cnt) struct lge_softc *sc; int cnt; { struct mbuf *m; struct ifnet *ifp; struct lge_rx_desc *cur_rx; int c, i, total_len = 0; u_int32_t rxsts, rxctl; ifp = sc->lge_ifp; /* Find out how many frames were processed. */ c = cnt; i = sc->lge_cdata.lge_rx_cons; /* Suck them in. */ while(c) { struct mbuf *m0 = NULL; cur_rx = &sc->lge_ldata->lge_rx_list[i]; rxctl = cur_rx->lge_ctl; rxsts = cur_rx->lge_sts; m = cur_rx->lge_mbuf; cur_rx->lge_mbuf = NULL; total_len = LGE_RXBYTES(cur_rx); LGE_INC(i, LGE_RX_LIST_CNT); c--; /* * If an error occurs, update stats, clear the * status word and leave the mbuf cluster in place: * it should simply get re-used next time this descriptor * comes up in the ring. */ if (rxctl & LGE_RXCTL_ERRMASK) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); lge_newbuf(sc, &LGE_RXTAIL(sc), m); continue; } if (lge_newbuf(sc, &LGE_RXTAIL(sc), NULL) == ENOBUFS) { m0 = m_devget(mtod(m, char *), total_len, ETHER_ALIGN, ifp, NULL); lge_newbuf(sc, &LGE_RXTAIL(sc), m); if (m0 == NULL) { device_printf(sc->lge_dev, "no receive buffers " "available -- packet dropped!\n"); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); continue; } m = m0; } else { m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = total_len; } if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); /* Do IP checksum checking. */ if (rxsts & LGE_RXSTS_ISIP) m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if (!(rxsts & LGE_RXSTS_IPCSUMERR)) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; if ((rxsts & LGE_RXSTS_ISTCP && !(rxsts & LGE_RXSTS_TCPCSUMERR)) || (rxsts & LGE_RXSTS_ISUDP && !(rxsts & LGE_RXSTS_UDPCSUMERR))) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID|CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } LGE_UNLOCK(sc); (*ifp->if_input)(ifp, m); LGE_LOCK(sc); } sc->lge_cdata.lge_rx_cons = i; return; } static void lge_rxeoc(sc) struct lge_softc *sc; { struct ifnet *ifp; ifp = sc->lge_ifp; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; lge_init_locked(sc); return; } /* * A frame was downloaded to the chip. It's safe for us to clean up * the list buffers. 
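The two hunks above are the heart of this change for lge(4): the old three-argument MEXTADD()/ext_free interface is replaced by m_extadd() and a one-argument free routine that recovers its context from m->m_ext.ext_arg1. A self-contained sketch of the new pattern, with my_pool/my_pool_put as hypothetical stand-ins for a driver's buffer recycler:

/*
 * Editorial sketch of the post-change idiom.  The free callback
 * receives only the mbuf; the arg1/arg2 values passed to
 * m_extadd() are recovered from m->m_ext.
 */
static void
my_ext_free(struct mbuf *m)
{
	struct my_pool *p = m->m_ext.ext_arg1;	/* arg1 from m_extadd() */

	my_pool_put(p, m->m_ext.ext_buf);	/* hypothetical recycler */
}

static struct mbuf *
my_wrap_buf(struct my_pool *p, char *buf, u_int len)
{
	struct mbuf *m;

	MGETHDR(m, M_NOWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	/* args: buf, size, free_f, arg1, arg2, flags, type */
	m_extadd(m, buf, len, my_ext_free, p, NULL, 0, EXT_NET_DRV);
	m->m_len = m->m_pkthdr.len = len;
	return (m);
}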
*/ static void lge_txeof(sc) struct lge_softc *sc; { struct lge_tx_desc *cur_tx = NULL; struct ifnet *ifp; u_int32_t idx, txdone; ifp = sc->lge_ifp; /* Clear the timeout timer. */ sc->lge_timer = 0; /* * Go through our tx list and free mbufs for those * frames that have been transmitted. */ idx = sc->lge_cdata.lge_tx_cons; txdone = CSR_READ_1(sc, LGE_TXDMADONE_8BIT); while (idx != sc->lge_cdata.lge_tx_prod && txdone) { cur_tx = &sc->lge_ldata->lge_tx_list[idx]; if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (cur_tx->lge_mbuf != NULL) { m_freem(cur_tx->lge_mbuf); cur_tx->lge_mbuf = NULL; } cur_tx->lge_ctl = 0; txdone--; LGE_INC(idx, LGE_TX_LIST_CNT); sc->lge_timer = 0; } sc->lge_cdata.lge_tx_cons = idx; if (cur_tx != NULL) ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; return; } static void lge_tick(xsc) void *xsc; { struct lge_softc *sc; struct mii_data *mii; struct ifnet *ifp; sc = xsc; ifp = sc->lge_ifp; LGE_LOCK_ASSERT(sc); CSR_WRITE_4(sc, LGE_STATSIDX, LGE_STATS_SINGLE_COLL_PKTS); if_inc_counter(ifp, IFCOUNTER_COLLISIONS, CSR_READ_4(sc, LGE_STATSVAL)); CSR_WRITE_4(sc, LGE_STATSIDX, LGE_STATS_MULTI_COLL_PKTS); if_inc_counter(ifp, IFCOUNTER_COLLISIONS, CSR_READ_4(sc, LGE_STATSVAL)); if (!sc->lge_link) { mii = device_get_softc(sc->lge_miibus); mii_tick(mii); if (mii->mii_media_status & IFM_ACTIVE && IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) { sc->lge_link++; if (bootverbose && (IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_SX|| IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_T)) device_printf(sc->lge_dev, "gigabit link up\n"); if (ifp->if_snd.ifq_head != NULL) lge_start_locked(ifp); } } if (sc->lge_timer != 0 && --sc->lge_timer == 0) lge_watchdog(sc); callout_reset(&sc->lge_stat_callout, hz, lge_tick, sc); return; } static void lge_intr(arg) void *arg; { struct lge_softc *sc; struct ifnet *ifp; u_int32_t status; sc = arg; ifp = sc->lge_ifp; LGE_LOCK(sc); /* Suppress unwanted interrupts */ if (!(ifp->if_flags & IFF_UP)) { lge_stop(sc); LGE_UNLOCK(sc); return; } for (;;) { /* * Reading the ISR register clears all interrupts, and * clears the 'interrupts enabled' bit in the IMR * register. */ status = CSR_READ_4(sc, LGE_ISR); if ((status & LGE_INTRS) == 0) break; if ((status & (LGE_ISR_TXCMDFIFO_EMPTY|LGE_ISR_TXDMA_DONE))) lge_txeof(sc); if (status & LGE_ISR_RXDMA_DONE) lge_rxeof(sc, LGE_RX_DMACNT(status)); if (status & LGE_ISR_RXCMDFIFO_EMPTY) lge_rxeoc(sc); if (status & LGE_ISR_PHY_INTR) { sc->lge_link = 0; callout_stop(&sc->lge_stat_callout); lge_tick(sc); } } /* Re-enable interrupts. */ CSR_WRITE_4(sc, LGE_IMR, LGE_IMR_SETRST_CTL0|LGE_IMR_INTR_ENB); if (ifp->if_snd.ifq_head != NULL) lge_start_locked(ifp); LGE_UNLOCK(sc); return; } /* * Encapsulate an mbuf chain in a descriptor by coupling the mbuf data * pointers to the fragment pointers. */ static int lge_encap(sc, m_head, txidx) struct lge_softc *sc; struct mbuf *m_head; u_int32_t *txidx; { struct lge_frag *f = NULL; struct lge_tx_desc *cur_tx; struct mbuf *m; int frag = 0, tot_len = 0; /* * Start packing the mbufs in this chain into * the fragment pointers. Stop when we run out * of fragments or hit the end of the mbuf chain. 
*/ m = m_head; cur_tx = &sc->lge_ldata->lge_tx_list[*txidx]; frag = 0; for (m = m_head; m != NULL; m = m->m_next) { if (m->m_len != 0) { tot_len += m->m_len; f = &cur_tx->lge_frags[frag]; f->lge_fraglen = m->m_len; f->lge_fragptr_lo = vtophys(mtod(m, vm_offset_t)); f->lge_fragptr_hi = 0; frag++; } } if (m != NULL) return(ENOBUFS); cur_tx->lge_mbuf = m_head; cur_tx->lge_ctl = LGE_TXCTL_WANTINTR|LGE_FRAGCNT(frag)|tot_len; LGE_INC((*txidx), LGE_TX_LIST_CNT); /* Queue for transmit */ CSR_WRITE_4(sc, LGE_TXDESC_ADDR_LO, vtophys(cur_tx)); return(0); } /* * Main transmit routine. To avoid having to do mbuf copies, we put pointers * to the mbuf data regions directly in the transmit lists. We also save a * copy of the pointers since the transmit list fragment pointers are * physical addresses. */ static void lge_start(ifp) struct ifnet *ifp; { struct lge_softc *sc; sc = ifp->if_softc; LGE_LOCK(sc); lge_start_locked(ifp); LGE_UNLOCK(sc); } static void lge_start_locked(ifp) struct ifnet *ifp; { struct lge_softc *sc; struct mbuf *m_head = NULL; u_int32_t idx; sc = ifp->if_softc; if (!sc->lge_link) return; idx = sc->lge_cdata.lge_tx_prod; if (ifp->if_drv_flags & IFF_DRV_OACTIVE) return; while(sc->lge_ldata->lge_tx_list[idx].lge_mbuf == NULL) { if (CSR_READ_1(sc, LGE_TXCMDFREE_8BIT) == 0) break; IF_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (lge_encap(sc, m_head, &idx)) { IF_PREPEND(&ifp->if_snd, m_head); ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } /* * If there's a BPF listener, bounce a copy of this frame * to him. */ BPF_MTAP(ifp, m_head); } sc->lge_cdata.lge_tx_prod = idx; /* * Set a timeout in case the chip goes out to lunch. */ sc->lge_timer = 5; return; } static void lge_init(xsc) void *xsc; { struct lge_softc *sc = xsc; LGE_LOCK(sc); lge_init_locked(sc); LGE_UNLOCK(sc); } static void lge_init_locked(sc) struct lge_softc *sc; { struct ifnet *ifp = sc->lge_ifp; LGE_LOCK_ASSERT(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; /* * Cancel pending I/O and free all RX/TX buffers. */ lge_stop(sc); lge_reset(sc); /* Set MAC address */ CSR_WRITE_4(sc, LGE_PAR0, *(u_int32_t *)(&IF_LLADDR(sc->lge_ifp)[0])); CSR_WRITE_4(sc, LGE_PAR1, *(u_int32_t *)(&IF_LLADDR(sc->lge_ifp)[4])); /* Init circular RX list. */ if (lge_list_rx_init(sc) == ENOBUFS) { device_printf(sc->lge_dev, "initialization failed: no " "memory for rx buffers\n"); lge_stop(sc); return; } /* * Init tx descriptors. */ lge_list_tx_init(sc); /* Set initial value for MODE1 register. */ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_RX_UCAST| LGE_MODE1_TX_CRC|LGE_MODE1_TXPAD| LGE_MODE1_RX_FLOWCTL|LGE_MODE1_SETRST_CTL0| LGE_MODE1_SETRST_CTL1|LGE_MODE1_SETRST_CTL2); /* If we want promiscuous mode, set the allframes bit. */ if (ifp->if_flags & IFF_PROMISC) { CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL1|LGE_MODE1_RX_PROMISC); } else { CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_RX_PROMISC); } /* * Set the capture broadcast bit to capture broadcast frames. */ if (ifp->if_flags & IFF_BROADCAST) { CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL1|LGE_MODE1_RX_BCAST); } else { CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_RX_BCAST); } /* Packet padding workaround? 
*/ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL1|LGE_MODE1_RMVPAD); /* No error frames */ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_RX_ERRPKTS); /* Receive large frames */ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL1|LGE_MODE1_RX_GIANTS); /* Workaround: disable RX/TX flow control */ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_TX_FLOWCTL); CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_RX_FLOWCTL); /* Make sure to strip CRC from received frames */ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_RX_CRC); /* Turn off magic packet mode */ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_MPACK_ENB); /* Turn off all VLAN stuff */ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_VLAN_RX|LGE_MODE1_VLAN_TX| LGE_MODE1_VLAN_STRIP|LGE_MODE1_VLAN_INSERT); /* Workaround: FIFO overflow */ CSR_WRITE_2(sc, LGE_RXFIFO_HIWAT, 0x3FFF); CSR_WRITE_4(sc, LGE_IMR, LGE_IMR_SETRST_CTL1|LGE_IMR_RXFIFO_WAT); /* * Load the multicast filter. */ lge_setmulti(sc); /* * Enable hardware checksum validation for all received IPv4 * packets, do not reject packets with bad checksums. */ CSR_WRITE_4(sc, LGE_MODE2, LGE_MODE2_RX_IPCSUM| LGE_MODE2_RX_TCPCSUM|LGE_MODE2_RX_UDPCSUM| LGE_MODE2_RX_ERRCSUM); /* * Enable the delivery of PHY interrupts based on * link/speed/duplex status changes. */ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL0|LGE_MODE1_GMIIPOLL); /* Enable receiver and transmitter. */ CSR_WRITE_4(sc, LGE_RXDESC_ADDR_HI, 0); CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL1|LGE_MODE1_RX_ENB); CSR_WRITE_4(sc, LGE_TXDESC_ADDR_HI, 0); CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL1|LGE_MODE1_TX_ENB); /* * Enable interrupts. */ CSR_WRITE_4(sc, LGE_IMR, LGE_IMR_SETRST_CTL0| LGE_IMR_SETRST_CTL1|LGE_IMR_INTR_ENB|LGE_INTRS); lge_ifmedia_upd_locked(ifp); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; callout_reset(&sc->lge_stat_callout, hz, lge_tick, sc); return; } /* * Set media options. */ static int lge_ifmedia_upd(ifp) struct ifnet *ifp; { struct lge_softc *sc; sc = ifp->if_softc; LGE_LOCK(sc); lge_ifmedia_upd_locked(ifp); LGE_UNLOCK(sc); return(0); } static void lge_ifmedia_upd_locked(ifp) struct ifnet *ifp; { struct lge_softc *sc; struct mii_data *mii; struct mii_softc *miisc; sc = ifp->if_softc; LGE_LOCK_ASSERT(sc); mii = device_get_softc(sc->lge_miibus); sc->lge_link = 0; LIST_FOREACH(miisc, &mii->mii_phys, mii_list) PHY_RESET(miisc); mii_mediachg(mii); } /* * Report current media status. 
*/ static void lge_ifmedia_sts(ifp, ifmr) struct ifnet *ifp; struct ifmediareq *ifmr; { struct lge_softc *sc; struct mii_data *mii; sc = ifp->if_softc; LGE_LOCK(sc); mii = device_get_softc(sc->lge_miibus); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; LGE_UNLOCK(sc); return; } static int lge_ioctl(ifp, command, data) struct ifnet *ifp; u_long command; caddr_t data; { struct lge_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; struct mii_data *mii; int error = 0; switch(command) { case SIOCSIFMTU: LGE_LOCK(sc); if (ifr->ifr_mtu > LGE_JUMBO_MTU) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; LGE_UNLOCK(sc); break; case SIOCSIFFLAGS: LGE_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_flags & IFF_PROMISC && !(sc->lge_if_flags & IFF_PROMISC)) { CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL1| LGE_MODE1_RX_PROMISC); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && !(ifp->if_flags & IFF_PROMISC) && sc->lge_if_flags & IFF_PROMISC) { CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_RX_PROMISC); } else { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; lge_init_locked(sc); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) lge_stop(sc); } sc->lge_if_flags = ifp->if_flags; LGE_UNLOCK(sc); error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: LGE_LOCK(sc); lge_setmulti(sc); LGE_UNLOCK(sc); error = 0; break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: mii = device_get_softc(sc->lge_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); break; default: error = ether_ioctl(ifp, command, data); break; } return(error); } static void lge_watchdog(sc) struct lge_softc *sc; { struct ifnet *ifp; LGE_LOCK_ASSERT(sc); ifp = sc->lge_ifp; if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if_printf(ifp, "watchdog timeout\n"); lge_stop(sc); lge_reset(sc); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; lge_init_locked(sc); if (ifp->if_snd.ifq_head != NULL) lge_start_locked(ifp); } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. */ static void lge_stop(sc) struct lge_softc *sc; { int i; struct ifnet *ifp; LGE_LOCK_ASSERT(sc); ifp = sc->lge_ifp; sc->lge_timer = 0; callout_stop(&sc->lge_stat_callout); CSR_WRITE_4(sc, LGE_IMR, LGE_IMR_INTR_ENB); /* Disable receiver and transmitter. */ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_RX_ENB|LGE_MODE1_TX_ENB); sc->lge_link = 0; /* * Free data in the RX lists. */ for (i = 0; i < LGE_RX_LIST_CNT; i++) { if (sc->lge_ldata->lge_rx_list[i].lge_mbuf != NULL) { m_freem(sc->lge_ldata->lge_rx_list[i].lge_mbuf); sc->lge_ldata->lge_rx_list[i].lge_mbuf = NULL; } } bzero((char *)&sc->lge_ldata->lge_rx_list, sizeof(sc->lge_ldata->lge_rx_list)); /* * Free the TX list buffers. */ for (i = 0; i < LGE_TX_LIST_CNT; i++) { if (sc->lge_ldata->lge_tx_list[i].lge_mbuf != NULL) { m_freem(sc->lge_ldata->lge_tx_list[i].lge_mbuf); sc->lge_ldata->lge_tx_list[i].lge_mbuf = NULL; } } bzero((char *)&sc->lge_ldata->lge_tx_list, sizeof(sc->lge_ldata->lge_tx_list)); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); return; } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. 
*/ static int lge_shutdown(dev) device_t dev; { struct lge_softc *sc; sc = device_get_softc(dev); LGE_LOCK(sc); lge_reset(sc); lge_stop(sc); LGE_UNLOCK(sc); return (0); } Index: head/sys/dev/mwl/if_mwl.c =================================================================== --- head/sys/dev/mwl/if_mwl.c (revision 324445) +++ head/sys/dev/mwl/if_mwl.c (revision 324446) @@ -1,4834 +1,4834 @@ /*- * Copyright (c) 2007-2009 Sam Leffler, Errno Consulting * Copyright (c) 2007-2008 Marvell Semiconductor, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ #include __FBSDID("$FreeBSD$"); /* * Driver for the Marvell 88W8363 Wireless LAN controller. 
*/ #include "opt_inet.h" #include "opt_mwl.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif /* INET */ #include #include /* idiomatic shorthands: MS = mask+shift, SM = shift+mask */ #define MS(v,x) (((v) & x) >> x##_S) #define SM(v,x) (((v) << x##_S) & x) static struct ieee80211vap *mwl_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void mwl_vap_delete(struct ieee80211vap *); static int mwl_setupdma(struct mwl_softc *); static int mwl_hal_reset(struct mwl_softc *sc); static int mwl_init(struct mwl_softc *); static void mwl_parent(struct ieee80211com *); static int mwl_reset(struct ieee80211vap *, u_long); static void mwl_stop(struct mwl_softc *); static void mwl_start(struct mwl_softc *); static int mwl_transmit(struct ieee80211com *, struct mbuf *); static int mwl_raw_xmit(struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); static int mwl_media_change(struct ifnet *); static void mwl_watchdog(void *); static int mwl_ioctl(struct ieee80211com *, u_long, void *); static void mwl_radar_proc(void *, int); static void mwl_chanswitch_proc(void *, int); static void mwl_bawatchdog_proc(void *, int); static int mwl_key_alloc(struct ieee80211vap *, struct ieee80211_key *, ieee80211_keyix *, ieee80211_keyix *); static int mwl_key_delete(struct ieee80211vap *, const struct ieee80211_key *); static int mwl_key_set(struct ieee80211vap *, const struct ieee80211_key *); static int _mwl_key_set(struct ieee80211vap *, const struct ieee80211_key *, const uint8_t mac[IEEE80211_ADDR_LEN]); static int mwl_mode_init(struct mwl_softc *); static void mwl_update_mcast(struct ieee80211com *); static void mwl_update_promisc(struct ieee80211com *); static void mwl_updateslot(struct ieee80211com *); static int mwl_beacon_setup(struct ieee80211vap *); static void mwl_beacon_update(struct ieee80211vap *, int); #ifdef MWL_HOST_PS_SUPPORT static void mwl_update_ps(struct ieee80211vap *, int); static int mwl_set_tim(struct ieee80211_node *, int); #endif static int mwl_dma_setup(struct mwl_softc *); static void mwl_dma_cleanup(struct mwl_softc *); static struct ieee80211_node *mwl_node_alloc(struct ieee80211vap *, const uint8_t [IEEE80211_ADDR_LEN]); static void mwl_node_cleanup(struct ieee80211_node *); static void mwl_node_drain(struct ieee80211_node *); static void mwl_node_getsignal(const struct ieee80211_node *, int8_t *, int8_t *); static void mwl_node_getmimoinfo(const struct ieee80211_node *, struct ieee80211_mimo_info *); static int mwl_rxbuf_init(struct mwl_softc *, struct mwl_rxbuf *); static void mwl_rx_proc(void *, int); static void mwl_txq_init(struct mwl_softc *sc, struct mwl_txq *, int); static int mwl_tx_setup(struct mwl_softc *, int, int); static int mwl_wme_update(struct ieee80211com *); static void mwl_tx_cleanupq(struct mwl_softc *, struct mwl_txq *); static void mwl_tx_cleanup(struct mwl_softc *); static uint16_t mwl_calcformat(uint8_t rate, const struct ieee80211_node *); static int mwl_tx_start(struct mwl_softc *, struct ieee80211_node *, struct mwl_txbuf *, struct mbuf *); static void mwl_tx_proc(void *, int); static int mwl_chan_set(struct mwl_softc *, struct ieee80211_channel *); 
static void mwl_draintxq(struct mwl_softc *); static void mwl_cleartxq(struct mwl_softc *, struct ieee80211vap *); static int mwl_recv_action(struct ieee80211_node *, const struct ieee80211_frame *, const uint8_t *, const uint8_t *); static int mwl_addba_request(struct ieee80211_node *, struct ieee80211_tx_ampdu *, int dialogtoken, int baparamset, int batimeout); static int mwl_addba_response(struct ieee80211_node *, struct ieee80211_tx_ampdu *, int status, int baparamset, int batimeout); static void mwl_addba_stop(struct ieee80211_node *, struct ieee80211_tx_ampdu *); static int mwl_startrecv(struct mwl_softc *); static MWL_HAL_APMODE mwl_getapmode(const struct ieee80211vap *, struct ieee80211_channel *); static int mwl_setapmode(struct ieee80211vap *, struct ieee80211_channel*); static void mwl_scan_start(struct ieee80211com *); static void mwl_scan_end(struct ieee80211com *); static void mwl_set_channel(struct ieee80211com *); static int mwl_peerstadb(struct ieee80211_node *, int aid, int staid, MWL_HAL_PEERINFO *pi); static int mwl_localstadb(struct ieee80211vap *); static int mwl_newstate(struct ieee80211vap *, enum ieee80211_state, int); static int allocstaid(struct mwl_softc *sc, int aid); static void delstaid(struct mwl_softc *sc, int staid); static void mwl_newassoc(struct ieee80211_node *, int); static void mwl_agestations(void *); static int mwl_setregdomain(struct ieee80211com *, struct ieee80211_regdomain *, int, struct ieee80211_channel []); static void mwl_getradiocaps(struct ieee80211com *, int, int *, struct ieee80211_channel []); static int mwl_getchannels(struct mwl_softc *); static void mwl_sysctlattach(struct mwl_softc *); static void mwl_announce(struct mwl_softc *); SYSCTL_NODE(_hw, OID_AUTO, mwl, CTLFLAG_RD, 0, "Marvell driver parameters"); static int mwl_rxdesc = MWL_RXDESC; /* # rx desc's to allocate */ SYSCTL_INT(_hw_mwl, OID_AUTO, rxdesc, CTLFLAG_RW, &mwl_rxdesc, 0, "rx descriptors allocated"); static int mwl_rxbuf = MWL_RXBUF; /* # rx buffers to allocate */ SYSCTL_INT(_hw_mwl, OID_AUTO, rxbuf, CTLFLAG_RWTUN, &mwl_rxbuf, 0, "rx buffers allocated"); static int mwl_txbuf = MWL_TXBUF; /* # tx buffers to allocate */ SYSCTL_INT(_hw_mwl, OID_AUTO, txbuf, CTLFLAG_RWTUN, &mwl_txbuf, 0, "tx buffers allocated"); static int mwl_txcoalesce = 8; /* # tx packets to q before poking f/w*/ SYSCTL_INT(_hw_mwl, OID_AUTO, txcoalesce, CTLFLAG_RWTUN, &mwl_txcoalesce, 0, "tx buffers to send at once"); static int mwl_rxquota = MWL_RXBUF; /* # max buffers to process */ SYSCTL_INT(_hw_mwl, OID_AUTO, rxquota, CTLFLAG_RWTUN, &mwl_rxquota, 0, "max rx buffers to process per interrupt"); static int mwl_rxdmalow = 3; /* # min buffers for wakeup */ SYSCTL_INT(_hw_mwl, OID_AUTO, rxdmalow, CTLFLAG_RWTUN, &mwl_rxdmalow, 0, "min free rx buffers before restarting traffic"); #ifdef MWL_DEBUG static int mwl_debug = 0; SYSCTL_INT(_hw_mwl, OID_AUTO, debug, CTLFLAG_RWTUN, &mwl_debug, 0, "control debugging printfs"); enum { MWL_DEBUG_XMIT = 0x00000001, /* basic xmit operation */ MWL_DEBUG_XMIT_DESC = 0x00000002, /* xmit descriptors */ MWL_DEBUG_RECV = 0x00000004, /* basic recv operation */ MWL_DEBUG_RECV_DESC = 0x00000008, /* recv descriptors */ MWL_DEBUG_RESET = 0x00000010, /* reset processing */ MWL_DEBUG_BEACON = 0x00000020, /* beacon handling */ MWL_DEBUG_INTR = 0x00000040, /* ISR */ MWL_DEBUG_TX_PROC = 0x00000080, /* tx ISR proc */ MWL_DEBUG_RX_PROC = 0x00000100, /* rx ISR proc */ MWL_DEBUG_KEYCACHE = 0x00000200, /* key cache management */ MWL_DEBUG_STATE = 0x00000400, /* 802.11 state transitions */ 
MWL_DEBUG_NODE = 0x00000800, /* node management */ MWL_DEBUG_RECV_ALL = 0x00001000, /* trace all frames (beacons) */ MWL_DEBUG_TSO = 0x00002000, /* TSO processing */ MWL_DEBUG_AMPDU = 0x00004000, /* BA stream handling */ MWL_DEBUG_ANY = 0xffffffff }; #define IS_BEACON(wh) \ ((wh->i_fc[0] & (IEEE80211_FC0_TYPE_MASK|IEEE80211_FC0_SUBTYPE_MASK)) == \ (IEEE80211_FC0_TYPE_MGT|IEEE80211_FC0_SUBTYPE_BEACON)) #define IFF_DUMPPKTS_RECV(sc, wh) \ ((sc->sc_debug & MWL_DEBUG_RECV) && \ ((sc->sc_debug & MWL_DEBUG_RECV_ALL) || !IS_BEACON(wh))) #define IFF_DUMPPKTS_XMIT(sc) \ (sc->sc_debug & MWL_DEBUG_XMIT) #define DPRINTF(sc, m, fmt, ...) do { \ if (sc->sc_debug & (m)) \ printf(fmt, __VA_ARGS__); \ } while (0) #define KEYPRINTF(sc, hk, mac) do { \ if (sc->sc_debug & MWL_DEBUG_KEYCACHE) \ mwl_keyprint(sc, __func__, hk, mac); \ } while (0) static void mwl_printrxbuf(const struct mwl_rxbuf *bf, u_int ix); static void mwl_printtxbuf(const struct mwl_txbuf *bf, u_int qnum, u_int ix); #else #define IFF_DUMPPKTS_RECV(sc, wh) 0 #define IFF_DUMPPKTS_XMIT(sc) 0 #define DPRINTF(sc, m, fmt, ...) do { (void )sc; } while (0) #define KEYPRINTF(sc, k, mac) do { (void )sc; } while (0) #endif static MALLOC_DEFINE(M_MWLDEV, "mwldev", "mwl driver dma buffers"); /* * Each packet has fixed front matter: a 2-byte length * of the payload, followed by a 4-address 802.11 header * (regardless of the actual header and always w/o any * QoS header). The payload then follows. */ struct mwltxrec { uint16_t fwlen; struct ieee80211_frame_addr4 wh; } __packed; /* * Read/Write shorthands for accesses to BAR 0. Note * that all BAR 1 operations are done in the "hal" and * there should be no reference to them here. */ #ifdef MWL_DEBUG static __inline uint32_t RD4(struct mwl_softc *sc, bus_size_t off) { return bus_space_read_4(sc->sc_io0t, sc->sc_io0h, off); } #endif static __inline void WR4(struct mwl_softc *sc, bus_size_t off, uint32_t val) { bus_space_write_4(sc->sc_io0t, sc->sc_io0h, off, val); } int mwl_attach(uint16_t devid, struct mwl_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct mwl_hal *mh; int error = 0; DPRINTF(sc, MWL_DEBUG_ANY, "%s: devid 0x%x\n", __func__, devid); /* * Setup the RX free list lock early, so it can be consistently * removed. */ MWL_RXFREE_INIT(sc); mh = mwl_hal_attach(sc->sc_dev, devid, sc->sc_io1h, sc->sc_io1t, sc->sc_dmat); if (mh == NULL) { device_printf(sc->sc_dev, "unable to attach HAL\n"); error = EIO; goto bad; } sc->sc_mh = mh; /* * Load firmware so we can get setup. We arbitrarily * pick station firmware; we'll re-load firmware as * needed so setting up the wrong mode isn't a big deal. */ if (mwl_hal_fwload(mh, NULL) != 0) { device_printf(sc->sc_dev, "unable to setup builtin firmware\n"); error = EIO; goto bad1; } if (mwl_hal_gethwspecs(mh, &sc->sc_hwspecs) != 0) { device_printf(sc->sc_dev, "unable to fetch h/w specs\n"); error = EIO; goto bad1; } error = mwl_getchannels(sc); if (error != 0) goto bad1; sc->sc_txantenna = 0; /* h/w default */ sc->sc_rxantenna = 0; /* h/w default */ sc->sc_invalid = 0; /* ready to go, enable int handling */ sc->sc_ageinterval = MWL_AGEINTERVAL; /* * Allocate tx+rx descriptors and populate the lists. * We immediately push the information to the firmware * as otherwise it gets upset. 
*/ error = mwl_dma_setup(sc); if (error != 0) { device_printf(sc->sc_dev, "failed to setup descriptors: %d\n", error); goto bad1; } error = mwl_setupdma(sc); /* push to firmware */ if (error != 0) /* NB: mwl_setupdma prints msg */ goto bad1; callout_init(&sc->sc_timer, 1); callout_init_mtx(&sc->sc_watchdog, &sc->sc_mtx, 0); mbufq_init(&sc->sc_snd, ifqmaxlen); sc->sc_tq = taskqueue_create("mwl_taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->sc_tq); taskqueue_start_threads(&sc->sc_tq, 1, PI_NET, "%s taskq", device_get_nameunit(sc->sc_dev)); TASK_INIT(&sc->sc_rxtask, 0, mwl_rx_proc, sc); TASK_INIT(&sc->sc_radartask, 0, mwl_radar_proc, sc); TASK_INIT(&sc->sc_chanswitchtask, 0, mwl_chanswitch_proc, sc); TASK_INIT(&sc->sc_bawatchdogtask, 0, mwl_bawatchdog_proc, sc); /* NB: insure BK queue is the lowest priority h/w queue */ if (!mwl_tx_setup(sc, WME_AC_BK, MWL_WME_AC_BK)) { device_printf(sc->sc_dev, "unable to setup xmit queue for %s traffic!\n", ieee80211_wme_acnames[WME_AC_BK]); error = EIO; goto bad2; } if (!mwl_tx_setup(sc, WME_AC_BE, MWL_WME_AC_BE) || !mwl_tx_setup(sc, WME_AC_VI, MWL_WME_AC_VI) || !mwl_tx_setup(sc, WME_AC_VO, MWL_WME_AC_VO)) { /* * Not enough hardware tx queues to properly do WME; * just punt and assign them all to the same h/w queue. * We could do a better job of this if, for example, * we allocate queues when we switch from station to * AP mode. */ if (sc->sc_ac2q[WME_AC_VI] != NULL) mwl_tx_cleanupq(sc, sc->sc_ac2q[WME_AC_VI]); if (sc->sc_ac2q[WME_AC_BE] != NULL) mwl_tx_cleanupq(sc, sc->sc_ac2q[WME_AC_BE]); sc->sc_ac2q[WME_AC_BE] = sc->sc_ac2q[WME_AC_BK]; sc->sc_ac2q[WME_AC_VI] = sc->sc_ac2q[WME_AC_BK]; sc->sc_ac2q[WME_AC_VO] = sc->sc_ac2q[WME_AC_BK]; } TASK_INIT(&sc->sc_txtask, 0, mwl_tx_proc, sc); ic->ic_softc = sc; ic->ic_name = device_get_nameunit(sc->sc_dev); /* XXX not right but it's not used anywhere important */ ic->ic_phytype = IEEE80211_T_OFDM; ic->ic_opmode = IEEE80211_M_STA; ic->ic_caps = IEEE80211_C_STA /* station mode supported */ | IEEE80211_C_HOSTAP /* hostap mode */ | IEEE80211_C_MONITOR /* monitor mode */ #if 0 | IEEE80211_C_IBSS /* ibss, nee adhoc, mode */ | IEEE80211_C_AHDEMO /* adhoc demo mode */ #endif | IEEE80211_C_MBSS /* mesh point link mode */ | IEEE80211_C_WDS /* WDS supported */ | IEEE80211_C_SHPREAMBLE /* short preamble supported */ | IEEE80211_C_SHSLOT /* short slot time supported */ | IEEE80211_C_WME /* WME/WMM supported */ | IEEE80211_C_BURST /* xmit bursting supported */ | IEEE80211_C_WPA /* capable of WPA1+WPA2 */ | IEEE80211_C_BGSCAN /* capable of bg scanning */ | IEEE80211_C_TXFRAG /* handle tx frags */ | IEEE80211_C_TXPMGT /* capable of txpow mgt */ | IEEE80211_C_DFS /* DFS supported */ ; ic->ic_htcaps = IEEE80211_HTCAP_SMPS_ENA /* SM PS mode enabled */ | IEEE80211_HTCAP_CHWIDTH40 /* 40MHz channel width */ | IEEE80211_HTCAP_SHORTGI20 /* short GI in 20MHz */ | IEEE80211_HTCAP_SHORTGI40 /* short GI in 40MHz */ | IEEE80211_HTCAP_RXSTBC_2STREAM/* 1-2 spatial streams */ #if MWL_AGGR_SIZE == 7935 | IEEE80211_HTCAP_MAXAMSDU_7935 /* max A-MSDU length */ #else | IEEE80211_HTCAP_MAXAMSDU_3839 /* max A-MSDU length */ #endif #if 0 | IEEE80211_HTCAP_PSMP /* PSMP supported */ | IEEE80211_HTCAP_40INTOLERANT /* 40MHz intolerant */ #endif /* s/w capabilities */ | IEEE80211_HTC_HT /* HT operation */ | IEEE80211_HTC_AMPDU /* tx A-MPDU */ | IEEE80211_HTC_AMSDU /* tx A-MSDU */ | IEEE80211_HTC_SMPS /* SMPS available */ ; /* * Mark h/w crypto support. * XXX no way to query h/w support. 
*/ ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP | IEEE80211_CRYPTO_AES_CCM | IEEE80211_CRYPTO_TKIP | IEEE80211_CRYPTO_TKIPMIC ; /* * Transmit requires space in the packet for a special * format transmit record and optional padding between * this record and the payload. Ask the net80211 layer * to arrange this when encapsulating packets so we can * add it efficiently. */ ic->ic_headroom = sizeof(struct mwltxrec) - sizeof(struct ieee80211_frame); IEEE80211_ADDR_COPY(ic->ic_macaddr, sc->sc_hwspecs.macAddr); /* call MI attach routine. */ ieee80211_ifattach(ic); ic->ic_setregdomain = mwl_setregdomain; ic->ic_getradiocaps = mwl_getradiocaps; /* override default methods */ ic->ic_raw_xmit = mwl_raw_xmit; ic->ic_newassoc = mwl_newassoc; ic->ic_updateslot = mwl_updateslot; ic->ic_update_mcast = mwl_update_mcast; ic->ic_update_promisc = mwl_update_promisc; ic->ic_wme.wme_update = mwl_wme_update; ic->ic_transmit = mwl_transmit; ic->ic_ioctl = mwl_ioctl; ic->ic_parent = mwl_parent; ic->ic_node_alloc = mwl_node_alloc; sc->sc_node_cleanup = ic->ic_node_cleanup; ic->ic_node_cleanup = mwl_node_cleanup; sc->sc_node_drain = ic->ic_node_drain; ic->ic_node_drain = mwl_node_drain; ic->ic_node_getsignal = mwl_node_getsignal; ic->ic_node_getmimoinfo = mwl_node_getmimoinfo; ic->ic_scan_start = mwl_scan_start; ic->ic_scan_end = mwl_scan_end; ic->ic_set_channel = mwl_set_channel; sc->sc_recv_action = ic->ic_recv_action; ic->ic_recv_action = mwl_recv_action; sc->sc_addba_request = ic->ic_addba_request; ic->ic_addba_request = mwl_addba_request; sc->sc_addba_response = ic->ic_addba_response; ic->ic_addba_response = mwl_addba_response; sc->sc_addba_stop = ic->ic_addba_stop; ic->ic_addba_stop = mwl_addba_stop; ic->ic_vap_create = mwl_vap_create; ic->ic_vap_delete = mwl_vap_delete; ieee80211_radiotap_attach(ic, &sc->sc_tx_th.wt_ihdr, sizeof(sc->sc_tx_th), MWL_TX_RADIOTAP_PRESENT, &sc->sc_rx_th.wr_ihdr, sizeof(sc->sc_rx_th), MWL_RX_RADIOTAP_PRESENT); /* * Setup dynamic sysctl's now that country code and * regdomain are available from the hal. */ mwl_sysctlattach(sc); if (bootverbose) ieee80211_announce(ic); mwl_announce(sc); return 0; bad2: mwl_dma_cleanup(sc); bad1: mwl_hal_detach(mh); bad: MWL_RXFREE_DESTROY(sc); sc->sc_invalid = 1; return error; } int mwl_detach(struct mwl_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; MWL_LOCK(sc); mwl_stop(sc); MWL_UNLOCK(sc); /* * NB: the order of these is important: * o call the 802.11 layer before detaching the hal to * insure callbacks into the driver to delete global * key cache entries can be handled * o reclaim the tx queue data structures after calling * the 802.11 layer as we'll get called back to reclaim * node state and potentially want to use them * o to cleanup the tx queues the hal is called, so detach * it last * Other than that, it's straightforward... */ ieee80211_ifdetach(ic); callout_drain(&sc->sc_watchdog); mwl_dma_cleanup(sc); MWL_RXFREE_DESTROY(sc); mwl_tx_cleanup(sc); mwl_hal_detach(sc->sc_mh); mbufq_drain(&sc->sc_snd); return 0; } /* * MAC address handling for multiple BSS on the same radio. * The first vap uses the MAC address from the EEPROM. For * subsequent vap's we set the U/L bit (bit 1) in the MAC * address and use the next six bits as an index. 
*/ static void assign_address(struct mwl_softc *sc, uint8_t mac[IEEE80211_ADDR_LEN], int clone) { int i; if (clone && mwl_hal_ismbsscapable(sc->sc_mh)) { /* NB: we only do this if h/w supports multiple bssid */ for (i = 0; i < 32; i++) if ((sc->sc_bssidmask & (1<<i)) == 0) break; mac[0] |= (i << 2)|0x2; } else i = 0; sc->sc_bssidmask |= 1<<i; if (i == 0) sc->sc_nbssid0++; } static void reclaim_address(struct mwl_softc *sc, const uint8_t mac[IEEE80211_ADDR_LEN]) { int i = mac[0] >> 2; if (i != 0 || --sc->sc_nbssid0 == 0) sc->sc_bssidmask &= ~(1<<i); } static struct ieee80211vap * mwl_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac0[IEEE80211_ADDR_LEN]) { struct mwl_softc *sc = ic->ic_softc; struct mwl_hal *mh = sc->sc_mh; struct ieee80211vap *vap, *apvap; struct mwl_hal_vap *hvap; struct mwl_vap *mvp; uint8_t mac[IEEE80211_ADDR_LEN]; IEEE80211_ADDR_COPY(mac, mac0); switch (opmode) { case IEEE80211_M_HOSTAP: case IEEE80211_M_MBSS: if ((flags & IEEE80211_CLONE_MACADDR) == 0) assign_address(sc, mac, flags & IEEE80211_CLONE_BSSID); hvap = mwl_hal_newvap(mh, MWL_HAL_AP, mac); if (hvap == NULL) { if ((flags & IEEE80211_CLONE_MACADDR) == 0) reclaim_address(sc, mac); return NULL; } break; case IEEE80211_M_STA: if ((flags & IEEE80211_CLONE_MACADDR) == 0) assign_address(sc, mac, flags & IEEE80211_CLONE_BSSID); hvap = mwl_hal_newvap(mh, MWL_HAL_STA, mac); if (hvap == NULL) { if ((flags & IEEE80211_CLONE_MACADDR) == 0) reclaim_address(sc, mac); return NULL; } /* no h/w beacon miss support; always use s/w */ flags |= IEEE80211_CLONE_NOBEACONS; break; case IEEE80211_M_WDS: hvap = NULL; /* NB: we use associated AP vap */ if (sc->sc_napvaps == 0) return NULL; /* no existing AP vap */ break; case IEEE80211_M_MONITOR: hvap = NULL; break; case IEEE80211_M_IBSS: case IEEE80211_M_AHDEMO: default: return NULL; } mvp = malloc(sizeof(struct mwl_vap), M_80211_VAP, M_WAITOK | M_ZERO); mvp->mv_hvap = hvap; if (opmode == IEEE80211_M_WDS) { /* * WDS vaps must have an associated AP vap; find one. * XXX not right. */ TAILQ_FOREACH(apvap, &ic->ic_vaps, iv_next) if (apvap->iv_opmode == IEEE80211_M_HOSTAP) { mvp->mv_ap_hvap = MWL_VAP(apvap)->mv_hvap; break; } KASSERT(mvp->mv_ap_hvap != NULL, ("no ap vap")); } vap = &mvp->mv_vap; ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); /* override with driver methods */ mvp->mv_newstate = vap->iv_newstate; vap->iv_newstate = mwl_newstate; vap->iv_max_keyix = 0; /* XXX */ vap->iv_key_alloc = mwl_key_alloc; vap->iv_key_delete = mwl_key_delete; vap->iv_key_set = mwl_key_set; #ifdef MWL_HOST_PS_SUPPORT if (opmode == IEEE80211_M_HOSTAP || opmode == IEEE80211_M_MBSS) { vap->iv_update_ps = mwl_update_ps; mvp->mv_set_tim = vap->iv_set_tim; vap->iv_set_tim = mwl_set_tim; } #endif vap->iv_reset = mwl_reset; vap->iv_update_beacon = mwl_beacon_update; /* override max aid so sta's cannot assoc when we're out of sta id's */ vap->iv_max_aid = MWL_MAXSTAID; /* override default A-MPDU rx parameters */ vap->iv_ampdu_rxmax = IEEE80211_HTCAP_MAXRXAMPDU_64K; vap->iv_ampdu_density = IEEE80211_HTCAP_MPDUDENSITY_4; /* complete setup */ ieee80211_vap_attach(vap, mwl_media_change, ieee80211_media_status, mac); switch (vap->iv_opmode) { case IEEE80211_M_HOSTAP: case IEEE80211_M_MBSS: case IEEE80211_M_STA: /* * Setup sta db entry for local address. */ mwl_localstadb(vap); if (vap->iv_opmode == IEEE80211_M_HOSTAP || vap->iv_opmode == IEEE80211_M_MBSS) sc->sc_napvaps++; else sc->sc_nstavaps++; break; case IEEE80211_M_WDS: sc->sc_nwdsvaps++; break; default: break; } /* * Setup overall operating mode. 
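A worked instance of the mac[0] encoding implemented by assign_address()/reclaim_address() above: for BSSID slot i = 3, assign_address() computes (3 << 2) | 0x2 == 0x0e, that is, the locally administered bit (0x2) with the slot index in the next six bits, and reclaim_address() recovers the slot as 0x0e >> 2 == 3.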
*/ if (sc->sc_napvaps) ic->ic_opmode = IEEE80211_M_HOSTAP; else if (sc->sc_nstavaps) ic->ic_opmode = IEEE80211_M_STA; else ic->ic_opmode = opmode; return vap; } static void mwl_vap_delete(struct ieee80211vap *vap) { struct mwl_vap *mvp = MWL_VAP(vap); struct mwl_softc *sc = vap->iv_ic->ic_softc; struct mwl_hal *mh = sc->sc_mh; struct mwl_hal_vap *hvap = mvp->mv_hvap; enum ieee80211_opmode opmode = vap->iv_opmode; /* XXX disallow ap vap delete if WDS still present */ if (sc->sc_running) { /* quiesce h/w while we remove the vap */ mwl_hal_intrset(mh, 0); /* disable interrupts */ } ieee80211_vap_detach(vap); switch (opmode) { case IEEE80211_M_HOSTAP: case IEEE80211_M_MBSS: case IEEE80211_M_STA: KASSERT(hvap != NULL, ("no hal vap handle")); (void) mwl_hal_delstation(hvap, vap->iv_myaddr); mwl_hal_delvap(hvap); if (opmode == IEEE80211_M_HOSTAP || opmode == IEEE80211_M_MBSS) sc->sc_napvaps--; else sc->sc_nstavaps--; /* XXX don't do it for IEEE80211_CLONE_MACADDR */ reclaim_address(sc, vap->iv_myaddr); break; case IEEE80211_M_WDS: sc->sc_nwdsvaps--; break; default: break; } mwl_cleartxq(sc, vap); free(mvp, M_80211_VAP); if (sc->sc_running) mwl_hal_intrset(mh, sc->sc_imask); } void mwl_suspend(struct mwl_softc *sc) { MWL_LOCK(sc); mwl_stop(sc); MWL_UNLOCK(sc); } void mwl_resume(struct mwl_softc *sc) { int error = EDOOFUS; MWL_LOCK(sc); if (sc->sc_ic.ic_nrunning > 0) error = mwl_init(sc); MWL_UNLOCK(sc); if (error == 0) ieee80211_start_all(&sc->sc_ic); /* start all vap's */ } void mwl_shutdown(void *arg) { struct mwl_softc *sc = arg; MWL_LOCK(sc); mwl_stop(sc); MWL_UNLOCK(sc); } /* * Interrupt handler. Most of the actual processing is deferred. */ void mwl_intr(void *arg) { struct mwl_softc *sc = arg; struct mwl_hal *mh = sc->sc_mh; uint32_t status; if (sc->sc_invalid) { /* * The hardware is not ready/present, don't touch anything. * Note this can happen early on if the IRQ is shared. */ DPRINTF(sc, MWL_DEBUG_ANY, "%s: invalid; ignored\n", __func__); return; } /* * Figure out the reason(s) for the interrupt. */ mwl_hal_getisr(mh, &status); /* NB: clears ISR too */ if (status == 0) /* must be a shared irq */ return; DPRINTF(sc, MWL_DEBUG_INTR, "%s: status 0x%x imask 0x%x\n", __func__, status, sc->sc_imask); if (status & MACREG_A2HRIC_BIT_RX_RDY) taskqueue_enqueue(sc->sc_tq, &sc->sc_rxtask); if (status & MACREG_A2HRIC_BIT_TX_DONE) taskqueue_enqueue(sc->sc_tq, &sc->sc_txtask); if (status & MACREG_A2HRIC_BIT_BA_WATCHDOG) taskqueue_enqueue(sc->sc_tq, &sc->sc_bawatchdogtask); if (status & MACREG_A2HRIC_BIT_OPC_DONE) mwl_hal_cmddone(mh); if (status & MACREG_A2HRIC_BIT_MAC_EVENT) { ; } if (status & MACREG_A2HRIC_BIT_ICV_ERROR) { /* TKIP ICV error */ sc->sc_stats.mst_rx_badtkipicv++; } if (status & MACREG_A2HRIC_BIT_QUEUE_EMPTY) { /* 11n aggregation queue is empty, re-fill */ ; } if (status & MACREG_A2HRIC_BIT_QUEUE_FULL) { ; } if (status & MACREG_A2HRIC_BIT_RADAR_DETECT) { /* radar detected, process event */ taskqueue_enqueue(sc->sc_tq, &sc->sc_radartask); } if (status & MACREG_A2HRIC_BIT_CHAN_SWITCH) { /* DFS channel switch */ taskqueue_enqueue(sc->sc_tq, &sc->sc_chanswitchtask); } } static void mwl_radar_proc(void *arg, int pending) { struct mwl_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; DPRINTF(sc, MWL_DEBUG_ANY, "%s: radar detected, pending %u\n", __func__, pending); sc->sc_stats.mst_radardetect++; /* XXX stop h/w BA streams? 
*/ IEEE80211_LOCK(ic); ieee80211_dfs_notify_radar(ic, ic->ic_curchan); IEEE80211_UNLOCK(ic); } static void mwl_chanswitch_proc(void *arg, int pending) { struct mwl_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; DPRINTF(sc, MWL_DEBUG_ANY, "%s: channel switch notice, pending %u\n", __func__, pending); IEEE80211_LOCK(ic); sc->sc_csapending = 0; ieee80211_csa_completeswitch(ic); IEEE80211_UNLOCK(ic); } static void mwl_bawatchdog(const MWL_HAL_BASTREAM *sp) { struct ieee80211_node *ni = sp->data[0]; /* send DELBA and drop the stream */ ieee80211_ampdu_stop(ni, sp->data[1], IEEE80211_REASON_UNSPECIFIED); } static void mwl_bawatchdog_proc(void *arg, int pending) { struct mwl_softc *sc = arg; struct mwl_hal *mh = sc->sc_mh; const MWL_HAL_BASTREAM *sp; uint8_t bitmap, n; sc->sc_stats.mst_bawatchdog++; if (mwl_hal_getwatchdogbitmap(mh, &bitmap) != 0) { DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: could not get bitmap\n", __func__); sc->sc_stats.mst_bawatchdog_failed++; return; } DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: bitmap 0x%x\n", __func__, bitmap); if (bitmap == 0xff) { n = 0; /* disable all ba streams */ for (bitmap = 0; bitmap < 8; bitmap++) { sp = mwl_hal_bastream_lookup(mh, bitmap); if (sp != NULL) { mwl_bawatchdog(sp); n++; } } if (n == 0) { DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: no BA streams found\n", __func__); sc->sc_stats.mst_bawatchdog_empty++; } } else if (bitmap != 0xaa) { /* disable a single ba stream */ sp = mwl_hal_bastream_lookup(mh, bitmap); if (sp != NULL) { mwl_bawatchdog(sp); } else { DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: no BA stream %d\n", __func__, bitmap); sc->sc_stats.mst_bawatchdog_notfound++; } } } /* * Convert net80211 channel to a HAL channel. */ static void mwl_mapchan(MWL_HAL_CHANNEL *hc, const struct ieee80211_channel *chan) { hc->channel = chan->ic_ieee; *(uint32_t *)&hc->channelFlags = 0; if (IEEE80211_IS_CHAN_2GHZ(chan)) hc->channelFlags.FreqBand = MWL_FREQ_BAND_2DOT4GHZ; else if (IEEE80211_IS_CHAN_5GHZ(chan)) hc->channelFlags.FreqBand = MWL_FREQ_BAND_5GHZ; if (IEEE80211_IS_CHAN_HT40(chan)) { hc->channelFlags.ChnlWidth = MWL_CH_40_MHz_WIDTH; if (IEEE80211_IS_CHAN_HT40U(chan)) hc->channelFlags.ExtChnlOffset = MWL_EXT_CH_ABOVE_CTRL_CH; else hc->channelFlags.ExtChnlOffset = MWL_EXT_CH_BELOW_CTRL_CH; } else hc->channelFlags.ChnlWidth = MWL_CH_20_MHz_WIDTH; /* XXX 10MHz channels */ } /* * Inform firmware of our tx/rx dma setup. The BAR 0 * writes below are for compatibility with older firmware. * For current firmware we send this information with a * cmd block via mwl_hal_sethwdma. */ static int mwl_setupdma(struct mwl_softc *sc) { int error, i; sc->sc_hwdma.rxDescRead = sc->sc_rxdma.dd_desc_paddr; WR4(sc, sc->sc_hwspecs.rxDescRead, sc->sc_hwdma.rxDescRead); WR4(sc, sc->sc_hwspecs.rxDescWrite, sc->sc_hwdma.rxDescRead); for (i = 0; i < MWL_NUM_TX_QUEUES-MWL_NUM_ACK_QUEUES; i++) { struct mwl_txq *txq = &sc->sc_txq[i]; sc->sc_hwdma.wcbBase[i] = txq->dma.dd_desc_paddr; WR4(sc, sc->sc_hwspecs.wcbBase[i], sc->sc_hwdma.wcbBase[i]); } sc->sc_hwdma.maxNumTxWcb = mwl_txbuf; sc->sc_hwdma.maxNumWCB = MWL_NUM_TX_QUEUES-MWL_NUM_ACK_QUEUES; error = mwl_hal_sethwdma(sc->sc_mh, &sc->sc_hwdma); if (error != 0) { device_printf(sc->sc_dev, "unable to setup tx/rx dma; hal status %u\n", error); /* XXX */ } return error; } /* * Inform firmware of tx rate parameters. * Called after a channel change. 
*/ static int mwl_setcurchanrates(struct mwl_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; const struct ieee80211_rateset *rs; MWL_HAL_TXRATE rates; memset(&rates, 0, sizeof(rates)); rs = ieee80211_get_suprates(ic, ic->ic_curchan); /* rate used to send management frames */ rates.MgtRate = rs->rs_rates[0] & IEEE80211_RATE_VAL; /* rate used to send multicast frames */ rates.McastRate = rates.MgtRate; return mwl_hal_settxrate_auto(sc->sc_mh, &rates); } /* * Inform firmware of tx rate parameters. Called whenever * user-settable params change and after a channel change. */ static int mwl_setrates(struct ieee80211vap *vap) { struct mwl_vap *mvp = MWL_VAP(vap); struct ieee80211_node *ni = vap->iv_bss; const struct ieee80211_txparam *tp = ni->ni_txparms; MWL_HAL_TXRATE rates; KASSERT(vap->iv_state == IEEE80211_S_RUN, ("state %d", vap->iv_state)); /* * Update the h/w rate map. * NB: 0x80 for MCS is passed through unchanged */ memset(&rates, 0, sizeof(rates)); /* rate used to send management frames */ rates.MgtRate = tp->mgmtrate; /* rate used to send multicast frames */ rates.McastRate = tp->mcastrate; /* while here calculate EAPOL fixed rate cookie */ mvp->mv_eapolformat = htole16(mwl_calcformat(rates.MgtRate, ni)); return mwl_hal_settxrate(mvp->mv_hvap, tp->ucastrate != IEEE80211_FIXED_RATE_NONE ? RATE_FIXED : RATE_AUTO, &rates); } /* * Setup a fixed xmit rate cookie for EAPOL frames. */ static void mwl_seteapolformat(struct ieee80211vap *vap) { struct mwl_vap *mvp = MWL_VAP(vap); struct ieee80211_node *ni = vap->iv_bss; enum ieee80211_phymode mode; uint8_t rate; KASSERT(vap->iv_state == IEEE80211_S_RUN, ("state %d", vap->iv_state)); mode = ieee80211_chan2mode(ni->ni_chan); /* * Use legacy rates when operating a mixed HT+non-HT bss. * NB: this may violate POLA for sta and wds vap's. */ if (mode == IEEE80211_MODE_11NA && (vap->iv_flags_ht & IEEE80211_FHT_PUREN) == 0) rate = vap->iv_txparms[IEEE80211_MODE_11A].mgmtrate; else if (mode == IEEE80211_MODE_11NG && (vap->iv_flags_ht & IEEE80211_FHT_PUREN) == 0) rate = vap->iv_txparms[IEEE80211_MODE_11G].mgmtrate; else rate = vap->iv_txparms[mode].mgmtrate; mvp->mv_eapolformat = htole16(mwl_calcformat(rate, ni)); } /* * Map SKU+country code to region code for radar bin'ing. */ static int mwl_map2regioncode(const struct ieee80211_regdomain *rd) { switch (rd->regdomain) { case SKU_FCC: case SKU_FCC3: return DOMAIN_CODE_FCC; case SKU_CA: return DOMAIN_CODE_IC; case SKU_ETSI: case SKU_ETSI2: case SKU_ETSI3: if (rd->country == CTRY_SPAIN) return DOMAIN_CODE_SPAIN; if (rd->country == CTRY_FRANCE || rd->country == CTRY_FRANCE2) return DOMAIN_CODE_FRANCE; /* XXX force 1.3.1 radar type */ return DOMAIN_CODE_ETSI_131; case SKU_JAPAN: return DOMAIN_CODE_MKK; case SKU_ROW: return DOMAIN_CODE_DGT; /* Taiwan */ case SKU_APAC: case SKU_APAC2: case SKU_APAC3: return DOMAIN_CODE_AUS; /* Australia */ } /* XXX KOREA? */ return DOMAIN_CODE_FCC; /* XXX? 
*/ } static int mwl_hal_reset(struct mwl_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct mwl_hal *mh = sc->sc_mh; mwl_hal_setantenna(mh, WL_ANTENNATYPE_RX, sc->sc_rxantenna); mwl_hal_setantenna(mh, WL_ANTENNATYPE_TX, sc->sc_txantenna); mwl_hal_setradio(mh, 1, WL_AUTO_PREAMBLE); mwl_hal_setwmm(sc->sc_mh, (ic->ic_flags & IEEE80211_F_WME) != 0); mwl_chan_set(sc, ic->ic_curchan); /* NB: RF/RA performance tuned for indoor mode */ mwl_hal_setrateadaptmode(mh, 0); mwl_hal_setoptimizationlevel(mh, (ic->ic_flags & IEEE80211_F_BURST) != 0); mwl_hal_setregioncode(mh, mwl_map2regioncode(&ic->ic_regdomain)); mwl_hal_setaggampduratemode(mh, 1, 80); /* XXX */ mwl_hal_setcfend(mh, 0); /* XXX */ return 1; } static int mwl_init(struct mwl_softc *sc) { struct mwl_hal *mh = sc->sc_mh; int error = 0; MWL_LOCK_ASSERT(sc); /* * Stop anything previously setup. This is safe * whether this is the first time through or not. */ mwl_stop(sc); /* * Push vap-independent state to the firmware. */ if (!mwl_hal_reset(sc)) { device_printf(sc->sc_dev, "unable to reset hardware\n"); return EIO; } /* * Setup recv (once); transmit is already good to go. */ error = mwl_startrecv(sc); if (error != 0) { device_printf(sc->sc_dev, "unable to start recv logic\n"); return error; } /* * Enable interrupts. */ sc->sc_imask = MACREG_A2HRIC_BIT_RX_RDY | MACREG_A2HRIC_BIT_TX_DONE | MACREG_A2HRIC_BIT_OPC_DONE #if 0 | MACREG_A2HRIC_BIT_MAC_EVENT #endif | MACREG_A2HRIC_BIT_ICV_ERROR | MACREG_A2HRIC_BIT_RADAR_DETECT | MACREG_A2HRIC_BIT_CHAN_SWITCH #if 0 | MACREG_A2HRIC_BIT_QUEUE_EMPTY #endif | MACREG_A2HRIC_BIT_BA_WATCHDOG | MACREQ_A2HRIC_BIT_TX_ACK ; sc->sc_running = 1; mwl_hal_intrset(mh, sc->sc_imask); callout_reset(&sc->sc_watchdog, hz, mwl_watchdog, sc); return 0; } static void mwl_stop(struct mwl_softc *sc) { MWL_LOCK_ASSERT(sc); if (sc->sc_running) { /* * Shutdown the hardware and driver. */ sc->sc_running = 0; callout_stop(&sc->sc_watchdog); sc->sc_tx_timer = 0; mwl_draintxq(sc); } } static int mwl_reset_vap(struct ieee80211vap *vap, int state) { struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; struct ieee80211com *ic = vap->iv_ic; if (state == IEEE80211_S_RUN) mwl_setrates(vap); /* XXX off by 1? */ mwl_hal_setrtsthreshold(hvap, vap->iv_rtsthreshold); /* XXX auto? 20/40 split? */ mwl_hal_sethtgi(hvap, (vap->iv_flags_ht & (IEEE80211_FHT_SHORTGI20|IEEE80211_FHT_SHORTGI40)) ? 1 : 0); mwl_hal_setnprot(hvap, ic->ic_htprotmode == IEEE80211_PROT_NONE ? HTPROTECT_NONE : HTPROTECT_AUTO); /* XXX txpower cap */ /* re-setup beacons */ if (state == IEEE80211_S_RUN && (vap->iv_opmode == IEEE80211_M_HOSTAP || vap->iv_opmode == IEEE80211_M_MBSS || vap->iv_opmode == IEEE80211_M_IBSS)) { mwl_setapmode(vap, vap->iv_bss->ni_chan); mwl_hal_setnprotmode(hvap, MS(ic->ic_curhtprotmode, IEEE80211_HTINFO_OPMODE)); return mwl_beacon_setup(vap); } return 0; } /* * Reset the hardware w/o losing operational state. * Used to reset or reload hardware state for a vap. */ static int mwl_reset(struct ieee80211vap *vap, u_long cmd) { struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; int error = 0; if (hvap != NULL) { /* WDS, MONITOR, etc. */ struct ieee80211com *ic = vap->iv_ic; struct mwl_softc *sc = ic->ic_softc; struct mwl_hal *mh = sc->sc_mh; /* XXX handle DWDS sta vap change */ /* XXX do we need to disable interrupts? */ mwl_hal_intrset(mh, 0); /* disable interrupts */ error = mwl_reset_vap(vap, vap->iv_state); mwl_hal_intrset(mh, sc->sc_imask); } return error; } /* * Allocate a tx buffer for sending a frame. 
The * packet is assumed to have the WME AC stored so * we can use it to select the appropriate h/w queue. */ static struct mwl_txbuf * mwl_gettxbuf(struct mwl_softc *sc, struct mwl_txq *txq) { struct mwl_txbuf *bf; /* * Grab a TX buffer and associated resources. */ MWL_TXQ_LOCK(txq); bf = STAILQ_FIRST(&txq->free); if (bf != NULL) { STAILQ_REMOVE_HEAD(&txq->free, bf_list); txq->nfree--; } MWL_TXQ_UNLOCK(txq); if (bf == NULL) DPRINTF(sc, MWL_DEBUG_XMIT, "%s: out of xmit buffers on q %d\n", __func__, txq->qnum); return bf; } /* * Return a tx buffer to the queue it came from. Note there * are two cases because we must preserve the order of buffers * as it reflects the fixed order of descriptors in memory * (the firmware pre-fetches descriptors so we cannot reorder). */ static void mwl_puttxbuf_head(struct mwl_txq *txq, struct mwl_txbuf *bf) { bf->bf_m = NULL; bf->bf_node = NULL; MWL_TXQ_LOCK(txq); STAILQ_INSERT_HEAD(&txq->free, bf, bf_list); txq->nfree++; MWL_TXQ_UNLOCK(txq); } static void mwl_puttxbuf_tail(struct mwl_txq *txq, struct mwl_txbuf *bf) { bf->bf_m = NULL; bf->bf_node = NULL; MWL_TXQ_LOCK(txq); STAILQ_INSERT_TAIL(&txq->free, bf, bf_list); txq->nfree++; MWL_TXQ_UNLOCK(txq); } static int mwl_transmit(struct ieee80211com *ic, struct mbuf *m) { struct mwl_softc *sc = ic->ic_softc; int error; MWL_LOCK(sc); if (!sc->sc_running) { MWL_UNLOCK(sc); return (ENXIO); } error = mbufq_enqueue(&sc->sc_snd, m); if (error) { MWL_UNLOCK(sc); return (error); } mwl_start(sc); MWL_UNLOCK(sc); return (0); } static void mwl_start(struct mwl_softc *sc) { struct ieee80211_node *ni; struct mwl_txbuf *bf; struct mbuf *m; struct mwl_txq *txq = NULL; /* XXX silence gcc */ int nqueued; MWL_LOCK_ASSERT(sc); if (!sc->sc_running || sc->sc_invalid) return; nqueued = 0; while ((m = mbufq_dequeue(&sc->sc_snd)) != NULL) { /* * Grab the node for the destination. */ ni = (struct ieee80211_node *) m->m_pkthdr.rcvif; KASSERT(ni != NULL, ("no node")); m->m_pkthdr.rcvif = NULL; /* committed, clear ref */ /* * Grab a TX buffer and associated resources. * We honor the classification by the 802.11 layer. */ txq = sc->sc_ac2q[M_WME_GETAC(m)]; bf = mwl_gettxbuf(sc, txq); if (bf == NULL) { m_freem(m); ieee80211_free_node(ni); #ifdef MWL_TX_NODROP sc->sc_stats.mst_tx_qstop++; break; #else DPRINTF(sc, MWL_DEBUG_XMIT, "%s: tail drop on q %d\n", __func__, txq->qnum); sc->sc_stats.mst_tx_qdrop++; continue; #endif /* MWL_TX_NODROP */ } /* * Pass the frame to the h/w for transmission. */ if (mwl_tx_start(sc, ni, bf, m)) { if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); mwl_puttxbuf_head(txq, bf); ieee80211_free_node(ni); continue; } nqueued++; if (nqueued >= mwl_txcoalesce) { /* * Poke the firmware to process queued frames; * see below about (lack of) locking. */ nqueued = 0; mwl_hal_txstart(sc->sc_mh, 0/*XXX*/); } } if (nqueued) { /* * NB: We don't need to lock against tx done because * this just prods the firmware to check the transmit * descriptors. The firmware will also start fetching * descriptors by itself if it notices new ones are * present when it goes to deliver a tx done interrupt * to the host. So if we race with tx done processing * it's ok. Delivering the kick here rather than in * mwl_tx_start is an optimization to avoid poking the * firmware for each packet. * * NB: the queue id isn't used so 0 is ok. 
*/ mwl_hal_txstart(sc->sc_mh, 0/*XXX*/); } } static int mwl_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) { struct ieee80211com *ic = ni->ni_ic; struct mwl_softc *sc = ic->ic_softc; struct mwl_txbuf *bf; struct mwl_txq *txq; if (!sc->sc_running || sc->sc_invalid) { m_freem(m); return ENETDOWN; } /* * Grab a TX buffer and associated resources. * Note that we depend on the classification * by the 802.11 layer to get to the right h/w * queue. Management frames must ALWAYS go on * queue 1 but we cannot just force that here * because we may receive non-mgt frames. */ txq = sc->sc_ac2q[M_WME_GETAC(m)]; bf = mwl_gettxbuf(sc, txq); if (bf == NULL) { sc->sc_stats.mst_tx_qstop++; m_freem(m); return ENOBUFS; } /* * Pass the frame to the h/w for transmission. */ if (mwl_tx_start(sc, ni, bf, m)) { mwl_puttxbuf_head(txq, bf); return EIO; /* XXX */ } /* * NB: We don't need to lock against tx done because * this just prods the firmware to check the transmit * descriptors. The firmware will also start fetching * descriptors by itself if it notices new ones are * present when it goes to deliver a tx done interrupt * to the host. So if we race with tx done processing * it's ok. Delivering the kick here rather than in * mwl_tx_start is an optimization to avoid poking the * firmware for each packet. * * NB: the queue id isn't used so 0 is ok. */ mwl_hal_txstart(sc->sc_mh, 0/*XXX*/); return 0; } static int mwl_media_change(struct ifnet *ifp) { struct ieee80211vap *vap = ifp->if_softc; int error; error = ieee80211_media_change(ifp); /* NB: only the fixed rate can change and that doesn't need a reset */ if (error == ENETRESET) { mwl_setrates(vap); error = 0; } return error; } #ifdef MWL_DEBUG static void mwl_keyprint(struct mwl_softc *sc, const char *tag, const MWL_HAL_KEYVAL *hk, const uint8_t mac[IEEE80211_ADDR_LEN]) { static const char *ciphers[] = { "WEP", "TKIP", "AES-CCM", }; int i, n; printf("%s: [%u] %-7s", tag, hk->keyIndex, ciphers[hk->keyTypeId]); for (i = 0, n = hk->keyLen; i < n; i++) printf(" %02x", hk->key.aes[i]); printf(" mac %s", ether_sprintf(mac)); if (hk->keyTypeId == KEY_TYPE_ID_TKIP) { printf(" %s", "rxmic"); for (i = 0; i < sizeof(hk->key.tkip.rxMic); i++) printf(" %02x", hk->key.tkip.rxMic[i]); printf(" txmic"); for (i = 0; i < sizeof(hk->key.tkip.txMic); i++) printf(" %02x", hk->key.tkip.txMic[i]); } printf(" flags 0x%x\n", hk->keyFlags); } #endif /* * Allocate a key cache slot for a unicast key. The * firmware handles key allocation and every station is * guaranteed key space so we are always successful. */ static int mwl_key_alloc(struct ieee80211vap *vap, struct ieee80211_key *k, ieee80211_keyix *keyix, ieee80211_keyix *rxkeyix) { struct mwl_softc *sc = vap->iv_ic->ic_softc; if (k->wk_keyix != IEEE80211_KEYIX_NONE || (k->wk_flags & IEEE80211_KEY_GROUP)) { if (!(&vap->iv_nw_keys[0] <= k && k < &vap->iv_nw_keys[IEEE80211_WEP_NKID])) { /* should not happen */ DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: bogus group key\n", __func__); return 0; } /* give the caller what they requested */ *keyix = *rxkeyix = ieee80211_crypto_get_key_wepidx(vap, k); } else { /* * Firmware handles key allocation. */ *keyix = *rxkeyix = 0; } return 1; } /* * Delete a key entry allocated by mwl_key_alloc. 
*/ static int mwl_key_delete(struct ieee80211vap *vap, const struct ieee80211_key *k) { struct mwl_softc *sc = vap->iv_ic->ic_softc; struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; MWL_HAL_KEYVAL hk; const uint8_t bcastaddr[IEEE80211_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; if (hvap == NULL) { if (vap->iv_opmode != IEEE80211_M_WDS) { /* XXX monitor mode? */ DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: no hvap for opmode %d\n", __func__, vap->iv_opmode); return 0; } hvap = MWL_VAP(vap)->mv_ap_hvap; } DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: delete key %u\n", __func__, k->wk_keyix); memset(&hk, 0, sizeof(hk)); hk.keyIndex = k->wk_keyix; switch (k->wk_cipher->ic_cipher) { case IEEE80211_CIPHER_WEP: hk.keyTypeId = KEY_TYPE_ID_WEP; break; case IEEE80211_CIPHER_TKIP: hk.keyTypeId = KEY_TYPE_ID_TKIP; break; case IEEE80211_CIPHER_AES_CCM: hk.keyTypeId = KEY_TYPE_ID_AES; break; default: /* XXX should not happen */ DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: unknown cipher %d\n", __func__, k->wk_cipher->ic_cipher); return 0; } return (mwl_hal_keyreset(hvap, &hk, bcastaddr) == 0); /*XXX*/ } static __inline int addgroupflags(MWL_HAL_KEYVAL *hk, const struct ieee80211_key *k) { if (k->wk_flags & IEEE80211_KEY_GROUP) { if (k->wk_flags & IEEE80211_KEY_XMIT) hk->keyFlags |= KEY_FLAG_TXGROUPKEY; if (k->wk_flags & IEEE80211_KEY_RECV) hk->keyFlags |= KEY_FLAG_RXGROUPKEY; return 1; } else return 0; } /* * Set the key cache contents for the specified key. Key cache * slot(s) must already have been allocated by mwl_key_alloc. */ static int mwl_key_set(struct ieee80211vap *vap, const struct ieee80211_key *k) { return (_mwl_key_set(vap, k, k->wk_macaddr)); } static int _mwl_key_set(struct ieee80211vap *vap, const struct ieee80211_key *k, const uint8_t mac[IEEE80211_ADDR_LEN]) { #define GRPXMIT (IEEE80211_KEY_XMIT | IEEE80211_KEY_GROUP) /* NB: static wep keys are marked GROUP+tx/rx; GTK will be tx or rx */ #define IEEE80211_IS_STATICKEY(k) \ (((k)->wk_flags & (GRPXMIT|IEEE80211_KEY_RECV)) == \ (GRPXMIT|IEEE80211_KEY_RECV)) struct mwl_softc *sc = vap->iv_ic->ic_softc; struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; const struct ieee80211_cipher *cip = k->wk_cipher; const uint8_t *macaddr; MWL_HAL_KEYVAL hk; KASSERT((k->wk_flags & IEEE80211_KEY_SWCRYPT) == 0, ("s/w crypto set?")); if (hvap == NULL) { if (vap->iv_opmode != IEEE80211_M_WDS) { /* XXX monitor mode? 
*/ DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: no hvap for opmode %d\n", __func__, vap->iv_opmode); return 0; } hvap = MWL_VAP(vap)->mv_ap_hvap; } memset(&hk, 0, sizeof(hk)); hk.keyIndex = k->wk_keyix; switch (cip->ic_cipher) { case IEEE80211_CIPHER_WEP: hk.keyTypeId = KEY_TYPE_ID_WEP; hk.keyLen = k->wk_keylen; if (k->wk_keyix == vap->iv_def_txkey) hk.keyFlags = KEY_FLAG_WEP_TXKEY; if (!IEEE80211_IS_STATICKEY(k)) { /* NB: WEP is never used for the PTK */ (void) addgroupflags(&hk, k); } break; case IEEE80211_CIPHER_TKIP: hk.keyTypeId = KEY_TYPE_ID_TKIP; hk.key.tkip.tsc.high = (uint32_t)(k->wk_keytsc >> 16); hk.key.tkip.tsc.low = (uint16_t)k->wk_keytsc; hk.keyFlags = KEY_FLAG_TSC_VALID | KEY_FLAG_MICKEY_VALID; hk.keyLen = k->wk_keylen + IEEE80211_MICBUF_SIZE; if (!addgroupflags(&hk, k)) hk.keyFlags |= KEY_FLAG_PAIRWISE; break; case IEEE80211_CIPHER_AES_CCM: hk.keyTypeId = KEY_TYPE_ID_AES; hk.keyLen = k->wk_keylen; if (!addgroupflags(&hk, k)) hk.keyFlags |= KEY_FLAG_PAIRWISE; break; default: /* XXX should not happen */ DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: unknown cipher %d\n", __func__, k->wk_cipher->ic_cipher); return 0; } /* * NB: tkip mic keys get copied here too; the layout * just happens to match that in ieee80211_key. */ memcpy(hk.key.aes, k->wk_key, hk.keyLen); /* * Locate address of sta db entry for writing key; * the convention unfortunately is somewhat different * than how net80211, hostapd, and wpa_supplicant think. */ if (vap->iv_opmode == IEEE80211_M_STA) { /* * NB: keys plumbed before the sta reaches AUTH state * will be discarded or written to the wrong sta db * entry because iv_bss is meaningless. This is ok * (right now) because we handle deferred plumbing of * WEP keys when the sta reaches AUTH state. */ macaddr = vap->iv_bss->ni_bssid; if ((k->wk_flags & IEEE80211_KEY_GROUP) == 0) { /* XXX plumb to local sta db too for static key wep */ mwl_hal_keyset(hvap, &hk, vap->iv_myaddr); } } else if (vap->iv_opmode == IEEE80211_M_WDS && vap->iv_state != IEEE80211_S_RUN) { /* * Prior to RUN state a WDS vap will not have its BSS node * set up, so we would plumb the key to the wrong mac * address (it'll be our local address). Workaround * this for the moment by grabbing the correct address. */ macaddr = vap->iv_des_bssid; } else if ((k->wk_flags & GRPXMIT) == GRPXMIT) macaddr = vap->iv_myaddr; else macaddr = mac; KEYPRINTF(sc, &hk, macaddr); return (mwl_hal_keyset(hvap, &hk, macaddr) == 0); #undef IEEE80211_IS_STATICKEY #undef GRPXMIT } /* * Set the multicast filter contents into the hardware. * XXX f/w has no support; just defer to the os. */ static void mwl_setmcastfilter(struct mwl_softc *sc) { #if 0 struct ether_multi *enm; struct ether_multistep estep; uint8_t macs[IEEE80211_ADDR_LEN*MWL_HAL_MCAST_MAX];/* XXX stack use */ uint8_t *mp; int nmc; mp = macs; nmc = 0; ETHER_FIRST_MULTI(estep, &sc->sc_ec, enm); while (enm != NULL) { /* XXX Punt on ranges. */ if (nmc == MWL_HAL_MCAST_MAX || !IEEE80211_ADDR_EQ(enm->enm_addrlo, enm->enm_addrhi)) { ifp->if_flags |= IFF_ALLMULTI; return; } IEEE80211_ADDR_COPY(mp, enm->enm_addrlo); mp += IEEE80211_ADDR_LEN, nmc++; ETHER_NEXT_MULTI(estep, enm); } ifp->if_flags &= ~IFF_ALLMULTI; mwl_hal_setmcast(sc->sc_mh, nmc, macs); #endif } static int mwl_mode_init(struct mwl_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct mwl_hal *mh = sc->sc_mh; mwl_hal_setpromisc(mh, ic->ic_promisc > 0); mwl_setmcastfilter(sc); return 0; } /* * Callback from the 802.11 layer after a multicast state change.
*/ static void mwl_update_mcast(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; mwl_setmcastfilter(sc); } /* * Callback from the 802.11 layer after a promiscuous mode change. * Note this interface does not check the operating mode as this * is an internal callback and we are expected to honor the current * state (e.g. this is used for setting the interface in promiscuous * mode when operating in hostap mode to do ACS). */ static void mwl_update_promisc(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; mwl_hal_setpromisc(sc->sc_mh, ic->ic_promisc > 0); } /* * Callback from the 802.11 layer to update the slot time * based on the current setting. We use it to notify the * firmware of ERP changes and the f/w takes care of things * like slot time and preamble. */ static void mwl_updateslot(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; struct mwl_hal *mh = sc->sc_mh; int prot; /* NB: can be called early; suppress needless cmds */ if (!sc->sc_running) return; /* * Calculate the ERP flags. The firmware will use * this to carry out the appropriate measures. */ prot = 0; if (IEEE80211_IS_CHAN_ANYG(ic->ic_curchan)) { if ((ic->ic_flags & IEEE80211_F_SHSLOT) == 0) prot |= IEEE80211_ERP_NON_ERP_PRESENT; if (ic->ic_flags & IEEE80211_F_USEPROT) prot |= IEEE80211_ERP_USE_PROTECTION; if (ic->ic_flags & IEEE80211_F_USEBARKER) prot |= IEEE80211_ERP_LONG_PREAMBLE; } DPRINTF(sc, MWL_DEBUG_RESET, "%s: chan %u MHz/flags 0x%x %s slot, (prot 0x%x ic_flags 0x%x)\n", __func__, ic->ic_curchan->ic_freq, ic->ic_curchan->ic_flags, ic->ic_flags & IEEE80211_F_SHSLOT ? "short" : "long", prot, ic->ic_flags); mwl_hal_setgprot(mh, prot); } /* * Setup the beacon frame. */ static int mwl_beacon_setup(struct ieee80211vap *vap) { struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; struct ieee80211_node *ni = vap->iv_bss; struct mbuf *m; m = ieee80211_beacon_alloc(ni); if (m == NULL) return ENOBUFS; mwl_hal_setbeacon(hvap, mtod(m, const void *), m->m_len); m_free(m); return 0; } /* * Update the beacon frame in response to a change. */ static void mwl_beacon_update(struct ieee80211vap *vap, int item) { struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; struct ieee80211com *ic = vap->iv_ic; KASSERT(hvap != NULL, ("no beacon")); switch (item) { case IEEE80211_BEACON_ERP: mwl_updateslot(ic); break; case IEEE80211_BEACON_HTINFO: mwl_hal_setnprotmode(hvap, MS(ic->ic_curhtprotmode, IEEE80211_HTINFO_OPMODE)); break; case IEEE80211_BEACON_CAPS: case IEEE80211_BEACON_WME: case IEEE80211_BEACON_APPIE: case IEEE80211_BEACON_CSA: break; case IEEE80211_BEACON_TIM: /* NB: firmware always forms TIM */ return; } /* XXX retain beacon frame and update */ mwl_beacon_setup(vap); } static void mwl_load_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { bus_addr_t *paddr = (bus_addr_t*) arg; KASSERT(error == 0, ("error %u on bus_dma callback", error)); *paddr = segs->ds_addr; } #ifdef MWL_HOST_PS_SUPPORT /* * Handle power save station occupancy changes. */ static void mwl_update_ps(struct ieee80211vap *vap, int nsta) { struct mwl_vap *mvp = MWL_VAP(vap); if (nsta == 0 || mvp->mv_last_ps_sta == 0) mwl_hal_setpowersave_bss(mvp->mv_hvap, nsta); mvp->mv_last_ps_sta = nsta; } /* * Handle associated station power save state changes.
*/ static int mwl_set_tim(struct ieee80211_node *ni, int set) { struct ieee80211vap *vap = ni->ni_vap; struct mwl_vap *mvp = MWL_VAP(vap); if (mvp->mv_set_tim(ni, set)) { /* NB: state change */ mwl_hal_setpowersave_sta(mvp->mv_hvap, IEEE80211_AID(ni->ni_associd), set); return 1; } else return 0; } #endif /* MWL_HOST_PS_SUPPORT */ static int mwl_desc_setup(struct mwl_softc *sc, const char *name, struct mwl_descdma *dd, int nbuf, size_t bufsize, int ndesc, size_t descsize) { uint8_t *ds; int error; DPRINTF(sc, MWL_DEBUG_RESET, "%s: %s DMA: %u bufs (%ju) %u desc/buf (%ju)\n", __func__, name, nbuf, (uintmax_t) bufsize, ndesc, (uintmax_t) descsize); dd->dd_name = name; dd->dd_desc_len = nbuf * ndesc * descsize; /* * Setup DMA descriptor area. */ error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), /* parent */ PAGE_SIZE, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ dd->dd_desc_len, /* maxsize */ 1, /* nsegments */ dd->dd_desc_len, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dd->dd_dmat); if (error != 0) { device_printf(sc->sc_dev, "cannot allocate %s DMA tag\n", dd->dd_name); return error; } /* allocate descriptors */ error = bus_dmamem_alloc(dd->dd_dmat, (void**) &dd->dd_desc, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dd->dd_dmamap); if (error != 0) { device_printf(sc->sc_dev, "unable to alloc memory for %u %s descriptors, " "error %u\n", nbuf * ndesc, dd->dd_name, error); goto fail1; } error = bus_dmamap_load(dd->dd_dmat, dd->dd_dmamap, dd->dd_desc, dd->dd_desc_len, mwl_load_cb, &dd->dd_desc_paddr, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->sc_dev, "unable to map %s descriptors, error %u\n", dd->dd_name, error); goto fail2; } ds = dd->dd_desc; memset(ds, 0, dd->dd_desc_len); DPRINTF(sc, MWL_DEBUG_RESET, "%s: %s DMA map: %p (%lu) -> 0x%jx (%lu)\n", __func__, dd->dd_name, ds, (u_long) dd->dd_desc_len, (uintmax_t) dd->dd_desc_paddr, /*XXX*/ (u_long) dd->dd_desc_len); return 0; fail2: bus_dmamem_free(dd->dd_dmat, dd->dd_desc, dd->dd_dmamap); fail1: bus_dma_tag_destroy(dd->dd_dmat); memset(dd, 0, sizeof(*dd)); return error; #undef DS2PHYS } static void mwl_desc_cleanup(struct mwl_softc *sc, struct mwl_descdma *dd) { bus_dmamap_unload(dd->dd_dmat, dd->dd_dmamap); bus_dmamem_free(dd->dd_dmat, dd->dd_desc, dd->dd_dmamap); bus_dma_tag_destroy(dd->dd_dmat); memset(dd, 0, sizeof(*dd)); } /* * Construct a tx q's free list. The order of entries on * the list must reflect the physical layout of tx descriptors * because the firmware pre-fetches descriptors. * * XXX might be better to use indices into the buffer array. 
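 *
 * A sketch of the index-based alternative floated above; the ring
 * type and the NBUF bound are hypothetical, not the driver's:
 */
#if 0	/* illustrative sketch only, not compiled */
struct txq_freering {
	u_int	head, tail, nfree;	/* tail used by the matching put */
	u_int	idx[NBUF];		/* indices into the bf[] array */
};

static int
freering_get(struct txq_freering *r)
{
	u_int i;

	if (r->nfree == 0)
		return (-1);
	i = r->idx[r->head];
	r->head = (r->head + 1) % NBUF;	/* FIFO keeps descriptor order */
	r->nfree--;
	return (i);
}
#endif
/*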
*/ static void mwl_txq_reset(struct mwl_softc *sc, struct mwl_txq *txq) { struct mwl_txbuf *bf; int i; bf = txq->dma.dd_bufptr; STAILQ_INIT(&txq->free); for (i = 0; i < mwl_txbuf; i++, bf++) STAILQ_INSERT_TAIL(&txq->free, bf, bf_list); txq->nfree = i; } #define DS2PHYS(_dd, _ds) \ ((_dd)->dd_desc_paddr + ((caddr_t)(_ds) - (caddr_t)(_dd)->dd_desc)) static int mwl_txdma_setup(struct mwl_softc *sc, struct mwl_txq *txq) { int error, bsize, i; struct mwl_txbuf *bf; struct mwl_txdesc *ds; error = mwl_desc_setup(sc, "tx", &txq->dma, mwl_txbuf, sizeof(struct mwl_txbuf), MWL_TXDESC, sizeof(struct mwl_txdesc)); if (error != 0) return error; /* allocate and setup tx buffers */ bsize = mwl_txbuf * sizeof(struct mwl_txbuf); bf = malloc(bsize, M_MWLDEV, M_NOWAIT | M_ZERO); if (bf == NULL) { device_printf(sc->sc_dev, "malloc of %u tx buffers failed\n", mwl_txbuf); return ENOMEM; } txq->dma.dd_bufptr = bf; ds = txq->dma.dd_desc; for (i = 0; i < mwl_txbuf; i++, bf++, ds += MWL_TXDESC) { bf->bf_desc = ds; bf->bf_daddr = DS2PHYS(&txq->dma, ds); error = bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &bf->bf_dmamap); if (error != 0) { device_printf(sc->sc_dev, "unable to create dmamap for tx " "buffer %u, error %u\n", i, error); return error; } } mwl_txq_reset(sc, txq); return 0; } static void mwl_txdma_cleanup(struct mwl_softc *sc, struct mwl_txq *txq) { struct mwl_txbuf *bf; int i; bf = txq->dma.dd_bufptr; for (i = 0; i < mwl_txbuf; i++, bf++) { KASSERT(bf->bf_m == NULL, ("mbuf on free list")); KASSERT(bf->bf_node == NULL, ("node on free list")); if (bf->bf_dmamap != NULL) bus_dmamap_destroy(sc->sc_dmat, bf->bf_dmamap); } STAILQ_INIT(&txq->free); txq->nfree = 0; if (txq->dma.dd_bufptr != NULL) { free(txq->dma.dd_bufptr, M_MWLDEV); txq->dma.dd_bufptr = NULL; } if (txq->dma.dd_desc_len != 0) mwl_desc_cleanup(sc, &txq->dma); } static int mwl_rxdma_setup(struct mwl_softc *sc) { int error, jumbosize, bsize, i; struct mwl_rxbuf *bf; struct mwl_jumbo *rbuf; struct mwl_rxdesc *ds; caddr_t data; error = mwl_desc_setup(sc, "rx", &sc->sc_rxdma, mwl_rxdesc, sizeof(struct mwl_rxbuf), 1, sizeof(struct mwl_rxdesc)); if (error != 0) return error; /* * Receive is done to a private pool of jumbo buffers. * This allows us to attach to mbuf's and avoid re-mapping * memory on each rx we post. We allocate a large chunk * of memory and manage it in the driver. The mbuf free * callback method is used to reclaim frames after sending * them up the stack. By default we allocate 2x the number of * rx descriptors configured so we have some slop to hold * us while frames are processed. 
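	 *
	 * To make the footprint concrete (hypothetical numbers): with
	 * mwl_rxdesc = 256 and MWL_AGGR_SIZE rounding up to a single
	 * 4KB page, sc_rxmemsize below comes to 2 * 256 * 4096 bytes,
	 * i.e. 2MB of wired, DMA-able memory.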
*/ if (mwl_rxbuf < 2*mwl_rxdesc) { device_printf(sc->sc_dev, "too few rx dma buffers (%d); increasing to %d\n", mwl_rxbuf, 2*mwl_rxdesc); mwl_rxbuf = 2*mwl_rxdesc; } jumbosize = roundup(MWL_AGGR_SIZE, PAGE_SIZE); sc->sc_rxmemsize = mwl_rxbuf*jumbosize; error = bus_dma_tag_create(sc->sc_dmat, /* parent */ PAGE_SIZE, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sc->sc_rxmemsize, /* maxsize */ 1, /* nsegments */ sc->sc_rxmemsize, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &sc->sc_rxdmat); if (error != 0) { device_printf(sc->sc_dev, "could not create rx DMA tag\n"); return error; } error = bus_dmamem_alloc(sc->sc_rxdmat, (void**) &sc->sc_rxmem, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &sc->sc_rxmap); if (error != 0) { device_printf(sc->sc_dev, "could not alloc %ju bytes of rx DMA memory\n", (uintmax_t) sc->sc_rxmemsize); return error; } error = bus_dmamap_load(sc->sc_rxdmat, sc->sc_rxmap, sc->sc_rxmem, sc->sc_rxmemsize, mwl_load_cb, &sc->sc_rxmem_paddr, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->sc_dev, "could not load rx DMA map\n"); return error; } /* * Allocate rx buffers and set them up. */ bsize = mwl_rxdesc * sizeof(struct mwl_rxbuf); bf = malloc(bsize, M_MWLDEV, M_NOWAIT | M_ZERO); if (bf == NULL) { device_printf(sc->sc_dev, "malloc of %u rx buffers failed\n", mwl_rxdesc); return ENOMEM; /* NB: error is still 0 from the successful map load */ } sc->sc_rxdma.dd_bufptr = bf; STAILQ_INIT(&sc->sc_rxbuf); ds = sc->sc_rxdma.dd_desc; for (i = 0; i < mwl_rxdesc; i++, bf++, ds++) { bf->bf_desc = ds; bf->bf_daddr = DS2PHYS(&sc->sc_rxdma, ds); /* pre-assign dma buffer */ bf->bf_data = ((uint8_t *)sc->sc_rxmem) + (i*jumbosize); /* NB: tail is intentional to preserve descriptor order */ STAILQ_INSERT_TAIL(&sc->sc_rxbuf, bf, bf_list); } /* * Place remainder of dma memory buffers on the free list.
*/ SLIST_INIT(&sc->sc_rxfree); for (; i < mwl_rxbuf; i++) { data = ((uint8_t *)sc->sc_rxmem) + (i*jumbosize); rbuf = MWL_JUMBO_DATA2BUF(data); SLIST_INSERT_HEAD(&sc->sc_rxfree, rbuf, next); sc->sc_nrxfree++; } return 0; } #undef DS2PHYS static void mwl_rxdma_cleanup(struct mwl_softc *sc) { if (sc->sc_rxmem_paddr != 0) { bus_dmamap_unload(sc->sc_rxdmat, sc->sc_rxmap); sc->sc_rxmem_paddr = 0; } if (sc->sc_rxmem != NULL) { bus_dmamem_free(sc->sc_rxdmat, sc->sc_rxmem, sc->sc_rxmap); sc->sc_rxmem = NULL; } if (sc->sc_rxdma.dd_bufptr != NULL) { free(sc->sc_rxdma.dd_bufptr, M_MWLDEV); sc->sc_rxdma.dd_bufptr = NULL; } if (sc->sc_rxdma.dd_desc_len != 0) mwl_desc_cleanup(sc, &sc->sc_rxdma); } static int mwl_dma_setup(struct mwl_softc *sc) { int error, i; error = mwl_rxdma_setup(sc); if (error != 0) { mwl_rxdma_cleanup(sc); return error; } for (i = 0; i < MWL_NUM_TX_QUEUES; i++) { error = mwl_txdma_setup(sc, &sc->sc_txq[i]); if (error != 0) { mwl_dma_cleanup(sc); return error; } } return 0; } static void mwl_dma_cleanup(struct mwl_softc *sc) { int i; for (i = 0; i < MWL_NUM_TX_QUEUES; i++) mwl_txdma_cleanup(sc, &sc->sc_txq[i]); mwl_rxdma_cleanup(sc); } static struct ieee80211_node * mwl_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { struct ieee80211com *ic = vap->iv_ic; struct mwl_softc *sc = ic->ic_softc; const size_t space = sizeof(struct mwl_node); struct mwl_node *mn; mn = malloc(space, M_80211_NODE, M_NOWAIT|M_ZERO); if (mn == NULL) { /* XXX stat+msg */ return NULL; } DPRINTF(sc, MWL_DEBUG_NODE, "%s: mn %p\n", __func__, mn); return &mn->mn_node; } static void mwl_node_cleanup(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; struct mwl_softc *sc = ic->ic_softc; struct mwl_node *mn = MWL_NODE(ni); DPRINTF(sc, MWL_DEBUG_NODE, "%s: ni %p ic %p staid %d\n", __func__, ni, ni->ni_ic, mn->mn_staid); if (mn->mn_staid != 0) { struct ieee80211vap *vap = ni->ni_vap; if (mn->mn_hvap != NULL) { if (vap->iv_opmode == IEEE80211_M_STA) mwl_hal_delstation(mn->mn_hvap, vap->iv_myaddr); else mwl_hal_delstation(mn->mn_hvap, ni->ni_macaddr); } /* * NB: legacy WDS peer sta db entry is installed using * the associated ap's hvap; use it again to delete it. * XXX can vap be NULL? */ else if (vap->iv_opmode == IEEE80211_M_WDS && MWL_VAP(vap)->mv_ap_hvap != NULL) mwl_hal_delstation(MWL_VAP(vap)->mv_ap_hvap, ni->ni_macaddr); delstaid(sc, mn->mn_staid); mn->mn_staid = 0; } sc->sc_node_cleanup(ni); } /* * Reclaim rx dma buffers from packets sitting on the ampdu * reorder queue for a station. We replace buffers with a * system cluster (if available). */ static void mwl_ampdu_rxdma_reclaim(struct ieee80211_rx_ampdu *rap) { #if 0 int i, n, off; struct mbuf *m; void *cl; n = rap->rxa_qframes; for (i = 0; i < rap->rxa_wnd && n > 0; i++) { m = rap->rxa_m[i]; if (m == NULL) continue; n--; /* our dma buffers have a well-known free routine */ if ((m->m_flags & M_EXT) == 0 || m->m_ext.ext_free != mwl_ext_free) continue; /* * Try to allocate a cluster and move the data. */ off = m->m_data - m->m_ext.ext_buf; if (off + m->m_pkthdr.len > MCLBYTES) { /* XXX no AMSDU for now */ continue; } cl = pool_cache_get_paddr(&mclpool_cache, 0, &m->m_ext.ext_paddr); if (cl != NULL) { /* * Copy the existing data to the cluster, remove * the rx dma buffer, and attach the cluster in * its place. Note we preserve the offset to the * data so frames being bridged can still prepend * their headers without adding another mbuf.
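		 * Concretely (hypothetical numbers): a frame rx'd with
		 * m_data at ext_buf + 68 and a 1400 byte payload is
		 * copied to cl + 68, and the trailing m->m_data += off
		 * re-establishes the same 68 bytes of leading space in
		 * the new cluster.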
*/ memcpy((caddr_t) cl + off, m->m_data, m->m_pkthdr.len); MEXTREMOVE(m); MEXTADD(m, cl, MCLBYTES, 0, NULL, &mclpool_cache); /* setup mbuf like _MCLGET does */ m->m_flags |= M_CLUSTER | M_EXT_RW; _MOWNERREF(m, M_EXT | M_CLUSTER); /* NB: m_data is clobbered by MEXTADDR, adjust */ m->m_data += off; } } #endif } /* * Callback to reclaim resources. We first let the * net80211 layer do its thing, then if we are still * blocked by a lack of rx dma buffers we walk the ampdu * reorder q's to reclaim buffers by copying to a system * cluster. */ static void mwl_node_drain(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; struct mwl_softc *sc = ic->ic_softc; struct mwl_node *mn = MWL_NODE(ni); DPRINTF(sc, MWL_DEBUG_NODE, "%s: ni %p vap %p staid %d\n", __func__, ni, ni->ni_vap, mn->mn_staid); /* NB: call up first to age out ampdu q's */ sc->sc_node_drain(ni); /* XXX better to not check low water mark? */ if (sc->sc_rxblocked && mn->mn_staid != 0 && (ni->ni_flags & IEEE80211_NODE_HT)) { uint8_t tid; /* * Walk the reorder q and reclaim rx dma buffers by copying * the packet contents into clusters. */ for (tid = 0; tid < WME_NUM_TID; tid++) { struct ieee80211_rx_ampdu *rap; rap = &ni->ni_rx_ampdu[tid]; if ((rap->rxa_flags & IEEE80211_AGGR_XCHGPEND) == 0) continue; if (rap->rxa_qframes) mwl_ampdu_rxdma_reclaim(rap); } } } static void mwl_node_getsignal(const struct ieee80211_node *ni, int8_t *rssi, int8_t *noise) { *rssi = ni->ni_ic->ic_node_getrssi(ni); #ifdef MWL_ANT_INFO_SUPPORT #if 0 /* XXX need to smooth data */ *noise = -MWL_NODE_CONST(ni)->mn_ai.nf; #else *noise = -95; /* XXX */ #endif #else *noise = -95; /* XXX */ #endif } /* * Convert hardware per-antenna rssi info to common format: * Let a1, a2, a3 represent the amplitudes per chain * Let amax represent max[a1, a2, a3] * Rssi1_dBm = RSSI_dBm + 20*log10(a1/amax) * Rssi1_dBm = RSSI_dBm + 20*log10(a1) - 20*log10(amax) * We store a table that is 4*20*log10(idx) - the extra 4 is to store or * maintain some extra precision. * * Values are stored in .5 dB format capped at 127. */ static void mwl_node_getmimoinfo(const struct ieee80211_node *ni, struct ieee80211_mimo_info *mi) { #define CVT(_dst, _src) do { \ (_dst) = rssi + ((logdbtbl[_src] - logdbtbl[rssi_max]) >> 2); \ (_dst) = (_dst) > 64 ? 127 : ((_dst) << 1); \ } while (0) static const int8_t logdbtbl[32] = { 0, 0, 24, 38, 48, 56, 62, 68, 72, 76, 80, 83, 86, 89, 92, 94, 96, 98, 100, 102, 104, 106, 107, 109, 110, 112, 113, 115, 116, 117, 118, 119 }; const struct mwl_node *mn = MWL_NODE_CONST(ni); uint8_t rssi = mn->mn_ai.rsvd1/2; /* XXX */ uint32_t rssi_max; rssi_max = mn->mn_ai.rssi_a; if (mn->mn_ai.rssi_b > rssi_max) rssi_max = mn->mn_ai.rssi_b; if (mn->mn_ai.rssi_c > rssi_max) rssi_max = mn->mn_ai.rssi_c; CVT(mi->ch[0].rssi[0], mn->mn_ai.rssi_a); CVT(mi->ch[1].rssi[0], mn->mn_ai.rssi_b); CVT(mi->ch[2].rssi[0], mn->mn_ai.rssi_c); mi->ch[0].noise[0] = mn->mn_ai.nf_a; mi->ch[1].noise[0] = mn->mn_ai.nf_b; mi->ch[2].noise[0] = mn->mn_ai.nf_c; #undef CVT } static __inline void * mwl_getrxdma(struct mwl_softc *sc) { struct mwl_jumbo *buf; void *data; /* * Allocate from jumbo pool.
*/ MWL_RXFREE_LOCK(sc); buf = SLIST_FIRST(&sc->sc_rxfree); if (buf == NULL) { DPRINTF(sc, MWL_DEBUG_ANY, "%s: out of rx dma buffers\n", __func__); sc->sc_stats.mst_rx_nodmabuf++; data = NULL; } else { SLIST_REMOVE_HEAD(&sc->sc_rxfree, next); sc->sc_nrxfree--; data = MWL_JUMBO_BUF2DATA(buf); } MWL_RXFREE_UNLOCK(sc); return data; } static __inline void mwl_putrxdma(struct mwl_softc *sc, void *data) { struct mwl_jumbo *buf; /* XXX bounds check data */ MWL_RXFREE_LOCK(sc); buf = MWL_JUMBO_DATA2BUF(data); SLIST_INSERT_HEAD(&sc->sc_rxfree, buf, next); sc->sc_nrxfree++; MWL_RXFREE_UNLOCK(sc); } static int mwl_rxbuf_init(struct mwl_softc *sc, struct mwl_rxbuf *bf) { struct mwl_rxdesc *ds; ds = bf->bf_desc; if (bf->bf_data == NULL) { bf->bf_data = mwl_getrxdma(sc); if (bf->bf_data == NULL) { /* mark descriptor to be skipped */ ds->RxControl = EAGLE_RXD_CTRL_OS_OWN; /* NB: don't need PREREAD */ MWL_RXDESC_SYNC(sc, ds, BUS_DMASYNC_PREWRITE); sc->sc_stats.mst_rxbuf_failed++; return ENOMEM; } } /* * NB: DMA buffer contents is known to be unmodified * so there's no need to flush the data cache. */ /* * Setup descriptor. */ ds->QosCtrl = 0; ds->RSSI = 0; ds->Status = EAGLE_RXD_STATUS_IDLE; ds->Channel = 0; ds->PktLen = htole16(MWL_AGGR_SIZE); ds->SQ2 = 0; ds->pPhysBuffData = htole32(MWL_JUMBO_DMA_ADDR(sc, bf->bf_data)); /* NB: don't touch pPhysNext, set once */ ds->RxControl = EAGLE_RXD_CTRL_DRIVER_OWN; MWL_RXDESC_SYNC(sc, ds, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return 0; } static void -mwl_ext_free(struct mbuf *m, void *data, void *arg) +mwl_ext_free(struct mbuf *m) { - struct mwl_softc *sc = arg; + struct mwl_softc *sc = m->m_ext.ext_arg1; /* XXX bounds check data */ - mwl_putrxdma(sc, data); + mwl_putrxdma(sc, m->m_ext.ext_buf); /* * If we were previously blocked by a lack of rx dma buffers * check if we now have enough to restart rx interrupt handling. * NB: we know we are called at splvm which is above splnet. */ if (sc->sc_rxblocked && sc->sc_nrxfree > mwl_rxdmalow) { sc->sc_rxblocked = 0; mwl_hal_intrset(sc->sc_mh, sc->sc_imask); } } struct mwl_frame_bar { u_int8_t i_fc[2]; u_int8_t i_dur[2]; u_int8_t i_ra[IEEE80211_ADDR_LEN]; u_int8_t i_ta[IEEE80211_ADDR_LEN]; /* ctl, seq, FCS */ } __packed; /* * Like ieee80211_anyhdrsize, but handles BAR frames * specially so the logic below to piece the 802.11 * header together works. */ static __inline int mwl_anyhdrsize(const void *data) { const struct ieee80211_frame *wh = data; if ((wh->i_fc[0]&IEEE80211_FC0_TYPE_MASK) == IEEE80211_FC0_TYPE_CTL) { switch (wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK) { case IEEE80211_FC0_SUBTYPE_CTS: case IEEE80211_FC0_SUBTYPE_ACK: return sizeof(struct ieee80211_frame_ack); case IEEE80211_FC0_SUBTYPE_BAR: return sizeof(struct mwl_frame_bar); } return sizeof(struct ieee80211_frame_min); } else return ieee80211_hdrsize(data); } static void mwl_handlemicerror(struct ieee80211com *ic, const uint8_t *data) { const struct ieee80211_frame *wh; struct ieee80211_node *ni; wh = (const struct ieee80211_frame *)(data + sizeof(uint16_t)); ni = ieee80211_find_rxnode(ic, (const struct ieee80211_frame_min *) wh); if (ni != NULL) { ieee80211_notify_michael_failure(ni->ni_vap, wh, 0); ieee80211_free_node(ni); } } /* * Convert hardware signal strength to rssi. The value * provided by the device has the noise floor added in; * we need to compensate for this but we don't have that * so we use a fixed value. * * The offset of 8 is good for both 2.4 and 5GHz. The LNA * offset is already set as part of the initial gain. 
This * will give at least +/- 3dB for 2.4GHz and +/- 5dB for 5GHz. */ static __inline int cvtrssi(uint8_t ssi) { int rssi = (int) ssi + 8; /* XXX hack guess until we have a real noise floor */ rssi = 2*(87 - rssi); /* NB: .5 dBm units */ return (rssi < 0 ? 0 : rssi > 127 ? 127 : rssi); } static void mwl_rx_proc(void *arg, int npending) { struct mwl_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; struct mwl_rxbuf *bf; struct mwl_rxdesc *ds; struct mbuf *m; struct ieee80211_qosframe *wh; struct ieee80211_qosframe_addr4 *wh4; struct ieee80211_node *ni; struct mwl_node *mn; int off, len, hdrlen, pktlen, rssi, ntodo; uint8_t *data, status; void *newdata; int16_t nf; DPRINTF(sc, MWL_DEBUG_RX_PROC, "%s: pending %u rdptr 0x%x wrptr 0x%x\n", __func__, npending, RD4(sc, sc->sc_hwspecs.rxDescRead), RD4(sc, sc->sc_hwspecs.rxDescWrite)); nf = -96; /* XXX */ bf = sc->sc_rxnext; for (ntodo = mwl_rxquota; ntodo > 0; ntodo--) { if (bf == NULL) bf = STAILQ_FIRST(&sc->sc_rxbuf); ds = bf->bf_desc; data = bf->bf_data; if (data == NULL) { /* * If data allocation failed previously there * will be no buffer; try again to re-populate it. * Note the firmware will not advance to the next * descriptor with a dma buffer so we must mimic * this or we'll get out of sync. */ DPRINTF(sc, MWL_DEBUG_ANY, "%s: rx buf w/o dma memory\n", __func__); (void) mwl_rxbuf_init(sc, bf); sc->sc_stats.mst_rx_dmabufmissing++; break; } MWL_RXDESC_SYNC(sc, ds, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); if (ds->RxControl != EAGLE_RXD_CTRL_DMA_OWN) break; #ifdef MWL_DEBUG if (sc->sc_debug & MWL_DEBUG_RECV_DESC) mwl_printrxbuf(bf, 0); #endif status = ds->Status; if (status & EAGLE_RXD_STATUS_DECRYPT_ERR_MASK) { counter_u64_add(ic->ic_ierrors, 1); sc->sc_stats.mst_rx_crypto++; /* * NB: Check EAGLE_RXD_STATUS_GENERAL_DECRYPT_ERR * for backwards compatibility. */ if (status != EAGLE_RXD_STATUS_GENERAL_DECRYPT_ERR && (status & EAGLE_RXD_STATUS_TKIP_MIC_DECRYPT_ERR)) { /* * MIC error, notify upper layers. */ bus_dmamap_sync(sc->sc_rxdmat, sc->sc_rxmap, BUS_DMASYNC_POSTREAD); mwl_handlemicerror(ic, data); sc->sc_stats.mst_rx_tkipmic++; } /* XXX too painful to tap packets */ goto rx_next; } /* * Sync the data buffer. */ len = le16toh(ds->PktLen); bus_dmamap_sync(sc->sc_rxdmat, sc->sc_rxmap, BUS_DMASYNC_POSTREAD); /* * The 802.11 header is provided all or in part at the front; * use it to calculate the true size of the header that we'll * construct below. We use this to figure out where to copy * payload prior to constructing the header. */ hdrlen = mwl_anyhdrsize(data + sizeof(uint16_t)); off = sizeof(uint16_t) + sizeof(struct ieee80211_frame_addr4); /* calculate rssi early so we can re-use for each aggregate */ rssi = cvtrssi(ds->RSSI); pktlen = hdrlen + (len - off); /* * NB: we know our frame is at least as large as * IEEE80211_MIN_LEN because there is a 4-address * frame at the front. Hence there's no need to * vet the packet length. If the frame in fact * is too small it should be discarded at the * net80211 layer. */ /* * Attach dma buffer to an mbuf. We tried * doing this based on the packet size (i.e. * copying small packets) but it turns out to * be a net loss. The tradeoff might be system * dependent (cache architecture is important). */ MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) { DPRINTF(sc, MWL_DEBUG_ANY, "%s: no rx mbuf\n", __func__); sc->sc_stats.mst_rx_nombuf++; goto rx_next; } /* * Acquire the replacement dma buffer before * processing the frame. 
If we're out of dma * buffers we disable rx interrupts and wait * for the free pool to reach mwl_rxdmalow buffers * before starting to do work again. If the firmware * runs out of descriptors then it will toss frames * which is better than our doing it as that can * starve our processing. It is also important that * we always process rx'd frames in case they are * A-MPDU as otherwise the host's view of the BA * window may get out of sync with the firmware. */ newdata = mwl_getrxdma(sc); if (newdata == NULL) { /* NB: stat+msg in mwl_getrxdma */ m_free(m); /* disable RX interrupt and mark state */ mwl_hal_intrset(sc->sc_mh, sc->sc_imask &~ MACREG_A2HRIC_BIT_RX_RDY); sc->sc_rxblocked = 1; ieee80211_drain(ic); /* XXX check rxblocked and immediately start again? */ goto rx_stop; } bf->bf_data = newdata; /* * Attach the dma buffer to the mbuf; * mwl_rxbuf_init will re-setup the rx * descriptor using the replacement dma * buffer we just installed above. */ - MEXTADD(m, data, MWL_AGGR_SIZE, mwl_ext_free, - data, sc, 0, EXT_NET_DRV); + m_extadd(m, data, MWL_AGGR_SIZE, mwl_ext_free, sc, NULL, 0, + EXT_NET_DRV); m->m_data += off - hdrlen; m->m_pkthdr.len = m->m_len = pktlen; /* NB: dma buffer assumed read-only */ /* * Piece 802.11 header together. */ wh = mtod(m, struct ieee80211_qosframe *); /* NB: don't need to do this sometimes but ... */ /* XXX special case so we can memcpy after m_devget? */ ovbcopy(data + sizeof(uint16_t), wh, hdrlen); if (IEEE80211_QOS_HAS_SEQ(wh)) { if (IEEE80211_IS_DSTODS(wh)) { wh4 = mtod(m, struct ieee80211_qosframe_addr4*); *(uint16_t *)wh4->i_qos = ds->QosCtrl; } else { *(uint16_t *)wh->i_qos = ds->QosCtrl; } } /* * The f/w strips the WEP header but doesn't clear * the WEP bit; mark the packet with M_WEP so * net80211 will treat the data as decrypted. * While here also clear the PWR_MGT bit since * power save is handled by the firmware and * passing this up will potentially cause the * upper layer to put a station in power save * (except when configured with MWL_HOST_PS_SUPPORT).
*/ if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) m->m_flags |= M_WEP; #ifdef MWL_HOST_PS_SUPPORT wh->i_fc[1] &= ~IEEE80211_FC1_PROTECTED; #else wh->i_fc[1] &= ~(IEEE80211_FC1_PROTECTED | IEEE80211_FC1_PWR_MGT); #endif if (ieee80211_radiotap_active(ic)) { struct mwl_rx_radiotap_header *tap = &sc->sc_rx_th; tap->wr_flags = 0; tap->wr_rate = ds->Rate; tap->wr_antsignal = rssi + nf; tap->wr_antnoise = nf; } if (IFF_DUMPPKTS_RECV(sc, wh)) { ieee80211_dump_pkt(ic, mtod(m, caddr_t), len, ds->Rate, rssi); } /* dispatch */ ni = ieee80211_find_rxnode(ic, (const struct ieee80211_frame_min *) wh); if (ni != NULL) { mn = MWL_NODE(ni); #ifdef MWL_ANT_INFO_SUPPORT mn->mn_ai.rssi_a = ds->ai.rssi_a; mn->mn_ai.rssi_b = ds->ai.rssi_b; mn->mn_ai.rssi_c = ds->ai.rssi_c; mn->mn_ai.rsvd1 = rssi; #endif /* tag AMPDU aggregates for reorder processing */ if (ni->ni_flags & IEEE80211_NODE_HT) m->m_flags |= M_AMPDU; (void) ieee80211_input(ni, m, rssi, nf); ieee80211_free_node(ni); } else (void) ieee80211_input_all(ic, m, rssi, nf); rx_next: /* NB: ignore ENOMEM so we process more descriptors */ (void) mwl_rxbuf_init(sc, bf); bf = STAILQ_NEXT(bf, bf_list); } rx_stop: sc->sc_rxnext = bf; if (mbufq_first(&sc->sc_snd) != NULL) { /* NB: kick fw; the tx thread may have been preempted */ mwl_hal_txstart(sc->sc_mh, 0); mwl_start(sc); } } static void mwl_txq_init(struct mwl_softc *sc, struct mwl_txq *txq, int qnum) { struct mwl_txbuf *bf, *bn; struct mwl_txdesc *ds; MWL_TXQ_LOCK_INIT(sc, txq); txq->qnum = qnum; txq->txpri = 0; /* XXX */ #if 0 /* NB: q setup by mwl_txdma_setup XXX */ STAILQ_INIT(&txq->free); #endif STAILQ_FOREACH(bf, &txq->free, bf_list) { bf->bf_txq = txq; ds = bf->bf_desc; bn = STAILQ_NEXT(bf, bf_list); if (bn == NULL) bn = STAILQ_FIRST(&txq->free); ds->pPhysNext = htole32(bn->bf_daddr); } STAILQ_INIT(&txq->active); } /* * Setup a hardware data transmit queue for the specified * access category. We record the mapping from ac's * to h/w queues for use by mwl_tx_start. */ static int mwl_tx_setup(struct mwl_softc *sc, int ac, int mvtype) { struct mwl_txq *txq; if (ac >= nitems(sc->sc_ac2q)) { device_printf(sc->sc_dev, "AC %u out of range, max %zu!\n", ac, nitems(sc->sc_ac2q)); return 0; } if (mvtype >= MWL_NUM_TX_QUEUES) { device_printf(sc->sc_dev, "mvtype %u out of range, max %u!\n", mvtype, MWL_NUM_TX_QUEUES); return 0; } txq = &sc->sc_txq[mvtype]; mwl_txq_init(sc, txq, mvtype); sc->sc_ac2q[ac] = txq; return 1; } /* * Update WME parameters for a transmit queue. */ static int mwl_txq_update(struct mwl_softc *sc, int ac) { #define MWL_EXPONENT_TO_VALUE(v) ((1<<v)-1) struct ieee80211com *ic = &sc->sc_ic; struct mwl_txq *txq = sc->sc_ac2q[ac]; struct wmeParams *wmep = &ic->ic_wme.wme_chanParams.cap_wmeParams[ac]; struct mwl_hal *mh = sc->sc_mh; int aifs, cwmin, cwmax, txoplim; aifs = wmep->wmep_aifsn; /* XXX in sta mode need to pass log values for cwmin/max */ cwmin = MWL_EXPONENT_TO_VALUE(wmep->wmep_logcwmin); cwmax = MWL_EXPONENT_TO_VALUE(wmep->wmep_logcwmax); txoplim = wmep->wmep_txopLimit; /* NB: units of 32us */ if (mwl_hal_setedcaparams(mh, txq->qnum, cwmin, cwmax, aifs, txoplim)) { device_printf(sc->sc_dev, "unable to update hardware queue " "parameters for %s traffic!\n", ieee80211_wme_acnames[ac]); return 0; } return 1; #undef MWL_EXPONENT_TO_VALUE } /* * Callback from the 802.11 layer to update WME parameters. */ static int mwl_wme_update(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; return !mwl_txq_update(sc, WME_AC_BE) || !mwl_txq_update(sc, WME_AC_BK) || !mwl_txq_update(sc, WME_AC_VI) || !mwl_txq_update(sc, WME_AC_VO) ?
EIO : 0; } /* * Reclaim resources for a setup queue. */ static void mwl_tx_cleanupq(struct mwl_softc *sc, struct mwl_txq *txq) { /* XXX hal work? */ MWL_TXQ_LOCK_DESTROY(txq); } /* * Reclaim all tx queue resources. */ static void mwl_tx_cleanup(struct mwl_softc *sc) { int i; for (i = 0; i < MWL_NUM_TX_QUEUES; i++) mwl_tx_cleanupq(sc, &sc->sc_txq[i]); } static int mwl_tx_dmasetup(struct mwl_softc *sc, struct mwl_txbuf *bf, struct mbuf *m0) { struct mbuf *m; int error; /* * Load the DMA map so any coalescing is done. This * also calculates the number of descriptors we need. */ error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m0, bf->bf_segs, &bf->bf_nseg, BUS_DMA_NOWAIT); if (error == EFBIG) { /* XXX packet requires too many descriptors */ bf->bf_nseg = MWL_TXDESC+1; } else if (error != 0) { sc->sc_stats.mst_tx_busdma++; m_freem(m0); return error; } /* * Discard null packets and check for packets that * require too many TX descriptors. We try to convert * the latter to a cluster. */ if (error == EFBIG) { /* too many desc's, linearize */ sc->sc_stats.mst_tx_linear++; #if MWL_TXDESC > 1 m = m_collapse(m0, M_NOWAIT, MWL_TXDESC); #else m = m_defrag(m0, M_NOWAIT); #endif if (m == NULL) { m_freem(m0); sc->sc_stats.mst_tx_nombuf++; return ENOMEM; } m0 = m; error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m0, bf->bf_segs, &bf->bf_nseg, BUS_DMA_NOWAIT); if (error != 0) { sc->sc_stats.mst_tx_busdma++; m_freem(m0); return error; } KASSERT(bf->bf_nseg <= MWL_TXDESC, ("too many segments after defrag; nseg %u", bf->bf_nseg)); } else if (bf->bf_nseg == 0) { /* null packet, discard */ sc->sc_stats.mst_tx_nodata++; m_freem(m0); return EIO; } DPRINTF(sc, MWL_DEBUG_XMIT, "%s: m %p len %u\n", __func__, m0, m0->m_pkthdr.len); bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_PREWRITE); bf->bf_m = m0; return 0; } static __inline int mwl_cvtlegacyrate(int rate) { switch (rate) { case 2: return 0; case 4: return 1; case 11: return 2; case 22: return 3; case 44: return 4; case 12: return 5; case 18: return 6; case 24: return 7; case 36: return 8; case 48: return 9; case 72: return 10; case 96: return 11; case 108:return 12; } return 0; } /* * Calculate fixed tx rate information per client state; * this value is suitable for writing to the Format field * of a tx descriptor. */ static uint16_t mwl_calcformat(uint8_t rate, const struct ieee80211_node *ni) { uint16_t fmt; fmt = SM(3, EAGLE_TXD_ANTENNA) | (IEEE80211_IS_CHAN_HT40D(ni->ni_chan) ? EAGLE_TXD_EXTCHAN_LO : EAGLE_TXD_EXTCHAN_HI); if (rate & IEEE80211_RATE_MCS) { /* HT MCS */ fmt |= EAGLE_TXD_FORMAT_HT /* NB: 0x80 implicitly stripped from ucastrate */ | SM(rate, EAGLE_TXD_RATE); /* XXX short/long GI may be wrong; re-check */ if (IEEE80211_IS_CHAN_HT40(ni->ni_chan)) { fmt |= EAGLE_TXD_CHW_40 | (ni->ni_htcap & IEEE80211_HTCAP_SHORTGI40 ? EAGLE_TXD_GI_SHORT : EAGLE_TXD_GI_LONG); } else { fmt |= EAGLE_TXD_CHW_20 | (ni->ni_htcap & IEEE80211_HTCAP_SHORTGI20 ? EAGLE_TXD_GI_SHORT : EAGLE_TXD_GI_LONG); } } else { /* legacy rate */ fmt |= EAGLE_TXD_FORMAT_LEGACY | SM(mwl_cvtlegacyrate(rate), EAGLE_TXD_RATE) | EAGLE_TXD_CHW_20 /* XXX iv_flags & IEEE80211_F_SHPREAMBLE? */ | (ni->ni_capinfo & IEEE80211_CAPINFO_SHORT_PREAMBLE ? 
EAGLE_TXD_PREAMBLE_SHORT : EAGLE_TXD_PREAMBLE_LONG); } return fmt; } static int mwl_tx_start(struct mwl_softc *sc, struct ieee80211_node *ni, struct mwl_txbuf *bf, struct mbuf *m0) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = ni->ni_vap; int error, iswep, ismcast; int hdrlen, copyhdrlen, pktlen; struct mwl_txdesc *ds; struct mwl_txq *txq; struct ieee80211_frame *wh; struct mwltxrec *tr; struct mwl_node *mn; uint16_t qos; #if MWL_TXDESC > 1 int i; #endif wh = mtod(m0, struct ieee80211_frame *); iswep = wh->i_fc[1] & IEEE80211_FC1_PROTECTED; ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1); hdrlen = ieee80211_anyhdrsize(wh); copyhdrlen = hdrlen; pktlen = m0->m_pkthdr.len; if (IEEE80211_QOS_HAS_SEQ(wh)) { if (IEEE80211_IS_DSTODS(wh)) { qos = *(uint16_t *) (((struct ieee80211_qosframe_addr4 *) wh)->i_qos); copyhdrlen -= sizeof(qos); } else qos = *(uint16_t *) (((struct ieee80211_qosframe *) wh)->i_qos); } else qos = 0; if (iswep) { const struct ieee80211_cipher *cip; struct ieee80211_key *k; /* * Construct the 802.11 header+trailer for an encrypted * frame. The only reason this can fail is because of an * unknown or unsupported cipher/key type. * * NB: we do this even though the firmware will ignore * what we've done for WEP and TKIP as we need the * ExtIV filled in for CCMP and this also adjusts * the headers which simplifies our work below. */ k = ieee80211_crypto_encap(ni, m0); if (k == NULL) { /* * This can happen when the key is yanked after the * frame was queued. Just discard the frame; the * 802.11 layer counts failures and provides * debugging/diagnostics. */ m_freem(m0); return EIO; } /* * Adjust the packet length for the crypto additions * done during encap and any other bits that the f/w * will add later on. */ cip = k->wk_cipher; pktlen += cip->ic_header + cip->ic_miclen + cip->ic_trailer; /* packet header may have moved, reset our local pointer */ wh = mtod(m0, struct ieee80211_frame *); } if (ieee80211_radiotap_active_vap(vap)) { sc->sc_tx_th.wt_flags = 0; /* XXX */ if (iswep) sc->sc_tx_th.wt_flags |= IEEE80211_RADIOTAP_F_WEP; #if 0 sc->sc_tx_th.wt_rate = ds->DataRate; #endif sc->sc_tx_th.wt_txpower = ni->ni_txpower; sc->sc_tx_th.wt_antenna = sc->sc_txantenna; ieee80211_radiotap_tx(vap, m0); } /* * Copy up/down the 802.11 header; the firmware requires * we present a 2-byte payload length followed by a * 4-address header (w/o QoS), followed (optionally) by * any WEP/ExtIV header (but only filled in for CCMP). * We are assured the mbuf has sufficient headroom to * prepend in-place by the setup of ic_headroom in * mwl_attach. */ if (hdrlen < sizeof(struct mwltxrec)) { const int space = sizeof(struct mwltxrec) - hdrlen; if (M_LEADINGSPACE(m0) < space) { /* NB: should never happen */ device_printf(sc->sc_dev, "not enough headroom, need %d found %zd, " "m_flags 0x%x m_len %d\n", space, M_LEADINGSPACE(m0), m0->m_flags, m0->m_len); ieee80211_dump_pkt(ic, mtod(m0, const uint8_t *), m0->m_len, 0, -1); m_freem(m0); sc->sc_stats.mst_tx_noheadroom++; return EIO; } M_PREPEND(m0, space, M_NOWAIT); } tr = mtod(m0, struct mwltxrec *); if (wh != (struct ieee80211_frame *) &tr->wh) ovbcopy(wh, &tr->wh, hdrlen); /* * Note: the "firmware length" is actually the length * of the fully formed "802.11 payload". That is, it's * everything except for the 802.11 header. In particular * this includes all crypto material including the MIC! */ tr->fwlen = htole16(pktlen - hdrlen); /* * Load the DMA map so any coalescing is done. This * also calculates the number of descriptors we need. 
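	 *
	 * For orientation, the record prepended above is laid out
	 * roughly like this (a sketch; the real struct mwltxrec is
	 * declared elsewhere in the driver):
	 */
#if 0	/* illustrative sketch only, not compiled */
	struct mwltxrec_sketch {
		uint16_t fwlen;			/* 802.11 payload length */
		struct ieee80211_frame_addr4 wh; /* 4-address header */
	} __packed;
#endif
	/*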
*/ error = mwl_tx_dmasetup(sc, bf, m0); if (error != 0) { /* NB: stat collected in mwl_tx_dmasetup */ DPRINTF(sc, MWL_DEBUG_XMIT, "%s: unable to setup dma\n", __func__); return error; } bf->bf_node = ni; /* NB: held reference */ m0 = bf->bf_m; /* NB: may have changed */ tr = mtod(m0, struct mwltxrec *); wh = (struct ieee80211_frame *)&tr->wh; /* * Formulate tx descriptor. */ ds = bf->bf_desc; txq = bf->bf_txq; ds->QosCtrl = qos; /* NB: already little-endian */ #if MWL_TXDESC == 1 /* * NB: multiframes should be zero because the descriptors * are initialized to zero. This should handle the case * where the driver is built with MWL_TXDESC=1 but we are * using firmware with multi-segment support. */ ds->PktPtr = htole32(bf->bf_segs[0].ds_addr); ds->PktLen = htole16(bf->bf_segs[0].ds_len); #else ds->multiframes = htole32(bf->bf_nseg); ds->PktLen = htole16(m0->m_pkthdr.len); for (i = 0; i < bf->bf_nseg; i++) { ds->PktPtrArray[i] = htole32(bf->bf_segs[i].ds_addr); ds->PktLenArray[i] = htole16(bf->bf_segs[i].ds_len); } #endif /* NB: pPhysNext, DataRate, and SapPktInfo setup once, don't touch */ ds->Format = 0; ds->pad = 0; ds->ack_wcb_addr = 0; mn = MWL_NODE(ni); /* * Select transmit rate. */ switch (wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) { case IEEE80211_FC0_TYPE_MGT: sc->sc_stats.mst_tx_mgmt++; /* fall thru... */ case IEEE80211_FC0_TYPE_CTL: /* NB: assign to BE q to avoid bursting */ ds->TxPriority = MWL_WME_AC_BE; break; case IEEE80211_FC0_TYPE_DATA: if (!ismcast) { const struct ieee80211_txparam *tp = ni->ni_txparms; /* * EAPOL frames get forced to a fixed rate and w/o * aggregation; otherwise check for any fixed rate * for the client (may depend on association state). */ if (m0->m_flags & M_EAPOL) { const struct mwl_vap *mvp = MWL_VAP_CONST(vap); ds->Format = mvp->mv_eapolformat; ds->pad = htole16( EAGLE_TXD_FIXED_RATE | EAGLE_TXD_DONT_AGGR); } else if (tp->ucastrate != IEEE80211_FIXED_RATE_NONE) { /* XXX pre-calculate per node */ ds->Format = htole16( mwl_calcformat(tp->ucastrate, ni)); ds->pad = htole16(EAGLE_TXD_FIXED_RATE); } /* NB: EAPOL frames will never have qos set */ if (qos == 0) ds->TxPriority = txq->qnum; #if MWL_MAXBA > 3 else if (mwl_bastream_match(&mn->mn_ba[3], qos)) ds->TxPriority = mn->mn_ba[3].txq; #endif #if MWL_MAXBA > 2 else if (mwl_bastream_match(&mn->mn_ba[2], qos)) ds->TxPriority = mn->mn_ba[2].txq; #endif #if MWL_MAXBA > 1 else if (mwl_bastream_match(&mn->mn_ba[1], qos)) ds->TxPriority = mn->mn_ba[1].txq; #endif #if MWL_MAXBA > 0 else if (mwl_bastream_match(&mn->mn_ba[0], qos)) ds->TxPriority = mn->mn_ba[0].txq; #endif else ds->TxPriority = txq->qnum; } else ds->TxPriority = txq->qnum; break; default: device_printf(sc->sc_dev, "bogus frame type 0x%x (%s)\n", wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK, __func__); sc->sc_stats.mst_tx_badframetype++; m_freem(m0); return EIO; } if (IFF_DUMPPKTS_XMIT(sc)) ieee80211_dump_pkt(ic, mtod(m0, const uint8_t *)+sizeof(uint16_t), m0->m_len - sizeof(uint16_t), ds->DataRate, -1); MWL_TXQ_LOCK(txq); ds->Status = htole32(EAGLE_TXD_STATUS_FW_OWNED); STAILQ_INSERT_TAIL(&txq->active, bf, bf_list); MWL_TXDESC_SYNC(txq, ds, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); sc->sc_tx_timer = 5; MWL_TXQ_UNLOCK(txq); return 0; } static __inline int mwl_cvtlegacyrix(int rix) { static const int ieeerates[] = { 2, 4, 11, 22, 44, 12, 18, 24, 36, 48, 72, 96, 108 }; return (rix < nitems(ieeerates) ? ieeerates[rix] : 0); } /* * Process completed xmit descriptors from the specified queue. 
*/ static int mwl_tx_processq(struct mwl_softc *sc, struct mwl_txq *txq) { #define EAGLE_TXD_STATUS_MCAST \ (EAGLE_TXD_STATUS_MULTICAST_TX | EAGLE_TXD_STATUS_BROADCAST_TX) struct ieee80211com *ic = &sc->sc_ic; struct mwl_txbuf *bf; struct mwl_txdesc *ds; struct ieee80211_node *ni; struct mwl_node *an; int nreaped; uint32_t status; DPRINTF(sc, MWL_DEBUG_TX_PROC, "%s: tx queue %u\n", __func__, txq->qnum); for (nreaped = 0;; nreaped++) { MWL_TXQ_LOCK(txq); bf = STAILQ_FIRST(&txq->active); if (bf == NULL) { MWL_TXQ_UNLOCK(txq); break; } ds = bf->bf_desc; MWL_TXDESC_SYNC(txq, ds, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); if (ds->Status & htole32(EAGLE_TXD_STATUS_FW_OWNED)) { MWL_TXQ_UNLOCK(txq); break; } STAILQ_REMOVE_HEAD(&txq->active, bf_list); MWL_TXQ_UNLOCK(txq); #ifdef MWL_DEBUG if (sc->sc_debug & MWL_DEBUG_XMIT_DESC) mwl_printtxbuf(bf, txq->qnum, nreaped); #endif ni = bf->bf_node; if (ni != NULL) { an = MWL_NODE(ni); status = le32toh(ds->Status); if (status & EAGLE_TXD_STATUS_OK) { uint16_t Format = le16toh(ds->Format); uint8_t txant = MS(Format, EAGLE_TXD_ANTENNA); sc->sc_stats.mst_ant_tx[txant]++; if (status & EAGLE_TXD_STATUS_OK_RETRY) sc->sc_stats.mst_tx_retries++; if (status & EAGLE_TXD_STATUS_OK_MORE_RETRY) sc->sc_stats.mst_tx_mretries++; if (txq->qnum >= MWL_WME_AC_VO) ic->ic_wme.wme_hipri_traffic++; ni->ni_txrate = MS(Format, EAGLE_TXD_RATE); if ((Format & EAGLE_TXD_FORMAT_HT) == 0) { ni->ni_txrate = mwl_cvtlegacyrix( ni->ni_txrate); } else ni->ni_txrate |= IEEE80211_RATE_MCS; sc->sc_stats.mst_tx_rate = ni->ni_txrate; } else { if (status & EAGLE_TXD_STATUS_FAILED_LINK_ERROR) sc->sc_stats.mst_tx_linkerror++; if (status & EAGLE_TXD_STATUS_FAILED_XRETRY) sc->sc_stats.mst_tx_xretries++; if (status & EAGLE_TXD_STATUS_FAILED_AGING) sc->sc_stats.mst_tx_aging++; if (bf->bf_m->m_flags & M_FF) sc->sc_stats.mst_ff_txerr++; } if (bf->bf_m->m_flags & M_TXCB) /* XXX strip fw len in case header inspected */ m_adj(bf->bf_m, sizeof(uint16_t)); ieee80211_tx_complete(ni, bf->bf_m, (status & EAGLE_TXD_STATUS_OK) == 0); } else m_freem(bf->bf_m); ds->Status = htole32(EAGLE_TXD_STATUS_IDLE); bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); mwl_puttxbuf_tail(txq, bf); } return nreaped; #undef EAGLE_TXD_STATUS_MCAST } /* * Deferred processing of transmit interrupt; special-cased * for four hardware queues, 0-3. */ static void mwl_tx_proc(void *arg, int npending) { struct mwl_softc *sc = arg; int nreaped; /* * Process each active queue. 
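	 *
	 * The unrolled calls below match the fixed four-queue layout;
	 * an equivalent loop form, as mwl_draintxq uses, would be:
	 */
#if 0	/* illustrative sketch only, not compiled */
	int i;

	for (i = 0; i < MWL_NUM_TX_QUEUES; i++)
		if (!STAILQ_EMPTY(&sc->sc_txq[i].active))
			nreaped += mwl_tx_processq(sc, &sc->sc_txq[i]);
#endif
	/*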
*/ nreaped = 0; if (!STAILQ_EMPTY(&sc->sc_txq[0].active)) nreaped += mwl_tx_processq(sc, &sc->sc_txq[0]); if (!STAILQ_EMPTY(&sc->sc_txq[1].active)) nreaped += mwl_tx_processq(sc, &sc->sc_txq[1]); if (!STAILQ_EMPTY(&sc->sc_txq[2].active)) nreaped += mwl_tx_processq(sc, &sc->sc_txq[2]); if (!STAILQ_EMPTY(&sc->sc_txq[3].active)) nreaped += mwl_tx_processq(sc, &sc->sc_txq[3]); if (nreaped != 0) { sc->sc_tx_timer = 0; if (mbufq_first(&sc->sc_snd) != NULL) { /* NB: kick fw; the tx thread may have been preempted */ mwl_hal_txstart(sc->sc_mh, 0); mwl_start(sc); } } } static void mwl_tx_draintxq(struct mwl_softc *sc, struct mwl_txq *txq) { struct ieee80211_node *ni; struct mwl_txbuf *bf; u_int ix; /* * NB: this assumes output has been stopped and * we do not need to block mwl_tx_tasklet */ for (ix = 0;; ix++) { MWL_TXQ_LOCK(txq); bf = STAILQ_FIRST(&txq->active); if (bf == NULL) { MWL_TXQ_UNLOCK(txq); break; } STAILQ_REMOVE_HEAD(&txq->active, bf_list); MWL_TXQ_UNLOCK(txq); #ifdef MWL_DEBUG if (sc->sc_debug & MWL_DEBUG_RESET) { struct ieee80211com *ic = &sc->sc_ic; const struct mwltxrec *tr = mtod(bf->bf_m, const struct mwltxrec *); mwl_printtxbuf(bf, txq->qnum, ix); ieee80211_dump_pkt(ic, (const uint8_t *)&tr->wh, bf->bf_m->m_len - sizeof(tr->fwlen), 0, -1); } #endif /* MWL_DEBUG */ bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); ni = bf->bf_node; if (ni != NULL) { /* * Reclaim node reference. */ ieee80211_free_node(ni); } m_freem(bf->bf_m); mwl_puttxbuf_tail(txq, bf); } } /* * Drain the transmit queues and reclaim resources. */ static void mwl_draintxq(struct mwl_softc *sc) { int i; for (i = 0; i < MWL_NUM_TX_QUEUES; i++) mwl_tx_draintxq(sc, &sc->sc_txq[i]); sc->sc_tx_timer = 0; } #ifdef MWL_DIAGAPI /* * Reset the transmit queues to a pristine state after a fw download. */ static void mwl_resettxq(struct mwl_softc *sc) { int i; for (i = 0; i < MWL_NUM_TX_QUEUES; i++) mwl_txq_reset(sc, &sc->sc_txq[i]); } #endif /* MWL_DIAGAPI */ /* * Clear the transmit queues of any frames submitted for the * specified vap. This is done when the vap is deleted so we * don't potentially reference the vap after it is gone. * Note we cannot remove the frames; we only reclaim the node * reference. 
*/ static void mwl_cleartxq(struct mwl_softc *sc, struct ieee80211vap *vap) { struct mwl_txq *txq; struct mwl_txbuf *bf; int i; for (i = 0; i < MWL_NUM_TX_QUEUES; i++) { txq = &sc->sc_txq[i]; MWL_TXQ_LOCK(txq); STAILQ_FOREACH(bf, &txq->active, bf_list) { struct ieee80211_node *ni = bf->bf_node; if (ni != NULL && ni->ni_vap == vap) { bf->bf_node = NULL; ieee80211_free_node(ni); } } MWL_TXQ_UNLOCK(txq); } } static int mwl_recv_action(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { struct mwl_softc *sc = ni->ni_ic->ic_softc; const struct ieee80211_action *ia; ia = (const struct ieee80211_action *) frm; if (ia->ia_category == IEEE80211_ACTION_CAT_HT && ia->ia_action == IEEE80211_ACTION_HT_MIMOPWRSAVE) { const struct ieee80211_action_ht_mimopowersave *mps = (const struct ieee80211_action_ht_mimopowersave *) ia; mwl_hal_setmimops(sc->sc_mh, ni->ni_macaddr, mps->am_control & IEEE80211_A_HT_MIMOPWRSAVE_ENA, MS(mps->am_control, IEEE80211_A_HT_MIMOPWRSAVE_MODE)); return 0; } else return sc->sc_recv_action(ni, wh, frm, efrm); } static int mwl_addba_request(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int dialogtoken, int baparamset, int batimeout) { struct mwl_softc *sc = ni->ni_ic->ic_softc; struct ieee80211vap *vap = ni->ni_vap; struct mwl_node *mn = MWL_NODE(ni); struct mwl_bastate *bas; bas = tap->txa_private; if (bas == NULL) { const MWL_HAL_BASTREAM *sp; /* * Check for a free BA stream slot. */ #if MWL_MAXBA > 3 if (mn->mn_ba[3].bastream == NULL) bas = &mn->mn_ba[3]; else #endif #if MWL_MAXBA > 2 if (mn->mn_ba[2].bastream == NULL) bas = &mn->mn_ba[2]; else #endif #if MWL_MAXBA > 1 if (mn->mn_ba[1].bastream == NULL) bas = &mn->mn_ba[1]; else #endif #if MWL_MAXBA > 0 if (mn->mn_ba[0].bastream == NULL) bas = &mn->mn_ba[0]; else #endif { /* sta already has max BA streams */ /* XXX assign BA stream to highest priority tid */ DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: already has max bastreams\n", __func__); sc->sc_stats.mst_ampdu_reject++; return 0; } /* NB: no held reference to ni */ sp = mwl_hal_bastream_alloc(MWL_VAP(vap)->mv_hvap, (baparamset & IEEE80211_BAPS_POLICY_IMMEDIATE) != 0, ni->ni_macaddr, tap->txa_tid, ni->ni_htparam, ni, tap); if (sp == NULL) { /* * No available stream, return 0 so no * a-mpdu aggregation will be done. */ DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: no bastream available\n", __func__); sc->sc_stats.mst_ampdu_nostream++; return 0; } DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: alloc bastream %p\n", __func__, sp); /* NB: qos is left zero so we won't match in mwl_tx_start */ bas->bastream = sp; tap->txa_private = bas; } /* fetch current seq# from the firmware; if available */ if (mwl_hal_bastream_get_seqno(sc->sc_mh, bas->bastream, vap->iv_opmode == IEEE80211_M_STA ? 
vap->iv_myaddr : ni->ni_macaddr, &tap->txa_start) != 0) tap->txa_start = 0; return sc->sc_addba_request(ni, tap, dialogtoken, baparamset, batimeout); } static int mwl_addba_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int code, int baparamset, int batimeout) { struct mwl_softc *sc = ni->ni_ic->ic_softc; struct mwl_bastate *bas; bas = tap->txa_private; if (bas == NULL) { /* XXX should not happen */ DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: no BA stream allocated, TID %d\n", __func__, tap->txa_tid); sc->sc_stats.mst_addba_nostream++; return 0; } if (code == IEEE80211_STATUS_SUCCESS) { struct ieee80211vap *vap = ni->ni_vap; int bufsiz, error; /* * Tell the firmware to setup the BA stream; * we know resources are available because we * pre-allocated one before forming the request. */ bufsiz = MS(baparamset, IEEE80211_BAPS_BUFSIZ); if (bufsiz == 0) bufsiz = IEEE80211_AGGR_BAWMAX; error = mwl_hal_bastream_create(MWL_VAP(vap)->mv_hvap, bas->bastream, bufsiz, bufsiz, tap->txa_start); if (error != 0) { /* * Setup failed, return immediately so no a-mpdu * aggregation will be done. */ mwl_hal_bastream_destroy(sc->sc_mh, bas->bastream); mwl_bastream_free(bas); tap->txa_private = NULL; DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: create failed, error %d, bufsiz %d TID %d " "htparam 0x%x\n", __func__, error, bufsiz, tap->txa_tid, ni->ni_htparam); sc->sc_stats.mst_bacreate_failed++; return 0; } /* NB: cache txq to avoid ptr indirect */ mwl_bastream_setup(bas, tap->txa_tid, bas->bastream->txq); DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: bastream %p assigned to txq %d TID %d bufsiz %d " "htparam 0x%x\n", __func__, bas->bastream, bas->txq, tap->txa_tid, bufsiz, ni->ni_htparam); } else { /* * Other side NAK'd us; return the resources. */ DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: request failed with code %d, destroy bastream %p\n", __func__, code, bas->bastream); mwl_hal_bastream_destroy(sc->sc_mh, bas->bastream); mwl_bastream_free(bas); tap->txa_private = NULL; } /* NB: firmware sends BAR so we don't need to */ return sc->sc_addba_response(ni, tap, code, baparamset, batimeout); } static void mwl_addba_stop(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap) { struct mwl_softc *sc = ni->ni_ic->ic_softc; struct mwl_bastate *bas; bas = tap->txa_private; if (bas != NULL) { DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: destroy bastream %p\n", __func__, bas->bastream); mwl_hal_bastream_destroy(sc->sc_mh, bas->bastream); mwl_bastream_free(bas); tap->txa_private = NULL; } sc->sc_addba_stop(ni, tap); } /* * Setup the rx data structures. This should only be * done once or we may get out of sync with the firmware. */ static int mwl_startrecv(struct mwl_softc *sc) { if (!sc->sc_recvsetup) { struct mwl_rxbuf *bf, *prev; struct mwl_rxdesc *ds; prev = NULL; STAILQ_FOREACH(bf, &sc->sc_rxbuf, bf_list) { int error = mwl_rxbuf_init(sc, bf); if (error != 0) { DPRINTF(sc, MWL_DEBUG_RECV, "%s: mwl_rxbuf_init failed %d\n", __func__, error); return error; } if (prev != NULL) { ds = prev->bf_desc; ds->pPhysNext = htole32(bf->bf_daddr); } prev = bf; } if (prev != NULL) { ds = prev->bf_desc; ds->pPhysNext = htole32(STAILQ_FIRST(&sc->sc_rxbuf)->bf_daddr); } sc->sc_recvsetup = 1; } mwl_mode_init(sc); /* set filters, etc. 
*/ return 0; } static MWL_HAL_APMODE mwl_getapmode(const struct ieee80211vap *vap, struct ieee80211_channel *chan) { MWL_HAL_APMODE mode; if (IEEE80211_IS_CHAN_HT(chan)) { if (vap->iv_flags_ht & IEEE80211_FHT_PUREN) mode = AP_MODE_N_ONLY; else if (IEEE80211_IS_CHAN_5GHZ(chan)) mode = AP_MODE_AandN; else if (vap->iv_flags & IEEE80211_F_PUREG) mode = AP_MODE_GandN; else mode = AP_MODE_BandGandN; } else if (IEEE80211_IS_CHAN_ANYG(chan)) { if (vap->iv_flags & IEEE80211_F_PUREG) mode = AP_MODE_G_ONLY; else mode = AP_MODE_MIXED; } else if (IEEE80211_IS_CHAN_B(chan)) mode = AP_MODE_B_ONLY; else if (IEEE80211_IS_CHAN_A(chan)) mode = AP_MODE_A_ONLY; else mode = AP_MODE_MIXED; /* XXX should not happen? */ return mode; } static int mwl_setapmode(struct ieee80211vap *vap, struct ieee80211_channel *chan) { struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; return mwl_hal_setapmode(hvap, mwl_getapmode(vap, chan)); } /* * Set/change channels. */ static int mwl_chan_set(struct mwl_softc *sc, struct ieee80211_channel *chan) { struct mwl_hal *mh = sc->sc_mh; struct ieee80211com *ic = &sc->sc_ic; MWL_HAL_CHANNEL hchan; int maxtxpow; DPRINTF(sc, MWL_DEBUG_RESET, "%s: chan %u MHz/flags 0x%x\n", __func__, chan->ic_freq, chan->ic_flags); /* * Convert to a HAL channel description with * the flags constrained to reflect the current * operating mode. */ mwl_mapchan(&hchan, chan); mwl_hal_intrset(mh, 0); /* disable interrupts */ #if 0 mwl_draintxq(sc); /* clear pending tx frames */ #endif mwl_hal_setchannel(mh, &hchan); /* * Tx power is cap'd by the regulatory setting and * possibly a user-set limit. We pass the min of * these to the hal to apply them to the cal data * for this channel. * XXX min bound? */ maxtxpow = 2*chan->ic_maxregpower; if (maxtxpow > ic->ic_txpowlimit) maxtxpow = ic->ic_txpowlimit; mwl_hal_settxpower(mh, &hchan, maxtxpow / 2); /* NB: potentially change mcast/mgt rates */ mwl_setcurchanrates(sc); /* * Update internal state. */ sc->sc_tx_th.wt_chan_freq = htole16(chan->ic_freq); sc->sc_rx_th.wr_chan_freq = htole16(chan->ic_freq); if (IEEE80211_IS_CHAN_A(chan)) { sc->sc_tx_th.wt_chan_flags = htole16(IEEE80211_CHAN_A); sc->sc_rx_th.wr_chan_flags = htole16(IEEE80211_CHAN_A); } else if (IEEE80211_IS_CHAN_ANYG(chan)) { sc->sc_tx_th.wt_chan_flags = htole16(IEEE80211_CHAN_G); sc->sc_rx_th.wr_chan_flags = htole16(IEEE80211_CHAN_G); } else { sc->sc_tx_th.wt_chan_flags = htole16(IEEE80211_CHAN_B); sc->sc_rx_th.wr_chan_flags = htole16(IEEE80211_CHAN_B); } sc->sc_curchan = hchan; mwl_hal_intrset(mh, sc->sc_imask); return 0; } static void mwl_scan_start(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; DPRINTF(sc, MWL_DEBUG_STATE, "%s\n", __func__); } static void mwl_scan_end(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; DPRINTF(sc, MWL_DEBUG_STATE, "%s\n", __func__); } static void mwl_set_channel(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; (void) mwl_chan_set(sc, ic->ic_curchan); } /* * Handle a channel switch request. We inform the firmware * and mark the global state to suppress various actions. * NB: we issue only one request to the fw; we may be called * multiple times if there are multiple vap's. 
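 */

/*
 * Editor's illustrative sketch, not part of the original file: the
 * half-dBm arithmetic used by mwl_chan_set() above, written out as a
 * standalone helper with hypothetical names. net80211 keeps
 * ic_txpowlimit in half-dBm units while ic_maxregpower is in dBm, and
 * the hal wants whole dBm, hence the doubling and the final halving.
 */
static __inline int
example_txpow_clamp(int maxregpower_dbm, int txpowlimit_halfdbm)
{
	int maxtxpow = 2 * maxregpower_dbm;	/* dBm -> half-dBm */

	if (maxtxpow > txpowlimit_halfdbm)
		maxtxpow = txpowlimit_halfdbm;	/* apply the global cap */
	return (maxtxpow / 2);			/* half-dBm -> dBm */
}

/*
 * mwl_startcsa() below implements the channel switch request described
 * in the preceding comment.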
*/ static void mwl_startcsa(struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; struct mwl_softc *sc = ic->ic_softc; MWL_HAL_CHANNEL hchan; if (sc->sc_csapending) return; mwl_mapchan(&hchan, ic->ic_csa_newchan); /* 1 =>'s quiet channel */ mwl_hal_setchannelswitchie(sc->sc_mh, &hchan, 1, ic->ic_csa_count); sc->sc_csapending = 1; } /* * Plumb any static WEP key for the station. This is * necessary as we must propagate the key from the * global key table of the vap to each sta db entry. */ static void mwl_setanywepkey(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { if ((vap->iv_flags & (IEEE80211_F_PRIVACY|IEEE80211_F_WPA)) == IEEE80211_F_PRIVACY && vap->iv_def_txkey != IEEE80211_KEYIX_NONE && vap->iv_nw_keys[vap->iv_def_txkey].wk_keyix != IEEE80211_KEYIX_NONE) (void) _mwl_key_set(vap, &vap->iv_nw_keys[vap->iv_def_txkey], mac); } static int mwl_peerstadb(struct ieee80211_node *ni, int aid, int staid, MWL_HAL_PEERINFO *pi) { #define WME(ie) ((const struct ieee80211_wme_info *) ie) struct ieee80211vap *vap = ni->ni_vap; struct mwl_hal_vap *hvap; int error; if (vap->iv_opmode == IEEE80211_M_WDS) { /* * WDS vap's do not have a f/w vap; instead they piggyback * on an AP vap and we must install the sta db entry and * crypto state using that AP's handle (the WDS vap has none). */ hvap = MWL_VAP(vap)->mv_ap_hvap; } else hvap = MWL_VAP(vap)->mv_hvap; error = mwl_hal_newstation(hvap, ni->ni_macaddr, aid, staid, pi, ni->ni_flags & (IEEE80211_NODE_QOS | IEEE80211_NODE_HT), ni->ni_ies.wme_ie != NULL ? WME(ni->ni_ies.wme_ie)->wme_info : 0); if (error == 0) { /* * Setup security for this station. For sta mode this is * needed even though we do the same thing on transition to * AUTH state because the call to mwl_hal_newstation * clobbers the crypto state we set up. */ mwl_setanywepkey(vap, ni->ni_macaddr); } return error; #undef WME } static void mwl_setglobalkeys(struct ieee80211vap *vap) { struct ieee80211_key *wk; wk = &vap->iv_nw_keys[0]; for (; wk < &vap->iv_nw_keys[IEEE80211_WEP_NKID]; wk++) if (wk->wk_keyix != IEEE80211_KEYIX_NONE) (void) _mwl_key_set(vap, wk, vap->iv_myaddr); } /* * Convert a legacy rate set to a firmware bitmask. */ static uint32_t get_rate_bitmap(const struct ieee80211_rateset *rs) { uint32_t rates; int i; rates = 0; for (i = 0; i < rs->rs_nrates; i++) switch (rs->rs_rates[i] & IEEE80211_RATE_VAL) { case 2: rates |= 0x001; break; case 4: rates |= 0x002; break; case 11: rates |= 0x004; break; case 22: rates |= 0x008; break; case 44: rates |= 0x010; break; case 12: rates |= 0x020; break; case 18: rates |= 0x040; break; case 24: rates |= 0x080; break; case 36: rates |= 0x100; break; case 48: rates |= 0x200; break; case 72: rates |= 0x400; break; case 96: rates |= 0x800; break; case 108: rates |= 0x1000; break; } return rates; } /* * Construct an HT firmware bitmask from an HT rate set. */ static uint32_t get_htrate_bitmap(const struct ieee80211_htrateset *rs) { uint32_t rates; int i; rates = 0; for (i = 0; i < rs->rs_nrates; i++) { if (rs->rs_rates[i] < 16) rates |= 1<<rs->rs_rates[i]; } return rates; } /* * Craft station database entry for station. * NB: use host byte order here, the hal handles byte swapping.
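 */

/*
 * Editor's illustrative sketch, not part of the original file: the
 * switch in get_rate_bitmap() above expressed as a table lookup; the
 * table and function names are made up. Rates use net80211's half-Mb/s
 * encoding (2 = 1Mb/s ... 108 = 54Mb/s) and map to consecutive
 * firmware bits, so bit j corresponds to example_ratetab[j].
 */
static const uint8_t example_ratetab[] =
    { 2, 4, 11, 22, 44, 12, 18, 24, 36, 48, 72, 96, 108 };

static uint32_t
example_rate_bitmap(const struct ieee80211_rateset *rs)
{
	uint32_t rates = 0;
	int i, j;

	for (i = 0; i < rs->rs_nrates; i++)
		for (j = 0; j < (int)nitems(example_ratetab); j++)
			if ((rs->rs_rates[i] & IEEE80211_RATE_VAL) ==
			    example_ratetab[j])
				rates |= 1 << j;
	return rates;
}

/*
 * mkpeerinfo() below crafts the station database entry described in
 * the preceding comment.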
*/ static MWL_HAL_PEERINFO * mkpeerinfo(MWL_HAL_PEERINFO *pi, const struct ieee80211_node *ni) { const struct ieee80211vap *vap = ni->ni_vap; memset(pi, 0, sizeof(*pi)); pi->LegacyRateBitMap = get_rate_bitmap(&ni->ni_rates); pi->CapInfo = ni->ni_capinfo; if (ni->ni_flags & IEEE80211_NODE_HT) { /* HT capabilities, etc */ pi->HTCapabilitiesInfo = ni->ni_htcap; /* XXX pi.HTCapabilitiesInfo */ pi->MacHTParamInfo = ni->ni_htparam; pi->HTRateBitMap = get_htrate_bitmap(&ni->ni_htrates); pi->AddHtInfo.ControlChan = ni->ni_htctlchan; pi->AddHtInfo.AddChan = ni->ni_ht2ndchan; pi->AddHtInfo.OpMode = ni->ni_htopmode; pi->AddHtInfo.stbc = ni->ni_htstbc; /* constrain according to local configuration */ if ((vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40) == 0) pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_SHORTGI40; if ((vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20) == 0) pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_SHORTGI20; if (ni->ni_chw != 40) pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_CHWIDTH40; } return pi; } /* * Re-create the local sta db entry for a vap to ensure * up to date WME state is pushed to the firmware. Because * this resets crypto state this must be followed by a * reload of any keys in the global key table. */ static int mwl_localstadb(struct ieee80211vap *vap) { #define WME(ie) ((const struct ieee80211_wme_info *) ie) struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; struct ieee80211_node *bss; MWL_HAL_PEERINFO pi; int error; switch (vap->iv_opmode) { case IEEE80211_M_STA: bss = vap->iv_bss; error = mwl_hal_newstation(hvap, vap->iv_myaddr, 0, 0, vap->iv_state == IEEE80211_S_RUN ? mkpeerinfo(&pi, bss) : NULL, (bss->ni_flags & (IEEE80211_NODE_QOS | IEEE80211_NODE_HT)), bss->ni_ies.wme_ie != NULL ? WME(bss->ni_ies.wme_ie)->wme_info : 0); if (error == 0) mwl_setglobalkeys(vap); break; case IEEE80211_M_HOSTAP: case IEEE80211_M_MBSS: error = mwl_hal_newstation(hvap, vap->iv_myaddr, 0, 0, NULL, vap->iv_flags & IEEE80211_F_WME, 0); if (error == 0) mwl_setglobalkeys(vap); break; default: error = 0; break; } return error; #undef WME } static int mwl_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct mwl_vap *mvp = MWL_VAP(vap); struct mwl_hal_vap *hvap = mvp->mv_hvap; struct ieee80211com *ic = vap->iv_ic; struct ieee80211_node *ni = NULL; struct mwl_softc *sc = ic->ic_softc; struct mwl_hal *mh = sc->sc_mh; enum ieee80211_state ostate = vap->iv_state; int error; DPRINTF(sc, MWL_DEBUG_STATE, "%s: %s: %s -> %s\n", vap->iv_ifp->if_xname, __func__, ieee80211_state_name[ostate], ieee80211_state_name[nstate]); callout_stop(&sc->sc_timer); /* * Clear current radar detection state. */ if (ostate == IEEE80211_S_CAC) { /* stop quiet mode radar detection */ mwl_hal_setradardetection(mh, DR_CHK_CHANNEL_AVAILABLE_STOP); } else if (sc->sc_radarena) { /* stop in-service radar detection */ mwl_hal_setradardetection(mh, DR_DFS_DISABLE); sc->sc_radarena = 0; } /* * Carry out per-state actions before doing net80211 work. */ if (nstate == IEEE80211_S_INIT) { /* NB: only ap+sta vap's have a fw entity */ if (hvap != NULL) mwl_hal_stop(hvap); } else if (nstate == IEEE80211_S_SCAN) { mwl_hal_start(hvap); /* NB: this disables beacon frames */ mwl_hal_setinframode(hvap); } else if (nstate == IEEE80211_S_AUTH) { /* * Must create a sta db entry in case a WEP key needs to * be plumbed. This entry will be overwritten if we * associate; otherwise it will be reclaimed on node free. 
*/ ni = vap->iv_bss; MWL_NODE(ni)->mn_hvap = hvap; (void) mwl_peerstadb(ni, 0, 0, NULL); } else if (nstate == IEEE80211_S_CSA) { /* XXX move to below? */ if (vap->iv_opmode == IEEE80211_M_HOSTAP || vap->iv_opmode == IEEE80211_M_MBSS) mwl_startcsa(vap); } else if (nstate == IEEE80211_S_CAC) { /* XXX move to below? */ /* stop ap xmit and enable quiet mode radar detection */ mwl_hal_setradardetection(mh, DR_CHK_CHANNEL_AVAILABLE_START); } /* * Invoke the parent method to do net80211 work. */ error = mvp->mv_newstate(vap, nstate, arg); /* * Carry out work that must be done after net80211 runs; * this work requires up-to-date state (e.g. iv_bss). */ if (error == 0 && nstate == IEEE80211_S_RUN) { /* NB: collect bss node again, it may have changed */ ni = vap->iv_bss; DPRINTF(sc, MWL_DEBUG_STATE, "%s: %s(RUN): iv_flags 0x%08x bintvl %d bssid %s " "capinfo 0x%04x chan %d\n", vap->iv_ifp->if_xname, __func__, vap->iv_flags, ni->ni_intval, ether_sprintf(ni->ni_bssid), ni->ni_capinfo, ieee80211_chan2ieee(ic, ic->ic_curchan)); /* * Recreate local sta db entry to update WME/HT state. */ mwl_localstadb(vap); switch (vap->iv_opmode) { case IEEE80211_M_HOSTAP: case IEEE80211_M_MBSS: if (ostate == IEEE80211_S_CAC) { /* enable in-service radar detection */ mwl_hal_setradardetection(mh, DR_IN_SERVICE_MONITOR_START); sc->sc_radarena = 1; } /* * Allocate and setup the beacon frame * (and related state). */ error = mwl_reset_vap(vap, IEEE80211_S_RUN); if (error != 0) { DPRINTF(sc, MWL_DEBUG_STATE, "%s: beacon setup failed, error %d\n", __func__, error); goto bad; } /* NB: must be after setting up beacon */ mwl_hal_start(hvap); break; case IEEE80211_M_STA: DPRINTF(sc, MWL_DEBUG_STATE, "%s: %s: aid 0x%x\n", vap->iv_ifp->if_xname, __func__, ni->ni_associd); /* * Set state now that we're associated. */ mwl_hal_setassocid(hvap, ni->ni_bssid, ni->ni_associd); mwl_setrates(vap); mwl_hal_setrtsthreshold(hvap, vap->iv_rtsthreshold); if ((vap->iv_flags & IEEE80211_F_DWDS) && sc->sc_ndwdsvaps++ == 0) mwl_hal_setdwds(mh, 1); break; case IEEE80211_M_WDS: DPRINTF(sc, MWL_DEBUG_STATE, "%s: %s: bssid %s\n", vap->iv_ifp->if_xname, __func__, ether_sprintf(ni->ni_bssid)); mwl_seteapolformat(vap); break; default: break; } /* * Set CS mode according to operating channel; * this is mostly an optimization for 5GHz. * * NB: must follow mwl_hal_start, which resets csmode */ if (IEEE80211_IS_CHAN_5GHZ(ic->ic_bsschan)) mwl_hal_setcsmode(mh, CSMODE_AGGRESSIVE); else mwl_hal_setcsmode(mh, CSMODE_AUTO_ENA); /* * Start timer to prod firmware. */ if (sc->sc_ageinterval != 0) callout_reset(&sc->sc_timer, sc->sc_ageinterval*hz, mwl_agestations, sc); } else if (nstate == IEEE80211_S_SLEEP) { /* XXX set chip in power save */ } else if ((vap->iv_flags & IEEE80211_F_DWDS) && --sc->sc_ndwdsvaps == 0) mwl_hal_setdwds(mh, 0); bad: return error; } /* * Manage station id's; these are separate from AID's * as AID's may have values out of the range of possible * station id's acceptable to the firmware. */ static int allocstaid(struct mwl_softc *sc, int aid) { int staid; if (!(0 < aid && aid < MWL_MAXSTAID) || isset(sc->sc_staid, aid)) { /* NB: don't use 0 */ for (staid = 1; staid < MWL_MAXSTAID; staid++) if (isclr(sc->sc_staid, staid)) break; } else staid = aid; setbit(sc->sc_staid, staid); return staid; } static void delstaid(struct mwl_softc *sc, int staid) { clrbit(sc->sc_staid, staid); } /* * Setup driver-specific state for a newly associated node. * Note that we're also called on a re-associate; the isnew * param tells us if this is the first time or not.
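 */

/*
 * Editor's illustrative sketch, not part of the original file: how the
 * allocstaid()/delstaid() pair above behaves, assuming an initially
 * empty sc_staid map and MWL_MAXSTAID > 5. The function name is made
 * up and the function is unused; it only documents the policy.
 */
static __unused void
example_staid_usage(struct mwl_softc *sc)
{
	int a, b, c;

	a = allocstaid(sc, 5);	/* aid in range and free: a == 5 */
	b = allocstaid(sc, 5);	/* 5 already taken: first free id, b == 1 */
	c = allocstaid(sc, 0);	/* aid 0 never used: next free, c == 2 */
	delstaid(sc, c);
	delstaid(sc, b);
	delstaid(sc, a);
}

/*
 * mwl_newassoc() below sets up that per-station driver state.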
*/ static void mwl_newassoc(struct ieee80211_node *ni, int isnew) { struct ieee80211vap *vap = ni->ni_vap; struct mwl_softc *sc = vap->iv_ic->ic_softc; struct mwl_node *mn = MWL_NODE(ni); MWL_HAL_PEERINFO pi; uint16_t aid; int error; aid = IEEE80211_AID(ni->ni_associd); if (isnew) { mn->mn_staid = allocstaid(sc, aid); mn->mn_hvap = MWL_VAP(vap)->mv_hvap; } else { mn = MWL_NODE(ni); /* XXX reset BA stream? */ } DPRINTF(sc, MWL_DEBUG_NODE, "%s: mac %s isnew %d aid %d staid %d\n", __func__, ether_sprintf(ni->ni_macaddr), isnew, aid, mn->mn_staid); error = mwl_peerstadb(ni, aid, mn->mn_staid, mkpeerinfo(&pi, ni)); if (error != 0) { DPRINTF(sc, MWL_DEBUG_NODE, "%s: error %d creating sta db entry\n", __func__, error); /* XXX how to deal with error? */ } } /* * Periodically poke the firmware to age out station state * (power save queues, pending tx aggregates). */ static void mwl_agestations(void *arg) { struct mwl_softc *sc = arg; mwl_hal_setkeepalive(sc->sc_mh); if (sc->sc_ageinterval != 0) /* NB: catch dynamic changes */ callout_schedule(&sc->sc_timer, sc->sc_ageinterval*hz); } static const struct mwl_hal_channel * findhalchannel(const MWL_HAL_CHANNELINFO *ci, int ieee) { int i; for (i = 0; i < ci->nchannels; i++) { const struct mwl_hal_channel *hc = &ci->channels[i]; if (hc->ieee == ieee) return hc; } return NULL; } static int mwl_setregdomain(struct ieee80211com *ic, struct ieee80211_regdomain *rd, int nchan, struct ieee80211_channel chans[]) { struct mwl_softc *sc = ic->ic_softc; struct mwl_hal *mh = sc->sc_mh; const MWL_HAL_CHANNELINFO *ci; int i; for (i = 0; i < nchan; i++) { struct ieee80211_channel *c = &chans[i]; const struct mwl_hal_channel *hc; if (IEEE80211_IS_CHAN_2GHZ(c)) { mwl_hal_getchannelinfo(mh, MWL_FREQ_BAND_2DOT4GHZ, IEEE80211_IS_CHAN_HT40(c) ? MWL_CH_40_MHz_WIDTH : MWL_CH_20_MHz_WIDTH, &ci); } else if (IEEE80211_IS_CHAN_5GHZ(c)) { mwl_hal_getchannelinfo(mh, MWL_FREQ_BAND_5GHZ, IEEE80211_IS_CHAN_HT40(c) ? MWL_CH_40_MHz_WIDTH : MWL_CH_20_MHz_WIDTH, &ci); } else { device_printf(sc->sc_dev, "%s: channel %u freq %u/0x%x not 2.4/5GHz\n", __func__, c->ic_ieee, c->ic_freq, c->ic_flags); return EINVAL; } /* * Verify channel has cal data and cap tx power. */ hc = findhalchannel(ci, c->ic_ieee); if (hc != NULL) { if (c->ic_maxpower > 2*hc->maxTxPow) c->ic_maxpower = 2*hc->maxTxPow; goto next; } if (IEEE80211_IS_CHAN_HT40(c)) { /* * Look for the extension channel since the * hal table only has the primary channel. 
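 * For example, an HT40U channel whose primary is IEEE 36 carries
 * ic_extieee 40; for HT40D the extension sits below the primary.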
*/ hc = findhalchannel(ci, c->ic_extieee); if (hc != NULL) { if (c->ic_maxpower > 2*hc->maxTxPow) c->ic_maxpower = 2*hc->maxTxPow; goto next; } } device_printf(sc->sc_dev, "%s: no cal data for channel %u ext %u freq %u/0x%x\n", __func__, c->ic_ieee, c->ic_extieee, c->ic_freq, c->ic_flags); return EINVAL; next: ; } return 0; } #define IEEE80211_CHAN_HTG (IEEE80211_CHAN_HT|IEEE80211_CHAN_G) #define IEEE80211_CHAN_HTA (IEEE80211_CHAN_HT|IEEE80211_CHAN_A) static void addht40channels(struct ieee80211_channel chans[], int maxchans, int *nchans, const MWL_HAL_CHANNELINFO *ci, int flags) { int i, error; for (i = 0; i < ci->nchannels; i++) { const struct mwl_hal_channel *hc = &ci->channels[i]; error = ieee80211_add_channel_ht40(chans, maxchans, nchans, hc->ieee, hc->maxTxPow, flags); if (error != 0 && error != ENOENT) break; } } static void addchannels(struct ieee80211_channel chans[], int maxchans, int *nchans, const MWL_HAL_CHANNELINFO *ci, const uint8_t bands[]) { int i, error; error = 0; for (i = 0; i < ci->nchannels && error == 0; i++) { const struct mwl_hal_channel *hc = &ci->channels[i]; error = ieee80211_add_channel(chans, maxchans, nchans, hc->ieee, hc->freq, hc->maxTxPow, 0, bands); } } static void getchannels(struct mwl_softc *sc, int maxchans, int *nchans, struct ieee80211_channel chans[]) { const MWL_HAL_CHANNELINFO *ci; uint8_t bands[IEEE80211_MODE_BYTES]; /* * Use the channel info from the hal to craft the * channel list. Note that we pass back an unsorted * list; the caller is required to sort it for us * (if desired). */ *nchans = 0; if (mwl_hal_getchannelinfo(sc->sc_mh, MWL_FREQ_BAND_2DOT4GHZ, MWL_CH_20_MHz_WIDTH, &ci) == 0) { memset(bands, 0, sizeof(bands)); setbit(bands, IEEE80211_MODE_11B); setbit(bands, IEEE80211_MODE_11G); setbit(bands, IEEE80211_MODE_11NG); addchannels(chans, maxchans, nchans, ci, bands); } if (mwl_hal_getchannelinfo(sc->sc_mh, MWL_FREQ_BAND_5GHZ, MWL_CH_20_MHz_WIDTH, &ci) == 0) { memset(bands, 0, sizeof(bands)); setbit(bands, IEEE80211_MODE_11A); setbit(bands, IEEE80211_MODE_11NA); addchannels(chans, maxchans, nchans, ci, bands); } if (mwl_hal_getchannelinfo(sc->sc_mh, MWL_FREQ_BAND_2DOT4GHZ, MWL_CH_40_MHz_WIDTH, &ci) == 0) addht40channels(chans, maxchans, nchans, ci, IEEE80211_CHAN_HTG); if (mwl_hal_getchannelinfo(sc->sc_mh, MWL_FREQ_BAND_5GHZ, MWL_CH_40_MHz_WIDTH, &ci) == 0) addht40channels(chans, maxchans, nchans, ci, IEEE80211_CHAN_HTA); } static void mwl_getradiocaps(struct ieee80211com *ic, int maxchans, int *nchans, struct ieee80211_channel chans[]) { struct mwl_softc *sc = ic->ic_softc; getchannels(sc, maxchans, nchans, chans); } static int mwl_getchannels(struct mwl_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; /* * Use the channel info from the hal to craft the * channel list for net80211. Note that we pass up * an unsorted list; net80211 will sort it for us. */ memset(ic->ic_channels, 0, sizeof(ic->ic_channels)); ic->ic_nchans = 0; getchannels(sc, IEEE80211_CHAN_MAX, &ic->ic_nchans, ic->ic_channels); ic->ic_regdomain.regdomain = SKU_DEBUG; ic->ic_regdomain.country = CTRY_DEFAULT; ic->ic_regdomain.location = 'I'; ic->ic_regdomain.isocc[0] = ' '; /* XXX? */ ic->ic_regdomain.isocc[1] = ' '; return (ic->ic_nchans == 0 ? 
EIO : 0); } #undef IEEE80211_CHAN_HTA #undef IEEE80211_CHAN_HTG #ifdef MWL_DEBUG static void mwl_printrxbuf(const struct mwl_rxbuf *bf, u_int ix) { const struct mwl_rxdesc *ds = bf->bf_desc; uint32_t status = le32toh(ds->Status); printf("R[%2u] (DS.V:%p DS.P:0x%jx) NEXT:%08x DATA:%08x RC:%02x%s\n" " STAT:%02x LEN:%04x RSSI:%02x CHAN:%02x RATE:%02x QOS:%04x HT:%04x\n", ix, ds, (uintmax_t)bf->bf_daddr, le32toh(ds->pPhysNext), le32toh(ds->pPhysBuffData), ds->RxControl, ds->RxControl != EAGLE_RXD_CTRL_DRIVER_OWN ? "" : (status & EAGLE_RXD_STATUS_OK) ? " *" : " !", ds->Status, le16toh(ds->PktLen), ds->RSSI, ds->Channel, ds->Rate, le16toh(ds->QosCtrl), le16toh(ds->HtSig2)); } static void mwl_printtxbuf(const struct mwl_txbuf *bf, u_int qnum, u_int ix) { const struct mwl_txdesc *ds = bf->bf_desc; uint32_t status = le32toh(ds->Status); printf("Q%u[%3u]", qnum, ix); printf(" (DS.V:%p DS.P:0x%jx)\n", ds, (uintmax_t)bf->bf_daddr); printf(" NEXT:%08x DATA:%08x LEN:%04x STAT:%08x%s\n", le32toh(ds->pPhysNext), le32toh(ds->PktPtr), le16toh(ds->PktLen), status, status & EAGLE_TXD_STATUS_USED ? "" : (status & 3) != 0 ? " *" : " !"); printf(" RATE:%02x PRI:%x QOS:%04x SAP:%08x FORMAT:%04x\n", ds->DataRate, ds->TxPriority, le16toh(ds->QosCtrl), le32toh(ds->SapPktInfo), le16toh(ds->Format)); #if MWL_TXDESC > 1 printf(" MULTIFRAMES:%u LEN:%04x %04x %04x %04x %04x %04x\n" , le32toh(ds->multiframes) , le16toh(ds->PktLenArray[0]), le16toh(ds->PktLenArray[1]) , le16toh(ds->PktLenArray[2]), le16toh(ds->PktLenArray[3]) , le16toh(ds->PktLenArray[4]), le16toh(ds->PktLenArray[5]) ); printf(" DATA:%08x %08x %08x %08x %08x %08x\n" , le32toh(ds->PktPtrArray[0]), le32toh(ds->PktPtrArray[1]) , le32toh(ds->PktPtrArray[2]), le32toh(ds->PktPtrArray[3]) , le32toh(ds->PktPtrArray[4]), le32toh(ds->PktPtrArray[5]) ); #endif #if 0 { const uint8_t *cp = (const uint8_t *) ds; int i; for (i = 0; i < sizeof(struct mwl_txdesc); i++) { printf("%02x ", cp[i]); if (((i+1) % 16) == 0) printf("\n"); } printf("\n"); } #endif } #endif /* MWL_DEBUG */ #if 0 static void mwl_txq_dump(struct mwl_txq *txq) { struct mwl_txbuf *bf; int i = 0; MWL_TXQ_LOCK(txq); STAILQ_FOREACH(bf, &txq->active, bf_list) { struct mwl_txdesc *ds = bf->bf_desc; MWL_TXDESC_SYNC(txq, ds, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); #ifdef MWL_DEBUG mwl_printtxbuf(bf, txq->qnum, i); #endif i++; } MWL_TXQ_UNLOCK(txq); } #endif static void mwl_watchdog(void *arg) { struct mwl_softc *sc = arg; callout_reset(&sc->sc_watchdog, hz, mwl_watchdog, sc); if (sc->sc_tx_timer == 0 || --sc->sc_tx_timer > 0) return; if (sc->sc_running && !sc->sc_invalid) { if (mwl_hal_setkeepalive(sc->sc_mh)) device_printf(sc->sc_dev, "transmit timeout (firmware hung?)\n"); else device_printf(sc->sc_dev, "transmit timeout\n"); #if 0 mwl_reset(sc); mwl_txq_dump(&sc->sc_txq[0]);/*XXX*/ #endif counter_u64_add(sc->sc_ic.ic_oerrors, 1); sc->sc_stats.mst_watchdog++; } } #ifdef MWL_DIAGAPI /* * Diagnostic interface to the HAL. This is used by various * tools to do things like retrieve register contents for * debugging. The mechanism is intentionally opaque so that * it can change frequently w/o concern for compatibility. */ static int mwl_ioctl_diag(struct mwl_softc *sc, struct mwl_diag *md) { struct mwl_hal *mh = sc->sc_mh; u_int id = md->md_id & MWL_DIAG_ID; void *indata = NULL; void *outdata = NULL; u_int32_t insize = md->md_in_size; u_int32_t outsize = md->md_out_size; int error = 0; if (md->md_id & MWL_DIAG_IN) { /* * Copy in data. 
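 * (md_in_data and md_in_size come straight from the user's mwl_diag
 * request, so the input buffer is pulled in with copyin() before the
 * hal ever sees it)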
*/ indata = malloc(insize, M_TEMP, M_NOWAIT); if (indata == NULL) { error = ENOMEM; goto bad; } error = copyin(md->md_in_data, indata, insize); if (error) goto bad; } if (md->md_id & MWL_DIAG_DYN) { /* * Allocate a buffer for the results (otherwise the HAL * returns a pointer to a buffer where we can read the * results). Note that we depend on the HAL leaving this * pointer for us to use below in reclaiming the buffer; * we may want to be more defensive. */ outdata = malloc(outsize, M_TEMP, M_NOWAIT); if (outdata == NULL) { error = ENOMEM; goto bad; } } if (mwl_hal_getdiagstate(mh, id, indata, insize, &outdata, &outsize)) { if (outsize < md->md_out_size) md->md_out_size = outsize; if (outdata != NULL) error = copyout(outdata, md->md_out_data, md->md_out_size); } else { error = EINVAL; } bad: if ((md->md_id & MWL_DIAG_IN) && indata != NULL) free(indata, M_TEMP); if ((md->md_id & MWL_DIAG_DYN) && outdata != NULL) free(outdata, M_TEMP); return error; } static int mwl_ioctl_reset(struct mwl_softc *sc, struct mwl_diag *md) { struct mwl_hal *mh = sc->sc_mh; int error; MWL_LOCK_ASSERT(sc); if (md->md_id == 0 && mwl_hal_fwload(mh, NULL) != 0) { device_printf(sc->sc_dev, "unable to load firmware\n"); return EIO; } if (mwl_hal_gethwspecs(mh, &sc->sc_hwspecs) != 0) { device_printf(sc->sc_dev, "unable to fetch h/w specs\n"); return EIO; } error = mwl_setupdma(sc); if (error != 0) { /* NB: mwl_setupdma prints a msg */ return error; } /* * Reset tx/rx data structures; after reload we must * re-start the driver's notion of the next xmit/recv. */ mwl_draintxq(sc); /* clear pending frames */ mwl_resettxq(sc); /* rebuild tx q lists */ sc->sc_rxnext = NULL; /* force rx to start at the list head */ return 0; } #endif /* MWL_DIAGAPI */ static void mwl_parent(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; int startall = 0; MWL_LOCK(sc); if (ic->ic_nrunning > 0) { if (sc->sc_running) { /* * To avoid rescanning another access point, * do not call mwl_init() here. Instead, * only reflect promisc mode settings. */ mwl_mode_init(sc); } else { /* * Beware of being called during attach/detach * to reset promiscuous mode. In that case we * will still be marked UP but not RUNNING. * However, trying to re-init the interface * is the wrong thing to do as we've already * torn down much of our state. There's * probably a better way to deal with this. */ if (!sc->sc_invalid) { mwl_init(sc); /* XXX lose error */ startall = 1; } } } else mwl_stop(sc); MWL_UNLOCK(sc); if (startall) ieee80211_start_all(ic); } static int mwl_ioctl(struct ieee80211com *ic, u_long cmd, void *data) { struct mwl_softc *sc = ic->ic_softc; struct ifreq *ifr = data; int error = 0; switch (cmd) { case SIOCGMVSTATS: mwl_hal_gethwstats(sc->sc_mh, &sc->sc_stats.hw_stats); #if 0 /* NB: embed these numbers to get a consistent view */ sc->sc_stats.mst_tx_packets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS); sc->sc_stats.mst_rx_packets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS); #endif /* * NB: Drop the softc lock in case of a page fault; * we'll accept any potential inconsistency in the * statistics. The alternative is to copy the data * to a local structure.
*/ return (copyout(&sc->sc_stats, ifr->ifr_data, sizeof (sc->sc_stats))); #ifdef MWL_DIAGAPI case SIOCGMVDIAG: /* XXX check privs */ return mwl_ioctl_diag(sc, (struct mwl_diag *) ifr); case SIOCGMVRESET: /* XXX check privs */ MWL_LOCK(sc); error = mwl_ioctl_reset(sc,(struct mwl_diag *) ifr); MWL_UNLOCK(sc); break; #endif /* MWL_DIAGAPI */ default: error = ENOTTY; break; } return (error); } #ifdef MWL_DEBUG static int mwl_sysctl_debug(SYSCTL_HANDLER_ARGS) { struct mwl_softc *sc = arg1; int debug, error; debug = sc->sc_debug | (mwl_hal_getdebug(sc->sc_mh) << 24); error = sysctl_handle_int(oidp, &debug, 0, req); if (error || !req->newptr) return error; mwl_hal_setdebug(sc->sc_mh, debug >> 24); sc->sc_debug = debug & 0x00ffffff; return 0; } #endif /* MWL_DEBUG */ static void mwl_sysctlattach(struct mwl_softc *sc) { #ifdef MWL_DEBUG struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->sc_dev); struct sysctl_oid *tree = device_get_sysctl_tree(sc->sc_dev); sc->sc_debug = mwl_debug; SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "debug", CTLTYPE_INT | CTLFLAG_RW, sc, 0, mwl_sysctl_debug, "I", "control debugging printfs"); #endif } /* * Announce various information on device/driver attach. */ static void mwl_announce(struct mwl_softc *sc) { device_printf(sc->sc_dev, "Rev A%d hardware, v%d.%d.%d.%d firmware (regioncode %d)\n", sc->sc_hwspecs.hwVersion, (sc->sc_hwspecs.fwReleaseNumber>>24) & 0xff, (sc->sc_hwspecs.fwReleaseNumber>>16) & 0xff, (sc->sc_hwspecs.fwReleaseNumber>>8) & 0xff, (sc->sc_hwspecs.fwReleaseNumber>>0) & 0xff, sc->sc_hwspecs.regionCode); sc->sc_fwrelease = sc->sc_hwspecs.fwReleaseNumber; if (bootverbose) { int i; for (i = 0; i <= WME_AC_VO; i++) { struct mwl_txq *txq = sc->sc_ac2q[i]; device_printf(sc->sc_dev, "Use hw queue %u for %s traffic\n", txq->qnum, ieee80211_wme_acnames[i]); } } if (bootverbose || mwl_rxdesc != MWL_RXDESC) device_printf(sc->sc_dev, "using %u rx descriptors\n", mwl_rxdesc); if (bootverbose || mwl_rxbuf != MWL_RXBUF) device_printf(sc->sc_dev, "using %u rx buffers\n", mwl_rxbuf); if (bootverbose || mwl_txbuf != MWL_TXBUF) device_printf(sc->sc_dev, "using %u tx buffers\n", mwl_txbuf); if (bootverbose && mwl_hal_ismbsscapable(sc->sc_mh)) device_printf(sc->sc_dev, "multi-bss support\n"); #ifdef MWL_TX_NODROP if (bootverbose) device_printf(sc->sc_dev, "no tx drop\n"); #endif } Index: head/sys/dev/netmap/netmap_generic.c =================================================================== --- head/sys/dev/netmap/netmap_generic.c (revision 324445) +++ head/sys/dev/netmap/netmap_generic.c (revision 324446) @@ -1,1262 +1,1262 @@ /* * Copyright (C) 2013-2016 Vincenzo Maffione * Copyright (C) 2013-2016 Luigi Rizzo * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This module implements netmap support on top of standard, * unmodified device drivers. * * A NIOCREGIF request is handled here if the device does not * have native support. TX and RX rings are emulated as follows: * * NIOCREGIF * We preallocate a block of TX mbufs (roughly as many as * tx descriptors; the number is not critical) to speed up * operation during transmissions. The refcount on most of * these buffers is artificially bumped up so we can recycle * them more easily. Also, the destructor is intercepted * so we use it as an interrupt notification to wake up * processes blocked on a poll(). * * For each receive ring we allocate one "struct mbq" * (an mbuf tailq plus a spinlock). We intercept packets * (through if_input) * on the receive path and put them in the mbq from which * netmap receive routines can grab them. * * TX: * in the generic_txsync() routine, netmap buffers are copied * (or linked, in a future) to the preallocated mbufs * and pushed to the transmit queue. Some of these mbufs * (those with NS_REPORT, or otherwise every half ring) * have the refcount=1, others have refcount=2. * When the destructor is invoked, we take that as * a notification that all mbufs up to that one in * the specific ring have been completed, and generate * the equivalent of a transmit interrupt. * * RX: * */ #ifdef __FreeBSD__ #include /* prerequisite */ __FBSDID("$FreeBSD$"); #include #include #include #include /* PROT_EXEC */ #include #include /* sockaddrs */ #include #include #include #include #include /* bus_dmamap_* in netmap_kern.h */ // XXX temporary - D() defined here #include #include #include #define rtnl_lock() ND("rtnl_lock called") #define rtnl_unlock() ND("rtnl_unlock called") #define MBUF_RXQ(m) ((m)->m_pkthdr.flowid) #define smp_mb() /* * FreeBSD mbuf allocator/deallocator in emulation mode: */ #if __FreeBSD_version < 1100000 /* * For older versions of FreeBSD: * * We allocate EXT_PACKET mbuf+clusters, but need to set M_NOFREE * so that the destructor, if invoked, will not free the packet. * In principle we should set the destructor only on demand, * but since there might be a race we better do it on allocation. * As a consequence, we also need to set the destructor or we * would leak buffers. */ /* mbuf destructor, also need to change the type to EXT_EXTREF, * add an M_NOFREE flag, and then clear the flag and * chain into uma_zfree(zone_pack, mf) * (or reinstall the buffer ?) 
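 * SET_MBUF_DESTRUCTOR() below installs the destructor and retypes the
 * mbuf, and void_mbuf_dtor() restores the original storage before
 * handing the mbuf back to uma_zfree(zone_pack, m).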
*/ #define SET_MBUF_DESTRUCTOR(m, fn) do { \ (m)->m_ext.ext_free = (void *)fn; \ (m)->m_ext.ext_type = EXT_EXTREF; \ } while (0) static int void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2) { /* restore original mbuf */ m->m_ext.ext_buf = m->m_data = m->m_ext.ext_arg1; m->m_ext.ext_arg1 = NULL; m->m_ext.ext_type = EXT_PACKET; m->m_ext.ext_free = NULL; if (MBUF_REFCNT(m) == 0) SET_MBUF_REFCNT(m, 1); uma_zfree(zone_pack, m); return 0; } static inline struct mbuf * nm_os_get_mbuf(struct ifnet *ifp, int len) { struct mbuf *m; (void)ifp; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m) { /* m_getcl() (mb_ctor_mbuf) has an assert that checks that * the M_NOFREE flag is not specified as third argument, * so we have to set M_NOFREE after m_getcl(). */ m->m_flags |= M_NOFREE; m->m_ext.ext_arg1 = m->m_ext.ext_buf; // XXX save m->m_ext.ext_free = (void *)void_mbuf_dtor; m->m_ext.ext_type = EXT_EXTREF; ND(5, "create m %p refcnt %d", m, MBUF_REFCNT(m)); } return m; } #else /* __FreeBSD_version >= 1100000 */ /* * Newer versions of FreeBSD, using a straightforward scheme. * * We allocate mbufs with m_gethdr(), since the mbuf header is needed * by the driver. We also attach custom external storage, * which in this case is a netmap buffer. When calling m_extadd(), however, * we pass a NULL address, since the real address (and length) will be * filled in by nm_os_generic_xmit_frame() right before calling * if_transmit(). * * The dtor function does nothing; however, we need it since mb_free_ext() * has a KASSERT(), checking that the mbuf dtor function is not NULL. */ -static void void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2) { } +static void void_mbuf_dtor(struct mbuf *m) { } #define SET_MBUF_DESTRUCTOR(m, fn) do { \ (m)->m_ext.ext_free = (fn != NULL) ? \ (void *)fn : (void *)void_mbuf_dtor; \ } while (0) static inline struct mbuf * nm_os_get_mbuf(struct ifnet *ifp, int len) { struct mbuf *m; (void)ifp; (void)len; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { return m; } m_extadd(m, NULL /* buf */, 0 /* size */, void_mbuf_dtor, NULL, NULL, 0, EXT_NET_DRV); return m; } #endif /* __FreeBSD_version >= 1100000 */ #elif defined _WIN32 #include "win_glue.h" #define rtnl_lock() ND("rtnl_lock called") #define rtnl_unlock() ND("rtnl_unlock called") #define MBUF_TXQ(m) 0//((m)->m_pkthdr.flowid) #define MBUF_RXQ(m) 0//((m)->m_pkthdr.flowid) #define smp_mb() //XXX: to be correctly defined #else /* linux */ #include "bsd_glue.h" #include /* rtnl_[un]lock() */ #include /* struct ethtool_ops, get_ringparam */ #include static inline struct mbuf * nm_os_get_mbuf(struct ifnet *ifp, int len) { return alloc_skb(ifp->needed_headroom + len + ifp->needed_tailroom, GFP_ATOMIC); } #endif /* linux */ /* Common headers.
*/ #include #include #include #define for_each_kring_n(_i, _k, _karr, _n) \ for (_k=_karr, _i = 0; _i < _n; (_k)++, (_i)++) #define for_each_tx_kring(_i, _k, _na) \ for_each_kring_n(_i, _k, (_na)->tx_rings, (_na)->num_tx_rings) #define for_each_tx_kring_h(_i, _k, _na) \ for_each_kring_n(_i, _k, (_na)->tx_rings, (_na)->num_tx_rings + 1) #define for_each_rx_kring(_i, _k, _na) \ for_each_kring_n(_i, _k, (_na)->rx_rings, (_na)->num_rx_rings) #define for_each_rx_kring_h(_i, _k, _na) \ for_each_kring_n(_i, _k, (_na)->rx_rings, (_na)->num_rx_rings + 1) /* ======================== PERFORMANCE STATISTICS =========================== */ #ifdef RATE_GENERIC #define IFRATE(x) x struct rate_stats { unsigned long txpkt; unsigned long txsync; unsigned long txirq; unsigned long txrepl; unsigned long txdrop; unsigned long rxpkt; unsigned long rxirq; unsigned long rxsync; }; struct rate_context { unsigned refcount; struct timer_list timer; struct rate_stats new; struct rate_stats old; }; #define RATE_PRINTK(_NAME_) \ printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD); #define RATE_PERIOD 2 static void rate_callback(unsigned long arg) { struct rate_context * ctx = (struct rate_context *)arg; struct rate_stats cur = ctx->new; int r; RATE_PRINTK(txpkt); RATE_PRINTK(txsync); RATE_PRINTK(txirq); RATE_PRINTK(txrepl); RATE_PRINTK(txdrop); RATE_PRINTK(rxpkt); RATE_PRINTK(rxsync); RATE_PRINTK(rxirq); printk("\n"); ctx->old = cur; r = mod_timer(&ctx->timer, jiffies + msecs_to_jiffies(RATE_PERIOD * 1000)); if (unlikely(r)) D("[v1000] Error: mod_timer()"); } static struct rate_context rate_ctx; void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi) { if (txp) rate_ctx.new.txpkt++; if (txs) rate_ctx.new.txsync++; if (txi) rate_ctx.new.txirq++; if (rxp) rate_ctx.new.rxpkt++; if (rxs) rate_ctx.new.rxsync++; if (rxi) rate_ctx.new.rxirq++; } #else /* !RATE */ #define IFRATE(x) #endif /* !RATE */ /* ========== GENERIC (EMULATED) NETMAP ADAPTER SUPPORT ============= */ /* * Wrapper used by the generic adapter layer to notify * the poller threads. Differently from netmap_rx_irq(), we check * only NAF_NETMAP_ON instead of NAF_NATIVE_ON to enable the irq. */ void netmap_generic_irq(struct netmap_adapter *na, u_int q, u_int *work_done) { if (unlikely(!nm_netmap_on(na))) return; netmap_common_irq(na, q, work_done); #ifdef RATE_GENERIC if (work_done) rate_ctx.new.rxirq++; else rate_ctx.new.txirq++; #endif /* RATE_GENERIC */ } static int generic_netmap_unregister(struct netmap_adapter *na) { struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na; struct netmap_kring *kring = NULL; int i, r; if (na->active_fds == 0) { rtnl_lock(); na->na_flags &= ~NAF_NETMAP_ON; /* Release packet steering control. */ nm_os_catch_tx(gna, 0); /* Stop intercepting packets on the RX path. */ nm_os_catch_rx(gna, 0); rtnl_unlock(); } for_each_rx_kring_h(r, kring, na) { if (nm_kring_pending_off(kring)) { D("Emulated adapter: ring '%s' deactivated", kring->name); kring->nr_mode = NKR_NETMAP_OFF; } } for_each_tx_kring_h(r, kring, na) { if (nm_kring_pending_off(kring)) { kring->nr_mode = NKR_NETMAP_OFF; D("Emulated adapter: ring '%s' deactivated", kring->name); } } for_each_rx_kring(r, kring, na) { /* Free the mbufs still pending in the RX queues, * that did not end up into the corresponding netmap * RX rings. */ mbq_safe_purge(&kring->rx_queue); nm_os_mitigation_cleanup(&gna->mit[r]); } /* Decrement reference counter for the mbufs in the * TX pools. These mbufs can be still pending in drivers, * (e.g. 
this happens with virtio-net driver, which * does lazy reclaiming of transmitted mbufs). */ for_each_tx_kring(r, kring, na) { /* We must remove the destructor on the TX event, * because the destructor invokes netmap code, and * the netmap module may disappear before the * TX event is consumed. */ mtx_lock_spin(&kring->tx_event_lock); if (kring->tx_event) { SET_MBUF_DESTRUCTOR(kring->tx_event, NULL); } kring->tx_event = NULL; mtx_unlock_spin(&kring->tx_event_lock); } if (na->active_fds == 0) { nm_os_free(gna->mit); for_each_rx_kring(r, kring, na) { mbq_safe_fini(&kring->rx_queue); } for_each_tx_kring(r, kring, na) { mtx_destroy(&kring->tx_event_lock); if (kring->tx_pool == NULL) { continue; } for (i=0; i<na->num_tx_desc; i++) { if (kring->tx_pool[i]) { m_freem(kring->tx_pool[i]); } } nm_os_free(kring->tx_pool); kring->tx_pool = NULL; } #ifdef RATE_GENERIC if (--rate_ctx.refcount == 0) { D("del_timer()"); del_timer(&rate_ctx.timer); } #endif D("Emulated adapter for %s deactivated", na->name); } return 0; } /* Enable/disable netmap mode for a generic network interface. */ static int generic_netmap_register(struct netmap_adapter *na, int enable) { struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na; struct netmap_kring *kring = NULL; int error; int i, r; if (!na) { return EINVAL; } if (!enable) { /* This is actually an unregif. */ return generic_netmap_unregister(na); } if (na->active_fds == 0) { D("Emulated adapter for %s activated", na->name); /* Do all memory allocations when (na->active_fds == 0), to * simplify error management. */ /* Allocate memory for mitigation support on all the rx queues. */ gna->mit = nm_os_malloc(na->num_rx_rings * sizeof(struct nm_generic_mit)); if (!gna->mit) { D("mitigation allocation failed"); error = ENOMEM; goto out; } for_each_rx_kring(r, kring, na) { /* Init mitigation support. */ nm_os_mitigation_init(&gna->mit[r], r, na); /* Initialize the rx queue, as generic_rx_handler() can * be called as soon as nm_os_catch_rx() returns. */ mbq_safe_init(&kring->rx_queue); } /* * Prepare mbuf pools (parallel to the tx rings), for packet * transmission. Don't preallocate the mbufs here, it's simpler * to leave this task to txsync. */ for_each_tx_kring(r, kring, na) { kring->tx_pool = NULL; } for_each_tx_kring(r, kring, na) { kring->tx_pool = nm_os_malloc(na->num_tx_desc * sizeof(struct mbuf *)); if (!kring->tx_pool) { D("tx_pool allocation failed"); error = ENOMEM; goto free_tx_pools; } mtx_init(&kring->tx_event_lock, "tx_event_lock", NULL, MTX_SPIN); } } for_each_rx_kring_h(r, kring, na) { if (nm_kring_pending_on(kring)) { D("Emulated adapter: ring '%s' activated", kring->name); kring->nr_mode = NKR_NETMAP_ON; } } for_each_tx_kring_h(r, kring, na) { if (nm_kring_pending_on(kring)) { D("Emulated adapter: ring '%s' activated", kring->name); kring->nr_mode = NKR_NETMAP_ON; } } for_each_tx_kring(r, kring, na) { /* Initialize tx_pool and tx_event. */ for (i=0; i<na->num_tx_desc; i++) { kring->tx_pool[i] = NULL; } kring->tx_event = NULL; } if (na->active_fds == 0) { rtnl_lock(); /* Prepare to intercept incoming traffic. */ error = nm_os_catch_rx(gna, 1); if (error) { D("nm_os_catch_rx(1) failed (%d)", error); goto register_handler; } /* Make netmap control the packet steering.
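 * (nm_os_catch_tx() hooks the transmit side, typically by substituting
 * the ifnet's if_transmit method on FreeBSD, so that packets from the
 * host stack flow through the emulated adapter)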
*/ error = nm_os_catch_tx(gna, 1); if (error) { D("nm_os_catch_tx(1) failed (%d)", error); goto catch_rx; } rtnl_unlock(); na->na_flags |= NAF_NETMAP_ON; #ifdef RATE_GENERIC if (rate_ctx.refcount == 0) { D("setup_timer()"); memset(&rate_ctx, 0, sizeof(rate_ctx)); setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx); if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) { D("Error: mod_timer()"); } } rate_ctx.refcount++; #endif /* RATE */ } return 0; /* Here (na->active_fds == 0) holds. */ catch_rx: nm_os_catch_rx(gna, 0); register_handler: rtnl_unlock(); free_tx_pools: for_each_tx_kring(r, kring, na) { mtx_destroy(&kring->tx_event_lock); if (kring->tx_pool == NULL) { continue; } nm_os_free(kring->tx_pool); kring->tx_pool = NULL; } for_each_rx_kring(r, kring, na) { mbq_safe_fini(&kring->rx_queue); } nm_os_free(gna->mit); out: return error; } /* * Callback invoked when the device driver frees an mbuf used * by netmap to transmit a packet. This usually happens when * the NIC notifies the driver that transmission is completed. */ static void generic_mbuf_destructor(struct mbuf *m) { struct netmap_adapter *na = NA(GEN_TX_MBUF_IFP(m)); struct netmap_kring *kring; unsigned int r = MBUF_TXQ(m); unsigned int r_orig = r; if (unlikely(!nm_netmap_on(na) || r >= na->num_tx_rings)) { D("Error: no netmap adapter on device %p", GEN_TX_MBUF_IFP(m)); return; } /* * First, clear the event mbuf. * In principle, the event 'm' should match the one stored * on ring 'r'. However, we check it explicitly to stay * safe against lower layers (qdisc, driver, etc.) changing * MBUF_TXQ(m) under our feet. If the match is not found * on 'r', we try to see if it belongs to some other ring. */ for (;;) { bool match = false; kring = &na->tx_rings[r]; mtx_lock_spin(&kring->tx_event_lock); if (kring->tx_event == m) { kring->tx_event = NULL; match = true; } mtx_unlock_spin(&kring->tx_event_lock); if (match) { if (r != r_orig) { RD(1, "event %p migrated: ring %u --> %u", m, r_orig, r); } break; } if (++r == na->num_tx_rings) r = 0; if (r == r_orig) { RD(1, "Cannot match event %p", m); return; } } /* Second, wake up clients. They will reclaim the event through * txsync. */ netmap_generic_irq(na, r, NULL); #ifdef __FreeBSD__ - void_mbuf_dtor(m, NULL, NULL); + void_mbuf_dtor(m); #endif } /* Record completed transmissions and update hwtail. * * The oldest tx buffer not yet completed is at nr_hwtail + 1; * nr_hwcur is the first unsent buffer. */ static u_int generic_netmap_tx_clean(struct netmap_kring *kring, int txqdisc) { u_int const lim = kring->nkr_num_slots - 1; u_int nm_i = nm_next(kring->nr_hwtail, lim); u_int hwcur = kring->nr_hwcur; u_int n = 0; struct mbuf **tx_pool = kring->tx_pool; ND("hwcur = %d, hwtail = %d", kring->nr_hwcur, kring->nr_hwtail); while (nm_i != hwcur) { /* buffers not completed */ struct mbuf *m = tx_pool[nm_i]; if (txqdisc) { if (m == NULL) { /* Nothing to do, this is going * to be replenished. */ RD(3, "Is this happening?"); } else if (MBUF_QUEUED(m)) { break; /* Not dequeued yet. */ } else if (MBUF_REFCNT(m) != 1) { /* This mbuf has been dequeued but is still busy * (refcount is 2). * Leave it to the driver and replenish. */ m_freem(m); tx_pool[nm_i] = NULL; } } else { if (unlikely(m == NULL)) { int event_consumed; /* This slot was used to place an event. */ mtx_lock_spin(&kring->tx_event_lock); event_consumed = (kring->tx_event == NULL); mtx_unlock_spin(&kring->tx_event_lock); if (!event_consumed) { /* The event has not been consumed yet, * still busy in the driver.
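 * Stop the scan here: slots at and past an
 * unconsumed event cannot have completed yet.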
*/ break; } /* The event has been consumed; we can go * ahead. */ } else if (MBUF_REFCNT(m) != 1) { /* This mbuf is still busy: its refcnt is 2. */ break; } } n++; nm_i = nm_next(nm_i, lim); } kring->nr_hwtail = nm_prev(nm_i, lim); ND("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail); return n; } /* Compute a slot index in the middle between inf and sup. */ static inline u_int ring_middle(u_int inf, u_int sup, u_int lim) { u_int n = lim + 1; u_int e; if (sup >= inf) { e = (sup + inf) / 2; } else { /* wrap around */ e = (sup + n + inf) / 2; if (e >= n) { e -= n; } } if (unlikely(e >= n)) { D("This cannot happen"); e = 0; } return e; } static void generic_set_tx_event(struct netmap_kring *kring, u_int hwcur) { u_int lim = kring->nkr_num_slots - 1; struct mbuf *m; u_int e; u_int ntc = nm_next(kring->nr_hwtail, lim); /* next to clean */ if (ntc == hwcur) { return; /* all buffers are free */ } /* * We have pending packets in the driver between hwtail+1 * and hwcur, and we have to choose one of these slots to * generate a notification. * There is a race but this is only called within txsync, which * does a double check. */ #if 0 /* Choose a slot in the middle, so that we don't risk ending * up in a situation where the client continuously wakes up, * fills one or a few TX slots and goes to sleep again. */ e = ring_middle(ntc, hwcur, lim); #else /* Choose the first pending slot, to be safe against driver * reordering mbuf transmissions. */ e = ntc; #endif m = kring->tx_pool[e]; if (m == NULL) { /* An event is already in place. */ return; } mtx_lock_spin(&kring->tx_event_lock); if (kring->tx_event) { /* An event is already in place. */ mtx_unlock_spin(&kring->tx_event_lock); return; } SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor); kring->tx_event = m; mtx_unlock_spin(&kring->tx_event_lock); kring->tx_pool[e] = NULL; ND(5, "Request Event at %d mbuf %p refcnt %d", e, m, m ? MBUF_REFCNT(m) : -2 ); /* Decrement the refcount. This will free it if we lose the race * with the driver. */ m_freem(m); smp_mb(); } /* * generic_netmap_txsync() transforms netmap buffers into mbufs * and passes them to the standard device driver * (ndo_start_xmit() or ifp->if_transmit()). * On linux this is not done directly, but using dev_queue_xmit(), * since it implements the TX flow control (and takes some locks). */ static int generic_netmap_txsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na; struct ifnet *ifp = na->ifp; struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ // j u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; u_int ring_nr = kring->ring_id; IFRATE(rate_ctx.new.txsync++); rmb(); /* * First part: process new packets to send. */ nm_i = kring->nr_hwcur; if (nm_i != head) { /* we have new packets to send */ struct nm_os_gen_arg a; u_int event = -1; if (gna->txqdisc && nm_kr_txempty(kring)) { /* In txqdisc mode, we ask for a delayed notification, * but only when cur == hwtail, which means that the * client is going to block.
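 * The event lands midway through the pending range: for example, with
 * hwcur 10 and head 200 on a 512-slot ring, ring_middle() above yields
 * slot 105; the computation wraps around when head < hwcur.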
*/ event = ring_middle(nm_i, head, lim); ND(3, "Place txqdisc event (hwcur=%u,event=%u," "head=%u,hwtail=%u)", nm_i, event, head, kring->nr_hwtail); } a.ifp = ifp; a.ring_nr = ring_nr; a.head = a.tail = NULL; while (nm_i != head) { struct netmap_slot *slot = &ring->slot[nm_i]; u_int len = slot->len; void *addr = NMB(na, slot); /* device-specific */ struct mbuf *m; int tx_ret; NM_CHECK_ADDR_LEN(na, addr, len); /* Take an mbuf from the tx pool (replenishing the pool * entry if necessary) and copy in the user packet. */ m = kring->tx_pool[nm_i]; if (unlikely(m == NULL)) { kring->tx_pool[nm_i] = m = nm_os_get_mbuf(ifp, NETMAP_BUF_SIZE(na)); if (m == NULL) { RD(2, "Failed to replenish mbuf"); /* Here we could schedule a timer which * retries to replenish after a while, * and notifies the client when it * manages to replenish some slots. In * any case we break early to avoid * crashes. */ break; } IFRATE(rate_ctx.new.txrepl++); } a.m = m; a.addr = addr; a.len = len; a.qevent = (nm_i == event); /* When not in txqdisc mode, we should ask * notifications when NS_REPORT is set, or roughly * every half ring. To optimize this, we set a * notification event when the client runs out of * TX ring space, or when transmission fails. In * the latter case we also break early. */ tx_ret = nm_os_generic_xmit_frame(&a); if (unlikely(tx_ret)) { if (!gna->txqdisc) { /* * No room for this mbuf in the device driver. * Request a notification FOR A PREVIOUS MBUF, * then call generic_netmap_tx_clean(kring) to do the * double check and see if we can free more buffers. * If there is space continue, else break; * NOTE: the double check is necessary if the problem * occurs in the txsync call after selrecord(). * Also, we need some way to tell the caller that not * all buffers were queued onto the device (this was * not a problem with native netmap driver where space * is preallocated). The bridge has a similar problem * and we solve it there by dropping the excess packets. */ generic_set_tx_event(kring, nm_i); if (generic_netmap_tx_clean(kring, gna->txqdisc)) { /* space now available */ continue; } else { break; } } /* In txqdisc mode, the netmap-aware qdisc * queue has the same length as the number of * netmap slots (N). Since tail is advanced * only when packets are dequeued, qdisc * queue overrun cannot happen, so * nm_os_generic_xmit_frame() did not fail * because of that. * However, packets can be dropped because * carrier is off, or because our qdisc is * being deactivated, or possibly for other * reasons. In these cases, we just let the * packet be dropped. */ IFRATE(rate_ctx.new.txdrop++); } slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); nm_i = nm_next(nm_i, lim); IFRATE(rate_ctx.new.txpkt++); } if (a.head != NULL) { a.addr = NULL; nm_os_generic_xmit_frame(&a); } /* Update hwcur to the next slot to transmit. Here nm_i * is not necessarily head, since we may have broken out early. */ kring->nr_hwcur = nm_i; } /* * Second, reclaim completed buffers */ if (!gna->txqdisc && (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring))) { /* No more available slots? Set a notification event * on a netmap slot that will be cleaned in the future. * No doublecheck is performed, since txsync() will be * called twice by netmap_poll(). */ generic_set_tx_event(kring, nm_i); } generic_netmap_tx_clean(kring, gna->txqdisc); return 0; } /* * This handler is registered (through nm_os_catch_rx()) * within the attached network interface * in the RX subsystem, so that every mbuf passed up by * the driver can be stolen before it reaches the network stack.
* Stolen packets are put in a queue where the * generic_netmap_rxsync() callback can extract them. * Returns 1 if the packet was stolen, 0 otherwise. */ int generic_rx_handler(struct ifnet *ifp, struct mbuf *m) { struct netmap_adapter *na = NA(ifp); struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na; struct netmap_kring *kring; u_int work_done; u_int r = MBUF_RXQ(m); /* receive ring number */ if (r >= na->num_rx_rings) { r = r % na->num_rx_rings; } kring = &na->rx_rings[r]; if (kring->nr_mode == NKR_NETMAP_OFF) { /* We must not intercept this mbuf. */ return 0; } /* limit the size of the queue */ if (unlikely(!gna->rxsg && MBUF_LEN(m) > NETMAP_BUF_SIZE(na))) { /* This may happen when GRO/LRO features are enabled for * the NIC driver while the generic adapter does not * support RX scatter-gather. */ RD(2, "Warning: driver pushed up big packet " "(size=%d)", (int)MBUF_LEN(m)); m_freem(m); } else if (unlikely(mbq_len(&kring->rx_queue) > 1024)) { m_freem(m); } else { mbq_safe_enqueue(&kring->rx_queue, m); } if (netmap_generic_mit < 32768) { /* no rx mitigation, pass notification up */ netmap_generic_irq(na, r, &work_done); } else { /* same as send combining, filter notification if there is a * pending timer, otherwise pass it up and start a timer. */ if (likely(nm_os_mitigation_active(&gna->mit[r]))) { /* Record that there is some pending work. */ gna->mit[r].mit_pending = 1; } else { netmap_generic_irq(na, r, &work_done); nm_os_mitigation_start(&gna->mit[r]); } } /* We have intercepted the mbuf. */ return 1; } /* * generic_netmap_rxsync() extracts mbufs from the queue filled by * generic_rx_handler() and puts their content in the netmap * receive ring. * Access must be protected because the rx handler is asynchronous. */ static int generic_netmap_rxsync(struct netmap_kring *kring, int flags) { struct netmap_ring *ring = kring->ring; struct netmap_adapter *na = kring->na; u_int nm_i; /* index into the netmap ring */ //j, u_int n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; /* Adapter-specific variables. */ uint16_t slot_flags = kring->nkr_slot_flags; u_int nm_buf_len = NETMAP_BUF_SIZE(na); struct mbq tmpq; struct mbuf *m; int avail; /* in bytes */ int mlen; int copy; if (head > lim) return netmap_ring_reinit(kring); IFRATE(rate_ctx.new.rxsync++); /* * First part: skip past packets that userspace has released. * This can possibly make room for the second part. */ nm_i = kring->nr_hwcur; if (nm_i != head) { /* Userspace has released some packets. */ for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; slot->flags &= ~NS_BUF_CHANGED; nm_i = nm_next(nm_i, lim); } kring->nr_hwcur = head; } /* * Second part: import newly received packets. */ if (!netmap_no_pendintr && !force_update) { return 0; } nm_i = kring->nr_hwtail; /* First empty slot in the receive ring. */ /* Compute the available space (in bytes) in this netmap ring. * The first slot that is not considered is the one before * nr_hwcur. */ avail = nm_prev(kring->nr_hwcur, lim) - nm_i; if (avail < 0) avail += lim + 1; avail *= nm_buf_len; /* First pass: While holding the lock on the RX mbuf queue, * extract as many mbufs as fit in the available space, * and put them in a temporary queue. * To avoid performing a per-mbuf division (mlen / nm_buf_len) * to update avail, we do the update in a while loop that we * also use to set the RX slots, but without performing the copy.
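 * The second pass below then drains the temporary queue and performs
 * the m_copydata() into the reserved slots without holding the
 * rx_queue lock.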
*/ mbq_init(&tmpq); mbq_lock(&kring->rx_queue); for (n = 0;; n++) { m = mbq_peek(&kring->rx_queue); if (!m) { /* No more packets from the driver. */ break; } mlen = MBUF_LEN(m); if (mlen > avail) { /* No more space in the ring. */ break; } mbq_dequeue(&kring->rx_queue); while (mlen) { copy = nm_buf_len; if (mlen < copy) { copy = mlen; } mlen -= copy; avail -= nm_buf_len; ring->slot[nm_i].len = copy; ring->slot[nm_i].flags = slot_flags | (mlen ? NS_MOREFRAG : 0); nm_i = nm_next(nm_i, lim); } mbq_enqueue(&tmpq, m); } mbq_unlock(&kring->rx_queue); /* Second pass: Drain the temporary queue, going over the used RX slots, * and perform the copy outside the RX queue lock. */ nm_i = kring->nr_hwtail; for (;;) { void *nmaddr; int ofs = 0; int morefrag; m = mbq_dequeue(&tmpq); if (!m) { break; } do { nmaddr = NMB(na, &ring->slot[nm_i]); /* We only check the address here on generic rx rings. */ if (nmaddr == NETMAP_BUF_BASE(na)) { /* Bad buffer */ m_freem(m); mbq_purge(&tmpq); mbq_fini(&tmpq); return netmap_ring_reinit(kring); } copy = ring->slot[nm_i].len; m_copydata(m, ofs, copy, nmaddr); ofs += copy; morefrag = ring->slot[nm_i].flags & NS_MOREFRAG; nm_i = nm_next(nm_i, lim); } while (morefrag); m_freem(m); } mbq_fini(&tmpq); if (n) { kring->nr_hwtail = nm_i; IFRATE(rate_ctx.new.rxpkt += n); } kring->nr_kflags &= ~NKR_PENDINTR; return 0; } static void generic_netmap_dtor(struct netmap_adapter *na) { struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na; struct ifnet *ifp = netmap_generic_getifp(gna); struct netmap_adapter *prev_na = gna->prev; if (prev_na != NULL) { netmap_adapter_put(prev_na); if (nm_iszombie(na)) { /* * The driver has been removed without releasing * the reference so we need to do it here. */ netmap_adapter_put(prev_na); } D("Native netmap adapter %p restored", prev_na); } NM_ATTACH_NA(ifp, prev_na); /* * netmap_detach_common(), which is called after this function, * overrides WNA(ifp) if na->ifp is not NULL. */ na->ifp = NULL; D("Emulated netmap adapter for %s destroyed", na->name); } int na_is_generic(struct netmap_adapter *na) { return na->nm_register == generic_netmap_register; } /* * generic_netmap_attach() makes it possible to use netmap on * a device without native netmap support. * This is less performant than native support but potentially * faster than raw sockets or similar schemes. * * In this "emulated" mode, netmap rings do not necessarily * have the same size as those in the NIC. We use a default * value and possibly override it if the OS has ways to fetch the * actual configuration.
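 */

/*
 * Editor's illustrative sketch, not part of the original file: the ring
 * sizing policy described above as a standalone helper. The function
 * name and the clamp bounds are hypothetical; the real code below
 * starts from netmap_generic_ringsize and lets
 * nm_os_generic_find_num_desc() report the device's actual descriptor
 * counts.
 */
static u_int
example_emulated_ring_size(u_int os_reported, u_int dflt)
{
	u_int n = (os_reported != 0) ? os_reported : dflt;

	/* keep the emulated ring within arbitrary sane bounds */
	if (n < 64)
		n = 64;
	else if (n > 4096)
		n = 4096;
	return n;
}

/*
 * generic_netmap_attach() below implements the attach path just
 * described.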
*/ int generic_netmap_attach(struct ifnet *ifp) { struct netmap_adapter *na; struct netmap_generic_adapter *gna; int retval; u_int num_tx_desc, num_rx_desc; #ifdef __FreeBSD__ if (ifp->if_type == IFT_LOOP) { D("if_loop is not supported by %s", __func__); return EINVAL; } #endif num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */ nm_os_generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); /* ignore errors */ ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc); if (num_tx_desc == 0 || num_rx_desc == 0) { D("Device has no hw slots (tx %u, rx %u)", num_tx_desc, num_rx_desc); return EINVAL; } gna = nm_os_malloc(sizeof(*gna)); if (gna == NULL) { D("no memory on attach, give up"); return ENOMEM; } na = (struct netmap_adapter *)gna; strncpy(na->name, ifp->if_xname, sizeof(na->name)); na->ifp = ifp; na->num_tx_desc = num_tx_desc; na->num_rx_desc = num_rx_desc; na->nm_register = &generic_netmap_register; na->nm_txsync = &generic_netmap_txsync; na->nm_rxsync = &generic_netmap_rxsync; na->nm_dtor = &generic_netmap_dtor; /* when using generic, NAF_NETMAP_ON is set so we force * NAF_SKIP_INTR to use the regular interrupt handler */ na->na_flags = NAF_SKIP_INTR | NAF_HOST_RINGS; ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)", ifp->num_tx_queues, ifp->real_num_tx_queues, ifp->tx_queue_len); ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)", ifp->num_rx_queues, ifp->real_num_rx_queues); nm_os_generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings); retval = netmap_attach_common(na); if (retval) { nm_os_free(gna); return retval; } gna->prev = NA(ifp); /* save old na */ if (gna->prev != NULL) { netmap_adapter_get(gna->prev); } NM_ATTACH_NA(ifp, na); nm_os_generic_set_features(gna); D("Emulated adapter for %s created (prev was %p)", na->name, gna->prev); return retval; } Index: head/sys/dev/wb/if_wb.c =================================================================== --- head/sys/dev/wb/if_wb.c (revision 324445) +++ head/sys/dev/wb/if_wb.c (revision 324446) @@ -1,1636 +1,1635 @@ /*- * Copyright (c) 1997, 1998 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Winbond fast ethernet PCI NIC driver * * Supports various cheap network adapters based on the Winbond W89C840F * fast ethernet controller chip. This includes adapters manufactured by * Winbond itself and some made by Linksys. * * Written by Bill Paul * Electrical Engineering Department * Columbia University, New York City */ /* * The Winbond W89C840F chip is a bus master; in some ways it resembles * a DEC 'tulip' chip, only not as complicated. Unfortunately, it has * one major difference which is that while the registers do many of * the same things as a tulip adapter, the offsets are different: where * tulip registers are typically spaced 8 bytes apart, the Winbond * registers are spaced 4 bytes apart. The receiver filter is also * programmed differently. * * Like the tulip, the Winbond chip uses small descriptors containing * a status word, a control word and 32-bit areas that can either be used * to point to two external data blocks, or to point to a single block * and another descriptor in a linked list. Descriptors can be grouped * together in blocks to form fixed length rings or can be chained * together in linked lists. A single packet may be spread out over * several descriptors if necessary. * * For the receive ring, this driver uses a linked list of descriptors, * each pointing to a single mbuf cluster buffer, which is large enough * to hold an entire packet. The linked list is looped back to create a * closed ring. * * For transmission, the driver creates a linked list of 'super descriptors' * which each contain several individual descriptors linked together. * Each 'super descriptor' contains WB_MAXFRAGS descriptors, which we * abuse as fragment pointers. This allows us to use a buffer management * scheme very similar to that used in the ThunderLAN and Etherlink XL * drivers. * * Autonegotiation is performed using the external PHY via the MII bus. * The sample boards I have all use a Davicom PHY. * * Note: the author of the Linux driver for the Winbond chip alludes * to some sort of flaw in the chip's design that seems to mandate some * drastic workaround which significantly impairs transmit performance. * I have no idea what he's on about: transmit performance with all * three of my test boards seems fine. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for vtophys */ #include /* for vtophys */ #include #include #include #include #include #include #include #include #include /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" #define WB_USEIOSPACE #include MODULE_DEPEND(wb, pci, 1, 1, 1); MODULE_DEPEND(wb, ether, 1, 1, 1); MODULE_DEPEND(wb, miibus, 1, 1, 1); /* * Various supported device vendors/types and their names.
*/ static const struct wb_type wb_devs[] = { { WB_VENDORID, WB_DEVICEID_840F, "Winbond W89C840F 10/100BaseTX" }, { CP_VENDORID, CP_DEVICEID_RL100, "Compex RL100-ATX 10/100baseTX" }, { 0, 0, NULL } }; static int wb_probe(device_t); static int wb_attach(device_t); static int wb_detach(device_t); -static void wb_bfree(struct mbuf *, void *addr, void *args); +static void wb_bfree(struct mbuf *); static int wb_newbuf(struct wb_softc *, struct wb_chain_onefrag *, struct mbuf *); static int wb_encap(struct wb_softc *, struct wb_chain *, struct mbuf *); static void wb_rxeof(struct wb_softc *); static void wb_rxeoc(struct wb_softc *); static void wb_txeof(struct wb_softc *); static void wb_txeoc(struct wb_softc *); static void wb_intr(void *); static void wb_tick(void *); static void wb_start(struct ifnet *); static void wb_start_locked(struct ifnet *); static int wb_ioctl(struct ifnet *, u_long, caddr_t); static void wb_init(void *); static void wb_init_locked(struct wb_softc *); static void wb_stop(struct wb_softc *); static void wb_watchdog(struct wb_softc *); static int wb_shutdown(device_t); static int wb_ifmedia_upd(struct ifnet *); static void wb_ifmedia_sts(struct ifnet *, struct ifmediareq *); static void wb_eeprom_putbyte(struct wb_softc *, int); static void wb_eeprom_getword(struct wb_softc *, int, u_int16_t *); static void wb_read_eeprom(struct wb_softc *, caddr_t, int, int, int); static void wb_setcfg(struct wb_softc *, u_int32_t); static void wb_setmulti(struct wb_softc *); static void wb_reset(struct wb_softc *); static void wb_fixmedia(struct wb_softc *); static int wb_list_rx_init(struct wb_softc *); static int wb_list_tx_init(struct wb_softc *); static int wb_miibus_readreg(device_t, int, int); static int wb_miibus_writereg(device_t, int, int, int); static void wb_miibus_statchg(device_t); /* * MII bit-bang glue */ static uint32_t wb_mii_bitbang_read(device_t); static void wb_mii_bitbang_write(device_t, uint32_t); static const struct mii_bitbang_ops wb_mii_bitbang_ops = { wb_mii_bitbang_read, wb_mii_bitbang_write, { WB_SIO_MII_DATAOUT, /* MII_BIT_MDO */ WB_SIO_MII_DATAIN, /* MII_BIT_MDI */ WB_SIO_MII_CLK, /* MII_BIT_MDC */ WB_SIO_MII_DIR, /* MII_BIT_DIR_HOST_PHY */ 0, /* MII_BIT_DIR_PHY_HOST */ } }; #ifdef WB_USEIOSPACE #define WB_RES SYS_RES_IOPORT #define WB_RID WB_PCI_LOIO #else #define WB_RES SYS_RES_MEMORY #define WB_RID WB_PCI_LOMEM #endif static device_method_t wb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, wb_probe), DEVMETHOD(device_attach, wb_attach), DEVMETHOD(device_detach, wb_detach), DEVMETHOD(device_shutdown, wb_shutdown), /* MII interface */ DEVMETHOD(miibus_readreg, wb_miibus_readreg), DEVMETHOD(miibus_writereg, wb_miibus_writereg), DEVMETHOD(miibus_statchg, wb_miibus_statchg), DEVMETHOD_END }; static driver_t wb_driver = { "wb", wb_methods, sizeof(struct wb_softc) }; static devclass_t wb_devclass; DRIVER_MODULE(wb, pci, wb_driver, wb_devclass, 0, 0); DRIVER_MODULE(miibus, wb, miibus_driver, miibus_devclass, 0, 0); #define WB_SETBIT(sc, reg, x) \ CSR_WRITE_4(sc, reg, \ CSR_READ_4(sc, reg) | (x)) #define WB_CLRBIT(sc, reg, x) \ CSR_WRITE_4(sc, reg, \ CSR_READ_4(sc, reg) & ~(x)) #define SIO_SET(x) \ CSR_WRITE_4(sc, WB_SIO, \ CSR_READ_4(sc, WB_SIO) | (x)) #define SIO_CLR(x) \ CSR_WRITE_4(sc, WB_SIO, \ CSR_READ_4(sc, WB_SIO) & ~(x)) /* * Send a read command and address to the EEPROM, check for ACK. */ static void wb_eeprom_putbyte(sc, addr) struct wb_softc *sc; int addr; { int d, i; d = addr | WB_EECMD_READ; /* * Feed in each bit and strobe the clock.
*/ for (i = 0x400; i; i >>= 1) { if (d & i) { SIO_SET(WB_SIO_EE_DATAIN); } else { SIO_CLR(WB_SIO_EE_DATAIN); } DELAY(100); SIO_SET(WB_SIO_EE_CLK); DELAY(150); SIO_CLR(WB_SIO_EE_CLK); DELAY(100); } } /* * Read a word of data stored in the EEPROM at address 'addr.' */ static void wb_eeprom_getword(sc, addr, dest) struct wb_softc *sc; int addr; u_int16_t *dest; { int i; u_int16_t word = 0; /* Enter EEPROM access mode. */ CSR_WRITE_4(sc, WB_SIO, WB_SIO_EESEL|WB_SIO_EE_CS); /* * Send address of word we want to read. */ wb_eeprom_putbyte(sc, addr); CSR_WRITE_4(sc, WB_SIO, WB_SIO_EESEL|WB_SIO_EE_CS); /* * Start reading bits from EEPROM. */ for (i = 0x8000; i; i >>= 1) { SIO_SET(WB_SIO_EE_CLK); DELAY(100); if (CSR_READ_4(sc, WB_SIO) & WB_SIO_EE_DATAOUT) word |= i; SIO_CLR(WB_SIO_EE_CLK); DELAY(100); } /* Turn off EEPROM access mode. */ CSR_WRITE_4(sc, WB_SIO, 0); *dest = word; } /* * Read a sequence of words from the EEPROM. */ static void wb_read_eeprom(sc, dest, off, cnt, swap) struct wb_softc *sc; caddr_t dest; int off; int cnt; int swap; { int i; u_int16_t word = 0, *ptr; for (i = 0; i < cnt; i++) { wb_eeprom_getword(sc, off + i, &word); ptr = (u_int16_t *)(dest + (i * 2)); if (swap) *ptr = ntohs(word); else *ptr = word; } } /* * Read the MII serial port for the MII bit-bang module. */ static uint32_t wb_mii_bitbang_read(device_t dev) { struct wb_softc *sc; uint32_t val; sc = device_get_softc(dev); val = CSR_READ_4(sc, WB_SIO); CSR_BARRIER(sc, WB_SIO, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (val); } /* * Write the MII serial port for the MII bit-bang module. */ static void wb_mii_bitbang_write(device_t dev, uint32_t val) { struct wb_softc *sc; sc = device_get_softc(dev); CSR_WRITE_4(sc, WB_SIO, val); CSR_BARRIER(sc, WB_SIO, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); } static int wb_miibus_readreg(dev, phy, reg) device_t dev; int phy, reg; { return (mii_bitbang_readreg(dev, &wb_mii_bitbang_ops, phy, reg)); } static int wb_miibus_writereg(dev, phy, reg, data) device_t dev; int phy, reg, data; { mii_bitbang_writereg(dev, &wb_mii_bitbang_ops, phy, reg, data); return(0); } static void wb_miibus_statchg(dev) device_t dev; { struct wb_softc *sc; struct mii_data *mii; sc = device_get_softc(dev); mii = device_get_softc(sc->wb_miibus); wb_setcfg(sc, mii->mii_media_active); } /* * Program the 64-bit multicast hash filter. 
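The putbyte/getword pair above is a classic serial-EEPROM shift protocol: clock a command-plus-address word out MSB-first, then clock 16 data bits back in. A condensed standalone model of the two shift loops follows; the register writes and DELAY()s are replaced with printf(), and EECMD_READ is an assumed stand-in for the driver's WB_EECMD_READ opcode bits.

#include <stdio.h>

#define EECMD_READ      0x180   /* assumed start+read opcode bits */

/* Model of wb_eeprom_putbyte(): 11 bits shifted out, MSB first. */
static void
ee_put_cmd(int addr)
{
        int d = addr | EECMD_READ;

        for (int i = 0x400; i; i >>= 1)
                printf("data=%d clk-pulse\n", (d & i) != 0);
}

/* Model of wb_eeprom_getword(): 16 bits sampled, MSB first; the
 * argument simulates what the DATAOUT pin would return. */
static unsigned short
ee_get_word(unsigned short simulated)
{
        unsigned short word = 0;

        for (int i = 0x8000; i; i >>= 1)
                if (simulated & i)
                        word |= i;
        return (word);
}

int
main(void)
{
        ee_put_cmd(0);                  /* request word 0 */
        printf("word: 0x%04x\n", ee_get_word(0x1234));
        return (0);
}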
*/ static void wb_setmulti(sc) struct wb_softc *sc; { struct ifnet *ifp; int h = 0; u_int32_t hashes[2] = { 0, 0 }; struct ifmultiaddr *ifma; u_int32_t rxfilt; int mcnt = 0; ifp = sc->wb_ifp; rxfilt = CSR_READ_4(sc, WB_NETCFG); if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) { rxfilt |= WB_NETCFG_RX_MULTI; CSR_WRITE_4(sc, WB_NETCFG, rxfilt); CSR_WRITE_4(sc, WB_MAR0, 0xFFFFFFFF); CSR_WRITE_4(sc, WB_MAR1, 0xFFFFFFFF); return; } /* first, zot all the existing hash bits */ CSR_WRITE_4(sc, WB_MAR0, 0); CSR_WRITE_4(sc, WB_MAR1, 0); /* now program new ones */ if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; h = ~ether_crc32_be(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), ETHER_ADDR_LEN) >> 26; if (h < 32) hashes[0] |= (1 << h); else hashes[1] |= (1 << (h - 32)); mcnt++; } if_maddr_runlock(ifp); if (mcnt) rxfilt |= WB_NETCFG_RX_MULTI; else rxfilt &= ~WB_NETCFG_RX_MULTI; CSR_WRITE_4(sc, WB_MAR0, hashes[0]); CSR_WRITE_4(sc, WB_MAR1, hashes[1]); CSR_WRITE_4(sc, WB_NETCFG, rxfilt); } /* * The Winbond manual states that in order to fiddle with the * 'full-duplex' and '100Mbps' bits in the netconfig register, we * first have to put the transmit and/or receive logic in the idle state. */ static void wb_setcfg(sc, media) struct wb_softc *sc; u_int32_t media; { int i, restart = 0; if (CSR_READ_4(sc, WB_NETCFG) & (WB_NETCFG_TX_ON|WB_NETCFG_RX_ON)) { restart = 1; WB_CLRBIT(sc, WB_NETCFG, (WB_NETCFG_TX_ON|WB_NETCFG_RX_ON)); for (i = 0; i < WB_TIMEOUT; i++) { DELAY(10); if ((CSR_READ_4(sc, WB_ISR) & WB_ISR_TX_IDLE) && (CSR_READ_4(sc, WB_ISR) & WB_ISR_RX_IDLE)) break; } if (i == WB_TIMEOUT) device_printf(sc->wb_dev, "failed to force tx and rx to idle state\n"); } if (IFM_SUBTYPE(media) == IFM_10_T) WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_100MBPS); else WB_SETBIT(sc, WB_NETCFG, WB_NETCFG_100MBPS); if ((media & IFM_GMASK) == IFM_FDX) WB_SETBIT(sc, WB_NETCFG, WB_NETCFG_FULLDUPLEX); else WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_FULLDUPLEX); if (restart) WB_SETBIT(sc, WB_NETCFG, WB_NETCFG_TX_ON|WB_NETCFG_RX_ON); } static void wb_reset(sc) struct wb_softc *sc; { int i; struct mii_data *mii; struct mii_softc *miisc; CSR_WRITE_4(sc, WB_NETCFG, 0); CSR_WRITE_4(sc, WB_BUSCTL, 0); CSR_WRITE_4(sc, WB_TXADDR, 0); CSR_WRITE_4(sc, WB_RXADDR, 0); WB_SETBIT(sc, WB_BUSCTL, WB_BUSCTL_RESET); WB_SETBIT(sc, WB_BUSCTL, WB_BUSCTL_RESET); for (i = 0; i < WB_TIMEOUT; i++) { DELAY(10); if (!(CSR_READ_4(sc, WB_BUSCTL) & WB_BUSCTL_RESET)) break; } if (i == WB_TIMEOUT) device_printf(sc->wb_dev, "reset never completed!\n"); /* Wait a little while for the chip to get its brains in order. */ DELAY(1000); if (sc->wb_miibus == NULL) return; mii = device_get_softc(sc->wb_miibus); LIST_FOREACH(miisc, &mii->mii_phys, mii_list) PHY_RESET(miisc); } static void wb_fixmedia(sc) struct wb_softc *sc; { struct mii_data *mii = NULL; struct ifnet *ifp; u_int32_t media; mii = device_get_softc(sc->wb_miibus); ifp = sc->wb_ifp; mii_pollstat(mii); if (IFM_SUBTYPE(mii->mii_media_active) == IFM_10_T) { media = mii->mii_media_active & ~IFM_10_T; media |= IFM_100_TX; } else if (IFM_SUBTYPE(mii->mii_media_active) == IFM_100_TX) { media = mii->mii_media_active & ~IFM_100_TX; media |= IFM_10_T; } else return; ifmedia_set(&mii->mii_media, media); } /* * Probe for a Winbond chip. Check the PCI vendor and device * IDs against our list and return a device name if we find a match. 
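wb_setmulti() above reduces each multicast MAC to a single bit in a 64-bit hash table: the top 6 bits of the inverted big-endian CRC32 select the bit, which is split across the two 32-bit MAR registers. A user-space illustration follows; crc32_be() is a straightforward serial big-endian CRC-32 standing in for the kernel's ether_crc32_be() (the table-driven kernel version should agree, but treat the exact bit ordering as an assumption).

#include <stdint.h>
#include <stdio.h>

/* Serial big-endian CRC-32 (poly 0x04c11db7), data fed LSB-first
 * per byte as Ethernet transmits it. */
static uint32_t
crc32_be(const uint8_t *buf, int len)
{
        uint32_t crc = 0xffffffff;

        for (int i = 0; i < len; i++)
                for (int b = 0; b < 8; b++) {
                        uint32_t carry = ((crc >> 31) ^ (buf[i] >> b)) & 1;

                        crc <<= 1;
                        if (carry)
                                crc ^= 0x04c11db7;
                }
        return (crc);
}

int
main(void)
{
        uint8_t mac[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };
        uint32_t hashes[2] = { 0, 0 };
        int h = ~crc32_be(mac, 6) >> 26;        /* top 6 bits -> 0..63 */

        if (h < 32)
                hashes[0] |= 1u << h;           /* lands in WB_MAR0 */
        else
                hashes[1] |= 1u << (h - 32);    /* lands in WB_MAR1 */
        printf("bit %d -> MAR%d\n", h, h < 32 ? 0 : 1);
        return (0);
}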
*/ static int wb_probe(dev) device_t dev; { const struct wb_type *t; t = wb_devs; while(t->wb_name != NULL) { if ((pci_get_vendor(dev) == t->wb_vid) && (pci_get_device(dev) == t->wb_did)) { device_set_desc(dev, t->wb_name); return (BUS_PROBE_DEFAULT); } t++; } return(ENXIO); } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ static int wb_attach(dev) device_t dev; { u_char eaddr[ETHER_ADDR_LEN]; struct wb_softc *sc; struct ifnet *ifp; int error = 0, rid; sc = device_get_softc(dev); sc->wb_dev = dev; mtx_init(&sc->wb_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->wb_stat_callout, &sc->wb_mtx, 0); /* * Map control/status registers. */ pci_enable_busmaster(dev); rid = WB_RID; sc->wb_res = bus_alloc_resource_any(dev, WB_RES, &rid, RF_ACTIVE); if (sc->wb_res == NULL) { device_printf(dev, "couldn't map ports/memory\n"); error = ENXIO; goto fail; } /* Allocate interrupt */ rid = 0; sc->wb_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->wb_irq == NULL) { device_printf(dev, "couldn't map interrupt\n"); error = ENXIO; goto fail; } /* Save the cache line size. */ sc->wb_cachesize = pci_read_config(dev, WB_PCI_CACHELEN, 4) & 0xFF; /* Reset the adapter. */ wb_reset(sc); /* * Get station address from the EEPROM. */ wb_read_eeprom(sc, (caddr_t)&eaddr, 0, 3, 0); sc->wb_ldata = contigmalloc(sizeof(struct wb_list_data) + 8, M_DEVBUF, M_NOWAIT, 0, 0xffffffff, PAGE_SIZE, 0); if (sc->wb_ldata == NULL) { device_printf(dev, "no memory for list buffers!\n"); error = ENXIO; goto fail; } bzero(sc->wb_ldata, sizeof(struct wb_list_data)); ifp = sc->wb_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not if_alloc()\n"); error = ENOSPC; goto fail; } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = wb_ioctl; ifp->if_start = wb_start; ifp->if_init = wb_init; ifp->if_snd.ifq_maxlen = WB_TX_LIST_CNT - 1; /* * Do MII setup. */ error = mii_attach(dev, &sc->wb_miibus, ifp, wb_ifmedia_upd, wb_ifmedia_sts, BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, 0); if (error != 0) { device_printf(dev, "attaching PHYs failed\n"); goto fail; } /* * Call MI attach routine. */ ether_ifattach(ifp, eaddr); /* Hook interrupt last to avoid having to lock softc */ error = bus_setup_intr(dev, sc->wb_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, wb_intr, sc, &sc->wb_intrhand); if (error) { device_printf(dev, "couldn't set up irq\n"); ether_ifdetach(ifp); goto fail; } fail: if (error) wb_detach(dev); return(error); } /* * Shutdown hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. */ static int wb_detach(dev) device_t dev; { struct wb_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); KASSERT(mtx_initialized(&sc->wb_mtx), ("wb mutex not initialized")); ifp = sc->wb_ifp; /* * Delete any miibus and phy devices attached to this interface. * This should only be done if attach succeeded. 
*/ if (device_is_attached(dev)) { ether_ifdetach(ifp); WB_LOCK(sc); wb_stop(sc); WB_UNLOCK(sc); callout_drain(&sc->wb_stat_callout); } if (sc->wb_miibus) device_delete_child(dev, sc->wb_miibus); bus_generic_detach(dev); if (sc->wb_intrhand) bus_teardown_intr(dev, sc->wb_irq, sc->wb_intrhand); if (sc->wb_irq) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->wb_irq); if (sc->wb_res) bus_release_resource(dev, WB_RES, WB_RID, sc->wb_res); if (ifp) if_free(ifp); if (sc->wb_ldata) { contigfree(sc->wb_ldata, sizeof(struct wb_list_data) + 8, M_DEVBUF); } mtx_destroy(&sc->wb_mtx); return(0); } /* * Initialize the transmit descriptors. */ static int wb_list_tx_init(sc) struct wb_softc *sc; { struct wb_chain_data *cd; struct wb_list_data *ld; int i; cd = &sc->wb_cdata; ld = sc->wb_ldata; for (i = 0; i < WB_TX_LIST_CNT; i++) { cd->wb_tx_chain[i].wb_ptr = &ld->wb_tx_list[i]; if (i == (WB_TX_LIST_CNT - 1)) { cd->wb_tx_chain[i].wb_nextdesc = &cd->wb_tx_chain[0]; } else { cd->wb_tx_chain[i].wb_nextdesc = &cd->wb_tx_chain[i + 1]; } } cd->wb_tx_free = &cd->wb_tx_chain[0]; cd->wb_tx_tail = cd->wb_tx_head = NULL; return(0); } /* * Initialize the RX descriptors and allocate mbufs for them. Note that * we arrange the descriptors in a closed ring, so that the last descriptor * points back to the first. */ static int wb_list_rx_init(sc) struct wb_softc *sc; { struct wb_chain_data *cd; struct wb_list_data *ld; int i; cd = &sc->wb_cdata; ld = sc->wb_ldata; for (i = 0; i < WB_RX_LIST_CNT; i++) { cd->wb_rx_chain[i].wb_ptr = (struct wb_desc *)&ld->wb_rx_list[i]; cd->wb_rx_chain[i].wb_buf = (void *)&ld->wb_rxbufs[i]; if (wb_newbuf(sc, &cd->wb_rx_chain[i], NULL) == ENOBUFS) return(ENOBUFS); if (i == (WB_RX_LIST_CNT - 1)) { cd->wb_rx_chain[i].wb_nextdesc = &cd->wb_rx_chain[0]; ld->wb_rx_list[i].wb_next = vtophys(&ld->wb_rx_list[0]); } else { cd->wb_rx_chain[i].wb_nextdesc = &cd->wb_rx_chain[i + 1]; ld->wb_rx_list[i].wb_next = vtophys(&ld->wb_rx_list[i + 1]); } } cd->wb_rx_head = &cd->wb_rx_chain[0]; return(0); } static void -wb_bfree(struct mbuf *m, void *buf, void *args) +wb_bfree(struct mbuf *m) { } /* * Initialize an RX descriptor and attach an MBUF cluster. */ static int wb_newbuf(sc, c, m) struct wb_softc *sc; struct wb_chain_onefrag *c; struct mbuf *m; { struct mbuf *m_new = NULL; if (m == NULL) { MGETHDR(m_new, M_NOWAIT, MT_DATA); if (m_new == NULL) return(ENOBUFS); - m_new->m_data = c->wb_buf; m_new->m_pkthdr.len = m_new->m_len = WB_BUFBYTES; - MEXTADD(m_new, c->wb_buf, WB_BUFBYTES, wb_bfree, c->wb_buf, - NULL, 0, EXT_NET_DRV); + m_extadd(m_new, c->wb_buf, WB_BUFBYTES, wb_bfree, NULL, NULL, + 0, EXT_NET_DRV); } else { m_new = m; m_new->m_len = m_new->m_pkthdr.len = WB_BUFBYTES; m_new->m_data = m_new->m_ext.ext_buf; } m_adj(m_new, sizeof(u_int64_t)); c->wb_mbuf = m_new; c->wb_ptr->wb_data = vtophys(mtod(m_new, caddr_t)); c->wb_ptr->wb_ctl = WB_RXCTL_RLINK | 1536; c->wb_ptr->wb_status = WB_RXSTAT; return(0); } /* * A frame has been uploaded: pass the resulting mbuf chain up to * the higher level protocols. 
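The wb_newbuf() hunk above shows the pattern every external-storage driver follows after this commit: the free routine now receives only the mbuf and recovers its context from m_ext.ext_arg1/ext_arg2, which m_extadd() stores. A hedged driver-style sketch, with a hypothetical pool type standing in for real driver context:

#include <sys/param.h>
#include <sys/mbuf.h>

/* struct my_pool and my_pool_put() are hypothetical driver context,
 * for illustration only. */
struct my_pool;
void my_pool_put(struct my_pool *, void *);

static void
my_ext_free(struct mbuf *m)
{
        /* Post-r324446: context travels in the mbuf itself. */
        struct my_pool *pool = m->m_ext.ext_arg1;

        my_pool_put(pool, m->m_ext.ext_buf);
}

static void
my_newbuf(struct my_pool *pool, struct mbuf *m, char *buf, u_int size)
{
        /* arg1/arg2 are stashed in m_ext and read back by
         * my_ext_free() via the mbuf, not passed as parameters. */
        m_extadd(m, buf, size, my_ext_free, pool, NULL, 0, EXT_NET_DRV);
}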
*/ static void wb_rxeof(sc) struct wb_softc *sc; { struct mbuf *m = NULL; struct ifnet *ifp; struct wb_chain_onefrag *cur_rx; int total_len = 0; u_int32_t rxstat; WB_LOCK_ASSERT(sc); ifp = sc->wb_ifp; while(!((rxstat = sc->wb_cdata.wb_rx_head->wb_ptr->wb_status) & WB_RXSTAT_OWN)) { struct mbuf *m0 = NULL; cur_rx = sc->wb_cdata.wb_rx_head; sc->wb_cdata.wb_rx_head = cur_rx->wb_nextdesc; m = cur_rx->wb_mbuf; if ((rxstat & WB_RXSTAT_MIIERR) || (WB_RXBYTES(cur_rx->wb_ptr->wb_status) < WB_MIN_FRAMELEN) || (WB_RXBYTES(cur_rx->wb_ptr->wb_status) > 1536) || !(rxstat & WB_RXSTAT_LASTFRAG) || !(rxstat & WB_RXSTAT_RXCMP)) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); wb_newbuf(sc, cur_rx, m); device_printf(sc->wb_dev, "receiver babbling: possible chip bug," " forcing reset\n"); wb_fixmedia(sc); wb_reset(sc); wb_init_locked(sc); return; } if (rxstat & WB_RXSTAT_RXERR) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); wb_newbuf(sc, cur_rx, m); break; } /* No errors; receive the packet. */ total_len = WB_RXBYTES(cur_rx->wb_ptr->wb_status); /* * XXX The Winbond chip includes the CRC with every * received frame, and there's no way to turn this * behavior off (at least, I can't find anything in * the manual that explains how to do it) so we have * to trim off the CRC manually. */ total_len -= ETHER_CRC_LEN; m0 = m_devget(mtod(m, char *), total_len, ETHER_ALIGN, ifp, NULL); wb_newbuf(sc, cur_rx, m); if (m0 == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); break; } m = m0; if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); WB_UNLOCK(sc); (*ifp->if_input)(ifp, m); WB_LOCK(sc); } } static void wb_rxeoc(sc) struct wb_softc *sc; { wb_rxeof(sc); WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_RX_ON); CSR_WRITE_4(sc, WB_RXADDR, vtophys(&sc->wb_ldata->wb_rx_list[0])); WB_SETBIT(sc, WB_NETCFG, WB_NETCFG_RX_ON); if (CSR_READ_4(sc, WB_ISR) & WB_RXSTATE_SUSPEND) CSR_WRITE_4(sc, WB_RXSTART, 0xFFFFFFFF); } /* * A frame was downloaded to the chip. It's safe for us to clean up * the list buffers. */ static void wb_txeof(sc) struct wb_softc *sc; { struct wb_chain *cur_tx; struct ifnet *ifp; ifp = sc->wb_ifp; /* Clear the timeout timer. */ sc->wb_timer = 0; if (sc->wb_cdata.wb_tx_head == NULL) return; /* * Go through our tx list and free mbufs for those * frames that have been transmitted. */ while(sc->wb_cdata.wb_tx_head->wb_mbuf != NULL) { u_int32_t txstat; cur_tx = sc->wb_cdata.wb_tx_head; txstat = WB_TXSTATUS(cur_tx); if ((txstat & WB_TXSTAT_OWN) || txstat == WB_UNSENT) break; if (txstat & WB_TXSTAT_TXERR) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if (txstat & WB_TXSTAT_ABORT) if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1); if (txstat & WB_TXSTAT_LATECOLL) if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1); } if_inc_counter(ifp, IFCOUNTER_COLLISIONS, (txstat & WB_TXSTAT_COLLCNT) >> 3); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); m_freem(cur_tx->wb_mbuf); cur_tx->wb_mbuf = NULL; if (sc->wb_cdata.wb_tx_head == sc->wb_cdata.wb_tx_tail) { sc->wb_cdata.wb_tx_head = NULL; sc->wb_cdata.wb_tx_tail = NULL; break; } sc->wb_cdata.wb_tx_head = cur_tx->wb_nextdesc; } } /* * TX 'end of channel' interrupt handler. 
*/ static void wb_txeoc(sc) struct wb_softc *sc; { struct ifnet *ifp; ifp = sc->wb_ifp; sc->wb_timer = 0; if (sc->wb_cdata.wb_tx_head == NULL) { ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; sc->wb_cdata.wb_tx_tail = NULL; } else { if (WB_TXOWN(sc->wb_cdata.wb_tx_head) == WB_UNSENT) { WB_TXOWN(sc->wb_cdata.wb_tx_head) = WB_TXSTAT_OWN; sc->wb_timer = 5; CSR_WRITE_4(sc, WB_TXSTART, 0xFFFFFFFF); } } } static void wb_intr(arg) void *arg; { struct wb_softc *sc; struct ifnet *ifp; u_int32_t status; sc = arg; WB_LOCK(sc); ifp = sc->wb_ifp; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { WB_UNLOCK(sc); return; } /* Disable interrupts. */ CSR_WRITE_4(sc, WB_IMR, 0x00000000); for (;;) { status = CSR_READ_4(sc, WB_ISR); if (status) CSR_WRITE_4(sc, WB_ISR, status); if ((status & WB_INTRS) == 0) break; if ((status & WB_ISR_RX_NOBUF) || (status & WB_ISR_RX_ERR)) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); wb_reset(sc); if (status & WB_ISR_RX_ERR) wb_fixmedia(sc); wb_init_locked(sc); continue; } if (status & WB_ISR_RX_OK) wb_rxeof(sc); if (status & WB_ISR_RX_IDLE) wb_rxeoc(sc); if (status & WB_ISR_TX_OK) wb_txeof(sc); if (status & WB_ISR_TX_NOBUF) wb_txeoc(sc); if (status & WB_ISR_TX_IDLE) { wb_txeof(sc); if (sc->wb_cdata.wb_tx_head != NULL) { WB_SETBIT(sc, WB_NETCFG, WB_NETCFG_TX_ON); CSR_WRITE_4(sc, WB_TXSTART, 0xFFFFFFFF); } } if (status & WB_ISR_TX_UNDERRUN) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); wb_txeof(sc); WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_TX_ON); /* Jack up TX threshold */ sc->wb_txthresh += WB_TXTHRESH_CHUNK; WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_TX_THRESH); WB_SETBIT(sc, WB_NETCFG, WB_TXTHRESH(sc->wb_txthresh)); WB_SETBIT(sc, WB_NETCFG, WB_NETCFG_TX_ON); } if (status & WB_ISR_BUS_ERR) { wb_reset(sc); wb_init_locked(sc); } } /* Re-enable interrupts. */ CSR_WRITE_4(sc, WB_IMR, WB_INTRS); if (ifp->if_snd.ifq_head != NULL) { wb_start_locked(ifp); } WB_UNLOCK(sc); } static void wb_tick(xsc) void *xsc; { struct wb_softc *sc; struct mii_data *mii; sc = xsc; WB_LOCK_ASSERT(sc); mii = device_get_softc(sc->wb_miibus); mii_tick(mii); if (sc->wb_timer > 0 && --sc->wb_timer == 0) wb_watchdog(sc); callout_reset(&sc->wb_stat_callout, hz, wb_tick, sc); } /* * Encapsulate an mbuf chain in a descriptor by coupling the mbuf data * pointers to the fragment pointers. */ static int wb_encap(sc, c, m_head) struct wb_softc *sc; struct wb_chain *c; struct mbuf *m_head; { int frag = 0; struct wb_desc *f = NULL; int total_len; struct mbuf *m; /* * Start packing the mbufs in this chain into * the fragment pointers. Stop when we run out * of fragments or hit the end of the mbuf chain. */ m = m_head; total_len = 0; for (m = m_head, frag = 0; m != NULL; m = m->m_next) { if (m->m_len != 0) { if (frag == WB_MAXFRAGS) break; total_len += m->m_len; f = &c->wb_ptr->wb_frag[frag]; f->wb_ctl = WB_TXCTL_TLINK | m->m_len; if (frag == 0) { f->wb_ctl |= WB_TXCTL_FIRSTFRAG; f->wb_status = 0; } else f->wb_status = WB_TXSTAT_OWN; f->wb_next = vtophys(&c->wb_ptr->wb_frag[frag + 1]); f->wb_data = vtophys(mtod(m, vm_offset_t)); frag++; } } /* * Handle special case: we used up all 16 fragments, * but we have more mbufs left in the chain. Copy the * data into an mbuf cluster. Note that we don't * bother clearing the values in the other fragment * pointers/counters; it wouldn't gain us anything, * and would waste cycles. 
*/ if (m != NULL) { struct mbuf *m_new = NULL; MGETHDR(m_new, M_NOWAIT, MT_DATA); if (m_new == NULL) return(1); if (m_head->m_pkthdr.len > MHLEN) { if (!(MCLGET(m_new, M_NOWAIT))) { m_freem(m_new); return(1); } } m_copydata(m_head, 0, m_head->m_pkthdr.len, mtod(m_new, caddr_t)); m_new->m_pkthdr.len = m_new->m_len = m_head->m_pkthdr.len; m_freem(m_head); m_head = m_new; f = &c->wb_ptr->wb_frag[0]; f->wb_status = 0; f->wb_data = vtophys(mtod(m_new, caddr_t)); f->wb_ctl = total_len = m_new->m_len; f->wb_ctl |= WB_TXCTL_TLINK|WB_TXCTL_FIRSTFRAG; frag = 1; } if (total_len < WB_MIN_FRAMELEN) { f = &c->wb_ptr->wb_frag[frag]; f->wb_ctl = WB_MIN_FRAMELEN - total_len; f->wb_data = vtophys(&sc->wb_cdata.wb_pad); f->wb_ctl |= WB_TXCTL_TLINK; f->wb_status = WB_TXSTAT_OWN; frag++; } c->wb_mbuf = m_head; c->wb_lastdesc = frag - 1; WB_TXCTL(c) |= WB_TXCTL_LASTFRAG; WB_TXNEXT(c) = vtophys(&c->wb_nextdesc->wb_ptr->wb_frag[0]); return(0); } /* * Main transmit routine. To avoid having to do mbuf copies, we put pointers * to the mbuf data regions directly in the transmit lists. We also save a * copy of the pointers since the transmit list fragment pointers are * physical addresses. */ static void wb_start(ifp) struct ifnet *ifp; { struct wb_softc *sc; sc = ifp->if_softc; WB_LOCK(sc); wb_start_locked(ifp); WB_UNLOCK(sc); } static void wb_start_locked(ifp) struct ifnet *ifp; { struct wb_softc *sc; struct mbuf *m_head = NULL; struct wb_chain *cur_tx = NULL, *start_tx; sc = ifp->if_softc; WB_LOCK_ASSERT(sc); /* * Check for an available queue slot. If there are none, * punt. */ if (sc->wb_cdata.wb_tx_free->wb_mbuf != NULL) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; return; } start_tx = sc->wb_cdata.wb_tx_free; while(sc->wb_cdata.wb_tx_free->wb_mbuf == NULL) { IF_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* Pick a descriptor off the free list. */ cur_tx = sc->wb_cdata.wb_tx_free; sc->wb_cdata.wb_tx_free = cur_tx->wb_nextdesc; /* Pack the data into the descriptor. */ wb_encap(sc, cur_tx, m_head); if (cur_tx != start_tx) WB_TXOWN(cur_tx) = WB_TXSTAT_OWN; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ BPF_MTAP(ifp, cur_tx->wb_mbuf); } /* * If there are no packets queued, bail. */ if (cur_tx == NULL) return; /* * Place the request for the upload interrupt * in the last descriptor in the chain. This way, if * we're chaining several packets at once, we'll only * get an interrupt once for the whole chain rather than * once for each packet. */ WB_TXCTL(cur_tx) |= WB_TXCTL_FINT; cur_tx->wb_ptr->wb_frag[0].wb_ctl |= WB_TXCTL_FINT; sc->wb_cdata.wb_tx_tail = cur_tx; if (sc->wb_cdata.wb_tx_head == NULL) { sc->wb_cdata.wb_tx_head = start_tx; WB_TXOWN(start_tx) = WB_TXSTAT_OWN; CSR_WRITE_4(sc, WB_TXSTART, 0xFFFFFFFF); } else { /* * We need to distinguish between the case where * the own bit is clear because the chip cleared it * and where the own bit is clear because we haven't * set it yet. The magic value WB_UNSENT is just some * randomly chosen number which doesn't have the own * bit set. When we actually transmit the frame, the * status word will have _only_ the own bit set, so * the txeoc handler will be able to tell if it needs * to initiate another transmission to flush out pending * frames. */ WB_TXOWN(start_tx) = WB_UNSENT; } /* * Set a timeout in case the chip goes out to lunch.
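The WB_UNSENT trick above gives the driver a third descriptor state the hardware can never produce: OWN set means queued to the chip, OWN clear means completed, and the made-up WB_UNSENT value means built but deliberately not handed over yet. A tiny standalone model of the classification that wb_txeof()/wb_txeoc() perform (the UNSENT value here is arbitrary, as in the driver; only "no OWN bit" matters):

#include <stdio.h>
#include <stdint.h>

#define TXSTAT_OWN      0x80000000u
#define UNSENT          0x12344321u     /* any value without OWN set */

/* Classify a TX descriptor the way the completion handlers do. */
static const char *
tx_state(uint32_t status)
{
        if (status & TXSTAT_OWN)
                return ("owned by chip (in flight)");
        if (status == UNSENT)
                return ("built, not yet started");
        return ("completed by chip");
}

int
main(void)
{
        uint32_t descs[] = { TXSTAT_OWN, UNSENT, 0x00000040 };

        for (int i = 0; i < 3; i++)
                printf("desc %d: %s\n", i, tx_state(descs[i]));
        return (0);
}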
*/ sc->wb_timer = 5; } static void wb_init(xsc) void *xsc; { struct wb_softc *sc = xsc; WB_LOCK(sc); wb_init_locked(sc); WB_UNLOCK(sc); } static void wb_init_locked(sc) struct wb_softc *sc; { struct ifnet *ifp = sc->wb_ifp; int i; struct mii_data *mii; WB_LOCK_ASSERT(sc); mii = device_get_softc(sc->wb_miibus); /* * Cancel pending I/O and free all RX/TX buffers. */ wb_stop(sc); wb_reset(sc); sc->wb_txthresh = WB_TXTHRESH_INIT; /* * Set cache alignment and burst length. */ #ifdef foo CSR_WRITE_4(sc, WB_BUSCTL, WB_BUSCTL_CONFIG); WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_TX_THRESH); WB_SETBIT(sc, WB_NETCFG, WB_TXTHRESH(sc->wb_txthresh)); #endif CSR_WRITE_4(sc, WB_BUSCTL, WB_BUSCTL_MUSTBEONE|WB_BUSCTL_ARBITRATION); WB_SETBIT(sc, WB_BUSCTL, WB_BURSTLEN_16LONG); switch(sc->wb_cachesize) { case 32: WB_SETBIT(sc, WB_BUSCTL, WB_CACHEALIGN_32LONG); break; case 16: WB_SETBIT(sc, WB_BUSCTL, WB_CACHEALIGN_16LONG); break; case 8: WB_SETBIT(sc, WB_BUSCTL, WB_CACHEALIGN_8LONG); break; case 0: default: WB_SETBIT(sc, WB_BUSCTL, WB_CACHEALIGN_NONE); break; } /* This doesn't tend to work too well at 100Mbps. */ WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_TX_EARLY_ON); /* Init our MAC address */ for (i = 0; i < ETHER_ADDR_LEN; i++) { CSR_WRITE_1(sc, WB_NODE0 + i, IF_LLADDR(sc->wb_ifp)[i]); } /* Init circular RX list. */ if (wb_list_rx_init(sc) == ENOBUFS) { device_printf(sc->wb_dev, "initialization failed: no memory for rx buffers\n"); wb_stop(sc); return; } /* Init TX descriptors. */ wb_list_tx_init(sc); /* If we want promiscuous mode, set the allframes bit. */ if (ifp->if_flags & IFF_PROMISC) { WB_SETBIT(sc, WB_NETCFG, WB_NETCFG_RX_ALLPHYS); } else { WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_RX_ALLPHYS); } /* * Set capture broadcast bit to capture broadcast frames. */ if (ifp->if_flags & IFF_BROADCAST) { WB_SETBIT(sc, WB_NETCFG, WB_NETCFG_RX_BROAD); } else { WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_RX_BROAD); } /* * Program the multicast filter, if necessary. */ wb_setmulti(sc); /* * Load the address of the RX list. */ WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_RX_ON); CSR_WRITE_4(sc, WB_RXADDR, vtophys(&sc->wb_ldata->wb_rx_list[0])); /* * Enable interrupts. */ CSR_WRITE_4(sc, WB_IMR, WB_INTRS); CSR_WRITE_4(sc, WB_ISR, 0xFFFFFFFF); /* Enable receiver and transmitter. */ WB_SETBIT(sc, WB_NETCFG, WB_NETCFG_RX_ON); CSR_WRITE_4(sc, WB_RXSTART, 0xFFFFFFFF); WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_TX_ON); CSR_WRITE_4(sc, WB_TXADDR, vtophys(&sc->wb_ldata->wb_tx_list[0])); WB_SETBIT(sc, WB_NETCFG, WB_NETCFG_TX_ON); mii_mediachg(mii); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; callout_reset(&sc->wb_stat_callout, hz, wb_tick, sc); } /* * Set media options. */ static int wb_ifmedia_upd(ifp) struct ifnet *ifp; { struct wb_softc *sc; sc = ifp->if_softc; WB_LOCK(sc); if (ifp->if_flags & IFF_UP) wb_init_locked(sc); WB_UNLOCK(sc); return(0); } /* * Report current media status. 
*/ static void wb_ifmedia_sts(ifp, ifmr) struct ifnet *ifp; struct ifmediareq *ifmr; { struct wb_softc *sc; struct mii_data *mii; sc = ifp->if_softc; WB_LOCK(sc); mii = device_get_softc(sc->wb_miibus); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; WB_UNLOCK(sc); } static int wb_ioctl(ifp, command, data) struct ifnet *ifp; u_long command; caddr_t data; { struct wb_softc *sc = ifp->if_softc; struct mii_data *mii; struct ifreq *ifr = (struct ifreq *) data; int error = 0; switch(command) { case SIOCSIFFLAGS: WB_LOCK(sc); if (ifp->if_flags & IFF_UP) { wb_init_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) wb_stop(sc); } WB_UNLOCK(sc); error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: WB_LOCK(sc); wb_setmulti(sc); WB_UNLOCK(sc); error = 0; break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: mii = device_get_softc(sc->wb_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); break; default: error = ether_ioctl(ifp, command, data); break; } return(error); } static void wb_watchdog(sc) struct wb_softc *sc; { struct ifnet *ifp; WB_LOCK_ASSERT(sc); ifp = sc->wb_ifp; if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if_printf(ifp, "watchdog timeout\n"); #ifdef foo if (!(wb_phy_readreg(sc, PHY_BMSR) & PHY_BMSR_LINKSTAT)) if_printf(ifp, "no carrier - transceiver cable problem?\n"); #endif wb_stop(sc); wb_reset(sc); wb_init_locked(sc); if (ifp->if_snd.ifq_head != NULL) wb_start_locked(ifp); } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. */ static void wb_stop(sc) struct wb_softc *sc; { int i; struct ifnet *ifp; WB_LOCK_ASSERT(sc); ifp = sc->wb_ifp; sc->wb_timer = 0; callout_stop(&sc->wb_stat_callout); WB_CLRBIT(sc, WB_NETCFG, (WB_NETCFG_RX_ON|WB_NETCFG_TX_ON)); CSR_WRITE_4(sc, WB_IMR, 0x00000000); CSR_WRITE_4(sc, WB_TXADDR, 0x00000000); CSR_WRITE_4(sc, WB_RXADDR, 0x00000000); /* * Free data in the RX lists. */ for (i = 0; i < WB_RX_LIST_CNT; i++) { if (sc->wb_cdata.wb_rx_chain[i].wb_mbuf != NULL) { m_freem(sc->wb_cdata.wb_rx_chain[i].wb_mbuf); sc->wb_cdata.wb_rx_chain[i].wb_mbuf = NULL; } } bzero((char *)&sc->wb_ldata->wb_rx_list, sizeof(sc->wb_ldata->wb_rx_list)); /* * Free the TX list buffers. */ for (i = 0; i < WB_TX_LIST_CNT; i++) { if (sc->wb_cdata.wb_tx_chain[i].wb_mbuf != NULL) { m_freem(sc->wb_cdata.wb_tx_chain[i].wb_mbuf); sc->wb_cdata.wb_tx_chain[i].wb_mbuf = NULL; } } bzero((char *)&sc->wb_ldata->wb_tx_list, sizeof(sc->wb_ldata->wb_tx_list)); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ static int wb_shutdown(dev) device_t dev; { struct wb_softc *sc; sc = device_get_softc(dev); WB_LOCK(sc); wb_stop(sc); WB_UNLOCK(sc); return (0); } Index: head/sys/kern/kern_mbuf.c =================================================================== --- head/sys/kern/kern_mbuf.c (revision 324445) +++ head/sys/kern/kern_mbuf.c (revision 324446) @@ -1,955 +1,952 @@ /*- * Copyright (c) 2004, 2005, * Bosko Milekic . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions and the following * disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_param.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA * Zones. * * Mbuf Clusters (2K, contiguous) are allocated from the Cluster * Zone. The Zone can be capped at kern.ipc.nmbclusters, if the * administrator so desires. * * Mbufs are allocated from a UMA Master Zone called the Mbuf * Zone. * * Additionally, FreeBSD provides a Packet Zone, which it * configures as a Secondary Zone to the Mbuf Master Zone, * thus sharing backend Slab kegs with the Mbuf Master Zone. * * Thus common-case allocations and locking are simplified: *
 *  m_clget()                m_getcl()
 *   |                         |
 *   |   .------------>[(Packet Cache)]   m_get(), m_gethdr()
 *   |   |                 [ Packet   ]            |
 *  [(Cluster Cache)]      [ Secondary ]    [ (Mbuf Cache)     ]
 *  [ Cluster Zone  ]      [   Zone    ]    [ Mbuf Master Zone ]
 *         |                     \________         |
 *  [ Cluster Keg   ]                     \       /
 *         |                           [ Mbuf Keg ]
 *  [ Cluster Slabs ]                       |
 *         |                          [ Mbuf Slabs ]
 *          \____________(VM)_________________/
 * * * Whenever an object is allocated with uma_zalloc() out of * one of the Zones its _ctor_ function is executed. Likewise, * for any deallocation through uma_zfree() the _dtor_ function * is executed. * * Caches are per-CPU and are filled from the Master Zone. * * Whenever an object is allocated from the underlying global * memory pool it gets pre-initialized with the _zinit_ functions. * When the Kegs are overfull objects get decommissioned with * _zfini_ functions and free'd back to the global memory pool. * */ int nmbufs; /* limits number of mbufs */ int nmbclusters; /* limits number of mbuf clusters */ int nmbjumbop; /* limits number of page size jumbo clusters */ int nmbjumbo9; /* limits number of 9k jumbo clusters */ int nmbjumbo16; /* limits number of 16k jumbo clusters */ static quad_t maxmbufmem; /* overall real memory limit for all mbufs */ SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0, "Maximum real memory allocatable to various mbuf types"); /* * tunable_mbinit() has to be run before any mbuf allocations are done. */ static void tunable_mbinit(void *dummy) { quad_t realmem; /* * The default limit for all mbuf related memory is 1/2 of all * available kernel memory (physical or kmem). * At most it can be 3/4 of available kernel memory; a worked * example of the resulting defaults follows below.
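A worked example of the defaults computed by tunable_mbinit() below, assuming a machine where min(physmem * PAGE_SIZE, vm_kmem_size) is 8 GB and typical amd64 constants (PAGE_SIZE 4k, MCLBYTES 2k, MJUMPAGESIZE 4k, MJUM9BYTES 9k, MJUM16BYTES 16k, MSIZE 256); these inputs are assumptions, the divisors come straight from the code:

#include <stdio.h>

int
main(void)
{
        long long realmem = 8LL << 30;          /* assumed 8 GB */
        long long maxmbufmem = realmem / 2;     /* 4 GB ceiling */

        long long nmbclusters = maxmbufmem / 2048 / 4;       /* 524288 */
        long long nmbjumbop = maxmbufmem / 4096 / 4;         /* 262144 */
        long long nmbjumbo9 = maxmbufmem / (9 * 1024) / 6;   /* 77672 */
        long long nmbjumbo16 = maxmbufmem / (16 * 1024) / 6; /* 43690 */
        /* nmbufs = max(maxmbufmem / MSIZE / 5, sum of the above) */
        long long nmbufs = maxmbufmem / 256 / 5;             /* 3355443 */

        printf("clusters %lld, pagesz %lld, 9k %lld, 16k %lld, mbufs %lld\n",
            nmbclusters, nmbjumbop, nmbjumbo9, nmbjumbo16, nmbufs);
        return (0);
}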
*/ realmem = qmin((quad_t)physmem * PAGE_SIZE, vm_kmem_size); maxmbufmem = realmem / 2; TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem); if (maxmbufmem > realmem / 4 * 3) maxmbufmem = realmem / 4 * 3; TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); if (nmbclusters == 0) nmbclusters = maxmbufmem / MCLBYTES / 4; TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop); if (nmbjumbop == 0) nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4; TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9); if (nmbjumbo9 == 0) nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6; TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16); if (nmbjumbo16 == 0) nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6; /* * We need at least as many mbufs as we have clusters of * the various types added together. */ TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs); if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) nmbufs = lmax(maxmbufmem / MSIZE / 5, nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16); } SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL); static int sysctl_nmbclusters(SYSCTL_HANDLER_ARGS) { int error, newnmbclusters; newnmbclusters = nmbclusters; error = sysctl_handle_int(oidp, &newnmbclusters, 0, req); if (error == 0 && req->newptr && newnmbclusters != nmbclusters) { if (newnmbclusters > nmbclusters && nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbclusters = newnmbclusters; nmbclusters = uma_zone_set_max(zone_clust, nmbclusters); EVENTHANDLER_INVOKE(nmbclusters_change); } else error = EINVAL; } return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT|CTLFLAG_RW, &nmbclusters, 0, sysctl_nmbclusters, "IU", "Maximum number of mbuf clusters allowed"); static int sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS) { int error, newnmbjumbop; newnmbjumbop = nmbjumbop; error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req); if (error == 0 && req->newptr && newnmbjumbop != nmbjumbop) { if (newnmbjumbop > nmbjumbop && nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbjumbop = newnmbjumbop; nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop); } else error = EINVAL; } return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW, &nmbjumbop, 0, sysctl_nmbjumbop, "IU", "Maximum number of mbuf page size jumbo clusters allowed"); static int sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS) { int error, newnmbjumbo9; newnmbjumbo9 = nmbjumbo9; error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req); if (error == 0 && req->newptr && newnmbjumbo9 != nmbjumbo9) { if (newnmbjumbo9 > nmbjumbo9 && nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbjumbo9 = newnmbjumbo9; nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9); } else error = EINVAL; } return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW, &nmbjumbo9, 0, sysctl_nmbjumbo9, "IU", "Maximum number of mbuf 9k jumbo clusters allowed"); static int sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS) { int error, newnmbjumbo16; newnmbjumbo16 = nmbjumbo16; error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req); if (error == 0 && req->newptr && newnmbjumbo16 != nmbjumbo16) { if (newnmbjumbo16 > nmbjumbo16 && nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbjumbo16 = newnmbjumbo16; nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); } else error = EINVAL; } return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW, &nmbjumbo16, 0, sysctl_nmbjumbo16, "IU", "Maximum number of mbuf 16k jumbo clusters allowed"); static int 
sysctl_nmbufs(SYSCTL_HANDLER_ARGS) { int error, newnmbufs; newnmbufs = nmbufs; error = sysctl_handle_int(oidp, &newnmbufs, 0, req); if (error == 0 && req->newptr && newnmbufs != nmbufs) { if (newnmbufs > nmbufs) { nmbufs = newnmbufs; nmbufs = uma_zone_set_max(zone_mbuf, nmbufs); EVENTHANDLER_INVOKE(nmbufs_change); } else error = EINVAL; } return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW, &nmbufs, 0, sysctl_nmbufs, "IU", "Maximum number of mbufs allowed"); /* * Zones from which we allocate. */ uma_zone_t zone_mbuf; uma_zone_t zone_clust; uma_zone_t zone_pack; uma_zone_t zone_jumbop; uma_zone_t zone_jumbo9; uma_zone_t zone_jumbo16; /* * Local prototypes. */ static int mb_ctor_mbuf(void *, int, void *, int); static int mb_ctor_clust(void *, int, void *, int); static int mb_ctor_pack(void *, int, void *, int); static void mb_dtor_mbuf(void *, int, void *); static void mb_dtor_pack(void *, int, void *); static int mb_zinit_pack(void *, int, int); static void mb_zfini_pack(void *, int); static void mb_reclaim(uma_zone_t, int); static void *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, uint8_t *, int); /* Ensure that MSIZE is a power of 2. */ CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); /* * Initialize FreeBSD Network buffer allocation. */ static void mbuf_init(void *dummy) { /* * Configure UMA zones for Mbufs, Clusters, and Packets. */ zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE, mb_ctor_mbuf, mb_dtor_mbuf, #ifdef INVARIANTS trash_init, trash_fini, #else NULL, NULL, #endif MSIZE - 1, UMA_ZONE_MAXBUCKET); if (nmbufs > 0) nmbufs = uma_zone_set_max(zone_mbuf, nmbufs); uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached"); uma_zone_set_maxaction(zone_mbuf, mb_reclaim); zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES, mb_ctor_clust, #ifdef INVARIANTS trash_dtor, trash_init, trash_fini, #else NULL, NULL, NULL, #endif UMA_ALIGN_PTR, 0); if (nmbclusters > 0) nmbclusters = uma_zone_set_max(zone_clust, nmbclusters); uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached"); uma_zone_set_maxaction(zone_clust, mb_reclaim); zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack, mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf); /* Make jumbo frame zones too. Page size, 9k and 16k.
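The uma_zsecond_create() call above is what makes a "packet" an mbuf whose expensive part (the cluster) is pre-attached by the secondary zone's init hook. A hedged, uncompiled sketch of the same master/secondary pattern with a hypothetical item type; only the zone-API shape mirrors mbuf_init(), the rest is illustration:

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <vm/uma.h>

/* Hypothetical object: "fat" items pre-attach an auxiliary buffer
 * on the slab-to-cache transition, like mb_zinit_pack() attaching
 * a cluster. */
struct item { int id; char *aux; };

static uma_zone_t item_zone;    /* master: backs the raw objects */
static uma_zone_t fatitem_zone; /* secondary: same keg, extra init */

static int
fatitem_zinit(void *mem, int size, int how)
{
        struct item *it = mem;

        it->aux = malloc(64, M_DEVBUF, how);
        return (it->aux == NULL ? ENOMEM : 0);
}

static void
item_zones_init(void *dummy __unused)
{
        item_zone = uma_zcreate("items", sizeof(struct item),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
        fatitem_zone = uma_zsecond_create("fatitems",
            NULL, NULL, fatitem_zinit, NULL, item_zone);
}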
*/ zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE, mb_ctor_clust, #ifdef INVARIANTS trash_dtor, trash_init, trash_fini, #else NULL, NULL, NULL, #endif UMA_ALIGN_PTR, 0); if (nmbjumbop > 0) nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop); uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached"); uma_zone_set_maxaction(zone_jumbop, mb_reclaim); zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES, mb_ctor_clust, #ifdef INVARIANTS trash_dtor, trash_init, trash_fini, #else NULL, NULL, NULL, #endif UMA_ALIGN_PTR, 0); uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc); if (nmbjumbo9 > 0) nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9); uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached"); uma_zone_set_maxaction(zone_jumbo9, mb_reclaim); zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES, mb_ctor_clust, #ifdef INVARIANTS trash_dtor, trash_init, trash_fini, #else NULL, NULL, NULL, #endif UMA_ALIGN_PTR, 0); uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc); if (nmbjumbo16 > 0) nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached"); uma_zone_set_maxaction(zone_jumbo16, mb_reclaim); /* * Hook event handler for low-memory situation, used to * drain protocols and push data back to the caches (UMA * later pushes it back to VM). */ EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL, EVENTHANDLER_PRI_FIRST); } SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); /* * UMA backend page allocator for the jumbo frame zones. * * Allocates kernel virtual memory that is backed by contiguous physical * pages. */ static void * mbuf_jumbo_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait) { /* Inform UMA that this allocator uses kernel_map/object. */ *flags = UMA_SLAB_KERNEL; return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait, (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT)); } /* * Constructor for Mbuf master zone. * * The 'arg' pointer points to a mb_args structure which * contains call-specific information required to support the * mbuf allocation API. See mbuf.h. */ static int mb_ctor_mbuf(void *mem, int size, void *arg, int how) { struct mbuf *m; struct mb_args *args; int error; int flags; short type; #ifdef INVARIANTS trash_ctor(mem, size, arg, how); #endif args = (struct mb_args *)arg; type = args->type; /* * The mbuf is initialized later. The caller has the * responsibility to set up any MAC labels too. */ if (type == MT_NOINIT) return (0); m = (struct mbuf *)mem; flags = args->flags; MPASS((flags & M_NOFREE) == 0); error = m_init(m, how, type, flags); return (error); } /* * The Mbuf master zone destructor. */ static void mb_dtor_mbuf(void *mem, int size, void *arg) { struct mbuf *m; unsigned long flags; m = (struct mbuf *)mem; flags = (unsigned long)arg; KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__)); if (!(flags & MB_DTOR_SKIP) && (m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags)) m_tag_delete_chain(m, NULL); #ifdef INVARIANTS trash_dtor(mem, size, arg); #endif } /* * The Mbuf Packet zone destructor. */ static void mb_dtor_pack(void *mem, int size, void *arg) { struct mbuf *m; m = (struct mbuf *)mem; if ((m->m_flags & M_PKTHDR) != 0) m_tag_delete_chain(m, NULL); /* Make sure we've got a clean cluster back. 
*/ KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__)); KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__)); KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__)); KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__)); KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__)); KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__)); #ifdef INVARIANTS trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg); #endif /* * If there are processes blocked on zone_clust, waiting for pages * to be freed up, * cause them to be woken up by draining the * packet zone. We are exposed to a race here * (in the check for * the UMA_ZFLAG_FULL) where we might miss the flag set, but that * is deliberate. We don't want to acquire the zone lock for every * mbuf free. */ if (uma_zone_exhausted_nolock(zone_clust)) zone_drain(zone_pack); } /* * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor. * * Here the 'arg' pointer points to the Mbuf which we * are configuring cluster storage for. If 'arg' is * empty we allocate just the cluster without setting * the mbuf to it. See mbuf.h. */ static int mb_ctor_clust(void *mem, int size, void *arg, int how) { struct mbuf *m; #ifdef INVARIANTS trash_ctor(mem, size, arg, how); #endif m = (struct mbuf *)arg; if (m != NULL) { - m->m_ext.ext_buf = (caddr_t)mem; + m->m_ext.ext_buf = (char *)mem; m->m_data = m->m_ext.ext_buf; m->m_flags |= M_EXT; m->m_ext.ext_free = NULL; m->m_ext.ext_arg1 = NULL; m->m_ext.ext_arg2 = NULL; m->m_ext.ext_size = size; m->m_ext.ext_type = m_gettype(size); m->m_ext.ext_flags = EXT_FLAG_EMBREF; m->m_ext.ext_count = 1; } return (0); } /* * The Packet secondary zone's init routine, executed on the * object's transition from mbuf keg slab to zone cache. */ static int mb_zinit_pack(void *mem, int size, int how) { struct mbuf *m; m = (struct mbuf *)mem; /* m is virgin. */ if (uma_zalloc_arg(zone_clust, m, how) == NULL || m->m_ext.ext_buf == NULL) return (ENOMEM); m->m_ext.ext_type = EXT_PACKET; /* Override. */ #ifdef INVARIANTS trash_init(m->m_ext.ext_buf, MCLBYTES, how); #endif return (0); } /* * The Packet secondary zone's fini routine, executed on the * object's transition from zone cache to keg slab. */ static void mb_zfini_pack(void *mem, int size) { struct mbuf *m; m = (struct mbuf *)mem; #ifdef INVARIANTS trash_fini(m->m_ext.ext_buf, MCLBYTES); #endif uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL); #ifdef INVARIANTS trash_dtor(mem, size, NULL); #endif } /* * The "packet" keg constructor. */ static int mb_ctor_pack(void *mem, int size, void *arg, int how) { struct mbuf *m; struct mb_args *args; int error, flags; short type; m = (struct mbuf *)mem; args = (struct mb_args *)arg; flags = args->flags; type = args->type; MPASS((flags & M_NOFREE) == 0); #ifdef INVARIANTS trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how); #endif error = m_init(m, how, type, flags); /* m_ext is already initialized. */ m->m_data = m->m_ext.ext_buf; m->m_flags = (flags | M_EXT); return (error); } /* * This is the protocol drain routine. Called by UMA whenever any of the * mbuf zones is close to its limit. * * No locks should be held when this is called. The drain routines have to * presently acquire some locks which raises the possibility of lock order * reversal.
*/ static void mb_reclaim(uma_zone_t zone __unused, int pending __unused) { struct domain *dp; struct protosw *pr; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, __func__); for (dp = domains; dp != NULL; dp = dp->dom_next) for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_drain != NULL) (*pr->pr_drain)(); } /* * Clean up after mbufs with M_EXT storage attached to them if the * reference count hits 1. */ void mb_free_ext(struct mbuf *m) { volatile u_int *refcnt; struct mbuf *mref; int freembuf; KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m)); /* See if this is the mbuf that holds the embedded refcount. */ if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { refcnt = &m->m_ext.ext_count; mref = m; } else { KASSERT(m->m_ext.ext_cnt != NULL, ("%s: no refcounting pointer on %p", __func__, m)); refcnt = m->m_ext.ext_cnt; mref = __containerof(refcnt, struct mbuf, m_ext.ext_count); } /* * Check if the header is embedded in the cluster. It is * important that we can't touch any of the mbuf fields * after we have freed the external storage, since the mbuf * could have been embedded in it. For now, the mbufs * embedded into the cluster are always of type EXT_EXTREF, * and for this type we won't free the mref. */ if (m->m_flags & M_NOFREE) { freembuf = 0; KASSERT(m->m_ext.ext_type == EXT_EXTREF, ("%s: no-free mbuf %p has wrong type", __func__, m)); } else freembuf = 1; /* Free attached storage if this mbuf is the only reference to it. */ if (*refcnt == 1 || atomic_fetchadd_int(refcnt, -1) == 1) { switch (m->m_ext.ext_type) { case EXT_PACKET: /* The packet zone is special. */ if (*refcnt == 0) *refcnt = 1; uma_zfree(zone_pack, mref); break; case EXT_CLUSTER: uma_zfree(zone_clust, m->m_ext.ext_buf); uma_zfree(zone_mbuf, mref); break; case EXT_JUMBOP: uma_zfree(zone_jumbop, m->m_ext.ext_buf); uma_zfree(zone_mbuf, mref); break; case EXT_JUMBO9: uma_zfree(zone_jumbo9, m->m_ext.ext_buf); uma_zfree(zone_mbuf, mref); break; case EXT_JUMBO16: uma_zfree(zone_jumbo16, m->m_ext.ext_buf); uma_zfree(zone_mbuf, mref); break; case EXT_SFBUF: sf_ext_free(m->m_ext.ext_arg1, m->m_ext.ext_arg2); uma_zfree(zone_mbuf, mref); break; case EXT_SFBUF_NOCACHE: sf_ext_free_nocache(m->m_ext.ext_arg1, m->m_ext.ext_arg2); uma_zfree(zone_mbuf, mref); break; case EXT_NET_DRV: case EXT_MOD_TYPE: case EXT_DISPOSABLE: KASSERT(m->m_ext.ext_free != NULL, ("%s: ext_free not set", __func__)); - (*(m->m_ext.ext_free))(m, m->m_ext.ext_arg1, - m->m_ext.ext_arg2); + m->m_ext.ext_free(m); uma_zfree(zone_mbuf, mref); break; case EXT_EXTREF: KASSERT(m->m_ext.ext_free != NULL, ("%s: ext_free not set", __func__)); - (*(m->m_ext.ext_free))(m, m->m_ext.ext_arg1, - m->m_ext.ext_arg2); + m->m_ext.ext_free(m); break; default: KASSERT(m->m_ext.ext_type == 0, ("%s: unknown ext_type", __func__)); } } if (freembuf && m != mref) uma_zfree(zone_mbuf, m); } /* * Official mbuf(9) allocation KPI for stack and drivers: * * m_get() - a single mbuf without any attachments, sys/mbuf.h. * m_gethdr() - a single mbuf initialized as M_PKTHDR, sys/mbuf.h. * m_getcl() - an mbuf + 2k cluster, sys/mbuf.h. * m_clget() - attach cluster to already allocated mbuf. * m_cljget() - attach jumbo cluster to already allocated mbuf. * m_get2() - allocate minimum mbuf that would fit size argument. * m_getm2() - allocate a chain of mbufs/clusters. * m_extadd() - attach external cluster to mbuf. * * m_free() - free single mbuf with its tags and ext, sys/mbuf.h. * m_freem() - free chain of mbufs.
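mb_free_ext() above supports two homes for the reference count: embedded in the mbuf that owns the storage (EXT_FLAG_EMBREF) or reached through the ext_cnt pointer, in which case __containerof() recovers the owning mbuf from the counter's address. A standalone model of that recovery, with fake structures standing in for the real mbuf layout:

#include <stdio.h>
#include <stddef.h>

/* Fake stand-ins for struct mbuf / m_ext; layout is illustrative. */
struct fake_ext { unsigned ext_count; };
struct fake_mbuf { int id; struct fake_ext m_ext; };

/* Same arithmetic as the kernel's __containerof(). */
#define containerof(p, type, field) \
        ((type *)((char *)(p) - offsetof(type, field)))

int
main(void)
{
        struct fake_mbuf owner = { 42, { 1 } };
        /* A second mbuf sharing the storage would carry a pointer to
         * owner's counter instead of an embedded copy. */
        unsigned *refcnt = &owner.m_ext.ext_count;
        struct fake_mbuf *mref =
            containerof(refcnt, struct fake_mbuf, m_ext.ext_count);

        printf("counter belongs to mbuf %d\n", mref->id);
        return (0);
}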
*/ int m_clget(struct mbuf *m, int how) { KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT", __func__, m)); m->m_ext.ext_buf = (char *)NULL; uma_zalloc_arg(zone_clust, m, how); /* * On a cluster allocation failure, drain the packet zone and retry, * we might be able to loosen a few clusters up on the drain. */ if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) { zone_drain(zone_pack); uma_zalloc_arg(zone_clust, m, how); } MBUF_PROBE2(m__clget, m, how); return (m->m_flags & M_EXT); } /* * m_cljget() is different from m_clget() as it can allocate clusters without * attaching them to an mbuf. In that case the return value is the pointer * to the cluster of the requested size. If an mbuf was specified, it gets * the cluster attached to it and the return value can be safely ignored. * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. */ void * m_cljget(struct mbuf *m, int how, int size) { uma_zone_t zone; void *retval; if (m != NULL) { KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT", __func__, m)); m->m_ext.ext_buf = NULL; } zone = m_getzone(size); retval = uma_zalloc_arg(zone, m, how); MBUF_PROBE4(m__cljget, m, how, size, retval); return (retval); } /* * m_get2() allocates minimum mbuf that would fit "size" argument. */ struct mbuf * m_get2(int size, int how, short type, int flags) { struct mb_args args; struct mbuf *m, *n; args.flags = flags; args.type = type; if (size <= MHLEN || (size <= MLEN && (flags & M_PKTHDR) == 0)) return (uma_zalloc_arg(zone_mbuf, &args, how)); if (size <= MCLBYTES) return (uma_zalloc_arg(zone_pack, &args, how)); if (size > MJUMPAGESIZE) return (NULL); m = uma_zalloc_arg(zone_mbuf, &args, how); if (m == NULL) return (NULL); n = uma_zalloc_arg(zone_jumbop, m, how); if (n == NULL) { uma_zfree(zone_mbuf, m); return (NULL); } return (m); } /* * m_getjcl() returns an mbuf with a cluster of the specified size attached. * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. */ struct mbuf * m_getjcl(int how, short type, int flags, int size) { struct mb_args args; struct mbuf *m, *n; uma_zone_t zone; if (size == MCLBYTES) return m_getcl(how, type, flags); args.flags = flags; args.type = type; m = uma_zalloc_arg(zone_mbuf, &args, how); if (m == NULL) return (NULL); zone = m_getzone(size); n = uma_zalloc_arg(zone, m, how); if (n == NULL) { uma_zfree(zone_mbuf, m); return (NULL); } return (m); } /* * Allocate a given length worth of mbufs and/or clusters (whatever fits * best) and return a pointer to the top of the allocated chain. If an * existing mbuf chain is provided, then we will append the new chain * to the existing one but still return the top of the newly allocated * chain. */ struct mbuf * m_getm2(struct mbuf *m, int len, int how, short type, int flags) { struct mbuf *mb, *nm = NULL, *mtail = NULL; KASSERT(len >= 0, ("%s: len is < 0", __func__)); /* Validate flags. */ flags &= (M_PKTHDR | M_EOR); /* Packet header mbuf must be first in chain. */ if ((flags & M_PKTHDR) && m != NULL) flags &= ~M_PKTHDR; /* Loop and append maximum sized mbufs to the chain tail. */ while (len > 0) { if (len > MCLBYTES) mb = m_getjcl(how, type, (flags & M_PKTHDR), MJUMPAGESIZE); else if (len >= MINCLSIZE) mb = m_getcl(how, type, (flags & M_PKTHDR)); else if (flags & M_PKTHDR) mb = m_gethdr(how, type); else mb = m_get(how, type); /* Fail the whole operation if one mbuf can't be allocated. */ if (mb == NULL) { if (nm != NULL) m_freem(nm); return (NULL); } /* Book keeping. 
*/ len -= M_SIZE(mb); if (mtail != NULL) mtail->m_next = mb; else nm = mb; mtail = mb; flags &= ~M_PKTHDR; /* Only valid on the first mbuf. */ } if (flags & M_EOR) mtail->m_flags |= M_EOR; /* Only valid on the last mbuf. */ /* If mbuf was supplied, append new chain to the end of it. */ if (m != NULL) { for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next) ; mtail->m_next = nm; mtail->m_flags &= ~M_EOR; } else m = nm; return (m); } /*- * Configure a provided mbuf to refer to the provided external storage * buffer and setup a reference count for said buffer. * * Arguments: * mb The existing mbuf to which to attach the provided buffer. * buf The address of the provided external storage buffer. * size The size of the provided buffer. * freef A pointer to a routine that is responsible for freeing the * provided external storage buffer. * args A pointer to an argument structure (of any type) to be passed * to the provided freef routine (may be NULL). * flags Any other flags to be passed to the provided mbuf. * type The type that the external storage buffer should be * labeled with. * * Returns: * Nothing. */ void -m_extadd(struct mbuf *mb, caddr_t buf, u_int size, - void (*freef)(struct mbuf *, void *, void *), void *arg1, void *arg2, - int flags, int type) +m_extadd(struct mbuf *mb, char *buf, u_int size, m_ext_free_t freef, + void *arg1, void *arg2, int flags, int type) { KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__)); mb->m_flags |= (M_EXT | flags); mb->m_ext.ext_buf = buf; mb->m_data = mb->m_ext.ext_buf; mb->m_ext.ext_size = size; mb->m_ext.ext_free = freef; mb->m_ext.ext_arg1 = arg1; mb->m_ext.ext_arg2 = arg2; mb->m_ext.ext_type = type; if (type != EXT_EXTREF) { mb->m_ext.ext_count = 1; mb->m_ext.ext_flags = EXT_FLAG_EMBREF; } else mb->m_ext.ext_flags = 0; } /* * Free an entire chain of mbufs and associated external buffers, if * applicable. */ void m_freem(struct mbuf *mb) { MBUF_PROBE1(m__freem, mb); while (mb != NULL) mb = m_free(mb); } Index: head/sys/kern/subr_mbpool.c =================================================================== --- head/sys/kern/subr_mbpool.c (revision 324445) +++ head/sys/kern/subr_mbpool.c (revision 324446) @@ -1,398 +1,398 @@ /*- * Copyright (c) 2003 * Fraunhofer Institute for Open Communication Systems (FhG Fokus). * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Author: Hartmut Brandt */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(libmbpool, 1); /* * Memory is allocated as DMA-able pages. Each page is divided into a number * of equal chunks where the last 4 bytes of each chunk are occupied by * the page number and the chunk number. The caller must take these four * bytes into account when specifying the chunk size. Each page is mapped by * its own DMA map using the user specified DMA tag. * * Each chunk has a used and a card bit in the high bits of its page number. * 0 0 chunk is free and may be allocated * 1 1 chunk has been given to the interface * 0 1 chunk is traveling through the system * 1 0 illegal */ struct mbtrail { uint16_t chunk; uint16_t page; }; #define MBP_CARD 0x8000 #define MBP_USED 0x4000 #define MBP_PMSK 0x3fff /* page number mask */ #define MBP_CMSK 0x01ff /* chunk number mask */ struct mbfree { SLIST_ENTRY(mbfree) link; /* link on free list */ }; struct mbpage { bus_dmamap_t map; /* map for this page */ bus_addr_t phy; /* physical address */ void *va; /* the memory */ }; struct mbpool { const char *name; /* a name for this pool */ bus_dma_tag_t dmat; /* tag for mapping */ u_int max_pages; /* maximum number of pages */ size_t page_size; /* size of each allocation */ size_t chunk_size; /* size of each external mbuf */ struct mtx free_lock; /* lock of free list */ SLIST_HEAD(, mbfree) free_list; /* free list */ u_int npages; /* current number of pages */ u_int nchunks; /* chunks per page */ struct mbpage pages[]; /* pages */ }; static MALLOC_DEFINE(M_MBPOOL, "mbpools", "mbuf pools"); /* * Make a trail pointer from a chunk pointer */ #define C2T(P, C) ((struct mbtrail *)((char *)(C) + (P)->chunk_size - \ sizeof(struct mbtrail))) /* * Make a free chunk pointer from a chunk number */ #define N2C(P, PG, C) ((struct mbfree *)((char *)(PG)->va + \ (C) * (P)->chunk_size)) /* * Make/parse handles */ #define HMAKE(P, C) ((((P) & MBP_PMSK) << 16) | ((C) << 7)) #define HPAGE(H) (((H) >> 16) & MBP_PMSK) #define HCHUNK(H) (((H) >> 7) & MBP_CMSK) /* * initialize a pool */ int mbp_create(struct mbpool **pp, const char *name, bus_dma_tag_t dmat, u_int max_pages, size_t page_size, size_t chunk_size) { u_int nchunks; if (max_pages > MBPOOL_MAX_MAXPAGES || chunk_size == 0) return (EINVAL); nchunks = page_size / chunk_size; if (nchunks == 0 || nchunks > MBPOOL_MAX_CHUNKS) return (EINVAL); (*pp) = malloc(sizeof(struct mbpool) + max_pages * sizeof(struct mbpage), M_MBPOOL, M_WAITOK | M_ZERO); (*pp)->name = name; (*pp)->dmat = dmat; (*pp)->max_pages = max_pages; (*pp)->page_size = page_size; (*pp)->chunk_size = chunk_size; (*pp)->nchunks = nchunks; SLIST_INIT(&(*pp)->free_list); mtx_init(&(*pp)->free_lock, name, NULL, MTX_DEF); return (0); } /* * destroy a pool */ void mbp_destroy(struct mbpool *p) { u_int i; struct mbpage *pg; #ifdef DIAGNOSTIC struct mbtrail *tr; u_int b; #endif for (i = 0; i < p->npages; i++) { pg = &p->pages[i]; #ifdef 
DIAGNOSTIC for (b = 0; b < p->nchunks; b++) { tr = C2T(p, N2C(p, pg, b)); if (tr->page & MBP_CARD) printf("%s: (%s) buf still on card" " %u/%u\n", __func__, p->name, i, b); if (tr->page & MBP_USED) printf("%s: (%s) sbuf still in use" " %u/%u\n", __func__, p->name, i, b); } #endif bus_dmamap_unload(p->dmat, pg->map); bus_dmamem_free(p->dmat, pg->va, pg->map); } mtx_destroy(&p->free_lock); free(p, M_MBPOOL); } /* * Helper function when loading a one segment DMA buffer. */ static void mbp_callback(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { if (error == 0) *(bus_addr_t *)arg = segs[0].ds_addr; } /* * Allocate a new page */ static void mbp_alloc_page(struct mbpool *p) { int error; struct mbpage *pg; u_int i; struct mbfree *f; struct mbtrail *t; if (p->npages == p->max_pages) { #ifdef DIAGNOSTIC printf("%s: (%s) page limit reached %u\n", __func__, p->name, p->max_pages); #endif return; } pg = &p->pages[p->npages]; error = bus_dmamem_alloc(p->dmat, &pg->va, BUS_DMA_NOWAIT, &pg->map); if (error != 0) return; error = bus_dmamap_load(p->dmat, pg->map, pg->va, p->page_size, mbp_callback, &pg->phy, 0); if (error != 0) { bus_dmamem_free(p->dmat, pg->va, pg->map); return; } for (i = 0; i < p->nchunks; i++) { f = N2C(p, pg, i); t = C2T(p, f); t->page = p->npages; t->chunk = i; SLIST_INSERT_HEAD(&p->free_list, f, link); } p->npages++; } /* * allocate a chunk */ void * mbp_alloc(struct mbpool *p, bus_addr_t *pap, uint32_t *hp) { struct mbfree *cf; struct mbtrail *t; mtx_lock(&p->free_lock); if ((cf = SLIST_FIRST(&p->free_list)) == NULL) { mbp_alloc_page(p); cf = SLIST_FIRST(&p->free_list); } if (cf == NULL) { mtx_unlock(&p->free_lock); return (NULL); } SLIST_REMOVE_HEAD(&p->free_list, link); mtx_unlock(&p->free_lock); t = C2T(p, cf); *pap = p->pages[t->page].phy + t->chunk * p->chunk_size; *hp = HMAKE(t->page, t->chunk); t->page |= MBP_CARD | MBP_USED; return (cf); } /* * Free a chunk */ void mbp_free(struct mbpool *p, void *ptr) { struct mbtrail *t; mtx_lock(&p->free_lock); t = C2T(p, ptr); t->page &= ~(MBP_USED | MBP_CARD); SLIST_INSERT_HEAD(&p->free_list, (struct mbfree *)ptr, link); mtx_unlock(&p->free_lock); } /* * Mbuf system external mbuf free routine */ void -mbp_ext_free(struct mbuf *m, void *buf, void *arg) +mbp_ext_free(struct mbuf *m) { - mbp_free(arg, buf); + mbp_free(m->m_ext.ext_arg2, m->m_ext.ext_arg1); } /* * Free all buffers that are marked as being on the card */ void mbp_card_free(struct mbpool *p) { u_int i, b; struct mbpage *pg; struct mbtrail *tr; struct mbfree *cf; mtx_lock(&p->free_lock); for (i = 0; i < p->npages; i++) { pg = &p->pages[i]; for (b = 0; b < p->nchunks; b++) { cf = N2C(p, pg, b); tr = C2T(p, cf); if (tr->page & MBP_CARD) { tr->page &= MBP_PMSK; SLIST_INSERT_HEAD(&p->free_list, cf, link); } } } mtx_unlock(&p->free_lock); } /* * Count buffers */ void mbp_count(struct mbpool *p, u_int *used, u_int *card, u_int *free) { u_int i, b; struct mbpage *pg; struct mbtrail *tr; struct mbfree *cf; *used = *card = *free = 0; for (i = 0; i < p->npages; i++) { pg = &p->pages[i]; for (b = 0; b < p->nchunks; b++) { tr = C2T(p, N2C(p, pg, b)); if (tr->page & MBP_CARD) (*card)++; if (tr->page & MBP_USED) (*used)++; } } mtx_lock(&p->free_lock); SLIST_FOREACH(cf, &p->free_list, link) (*free)++; mtx_unlock(&p->free_lock); } /* * Get the buffer from a handle and clear the card flag. 
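 *
 * A hedged sketch of the intended driver flow (pool, m and chunk_size are
 * the driver's own, hypothetical here): allocate a chunk, post its
 * physical address and handle to the card, and on completion map the
 * handle back and attach the chunk as external storage. With the new
 * one-argument free routine, mbp_ext_free() above expects the buffer in
 * ext_arg1 and the pool in ext_arg2:
 *
 *	void *buf;
 *	bus_addr_t pa;
 *	uint32_t h;
 *
 *	buf = mbp_alloc(pool, &pa, &h);
 *	... post pa and h to the card ...
 *
 *	buf = mbp_get(pool, h);
 *	MEXTADD(m, buf, chunk_size - MBPOOL_TRAILER_SIZE, mbp_ext_free,
 *	    buf, pool, 0, EXT_NET_DRV);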
*/ void * mbp_get(struct mbpool *p, uint32_t h) { struct mbfree *cf; struct mbtrail *tr; cf = N2C(p, &p->pages[HPAGE(h)], HCHUNK(h)); tr = C2T(p, cf); #ifdef DIAGNOSTIC if (!(tr->page & MBP_CARD)) printf("%s: (%s) chunk %u page %u not on card\n", __func__, p->name, HCHUNK(h), HPAGE(h)); #endif tr->page &= ~MBP_CARD; return (cf); } /* * Get the buffer from a handle and keep the card flag. */ void * mbp_get_keep(struct mbpool *p, uint32_t h) { struct mbfree *cf; struct mbtrail *tr; cf = N2C(p, &p->pages[HPAGE(h)], HCHUNK(h)); tr = C2T(p, cf); #ifdef DIAGNOSTIC if (!(tr->page & MBP_CARD)) printf("%s: (%s) chunk %u page %u not on card\n", __func__, p->name, HCHUNK(h), HPAGE(h)); #endif return (cf); } /* * sync the chunk */ void mbp_sync(struct mbpool *p, uint32_t h, bus_addr_t off, bus_size_t len, u_int op) { #if 0 bus_dmamap_sync_size(p->dmat, p->pages[HPAGE(h)].map, HCHUNK(h) * p->chunk_size + off, len, op); #endif } Index: head/sys/sys/mbpool.h =================================================================== --- head/sys/sys/mbpool.h (revision 324445) +++ head/sys/sys/mbpool.h (revision 324446) @@ -1,90 +1,90 @@ /*- * Copyright (c) 2003 * Fraunhofer Institute for Open Communication Systems (FhG Fokus). * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Author: Hartmut Brandt */ /* * This implements pools of DMA-able buffers that conserve DMA address space * by putting several buffers into one page and that allow to map between * 32-bit handles for the buffer and buffer addresses (to use 32-bit network * interfaces on 64bit machines). This assists network interfaces that may need * huge numbers of mbufs. 
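 *
 * As an illustration of the handle mapping (a hedged restatement of the
 * HMAKE/HPAGE/HCHUNK macros in subr_mbpool.c above): a handle carries the
 * page number in bits 16-29 and the chunk number in bits 7-15, so
 *
 *	h = HMAKE(page, chunk);
 *	page = HPAGE(h);
 *	chunk = HCHUNK(h);
 *
 * and the remaining bits (the top two and the low seven) pass through the
 * macros untouched, free for the caller's use.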
* * $FreeBSD$ */ #ifndef _SYS_MBPOOL_H_ #define _SYS_MBPOOL_H_ #ifdef _KERNEL #include /* opaque */ struct mbpool; /* size of reserved area at end of each chunk */ #define MBPOOL_TRAILER_SIZE 4 /* maximum value of max_pages */ #define MBPOOL_MAX_MAXPAGES ((1 << 14) - 1) /* maximum number of chunks per page */ #define MBPOOL_MAX_CHUNKS (1 << 9) /* initialize a pool */ int mbp_create(struct mbpool **, const char *, bus_dma_tag_t, u_int, size_t, size_t); /* destroy a pool */ void mbp_destroy(struct mbpool *); /* allocate a chunk and set used and on card */ void *mbp_alloc(struct mbpool *, bus_addr_t *, uint32_t *); /* free a chunk */ void mbp_free(struct mbpool *, void *); /* free a chunk that is an external mbuf */ -void mbp_ext_free(struct mbuf *, void *, void *); +void mbp_ext_free(struct mbuf *); /* free all buffers that are marked to be on the card */ void mbp_card_free(struct mbpool *); /* count used buffers and buffers on card */ void mbp_count(struct mbpool *, u_int *, u_int *, u_int *); /* get the buffer from a handle and clear card bit */ void *mbp_get(struct mbpool *, uint32_t); /* get the buffer from a handle and don't clear card bit */ void *mbp_get_keep(struct mbpool *, uint32_t); /* sync the chunk */ void mbp_sync(struct mbpool *, uint32_t, bus_addr_t, bus_size_t, u_int); #endif /* _KERNEL */ #endif /* _SYS_MBPOOL_H_ */ Index: head/sys/sys/mbuf.h =================================================================== --- head/sys/sys/mbuf.h (revision 324445) +++ head/sys/sys/mbuf.h (revision 324446) @@ -1,1344 +1,1343 @@ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)mbuf.h 8.5 (Berkeley) 2/19/95 * $FreeBSD$ */ #ifndef _SYS_MBUF_H_ #define _SYS_MBUF_H_ /* XXX: These includes suck. Sorry! 
*/ #include #ifdef _KERNEL #include #include #ifdef WITNESS #include #endif #endif #ifdef _KERNEL #include #define MBUF_PROBE1(probe, arg0) \ SDT_PROBE1(sdt, , , probe, arg0) #define MBUF_PROBE2(probe, arg0, arg1) \ SDT_PROBE2(sdt, , , probe, arg0, arg1) #define MBUF_PROBE3(probe, arg0, arg1, arg2) \ SDT_PROBE3(sdt, , , probe, arg0, arg1, arg2) #define MBUF_PROBE4(probe, arg0, arg1, arg2, arg3) \ SDT_PROBE4(sdt, , , probe, arg0, arg1, arg2, arg3) #define MBUF_PROBE5(probe, arg0, arg1, arg2, arg3, arg4) \ SDT_PROBE5(sdt, , , probe, arg0, arg1, arg2, arg3, arg4) SDT_PROBE_DECLARE(sdt, , , m__init); SDT_PROBE_DECLARE(sdt, , , m__gethdr); SDT_PROBE_DECLARE(sdt, , , m__get); SDT_PROBE_DECLARE(sdt, , , m__getcl); SDT_PROBE_DECLARE(sdt, , , m__clget); SDT_PROBE_DECLARE(sdt, , , m__cljget); SDT_PROBE_DECLARE(sdt, , , m__cljset); SDT_PROBE_DECLARE(sdt, , , m__free); SDT_PROBE_DECLARE(sdt, , , m__freem); #endif /* _KERNEL */ /* * Mbufs are of a single size, MSIZE (sys/param.h), which includes overhead. * An mbuf may add a single "mbuf cluster" of size MCLBYTES (also in * sys/param.h), which has no additional overhead and is used instead of the * internal data area; this is done when at least MINCLSIZE of data must be * stored. Additionally, it is possible to allocate a separate buffer * externally and attach it to the mbuf in a way similar to that of mbuf * clusters. * * NB: These calculation do not take actual compiler-induced alignment and * padding inside the complete struct mbuf into account. Appropriate * attention is required when changing members of struct mbuf. * * MLEN is data length in a normal mbuf. * MHLEN is data length in an mbuf with pktheader. * MINCLSIZE is a smallest amount of data that should be put into cluster. * * Compile-time assertions in uipc_mbuf.c test these values to ensure that * they are sensible. */ struct mbuf; #define MHSIZE offsetof(struct mbuf, m_dat) #define MPKTHSIZE offsetof(struct mbuf, m_pktdat) #define MLEN ((int)(MSIZE - MHSIZE)) #define MHLEN ((int)(MSIZE - MPKTHSIZE)) #define MINCLSIZE (MHLEN + 1) #ifdef _KERNEL /*- * Macro for type conversion: convert mbuf pointer to data pointer of correct * type: * * mtod(m, t) -- Convert mbuf pointer to data pointer of correct type. * mtodo(m, o) -- Same as above but with offset 'o' into data. */ #define mtod(m, t) ((t)((m)->m_data)) #define mtodo(m, o) ((void *)(((m)->m_data) + (o))) /* * Argument structure passed to UMA routines during mbuf and packet * allocations. */ struct mb_args { int flags; /* Flags for mbuf being allocated */ short type; /* Type of mbuf being allocated */ }; #endif /* _KERNEL */ /* * Packet tag structure (see below for details). */ struct m_tag { SLIST_ENTRY(m_tag) m_tag_link; /* List of packet tags */ u_int16_t m_tag_id; /* Tag ID */ u_int16_t m_tag_len; /* Length of data */ u_int32_t m_tag_cookie; /* ABI/Module ID */ void (*m_tag_free)(struct m_tag *); }; /* * Static network interface owned tag. * Allocated through ifp->if_snd_tag_alloc(). */ struct m_snd_tag { struct ifnet *ifp; /* network interface tag belongs to */ }; /* * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set. * Size ILP32: 48 * LP64: 56 * Compile-time assertions in uipc_mbuf.c test these values to ensure that * they are correct. */ struct pkthdr { union { struct m_snd_tag *snd_tag; /* send tag, if any */ struct ifnet *rcvif; /* rcv interface */ }; SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ int32_t len; /* total packet length */ /* Layer crossing persistent information. 
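 * For illustration (a hedged sketch, not part of this change; hw_rss_hash
 * is hypothetical): a driver receive path typically stores the
 * NIC-computed RSS hash in flowid and records its kind with the
 * M_HASHTYPE_* macros defined further below:
 *
 *	m->m_pkthdr.flowid = hw_rss_hash;
 *	M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV4);
 *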
*/ uint32_t flowid; /* packet's 4-tuple system */ uint64_t csum_flags; /* checksum and offload features */ uint16_t fibnum; /* this packet should use this fib */ uint8_t cosqos; /* class/quality of service */ uint8_t rsstype; /* hash type */ uint8_t l2hlen; /* layer 2 header length */ uint8_t l3hlen; /* layer 3 header length */ uint8_t l4hlen; /* layer 4 header length */ uint8_t l5hlen; /* layer 5 header length */ union { uint8_t eight[8]; uint16_t sixteen[4]; uint32_t thirtytwo[2]; uint64_t sixtyfour[1]; uintptr_t unintptr[1]; void *ptr; } PH_per; /* Layer specific non-persistent local storage for reassembly, etc. */ union { uint8_t eight[8]; uint16_t sixteen[4]; uint32_t thirtytwo[2]; uint64_t sixtyfour[1]; uintptr_t unintptr[1]; void *ptr; } PH_loc; }; #define ether_vtag PH_per.sixteen[0] #define PH_vt PH_per #define vt_nrecs sixteen[0] #define tso_segsz PH_per.sixteen[1] #define lro_nsegs tso_segsz #define csum_phsum PH_per.sixteen[2] #define csum_data PH_per.thirtytwo[1] /* * Description of external storage mapped into mbuf; valid only if M_EXT is * set. * Size ILP32: 28 * LP64: 48 * Compile-time assertions in uipc_mbuf.c test these values to ensure that * they are correct. */ +typedef void m_ext_free_t(struct mbuf *); struct m_ext { union { volatile u_int ext_count; /* value of ref count info */ volatile u_int *ext_cnt; /* pointer to ref count info */ }; - caddr_t ext_buf; /* start of buffer */ + char *ext_buf; /* start of buffer */ uint32_t ext_size; /* size of buffer, for ext_free */ uint32_t ext_type:8, /* type of external storage */ ext_flags:24; /* external storage mbuf flags */ - void (*ext_free) /* free routine if not the usual */ - (struct mbuf *, void *, void *); + m_ext_free_t *ext_free; /* free routine if not the usual */ void *ext_arg1; /* optional argument pointer */ void *ext_arg2; /* optional argument pointer */ }; /* * The core of the mbuf object along with some shortcut defines for practical * purposes. */ struct mbuf { /* * Header present at the beginning of every mbuf. * Size ILP32: 24 * LP64: 32 * Compile-time assertions in uipc_mbuf.c test these values to ensure * that they are correct. */ union { /* next buffer in chain */ struct mbuf *m_next; SLIST_ENTRY(mbuf) m_slist; STAILQ_ENTRY(mbuf) m_stailq; }; union { /* next chain in queue/record */ struct mbuf *m_nextpkt; SLIST_ENTRY(mbuf) m_slistpkt; STAILQ_ENTRY(mbuf) m_stailqpkt; }; caddr_t m_data; /* location of data */ int32_t m_len; /* amount of data in this mbuf */ uint32_t m_type:8, /* type of data in this mbuf */ m_flags:24; /* flags; see below */ #if !defined(__LP64__) uint32_t m_pad; /* pad for 64bit alignment */ #endif /* * A set of optional headers (packet header, external storage header) * and internal data storage. Historically, these arrays were sized * to MHLEN (space left after a packet header) and MLEN (space left * after only a regular mbuf header); they are now variable size in * order to support future work on variable-size mbufs. */ union { struct { struct pkthdr m_pkthdr; /* M_PKTHDR set */ union { struct m_ext m_ext; /* M_EXT set */ char m_pktdat[0]; }; }; char m_dat[0]; /* !M_PKTHDR, !M_EXT */ }; }; /* * mbuf flags of global significance and layer crossing. * Those of only protocol/layer specific significance are to be mapped * to M_PROTO[1-12] and cleared at layer handoff boundaries. * NB: Limited to the lower 24 bits. 
*/ #define M_EXT 0x00000001 /* has associated external storage */ #define M_PKTHDR 0x00000002 /* start of record */ #define M_EOR 0x00000004 /* end of record */ #define M_RDONLY 0x00000008 /* associated data is marked read-only */ #define M_BCAST 0x00000010 /* send/received as link-level broadcast */ #define M_MCAST 0x00000020 /* send/received as link-level multicast */ #define M_PROMISC 0x00000040 /* packet was not for us */ #define M_VLANTAG 0x00000080 /* ether_vtag is valid */ #define M_UNUSED_8 0x00000100 /* --available-- */ #define M_NOFREE 0x00000200 /* do not free mbuf, embedded in cluster */ #define M_PROTO1 0x00001000 /* protocol-specific */ #define M_PROTO2 0x00002000 /* protocol-specific */ #define M_PROTO3 0x00004000 /* protocol-specific */ #define M_PROTO4 0x00008000 /* protocol-specific */ #define M_PROTO5 0x00010000 /* protocol-specific */ #define M_PROTO6 0x00020000 /* protocol-specific */ #define M_PROTO7 0x00040000 /* protocol-specific */ #define M_PROTO8 0x00080000 /* protocol-specific */ #define M_PROTO9 0x00100000 /* protocol-specific */ #define M_PROTO10 0x00200000 /* protocol-specific */ #define M_PROTO11 0x00400000 /* protocol-specific */ #define M_PROTO12 0x00800000 /* protocol-specific */ #define MB_DTOR_SKIP 0x1 /* don't pollute the cache by touching a freed mbuf */ /* * Flags to purge when crossing layers. */ #define M_PROTOFLAGS \ (M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8|\ M_PROTO9|M_PROTO10|M_PROTO11|M_PROTO12) /* * Flags preserved when copying m_pkthdr. */ #define M_COPYFLAGS \ (M_PKTHDR|M_EOR|M_RDONLY|M_BCAST|M_MCAST|M_PROMISC|M_VLANTAG| \ M_PROTOFLAGS) /* * Mbuf flag description for use with printf(9) %b identifier. */ #define M_FLAG_BITS \ "\20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY\5M_BCAST\6M_MCAST" \ "\7M_PROMISC\10M_VLANTAG" #define M_FLAG_PROTOBITS \ "\15M_PROTO1\16M_PROTO2\17M_PROTO3\20M_PROTO4\21M_PROTO5" \ "\22M_PROTO6\23M_PROTO7\24M_PROTO8\25M_PROTO9\26M_PROTO10" \ "\27M_PROTO11\30M_PROTO12" #define M_FLAG_PRINTF (M_FLAG_BITS M_FLAG_PROTOBITS) /* * Network interface cards are able to hash protocol fields (such as IPv4 * addresses and TCP port numbers) to classify packets into flows. These flows * can then be used to maintain ordering while delivering packets to the OS * via parallel input queues, as well as to provide a stateless affinity * model. NIC drivers can pass up the hash via m->m_pkthdr.flowid, and set * m_flag fields to indicate how the hash should be interpreted by the * network stack. * * Most NICs support RSS, which provides ordering and explicit affinity, and * use the hash m_flag bits to indicate what header fields were covered by * the hash. M_HASHTYPE_OPAQUE and M_HASHTYPE_OPAQUE_HASH can be set by non- * RSS cards or configurations that provide an opaque flow identifier, allowing * for ordering and distribution without explicit affinity. Additionally, * M_HASHTYPE_OPAQUE_HASH indicates that the flow identifier has hash * properties. * * The meaning of the IPV6_EX suffix: * "o Home address from the home address option in the IPv6 destination * options header. If the extension header is not present, use the Source * IPv6 Address. * o IPv6 address that is contained in the Routing-Header-Type-2 from the * associated extension header. If the extension header is not present, * use the Destination IPv6 Address."
* Quoted from: * https://docs.microsoft.com/en-us/windows-hardware/drivers/network/rss-hashing-types#ndishashipv6ex */ #define M_HASHTYPE_HASHPROP 0x80 /* has hash properties */ #define M_HASHTYPE_HASH(t) (M_HASHTYPE_HASHPROP | (t)) /* Microsoft RSS standard hash types */ #define M_HASHTYPE_NONE 0 #define M_HASHTYPE_RSS_IPV4 M_HASHTYPE_HASH(1) /* IPv4 2-tuple */ #define M_HASHTYPE_RSS_TCP_IPV4 M_HASHTYPE_HASH(2) /* TCPv4 4-tuple */ #define M_HASHTYPE_RSS_IPV6 M_HASHTYPE_HASH(3) /* IPv6 2-tuple */ #define M_HASHTYPE_RSS_TCP_IPV6 M_HASHTYPE_HASH(4) /* TCPv6 4-tuple */ #define M_HASHTYPE_RSS_IPV6_EX M_HASHTYPE_HASH(5) /* IPv6 2-tuple + * ext hdrs */ #define M_HASHTYPE_RSS_TCP_IPV6_EX M_HASHTYPE_HASH(6) /* TCPv6 4-tuple + * ext hdrs */ #define M_HASHTYPE_RSS_UDP_IPV4 M_HASHTYPE_HASH(7) /* IPv4 UDP 4-tuple*/ #define M_HASHTYPE_RSS_UDP_IPV6 M_HASHTYPE_HASH(9) /* IPv6 UDP 4-tuple*/ #define M_HASHTYPE_RSS_UDP_IPV6_EX M_HASHTYPE_HASH(10)/* IPv6 UDP 4-tuple + * ext hdrs */ #define M_HASHTYPE_OPAQUE 63 /* ordering, not affinity */ #define M_HASHTYPE_OPAQUE_HASH M_HASHTYPE_HASH(M_HASHTYPE_OPAQUE) /* ordering+hash, not affinity*/ #define M_HASHTYPE_CLEAR(m) ((m)->m_pkthdr.rsstype = 0) #define M_HASHTYPE_GET(m) ((m)->m_pkthdr.rsstype) #define M_HASHTYPE_SET(m, v) ((m)->m_pkthdr.rsstype = (v)) #define M_HASHTYPE_TEST(m, v) (M_HASHTYPE_GET(m) == (v)) #define M_HASHTYPE_ISHASH(m) (M_HASHTYPE_GET(m) & M_HASHTYPE_HASHPROP) /* * COS/QOS class and quality of service tags. * It uses DSCP code points as base. */ #define QOS_DSCP_CS0 0x00 #define QOS_DSCP_DEF QOS_DSCP_CS0 #define QOS_DSCP_CS1 0x20 #define QOS_DSCP_AF11 0x28 #define QOS_DSCP_AF12 0x30 #define QOS_DSCP_AF13 0x38 #define QOS_DSCP_CS2 0x40 #define QOS_DSCP_AF21 0x48 #define QOS_DSCP_AF22 0x50 #define QOS_DSCP_AF23 0x58 #define QOS_DSCP_CS3 0x60 #define QOS_DSCP_AF31 0x68 #define QOS_DSCP_AF32 0x70 #define QOS_DSCP_AF33 0x78 #define QOS_DSCP_CS4 0x80 #define QOS_DSCP_AF41 0x88 #define QOS_DSCP_AF42 0x90 #define QOS_DSCP_AF43 0x98 #define QOS_DSCP_CS5 0xa0 #define QOS_DSCP_EF 0xb8 #define QOS_DSCP_CS6 0xc0 #define QOS_DSCP_CS7 0xe0 /* * External mbuf storage buffer types. */ #define EXT_CLUSTER 1 /* mbuf cluster */ #define EXT_SFBUF 2 /* sendfile(2)'s sf_buf */ #define EXT_JUMBOP 3 /* jumbo cluster page sized */ #define EXT_JUMBO9 4 /* jumbo cluster 9216 bytes */ #define EXT_JUMBO16 5 /* jumbo cluster 16384 bytes */ #define EXT_PACKET 6 /* mbuf+cluster from packet zone */ #define EXT_MBUF 7 /* external mbuf reference */ #define EXT_SFBUF_NOCACHE 8 /* sendfile(2)'s sf_buf not to be cached */ #define EXT_VENDOR1 224 /* for vendor-internal use */ #define EXT_VENDOR2 225 /* for vendor-internal use */ #define EXT_VENDOR3 226 /* for vendor-internal use */ #define EXT_VENDOR4 227 /* for vendor-internal use */ #define EXT_EXP1 244 /* for experimental use */ #define EXT_EXP2 245 /* for experimental use */ #define EXT_EXP3 246 /* for experimental use */ #define EXT_EXP4 247 /* for experimental use */ #define EXT_NET_DRV 252 /* custom ext_buf provided by net driver(s) */ #define EXT_MOD_TYPE 253 /* custom module's ext_buf type */ #define EXT_DISPOSABLE 254 /* can throw this buffer away w/page flipping */ #define EXT_EXTREF 255 /* has externally maintained ext_cnt ptr */ /* * Flags for external mbuf buffer types. * NB: limited to the lower 24 bits.
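 *
 * A hedged note (not part of this change): when EXT_FLAG_EMBREF is set,
 * the reference count is the ext_count member embedded in the mbuf that
 * owns the buffer; otherwise ext_cnt points to an externally maintained
 * counter. Rather than picking the union member by hand, use the inline
 * defined further below:
 *
 *	refs = m_extrefcnt(m);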
*/ #define EXT_FLAG_EMBREF 0x000001 /* embedded ext_count */ #define EXT_FLAG_EXTREF 0x000002 /* external ext_cnt, notyet */ #define EXT_FLAG_NOFREE 0x000010 /* don't free mbuf to pool, notyet */ -#define EXT_FLAG_VENDOR1 0x010000 /* for vendor-internal use */ -#define EXT_FLAG_VENDOR2 0x020000 /* for vendor-internal use */ -#define EXT_FLAG_VENDOR3 0x040000 /* for vendor-internal use */ -#define EXT_FLAG_VENDOR4 0x080000 /* for vendor-internal use */ +#define EXT_FLAG_VENDOR1 0x010000 /* These flags are vendor */ +#define EXT_FLAG_VENDOR2 0x020000 /* or submodule specific, */ +#define EXT_FLAG_VENDOR3 0x040000 /* not used by mbuf code. */ +#define EXT_FLAG_VENDOR4 0x080000 /* Set/read by submodule. */ #define EXT_FLAG_EXP1 0x100000 /* for experimental use */ #define EXT_FLAG_EXP2 0x200000 /* for experimental use */ #define EXT_FLAG_EXP3 0x400000 /* for experimental use */ #define EXT_FLAG_EXP4 0x800000 /* for experimental use */ /* * EXT flag description for use with printf(9) %b identifier. */ #define EXT_FLAG_BITS \ "\20\1EXT_FLAG_EMBREF\2EXT_FLAG_EXTREF\5EXT_FLAG_NOFREE" \ "\21EXT_FLAG_VENDOR1\22EXT_FLAG_VENDOR2\23EXT_FLAG_VENDOR3" \ "\24EXT_FLAG_VENDOR4\25EXT_FLAG_EXP1\26EXT_FLAG_EXP2\27EXT_FLAG_EXP3" \ "\30EXT_FLAG_EXP4" /* * External reference/free functions. */ void sf_ext_free(void *, void *); void sf_ext_free_nocache(void *, void *); /* * Flags indicating checksum, segmentation and other offload work to be * done, or already done, by hardware or lower layers. It is split into * separate inbound and outbound flags. * * Outbound flags that are set by upper protocol layers requesting lower * layers, or ideally the hardware, to perform these offloading tasks. * For outbound packets this field and its flags can be directly tested * against ifnet if_hwassist. */ #define CSUM_IP 0x00000001 /* IP header checksum offload */ #define CSUM_IP_UDP 0x00000002 /* UDP checksum offload */ #define CSUM_IP_TCP 0x00000004 /* TCP checksum offload */ #define CSUM_IP_SCTP 0x00000008 /* SCTP checksum offload */ #define CSUM_IP_TSO 0x00000010 /* TCP segmentation offload */ #define CSUM_IP_ISCSI 0x00000020 /* iSCSI checksum offload */ #define CSUM_IP6_UDP 0x00000200 /* UDP checksum offload */ #define CSUM_IP6_TCP 0x00000400 /* TCP checksum offload */ #define CSUM_IP6_SCTP 0x00000800 /* SCTP checksum offload */ #define CSUM_IP6_TSO 0x00001000 /* TCP segmentation offload */ #define CSUM_IP6_ISCSI 0x00002000 /* iSCSI checksum offload */ /* Inbound checksum support where the checksum was verified by hardware. */ #define CSUM_L3_CALC 0x01000000 /* calculated layer 3 csum */ #define CSUM_L3_VALID 0x02000000 /* checksum is correct */ #define CSUM_L4_CALC 0x04000000 /* calculated layer 4 csum */ #define CSUM_L4_VALID 0x08000000 /* checksum is correct */ #define CSUM_L5_CALC 0x10000000 /* calculated layer 5 csum */ #define CSUM_L5_VALID 0x20000000 /* checksum is correct */ #define CSUM_COALESCED 0x40000000 /* contains merged segments */ /* * CSUM flag description for use with printf(9) %b identifier. */ #define CSUM_BITS \ "\20\1CSUM_IP\2CSUM_IP_UDP\3CSUM_IP_TCP\4CSUM_IP_SCTP\5CSUM_IP_TSO" \ "\6CSUM_IP_ISCSI" \ "\12CSUM_IP6_UDP\13CSUM_IP6_TCP\14CSUM_IP6_SCTP\15CSUM_IP6_TSO" \ "\16CSUM_IP6_ISCSI" \ "\31CSUM_L3_CALC\32CSUM_L3_VALID\33CSUM_L4_CALC\34CSUM_L4_VALID" \ "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESCED" /* CSUM flags compatibility mappings. 
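 *
 * For illustration (a hedged sketch): these mappings let input code test
 * hardware-verified checksums uniformly for IPv4 and IPv6, e.g.:
 *
 *	if ((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
 *	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR))
 *		... the L4 checksum was already verified in hardware ...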
*/ #define CSUM_IP_CHECKED CSUM_L3_CALC #define CSUM_IP_VALID CSUM_L3_VALID #define CSUM_DATA_VALID CSUM_L4_VALID #define CSUM_PSEUDO_HDR CSUM_L4_CALC #define CSUM_SCTP_VALID CSUM_L4_VALID #define CSUM_DELAY_DATA (CSUM_TCP|CSUM_UDP) #define CSUM_DELAY_IP CSUM_IP /* Only v4, no v6 IP hdr csum */ #define CSUM_DELAY_DATA_IPV6 (CSUM_TCP_IPV6|CSUM_UDP_IPV6) #define CSUM_DATA_VALID_IPV6 CSUM_DATA_VALID #define CSUM_TCP CSUM_IP_TCP #define CSUM_UDP CSUM_IP_UDP #define CSUM_SCTP CSUM_IP_SCTP #define CSUM_TSO (CSUM_IP_TSO|CSUM_IP6_TSO) #define CSUM_UDP_IPV6 CSUM_IP6_UDP #define CSUM_TCP_IPV6 CSUM_IP6_TCP #define CSUM_SCTP_IPV6 CSUM_IP6_SCTP /* * mbuf types describing the content of the mbuf (including external storage). */ #define MT_NOTMBUF 0 /* USED INTERNALLY ONLY! Object is not mbuf */ #define MT_DATA 1 /* dynamic (data) allocation */ #define MT_HEADER MT_DATA /* packet header, use M_PKTHDR instead */ #define MT_VENDOR1 4 /* for vendor-internal use */ #define MT_VENDOR2 5 /* for vendor-internal use */ #define MT_VENDOR3 6 /* for vendor-internal use */ #define MT_VENDOR4 7 /* for vendor-internal use */ #define MT_SONAME 8 /* socket name */ #define MT_EXP1 9 /* for experimental use */ #define MT_EXP2 10 /* for experimental use */ #define MT_EXP3 11 /* for experimental use */ #define MT_EXP4 12 /* for experimental use */ #define MT_CONTROL 14 /* extra-data protocol message */ #define MT_OOBDATA 15 /* expedited data */ #define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */ #define MT_NOINIT 255 /* Not a type but a flag to allocate a non-initialized mbuf */ /* * String names of mbuf-related UMA(9) and malloc(9) types. Exposed to * !_KERNEL so that monitoring tools can look up the zones with * libmemstat(3). */ #define MBUF_MEM_NAME "mbuf" #define MBUF_CLUSTER_MEM_NAME "mbuf_cluster" #define MBUF_PACKET_MEM_NAME "mbuf_packet" #define MBUF_JUMBOP_MEM_NAME "mbuf_jumbo_page" #define MBUF_JUMBO9_MEM_NAME "mbuf_jumbo_9k" #define MBUF_JUMBO16_MEM_NAME "mbuf_jumbo_16k" #define MBUF_TAG_MEM_NAME "mbuf_tag" #define MBUF_EXTREFCNT_MEM_NAME "mbuf_ext_refcnt" #ifdef _KERNEL #ifdef WITNESS #define MBUF_CHECKSLEEP(how) do { \ if (how == M_WAITOK) \ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, \ "Sleeping in \"%s\"", __func__); \ } while (0) #else #define MBUF_CHECKSLEEP(how) #endif /* * Network buffer allocation API * * The rest of it is defined in kern/kern_mbuf.c */ extern uma_zone_t zone_mbuf; extern uma_zone_t zone_clust; extern uma_zone_t zone_pack; extern uma_zone_t zone_jumbop; extern uma_zone_t zone_jumbo9; extern uma_zone_t zone_jumbo16; void mb_dupcl(struct mbuf *, struct mbuf *); void mb_free_ext(struct mbuf *); void m_adj(struct mbuf *, int); int m_apply(struct mbuf *, int, int, int (*)(void *, void *, u_int), void *); int m_append(struct mbuf *, int, c_caddr_t); void m_cat(struct mbuf *, struct mbuf *); void m_catpkt(struct mbuf *, struct mbuf *); int m_clget(struct mbuf *m, int how); void *m_cljget(struct mbuf *m, int how, int size); struct mbuf *m_collapse(struct mbuf *, int, int); void m_copyback(struct mbuf *, int, int, c_caddr_t); void m_copydata(const struct mbuf *, int, int, caddr_t); struct mbuf *m_copym(struct mbuf *, int, int, int); struct mbuf *m_copypacket(struct mbuf *, int); void m_copy_pkthdr(struct mbuf *, struct mbuf *); struct mbuf *m_copyup(struct mbuf *, int, int); struct mbuf *m_defrag(struct mbuf *, int); void m_demote_pkthdr(struct mbuf *); void m_demote(struct mbuf *, int, int); struct mbuf *m_devget(char *, int, int, struct ifnet *, void (*)(char *, caddr_t, u_int)); 
struct mbuf *m_dup(const struct mbuf *, int); int m_dup_pkthdr(struct mbuf *, const struct mbuf *, int); -void m_extadd(struct mbuf *, caddr_t, u_int, - void (*)(struct mbuf *, void *, void *), void *, void *, - int, int); +void m_extadd(struct mbuf *, char *, u_int, m_ext_free_t, + void *, void *, int, int); u_int m_fixhdr(struct mbuf *); struct mbuf *m_fragment(struct mbuf *, int, int); void m_freem(struct mbuf *); struct mbuf *m_get2(int, int, short, int); struct mbuf *m_getjcl(int, short, int, int); struct mbuf *m_getm2(struct mbuf *, int, int, short, int); struct mbuf *m_getptr(struct mbuf *, int, int *); u_int m_length(struct mbuf *, struct mbuf **); int m_mbuftouio(struct uio *, const struct mbuf *, int); void m_move_pkthdr(struct mbuf *, struct mbuf *); int m_pkthdr_init(struct mbuf *, int); struct mbuf *m_prepend(struct mbuf *, int, int); void m_print(const struct mbuf *, int); struct mbuf *m_pulldown(struct mbuf *, int, int, int *); struct mbuf *m_pullup(struct mbuf *, int); int m_sanity(struct mbuf *, int); struct mbuf *m_split(struct mbuf *, int, int); struct mbuf *m_uiotombuf(struct uio *, int, int, int, int); struct mbuf *m_unshare(struct mbuf *, int); static __inline int m_gettype(int size) { int type; switch (size) { case MSIZE: type = EXT_MBUF; break; case MCLBYTES: type = EXT_CLUSTER; break; #if MJUMPAGESIZE != MCLBYTES case MJUMPAGESIZE: type = EXT_JUMBOP; break; #endif case MJUM9BYTES: type = EXT_JUMBO9; break; case MJUM16BYTES: type = EXT_JUMBO16; break; default: panic("%s: invalid cluster size %d", __func__, size); } return (type); } /* * Associated an external reference counted buffer with an mbuf. */ static __inline void -m_extaddref(struct mbuf *m, caddr_t buf, u_int size, u_int *ref_cnt, - void (*freef)(struct mbuf *, void *, void *), void *arg1, void *arg2) +m_extaddref(struct mbuf *m, char *buf, u_int size, u_int *ref_cnt, + m_ext_free_t freef, void *arg1, void *arg2) { KASSERT(ref_cnt != NULL, ("%s: ref_cnt not provided", __func__)); atomic_add_int(ref_cnt, 1); m->m_flags |= M_EXT; m->m_ext.ext_buf = buf; m->m_ext.ext_cnt = ref_cnt; m->m_data = m->m_ext.ext_buf; m->m_ext.ext_size = size; m->m_ext.ext_free = freef; m->m_ext.ext_arg1 = arg1; m->m_ext.ext_arg2 = arg2; m->m_ext.ext_type = EXT_EXTREF; m->m_ext.ext_flags = 0; } static __inline uma_zone_t m_getzone(int size) { uma_zone_t zone; switch (size) { case MCLBYTES: zone = zone_clust; break; #if MJUMPAGESIZE != MCLBYTES case MJUMPAGESIZE: zone = zone_jumbop; break; #endif case MJUM9BYTES: zone = zone_jumbo9; break; case MJUM16BYTES: zone = zone_jumbo16; break; default: panic("%s: invalid cluster size %d", __func__, size); } return (zone); } /* * Initialize an mbuf with linear storage. * * Inline because the consumer text overhead will be roughly the same to * initialize or call a function with this many parameters and M_PKTHDR * should go away with constant propagation for !MGETHDR. 
*/ static __inline int m_init(struct mbuf *m, int how, short type, int flags) { int error; m->m_next = NULL; m->m_nextpkt = NULL; m->m_data = m->m_dat; m->m_len = 0; m->m_flags = flags; m->m_type = type; if (flags & M_PKTHDR) error = m_pkthdr_init(m, how); else error = 0; MBUF_PROBE5(m__init, m, how, type, flags, error); return (error); } static __inline struct mbuf * m_get(int how, short type) { struct mbuf *m; struct mb_args args; args.flags = 0; args.type = type; m = uma_zalloc_arg(zone_mbuf, &args, how); MBUF_PROBE3(m__get, how, type, m); return (m); } static __inline struct mbuf * m_gethdr(int how, short type) { struct mbuf *m; struct mb_args args; args.flags = M_PKTHDR; args.type = type; m = uma_zalloc_arg(zone_mbuf, &args, how); MBUF_PROBE3(m__gethdr, how, type, m); return (m); } static __inline struct mbuf * m_getcl(int how, short type, int flags) { struct mbuf *m; struct mb_args args; args.flags = flags; args.type = type; m = uma_zalloc_arg(zone_pack, &args, how); MBUF_PROBE4(m__getcl, how, type, flags, m); return (m); } /* * XXX: m_cljset() is a dangerous API. One must attach only a new, * unreferenced cluster to an mbuf(9). It is not possible to assert * that, so care can be taken only by users of the API. */ static __inline void m_cljset(struct mbuf *m, void *cl, int type) { int size; switch (type) { case EXT_CLUSTER: size = MCLBYTES; break; #if MJUMPAGESIZE != MCLBYTES case EXT_JUMBOP: size = MJUMPAGESIZE; break; #endif case EXT_JUMBO9: size = MJUM9BYTES; break; case EXT_JUMBO16: size = MJUM16BYTES; break; default: panic("%s: unknown cluster type %d", __func__, type); break; } m->m_data = m->m_ext.ext_buf = cl; m->m_ext.ext_free = m->m_ext.ext_arg1 = m->m_ext.ext_arg2 = NULL; m->m_ext.ext_size = size; m->m_ext.ext_type = type; m->m_ext.ext_flags = EXT_FLAG_EMBREF; m->m_ext.ext_count = 1; m->m_flags |= M_EXT; MBUF_PROBE3(m__cljset, m, cl, type); } static __inline void m_chtype(struct mbuf *m, short new_type) { m->m_type = new_type; } static __inline void m_clrprotoflags(struct mbuf *m) { while (m) { m->m_flags &= ~M_PROTOFLAGS; m = m->m_next; } } static __inline struct mbuf * m_last(struct mbuf *m) { while (m->m_next) m = m->m_next; return (m); } static inline u_int m_extrefcnt(struct mbuf *m) { KASSERT(m->m_flags & M_EXT, ("%s: M_EXT missing", __func__)); return ((m->m_ext.ext_flags & EXT_FLAG_EMBREF) ? m->m_ext.ext_count : *m->m_ext.ext_cnt); } /* * mbuf, cluster, and external object allocation macros (for compatibility * purposes). */ #define M_MOVE_PKTHDR(to, from) m_move_pkthdr((to), (from)) #define MGET(m, how, type) ((m) = m_get((how), (type))) #define MGETHDR(m, how, type) ((m) = m_gethdr((how), (type))) #define MCLGET(m, how) m_clget((m), (how)) #define MEXTADD(m, buf, size, free, arg1, arg2, flags, type) \ - m_extadd((m), (caddr_t)(buf), (size), (free), (arg1), (arg2), \ + m_extadd((m), (char *)(buf), (size), (free), (arg1), (arg2), \ (flags), (type)) #define m_getm(m, len, how, type) \ m_getm2((m), (len), (how), (type), M_PKTHDR) /* * Evaluate TRUE if it's safe to write to the mbuf m's data region (this can * be both the local data payload, or an external buffer area, depending on * whether M_EXT is set). */ #define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && \ (!(((m)->m_flags & M_EXT)) || \ (m_extrefcnt(m) == 1))) /* Check if the supplied mbuf has a packet header, or else panic. */ #define M_ASSERTPKTHDR(m) \ KASSERT((m) != NULL && (m)->m_flags & M_PKTHDR, \ ("%s: no mbuf packet header!", __func__)) /* * Ensure that the supplied mbuf is a valid, non-free mbuf. 
* * XXX: Broken at the moment. Need some UMA magic to make it work again. */ #define M_ASSERTVALID(m) \ KASSERT((((struct mbuf *)m)->m_flags & 0) == 0, \ ("%s: attempted use of a free mbuf!", __func__)) /* * Return the address of the start of the buffer associated with an mbuf, * handling external storage, packet-header mbufs, and regular data mbufs. */ #define M_START(m) \ (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf : \ ((m)->m_flags & M_PKTHDR) ? &(m)->m_pktdat[0] : \ &(m)->m_dat[0]) /* * Return the size of the buffer associated with an mbuf, handling external * storage, packet-header mbufs, and regular data mbufs. */ #define M_SIZE(m) \ (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_size : \ ((m)->m_flags & M_PKTHDR) ? MHLEN : \ MLEN) /* * Set the m_data pointer of a newly allocated mbuf to place an object of the * specified size at the end of the mbuf, longword aligned. * * NB: Historically, we had M_ALIGN(), MH_ALIGN(), and MEXT_ALIGN() as * separate macros, each asserting that it was called at the proper moment. * This required callers to themselves test the storage type and call the * right one. Rather than require callers to be aware of those layout * decisions, we centralize here. */ static __inline void m_align(struct mbuf *m, int len) { #ifdef INVARIANTS const char *msg = "%s: not a virgin mbuf"; #endif int adjust; KASSERT(m->m_data == M_START(m), (msg, __func__)); adjust = M_SIZE(m) - len; m->m_data += adjust &~ (sizeof(long)-1); } #define M_ALIGN(m, len) m_align(m, len) #define MH_ALIGN(m, len) m_align(m, len) #define MEXT_ALIGN(m, len) m_align(m, len) /* * Compute the amount of space available before the current start of data in * an mbuf. * * The M_WRITABLE() is a temporary, conservative safety measure: the burden * of checking writability of the mbuf data area rests solely with the caller. * * NB: In previous versions, M_LEADINGSPACE() would only check M_WRITABLE() * for mbufs with external storage. We now allow mbuf-embedded data to be * read-only as well. */ #define M_LEADINGSPACE(m) \ (M_WRITABLE(m) ? ((m)->m_data - M_START(m)) : 0) /* * Compute the amount of space available after the end of data in an mbuf. * * The M_WRITABLE() is a temporary, conservative safety measure: the burden * of checking writability of the mbuf data area rests solely with the caller. * * NB: In previous versions, M_TRAILINGSPACE() would only check M_WRITABLE() * for mbufs with external storage. We now allow mbuf-embedded data to be * read-only as well. */ #define M_TRAILINGSPACE(m) \ (M_WRITABLE(m) ? \ ((M_START(m) + M_SIZE(m)) - ((m)->m_data + (m)->m_len)) : 0) /* * Arrange to prepend space of size plen to mbuf m. If a new mbuf must be * allocated, how specifies whether to wait. If the allocation fails, the * original mbuf chain is freed and m is set to NULL. */ #define M_PREPEND(m, plen, how) do { \ struct mbuf **_mmp = &(m); \ struct mbuf *_mm = *_mmp; \ int _mplen = (plen); \ int __mhow = (how); \ \ MBUF_CHECKSLEEP(how); \ if (M_LEADINGSPACE(_mm) >= _mplen) { \ _mm->m_data -= _mplen; \ _mm->m_len += _mplen; \ } else \ _mm = m_prepend(_mm, _mplen, __mhow); \ if (_mm != NULL && _mm->m_flags & M_PKTHDR) \ _mm->m_pkthdr.len += _mplen; \ *_mmp = _mm; \ } while (0) /* * Change mbuf to new type. This is a relatively expensive operation and * should be avoided. */ #define MCHTYPE(m, t) m_chtype((m), (t)) /* Length to m_copy to copy all. 
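 *
 * For illustration (hedged): a copy of an entire chain starting at offset
 * zero is requested as
 *
 *	n = m_copym(m, 0, M_COPYALL, M_NOWAIT);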
*/ #define M_COPYALL 1000000000 extern int max_datalen; /* MHLEN - max_hdr */ extern int max_hdr; /* Largest link + protocol header */ extern int max_linkhdr; /* Largest link-level header */ extern int max_protohdr; /* Largest protocol header */ extern int nmbclusters; /* Maximum number of clusters */ /*- * Network packets may have annotations attached by affixing a list of * "packet tags" to the pkthdr structure. Packet tags are dynamically * allocated semi-opaque data structures that have a fixed header * (struct m_tag) that specifies the size of the memory block and a * (cookie, type) pair that identifies it. The cookie is a 32-bit unique * unsigned value used to identify a module or ABI. By convention this value * is chosen as the date+time that the module is created, expressed as the * number of seconds since the epoch (e.g., using date -u +'%s'). The type * value is an ABI/module-specific value that identifies a particular * annotation and is private to the module. For compatibility with systems * like OpenBSD that define packet tags w/o an ABI/module cookie, the value * PACKET_ABI_COMPAT is used to implement m_tag_get and m_tag_find * compatibility shim functions and several tag types are defined below. * Users that do not require compatibility should use a private cookie value * so that packet tag-related definitions can be maintained privately. * * Note that the packet tag returned by m_tag_alloc has the default memory * alignment implemented by malloc. To reference private data one can use a * construct like: * * struct m_tag *mtag = m_tag_alloc(...); * struct foo *p = (struct foo *)(mtag+1); * * if the alignment of struct m_tag is sufficient for referencing members of * struct foo. Otherwise it is necessary to embed struct m_tag within the * private data structure to insure proper alignment; e.g., * * struct foo { * struct m_tag tag; * ... * }; * struct foo *p = (struct foo *) m_tag_alloc(...); * struct m_tag *mtag = &p->tag; */ /* * Persistent tags stay with an mbuf until the mbuf is reclaimed. Otherwise * tags are expected to ``vanish'' when they pass through a network * interface. For most interfaces this happens normally as the tags are * reclaimed when the mbuf is free'd. However in some special cases * reclaiming must be done manually. An example is packets that pass through * the loopback interface. Also, one must be careful to do this when * ``turning around'' packets (e.g., icmp_reflect). * * To mark a tag persistent bit-or this flag in when defining the tag id. * The tag will then be treated as described above. */ #define MTAG_PERSISTENT 0x800 #define PACKET_TAG_NONE 0 /* Nadda */ /* Packet tags for use with PACKET_ABI_COMPAT. */ #define PACKET_TAG_IPSEC_IN_DONE 1 /* IPsec applied, in */ #define PACKET_TAG_IPSEC_OUT_DONE 2 /* IPsec applied, out */ #define PACKET_TAG_IPSEC_IN_CRYPTO_DONE 3 /* NIC IPsec crypto done */ #define PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED 4 /* NIC IPsec crypto req'ed */ #define PACKET_TAG_IPSEC_IN_COULD_DO_CRYPTO 5 /* NIC notifies IPsec */ #define PACKET_TAG_IPSEC_PENDING_TDB 6 /* Reminder to do IPsec */ #define PACKET_TAG_BRIDGE 7 /* Bridge processing done */ #define PACKET_TAG_GIF 8 /* GIF processing done */ #define PACKET_TAG_GRE 9 /* GRE processing done */ #define PACKET_TAG_IN_PACKET_CHECKSUM 10 /* NIC checksumming done */ #define PACKET_TAG_ENCAP 11 /* Encap.
processing */ #define PACKET_TAG_IPSEC_SOCKET 12 /* IPSEC socket ref */ #define PACKET_TAG_IPSEC_HISTORY 13 /* IPSEC history */ #define PACKET_TAG_IPV6_INPUT 14 /* IPV6 input processing */ #define PACKET_TAG_DUMMYNET 15 /* dummynet info */ #define PACKET_TAG_DIVERT 17 /* divert info */ #define PACKET_TAG_IPFORWARD 18 /* ipforward info */ #define PACKET_TAG_MACLABEL (19 | MTAG_PERSISTENT) /* MAC label */ #define PACKET_TAG_PF (21 | MTAG_PERSISTENT) /* PF/ALTQ information */ #define PACKET_TAG_RTSOCKFAM 25 /* rtsock sa family */ #define PACKET_TAG_IPOPTIONS 27 /* Saved IP options */ #define PACKET_TAG_CARP 28 /* CARP info */ #define PACKET_TAG_IPSEC_NAT_T_PORTS 29 /* two uint16_t */ #define PACKET_TAG_ND_OUTGOING 30 /* ND outgoing */ /* Specific cookies and tags. */ /* Packet tag routines. */ struct m_tag *m_tag_alloc(u_int32_t, int, int, int); void m_tag_delete(struct mbuf *, struct m_tag *); void m_tag_delete_chain(struct mbuf *, struct m_tag *); void m_tag_free_default(struct m_tag *); struct m_tag *m_tag_locate(struct mbuf *, u_int32_t, int, struct m_tag *); struct m_tag *m_tag_copy(struct m_tag *, int); int m_tag_copy_chain(struct mbuf *, const struct mbuf *, int); void m_tag_delete_nonpersistent(struct mbuf *); /* * Initialize the list of tags associated with an mbuf. */ static __inline void m_tag_init(struct mbuf *m) { SLIST_INIT(&m->m_pkthdr.tags); } /* * Set up the contents of a tag. Note that this does not fill in the free * method; the caller is expected to do that. * * XXX probably should be called m_tag_init, but that was already taken. */ static __inline void m_tag_setup(struct m_tag *t, u_int32_t cookie, int type, int len) { t->m_tag_id = type; t->m_tag_len = len; t->m_tag_cookie = cookie; } /* * Reclaim resources associated with a tag. */ static __inline void m_tag_free(struct m_tag *t) { (*t->m_tag_free)(t); } /* * Return the first tag associated with an mbuf. */ static __inline struct m_tag * m_tag_first(struct mbuf *m) { return (SLIST_FIRST(&m->m_pkthdr.tags)); } /* * Return the next tag in the list of tags associated with an mbuf. */ static __inline struct m_tag * m_tag_next(struct mbuf *m __unused, struct m_tag *t) { return (SLIST_NEXT(t, m_tag_link)); } /* * Prepend a tag to the list of tags associated with an mbuf. */ static __inline void m_tag_prepend(struct mbuf *m, struct m_tag *t) { SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link); } /* * Unlink a tag from the list of tags associated with an mbuf. */ static __inline void m_tag_unlink(struct mbuf *m, struct m_tag *t) { SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link); } /* These are for OpenBSD compatibility. */ #define MTAG_ABI_COMPAT 0 /* compatibility ABI */ static __inline struct m_tag * m_tag_get(int type, int length, int wait) { return (m_tag_alloc(MTAG_ABI_COMPAT, type, length, wait)); } static __inline struct m_tag * m_tag_find(struct mbuf *m, int type, struct m_tag *start) { return (SLIST_EMPTY(&m->m_pkthdr.tags) ? 
(struct m_tag *)NULL : m_tag_locate(m, MTAG_ABI_COMPAT, type, start)); } static __inline struct mbuf * m_free(struct mbuf *m) { struct mbuf *n = m->m_next; MBUF_PROBE1(m__free, m); if ((m->m_flags & (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE)) m_tag_delete_chain(m, NULL); if (m->m_flags & M_EXT) mb_free_ext(m); else if ((m->m_flags & M_NOFREE) == 0) uma_zfree(zone_mbuf, m); return (n); } static __inline int rt_m_getfib(struct mbuf *m) { KASSERT(m->m_flags & M_PKTHDR , ("Attempt to get FIB from non header mbuf.")); return (m->m_pkthdr.fibnum); } #define M_GETFIB(_m) rt_m_getfib(_m) #define M_SETFIB(_m, _fib) do { \ KASSERT((_m)->m_flags & M_PKTHDR, ("Attempt to set FIB on non header mbuf.")); \ ((_m)->m_pkthdr.fibnum) = (_fib); \ } while (0) /* flags passed as first argument for "m_ether_tcpip_hash()" */ #define MBUF_HASHFLAG_L2 (1 << 2) #define MBUF_HASHFLAG_L3 (1 << 3) #define MBUF_HASHFLAG_L4 (1 << 4) /* mbuf hashing helper routines */ uint32_t m_ether_tcpip_hash_init(void); uint32_t m_ether_tcpip_hash(const uint32_t, const struct mbuf *, const uint32_t); #ifdef MBUF_PROFILING void m_profile(struct mbuf *m); #define M_PROFILE(m) m_profile(m) #else #define M_PROFILE(m) #endif struct mbufq { STAILQ_HEAD(, mbuf) mq_head; int mq_len; int mq_maxlen; }; static inline void mbufq_init(struct mbufq *mq, int maxlen) { STAILQ_INIT(&mq->mq_head); mq->mq_maxlen = maxlen; mq->mq_len = 0; } static inline struct mbuf * mbufq_flush(struct mbufq *mq) { struct mbuf *m; m = STAILQ_FIRST(&mq->mq_head); STAILQ_INIT(&mq->mq_head); mq->mq_len = 0; return (m); } static inline void mbufq_drain(struct mbufq *mq) { struct mbuf *m, *n; n = mbufq_flush(mq); while ((m = n) != NULL) { n = STAILQ_NEXT(m, m_stailqpkt); m_freem(m); } } static inline struct mbuf * mbufq_first(const struct mbufq *mq) { return (STAILQ_FIRST(&mq->mq_head)); } static inline struct mbuf * mbufq_last(const struct mbufq *mq) { return (STAILQ_LAST(&mq->mq_head, mbuf, m_stailqpkt)); } static inline int mbufq_full(const struct mbufq *mq) { return (mq->mq_len >= mq->mq_maxlen); } static inline int mbufq_len(const struct mbufq *mq) { return (mq->mq_len); } static inline int mbufq_enqueue(struct mbufq *mq, struct mbuf *m) { if (mbufq_full(mq)) return (ENOBUFS); STAILQ_INSERT_TAIL(&mq->mq_head, m, m_stailqpkt); mq->mq_len++; return (0); } static inline struct mbuf * mbufq_dequeue(struct mbufq *mq) { struct mbuf *m; m = STAILQ_FIRST(&mq->mq_head); if (m) { STAILQ_REMOVE_HEAD(&mq->mq_head, m_stailqpkt); m->m_nextpkt = NULL; mq->mq_len--; } return (m); } static inline void mbufq_prepend(struct mbufq *mq, struct mbuf *m) { STAILQ_INSERT_HEAD(&mq->mq_head, m, m_stailqpkt); mq->mq_len++; } /* * Note: this doesn't enforce the maximum list size for dst. */ static inline void mbufq_concat(struct mbufq *mq_dst, struct mbufq *mq_src) { mq_dst->mq_len += mq_src->mq_len; STAILQ_CONCAT(&mq_dst->mq_head, &mq_src->mq_head); mq_src->mq_len = 0; } #endif /* _KERNEL */ #endif /* !_SYS_MBUF_H_ */
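As a closing illustration of the mbufq inline API above, a hedged usage sketch (the 512-entry limit is arbitrary, and m stands for packets the caller obtained elsewhere): initialize a bounded queue, enqueue received packets, consume them, and drain whatever remains on teardown:

	struct mbufq q;
	struct mbuf *m;

	mbufq_init(&q, 512);
	if (mbufq_enqueue(&q, m) != 0)
		m_freem(m);		/* queue is full (ENOBUFS) */
	while ((m = mbufq_dequeue(&q)) != NULL)
		... process m, then m_freem(m) ...
	mbufq_drain(&q);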