diff --git a/contrib/elftoolchain/libelf/libelf_convert.m4 b/contrib/elftoolchain/libelf/libelf_convert.m4 index bb601ad92841..71c730426b87 100644 --- a/contrib/elftoolchain/libelf/libelf_convert.m4 +++ b/contrib/elftoolchain/libelf/libelf_convert.m4 @@ -1,1090 +1,1090 @@ /*- * Copyright (c) 2006-2011 Joseph Koshy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include "_libelf.h" ELFTC_VCSID("$Id: libelf_convert.m4 3712 2019-03-16 22:23:34Z jkoshy $"); /* WARNING: GENERATED FROM __file__. */ divert(-1) # Generate conversion routines for converting between in-memory and # file representations of Elf data structures. # # These conversions use the type information defined in `elf_types.m4'. include(SRCDIR`/elf_types.m4') # For the purposes of generating conversion code, ELF types may be # classified according to the following characteristics: # # 1. Whether the ELF type can be directly mapped to an integral C # language type. For example, the ELF_T_WORD type maps directly to # a 'uint32_t', but ELF_T_GNUHASH lacks a matching C type. # # 2. Whether the type has word size dependent variants. For example, # ELT_T_EHDR is represented using C types Elf32_Ehdr and El64_Ehdr, # and the ELF_T_ADDR and ELF_T_OFF types have integral C types that # can be 32- or 64- bit wide. # # 3. Whether the ELF types has a fixed representation or not. For # example, the ELF_T_SYM type has a fixed size file representation, # some types like ELF_T_NOTE and ELF_T_GNUHASH use a variable size # representation. # # We use m4 macros to generate conversion code for ELF types that have # a fixed size representation. Conversion functions for the remaining # types are coded by hand. # #* Handling File and Memory Representations # # `In-memory' representations of an Elf data structure use natural # alignments and native byte ordering. This allows pointer arithmetic # and casting to work as expected. On the other hand, the `file' # representation of an ELF data structure could possibly be packed # tighter than its `in-memory' representation, and could be of a # differing byte order. Reading ELF objects that are members of `ar' # archives present an additional complication: `ar' pads file data to # even addresses, so file data structures in an archive member # residing inside an `ar' archive could be at misaligned memory # addresses when brought into memory. # # In summary, casting the `char *' pointers that point to memory # representations (i.e., source pointers for the *_tof() functions and # the destination pointers for the *_tom() functions), is safe, as # these pointers should be correctly aligned for the memory type # already. However, pointers to file representations have to be # treated as being potentially unaligned and no casting can be done. # NOCVT(TYPE) -- Do not generate the cvt[] structure entry for TYPE define(`NOCVT',`define(`NOCVT_'$1,1)') # NOFUNC(TYPE) -- Do not generate a conversion function for TYPE define(`NOFUNC',`define(`NOFUNC_'$1,1)') # IGNORE(TYPE) -- Completely ignore the type. define(`IGNORE',`NOCVT($1)NOFUNC($1)') # Mark ELF types that should not be processed by the M4 macros below. # Types for which we use functions with non-standard names. IGNORE(`BYTE') # Uses a wrapper around memcpy(). IGNORE(`NOTE') # Not a fixed size type. # Types for which we supply hand-coded functions. NOFUNC(`GNUHASH') # A type with complex internal structure. NOFUNC(`VDEF') # See MAKE_VERSION_CONVERTERS below. NOFUNC(`VNEED') # .. # Unimplemented types. IGNORE(`MOVEP') # ELF types that don't exist in a 32-bit world. NOFUNC(`XWORD32') NOFUNC(`SXWORD32') # `Primitive' ELF types are those that are an alias for an integral # type. As they have no internal structure, they can be copied using # a `memcpy()', and byteswapped in straightforward way. # # Mark all ELF types that directly map to integral C types. define(`PRIM_ADDR', 1) define(`PRIM_BYTE', 1) define(`PRIM_HALF', 1) define(`PRIM_LWORD', 1) define(`PRIM_OFF', 1) define(`PRIM_SWORD', 1) define(`PRIM_SXWORD', 1) define(`PRIM_WORD', 1) define(`PRIM_XWORD', 1) # Note the primitive types that are size-dependent. define(`SIZEDEP_ADDR', 1) define(`SIZEDEP_OFF', 1) # Generate conversion functions for primitive types. # # Macro use: MAKEPRIMFUNCS(ELFTYPE,CTYPE,TYPESIZE,SYMSIZE) # `$1': Name of the ELF type. # `$2': C structure name suffix. # `$3': ELF class specifier for types, one of [`32', `64']. # `$4': Additional ELF class specifier, one of [`', `32', `64']. # # Generates a pair of conversion functions. define(`MAKEPRIMFUNCS',` static int _libelf_cvt_$1$4_tof(unsigned char *dst, size_t dsz, unsigned char *src, size_t count, int byteswap) { Elf$3_$2 t, *s = (Elf$3_$2 *) (uintptr_t) src; size_t c; (void) dsz; if (!byteswap) { (void) memcpy(dst, src, count * sizeof(*s)); return (1); } for (c = 0; c < count; c++) { t = *s++; SWAP_$1$4(t); WRITE_$1$4(dst,t); } return (1); } static int _libelf_cvt_$1$4_tom(unsigned char *dst, size_t dsz, unsigned char *src, size_t count, int byteswap) { Elf$3_$2 t, *d = (Elf$3_$2 *) (uintptr_t) dst; size_t c; if (dsz < count * sizeof(Elf$3_$2)) return (0); if (!byteswap) { (void) memcpy(dst, src, count * sizeof(*d)); return (1); } for (c = 0; c < count; c++) { READ_$1$4(src,t); SWAP_$1$4(t); *d++ = t; } return (1); } ') # # Handling composite ELF types # # SWAP_FIELD(FIELDNAME,ELFTYPE) -- Generate code to swap one field. define(`SWAP_FIELD', `ifdef(`SIZEDEP_'$2, `SWAP_$2'SZ()`(t.$1); ', `SWAP_$2(t.$1); ')') # SWAP_MEMBERS(STRUCT) -- Iterate over a structure definition. define(`SWAP_MEMBERS', `ifelse($#,1,`/**/', `SWAP_FIELD($1)SWAP_MEMBERS(shift($@))')') # SWAP_STRUCT(CTYPE,SIZE) -- Generate code to swap an ELF structure. define(`SWAP_STRUCT', `pushdef(`SZ',$2)/* Swap an Elf$2_$1 */ SWAP_MEMBERS(Elf$2_$1_DEF)popdef(`SZ')') # WRITE_FIELD(ELFTYPE,FIELDNAME) -- Generate code to write one field. define(`WRITE_FIELD', `ifdef(`SIZEDEP_'$2, `WRITE_$2'SZ()`(dst,t.$1); ', `WRITE_$2(dst,t.$1); ')') # WRITE_MEMBERS(ELFTYPELIST) -- Iterate over a structure definition. define(`WRITE_MEMBERS', `ifelse($#,1,`/**/', `WRITE_FIELD($1)WRITE_MEMBERS(shift($@))')') # WRITE_STRUCT(CTYPE,SIZE) -- Generate code to write out an ELF structure. define(`WRITE_STRUCT', `pushdef(`SZ',$2)/* Write an Elf$2_$1 */ WRITE_MEMBERS(Elf$2_$1_DEF)popdef(`SZ')') # READ_FIELD(ELFTYPE,CTYPE) -- Generate code to read one field. define(`READ_FIELD', `ifdef(`SIZEDEP_'$2, `READ_$2'SZ()`(s,t.$1); ', `READ_$2(s,t.$1); ')') # READ_MEMBERS(ELFTYPELIST) -- Iterate over a structure definition. define(`READ_MEMBERS', `ifelse($#,1,`/**/', `READ_FIELD($1)READ_MEMBERS(shift($@))')') # READ_STRUCT(CTYPE,SIZE) -- Generate code to read an ELF structure. define(`READ_STRUCT', `pushdef(`SZ',$2)/* Read an Elf$2_$1 */ READ_MEMBERS(Elf$2_$1_DEF)popdef(`SZ')') # MAKECOMPFUNCS -- Generate converters for composite ELF structures. # # When converting data to file representation, the source pointer will # be naturally aligned for a data structure's in-memory # representation. When converting data to memory, the destination # pointer will be similarly aligned. # # For in-place conversions, when converting to file representations, # the source buffer is large enough to hold `file' data. When # converting from file to memory, we need to be careful to work # `backwards', to avoid overwriting unconverted data. # # Macro use: # `$1': Name of the ELF type. # `$2': C structure name suffix. # `$3': ELF class specifier, one of [`', `32', `64'] define(`MAKECOMPFUNCS', `ifdef(`NOFUNC_'$1$3,`',` static int _libelf_cvt_$1$3_tof(unsigned char *dst, size_t dsz, unsigned char *src, size_t count, int byteswap) { Elf$3_$2 t, *s; size_t c; (void) dsz; s = (Elf$3_$2 *) (uintptr_t) src; for (c = 0; c < count; c++) { t = *s++; if (byteswap) { SWAP_STRUCT($2,$3) } WRITE_STRUCT($2,$3) } return (1); } static int _libelf_cvt_$1$3_tom(unsigned char *dst, size_t dsz, unsigned char *src, size_t count, int byteswap) { Elf$3_$2 t, *d; unsigned char *s,*s0; size_t fsz; fsz = elf$3_fsize(ELF_T_$1, (size_t) 1, EV_CURRENT); d = ((Elf$3_$2 *) (uintptr_t) dst) + (count - 1); s0 = src + (count - 1) * fsz; if (dsz < count * sizeof(Elf$3_$2)) return (0); while (count--) { s = s0; READ_STRUCT($2,$3) if (byteswap) { SWAP_STRUCT($2,$3) } *d-- = t; s0 -= fsz; } return (1); } ')') # MAKE_TYPE_CONVERTER(ELFTYPE,CTYPE) # # Make type convertor functions from the type definition # of the ELF type: # - Skip convertors marked as `NOFUNC'. # - Invoke `MAKEPRIMFUNCS' or `MAKECOMPFUNCS' as appropriate. define(`MAKE_TYPE_CONVERTER', `ifdef(`NOFUNC_'$1,`', `ifdef(`PRIM_'$1, `ifdef(`SIZEDEP_'$1, `MAKEPRIMFUNCS($1,$2,32,32)dnl MAKEPRIMFUNCS($1,$2,64,64)', `MAKEPRIMFUNCS($1,$2,64)')', `MAKECOMPFUNCS($1,$2,32)dnl MAKECOMPFUNCS($1,$2,64)')')') # MAKE_TYPE_CONVERTERS(ELFTYPELIST) -- Generate conversion functions. define(`MAKE_TYPE_CONVERTERS', `ifelse($#,1,`', `MAKE_TYPE_CONVERTER($1)MAKE_TYPE_CONVERTERS(shift($@))')') # # Macros to generate entries for the table of convertors. # # CONV(ELFTYPE,SIZE,DIRECTION) # # Generate the name of a convertor function. define(`CONV', `ifdef(`NOFUNC_'$1$2, `.$3$2 = NULL', `ifdef(`PRIM_'$1, `ifdef(`SIZEDEP_'$1, `.$3$2 = _libelf_cvt_$1$2_$3', `.$3$2 = _libelf_cvt_$1_$3')', `.$3$2 = _libelf_cvt_$1$2_$3')')') # CONVERTER_NAME(ELFTYPE) # # Generate the contents of one `struct cvt' instance. define(`CONVERTER_NAME', `ifdef(`NOCVT_'$1,`', ` [ELF_T_$1] = { CONV($1,32,tof), CONV($1,32,tom), CONV($1,64,tof), CONV($1,64,tom) }, ')') # CONVERTER_NAMES(ELFTYPELIST) # # Generate the `struct cvt[]' array. define(`CONVERTER_NAMES', `ifelse($#,1,`', `CONVERTER_NAME($1)CONVERTER_NAMES(shift($@))')') # # Handling ELF version sections. # # _FSZ(FIELD,BASETYPE) - return the file size for a field. define(`_FSZ', `ifelse($2,`HALF',2, $2,`WORD',4)') # FSZ(STRUCT) - determine the file size of a structure. define(`FSZ', `ifelse($#,1,0, `eval(_FSZ($1) + FSZ(shift($@)))')') # MAKE_VERSION_CONVERTERS(TYPE,BASE,AUX,PFX) -- Generate conversion # functions for versioning structures. define(`MAKE_VERSION_CONVERTERS', `MAKE_VERSION_CONVERTER($1,$2,$3,$4,32) MAKE_VERSION_CONVERTER($1,$2,$3,$4,64)') # MAKE_VERSION_CONVERTOR(TYPE,CBASE,CAUX,PFX,SIZE) -- Generate a # conversion function. define(`MAKE_VERSION_CONVERTER',` static int _libelf_cvt_$1$5_tof(unsigned char *dst, size_t dsz, unsigned char *src, size_t count, int byteswap) { Elf$5_$2 t; Elf$5_$3 a; const size_t verfsz = FSZ(Elf$5_$2_DEF); const size_t auxfsz = FSZ(Elf$5_$3_DEF); const size_t vermsz = sizeof(Elf$5_$2); const size_t auxmsz = sizeof(Elf$5_$3); unsigned char * const dstend = dst + dsz; unsigned char * const srcend = src + count; unsigned char *dtmp, *dstaux, *srcaux; Elf$5_Word aux, anext, cnt, vnext; for (dtmp = dst, vnext = ~0U; vnext != 0 && dtmp + verfsz <= dstend && src + vermsz <= srcend; dtmp += vnext, src += vnext) { /* Read in an Elf$5_$2 structure. */ t = *((Elf$5_$2 *) (uintptr_t) src); aux = t.$4_aux; cnt = t.$4_cnt; vnext = t.$4_next; if (byteswap) { SWAP_STRUCT($2, $5) } dst = dtmp; WRITE_STRUCT($2, $5) if (aux < verfsz) return (0); /* Process AUX entries. */ for (anext = ~0U, dstaux = dtmp + aux, srcaux = src + aux; cnt != 0 && anext != 0 && dstaux + auxfsz <= dstend && srcaux + auxmsz <= srcend; dstaux += anext, srcaux += anext, cnt--) { /* Read in an Elf$5_$3 structure. */ a = *((Elf$5_$3 *) (uintptr_t) srcaux); anext = a.$4a_next; if (byteswap) { pushdef(`t',`a')SWAP_STRUCT($3, $5)popdef(`t') } dst = dstaux; pushdef(`t',`a')WRITE_STRUCT($3, $5)popdef(`t') } if (anext || cnt) return (0); } if (vnext) return (0); return (1); } static int _libelf_cvt_$1$5_tom(unsigned char *dst, size_t dsz, unsigned char *src, size_t count, int byteswap) { Elf$5_$2 t, *dp; Elf$5_$3 a, *ap; const size_t verfsz = FSZ(Elf$5_$2_DEF); const size_t auxfsz = FSZ(Elf$5_$3_DEF); const size_t vermsz = sizeof(Elf$5_$2); const size_t auxmsz = sizeof(Elf$5_$3); unsigned char * const dstend = dst + dsz; unsigned char * const srcend = src + count; unsigned char *dstaux, *s, *srcaux, *stmp; Elf$5_Word aux, anext, cnt, vnext; for (stmp = src, vnext = ~0U; vnext != 0 && stmp + verfsz <= srcend && dst + vermsz <= dstend; stmp += vnext, dst += vnext) { /* Read in a $1 structure. */ s = stmp; READ_STRUCT($2, $5) if (byteswap) { SWAP_STRUCT($2, $5) } dp = (Elf$5_$2 *) (uintptr_t) dst; *dp = t; aux = t.$4_aux; cnt = t.$4_cnt; vnext = t.$4_next; if (aux < vermsz) return (0); /* Process AUX entries. */ for (anext = ~0U, dstaux = dst + aux, srcaux = stmp + aux; cnt != 0 && anext != 0 && dstaux + auxmsz <= dstend && srcaux + auxfsz <= srcend; dstaux += anext, srcaux += anext, cnt--) { s = srcaux; pushdef(`t',`a')READ_STRUCT($3, $5)popdef(`t') if (byteswap) { pushdef(`t',`a')SWAP_STRUCT($3, $5)popdef(`t') } anext = a.$4a_next; ap = ((Elf$5_$3 *) (uintptr_t) dstaux); *ap = a; } if (anext || cnt) return (0); } if (vnext) return (0); return (1); }') divert(0) /* * C macros to byte swap integral quantities. */ #define SWAP_BYTE(X) do { (void) (X); } while (0) #define SWAP_IDENT(X) do { (void) (X); } while (0) #define SWAP_HALF(X) do { \ uint16_t _x = (uint16_t) (X); \ uint32_t _t = _x & 0xFFU; \ _t <<= 8U; _x >>= 8U; _t |= _x & 0xFFU; \ (X) = (uint16_t) _t; \ } while (0) #define _SWAP_WORD(X, T) do { \ uint32_t _x = (uint32_t) (X); \ uint32_t _t = _x & 0xFF; \ _t <<= 8; _x >>= 8; _t |= _x & 0xFF; \ _t <<= 8; _x >>= 8; _t |= _x & 0xFF; \ _t <<= 8; _x >>= 8; _t |= _x & 0xFF; \ (X) = (T) _t; \ } while (0) #define SWAP_ADDR32(X) _SWAP_WORD(X, Elf32_Addr) #define SWAP_OFF32(X) _SWAP_WORD(X, Elf32_Off) #define SWAP_SWORD(X) _SWAP_WORD(X, Elf32_Sword) #define SWAP_WORD(X) _SWAP_WORD(X, Elf32_Word) #define _SWAP_WORD64(X, T) do { \ uint64_t _x = (uint64_t) (X); \ uint64_t _t = _x & 0xFF; \ _t <<= 8; _x >>= 8; _t |= _x & 0xFF; \ _t <<= 8; _x >>= 8; _t |= _x & 0xFF; \ _t <<= 8; _x >>= 8; _t |= _x & 0xFF; \ _t <<= 8; _x >>= 8; _t |= _x & 0xFF; \ _t <<= 8; _x >>= 8; _t |= _x & 0xFF; \ _t <<= 8; _x >>= 8; _t |= _x & 0xFF; \ _t <<= 8; _x >>= 8; _t |= _x & 0xFF; \ (X) = (T) _t; \ } while (0) #define SWAP_ADDR64(X) _SWAP_WORD64(X, Elf64_Addr) #define SWAP_LWORD(X) _SWAP_WORD64(X, Elf64_Lword) #define SWAP_OFF64(X) _SWAP_WORD64(X, Elf64_Off) #define SWAP_SXWORD(X) _SWAP_WORD64(X, Elf64_Sxword) #define SWAP_XWORD(X) _SWAP_WORD64(X, Elf64_Xword) /* * C macros to write out various integral values. * * Note: * - The destination pointer could be unaligned. * - Values are written out in native byte order. * - The destination pointer is incremented after the write. */ #define WRITE_BYTE(P,X) do { \ unsigned char *const _p = (unsigned char *) (P); \ _p[0] = (unsigned char) (X); \ (P) = _p + 1; \ } while (0) #define WRITE_HALF(P,X) do { \ uint16_t _t = (X); \ unsigned char *const _p = (unsigned char *) (P); \ const unsigned char *const _q = (unsigned char *) &_t; \ _p[0] = _q[0]; \ _p[1] = _q[1]; \ (P) = _p + 2; \ } while (0) #define WRITE_WORD(P,X) do { \ uint32_t _t = (uint32_t) (X); \ unsigned char *const _p = (unsigned char *) (P); \ const unsigned char *const _q = (unsigned char *) &_t; \ _p[0] = _q[0]; \ _p[1] = _q[1]; \ _p[2] = _q[2]; \ _p[3] = _q[3]; \ (P) = _p + 4; \ } while (0) #define WRITE_ADDR32(P,X) WRITE_WORD(P,X) #define WRITE_OFF32(P,X) WRITE_WORD(P,X) #define WRITE_SWORD(P,X) WRITE_WORD(P,X) #define WRITE_WORD64(P,X) do { \ uint64_t _t = (uint64_t) (X); \ unsigned char *const _p = (unsigned char *) (P); \ const unsigned char *const _q = (unsigned char *) &_t; \ _p[0] = _q[0]; \ _p[1] = _q[1]; \ _p[2] = _q[2]; \ _p[3] = _q[3]; \ _p[4] = _q[4]; \ _p[5] = _q[5]; \ _p[6] = _q[6]; \ _p[7] = _q[7]; \ (P) = _p + 8; \ } while (0) #define WRITE_ADDR64(P,X) WRITE_WORD64(P,X) #define WRITE_LWORD(P,X) WRITE_WORD64(P,X) #define WRITE_OFF64(P,X) WRITE_WORD64(P,X) #define WRITE_SXWORD(P,X) WRITE_WORD64(P,X) #define WRITE_XWORD(P,X) WRITE_WORD64(P,X) #define WRITE_IDENT(P,X) do { \ (void) memcpy((P), (X), sizeof((X))); \ (P) = (P) + EI_NIDENT; \ } while (0) /* * C macros to read in various integral values. * * Note: * - The source pointer could be unaligned. * - Values are read in native byte order. * - The source pointer is incremented appropriately. */ #define READ_BYTE(P,X) do { \ const unsigned char *const _p = \ (const unsigned char *) (P); \ (X) = _p[0]; \ (P) = (P) + 1; \ } while (0) #define READ_HALF(P,X) do { \ uint16_t _t; \ unsigned char *const _q = (unsigned char *) &_t; \ const unsigned char *const _p = \ (const unsigned char *) (P); \ _q[0] = _p[0]; \ _q[1] = _p[1]; \ (P) = (P) + 2; \ (X) = _t; \ } while (0) #define _READ_WORD(P,X,T) do { \ uint32_t _t; \ unsigned char *const _q = (unsigned char *) &_t; \ const unsigned char *const _p = \ (const unsigned char *) (P); \ _q[0] = _p[0]; \ _q[1] = _p[1]; \ _q[2] = _p[2]; \ _q[3] = _p[3]; \ (P) = (P) + 4; \ (X) = (T) _t; \ } while (0) #define READ_ADDR32(P,X) _READ_WORD(P, X, Elf32_Addr) #define READ_OFF32(P,X) _READ_WORD(P, X, Elf32_Off) #define READ_SWORD(P,X) _READ_WORD(P, X, Elf32_Sword) #define READ_WORD(P,X) _READ_WORD(P, X, Elf32_Word) #define _READ_WORD64(P,X,T) do { \ uint64_t _t; \ unsigned char *const _q = (unsigned char *) &_t; \ const unsigned char *const _p = \ (const unsigned char *) (P); \ _q[0] = _p[0]; \ _q[1] = _p[1]; \ _q[2] = _p[2]; \ _q[3] = _p[3]; \ _q[4] = _p[4]; \ _q[5] = _p[5]; \ _q[6] = _p[6]; \ _q[7] = _p[7]; \ (P) = (P) + 8; \ (X) = (T) _t; \ } while (0) #define READ_ADDR64(P,X) _READ_WORD64(P, X, Elf64_Addr) #define READ_LWORD(P,X) _READ_WORD64(P, X, Elf64_Lword) #define READ_OFF64(P,X) _READ_WORD64(P, X, Elf64_Off) #define READ_SXWORD(P,X) _READ_WORD64(P, X, Elf64_Sxword) #define READ_XWORD(P,X) _READ_WORD64(P, X, Elf64_Xword) #define READ_IDENT(P,X) do { \ (void) memcpy((X), (P), sizeof((X))); \ (P) = (P) + EI_NIDENT; \ } while (0) #define ROUNDUP2(V,N) (V) = ((((V) + (N) - 1)) & ~((N) - 1)) /*[*/ MAKE_TYPE_CONVERTERS(ELF_TYPE_LIST) MAKE_VERSION_CONVERTERS(VDEF,Verdef,Verdaux,vd) MAKE_VERSION_CONVERTERS(VNEED,Verneed,Vernaux,vn) /*]*/ /* * Sections of type ELF_T_BYTE are never byteswapped, consequently a * simple memcpy suffices for both directions of conversion. */ static int _libelf_cvt_BYTE_tox(unsigned char *dst, size_t dsz, unsigned char *src, size_t count, int byteswap) { (void) byteswap; if (dsz < count) return (0); if (dst != src) (void) memcpy(dst, src, count); return (1); } /* * Sections of type ELF_T_GNUHASH start with a header containing 4 32-bit * words. Bloom filter data comes next, followed by hash buckets and the * hash chain. * * Bloom filter words are 64 bit wide on ELFCLASS64 objects and are 32 bit * wide on ELFCLASS32 objects. The other objects in this section are 32 * bits wide. * * Argument `srcsz' denotes the number of bytes to be converted. In the * 32-bit case we need to translate `srcsz' to a count of 32-bit words. */ static int _libelf_cvt_GNUHASH32_tom(unsigned char *dst, size_t dsz, unsigned char *src, size_t srcsz, int byteswap) { return (_libelf_cvt_WORD_tom(dst, dsz, src, srcsz / sizeof(uint32_t), byteswap)); } static int _libelf_cvt_GNUHASH32_tof(unsigned char *dst, size_t dsz, unsigned char *src, size_t srcsz, int byteswap) { return (_libelf_cvt_WORD_tof(dst, dsz, src, srcsz / sizeof(uint32_t), byteswap)); } static int _libelf_cvt_GNUHASH64_tom(unsigned char *dst, size_t dsz, unsigned char *src, size_t srcsz, int byteswap) { size_t sz; uint64_t t64, *bloom64; Elf_GNU_Hash_Header *gh; uint32_t n, nbuckets, nchains, maskwords, shift2, symndx, t32; uint32_t *buckets, *chains; sz = 4 * sizeof(uint32_t); /* File header is 4 words long. */ if (dsz < sizeof(Elf_GNU_Hash_Header) || srcsz < sz) return (0); /* Read in the section header and byteswap if needed. */ READ_WORD(src, nbuckets); READ_WORD(src, symndx); READ_WORD(src, maskwords); READ_WORD(src, shift2); srcsz -= sz; if (byteswap) { SWAP_WORD(nbuckets); SWAP_WORD(symndx); SWAP_WORD(maskwords); SWAP_WORD(shift2); } /* Check source buffer and destination buffer sizes. */ sz = nbuckets * sizeof(uint32_t) + maskwords * sizeof(uint64_t); if (srcsz < sz || dsz < sz + sizeof(Elf_GNU_Hash_Header)) return (0); gh = (Elf_GNU_Hash_Header *) (uintptr_t) dst; gh->gh_nbuckets = nbuckets; gh->gh_symndx = symndx; gh->gh_maskwords = maskwords; gh->gh_shift2 = shift2; dsz -= sizeof(Elf_GNU_Hash_Header); dst += sizeof(Elf_GNU_Hash_Header); bloom64 = (uint64_t *) (uintptr_t) dst; /* Copy bloom filter data. */ for (n = 0; n < maskwords; n++) { READ_XWORD(src, t64); if (byteswap) SWAP_XWORD(t64); bloom64[n] = t64; } /* The hash buckets follows the bloom filter. */ dst += maskwords * sizeof(uint64_t); buckets = (uint32_t *) (uintptr_t) dst; for (n = 0; n < nbuckets; n++) { READ_WORD(src, t32); if (byteswap) SWAP_WORD(t32); buckets[n] = t32; } dst += nbuckets * sizeof(uint32_t); /* The hash chain follows the hash buckets. */ dsz -= sz; srcsz -= sz; if (dsz < srcsz) /* Destination lacks space. */ return (0); nchains = (uint32_t) (srcsz / sizeof(uint32_t)); chains = (uint32_t *) (uintptr_t) dst; for (n = 0; n < nchains; n++) { READ_WORD(src, t32); if (byteswap) SWAP_WORD(t32); *chains++ = t32; } return (1); } static int _libelf_cvt_GNUHASH64_tof(unsigned char *dst, size_t dsz, unsigned char *src, size_t srcsz, int byteswap) { uint32_t *s32; size_t sz, hdrsz; uint64_t *s64, t64; Elf_GNU_Hash_Header *gh; uint32_t maskwords, n, nbuckets, nchains, t0, t1, t2, t3, t32; hdrsz = 4 * sizeof(uint32_t); /* Header is 4x32 bits. */ if (dsz < hdrsz || srcsz < sizeof(Elf_GNU_Hash_Header)) return (0); gh = (Elf_GNU_Hash_Header *) (uintptr_t) src; t0 = nbuckets = gh->gh_nbuckets; t1 = gh->gh_symndx; t2 = maskwords = gh->gh_maskwords; t3 = gh->gh_shift2; src += sizeof(Elf_GNU_Hash_Header); srcsz -= sizeof(Elf_GNU_Hash_Header); dsz -= hdrsz; sz = gh->gh_nbuckets * sizeof(uint32_t) + gh->gh_maskwords * sizeof(uint64_t); if (srcsz < sz || dsz < sz) return (0); /* Write out the header. */ if (byteswap) { SWAP_WORD(t0); SWAP_WORD(t1); SWAP_WORD(t2); SWAP_WORD(t3); } WRITE_WORD(dst, t0); WRITE_WORD(dst, t1); WRITE_WORD(dst, t2); WRITE_WORD(dst, t3); /* Copy the bloom filter and the hash table. */ s64 = (uint64_t *) (uintptr_t) src; for (n = 0; n < maskwords; n++) { t64 = *s64++; if (byteswap) SWAP_XWORD(t64); WRITE_WORD64(dst, t64); } s32 = (uint32_t *) s64; for (n = 0; n < nbuckets; n++) { t32 = *s32++; if (byteswap) SWAP_WORD(t32); WRITE_WORD(dst, t32); } srcsz -= sz; dsz -= sz; /* Copy out the hash chains. */ if (dsz < srcsz) return (0); nchains = (uint32_t) (srcsz / sizeof(uint32_t)); for (n = 0; n < nchains; n++) { t32 = *s32++; if (byteswap) SWAP_WORD(t32); WRITE_WORD(dst, t32); } return (1); } /* * Elf_Note structures comprise a fixed size header followed by variable * length strings. The fixed size header needs to be byte swapped, but * not the strings. * * Argument `count' denotes the total number of bytes to be converted. * The destination buffer needs to be at least `count' bytes in size. */ static int _libelf_cvt_NOTE_tom(unsigned char *dst, size_t dsz, unsigned char *src, size_t count, int byteswap) { uint32_t namesz, descsz, type; Elf_Note *en; size_t sz, hdrsz; if (dsz < count) /* Destination buffer is too small. */ return (0); hdrsz = 3 * sizeof(uint32_t); if (count < hdrsz) /* Source too small. */ return (0); if (!byteswap) { (void) memcpy(dst, src, count); return (1); } /* Process all notes in the section. */ while (count > hdrsz) { /* Read the note header. */ READ_WORD(src, namesz); READ_WORD(src, descsz); READ_WORD(src, type); /* Translate. */ SWAP_WORD(namesz); SWAP_WORD(descsz); SWAP_WORD(type); /* Copy out the translated note header. */ en = (Elf_Note *) (uintptr_t) dst; en->n_namesz = namesz; en->n_descsz = descsz; en->n_type = type; dsz -= sizeof(Elf_Note); dst += sizeof(Elf_Note); count -= hdrsz; ROUNDUP2(namesz, 4U); ROUNDUP2(descsz, 4U); sz = namesz + descsz; if (count < sz || dsz < sz) /* Buffers are too small. */ return (0); (void) memcpy(dst, src, sz); src += sz; dst += sz; count -= sz; dsz -= sz; } return (1); } static int _libelf_cvt_NOTE_tof(unsigned char *dst, size_t dsz, unsigned char *src, size_t count, int byteswap) { uint32_t namesz, descsz, type; Elf_Note *en; size_t sz; if (dsz < count) return (0); if (!byteswap) { (void) memcpy(dst, src, count); return (1); } while (count > sizeof(Elf_Note)) { en = (Elf_Note *) (uintptr_t) src; namesz = en->n_namesz; descsz = en->n_descsz; type = en->n_type; sz = namesz; ROUNDUP2(sz, 4U); sz += descsz; ROUNDUP2(sz, 4U); SWAP_WORD(namesz); SWAP_WORD(descsz); SWAP_WORD(type); WRITE_WORD(dst, namesz); WRITE_WORD(dst, descsz); WRITE_WORD(dst, type); src += sizeof(Elf_Note); count -= sizeof(Elf_Note); if (count < sz) - sz = count; + return (0); (void) memcpy(dst, src, sz); src += sz; dst += sz; count -= sz; } return (1); } struct converters { int (*tof32)(unsigned char *dst, size_t dsz, unsigned char *src, size_t cnt, int byteswap); int (*tom32)(unsigned char *dst, size_t dsz, unsigned char *src, size_t cnt, int byteswap); int (*tof64)(unsigned char *dst, size_t dsz, unsigned char *src, size_t cnt, int byteswap); int (*tom64)(unsigned char *dst, size_t dsz, unsigned char *src, size_t cnt, int byteswap); }; static struct converters cvt[ELF_T_NUM] = { /*[*/ CONVERTER_NAMES(ELF_TYPE_LIST) /*]*/ /* * Types that need hand-coded converters follow. */ [ELF_T_BYTE] = { .tof32 = _libelf_cvt_BYTE_tox, .tom32 = _libelf_cvt_BYTE_tox, .tof64 = _libelf_cvt_BYTE_tox, .tom64 = _libelf_cvt_BYTE_tox }, [ELF_T_NOTE] = { .tof32 = _libelf_cvt_NOTE_tof, .tom32 = _libelf_cvt_NOTE_tom, .tof64 = _libelf_cvt_NOTE_tof, .tom64 = _libelf_cvt_NOTE_tom } }; /* * Return a translator function for the specified ELF section type, conversion * direction, ELF class and ELF machine. */ _libelf_translator_function * _libelf_get_translator(Elf_Type t, int direction, int elfclass, int elfmachine) { assert(elfclass == ELFCLASS32 || elfclass == ELFCLASS64); assert(direction == ELF_TOFILE || direction == ELF_TOMEMORY); assert(t >= ELF_T_FIRST && t <= ELF_T_LAST); /* TODO: Handle MIPS64 REL{,A} sections (ticket #559). */ (void) elfmachine; return ((elfclass == ELFCLASS32) ? (direction == ELF_TOFILE ? cvt[t].tof32 : cvt[t].tom32) : (direction == ELF_TOFILE ? cvt[t].tof64 : cvt[t].tom64)); } diff --git a/contrib/elftoolchain/readelf/readelf.c b/contrib/elftoolchain/readelf/readelf.c index 8814f235ce1b..e5a6fa0de2e6 100644 --- a/contrib/elftoolchain/readelf/readelf.c +++ b/contrib/elftoolchain/readelf/readelf.c @@ -1,7758 +1,7810 @@ /*- * Copyright (c) 2009-2015 Kai Wang * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "_elftc.h" ELFTC_VCSID("$Id: readelf.c 3769 2019-06-29 15:15:02Z emaste $"); /* Backwards compatability for older FreeBSD releases. */ #ifndef STB_GNU_UNIQUE #define STB_GNU_UNIQUE 10 #endif #ifndef STT_SPARC_REGISTER #define STT_SPARC_REGISTER 13 #endif /* * readelf(1) options. */ #define RE_AA 0x00000001 #define RE_C 0x00000002 #define RE_DD 0x00000004 #define RE_D 0x00000008 #define RE_G 0x00000010 #define RE_H 0x00000020 #define RE_II 0x00000040 #define RE_I 0x00000080 #define RE_L 0x00000100 #define RE_NN 0x00000200 #define RE_N 0x00000400 #define RE_P 0x00000800 #define RE_R 0x00001000 #define RE_SS 0x00002000 #define RE_S 0x00004000 #define RE_T 0x00008000 #define RE_U 0x00010000 #define RE_VV 0x00020000 #define RE_WW 0x00040000 #define RE_W 0x00080000 #define RE_X 0x00100000 /* * dwarf dump options. */ #define DW_A 0x00000001 #define DW_FF 0x00000002 #define DW_F 0x00000004 #define DW_I 0x00000008 #define DW_LL 0x00000010 #define DW_L 0x00000020 #define DW_M 0x00000040 #define DW_O 0x00000080 #define DW_P 0x00000100 #define DW_RR 0x00000200 #define DW_R 0x00000400 #define DW_S 0x00000800 #define DW_DEFAULT_OPTIONS (DW_A | DW_F | DW_I | DW_L | DW_O | DW_P | \ DW_R | DW_RR | DW_S) /* * readelf(1) run control flags. */ #define DISPLAY_FILENAME 0x0001 /* * Internal data structure for sections. */ struct section { const char *name; /* section name */ Elf_Scn *scn; /* section scn */ uint64_t off; /* section offset */ uint64_t sz; /* section size */ uint64_t entsize; /* section entsize */ uint64_t align; /* section alignment */ uint64_t type; /* section type */ uint64_t flags; /* section flags */ uint64_t addr; /* section virtual addr */ uint32_t link; /* section link ndx */ uint32_t info; /* section info ndx */ }; struct dumpop { union { size_t si; /* section index */ const char *sn; /* section name */ } u; enum { DUMP_BY_INDEX = 0, DUMP_BY_NAME } type; /* dump type */ #define HEX_DUMP 0x0001 #define STR_DUMP 0x0002 int op; /* dump operation */ STAILQ_ENTRY(dumpop) dumpop_list; }; struct symver { const char *name; int type; }; /* * Structure encapsulates the global data for readelf(1). */ struct readelf { const char *filename; /* current processing file. */ int options; /* command line options. */ int flags; /* run control flags. */ int dop; /* dwarf dump options. */ Elf *elf; /* underlying ELF descriptor. */ Elf *ar; /* archive ELF descriptor. */ Dwarf_Debug dbg; /* DWARF handle. */ Dwarf_Half cu_psize; /* DWARF CU pointer size. */ Dwarf_Half cu_osize; /* DWARF CU offset size. */ Dwarf_Half cu_ver; /* DWARF CU version. */ GElf_Ehdr ehdr; /* ELF header. */ int ec; /* ELF class. */ size_t shnum; /* #sections. */ struct section *vd_s; /* Verdef section. */ struct section *vn_s; /* Verneed section. */ struct section *vs_s; /* Versym section. */ uint16_t *vs; /* Versym array. */ int vs_sz; /* Versym array size. */ struct symver *ver; /* Version array. */ int ver_sz; /* Size of version array. */ struct section *sl; /* list of sections. */ STAILQ_HEAD(, dumpop) v_dumpop; /* list of dump ops. */ uint64_t (*dw_read)(Elf_Data *, uint64_t *, int); uint64_t (*dw_decode)(uint8_t **, int); }; enum options { OPTION_DEBUG_DUMP }; static struct option longopts[] = { {"all", no_argument, NULL, 'a'}, {"arch-specific", no_argument, NULL, 'A'}, {"archive-index", no_argument, NULL, 'c'}, {"debug-dump", optional_argument, NULL, OPTION_DEBUG_DUMP}, {"dynamic", no_argument, NULL, 'd'}, {"file-header", no_argument, NULL, 'h'}, {"full-section-name", no_argument, NULL, 'N'}, {"headers", no_argument, NULL, 'e'}, {"help", no_argument, 0, 'H'}, {"hex-dump", required_argument, NULL, 'x'}, {"histogram", no_argument, NULL, 'I'}, {"notes", no_argument, NULL, 'n'}, {"program-headers", no_argument, NULL, 'l'}, {"relocs", no_argument, NULL, 'r'}, {"sections", no_argument, NULL, 'S'}, {"section-headers", no_argument, NULL, 'S'}, {"section-groups", no_argument, NULL, 'g'}, {"section-details", no_argument, NULL, 't'}, {"segments", no_argument, NULL, 'l'}, {"string-dump", required_argument, NULL, 'p'}, {"symbols", no_argument, NULL, 's'}, {"syms", no_argument, NULL, 's'}, {"unwind", no_argument, NULL, 'u'}, {"use-dynamic", no_argument, NULL, 'D'}, {"version-info", no_argument, 0, 'V'}, {"version", no_argument, 0, 'v'}, {"wide", no_argument, 0, 'W'}, {NULL, 0, NULL, 0} }; struct eflags_desc { uint64_t flag; const char *desc; }; struct flag_desc { uint64_t flag; const char *desc; }; struct mips_option { uint64_t flag; const char *desc; }; struct loc_at { Dwarf_Attribute la_at; Dwarf_Unsigned la_off; Dwarf_Unsigned la_lowpc; Dwarf_Half la_cu_psize; Dwarf_Half la_cu_osize; Dwarf_Half la_cu_ver; }; static void add_dumpop(struct readelf *re, size_t si, const char *sn, int op, int t); static const char *aeabi_adv_simd_arch(uint64_t simd); static const char *aeabi_align_needed(uint64_t an); static const char *aeabi_align_preserved(uint64_t ap); static const char *aeabi_arm_isa(uint64_t ai); static const char *aeabi_cpu_arch(uint64_t arch); static const char *aeabi_cpu_arch_profile(uint64_t pf); static const char *aeabi_div(uint64_t du); static const char *aeabi_enum_size(uint64_t es); static const char *aeabi_fp_16bit_format(uint64_t fp16); static const char *aeabi_fp_arch(uint64_t fp); static const char *aeabi_fp_denormal(uint64_t fd); static const char *aeabi_fp_exceptions(uint64_t fe); static const char *aeabi_fp_hpext(uint64_t fh); static const char *aeabi_fp_number_model(uint64_t fn); static const char *aeabi_fp_optm_goal(uint64_t fog); static const char *aeabi_fp_rounding(uint64_t fr); static const char *aeabi_hardfp(uint64_t hfp); static const char *aeabi_mpext(uint64_t mp); static const char *aeabi_optm_goal(uint64_t og); static const char *aeabi_pcs_config(uint64_t pcs); static const char *aeabi_pcs_got(uint64_t got); static const char *aeabi_pcs_r9(uint64_t r9); static const char *aeabi_pcs_ro(uint64_t ro); static const char *aeabi_pcs_rw(uint64_t rw); static const char *aeabi_pcs_wchar_t(uint64_t wt); static const char *aeabi_t2ee(uint64_t t2ee); static const char *aeabi_thumb_isa(uint64_t ti); static const char *aeabi_fp_user_exceptions(uint64_t fu); static const char *aeabi_unaligned_access(uint64_t ua); static const char *aeabi_vfp_args(uint64_t va); static const char *aeabi_virtual(uint64_t vt); static const char *aeabi_wmmx_arch(uint64_t wmmx); static const char *aeabi_wmmx_args(uint64_t wa); static const char *elf_class(unsigned int class); static const char *elf_endian(unsigned int endian); static const char *elf_machine(unsigned int mach); static const char *elf_osabi(unsigned int abi); static const char *elf_type(unsigned int type); static const char *elf_ver(unsigned int ver); static const char *dt_type(unsigned int mach, unsigned int dtype); static void dump_ar(struct readelf *re, int); static void dump_arm_attributes(struct readelf *re, uint8_t *p, uint8_t *pe); static void dump_attributes(struct readelf *re); static uint8_t *dump_compatibility_tag(uint8_t *p, uint8_t *pe); static void dump_dwarf(struct readelf *re); static void dump_dwarf_abbrev(struct readelf *re); static void dump_dwarf_aranges(struct readelf *re); static void dump_dwarf_block(struct readelf *re, uint8_t *b, Dwarf_Unsigned len); static void dump_dwarf_die(struct readelf *re, Dwarf_Die die, int level); static void dump_dwarf_frame(struct readelf *re, int alt); static void dump_dwarf_frame_inst(struct readelf *re, Dwarf_Cie cie, uint8_t *insts, Dwarf_Unsigned len, Dwarf_Unsigned caf, Dwarf_Signed daf, Dwarf_Addr pc, Dwarf_Debug dbg); static int dump_dwarf_frame_regtable(struct readelf *re, Dwarf_Fde fde, Dwarf_Addr pc, Dwarf_Unsigned func_len, Dwarf_Half cie_ra); static void dump_dwarf_frame_section(struct readelf *re, struct section *s, int alt); static void dump_dwarf_info(struct readelf *re, Dwarf_Bool is_info); static void dump_dwarf_macinfo(struct readelf *re); static void dump_dwarf_line(struct readelf *re); static void dump_dwarf_line_decoded(struct readelf *re); static void dump_dwarf_loc(struct readelf *re, Dwarf_Loc *lr); static void dump_dwarf_loclist(struct readelf *re); static void dump_dwarf_pubnames(struct readelf *re); static void dump_dwarf_ranges(struct readelf *re); static void dump_dwarf_ranges_foreach(struct readelf *re, Dwarf_Die die, Dwarf_Addr base); static void dump_dwarf_str(struct readelf *re); static void dump_eflags(struct readelf *re, uint64_t e_flags); static void dump_elf(struct readelf *re); static void dump_flags(struct flag_desc *fd, uint64_t flags); static void dump_dyn_val(struct readelf *re, GElf_Dyn *dyn, uint32_t stab); static void dump_dynamic(struct readelf *re); static void dump_liblist(struct readelf *re); static void dump_mips_abiflags(struct readelf *re, struct section *s); static void dump_mips_attributes(struct readelf *re, uint8_t *p, uint8_t *pe); static void dump_mips_odk_reginfo(struct readelf *re, uint8_t *p, size_t sz); static void dump_mips_options(struct readelf *re, struct section *s); static void dump_mips_option_flags(const char *name, struct mips_option *opt, uint64_t info); static void dump_mips_reginfo(struct readelf *re, struct section *s); static void dump_mips_specific_info(struct readelf *re); static void dump_notes(struct readelf *re); static void dump_notes_content(struct readelf *re, const char *buf, size_t sz, off_t off); static void dump_notes_data(struct readelf *re, const char *name, uint32_t type, const char *buf, size_t sz); static void dump_svr4_hash(struct section *s); static void dump_svr4_hash64(struct readelf *re, struct section *s); static void dump_gnu_hash(struct readelf *re, struct section *s); static void dump_gnu_property_type_0(struct readelf *re, const char *buf, size_t sz); static void dump_hash(struct readelf *re); static void dump_phdr(struct readelf *re); static void dump_ppc_attributes(uint8_t *p, uint8_t *pe); static void dump_section_groups(struct readelf *re); static void dump_symtab(struct readelf *re, int i); static void dump_symtabs(struct readelf *re); static uint8_t *dump_unknown_tag(uint64_t tag, uint8_t *p, uint8_t *pe); static void dump_ver(struct readelf *re); static void dump_verdef(struct readelf *re, int dump); static void dump_verneed(struct readelf *re, int dump); static void dump_versym(struct readelf *re); static const char *dwarf_reg(unsigned int mach, unsigned int reg); static const char *dwarf_regname(struct readelf *re, unsigned int num); static struct dumpop *find_dumpop(struct readelf *re, size_t si, const char *sn, int op, int t); static int get_ent_count(struct section *s, int *ent_count); static int get_mips_register_size(uint8_t flag); static char *get_regoff_str(struct readelf *re, Dwarf_Half reg, Dwarf_Addr off); static const char *get_string(struct readelf *re, int strtab, size_t off); static const char *get_symbol_name(struct readelf *re, int symtab, int i); static uint64_t get_symbol_value(struct readelf *re, int symtab, int i); static void load_sections(struct readelf *re); static int loc_at_comparator(const void *la1, const void *la2); static const char *mips_abi_fp(uint64_t fp); static const char *note_type(const char *note_name, unsigned int et, unsigned int nt); static const char *note_type_freebsd(unsigned int nt); static const char *note_type_freebsd_core(unsigned int nt); static const char *note_type_linux_core(unsigned int nt); static const char *note_type_gnu(unsigned int nt); static const char *note_type_netbsd(unsigned int nt); static const char *note_type_openbsd(unsigned int nt); static const char *note_type_unknown(unsigned int nt); static const char *note_type_xen(unsigned int nt); static const char *option_kind(uint8_t kind); static const char *phdr_type(unsigned int mach, unsigned int ptype); static const char *ppc_abi_fp(uint64_t fp); static const char *ppc_abi_vector(uint64_t vec); static void readelf_usage(int status); static void readelf_version(void); static void search_loclist_at(struct readelf *re, Dwarf_Die die, Dwarf_Unsigned lowpc, struct loc_at **la_list, size_t *la_list_len, size_t *la_list_cap); static void search_ver(struct readelf *re); static const char *section_type(unsigned int mach, unsigned int stype); static void set_cu_context(struct readelf *re, Dwarf_Half psize, Dwarf_Half osize, Dwarf_Half ver); static const char *st_bind(unsigned int sbind); static const char *st_shndx(unsigned int shndx); static const char *st_type(unsigned int mach, unsigned int os, unsigned int stype); static const char *st_vis(unsigned int svis); static const char *top_tag(unsigned int tag); static void unload_sections(struct readelf *re); static uint64_t _read_lsb(Elf_Data *d, uint64_t *offsetp, int bytes_to_read); static uint64_t _read_msb(Elf_Data *d, uint64_t *offsetp, int bytes_to_read); static uint64_t _decode_lsb(uint8_t **data, int bytes_to_read); static uint64_t _decode_msb(uint8_t **data, int bytes_to_read); static int64_t _decode_sleb128(uint8_t **dp, uint8_t *dpe); static uint64_t _decode_uleb128(uint8_t **dp, uint8_t *dpe); static struct eflags_desc arm_eflags_desc[] = { {EF_ARM_RELEXEC, "relocatable executable"}, {EF_ARM_HASENTRY, "has entry point"}, {EF_ARM_SYMSARESORTED, "sorted symbol tables"}, {EF_ARM_DYNSYMSUSESEGIDX, "dynamic symbols use segment index"}, {EF_ARM_MAPSYMSFIRST, "mapping symbols precede others"}, {EF_ARM_BE8, "BE8"}, {EF_ARM_LE8, "LE8"}, {EF_ARM_INTERWORK, "interworking enabled"}, {EF_ARM_APCS_26, "uses APCS/26"}, {EF_ARM_APCS_FLOAT, "uses APCS/float"}, {EF_ARM_PIC, "position independent"}, {EF_ARM_ALIGN8, "8 bit structure alignment"}, {EF_ARM_NEW_ABI, "uses new ABI"}, {EF_ARM_OLD_ABI, "uses old ABI"}, {EF_ARM_SOFT_FLOAT, "software FP"}, {EF_ARM_VFP_FLOAT, "VFP"}, {EF_ARM_MAVERICK_FLOAT, "Maverick FP"}, {0, NULL} }; static struct eflags_desc mips_eflags_desc[] = { {EF_MIPS_NOREORDER, "noreorder"}, {EF_MIPS_PIC, "pic"}, {EF_MIPS_CPIC, "cpic"}, {EF_MIPS_UCODE, "ugen_reserved"}, {EF_MIPS_ABI2, "abi2"}, {EF_MIPS_OPTIONS_FIRST, "odk first"}, {EF_MIPS_ARCH_ASE_MDMX, "mdmx"}, {EF_MIPS_ARCH_ASE_M16, "mips16"}, {0, NULL} }; static struct eflags_desc powerpc_eflags_desc[] = { {EF_PPC_EMB, "emb"}, {EF_PPC_RELOCATABLE, "relocatable"}, {EF_PPC_RELOCATABLE_LIB, "relocatable-lib"}, {0, NULL} }; static struct eflags_desc riscv_eflags_desc[] = { {EF_RISCV_RVC, "RVC"}, {EF_RISCV_RVE, "RVE"}, {EF_RISCV_TSO, "TSO"}, {0, NULL} }; static struct eflags_desc sparc_eflags_desc[] = { {EF_SPARC_32PLUS, "v8+"}, {EF_SPARC_SUN_US1, "ultrasparcI"}, {EF_SPARC_HAL_R1, "halr1"}, {EF_SPARC_SUN_US3, "ultrasparcIII"}, {0, NULL} }; static const char * elf_osabi(unsigned int abi) { static char s_abi[32]; switch(abi) { case ELFOSABI_NONE: return "NONE"; case ELFOSABI_HPUX: return "HPUX"; case ELFOSABI_NETBSD: return "NetBSD"; case ELFOSABI_GNU: return "GNU"; case ELFOSABI_HURD: return "HURD"; case ELFOSABI_86OPEN: return "86OPEN"; case ELFOSABI_SOLARIS: return "Solaris"; case ELFOSABI_AIX: return "AIX"; case ELFOSABI_IRIX: return "IRIX"; case ELFOSABI_FREEBSD: return "FreeBSD"; case ELFOSABI_TRU64: return "TRU64"; case ELFOSABI_MODESTO: return "MODESTO"; case ELFOSABI_OPENBSD: return "OpenBSD"; case ELFOSABI_OPENVMS: return "OpenVMS"; case ELFOSABI_NSK: return "NSK"; case ELFOSABI_CLOUDABI: return "CloudABI"; case ELFOSABI_ARM_AEABI: return "ARM EABI"; case ELFOSABI_ARM: return "ARM"; case ELFOSABI_STANDALONE: return "StandAlone"; default: snprintf(s_abi, sizeof(s_abi), "", abi); return (s_abi); } }; static const char * elf_machine(unsigned int mach) { static char s_mach[32]; switch (mach) { case EM_NONE: return "Unknown machine"; case EM_M32: return "AT&T WE32100"; case EM_SPARC: return "Sun SPARC"; case EM_386: return "Intel i386"; case EM_68K: return "Motorola 68000"; case EM_IAMCU: return "Intel MCU"; case EM_88K: return "Motorola 88000"; case EM_860: return "Intel i860"; case EM_MIPS: return "MIPS R3000 Big-Endian only"; case EM_S370: return "IBM System/370"; case EM_MIPS_RS3_LE: return "MIPS R3000 Little-Endian"; case EM_PARISC: return "HP PA-RISC"; case EM_VPP500: return "Fujitsu VPP500"; case EM_SPARC32PLUS: return "SPARC v8plus"; case EM_960: return "Intel 80960"; case EM_PPC: return "PowerPC 32-bit"; case EM_PPC64: return "PowerPC 64-bit"; case EM_S390: return "IBM System/390"; case EM_V800: return "NEC V800"; case EM_FR20: return "Fujitsu FR20"; case EM_RH32: return "TRW RH-32"; case EM_RCE: return "Motorola RCE"; case EM_ARM: return "ARM"; case EM_SH: return "Hitachi SH"; case EM_SPARCV9: return "SPARC v9 64-bit"; case EM_TRICORE: return "Siemens TriCore embedded processor"; case EM_ARC: return "Argonaut RISC Core"; case EM_H8_300: return "Hitachi H8/300"; case EM_H8_300H: return "Hitachi H8/300H"; case EM_H8S: return "Hitachi H8S"; case EM_H8_500: return "Hitachi H8/500"; case EM_IA_64: return "Intel IA-64 Processor"; case EM_MIPS_X: return "Stanford MIPS-X"; case EM_COLDFIRE: return "Motorola ColdFire"; case EM_68HC12: return "Motorola M68HC12"; case EM_MMA: return "Fujitsu MMA"; case EM_PCP: return "Siemens PCP"; case EM_NCPU: return "Sony nCPU"; case EM_NDR1: return "Denso NDR1 microprocessor"; case EM_STARCORE: return "Motorola Star*Core processor"; case EM_ME16: return "Toyota ME16 processor"; case EM_ST100: return "STMicroelectronics ST100 processor"; case EM_TINYJ: return "Advanced Logic Corp. TinyJ processor"; case EM_X86_64: return "Advanced Micro Devices x86-64"; case EM_PDSP: return "Sony DSP Processor"; case EM_FX66: return "Siemens FX66 microcontroller"; case EM_ST9PLUS: return "STMicroelectronics ST9+ 8/16 microcontroller"; case EM_ST7: return "STmicroelectronics ST7 8-bit microcontroller"; case EM_68HC16: return "Motorola MC68HC16 microcontroller"; case EM_68HC11: return "Motorola MC68HC11 microcontroller"; case EM_68HC08: return "Motorola MC68HC08 microcontroller"; case EM_68HC05: return "Motorola MC68HC05 microcontroller"; case EM_SVX: return "Silicon Graphics SVx"; case EM_ST19: return "STMicroelectronics ST19 8-bit mc"; case EM_VAX: return "Digital VAX"; case EM_CRIS: return "Axis Communications 32-bit embedded processor"; case EM_JAVELIN: return "Infineon Tech. 32bit embedded processor"; case EM_FIREPATH: return "Element 14 64-bit DSP Processor"; case EM_ZSP: return "LSI Logic 16-bit DSP Processor"; case EM_MMIX: return "Donald Knuth's educational 64-bit proc"; case EM_HUANY: return "Harvard University MI object files"; case EM_PRISM: return "SiTera Prism"; case EM_AVR: return "Atmel AVR 8-bit microcontroller"; case EM_FR30: return "Fujitsu FR30"; case EM_D10V: return "Mitsubishi D10V"; case EM_D30V: return "Mitsubishi D30V"; case EM_V850: return "NEC v850"; case EM_M32R: return "Mitsubishi M32R"; case EM_MN10300: return "Matsushita MN10300"; case EM_MN10200: return "Matsushita MN10200"; case EM_PJ: return "picoJava"; case EM_OPENRISC: return "OpenRISC 32-bit embedded processor"; case EM_ARC_A5: return "ARC Cores Tangent-A5"; case EM_XTENSA: return "Tensilica Xtensa Architecture"; case EM_VIDEOCORE: return "Alphamosaic VideoCore processor"; case EM_TMM_GPP: return "Thompson Multimedia General Purpose Processor"; case EM_NS32K: return "National Semiconductor 32000 series"; case EM_TPC: return "Tenor Network TPC processor"; case EM_SNP1K: return "Trebia SNP 1000 processor"; case EM_ST200: return "STMicroelectronics ST200 microcontroller"; case EM_IP2K: return "Ubicom IP2xxx microcontroller family"; case EM_MAX: return "MAX Processor"; case EM_CR: return "National Semiconductor CompactRISC microprocessor"; case EM_F2MC16: return "Fujitsu F2MC16"; case EM_MSP430: return "TI embedded microcontroller msp430"; case EM_BLACKFIN: return "Analog Devices Blackfin (DSP) processor"; case EM_SE_C33: return "S1C33 Family of Seiko Epson processors"; case EM_SEP: return "Sharp embedded microprocessor"; case EM_ARCA: return "Arca RISC Microprocessor"; case EM_UNICORE: return "Microprocessor series from PKU-Unity Ltd"; case EM_AARCH64: return "AArch64"; case EM_RISCV: return "RISC-V"; default: snprintf(s_mach, sizeof(s_mach), "", mach); return (s_mach); } } static const char * elf_class(unsigned int class) { static char s_class[32]; switch (class) { case ELFCLASSNONE: return "none"; case ELFCLASS32: return "ELF32"; case ELFCLASS64: return "ELF64"; default: snprintf(s_class, sizeof(s_class), "", class); return (s_class); } } static const char * elf_endian(unsigned int endian) { static char s_endian[32]; switch (endian) { case ELFDATANONE: return "none"; case ELFDATA2LSB: return "2's complement, little endian"; case ELFDATA2MSB: return "2's complement, big endian"; default: snprintf(s_endian, sizeof(s_endian), "", endian); return (s_endian); } } static const char * elf_type(unsigned int type) { static char s_type[32]; switch (type) { case ET_NONE: return "NONE (None)"; case ET_REL: return "REL (Relocatable file)"; case ET_EXEC: return "EXEC (Executable file)"; case ET_DYN: return "DYN (Shared object file)"; case ET_CORE: return "CORE (Core file)"; default: if (type >= ET_LOPROC) snprintf(s_type, sizeof(s_type), "", type); else if (type >= ET_LOOS && type <= ET_HIOS) snprintf(s_type, sizeof(s_type), "", type); else snprintf(s_type, sizeof(s_type), "", type); return (s_type); } } static const char * elf_ver(unsigned int ver) { static char s_ver[32]; switch (ver) { case EV_CURRENT: return "(current)"; case EV_NONE: return "(none)"; default: snprintf(s_ver, sizeof(s_ver), "", ver); return (s_ver); } } static const char * phdr_type(unsigned int mach, unsigned int ptype) { static char s_ptype[32]; if (ptype >= PT_LOPROC && ptype <= PT_HIPROC) { switch (mach) { case EM_ARM: switch (ptype) { case PT_ARM_ARCHEXT: return "ARM_ARCHEXT"; case PT_ARM_EXIDX: return "ARM_EXIDX"; } break; } snprintf(s_ptype, sizeof(s_ptype), "LOPROC+%#x", ptype - PT_LOPROC); return (s_ptype); } switch (ptype) { case PT_NULL: return "NULL"; case PT_LOAD: return "LOAD"; case PT_DYNAMIC: return "DYNAMIC"; case PT_INTERP: return "INTERP"; case PT_NOTE: return "NOTE"; case PT_SHLIB: return "SHLIB"; case PT_PHDR: return "PHDR"; case PT_TLS: return "TLS"; case PT_GNU_EH_FRAME: return "GNU_EH_FRAME"; case PT_GNU_STACK: return "GNU_STACK"; case PT_GNU_RELRO: return "GNU_RELRO"; case PT_OPENBSD_RANDOMIZE: return "OPENBSD_RANDOMIZE"; case PT_OPENBSD_WXNEEDED: return "OPENBSD_WXNEEDED"; case PT_OPENBSD_BOOTDATA: return "OPENBSD_BOOTDATA"; default: if (ptype >= PT_LOOS && ptype <= PT_HIOS) snprintf(s_ptype, sizeof(s_ptype), "LOOS+%#x", ptype - PT_LOOS); else snprintf(s_ptype, sizeof(s_ptype), "", ptype); return (s_ptype); } } static const char * section_type(unsigned int mach, unsigned int stype) { static char s_stype[32]; if (stype >= SHT_LOPROC && stype <= SHT_HIPROC) { switch (mach) { case EM_ARM: switch (stype) { case SHT_ARM_EXIDX: return "ARM_EXIDX"; case SHT_ARM_PREEMPTMAP: return "ARM_PREEMPTMAP"; case SHT_ARM_ATTRIBUTES: return "ARM_ATTRIBUTES"; case SHT_ARM_DEBUGOVERLAY: return "ARM_DEBUGOVERLAY"; case SHT_ARM_OVERLAYSECTION: return "ARM_OVERLAYSECTION"; } break; case EM_X86_64: switch (stype) { case SHT_X86_64_UNWIND: return "X86_64_UNWIND"; default: break; } break; case EM_MIPS: case EM_MIPS_RS3_LE: switch (stype) { case SHT_MIPS_LIBLIST: return "MIPS_LIBLIST"; case SHT_MIPS_MSYM: return "MIPS_MSYM"; case SHT_MIPS_CONFLICT: return "MIPS_CONFLICT"; case SHT_MIPS_GPTAB: return "MIPS_GPTAB"; case SHT_MIPS_UCODE: return "MIPS_UCODE"; case SHT_MIPS_DEBUG: return "MIPS_DEBUG"; case SHT_MIPS_REGINFO: return "MIPS_REGINFO"; case SHT_MIPS_PACKAGE: return "MIPS_PACKAGE"; case SHT_MIPS_PACKSYM: return "MIPS_PACKSYM"; case SHT_MIPS_RELD: return "MIPS_RELD"; case SHT_MIPS_IFACE: return "MIPS_IFACE"; case SHT_MIPS_CONTENT: return "MIPS_CONTENT"; case SHT_MIPS_OPTIONS: return "MIPS_OPTIONS"; case SHT_MIPS_DELTASYM: return "MIPS_DELTASYM"; case SHT_MIPS_DELTAINST: return "MIPS_DELTAINST"; case SHT_MIPS_DELTACLASS: return "MIPS_DELTACLASS"; case SHT_MIPS_DWARF: return "MIPS_DWARF"; case SHT_MIPS_DELTADECL: return "MIPS_DELTADECL"; case SHT_MIPS_SYMBOL_LIB: return "MIPS_SYMBOL_LIB"; case SHT_MIPS_EVENTS: return "MIPS_EVENTS"; case SHT_MIPS_TRANSLATE: return "MIPS_TRANSLATE"; case SHT_MIPS_PIXIE: return "MIPS_PIXIE"; case SHT_MIPS_XLATE: return "MIPS_XLATE"; case SHT_MIPS_XLATE_DEBUG: return "MIPS_XLATE_DEBUG"; case SHT_MIPS_WHIRL: return "MIPS_WHIRL"; case SHT_MIPS_EH_REGION: return "MIPS_EH_REGION"; case SHT_MIPS_XLATE_OLD: return "MIPS_XLATE_OLD"; case SHT_MIPS_PDR_EXCEPTION: return "MIPS_PDR_EXCEPTION"; case SHT_MIPS_ABIFLAGS: return "MIPS_ABIFLAGS"; default: break; } break; default: break; } snprintf(s_stype, sizeof(s_stype), "LOPROC+%#x", stype - SHT_LOPROC); return (s_stype); } switch (stype) { case SHT_NULL: return "NULL"; case SHT_PROGBITS: return "PROGBITS"; case SHT_SYMTAB: return "SYMTAB"; case SHT_STRTAB: return "STRTAB"; case SHT_RELA: return "RELA"; case SHT_HASH: return "HASH"; case SHT_DYNAMIC: return "DYNAMIC"; case SHT_NOTE: return "NOTE"; case SHT_NOBITS: return "NOBITS"; case SHT_REL: return "REL"; case SHT_SHLIB: return "SHLIB"; case SHT_DYNSYM: return "DYNSYM"; case SHT_INIT_ARRAY: return "INIT_ARRAY"; case SHT_FINI_ARRAY: return "FINI_ARRAY"; case SHT_PREINIT_ARRAY: return "PREINIT_ARRAY"; case SHT_GROUP: return "GROUP"; case SHT_SYMTAB_SHNDX: return "SYMTAB_SHNDX"; case SHT_SUNW_dof: return "SUNW_dof"; case SHT_SUNW_cap: return "SUNW_cap"; case SHT_GNU_HASH: return "GNU_HASH"; case SHT_SUNW_ANNOTATE: return "SUNW_ANNOTATE"; case SHT_SUNW_DEBUGSTR: return "SUNW_DEBUGSTR"; case SHT_SUNW_DEBUG: return "SUNW_DEBUG"; case SHT_SUNW_move: return "SUNW_move"; case SHT_SUNW_COMDAT: return "SUNW_COMDAT"; case SHT_SUNW_syminfo: return "SUNW_syminfo"; case SHT_SUNW_verdef: return "SUNW_verdef"; case SHT_SUNW_verneed: return "SUNW_verneed"; case SHT_SUNW_versym: return "SUNW_versym"; default: if (stype >= SHT_LOOS && stype <= SHT_HIOS) snprintf(s_stype, sizeof(s_stype), "LOOS+%#x", stype - SHT_LOOS); else if (stype >= SHT_LOUSER) snprintf(s_stype, sizeof(s_stype), "LOUSER+%#x", stype - SHT_LOUSER); else snprintf(s_stype, sizeof(s_stype), "", stype); return (s_stype); } } static const char * dt_type(unsigned int mach, unsigned int dtype) { static char s_dtype[32]; switch (dtype) { case DT_NULL: return "NULL"; case DT_NEEDED: return "NEEDED"; case DT_PLTRELSZ: return "PLTRELSZ"; case DT_PLTGOT: return "PLTGOT"; case DT_HASH: return "HASH"; case DT_STRTAB: return "STRTAB"; case DT_SYMTAB: return "SYMTAB"; case DT_RELA: return "RELA"; case DT_RELASZ: return "RELASZ"; case DT_RELAENT: return "RELAENT"; case DT_STRSZ: return "STRSZ"; case DT_SYMENT: return "SYMENT"; case DT_INIT: return "INIT"; case DT_FINI: return "FINI"; case DT_SONAME: return "SONAME"; case DT_RPATH: return "RPATH"; case DT_SYMBOLIC: return "SYMBOLIC"; case DT_REL: return "REL"; case DT_RELSZ: return "RELSZ"; case DT_RELENT: return "RELENT"; case DT_PLTREL: return "PLTREL"; case DT_DEBUG: return "DEBUG"; case DT_TEXTREL: return "TEXTREL"; case DT_JMPREL: return "JMPREL"; case DT_BIND_NOW: return "BIND_NOW"; case DT_INIT_ARRAY: return "INIT_ARRAY"; case DT_FINI_ARRAY: return "FINI_ARRAY"; case DT_INIT_ARRAYSZ: return "INIT_ARRAYSZ"; case DT_FINI_ARRAYSZ: return "FINI_ARRAYSZ"; case DT_RUNPATH: return "RUNPATH"; case DT_FLAGS: return "FLAGS"; case DT_PREINIT_ARRAY: return "PREINIT_ARRAY"; case DT_PREINIT_ARRAYSZ: return "PREINIT_ARRAYSZ"; case DT_MAXPOSTAGS: return "MAXPOSTAGS"; case DT_SUNW_AUXILIARY: return "SUNW_AUXILIARY"; case DT_SUNW_RTLDINF: return "SUNW_RTLDINF"; case DT_SUNW_FILTER: return "SUNW_FILTER"; case DT_SUNW_CAP: return "SUNW_CAP"; case DT_SUNW_ASLR: return "SUNW_ASLR"; case DT_CHECKSUM: return "CHECKSUM"; case DT_PLTPADSZ: return "PLTPADSZ"; case DT_MOVEENT: return "MOVEENT"; case DT_MOVESZ: return "MOVESZ"; case DT_FEATURE: return "FEATURE"; case DT_POSFLAG_1: return "POSFLAG_1"; case DT_SYMINSZ: return "SYMINSZ"; case DT_SYMINENT: return "SYMINENT"; case DT_GNU_HASH: return "GNU_HASH"; case DT_TLSDESC_PLT: return "DT_TLSDESC_PLT"; case DT_TLSDESC_GOT: return "DT_TLSDESC_GOT"; case DT_GNU_CONFLICT: return "GNU_CONFLICT"; case DT_GNU_LIBLIST: return "GNU_LIBLIST"; case DT_CONFIG: return "CONFIG"; case DT_DEPAUDIT: return "DEPAUDIT"; case DT_AUDIT: return "AUDIT"; case DT_PLTPAD: return "PLTPAD"; case DT_MOVETAB: return "MOVETAB"; case DT_SYMINFO: return "SYMINFO"; case DT_VERSYM: return "VERSYM"; case DT_RELACOUNT: return "RELACOUNT"; case DT_RELCOUNT: return "RELCOUNT"; case DT_FLAGS_1: return "FLAGS_1"; case DT_VERDEF: return "VERDEF"; case DT_VERDEFNUM: return "VERDEFNUM"; case DT_VERNEED: return "VERNEED"; case DT_VERNEEDNUM: return "VERNEEDNUM"; case DT_AUXILIARY: return "AUXILIARY"; case DT_USED: return "USED"; case DT_FILTER: return "FILTER"; case DT_GNU_PRELINKED: return "GNU_PRELINKED"; case DT_GNU_CONFLICTSZ: return "GNU_CONFLICTSZ"; case DT_GNU_LIBLISTSZ: return "GNU_LIBLISTSZ"; } if (dtype >= DT_LOPROC && dtype <= DT_HIPROC) { switch (mach) { case EM_ARM: switch (dtype) { case DT_ARM_SYMTABSZ: return "ARM_SYMTABSZ"; default: break; } break; case EM_MIPS: case EM_MIPS_RS3_LE: switch (dtype) { case DT_MIPS_RLD_VERSION: return "MIPS_RLD_VERSION"; case DT_MIPS_TIME_STAMP: return "MIPS_TIME_STAMP"; case DT_MIPS_ICHECKSUM: return "MIPS_ICHECKSUM"; case DT_MIPS_IVERSION: return "MIPS_IVERSION"; case DT_MIPS_FLAGS: return "MIPS_FLAGS"; case DT_MIPS_BASE_ADDRESS: return "MIPS_BASE_ADDRESS"; case DT_MIPS_CONFLICT: return "MIPS_CONFLICT"; case DT_MIPS_LIBLIST: return "MIPS_LIBLIST"; case DT_MIPS_LOCAL_GOTNO: return "MIPS_LOCAL_GOTNO"; case DT_MIPS_CONFLICTNO: return "MIPS_CONFLICTNO"; case DT_MIPS_LIBLISTNO: return "MIPS_LIBLISTNO"; case DT_MIPS_SYMTABNO: return "MIPS_SYMTABNO"; case DT_MIPS_UNREFEXTNO: return "MIPS_UNREFEXTNO"; case DT_MIPS_GOTSYM: return "MIPS_GOTSYM"; case DT_MIPS_HIPAGENO: return "MIPS_HIPAGENO"; case DT_MIPS_RLD_MAP: return "MIPS_RLD_MAP"; case DT_MIPS_DELTA_CLASS: return "MIPS_DELTA_CLASS"; case DT_MIPS_DELTA_CLASS_NO: return "MIPS_DELTA_CLASS_NO"; case DT_MIPS_DELTA_INSTANCE: return "MIPS_DELTA_INSTANCE"; case DT_MIPS_DELTA_INSTANCE_NO: return "MIPS_DELTA_INSTANCE_NO"; case DT_MIPS_DELTA_RELOC: return "MIPS_DELTA_RELOC"; case DT_MIPS_DELTA_RELOC_NO: return "MIPS_DELTA_RELOC_NO"; case DT_MIPS_DELTA_SYM: return "MIPS_DELTA_SYM"; case DT_MIPS_DELTA_SYM_NO: return "MIPS_DELTA_SYM_NO"; case DT_MIPS_DELTA_CLASSSYM: return "MIPS_DELTA_CLASSSYM"; case DT_MIPS_DELTA_CLASSSYM_NO: return "MIPS_DELTA_CLASSSYM_NO"; case DT_MIPS_CXX_FLAGS: return "MIPS_CXX_FLAGS"; case DT_MIPS_PIXIE_INIT: return "MIPS_PIXIE_INIT"; case DT_MIPS_SYMBOL_LIB: return "MIPS_SYMBOL_LIB"; case DT_MIPS_LOCALPAGE_GOTIDX: return "MIPS_LOCALPAGE_GOTIDX"; case DT_MIPS_LOCAL_GOTIDX: return "MIPS_LOCAL_GOTIDX"; case DT_MIPS_HIDDEN_GOTIDX: return "MIPS_HIDDEN_GOTIDX"; case DT_MIPS_PROTECTED_GOTIDX: return "MIPS_PROTECTED_GOTIDX"; case DT_MIPS_OPTIONS: return "MIPS_OPTIONS"; case DT_MIPS_INTERFACE: return "MIPS_INTERFACE"; case DT_MIPS_DYNSTR_ALIGN: return "MIPS_DYNSTR_ALIGN"; case DT_MIPS_INTERFACE_SIZE: return "MIPS_INTERFACE_SIZE"; case DT_MIPS_RLD_TEXT_RESOLVE_ADDR: return "MIPS_RLD_TEXT_RESOLVE_ADDR"; case DT_MIPS_PERF_SUFFIX: return "MIPS_PERF_SUFFIX"; case DT_MIPS_COMPACT_SIZE: return "MIPS_COMPACT_SIZE"; case DT_MIPS_GP_VALUE: return "MIPS_GP_VALUE"; case DT_MIPS_AUX_DYNAMIC: return "MIPS_AUX_DYNAMIC"; case DT_MIPS_PLTGOT: return "MIPS_PLTGOT"; case DT_MIPS_RLD_OBJ_UPDATE: return "MIPS_RLD_OBJ_UPDATE"; case DT_MIPS_RWPLT: return "MIPS_RWPLT"; default: break; } break; case EM_SPARC: case EM_SPARC32PLUS: case EM_SPARCV9: switch (dtype) { case DT_SPARC_REGISTER: return "DT_SPARC_REGISTER"; default: break; } break; default: break; } } snprintf(s_dtype, sizeof(s_dtype), "", dtype); return (s_dtype); } static const char * st_bind(unsigned int sbind) { static char s_sbind[32]; switch (sbind) { case STB_LOCAL: return "LOCAL"; case STB_GLOBAL: return "GLOBAL"; case STB_WEAK: return "WEAK"; case STB_GNU_UNIQUE: return "UNIQUE"; default: if (sbind >= STB_LOOS && sbind <= STB_HIOS) return "OS"; else if (sbind >= STB_LOPROC && sbind <= STB_HIPROC) return "PROC"; else snprintf(s_sbind, sizeof(s_sbind), "", sbind); return (s_sbind); } } static const char * st_type(unsigned int mach, unsigned int os, unsigned int stype) { static char s_stype[32]; switch (stype) { case STT_NOTYPE: return "NOTYPE"; case STT_OBJECT: return "OBJECT"; case STT_FUNC: return "FUNC"; case STT_SECTION: return "SECTION"; case STT_FILE: return "FILE"; case STT_COMMON: return "COMMON"; case STT_TLS: return "TLS"; default: if (stype >= STT_LOOS && stype <= STT_HIOS) { if ((os == ELFOSABI_GNU || os == ELFOSABI_FREEBSD) && stype == STT_GNU_IFUNC) return "IFUNC"; snprintf(s_stype, sizeof(s_stype), "OS+%#x", stype - STT_LOOS); } else if (stype >= STT_LOPROC && stype <= STT_HIPROC) { if (mach == EM_SPARCV9 && stype == STT_SPARC_REGISTER) return "REGISTER"; snprintf(s_stype, sizeof(s_stype), "PROC+%#x", stype - STT_LOPROC); } else snprintf(s_stype, sizeof(s_stype), "", stype); return (s_stype); } } static const char * st_vis(unsigned int svis) { static char s_svis[32]; switch(svis) { case STV_DEFAULT: return "DEFAULT"; case STV_INTERNAL: return "INTERNAL"; case STV_HIDDEN: return "HIDDEN"; case STV_PROTECTED: return "PROTECTED"; default: snprintf(s_svis, sizeof(s_svis), "", svis); return (s_svis); } } static const char * st_shndx(unsigned int shndx) { static char s_shndx[32]; switch (shndx) { case SHN_UNDEF: return "UND"; case SHN_ABS: return "ABS"; case SHN_COMMON: return "COM"; default: if (shndx >= SHN_LOPROC && shndx <= SHN_HIPROC) return "PRC"; else if (shndx >= SHN_LOOS && shndx <= SHN_HIOS) return "OS"; else snprintf(s_shndx, sizeof(s_shndx), "%u", shndx); return (s_shndx); } } static struct { const char *ln; char sn; int value; } section_flag[] = { {"WRITE", 'W', SHF_WRITE}, {"ALLOC", 'A', SHF_ALLOC}, {"EXEC", 'X', SHF_EXECINSTR}, {"MERGE", 'M', SHF_MERGE}, {"STRINGS", 'S', SHF_STRINGS}, {"INFO LINK", 'I', SHF_INFO_LINK}, {"OS NONCONF", 'O', SHF_OS_NONCONFORMING}, {"GROUP", 'G', SHF_GROUP}, {"TLS", 'T', SHF_TLS}, {"COMPRESSED", 'C', SHF_COMPRESSED}, {NULL, 0, 0} }; static const char * note_type(const char *name, unsigned int et, unsigned int nt) { if ((strcmp(name, "CORE") == 0 || strcmp(name, "LINUX") == 0) && et == ET_CORE) return note_type_linux_core(nt); else if (strcmp(name, "FreeBSD") == 0) if (et == ET_CORE) return note_type_freebsd_core(nt); else return note_type_freebsd(nt); else if (strcmp(name, "GNU") == 0 && et != ET_CORE) return note_type_gnu(nt); else if (strcmp(name, "NetBSD") == 0 && et != ET_CORE) return note_type_netbsd(nt); else if (strcmp(name, "OpenBSD") == 0 && et != ET_CORE) return note_type_openbsd(nt); else if (strcmp(name, "Xen") == 0 && et != ET_CORE) return note_type_xen(nt); return note_type_unknown(nt); } static const char * note_type_freebsd(unsigned int nt) { switch (nt) { case 1: return "NT_FREEBSD_ABI_TAG"; case 2: return "NT_FREEBSD_NOINIT_TAG"; case 3: return "NT_FREEBSD_ARCH_TAG"; case 4: return "NT_FREEBSD_FEATURE_CTL"; default: return (note_type_unknown(nt)); } } static const char * note_type_freebsd_core(unsigned int nt) { switch (nt) { case 1: return "NT_PRSTATUS"; case 2: return "NT_FPREGSET"; case 3: return "NT_PRPSINFO"; case 7: return "NT_THRMISC"; case 8: return "NT_PROCSTAT_PROC"; case 9: return "NT_PROCSTAT_FILES"; case 10: return "NT_PROCSTAT_VMMAP"; case 11: return "NT_PROCSTAT_GROUPS"; case 12: return "NT_PROCSTAT_UMASK"; case 13: return "NT_PROCSTAT_RLIMIT"; case 14: return "NT_PROCSTAT_OSREL"; case 15: return "NT_PROCSTAT_PSSTRINGS"; case 16: return "NT_PROCSTAT_AUXV"; case 17: return "NT_PTLWPINFO"; case 0x100: return "NT_PPC_VMX (ppc Altivec registers)"; case 0x102: return "NT_PPC_VSX (ppc VSX registers)"; case 0x202: return "NT_X86_XSTATE (x86 XSAVE extended state)"; case 0x400: return "NT_ARM_VFP (arm VFP registers)"; default: return (note_type_unknown(nt)); } } static const char * note_type_linux_core(unsigned int nt) { switch (nt) { case 1: return "NT_PRSTATUS (Process status)"; case 2: return "NT_FPREGSET (Floating point information)"; case 3: return "NT_PRPSINFO (Process information)"; case 4: return "NT_TASKSTRUCT (Task structure)"; case 6: return "NT_AUXV (Auxiliary vector)"; case 10: return "NT_PSTATUS (Linux process status)"; case 12: return "NT_FPREGS (Linux floating point regset)"; case 13: return "NT_PSINFO (Linux process information)"; case 16: return "NT_LWPSTATUS (Linux lwpstatus_t type)"; case 17: return "NT_LWPSINFO (Linux lwpinfo_t type)"; case 18: return "NT_WIN32PSTATUS (win32_pstatus structure)"; case 0x100: return "NT_PPC_VMX (ppc Altivec registers)"; case 0x102: return "NT_PPC_VSX (ppc VSX registers)"; case 0x202: return "NT_X86_XSTATE (x86 XSAVE extended state)"; case 0x300: return "NT_S390_HIGH_GPRS (s390 upper register halves)"; case 0x301: return "NT_S390_TIMER (s390 timer register)"; case 0x302: return "NT_S390_TODCMP (s390 TOD comparator register)"; case 0x303: return "NT_S390_TODPREG (s390 TOD programmable register)"; case 0x304: return "NT_S390_CTRS (s390 control registers)"; case 0x305: return "NT_S390_PREFIX (s390 prefix register)"; case 0x400: return "NT_ARM_VFP (arm VFP registers)"; case 0x46494c45UL: return "NT_FILE (mapped files)"; case 0x46E62B7FUL: return "NT_PRXFPREG (Linux user_xfpregs structure)"; case 0x53494749UL: return "NT_SIGINFO (siginfo_t data)"; default: return (note_type_unknown(nt)); } } static const char * note_type_gnu(unsigned int nt) { switch (nt) { case 1: return "NT_GNU_ABI_TAG"; case 2: return "NT_GNU_HWCAP (Hardware capabilities)"; case 3: return "NT_GNU_BUILD_ID (Build id set by ld(1))"; case 4: return "NT_GNU_GOLD_VERSION (GNU gold version)"; case 5: return "NT_GNU_PROPERTY_TYPE_0"; default: return (note_type_unknown(nt)); } } static const char * note_type_netbsd(unsigned int nt) { switch (nt) { case 1: return "NT_NETBSD_IDENT"; default: return (note_type_unknown(nt)); } } static const char * note_type_openbsd(unsigned int nt) { switch (nt) { case 1: return "NT_OPENBSD_IDENT"; default: return (note_type_unknown(nt)); } } static const char * note_type_unknown(unsigned int nt) { static char s_nt[32]; snprintf(s_nt, sizeof(s_nt), nt >= 0x100 ? "" : "", nt); return (s_nt); } static const char * note_type_xen(unsigned int nt) { switch (nt) { case 0: return "XEN_ELFNOTE_INFO"; case 1: return "XEN_ELFNOTE_ENTRY"; case 2: return "XEN_ELFNOTE_HYPERCALL_PAGE"; case 3: return "XEN_ELFNOTE_VIRT_BASE"; case 4: return "XEN_ELFNOTE_PADDR_OFFSET"; case 5: return "XEN_ELFNOTE_XEN_VERSION"; case 6: return "XEN_ELFNOTE_GUEST_OS"; case 7: return "XEN_ELFNOTE_GUEST_VERSION"; case 8: return "XEN_ELFNOTE_LOADER"; case 9: return "XEN_ELFNOTE_PAE_MODE"; case 10: return "XEN_ELFNOTE_FEATURES"; case 11: return "XEN_ELFNOTE_BSD_SYMTAB"; case 12: return "XEN_ELFNOTE_HV_START_LOW"; case 13: return "XEN_ELFNOTE_L1_MFN_VALID"; case 14: return "XEN_ELFNOTE_SUSPEND_CANCEL"; case 15: return "XEN_ELFNOTE_INIT_P2M"; case 16: return "XEN_ELFNOTE_MOD_START_PFN"; case 17: return "XEN_ELFNOTE_SUPPORTED_FEATURES"; + case 18: return "XEN_ELFNOTE_PHYS32_ENTRY"; default: return (note_type_unknown(nt)); } } static struct { const char *name; int value; } l_flag[] = { {"EXACT_MATCH", LL_EXACT_MATCH}, {"IGNORE_INT_VER", LL_IGNORE_INT_VER}, {"REQUIRE_MINOR", LL_REQUIRE_MINOR}, {"EXPORTS", LL_EXPORTS}, {"DELAY_LOAD", LL_DELAY_LOAD}, {"DELTA", LL_DELTA}, {NULL, 0} }; static struct mips_option mips_exceptions_option[] = { {OEX_PAGE0, "PAGE0"}, {OEX_SMM, "SMM"}, {OEX_PRECISEFP, "PRECISEFP"}, {OEX_DISMISS, "DISMISS"}, {0, NULL} }; static struct mips_option mips_pad_option[] = { {OPAD_PREFIX, "PREFIX"}, {OPAD_POSTFIX, "POSTFIX"}, {OPAD_SYMBOL, "SYMBOL"}, {0, NULL} }; static struct mips_option mips_hwpatch_option[] = { {OHW_R4KEOP, "R4KEOP"}, {OHW_R8KPFETCH, "R8KPFETCH"}, {OHW_R5KEOP, "R5KEOP"}, {OHW_R5KCVTL, "R5KCVTL"}, {0, NULL} }; static struct mips_option mips_hwa_option[] = { {OHWA0_R4KEOP_CHECKED, "R4KEOP_CHECKED"}, {OHWA0_R4KEOP_CLEAN, "R4KEOP_CLEAN"}, {0, NULL} }; static struct mips_option mips_hwo_option[] = { {OHWO0_FIXADE, "FIXADE"}, {0, NULL} }; static const char * option_kind(uint8_t kind) { static char s_kind[32]; switch (kind) { case ODK_NULL: return "NULL"; case ODK_REGINFO: return "REGINFO"; case ODK_EXCEPTIONS: return "EXCEPTIONS"; case ODK_PAD: return "PAD"; case ODK_HWPATCH: return "HWPATCH"; case ODK_FILL: return "FILL"; case ODK_TAGS: return "TAGS"; case ODK_HWAND: return "HWAND"; case ODK_HWOR: return "HWOR"; case ODK_GP_GROUP: return "GP_GROUP"; case ODK_IDENT: return "IDENT"; default: snprintf(s_kind, sizeof(s_kind), "", kind); return (s_kind); } } static const char * top_tag(unsigned int tag) { static char s_top_tag[32]; switch (tag) { case 1: return "File Attributes"; case 2: return "Section Attributes"; case 3: return "Symbol Attributes"; default: snprintf(s_top_tag, sizeof(s_top_tag), "Unknown tag: %u", tag); return (s_top_tag); } } static const char * aeabi_cpu_arch(uint64_t arch) { static char s_cpu_arch[32]; switch (arch) { case 0: return "Pre-V4"; case 1: return "ARM v4"; case 2: return "ARM v4T"; case 3: return "ARM v5T"; case 4: return "ARM v5TE"; case 5: return "ARM v5TEJ"; case 6: return "ARM v6"; case 7: return "ARM v6KZ"; case 8: return "ARM v6T2"; case 9: return "ARM v6K"; case 10: return "ARM v7"; case 11: return "ARM v6-M"; case 12: return "ARM v6S-M"; case 13: return "ARM v7E-M"; default: snprintf(s_cpu_arch, sizeof(s_cpu_arch), "Unknown (%ju)", (uintmax_t) arch); return (s_cpu_arch); } } static const char * aeabi_cpu_arch_profile(uint64_t pf) { static char s_arch_profile[32]; switch (pf) { case 0: return "Not applicable"; case 0x41: /* 'A' */ return "Application Profile"; case 0x52: /* 'R' */ return "Real-Time Profile"; case 0x4D: /* 'M' */ return "Microcontroller Profile"; case 0x53: /* 'S' */ return "Application or Real-Time Profile"; default: snprintf(s_arch_profile, sizeof(s_arch_profile), "Unknown (%ju)\n", (uintmax_t) pf); return (s_arch_profile); } } static const char * aeabi_arm_isa(uint64_t ai) { static char s_ai[32]; switch (ai) { case 0: return "No"; case 1: return "Yes"; default: snprintf(s_ai, sizeof(s_ai), "Unknown (%ju)\n", (uintmax_t) ai); return (s_ai); } } static const char * aeabi_thumb_isa(uint64_t ti) { static char s_ti[32]; switch (ti) { case 0: return "No"; case 1: return "16-bit Thumb"; case 2: return "32-bit Thumb"; default: snprintf(s_ti, sizeof(s_ti), "Unknown (%ju)\n", (uintmax_t) ti); return (s_ti); } } static const char * aeabi_fp_arch(uint64_t fp) { static char s_fp_arch[32]; switch (fp) { case 0: return "No"; case 1: return "VFPv1"; case 2: return "VFPv2"; case 3: return "VFPv3"; case 4: return "VFPv3-D16"; case 5: return "VFPv4"; case 6: return "VFPv4-D16"; default: snprintf(s_fp_arch, sizeof(s_fp_arch), "Unknown (%ju)", (uintmax_t) fp); return (s_fp_arch); } } static const char * aeabi_wmmx_arch(uint64_t wmmx) { static char s_wmmx[32]; switch (wmmx) { case 0: return "No"; case 1: return "WMMXv1"; case 2: return "WMMXv2"; default: snprintf(s_wmmx, sizeof(s_wmmx), "Unknown (%ju)", (uintmax_t) wmmx); return (s_wmmx); } } static const char * aeabi_adv_simd_arch(uint64_t simd) { static char s_simd[32]; switch (simd) { case 0: return "No"; case 1: return "NEONv1"; case 2: return "NEONv2"; default: snprintf(s_simd, sizeof(s_simd), "Unknown (%ju)", (uintmax_t) simd); return (s_simd); } } static const char * aeabi_pcs_config(uint64_t pcs) { static char s_pcs[32]; switch (pcs) { case 0: return "None"; case 1: return "Bare platform"; case 2: return "Linux"; case 3: return "Linux DSO"; case 4: return "Palm OS 2004"; case 5: return "Palm OS (future)"; case 6: return "Symbian OS 2004"; case 7: return "Symbian OS (future)"; default: snprintf(s_pcs, sizeof(s_pcs), "Unknown (%ju)", (uintmax_t) pcs); return (s_pcs); } } static const char * aeabi_pcs_r9(uint64_t r9) { static char s_r9[32]; switch (r9) { case 0: return "V6"; case 1: return "SB"; case 2: return "TLS pointer"; case 3: return "Unused"; default: snprintf(s_r9, sizeof(s_r9), "Unknown (%ju)", (uintmax_t) r9); return (s_r9); } } static const char * aeabi_pcs_rw(uint64_t rw) { static char s_rw[32]; switch (rw) { case 0: return "Absolute"; case 1: return "PC-relative"; case 2: return "SB-relative"; case 3: return "None"; default: snprintf(s_rw, sizeof(s_rw), "Unknown (%ju)", (uintmax_t) rw); return (s_rw); } } static const char * aeabi_pcs_ro(uint64_t ro) { static char s_ro[32]; switch (ro) { case 0: return "Absolute"; case 1: return "PC-relative"; case 2: return "None"; default: snprintf(s_ro, sizeof(s_ro), "Unknown (%ju)", (uintmax_t) ro); return (s_ro); } } static const char * aeabi_pcs_got(uint64_t got) { static char s_got[32]; switch (got) { case 0: return "None"; case 1: return "direct"; case 2: return "indirect via GOT"; default: snprintf(s_got, sizeof(s_got), "Unknown (%ju)", (uintmax_t) got); return (s_got); } } static const char * aeabi_pcs_wchar_t(uint64_t wt) { static char s_wt[32]; switch (wt) { case 0: return "None"; case 2: return "wchar_t size 2"; case 4: return "wchar_t size 4"; default: snprintf(s_wt, sizeof(s_wt), "Unknown (%ju)", (uintmax_t) wt); return (s_wt); } } static const char * aeabi_enum_size(uint64_t es) { static char s_es[32]; switch (es) { case 0: return "None"; case 1: return "smallest"; case 2: return "32-bit"; case 3: return "visible 32-bit"; default: snprintf(s_es, sizeof(s_es), "Unknown (%ju)", (uintmax_t) es); return (s_es); } } static const char * aeabi_align_needed(uint64_t an) { static char s_align_n[64]; switch (an) { case 0: return "No"; case 1: return "8-byte align"; case 2: return "4-byte align"; case 3: return "Reserved"; default: if (an >= 4 && an <= 12) snprintf(s_align_n, sizeof(s_align_n), "8-byte align" " and up to 2^%ju-byte extended align", (uintmax_t) an); else snprintf(s_align_n, sizeof(s_align_n), "Unknown (%ju)", (uintmax_t) an); return (s_align_n); } } static const char * aeabi_align_preserved(uint64_t ap) { static char s_align_p[128]; switch (ap) { case 0: return "No"; case 1: return "8-byte align"; case 2: return "8-byte align and SP % 8 == 0"; case 3: return "Reserved"; default: if (ap >= 4 && ap <= 12) snprintf(s_align_p, sizeof(s_align_p), "8-byte align" " and SP %% 8 == 0 and up to 2^%ju-byte extended" " align", (uintmax_t) ap); else snprintf(s_align_p, sizeof(s_align_p), "Unknown (%ju)", (uintmax_t) ap); return (s_align_p); } } static const char * aeabi_fp_rounding(uint64_t fr) { static char s_fp_r[32]; switch (fr) { case 0: return "Unused"; case 1: return "Needed"; default: snprintf(s_fp_r, sizeof(s_fp_r), "Unknown (%ju)", (uintmax_t) fr); return (s_fp_r); } } static const char * aeabi_fp_denormal(uint64_t fd) { static char s_fp_d[32]; switch (fd) { case 0: return "Unused"; case 1: return "Needed"; case 2: return "Sign Only"; default: snprintf(s_fp_d, sizeof(s_fp_d), "Unknown (%ju)", (uintmax_t) fd); return (s_fp_d); } } static const char * aeabi_fp_exceptions(uint64_t fe) { static char s_fp_e[32]; switch (fe) { case 0: return "Unused"; case 1: return "Needed"; default: snprintf(s_fp_e, sizeof(s_fp_e), "Unknown (%ju)", (uintmax_t) fe); return (s_fp_e); } } static const char * aeabi_fp_user_exceptions(uint64_t fu) { static char s_fp_u[32]; switch (fu) { case 0: return "Unused"; case 1: return "Needed"; default: snprintf(s_fp_u, sizeof(s_fp_u), "Unknown (%ju)", (uintmax_t) fu); return (s_fp_u); } } static const char * aeabi_fp_number_model(uint64_t fn) { static char s_fp_n[32]; switch (fn) { case 0: return "Unused"; case 1: return "IEEE 754 normal"; case 2: return "RTABI"; case 3: return "IEEE 754"; default: snprintf(s_fp_n, sizeof(s_fp_n), "Unknown (%ju)", (uintmax_t) fn); return (s_fp_n); } } static const char * aeabi_fp_16bit_format(uint64_t fp16) { static char s_fp_16[64]; switch (fp16) { case 0: return "None"; case 1: return "IEEE 754"; case 2: return "VFPv3/Advanced SIMD (alternative format)"; default: snprintf(s_fp_16, sizeof(s_fp_16), "Unknown (%ju)", (uintmax_t) fp16); return (s_fp_16); } } static const char * aeabi_mpext(uint64_t mp) { static char s_mp[32]; switch (mp) { case 0: return "Not allowed"; case 1: return "Allowed"; default: snprintf(s_mp, sizeof(s_mp), "Unknown (%ju)", (uintmax_t) mp); return (s_mp); } } static const char * aeabi_div(uint64_t du) { static char s_du[32]; switch (du) { case 0: return "Yes (V7-R/V7-M)"; case 1: return "No"; case 2: return "Yes (V7-A)"; default: snprintf(s_du, sizeof(s_du), "Unknown (%ju)", (uintmax_t) du); return (s_du); } } static const char * aeabi_t2ee(uint64_t t2ee) { static char s_t2ee[32]; switch (t2ee) { case 0: return "Not allowed"; case 1: return "Allowed"; default: snprintf(s_t2ee, sizeof(s_t2ee), "Unknown(%ju)", (uintmax_t) t2ee); return (s_t2ee); } } static const char * aeabi_hardfp(uint64_t hfp) { static char s_hfp[32]; switch (hfp) { case 0: return "Tag_FP_arch"; case 1: return "only SP"; case 2: return "only DP"; case 3: return "both SP and DP"; default: snprintf(s_hfp, sizeof(s_hfp), "Unknown (%ju)", (uintmax_t) hfp); return (s_hfp); } } static const char * aeabi_vfp_args(uint64_t va) { static char s_va[32]; switch (va) { case 0: return "AAPCS (base variant)"; case 1: return "AAPCS (VFP variant)"; case 2: return "toolchain-specific"; default: snprintf(s_va, sizeof(s_va), "Unknown (%ju)", (uintmax_t) va); return (s_va); } } static const char * aeabi_wmmx_args(uint64_t wa) { static char s_wa[32]; switch (wa) { case 0: return "AAPCS (base variant)"; case 1: return "Intel WMMX"; case 2: return "toolchain-specific"; default: snprintf(s_wa, sizeof(s_wa), "Unknown(%ju)", (uintmax_t) wa); return (s_wa); } } static const char * aeabi_unaligned_access(uint64_t ua) { static char s_ua[32]; switch (ua) { case 0: return "Not allowed"; case 1: return "Allowed"; default: snprintf(s_ua, sizeof(s_ua), "Unknown(%ju)", (uintmax_t) ua); return (s_ua); } } static const char * aeabi_fp_hpext(uint64_t fh) { static char s_fh[32]; switch (fh) { case 0: return "Not allowed"; case 1: return "Allowed"; default: snprintf(s_fh, sizeof(s_fh), "Unknown(%ju)", (uintmax_t) fh); return (s_fh); } } static const char * aeabi_optm_goal(uint64_t og) { static char s_og[32]; switch (og) { case 0: return "None"; case 1: return "Speed"; case 2: return "Speed aggressive"; case 3: return "Space"; case 4: return "Space aggressive"; case 5: return "Debugging"; case 6: return "Best Debugging"; default: snprintf(s_og, sizeof(s_og), "Unknown(%ju)", (uintmax_t) og); return (s_og); } } static const char * aeabi_fp_optm_goal(uint64_t fog) { static char s_fog[32]; switch (fog) { case 0: return "None"; case 1: return "Speed"; case 2: return "Speed aggressive"; case 3: return "Space"; case 4: return "Space aggressive"; case 5: return "Accurary"; case 6: return "Best Accurary"; default: snprintf(s_fog, sizeof(s_fog), "Unknown(%ju)", (uintmax_t) fog); return (s_fog); } } static const char * aeabi_virtual(uint64_t vt) { static char s_virtual[64]; switch (vt) { case 0: return "No"; case 1: return "TrustZone"; case 2: return "Virtualization extension"; case 3: return "TrustZone and virtualization extension"; default: snprintf(s_virtual, sizeof(s_virtual), "Unknown(%ju)", (uintmax_t) vt); return (s_virtual); } } static struct { uint64_t tag; const char *s_tag; const char *(*get_desc)(uint64_t val); } aeabi_tags[] = { {4, "Tag_CPU_raw_name", NULL}, {5, "Tag_CPU_name", NULL}, {6, "Tag_CPU_arch", aeabi_cpu_arch}, {7, "Tag_CPU_arch_profile", aeabi_cpu_arch_profile}, {8, "Tag_ARM_ISA_use", aeabi_arm_isa}, {9, "Tag_THUMB_ISA_use", aeabi_thumb_isa}, {10, "Tag_FP_arch", aeabi_fp_arch}, {11, "Tag_WMMX_arch", aeabi_wmmx_arch}, {12, "Tag_Advanced_SIMD_arch", aeabi_adv_simd_arch}, {13, "Tag_PCS_config", aeabi_pcs_config}, {14, "Tag_ABI_PCS_R9_use", aeabi_pcs_r9}, {15, "Tag_ABI_PCS_RW_data", aeabi_pcs_rw}, {16, "Tag_ABI_PCS_RO_data", aeabi_pcs_ro}, {17, "Tag_ABI_PCS_GOT_use", aeabi_pcs_got}, {18, "Tag_ABI_PCS_wchar_t", aeabi_pcs_wchar_t}, {19, "Tag_ABI_FP_rounding", aeabi_fp_rounding}, {20, "Tag_ABI_FP_denormal", aeabi_fp_denormal}, {21, "Tag_ABI_FP_exceptions", aeabi_fp_exceptions}, {22, "Tag_ABI_FP_user_exceptions", aeabi_fp_user_exceptions}, {23, "Tag_ABI_FP_number_model", aeabi_fp_number_model}, {24, "Tag_ABI_align_needed", aeabi_align_needed}, {25, "Tag_ABI_align_preserved", aeabi_align_preserved}, {26, "Tag_ABI_enum_size", aeabi_enum_size}, {27, "Tag_ABI_HardFP_use", aeabi_hardfp}, {28, "Tag_ABI_VFP_args", aeabi_vfp_args}, {29, "Tag_ABI_WMMX_args", aeabi_wmmx_args}, {30, "Tag_ABI_optimization_goals", aeabi_optm_goal}, {31, "Tag_ABI_FP_optimization_goals", aeabi_fp_optm_goal}, {32, "Tag_compatibility", NULL}, {34, "Tag_CPU_unaligned_access", aeabi_unaligned_access}, {36, "Tag_FP_HP_extension", aeabi_fp_hpext}, {38, "Tag_ABI_FP_16bit_format", aeabi_fp_16bit_format}, {42, "Tag_MPextension_use", aeabi_mpext}, {44, "Tag_DIV_use", aeabi_div}, {64, "Tag_nodefaults", NULL}, {65, "Tag_also_compatible_with", NULL}, {66, "Tag_T2EE_use", aeabi_t2ee}, {67, "Tag_conformance", NULL}, {68, "Tag_Virtualization_use", aeabi_virtual}, {70, "Tag_MPextension_use", aeabi_mpext}, }; static const char * mips_abi_fp(uint64_t fp) { static char s_mips_abi_fp[64]; switch (fp) { case 0: return "N/A"; case 1: return "Hard float (double precision)"; case 2: return "Hard float (single precision)"; case 3: return "Soft float"; case 4: return "64-bit float (-mips32r2 -mfp64)"; default: snprintf(s_mips_abi_fp, sizeof(s_mips_abi_fp), "Unknown(%ju)", (uintmax_t) fp); return (s_mips_abi_fp); } } static const char * ppc_abi_fp(uint64_t fp) { static char s_ppc_abi_fp[64]; switch (fp) { case 0: return "N/A"; case 1: return "Hard float (double precision)"; case 2: return "Soft float"; case 3: return "Hard float (single precision)"; default: snprintf(s_ppc_abi_fp, sizeof(s_ppc_abi_fp), "Unknown(%ju)", (uintmax_t) fp); return (s_ppc_abi_fp); } } static const char * ppc_abi_vector(uint64_t vec) { static char s_vec[64]; switch (vec) { case 0: return "N/A"; case 1: return "Generic purpose registers"; case 2: return "AltiVec registers"; case 3: return "SPE registers"; default: snprintf(s_vec, sizeof(s_vec), "Unknown(%ju)", (uintmax_t) vec); return (s_vec); } } static const char * dwarf_reg(unsigned int mach, unsigned int reg) { switch (mach) { case EM_386: case EM_IAMCU: switch (reg) { case 0: return "eax"; case 1: return "ecx"; case 2: return "edx"; case 3: return "ebx"; case 4: return "esp"; case 5: return "ebp"; case 6: return "esi"; case 7: return "edi"; case 8: return "eip"; case 9: return "eflags"; case 11: return "st0"; case 12: return "st1"; case 13: return "st2"; case 14: return "st3"; case 15: return "st4"; case 16: return "st5"; case 17: return "st6"; case 18: return "st7"; case 21: return "xmm0"; case 22: return "xmm1"; case 23: return "xmm2"; case 24: return "xmm3"; case 25: return "xmm4"; case 26: return "xmm5"; case 27: return "xmm6"; case 28: return "xmm7"; case 29: return "mm0"; case 30: return "mm1"; case 31: return "mm2"; case 32: return "mm3"; case 33: return "mm4"; case 34: return "mm5"; case 35: return "mm6"; case 36: return "mm7"; case 37: return "fcw"; case 38: return "fsw"; case 39: return "mxcsr"; case 40: return "es"; case 41: return "cs"; case 42: return "ss"; case 43: return "ds"; case 44: return "fs"; case 45: return "gs"; case 48: return "tr"; case 49: return "ldtr"; default: return (NULL); } case EM_RISCV: switch (reg) { case 0: return "zero"; case 1: return "ra"; case 2: return "sp"; case 3: return "gp"; case 4: return "tp"; case 5: return "t0"; case 6: return "t1"; case 7: return "t2"; case 8: return "s0"; case 9: return "s1"; case 10: return "a0"; case 11: return "a1"; case 12: return "a2"; case 13: return "a3"; case 14: return "a4"; case 15: return "a5"; case 16: return "a6"; case 17: return "a7"; case 18: return "s2"; case 19: return "s3"; case 20: return "s4"; case 21: return "s5"; case 22: return "s6"; case 23: return "s7"; case 24: return "s8"; case 25: return "s9"; case 26: return "s10"; case 27: return "s11"; case 28: return "t3"; case 29: return "t4"; case 30: return "t5"; case 31: return "t6"; case 32: return "ft0"; case 33: return "ft1"; case 34: return "ft2"; case 35: return "ft3"; case 36: return "ft4"; case 37: return "ft5"; case 38: return "ft6"; case 39: return "ft7"; case 40: return "fs0"; case 41: return "fs1"; case 42: return "fa0"; case 43: return "fa1"; case 44: return "fa2"; case 45: return "fa3"; case 46: return "fa4"; case 47: return "fa5"; case 48: return "fa6"; case 49: return "fa7"; case 50: return "fs2"; case 51: return "fs3"; case 52: return "fs4"; case 53: return "fs5"; case 54: return "fs6"; case 55: return "fs7"; case 56: return "fs8"; case 57: return "fs9"; case 58: return "fs10"; case 59: return "fs11"; case 60: return "ft8"; case 61: return "ft9"; case 62: return "ft10"; case 63: return "ft11"; default: return (NULL); } case EM_X86_64: switch (reg) { case 0: return "rax"; case 1: return "rdx"; case 2: return "rcx"; case 3: return "rbx"; case 4: return "rsi"; case 5: return "rdi"; case 6: return "rbp"; case 7: return "rsp"; case 16: return "rip"; case 17: return "xmm0"; case 18: return "xmm1"; case 19: return "xmm2"; case 20: return "xmm3"; case 21: return "xmm4"; case 22: return "xmm5"; case 23: return "xmm6"; case 24: return "xmm7"; case 25: return "xmm8"; case 26: return "xmm9"; case 27: return "xmm10"; case 28: return "xmm11"; case 29: return "xmm12"; case 30: return "xmm13"; case 31: return "xmm14"; case 32: return "xmm15"; case 33: return "st0"; case 34: return "st1"; case 35: return "st2"; case 36: return "st3"; case 37: return "st4"; case 38: return "st5"; case 39: return "st6"; case 40: return "st7"; case 41: return "mm0"; case 42: return "mm1"; case 43: return "mm2"; case 44: return "mm3"; case 45: return "mm4"; case 46: return "mm5"; case 47: return "mm6"; case 48: return "mm7"; case 49: return "rflags"; case 50: return "es"; case 51: return "cs"; case 52: return "ss"; case 53: return "ds"; case 54: return "fs"; case 55: return "gs"; case 58: return "fs.base"; case 59: return "gs.base"; case 62: return "tr"; case 63: return "ldtr"; case 64: return "mxcsr"; case 65: return "fcw"; case 66: return "fsw"; default: return (NULL); } default: return (NULL); } } static void dump_ehdr(struct readelf *re) { size_t phnum, shnum, shstrndx; int i; printf("ELF Header:\n"); /* e_ident[]. */ printf(" Magic: "); for (i = 0; i < EI_NIDENT; i++) printf("%.2x ", re->ehdr.e_ident[i]); putchar('\n'); /* EI_CLASS. */ printf("%-37s%s\n", " Class:", elf_class(re->ehdr.e_ident[EI_CLASS])); /* EI_DATA. */ printf("%-37s%s\n", " Data:", elf_endian(re->ehdr.e_ident[EI_DATA])); /* EI_VERSION. */ printf("%-37s%d %s\n", " Version:", re->ehdr.e_ident[EI_VERSION], elf_ver(re->ehdr.e_ident[EI_VERSION])); /* EI_OSABI. */ printf("%-37s%s\n", " OS/ABI:", elf_osabi(re->ehdr.e_ident[EI_OSABI])); /* EI_ABIVERSION. */ printf("%-37s%d\n", " ABI Version:", re->ehdr.e_ident[EI_ABIVERSION]); /* e_type. */ printf("%-37s%s\n", " Type:", elf_type(re->ehdr.e_type)); /* e_machine. */ printf("%-37s%s\n", " Machine:", elf_machine(re->ehdr.e_machine)); /* e_version. */ printf("%-37s%#x\n", " Version:", re->ehdr.e_version); /* e_entry. */ printf("%-37s%#jx\n", " Entry point address:", (uintmax_t)re->ehdr.e_entry); /* e_phoff. */ printf("%-37s%ju (bytes into file)\n", " Start of program headers:", (uintmax_t)re->ehdr.e_phoff); /* e_shoff. */ printf("%-37s%ju (bytes into file)\n", " Start of section headers:", (uintmax_t)re->ehdr.e_shoff); /* e_flags. */ printf("%-37s%#x", " Flags:", re->ehdr.e_flags); dump_eflags(re, re->ehdr.e_flags); putchar('\n'); /* e_ehsize. */ printf("%-37s%u (bytes)\n", " Size of this header:", re->ehdr.e_ehsize); /* e_phentsize. */ printf("%-37s%u (bytes)\n", " Size of program headers:", re->ehdr.e_phentsize); /* e_phnum. */ printf("%-37s%u", " Number of program headers:", re->ehdr.e_phnum); if (re->ehdr.e_phnum == PN_XNUM) { /* Extended program header numbering is in use. */ if (elf_getphnum(re->elf, &phnum)) printf(" (%zu)", phnum); } putchar('\n'); /* e_shentsize. */ printf("%-37s%u (bytes)\n", " Size of section headers:", re->ehdr.e_shentsize); /* e_shnum. */ printf("%-37s%u", " Number of section headers:", re->ehdr.e_shnum); if (re->ehdr.e_shnum == SHN_UNDEF) { /* Extended section numbering is in use. */ if (elf_getshnum(re->elf, &shnum)) printf(" (%ju)", (uintmax_t)shnum); } putchar('\n'); /* e_shstrndx. */ printf("%-37s%u", " Section header string table index:", re->ehdr.e_shstrndx); if (re->ehdr.e_shstrndx == SHN_XINDEX) { /* Extended section numbering is in use. */ if (elf_getshstrndx(re->elf, &shstrndx)) printf(" (%ju)", (uintmax_t)shstrndx); } putchar('\n'); } static void dump_eflags(struct readelf *re, uint64_t e_flags) { struct eflags_desc *edesc; int arm_eabi; edesc = NULL; switch (re->ehdr.e_machine) { case EM_ARM: arm_eabi = (e_flags & EF_ARM_EABIMASK) >> 24; if (arm_eabi == 0) printf(", GNU EABI"); else if (arm_eabi <= 5) printf(", Version%d EABI", arm_eabi); edesc = arm_eflags_desc; break; case EM_MIPS: case EM_MIPS_RS3_LE: switch ((e_flags & EF_MIPS_ARCH) >> 28) { case 0: printf(", mips1"); break; case 1: printf(", mips2"); break; case 2: printf(", mips3"); break; case 3: printf(", mips4"); break; case 4: printf(", mips5"); break; case 5: printf(", mips32"); break; case 6: printf(", mips64"); break; case 7: printf(", mips32r2"); break; case 8: printf(", mips64r2"); break; default: break; } switch ((e_flags & 0x00FF0000) >> 16) { case 0x81: printf(", 3900"); break; case 0x82: printf(", 4010"); break; case 0x83: printf(", 4100"); break; case 0x85: printf(", 4650"); break; case 0x87: printf(", 4120"); break; case 0x88: printf(", 4111"); break; case 0x8a: printf(", sb1"); break; case 0x8b: printf(", octeon"); break; case 0x8c: printf(", xlr"); break; case 0x91: printf(", 5400"); break; case 0x98: printf(", 5500"); break; case 0x99: printf(", 9000"); break; case 0xa0: printf(", loongson-2e"); break; case 0xa1: printf(", loongson-2f"); break; default: break; } switch ((e_flags & 0x0000F000) >> 12) { case 1: printf(", o32"); break; case 2: printf(", o64"); break; case 3: printf(", eabi32"); break; case 4: printf(", eabi64"); break; default: break; } edesc = mips_eflags_desc; break; case EM_PPC64: switch (e_flags) { case 0: printf(", Unspecified or Power ELF V1 ABI"); break; case 1: printf(", Power ELF V1 ABI"); break; case 2: printf(", OpenPOWER ELF V2 ABI"); break; default: break; } /* FALLTHROUGH */ case EM_PPC: edesc = powerpc_eflags_desc; break; case EM_RISCV: switch (e_flags & EF_RISCV_FLOAT_ABI_MASK) { case EF_RISCV_FLOAT_ABI_SOFT: printf(", soft-float ABI"); break; case EF_RISCV_FLOAT_ABI_SINGLE: printf(", single-float ABI"); break; case EF_RISCV_FLOAT_ABI_DOUBLE: printf(", double-float ABI"); break; case EF_RISCV_FLOAT_ABI_QUAD: printf(", quad-float ABI"); break; } edesc = riscv_eflags_desc; break; case EM_SPARC: case EM_SPARC32PLUS: case EM_SPARCV9: switch ((e_flags & EF_SPARCV9_MM)) { case EF_SPARCV9_TSO: printf(", tso"); break; case EF_SPARCV9_PSO: printf(", pso"); break; case EF_SPARCV9_MM: printf(", rmo"); break; default: break; } edesc = sparc_eflags_desc; break; default: break; } if (edesc != NULL) { while (edesc->desc != NULL) { if (e_flags & edesc->flag) printf(", %s", edesc->desc); edesc++; } } } static void dump_phdr(struct readelf *re) { const char *rawfile; GElf_Phdr phdr; size_t phnum, size; int i, j; #define PH_HDR "Type", "Offset", "VirtAddr", "PhysAddr", "FileSiz", \ "MemSiz", "Flg", "Align" #define PH_CT phdr_type(re->ehdr.e_machine, phdr.p_type), \ (uintmax_t)phdr.p_offset, (uintmax_t)phdr.p_vaddr, \ (uintmax_t)phdr.p_paddr, (uintmax_t)phdr.p_filesz, \ (uintmax_t)phdr.p_memsz, \ phdr.p_flags & PF_R ? 'R' : ' ', \ phdr.p_flags & PF_W ? 'W' : ' ', \ phdr.p_flags & PF_X ? 'E' : ' ', \ (uintmax_t)phdr.p_align if (elf_getphnum(re->elf, &phnum) == 0) { warnx("elf_getphnum failed: %s", elf_errmsg(-1)); return; } if (phnum == 0) { printf("\nThere are no program headers in this file.\n"); return; } printf("\nElf file type is %s", elf_type(re->ehdr.e_type)); printf("\nEntry point 0x%jx\n", (uintmax_t)re->ehdr.e_entry); printf("There are %ju program headers, starting at offset %ju\n", (uintmax_t)phnum, (uintmax_t)re->ehdr.e_phoff); /* Dump program headers. */ printf("\nProgram Headers:\n"); if (re->ec == ELFCLASS32) printf(" %-15s%-9s%-11s%-11s%-8s%-8s%-4s%s\n", PH_HDR); else if (re->options & RE_WW) printf(" %-15s%-9s%-19s%-19s%-9s%-9s%-4s%s\n", PH_HDR); else printf(" %-15s%-19s%-19s%s\n %-19s%-20s" "%-7s%s\n", PH_HDR); for (i = 0; (size_t) i < phnum; i++) { if (gelf_getphdr(re->elf, i, &phdr) != &phdr) { warnx("gelf_getphdr failed: %s", elf_errmsg(-1)); continue; } /* TODO: Add arch-specific segment type dump. */ if (re->ec == ELFCLASS32) printf(" %-14.14s 0x%6.6jx 0x%8.8jx 0x%8.8jx " "0x%5.5jx 0x%5.5jx %c%c%c %#jx\n", PH_CT); else if (re->options & RE_WW) printf(" %-14.14s 0x%6.6jx 0x%16.16jx 0x%16.16jx " "0x%6.6jx 0x%6.6jx %c%c%c %#jx\n", PH_CT); else printf(" %-14.14s 0x%16.16jx 0x%16.16jx 0x%16.16jx\n" " 0x%16.16jx 0x%16.16jx %c%c%c" " %#jx\n", PH_CT); if (phdr.p_type == PT_INTERP) { if ((rawfile = elf_rawfile(re->elf, &size)) == NULL) { warnx("elf_rawfile failed: %s", elf_errmsg(-1)); continue; } if (phdr.p_offset >= size) { warnx("invalid program header offset"); continue; } printf(" [Requesting program interpreter: %s]\n", rawfile + phdr.p_offset); } } /* Dump section to segment mapping. */ if (re->shnum == 0) return; printf("\n Section to Segment mapping:\n"); printf(" Segment Sections...\n"); for (i = 0; (size_t)i < phnum; i++) { if (gelf_getphdr(re->elf, i, &phdr) != &phdr) { warnx("gelf_getphdr failed: %s", elf_errmsg(-1)); continue; } printf(" %2.2d ", i); /* skip NULL section. */ for (j = 1; (size_t)j < re->shnum; j++) { if (re->sl[j].off < phdr.p_offset) continue; if (re->sl[j].off + re->sl[j].sz > phdr.p_offset + phdr.p_filesz && re->sl[j].type != SHT_NOBITS) continue; if (re->sl[j].addr < phdr.p_vaddr || re->sl[j].addr + re->sl[j].sz > phdr.p_vaddr + phdr.p_memsz) continue; if (phdr.p_type == PT_TLS && (re->sl[j].flags & SHF_TLS) == 0) continue; printf("%s ", re->sl[j].name); } printf("\n"); } #undef PH_HDR #undef PH_CT } static char * section_flags(struct readelf *re, struct section *s) { #define BUF_SZ 256 static char buf[BUF_SZ]; int i, p, nb; p = 0; nb = re->ec == ELFCLASS32 ? 8 : 16; if (re->options & RE_T) { snprintf(buf, BUF_SZ, "[%*.*jx]: ", nb, nb, (uintmax_t)s->flags); p += nb + 4; } for (i = 0; section_flag[i].ln != NULL; i++) { if ((s->flags & section_flag[i].value) == 0) continue; if (re->options & RE_T) { snprintf(&buf[p], BUF_SZ - p, "%s, ", section_flag[i].ln); p += strlen(section_flag[i].ln) + 2; } else buf[p++] = section_flag[i].sn; } if (re->options & RE_T && p > nb + 4) p -= 2; buf[p] = '\0'; return (buf); } static void dump_shdr(struct readelf *re) { struct section *s; int i; #define S_HDR "[Nr] Name", "Type", "Addr", "Off", "Size", "ES", \ "Flg", "Lk", "Inf", "Al" #define S_HDRL "[Nr] Name", "Type", "Address", "Offset", "Size", \ "EntSize", "Flags", "Link", "Info", "Align" #define ST_HDR "[Nr] Name", "Type", "Addr", "Off", "Size", "ES", \ "Lk", "Inf", "Al", "Flags" #define ST_HDRL "[Nr] Name", "Type", "Address", "Offset", "Link", \ "Size", "EntSize", "Info", "Align", "Flags" #define S_CT i, s->name, section_type(re->ehdr.e_machine, s->type), \ (uintmax_t)s->addr, (uintmax_t)s->off, (uintmax_t)s->sz,\ (uintmax_t)s->entsize, section_flags(re, s), \ s->link, s->info, (uintmax_t)s->align #define ST_CT i, s->name, section_type(re->ehdr.e_machine, s->type), \ (uintmax_t)s->addr, (uintmax_t)s->off, (uintmax_t)s->sz,\ (uintmax_t)s->entsize, s->link, s->info, \ (uintmax_t)s->align, section_flags(re, s) #define ST_CTL i, s->name, section_type(re->ehdr.e_machine, s->type), \ (uintmax_t)s->addr, (uintmax_t)s->off, s->link, \ (uintmax_t)s->sz, (uintmax_t)s->entsize, s->info, \ (uintmax_t)s->align, section_flags(re, s) if (re->shnum == 0) { printf("\nThere are no sections in this file.\n"); return; } printf("There are %ju section headers, starting at offset 0x%jx:\n", (uintmax_t)re->shnum, (uintmax_t)re->ehdr.e_shoff); printf("\nSection Headers:\n"); if (re->ec == ELFCLASS32) { if (re->options & RE_T) printf(" %s\n %-16s%-9s%-7s%-7s%-5s%-3s%-4s%s\n" "%12s\n", ST_HDR); else printf(" %-23s%-16s%-9s%-7s%-7s%-3s%-4s%-3s%-4s%s\n", S_HDR); } else if (re->options & RE_WW) { if (re->options & RE_T) printf(" %s\n %-16s%-17s%-7s%-7s%-5s%-3s%-4s%s\n" "%12s\n", ST_HDR); else printf(" %-23s%-16s%-17s%-7s%-7s%-3s%-4s%-3s%-4s%s\n", S_HDR); } else { if (re->options & RE_T) printf(" %s\n %-18s%-17s%-18s%s\n %-18s" "%-17s%-18s%s\n%12s\n", ST_HDRL); else printf(" %-23s%-17s%-18s%s\n %-18s%-17s%-7s%" "-6s%-6s%s\n", S_HDRL); } for (i = 0; (size_t)i < re->shnum; i++) { s = &re->sl[i]; if (re->ec == ELFCLASS32) { if (re->options & RE_T) printf(" [%2d] %s\n %-15.15s %8.8jx" " %6.6jx %6.6jx %2.2jx %2u %3u %2ju\n" " %s\n", ST_CT); else printf(" [%2d] %-17.17s %-15.15s %8.8jx" " %6.6jx %6.6jx %2.2jx %3s %2u %3u %2ju\n", S_CT); } else if (re->options & RE_WW) { if (re->options & RE_T) printf(" [%2d] %s\n %-15.15s %16.16jx" " %6.6jx %6.6jx %2.2jx %2u %3u %2ju\n" " %s\n", ST_CT); else printf(" [%2d] %-17.17s %-15.15s %16.16jx" " %6.6jx %6.6jx %2.2jx %3s %2u %3u %2ju\n", S_CT); } else { if (re->options & RE_T) printf(" [%2d] %s\n %-15.15s %16.16jx" " %16.16jx %u\n %16.16jx %16.16jx" " %-16u %ju\n %s\n", ST_CTL); else printf(" [%2d] %-17.17s %-15.15s %16.16jx" " %8.8jx\n %16.16jx %16.16jx " "%3s %2u %3u %ju\n", S_CT); } } if ((re->options & RE_T) == 0) printf("Key to Flags:\n W (write), A (alloc)," " X (execute), M (merge), S (strings)\n" " I (info), L (link order), G (group), x (unknown)\n" " O (extra OS processing required)" " o (OS specific), p (processor specific)\n"); #undef S_HDR #undef S_HDRL #undef ST_HDR #undef ST_HDRL #undef S_CT #undef ST_CT #undef ST_CTL } /* * Return number of entries in the given section. We'd prefer ent_count be a * size_t *, but libelf APIs already use int for section indices. */ static int get_ent_count(struct section *s, int *ent_count) { if (s->entsize == 0) { warnx("section %s has entry size 0", s->name); return (0); } else if (s->sz / s->entsize > INT_MAX) { warnx("section %s has invalid section count", s->name); return (0); } *ent_count = (int)(s->sz / s->entsize); return (1); } static void dump_dynamic(struct readelf *re) { GElf_Dyn dyn; Elf_Data *d; struct section *s; int elferr, i, is_dynamic, j, jmax, nentries; is_dynamic = 0; for (i = 0; (size_t)i < re->shnum; i++) { s = &re->sl[i]; if (s->type != SHT_DYNAMIC) continue; (void) elf_errno(); if ((d = elf_getdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_getdata failed: %s", elf_errmsg(-1)); continue; } if (d->d_size <= 0) continue; is_dynamic = 1; /* Determine the actual number of table entries. */ nentries = 0; if (!get_ent_count(s, &jmax)) continue; for (j = 0; j < jmax; j++) { if (gelf_getdyn(d, j, &dyn) != &dyn) { warnx("gelf_getdyn failed: %s", elf_errmsg(-1)); continue; } nentries ++; if (dyn.d_tag == DT_NULL) break; } printf("\nDynamic section at offset 0x%jx", (uintmax_t)s->off); printf(" contains %u entries:\n", nentries); if (re->ec == ELFCLASS32) printf("%5s%12s%28s\n", "Tag", "Type", "Name/Value"); else printf("%5s%20s%28s\n", "Tag", "Type", "Name/Value"); for (j = 0; j < nentries; j++) { if (gelf_getdyn(d, j, &dyn) != &dyn) continue; /* Dump dynamic entry type. */ if (re->ec == ELFCLASS32) printf(" 0x%8.8jx", (uintmax_t)dyn.d_tag); else printf(" 0x%16.16jx", (uintmax_t)dyn.d_tag); printf(" %-20s", dt_type(re->ehdr.e_machine, dyn.d_tag)); /* Dump dynamic entry value. */ dump_dyn_val(re, &dyn, s->link); } } if (!is_dynamic) printf("\nThere is no dynamic section in this file.\n"); } static char * timestamp(time_t ti) { static char ts[32]; struct tm *t; t = gmtime(&ti); snprintf(ts, sizeof(ts), "%04d-%02d-%02dT%02d:%02d:%02d", t->tm_year + 1900, t->tm_mon + 1, t->tm_mday, t->tm_hour, t->tm_min, t->tm_sec); return (ts); } static const char * dyn_str(struct readelf *re, uint32_t stab, uint64_t d_val) { const char *name; if (stab == SHN_UNDEF) name = "ERROR"; else if ((name = elf_strptr(re->elf, stab, d_val)) == NULL) { (void) elf_errno(); /* clear error */ name = "ERROR"; } return (name); } static void dump_arch_dyn_val(struct readelf *re, GElf_Dyn *dyn) { switch (re->ehdr.e_machine) { case EM_MIPS: case EM_MIPS_RS3_LE: switch (dyn->d_tag) { case DT_MIPS_RLD_VERSION: case DT_MIPS_LOCAL_GOTNO: case DT_MIPS_CONFLICTNO: case DT_MIPS_LIBLISTNO: case DT_MIPS_SYMTABNO: case DT_MIPS_UNREFEXTNO: case DT_MIPS_GOTSYM: case DT_MIPS_HIPAGENO: case DT_MIPS_DELTA_CLASS_NO: case DT_MIPS_DELTA_INSTANCE_NO: case DT_MIPS_DELTA_RELOC_NO: case DT_MIPS_DELTA_SYM_NO: case DT_MIPS_DELTA_CLASSSYM_NO: case DT_MIPS_LOCALPAGE_GOTIDX: case DT_MIPS_LOCAL_GOTIDX: case DT_MIPS_HIDDEN_GOTIDX: case DT_MIPS_PROTECTED_GOTIDX: printf(" %ju\n", (uintmax_t) dyn->d_un.d_val); break; case DT_MIPS_ICHECKSUM: case DT_MIPS_FLAGS: case DT_MIPS_BASE_ADDRESS: case DT_MIPS_CONFLICT: case DT_MIPS_LIBLIST: case DT_MIPS_RLD_MAP: case DT_MIPS_DELTA_CLASS: case DT_MIPS_DELTA_INSTANCE: case DT_MIPS_DELTA_RELOC: case DT_MIPS_DELTA_SYM: case DT_MIPS_DELTA_CLASSSYM: case DT_MIPS_CXX_FLAGS: case DT_MIPS_PIXIE_INIT: case DT_MIPS_SYMBOL_LIB: case DT_MIPS_OPTIONS: case DT_MIPS_INTERFACE: case DT_MIPS_DYNSTR_ALIGN: case DT_MIPS_INTERFACE_SIZE: case DT_MIPS_RLD_TEXT_RESOLVE_ADDR: case DT_MIPS_COMPACT_SIZE: case DT_MIPS_GP_VALUE: case DT_MIPS_AUX_DYNAMIC: case DT_MIPS_PLTGOT: case DT_MIPS_RLD_OBJ_UPDATE: case DT_MIPS_RWPLT: printf(" 0x%jx\n", (uintmax_t) dyn->d_un.d_val); break; case DT_MIPS_IVERSION: case DT_MIPS_PERF_SUFFIX: case DT_MIPS_TIME_STAMP: printf(" %s\n", timestamp(dyn->d_un.d_val)); break; default: printf("\n"); break; } break; default: printf("\n"); break; } } static void dump_flags(struct flag_desc *desc, uint64_t val) { struct flag_desc *fd; for (fd = desc; fd->flag != 0; fd++) { if (val & fd->flag) { val &= ~fd->flag; printf(" %s", fd->desc); } } if (val != 0) printf(" unknown (0x%jx)", (uintmax_t)val); printf("\n"); } static struct flag_desc dt_flags[] = { { DF_ORIGIN, "ORIGIN" }, { DF_SYMBOLIC, "SYMBOLIC" }, { DF_TEXTREL, "TEXTREL" }, { DF_BIND_NOW, "BIND_NOW" }, { DF_STATIC_TLS, "STATIC_TLS" }, { 0, NULL } }; static struct flag_desc dt_flags_1[] = { { DF_1_BIND_NOW, "NOW" }, { DF_1_GLOBAL, "GLOBAL" }, { 0x4, "GROUP" }, { DF_1_NODELETE, "NODELETE" }, { DF_1_LOADFLTR, "LOADFLTR" }, { 0x20, "INITFIRST" }, { DF_1_NOOPEN, "NOOPEN" }, { DF_1_ORIGIN, "ORIGIN" }, { 0x100, "DIRECT" }, { DF_1_INTERPOSE, "INTERPOSE" }, { DF_1_NODEFLIB, "NODEFLIB" }, { 0x1000, "NODUMP" }, { 0x2000, "CONFALT" }, { 0x4000, "ENDFILTEE" }, { 0x8000, "DISPRELDNE" }, { 0x10000, "DISPRELPND" }, { 0x20000, "NODIRECT" }, { 0x40000, "IGNMULDEF" }, { 0x80000, "NOKSYMS" }, { 0x100000, "NOHDR" }, { 0x200000, "EDITED" }, { 0x400000, "NORELOC" }, { 0x800000, "SYMINTPOSE" }, { 0x1000000, "GLOBAUDIT" }, { 0, NULL } }; static void dump_dyn_val(struct readelf *re, GElf_Dyn *dyn, uint32_t stab) { const char *name; if (dyn->d_tag >= DT_LOPROC && dyn->d_tag <= DT_HIPROC && dyn->d_tag != DT_AUXILIARY && dyn->d_tag != DT_FILTER) { dump_arch_dyn_val(re, dyn); return; } /* These entry values are index into the string table. */ name = NULL; if (dyn->d_tag == DT_AUXILIARY || dyn->d_tag == DT_FILTER || dyn->d_tag == DT_NEEDED || dyn->d_tag == DT_SONAME || dyn->d_tag == DT_RPATH || dyn->d_tag == DT_RUNPATH) name = dyn_str(re, stab, dyn->d_un.d_val); switch(dyn->d_tag) { case DT_NULL: case DT_PLTGOT: case DT_HASH: case DT_STRTAB: case DT_SYMTAB: case DT_RELA: case DT_INIT: case DT_SYMBOLIC: case DT_REL: case DT_DEBUG: case DT_TEXTREL: case DT_JMPREL: case DT_FINI: case DT_VERDEF: case DT_VERNEED: case DT_VERSYM: case DT_GNU_HASH: case DT_GNU_LIBLIST: case DT_GNU_CONFLICT: printf(" 0x%jx\n", (uintmax_t) dyn->d_un.d_val); break; case DT_PLTRELSZ: case DT_RELASZ: case DT_RELAENT: case DT_STRSZ: case DT_SYMENT: case DT_RELSZ: case DT_RELENT: case DT_PREINIT_ARRAYSZ: case DT_INIT_ARRAYSZ: case DT_FINI_ARRAYSZ: case DT_GNU_CONFLICTSZ: case DT_GNU_LIBLISTSZ: printf(" %ju (bytes)\n", (uintmax_t) dyn->d_un.d_val); break; case DT_RELACOUNT: case DT_RELCOUNT: case DT_VERDEFNUM: case DT_VERNEEDNUM: printf(" %ju\n", (uintmax_t) dyn->d_un.d_val); break; case DT_AUXILIARY: printf(" Auxiliary library: [%s]\n", name); break; case DT_FILTER: printf(" Filter library: [%s]\n", name); break; case DT_NEEDED: printf(" Shared library: [%s]\n", name); break; case DT_SONAME: printf(" Library soname: [%s]\n", name); break; case DT_RPATH: printf(" Library rpath: [%s]\n", name); break; case DT_RUNPATH: printf(" Library runpath: [%s]\n", name); break; case DT_PLTREL: printf(" %s\n", dt_type(re->ehdr.e_machine, dyn->d_un.d_val)); break; case DT_GNU_PRELINKED: printf(" %s\n", timestamp(dyn->d_un.d_val)); break; case DT_FLAGS: dump_flags(dt_flags, dyn->d_un.d_val); break; case DT_FLAGS_1: dump_flags(dt_flags_1, dyn->d_un.d_val); break; default: printf("\n"); } } static void dump_rel(struct readelf *re, struct section *s, Elf_Data *d) { GElf_Rel r; const char *symname; uint64_t symval; int i, len; uint32_t type; uint8_t type2, type3; if (s->link >= re->shnum) return; #define REL_HDR "r_offset", "r_info", "r_type", "st_value", "st_name" #define REL_CT32 (uintmax_t)r.r_offset, (uintmax_t)r.r_info, \ elftc_reloc_type_str(re->ehdr.e_machine, \ ELF32_R_TYPE(r.r_info)), (uintmax_t)symval, symname #define REL_CT64 (uintmax_t)r.r_offset, (uintmax_t)r.r_info, \ elftc_reloc_type_str(re->ehdr.e_machine, type), \ (uintmax_t)symval, symname printf("\nRelocation section (%s):\n", s->name); if (re->ec == ELFCLASS32) printf("%-8s %-8s %-19s %-8s %s\n", REL_HDR); else { if (re->options & RE_WW) printf("%-16s %-16s %-24s %-16s %s\n", REL_HDR); else printf("%-12s %-12s %-19s %-16s %s\n", REL_HDR); } assert(d->d_size == s->sz); if (!get_ent_count(s, &len)) return; for (i = 0; i < len; i++) { if (gelf_getrel(d, i, &r) != &r) { warnx("gelf_getrel failed: %s", elf_errmsg(-1)); continue; } symname = get_symbol_name(re, s->link, GELF_R_SYM(r.r_info)); symval = get_symbol_value(re, s->link, GELF_R_SYM(r.r_info)); if (re->ec == ELFCLASS32) { r.r_info = ELF32_R_INFO(ELF64_R_SYM(r.r_info), ELF64_R_TYPE(r.r_info)); printf("%8.8jx %8.8jx %-19.19s %8.8jx %s\n", REL_CT32); } else { type = ELF64_R_TYPE(r.r_info); if (re->ehdr.e_machine == EM_MIPS) { type2 = (type >> 8) & 0xFF; type3 = (type >> 16) & 0xFF; type = type & 0xFF; } else { type2 = type3 = 0; } if (re->options & RE_WW) printf("%16.16jx %16.16jx %-24.24s" " %16.16jx %s\n", REL_CT64); else printf("%12.12jx %12.12jx %-19.19s" " %16.16jx %s\n", REL_CT64); if (re->ehdr.e_machine == EM_MIPS) { if (re->options & RE_WW) { printf("%32s: %s\n", "Type2", elftc_reloc_type_str(EM_MIPS, type2)); printf("%32s: %s\n", "Type3", elftc_reloc_type_str(EM_MIPS, type3)); } else { printf("%24s: %s\n", "Type2", elftc_reloc_type_str(EM_MIPS, type2)); printf("%24s: %s\n", "Type3", elftc_reloc_type_str(EM_MIPS, type3)); } } } } #undef REL_HDR #undef REL_CT } static void dump_rela(struct readelf *re, struct section *s, Elf_Data *d) { GElf_Rela r; const char *symname; uint64_t symval; int i, len; uint32_t type; uint8_t type2, type3; if (s->link >= re->shnum) return; #define RELA_HDR "r_offset", "r_info", "r_type", "st_value", \ "st_name + r_addend" #define RELA_CT32 (uintmax_t)r.r_offset, (uintmax_t)r.r_info, \ elftc_reloc_type_str(re->ehdr.e_machine, \ ELF32_R_TYPE(r.r_info)), (uintmax_t)symval, symname #define RELA_CT64 (uintmax_t)r.r_offset, (uintmax_t)r.r_info, \ elftc_reloc_type_str(re->ehdr.e_machine, type), \ (uintmax_t)symval, symname printf("\nRelocation section with addend (%s):\n", s->name); if (re->ec == ELFCLASS32) printf("%-8s %-8s %-19s %-8s %s\n", RELA_HDR); else { if (re->options & RE_WW) printf("%-16s %-16s %-24s %-16s %s\n", RELA_HDR); else printf("%-12s %-12s %-19s %-16s %s\n", RELA_HDR); } assert(d->d_size == s->sz); if (!get_ent_count(s, &len)) return; for (i = 0; i < len; i++) { if (gelf_getrela(d, i, &r) != &r) { warnx("gelf_getrel failed: %s", elf_errmsg(-1)); continue; } symname = get_symbol_name(re, s->link, GELF_R_SYM(r.r_info)); symval = get_symbol_value(re, s->link, GELF_R_SYM(r.r_info)); if (re->ec == ELFCLASS32) { r.r_info = ELF32_R_INFO(ELF64_R_SYM(r.r_info), ELF64_R_TYPE(r.r_info)); printf("%8.8jx %8.8jx %-19.19s %8.8jx %s", RELA_CT32); printf(" + %x\n", (uint32_t) r.r_addend); } else { type = ELF64_R_TYPE(r.r_info); if (re->ehdr.e_machine == EM_MIPS) { type2 = (type >> 8) & 0xFF; type3 = (type >> 16) & 0xFF; type = type & 0xFF; } else { type2 = type3 = 0; } if (re->options & RE_WW) printf("%16.16jx %16.16jx %-24.24s" " %16.16jx %s", RELA_CT64); else printf("%12.12jx %12.12jx %-19.19s" " %16.16jx %s", RELA_CT64); printf(" + %jx\n", (uintmax_t) r.r_addend); if (re->ehdr.e_machine == EM_MIPS) { if (re->options & RE_WW) { printf("%32s: %s\n", "Type2", elftc_reloc_type_str(EM_MIPS, type2)); printf("%32s: %s\n", "Type3", elftc_reloc_type_str(EM_MIPS, type3)); } else { printf("%24s: %s\n", "Type2", elftc_reloc_type_str(EM_MIPS, type2)); printf("%24s: %s\n", "Type3", elftc_reloc_type_str(EM_MIPS, type3)); } } } } #undef RELA_HDR #undef RELA_CT } static void dump_reloc(struct readelf *re) { struct section *s; Elf_Data *d; int i, elferr; for (i = 0; (size_t)i < re->shnum; i++) { s = &re->sl[i]; if (s->type == SHT_REL || s->type == SHT_RELA) { (void) elf_errno(); if ((d = elf_getdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_getdata failed: %s", elf_errmsg(elferr)); continue; } if (s->type == SHT_REL) dump_rel(re, s, d); else dump_rela(re, s, d); } } } static void dump_symtab(struct readelf *re, int i) { struct section *s; Elf_Data *d; GElf_Sym sym; const char *name; uint32_t stab; int elferr, j, len; uint16_t vs; s = &re->sl[i]; if (s->link >= re->shnum) return; stab = s->link; (void) elf_errno(); if ((d = elf_getdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_getdata failed: %s", elf_errmsg(elferr)); return; } if (d->d_size <= 0) return; if (!get_ent_count(s, &len)) return; printf("Symbol table (%s)", s->name); printf(" contains %d entries:\n", len); printf("%7s%9s%14s%5s%8s%6s%9s%5s\n", "Num:", "Value", "Size", "Type", "Bind", "Vis", "Ndx", "Name"); for (j = 0; j < len; j++) { if (gelf_getsym(d, j, &sym) != &sym) { warnx("gelf_getsym failed: %s", elf_errmsg(-1)); continue; } printf("%6d:", j); printf(" %16.16jx", (uintmax_t) sym.st_value); printf(" %5ju", (uintmax_t) sym.st_size); printf(" %-7s", st_type(re->ehdr.e_machine, re->ehdr.e_ident[EI_OSABI], GELF_ST_TYPE(sym.st_info))); printf(" %-6s", st_bind(GELF_ST_BIND(sym.st_info))); printf(" %-8s", st_vis(GELF_ST_VISIBILITY(sym.st_other))); printf(" %3s", st_shndx(sym.st_shndx)); if ((name = elf_strptr(re->elf, stab, sym.st_name)) != NULL) printf(" %s", name); /* Append symbol version string for SHT_DYNSYM symbol table. */ if (s->type == SHT_DYNSYM && re->ver != NULL && re->vs != NULL && re->vs[j] > 1) { vs = re->vs[j] & VERSYM_VERSION; if (vs >= re->ver_sz || re->ver[vs].name == NULL) { warnx("invalid versym version index %u", vs); break; } if (re->vs[j] & VERSYM_HIDDEN || re->ver[vs].type == 0) printf("@%s (%d)", re->ver[vs].name, vs); else printf("@@%s (%d)", re->ver[vs].name, vs); } putchar('\n'); } } static void dump_symtabs(struct readelf *re) { GElf_Dyn dyn; Elf_Data *d; struct section *s; uint64_t dyn_off; int elferr, i, len; /* * If -D is specified, only dump the symbol table specified by * the DT_SYMTAB entry in the .dynamic section. */ dyn_off = 0; if (re->options & RE_DD) { s = NULL; for (i = 0; (size_t)i < re->shnum; i++) if (re->sl[i].type == SHT_DYNAMIC) { s = &re->sl[i]; break; } if (s == NULL) return; (void) elf_errno(); if ((d = elf_getdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_getdata failed: %s", elf_errmsg(-1)); return; } if (d->d_size <= 0) return; if (!get_ent_count(s, &len)) return; for (i = 0; i < len; i++) { if (gelf_getdyn(d, i, &dyn) != &dyn) { warnx("gelf_getdyn failed: %s", elf_errmsg(-1)); continue; } if (dyn.d_tag == DT_SYMTAB) { dyn_off = dyn.d_un.d_val; break; } } } /* Find and dump symbol tables. */ for (i = 0; (size_t)i < re->shnum; i++) { s = &re->sl[i]; if (s->type == SHT_SYMTAB || s->type == SHT_DYNSYM) { if (re->options & RE_DD) { if (dyn_off == s->addr) { dump_symtab(re, i); break; } } else dump_symtab(re, i); } } } static void dump_svr4_hash(struct section *s) { Elf_Data *d; uint32_t *buf; uint32_t nbucket, nchain; uint32_t *bucket, *chain; uint32_t *bl, *c, maxl, total; int elferr, i, j; /* Read and parse the content of .hash section. */ (void) elf_errno(); if ((d = elf_getdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_getdata failed: %s", elf_errmsg(elferr)); return; } if (d->d_size < 2 * sizeof(uint32_t)) { warnx(".hash section too small"); return; } buf = d->d_buf; nbucket = buf[0]; nchain = buf[1]; if (nbucket <= 0 || nchain <= 0) { warnx("Malformed .hash section"); return; } if (d->d_size != (nbucket + nchain + 2) * sizeof(uint32_t)) { warnx("Malformed .hash section"); return; } bucket = &buf[2]; chain = &buf[2 + nbucket]; maxl = 0; if ((bl = calloc(nbucket, sizeof(*bl))) == NULL) errx(EXIT_FAILURE, "calloc failed"); for (i = 0; (uint32_t)i < nbucket; i++) for (j = bucket[i]; j > 0 && (uint32_t)j < nchain; j = chain[j]) if (++bl[i] > maxl) maxl = bl[i]; if ((c = calloc(maxl + 1, sizeof(*c))) == NULL) errx(EXIT_FAILURE, "calloc failed"); for (i = 0; (uint32_t)i < nbucket; i++) c[bl[i]]++; printf("\nHistogram for bucket list length (total of %u buckets):\n", nbucket); printf(" Length\tNumber\t\t%% of total\tCoverage\n"); total = 0; for (i = 0; (uint32_t)i <= maxl; i++) { total += c[i] * i; printf("%7u\t%-10u\t(%5.1f%%)\t%5.1f%%\n", i, c[i], c[i] * 100.0 / nbucket, total * 100.0 / (nchain - 1)); } free(c); free(bl); } static void dump_svr4_hash64(struct readelf *re, struct section *s) { Elf_Data *d, dst; uint64_t *buf; uint64_t nbucket, nchain; uint64_t *bucket, *chain; uint64_t *bl, *c, maxl, total; int elferr, i, j; /* * ALPHA uses 64-bit hash entries. Since libelf assumes that * .hash section contains only 32-bit entry, an explicit * gelf_xlatetom is needed here. */ (void) elf_errno(); if ((d = elf_rawdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_rawdata failed: %s", elf_errmsg(elferr)); return; } d->d_type = ELF_T_XWORD; memcpy(&dst, d, sizeof(Elf_Data)); if (gelf_xlatetom(re->elf, &dst, d, re->ehdr.e_ident[EI_DATA]) != &dst) { warnx("gelf_xlatetom failed: %s", elf_errmsg(-1)); return; } if (dst.d_size < 2 * sizeof(uint64_t)) { warnx(".hash section too small"); return; } buf = dst.d_buf; nbucket = buf[0]; nchain = buf[1]; if (nbucket <= 0 || nchain <= 0) { warnx("Malformed .hash section"); return; } if (d->d_size != (nbucket + nchain + 2) * sizeof(uint32_t)) { warnx("Malformed .hash section"); return; } bucket = &buf[2]; chain = &buf[2 + nbucket]; maxl = 0; if ((bl = calloc(nbucket, sizeof(*bl))) == NULL) errx(EXIT_FAILURE, "calloc failed"); for (i = 0; (uint32_t)i < nbucket; i++) for (j = bucket[i]; j > 0 && (uint32_t)j < nchain; j = chain[j]) if (++bl[i] > maxl) maxl = bl[i]; if ((c = calloc(maxl + 1, sizeof(*c))) == NULL) errx(EXIT_FAILURE, "calloc failed"); for (i = 0; (uint64_t)i < nbucket; i++) c[bl[i]]++; printf("Histogram for bucket list length (total of %ju buckets):\n", (uintmax_t)nbucket); printf(" Length\tNumber\t\t%% of total\tCoverage\n"); total = 0; for (i = 0; (uint64_t)i <= maxl; i++) { total += c[i] * i; printf("%7u\t%-10ju\t(%5.1f%%)\t%5.1f%%\n", i, (uintmax_t)c[i], c[i] * 100.0 / nbucket, total * 100.0 / (nchain - 1)); } free(c); free(bl); } static void dump_gnu_hash(struct readelf *re, struct section *s) { struct section *ds; Elf_Data *d; uint32_t *buf; uint32_t *bucket, *chain; uint32_t nbucket, nchain, symndx, maskwords; uint32_t *bl, *c, maxl, total; int elferr, dynsymcount, i, j; (void) elf_errno(); if ((d = elf_getdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_getdata failed: %s", elf_errmsg(elferr)); return; } if (d->d_size < 4 * sizeof(uint32_t)) { warnx(".gnu.hash section too small"); return; } buf = d->d_buf; nbucket = buf[0]; symndx = buf[1]; maskwords = buf[2]; buf += 4; if (s->link >= re->shnum) return; ds = &re->sl[s->link]; if (!get_ent_count(ds, &dynsymcount)) return; if (symndx >= (uint32_t)dynsymcount) { warnx("Malformed .gnu.hash section (symndx out of range)"); return; } nchain = dynsymcount - symndx; if (d->d_size != 4 * sizeof(uint32_t) + maskwords * (re->ec == ELFCLASS32 ? sizeof(uint32_t) : sizeof(uint64_t)) + (nbucket + nchain) * sizeof(uint32_t)) { warnx("Malformed .gnu.hash section"); return; } bucket = buf + (re->ec == ELFCLASS32 ? maskwords : maskwords * 2); chain = bucket + nbucket; maxl = 0; if ((bl = calloc(nbucket, sizeof(*bl))) == NULL) errx(EXIT_FAILURE, "calloc failed"); for (i = 0; (uint32_t)i < nbucket; i++) for (j = bucket[i]; j > 0 && (uint32_t)j - symndx < nchain; j++) { if (++bl[i] > maxl) maxl = bl[i]; if (chain[j - symndx] & 1) break; } if ((c = calloc(maxl + 1, sizeof(*c))) == NULL) errx(EXIT_FAILURE, "calloc failed"); for (i = 0; (uint32_t)i < nbucket; i++) c[bl[i]]++; printf("Histogram for bucket list length (total of %u buckets):\n", nbucket); printf(" Length\tNumber\t\t%% of total\tCoverage\n"); total = 0; for (i = 0; (uint32_t)i <= maxl; i++) { total += c[i] * i; printf("%7u\t%-10u\t(%5.1f%%)\t%5.1f%%\n", i, c[i], c[i] * 100.0 / nbucket, total * 100.0 / (nchain - 1)); } free(c); free(bl); } static struct flag_desc gnu_property_x86_feature_1_and_bits[] = { { GNU_PROPERTY_X86_FEATURE_1_IBT, "IBT" }, { GNU_PROPERTY_X86_FEATURE_1_SHSTK, "SHSTK" }, { 0, NULL } }; static void dump_gnu_property_type_0(struct readelf *re, const char *buf, size_t sz) { size_t i; uint32_t type, prop_sz; printf(" Properties: "); while (sz > 0) { if (sz < 8) goto bad; type = *(const uint32_t *)(const void *)buf; prop_sz = *(const uint32_t *)(const void *)(buf + 4); buf += 8; sz -= 8; if (prop_sz > sz) goto bad; if (type >= GNU_PROPERTY_LOPROC && type <= GNU_PROPERTY_HIPROC) { if (re->ehdr.e_machine != EM_X86_64) { printf("machine type %x unknown\n", re->ehdr.e_machine); goto unknown; } switch (type) { case GNU_PROPERTY_X86_FEATURE_1_AND: printf("x86 features:"); if (prop_sz != 4) goto bad; dump_flags(gnu_property_x86_feature_1_and_bits, *(const uint32_t *)(const void *)buf); break; } } buf += roundup2(prop_sz, 8); sz -= roundup2(prop_sz, 8); } return; bad: printf("corrupt GNU property\n"); unknown: printf("remaining description data:"); for (i = 0; i < sz; i++) printf(" %02x", (unsigned char)buf[i]); printf("\n"); } static void dump_hash(struct readelf *re) { struct section *s; int i; for (i = 0; (size_t) i < re->shnum; i++) { s = &re->sl[i]; if (s->type == SHT_HASH || s->type == SHT_GNU_HASH) { if (s->type == SHT_GNU_HASH) dump_gnu_hash(re, s); else if (re->ehdr.e_machine == EM_ALPHA && s->entsize == 8) dump_svr4_hash64(re, s); else dump_svr4_hash(s); } } } static void dump_notes(struct readelf *re) { struct section *s; const char *rawfile; GElf_Phdr phdr; Elf_Data *d; size_t filesize, phnum; int i, elferr; if (re->ehdr.e_type == ET_CORE) { /* * Search program headers in the core file for * PT_NOTE entry. */ if (elf_getphnum(re->elf, &phnum) == 0) { warnx("elf_getphnum failed: %s", elf_errmsg(-1)); return; } if (phnum == 0) return; if ((rawfile = elf_rawfile(re->elf, &filesize)) == NULL) { warnx("elf_rawfile failed: %s", elf_errmsg(-1)); return; } for (i = 0; (size_t) i < phnum; i++) { if (gelf_getphdr(re->elf, i, &phdr) != &phdr) { warnx("gelf_getphdr failed: %s", elf_errmsg(-1)); continue; } if (phdr.p_type == PT_NOTE) { if (phdr.p_offset >= filesize || phdr.p_filesz > filesize - phdr.p_offset) { warnx("invalid PHDR offset"); continue; } dump_notes_content(re, rawfile + phdr.p_offset, phdr.p_filesz, phdr.p_offset); } } } else { /* * For objects other than core files, Search for * SHT_NOTE sections. */ for (i = 0; (size_t) i < re->shnum; i++) { s = &re->sl[i]; if (s->type == SHT_NOTE) { (void) elf_errno(); if ((d = elf_getdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_getdata failed: %s", elf_errmsg(elferr)); continue; } dump_notes_content(re, d->d_buf, d->d_size, s->off); } } } } static struct flag_desc note_feature_ctl_flags[] = { { NT_FREEBSD_FCTL_ASLR_DISABLE, "ASLR_DISABLE" }, { NT_FREEBSD_FCTL_PROTMAX_DISABLE, "PROTMAX_DISABLE" }, { NT_FREEBSD_FCTL_STKGAP_DISABLE, "STKGAP_DISABLE" }, { NT_FREEBSD_FCTL_WXNEEDED, "WXNEEDED" }, { 0, NULL } }; +static void +dump_note_string(const char *description, const char *s, size_t len) +{ + size_t i; + int printable = 1; + + if (len == 0 || s[--len] != '\0') { + printable = 0; + } else { + for (i = 0; i < len; i++) { + if (!isprint(s[i])) { + printable = 0; + break; + } + } + } + + if (printable) { + printf(" %s: %s\n", description, s); + } else { + printf(" description data:"); + for (i = 0; i < len; i++) + printf(" %02x", (unsigned char)s[i]); + printf("\n"); + } +} + static void dump_notes_data(struct readelf *re, const char *name, uint32_t type, const char *buf, size_t sz) { size_t i; const uint32_t *ubuf; /* Note data is at least 4-byte aligned. */ if (((uintptr_t)buf & 3) != 0) { warnx("bad note data alignment"); goto unknown; } ubuf = (const uint32_t *)(const void *)buf; if (strcmp(name, "FreeBSD") == 0) { switch (type) { case NT_FREEBSD_ABI_TAG: if (sz != 4) goto unknown; printf(" ABI tag: %u\n", ubuf[0]); return; /* NT_FREEBSD_NOINIT_TAG carries no data, treat as unknown. */ case NT_FREEBSD_ARCH_TAG: if (sz != 4) goto unknown; printf(" Arch tag: %x\n", ubuf[0]); return; case NT_FREEBSD_FEATURE_CTL: if (sz != 4) goto unknown; printf(" Features:"); dump_flags(note_feature_ctl_flags, ubuf[0]); return; } } else if (strcmp(name, "GNU") == 0) { switch (type) { case NT_GNU_PROPERTY_TYPE_0: dump_gnu_property_type_0(re, buf, sz); return; } + } else if (strcmp(name, "Xen") == 0) { + switch (type) { + case 5: + dump_note_string("Xen version", buf, sz); + return; + case 6: + dump_note_string("Guest OS", buf, sz); + return; + case 7: + dump_note_string("Guest version", buf, sz); + return; + case 8: + dump_note_string("Loader", buf, sz); + return; + case 9: + dump_note_string("PAE mode", buf, sz); + return; + case 10: + dump_note_string("Features", buf, sz); + return; + case 11: + dump_note_string("BSD symtab", buf, sz); + return; + } } unknown: printf(" description data:"); for (i = 0; i < sz; i++) printf(" %02x", (unsigned char)buf[i]); printf("\n"); } static void dump_notes_content(struct readelf *re, const char *buf, size_t sz, off_t off) { Elf_Note *note; const char *end, *name; uint32_t namesz, descsz; printf("\nNotes at offset %#010jx with length %#010jx:\n", (uintmax_t) off, (uintmax_t) sz); printf(" %-13s %-15s %s\n", "Owner", "Data size", "Description"); end = buf + sz; while (buf < end) { if (buf + sizeof(*note) > end) { warnx("invalid note header"); return; } note = (Elf_Note *)(uintptr_t) buf; namesz = roundup2(note->n_namesz, 4); descsz = roundup2(note->n_descsz, 4); if (namesz < note->n_namesz || descsz < note->n_descsz || buf + namesz + descsz > end) { warnx("invalid note header"); return; } buf += sizeof(Elf_Note); name = buf; buf += namesz; /* * The name field is required to be nul-terminated, and * n_namesz includes the terminating nul in observed * implementations (contrary to the ELF-64 spec). A special * case is needed for cores generated by some older Linux * versions, which write a note named "CORE" without a nul * terminator and n_namesz = 4. */ if (note->n_namesz == 0) name = ""; else if (note->n_namesz == 4 && strncmp(name, "CORE", 4) == 0) name = "CORE"; else if (strnlen(name, note->n_namesz) >= note->n_namesz) name = ""; printf(" %-13s %#010jx", name, (uintmax_t) note->n_descsz); printf(" %s\n", note_type(name, re->ehdr.e_type, note->n_type)); dump_notes_data(re, name, note->n_type, buf, note->n_descsz); buf += descsz; } } /* * Symbol versioning sections are the same for 32bit and 64bit * ELF objects. */ #define Elf_Verdef Elf32_Verdef #define Elf_Verdaux Elf32_Verdaux #define Elf_Verneed Elf32_Verneed #define Elf_Vernaux Elf32_Vernaux #define SAVE_VERSION_NAME(x, n, t) \ do { \ while (x >= re->ver_sz) { \ nv = realloc(re->ver, \ sizeof(*re->ver) * re->ver_sz * 2); \ if (nv == NULL) { \ warn("realloc failed"); \ free(re->ver); \ return; \ } \ re->ver = nv; \ for (i = re->ver_sz; i < re->ver_sz * 2; i++) { \ re->ver[i].name = NULL; \ re->ver[i].type = 0; \ } \ re->ver_sz *= 2; \ } \ if (x > 1) { \ re->ver[x].name = n; \ re->ver[x].type = t; \ } \ } while (0) static void dump_verdef(struct readelf *re, int dump) { struct section *s; struct symver *nv; Elf_Data *d; Elf_Verdef *vd; Elf_Verdaux *vda; uint8_t *buf, *end, *buf2; const char *name; int elferr, i, j; if ((s = re->vd_s) == NULL) return; if (s->link >= re->shnum) return; if (re->ver == NULL) { re->ver_sz = 16; if ((re->ver = calloc(re->ver_sz, sizeof(*re->ver))) == NULL) { warn("calloc failed"); return; } re->ver[0].name = "*local*"; re->ver[1].name = "*global*"; } if (dump) printf("\nVersion definition section (%s):\n", s->name); (void) elf_errno(); if ((d = elf_getdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_getdata failed: %s", elf_errmsg(elferr)); return; } if (d->d_size == 0) return; buf = d->d_buf; end = buf + d->d_size; while (buf + sizeof(Elf_Verdef) <= end) { vd = (Elf_Verdef *) (uintptr_t) buf; if (dump) { printf(" 0x%4.4lx", (unsigned long) (buf - (uint8_t *)d->d_buf)); printf(" vd_version: %u vd_flags: %d" " vd_ndx: %u vd_cnt: %u", vd->vd_version, vd->vd_flags, vd->vd_ndx, vd->vd_cnt); } buf2 = buf + vd->vd_aux; j = 0; while (buf2 + sizeof(Elf_Verdaux) <= end && j < vd->vd_cnt) { vda = (Elf_Verdaux *) (uintptr_t) buf2; name = get_string(re, s->link, vda->vda_name); if (j == 0) { if (dump) printf(" vda_name: %s\n", name); SAVE_VERSION_NAME((int)vd->vd_ndx, name, 1); } else if (dump) printf(" 0x%4.4lx parent: %s\n", (unsigned long) (buf2 - (uint8_t *)d->d_buf), name); if (vda->vda_next == 0) break; buf2 += vda->vda_next; j++; } if (vd->vd_next == 0) break; buf += vd->vd_next; } } static void dump_verneed(struct readelf *re, int dump) { struct section *s; struct symver *nv; Elf_Data *d; Elf_Verneed *vn; Elf_Vernaux *vna; uint8_t *buf, *end, *buf2; const char *name; int elferr, i, j; if ((s = re->vn_s) == NULL) return; if (s->link >= re->shnum) return; if (re->ver == NULL) { re->ver_sz = 16; if ((re->ver = calloc(re->ver_sz, sizeof(*re->ver))) == NULL) { warn("calloc failed"); return; } re->ver[0].name = "*local*"; re->ver[1].name = "*global*"; } if (dump) printf("\nVersion needed section (%s):\n", s->name); (void) elf_errno(); if ((d = elf_getdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_getdata failed: %s", elf_errmsg(elferr)); return; } if (d->d_size == 0) return; buf = d->d_buf; end = buf + d->d_size; while (buf + sizeof(Elf_Verneed) <= end) { vn = (Elf_Verneed *) (uintptr_t) buf; if (dump) { printf(" 0x%4.4lx", (unsigned long) (buf - (uint8_t *)d->d_buf)); printf(" vn_version: %u vn_file: %s vn_cnt: %u\n", vn->vn_version, get_string(re, s->link, vn->vn_file), vn->vn_cnt); } buf2 = buf + vn->vn_aux; j = 0; while (buf2 + sizeof(Elf_Vernaux) <= end && j < vn->vn_cnt) { vna = (Elf32_Vernaux *) (uintptr_t) buf2; if (dump) printf(" 0x%4.4lx", (unsigned long) (buf2 - (uint8_t *)d->d_buf)); name = get_string(re, s->link, vna->vna_name); if (dump) printf(" vna_name: %s vna_flags: %u" " vna_other: %u\n", name, vna->vna_flags, vna->vna_other); SAVE_VERSION_NAME((int)vna->vna_other, name, 0); if (vna->vna_next == 0) break; buf2 += vna->vna_next; j++; } if (vn->vn_next == 0) break; buf += vn->vn_next; } } static void dump_versym(struct readelf *re) { int i; uint16_t vs; if (re->vs_s == NULL || re->ver == NULL || re->vs == NULL) return; printf("\nVersion symbol section (%s):\n", re->vs_s->name); for (i = 0; i < re->vs_sz; i++) { if ((i & 3) == 0) { if (i > 0) putchar('\n'); printf(" %03x:", i); } vs = re->vs[i] & VERSYM_VERSION; if (vs >= re->ver_sz || re->ver[vs].name == NULL) { warnx("invalid versym version index %u", re->vs[i]); break; } if (re->vs[i] & VERSYM_HIDDEN) printf(" %3xh %-12s ", vs, re->ver[re->vs[i] & VERSYM_VERSION].name); else printf(" %3x %-12s ", vs, re->ver[re->vs[i]].name); } putchar('\n'); } static void dump_ver(struct readelf *re) { if (re->vs_s && re->ver && re->vs) dump_versym(re); if (re->vd_s) dump_verdef(re, 1); if (re->vn_s) dump_verneed(re, 1); } static void search_ver(struct readelf *re) { struct section *s; Elf_Data *d; int elferr, i; for (i = 0; (size_t) i < re->shnum; i++) { s = &re->sl[i]; if (s->type == SHT_SUNW_versym) re->vs_s = s; if (s->type == SHT_SUNW_verneed) re->vn_s = s; if (s->type == SHT_SUNW_verdef) re->vd_s = s; } if (re->vd_s) dump_verdef(re, 0); if (re->vn_s) dump_verneed(re, 0); if (re->vs_s && re->ver != NULL) { (void) elf_errno(); if ((d = elf_getdata(re->vs_s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_getdata failed: %s", elf_errmsg(elferr)); return; } if (d->d_size == 0) return; re->vs = d->d_buf; re->vs_sz = d->d_size / sizeof(Elf32_Half); } } #undef Elf_Verdef #undef Elf_Verdaux #undef Elf_Verneed #undef Elf_Vernaux #undef SAVE_VERSION_NAME /* * Elf32_Lib and Elf64_Lib are identical. */ #define Elf_Lib Elf32_Lib static void dump_liblist(struct readelf *re) { struct section *s; struct tm *t; time_t ti; char tbuf[20]; Elf_Data *d; Elf_Lib *lib; int i, j, k, elferr, first, len; for (i = 0; (size_t) i < re->shnum; i++) { s = &re->sl[i]; if (s->type != SHT_GNU_LIBLIST) continue; if (s->link >= re->shnum) continue; (void) elf_errno(); if ((d = elf_getdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_getdata failed: %s", elf_errmsg(elferr)); continue; } if (d->d_size <= 0) continue; lib = d->d_buf; if (!get_ent_count(s, &len)) continue; printf("\nLibrary list section '%s' ", s->name); printf("contains %d entries:\n", len); printf("%12s%24s%18s%10s%6s\n", "Library", "Time Stamp", "Checksum", "Version", "Flags"); for (j = 0; (uint64_t) j < s->sz / s->entsize; j++) { printf("%3d: ", j); printf("%-20.20s ", get_string(re, s->link, lib->l_name)); ti = lib->l_time_stamp; t = gmtime(&ti); snprintf(tbuf, sizeof(tbuf), "%04d-%02d-%02dT%02d:%02d" ":%2d", t->tm_year + 1900, t->tm_mon + 1, t->tm_mday, t->tm_hour, t->tm_min, t->tm_sec); printf("%-19.19s ", tbuf); printf("0x%08x ", lib->l_checksum); printf("%-7d %#x", lib->l_version, lib->l_flags); if (lib->l_flags != 0) { first = 1; putchar('('); for (k = 0; l_flag[k].name != NULL; k++) { if ((l_flag[k].value & lib->l_flags) == 0) continue; if (!first) putchar(','); else first = 0; printf("%s", l_flag[k].name); } putchar(')'); } putchar('\n'); lib++; } } } #undef Elf_Lib static void dump_section_groups(struct readelf *re) { struct section *s; const char *symname; Elf_Data *d; uint32_t *w; int i, j, elferr; size_t n; for (i = 0; (size_t) i < re->shnum; i++) { s = &re->sl[i]; if (s->type != SHT_GROUP) continue; if (s->link >= re->shnum) continue; (void) elf_errno(); if ((d = elf_getdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_getdata failed: %s", elf_errmsg(elferr)); continue; } if (d->d_size <= 0) continue; w = d->d_buf; /* We only support COMDAT section. */ #ifndef GRP_COMDAT #define GRP_COMDAT 0x1 #endif if ((*w++ & GRP_COMDAT) == 0) return; if (s->entsize == 0) s->entsize = 4; symname = get_symbol_name(re, s->link, s->info); n = s->sz / s->entsize; if (n-- < 1) return; printf("\nCOMDAT group section [%5d] `%s' [%s] contains %ju" " sections:\n", i, s->name, symname, (uintmax_t)n); printf(" %-10.10s %s\n", "[Index]", "Name"); for (j = 0; (size_t) j < n; j++, w++) { if (*w >= re->shnum) { warnx("invalid section index: %u", *w); continue; } printf(" [%5u] %s\n", *w, re->sl[*w].name); } } } static uint8_t * dump_unknown_tag(uint64_t tag, uint8_t *p, uint8_t *pe) { uint64_t val; /* * According to ARM EABI: For tags > 32, even numbered tags have * a ULEB128 param and odd numbered ones have NUL-terminated * string param. This rule probably also applies for tags <= 32 * if the object arch is not ARM. */ printf(" Tag_unknown_%ju: ", (uintmax_t) tag); if (tag & 1) { printf("%s\n", (char *) p); p += strlen((char *) p) + 1; } else { val = _decode_uleb128(&p, pe); printf("%ju\n", (uintmax_t) val); } return (p); } static uint8_t * dump_compatibility_tag(uint8_t *p, uint8_t *pe) { uint64_t val; val = _decode_uleb128(&p, pe); printf("flag = %ju, vendor = %s\n", (uintmax_t) val, p); p += strlen((char *) p) + 1; return (p); } static void dump_arm_attributes(struct readelf *re, uint8_t *p, uint8_t *pe) { uint64_t tag, val; size_t i; int found, desc; (void) re; while (p < pe) { tag = _decode_uleb128(&p, pe); found = desc = 0; for (i = 0; i < sizeof(aeabi_tags) / sizeof(aeabi_tags[0]); i++) { if (tag == aeabi_tags[i].tag) { found = 1; printf(" %s: ", aeabi_tags[i].s_tag); if (aeabi_tags[i].get_desc) { desc = 1; val = _decode_uleb128(&p, pe); printf("%s\n", aeabi_tags[i].get_desc(val)); } break; } if (tag < aeabi_tags[i].tag) break; } if (!found) { p = dump_unknown_tag(tag, p, pe); continue; } if (desc) continue; switch (tag) { case 4: /* Tag_CPU_raw_name */ case 5: /* Tag_CPU_name */ case 67: /* Tag_conformance */ printf("%s\n", (char *) p); p += strlen((char *) p) + 1; break; case 32: /* Tag_compatibility */ p = dump_compatibility_tag(p, pe); break; case 64: /* Tag_nodefaults */ /* ignored, written as 0. */ (void) _decode_uleb128(&p, pe); printf("True\n"); break; case 65: /* Tag_also_compatible_with */ val = _decode_uleb128(&p, pe); /* Must be Tag_CPU_arch */ if (val != 6) { printf("unknown\n"); break; } val = _decode_uleb128(&p, pe); printf("%s\n", aeabi_cpu_arch(val)); /* Skip NUL terminator. */ p++; break; default: putchar('\n'); break; } } } #ifndef Tag_GNU_MIPS_ABI_FP #define Tag_GNU_MIPS_ABI_FP 4 #endif static void dump_mips_attributes(struct readelf *re, uint8_t *p, uint8_t *pe) { uint64_t tag, val; (void) re; while (p < pe) { tag = _decode_uleb128(&p, pe); switch (tag) { case Tag_GNU_MIPS_ABI_FP: val = _decode_uleb128(&p, pe); printf(" Tag_GNU_MIPS_ABI_FP: %s\n", mips_abi_fp(val)); break; case 32: /* Tag_compatibility */ p = dump_compatibility_tag(p, pe); break; default: p = dump_unknown_tag(tag, p, pe); break; } } } #ifndef Tag_GNU_Power_ABI_FP #define Tag_GNU_Power_ABI_FP 4 #endif #ifndef Tag_GNU_Power_ABI_Vector #define Tag_GNU_Power_ABI_Vector 8 #endif static void dump_ppc_attributes(uint8_t *p, uint8_t *pe) { uint64_t tag, val; while (p < pe) { tag = _decode_uleb128(&p, pe); switch (tag) { case Tag_GNU_Power_ABI_FP: val = _decode_uleb128(&p, pe); printf(" Tag_GNU_Power_ABI_FP: %s\n", ppc_abi_fp(val)); break; case Tag_GNU_Power_ABI_Vector: val = _decode_uleb128(&p, pe); printf(" Tag_GNU_Power_ABI_Vector: %s\n", ppc_abi_vector(val)); break; case 32: /* Tag_compatibility */ p = dump_compatibility_tag(p, pe); break; default: p = dump_unknown_tag(tag, p, pe); break; } } } static void dump_attributes(struct readelf *re) { struct section *s; Elf_Data *d; uint8_t *p, *pe, *sp; size_t len, seclen, nlen, sublen; uint64_t val; int tag, i, elferr; for (i = 0; (size_t) i < re->shnum; i++) { s = &re->sl[i]; if (s->type != SHT_GNU_ATTRIBUTES && (re->ehdr.e_machine != EM_ARM || s->type != SHT_LOPROC + 3)) continue; (void) elf_errno(); if ((d = elf_rawdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_rawdata failed: %s", elf_errmsg(elferr)); continue; } if (d->d_size <= 0) continue; p = d->d_buf; pe = p + d->d_size; if (*p != 'A') { printf("Unknown Attribute Section Format: %c\n", (char) *p); continue; } len = d->d_size - 1; p++; while (len > 0) { if (len < 4) { warnx("truncated attribute section length"); return; } seclen = re->dw_decode(&p, 4); if (seclen > len) { warnx("invalid attribute section length"); return; } len -= seclen; nlen = strlen((char *) p) + 1; if (nlen + 4 > seclen) { warnx("invalid attribute section name"); return; } printf("Attribute Section: %s\n", (char *) p); p += nlen; seclen -= nlen + 4; while (seclen > 0) { sp = p; tag = *p++; sublen = re->dw_decode(&p, 4); if (sublen > seclen) { warnx("invalid attribute sub-section" " length"); return; } seclen -= sublen; printf("%s", top_tag(tag)); if (tag == 2 || tag == 3) { putchar(':'); for (;;) { val = _decode_uleb128(&p, pe); if (val == 0) break; printf(" %ju", (uintmax_t) val); } } putchar('\n'); if (re->ehdr.e_machine == EM_ARM && s->type == SHT_LOPROC + 3) dump_arm_attributes(re, p, sp + sublen); else if (re->ehdr.e_machine == EM_MIPS || re->ehdr.e_machine == EM_MIPS_RS3_LE) dump_mips_attributes(re, p, sp + sublen); else if (re->ehdr.e_machine == EM_PPC) dump_ppc_attributes(p, sp + sublen); p = sp + sublen; } } } } static void dump_mips_specific_info(struct readelf *re) { struct section *s; int i; s = NULL; for (i = 0; (size_t) i < re->shnum; i++) { s = &re->sl[i]; if (s->name != NULL && (!strcmp(s->name, ".MIPS.options") || (s->type == SHT_MIPS_OPTIONS))) { dump_mips_options(re, s); } } if (s->name != NULL && (!strcmp(s->name, ".MIPS.abiflags") || (s->type == SHT_MIPS_ABIFLAGS))) dump_mips_abiflags(re, s); /* * Dump .reginfo if present (although it will be ignored by an OS if a * .MIPS.options section is present, according to SGI mips64 spec). */ for (i = 0; (size_t) i < re->shnum; i++) { s = &re->sl[i]; if (s->name != NULL && (!strcmp(s->name, ".reginfo") || (s->type == SHT_MIPS_REGINFO))) dump_mips_reginfo(re, s); } } static void dump_mips_abiflags(struct readelf *re, struct section *s) { Elf_Data *d; uint8_t *p; int elferr; uint32_t isa_ext, ases, flags1, flags2; uint16_t version; uint8_t isa_level, isa_rev, gpr_size, cpr1_size, cpr2_size, fp_abi; if ((d = elf_rawdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_rawdata failed: %s", elf_errmsg(elferr)); return; } if (d->d_size != 24) { warnx("invalid MIPS abiflags section size"); return; } p = d->d_buf; version = re->dw_decode(&p, 2); printf("MIPS ABI Flags Version: %u", version); if (version != 0) { printf(" (unknown)\n\n"); return; } printf("\n\n"); isa_level = re->dw_decode(&p, 1); isa_rev = re->dw_decode(&p, 1); gpr_size = re->dw_decode(&p, 1); cpr1_size = re->dw_decode(&p, 1); cpr2_size = re->dw_decode(&p, 1); fp_abi = re->dw_decode(&p, 1); isa_ext = re->dw_decode(&p, 4); ases = re->dw_decode(&p, 4); flags1 = re->dw_decode(&p, 4); flags2 = re->dw_decode(&p, 4); printf("ISA: "); if (isa_rev <= 1) printf("MIPS%u\n", isa_level); else printf("MIPS%ur%u\n", isa_level, isa_rev); printf("GPR size: %d\n", get_mips_register_size(gpr_size)); printf("CPR1 size: %d\n", get_mips_register_size(cpr1_size)); printf("CPR2 size: %d\n", get_mips_register_size(cpr2_size)); printf("FP ABI: "); switch (fp_abi) { case 3: printf("Soft float"); break; default: printf("%u", fp_abi); break; } printf("\nISA Extension: %u\n", isa_ext); printf("ASEs: %u\n", ases); printf("FLAGS 1: %08x\n", flags1); printf("FLAGS 2: %08x\n", flags2); } static int get_mips_register_size(uint8_t flag) { switch (flag) { case 0: return 0; case 1: return 32; case 2: return 64; case 3: return 128; default: return -1; } } static void dump_mips_reginfo(struct readelf *re, struct section *s) { Elf_Data *d; int elferr, len; (void) elf_errno(); if ((d = elf_rawdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_rawdata failed: %s", elf_errmsg(elferr)); return; } if (d->d_size <= 0) return; if (!get_ent_count(s, &len)) return; printf("\nSection '%s' contains %d entries:\n", s->name, len); dump_mips_odk_reginfo(re, d->d_buf, d->d_size); } static void dump_mips_options(struct readelf *re, struct section *s) { Elf_Data *d; uint32_t info; uint16_t sndx; uint8_t *p, *pe; uint8_t kind, size; int elferr; (void) elf_errno(); if ((d = elf_rawdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_rawdata failed: %s", elf_errmsg(elferr)); return; } if (d->d_size == 0) return; printf("\nSection %s contains:\n", s->name); p = d->d_buf; pe = p + d->d_size; while (p < pe) { if (pe - p < 8) { warnx("Truncated MIPS option header"); return; } kind = re->dw_decode(&p, 1); size = re->dw_decode(&p, 1); sndx = re->dw_decode(&p, 2); info = re->dw_decode(&p, 4); if (size < 8 || size - 8 > pe - p) { warnx("Malformed MIPS option header"); return; } size -= 8; switch (kind) { case ODK_REGINFO: dump_mips_odk_reginfo(re, p, size); break; case ODK_EXCEPTIONS: printf(" EXCEPTIONS FPU_MIN: %#x\n", info & OEX_FPU_MIN); printf("%11.11s FPU_MAX: %#x\n", "", info & OEX_FPU_MAX); dump_mips_option_flags("", mips_exceptions_option, info); break; case ODK_PAD: printf(" %-10.10s section: %ju\n", "OPAD", (uintmax_t) sndx); dump_mips_option_flags("", mips_pad_option, info); break; case ODK_HWPATCH: dump_mips_option_flags("HWPATCH", mips_hwpatch_option, info); break; case ODK_HWAND: dump_mips_option_flags("HWAND", mips_hwa_option, info); break; case ODK_HWOR: dump_mips_option_flags("HWOR", mips_hwo_option, info); break; case ODK_FILL: printf(" %-10.10s %#jx\n", "FILL", (uintmax_t) info); break; case ODK_TAGS: printf(" %-10.10s\n", "TAGS"); break; case ODK_GP_GROUP: printf(" %-10.10s GP group number: %#x\n", "GP_GROUP", info & 0xFFFF); if (info & 0x10000) printf(" %-10.10s GP group is " "self-contained\n", ""); break; case ODK_IDENT: printf(" %-10.10s default GP group number: %#x\n", "IDENT", info & 0xFFFF); if (info & 0x10000) printf(" %-10.10s default GP group is " "self-contained\n", ""); break; case ODK_PAGESIZE: printf(" %-10.10s\n", "PAGESIZE"); break; default: break; } p += size; } } static void dump_mips_option_flags(const char *name, struct mips_option *opt, uint64_t info) { int first; first = 1; for (; opt->desc != NULL; opt++) { if (info & opt->flag) { printf(" %-10.10s %s\n", first ? name : "", opt->desc); first = 0; } } } static void dump_mips_odk_reginfo(struct readelf *re, uint8_t *p, size_t sz) { uint32_t ri_gprmask; uint32_t ri_cprmask[4]; uint64_t ri_gp_value; uint8_t *pe; int i; pe = p + sz; while (p < pe) { ri_gprmask = re->dw_decode(&p, 4); /* Skip ri_pad padding field for mips64. */ if (re->ec == ELFCLASS64) re->dw_decode(&p, 4); for (i = 0; i < 4; i++) ri_cprmask[i] = re->dw_decode(&p, 4); if (re->ec == ELFCLASS32) ri_gp_value = re->dw_decode(&p, 4); else ri_gp_value = re->dw_decode(&p, 8); printf(" %s ", option_kind(ODK_REGINFO)); printf("ri_gprmask: 0x%08jx\n", (uintmax_t) ri_gprmask); for (i = 0; i < 4; i++) printf("%11.11s ri_cprmask[%d]: 0x%08jx\n", "", i, (uintmax_t) ri_cprmask[i]); printf("%12.12s", ""); printf("ri_gp_value: %#jx\n", (uintmax_t) ri_gp_value); } } static void dump_arch_specific_info(struct readelf *re) { dump_liblist(re); dump_attributes(re); switch (re->ehdr.e_machine) { case EM_MIPS: case EM_MIPS_RS3_LE: dump_mips_specific_info(re); default: break; } } static const char * dwarf_regname(struct readelf *re, unsigned int num) { static char rx[32]; const char *rn; if ((rn = dwarf_reg(re->ehdr.e_machine, num)) != NULL) return (rn); snprintf(rx, sizeof(rx), "r%u", num); return (rx); } static void dump_dwarf_line(struct readelf *re) { struct section *s; Dwarf_Die die; Dwarf_Error de; Dwarf_Half tag, version, pointer_size; Dwarf_Unsigned offset, endoff, length, hdrlen, dirndx, mtime, fsize; Dwarf_Small minlen, defstmt, lrange, opbase, oplen; Elf_Data *d; char *pn; uint64_t address, file, line, column, isa, opsize, udelta; int64_t sdelta; uint8_t *p, *pe; int8_t lbase; int i, is_stmt, dwarf_size, elferr, ret; printf("\nDump of debug contents of section .debug_line:\n"); s = NULL; for (i = 0; (size_t) i < re->shnum; i++) { s = &re->sl[i]; if (s->name != NULL && !strcmp(s->name, ".debug_line")) break; } if ((size_t) i >= re->shnum) return; (void) elf_errno(); if ((d = elf_getdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_getdata failed: %s", elf_errmsg(-1)); return; } if (d->d_size <= 0) return; while ((ret = dwarf_next_cu_header(re->dbg, NULL, NULL, NULL, NULL, NULL, &de)) == DW_DLV_OK) { die = NULL; while (dwarf_siblingof(re->dbg, die, &die, &de) == DW_DLV_OK) { if (dwarf_tag(die, &tag, &de) != DW_DLV_OK) { warnx("dwarf_tag failed: %s", dwarf_errmsg(de)); return; } /* XXX: What about DW_TAG_partial_unit? */ if (tag == DW_TAG_compile_unit) break; } if (die == NULL) { warnx("could not find DW_TAG_compile_unit die"); return; } if (dwarf_attrval_unsigned(die, DW_AT_stmt_list, &offset, &de) != DW_DLV_OK) continue; length = re->dw_read(d, &offset, 4); if (length == 0xffffffff) { dwarf_size = 8; length = re->dw_read(d, &offset, 8); } else dwarf_size = 4; if (length > d->d_size - offset) { warnx("invalid .dwarf_line section"); continue; } endoff = offset + length; pe = (uint8_t *) d->d_buf + endoff; version = re->dw_read(d, &offset, 2); hdrlen = re->dw_read(d, &offset, dwarf_size); minlen = re->dw_read(d, &offset, 1); defstmt = re->dw_read(d, &offset, 1); lbase = re->dw_read(d, &offset, 1); lrange = re->dw_read(d, &offset, 1); opbase = re->dw_read(d, &offset, 1); printf("\n"); printf(" Length:\t\t\t%ju\n", (uintmax_t) length); printf(" DWARF version:\t\t%u\n", version); printf(" Prologue Length:\t\t%ju\n", (uintmax_t) hdrlen); printf(" Minimum Instruction Length:\t%u\n", minlen); printf(" Initial value of 'is_stmt':\t%u\n", defstmt); printf(" Line Base:\t\t\t%d\n", lbase); printf(" Line Range:\t\t\t%u\n", lrange); printf(" Opcode Base:\t\t\t%u\n", opbase); (void) dwarf_get_address_size(re->dbg, &pointer_size, &de); printf(" (Pointer size:\t\t%u)\n", pointer_size); printf("\n"); printf(" Opcodes:\n"); for (i = 1; i < opbase; i++) { oplen = re->dw_read(d, &offset, 1); printf(" Opcode %d has %u args\n", i, oplen); } printf("\n"); printf(" The Directory Table:\n"); p = (uint8_t *) d->d_buf + offset; while (*p != '\0') { printf(" %s\n", (char *) p); p += strlen((char *) p) + 1; } p++; printf("\n"); printf(" The File Name Table:\n"); printf(" Entry\tDir\tTime\tSize\tName\n"); i = 0; while (*p != '\0') { i++; pn = (char *) p; p += strlen(pn) + 1; dirndx = _decode_uleb128(&p, pe); mtime = _decode_uleb128(&p, pe); fsize = _decode_uleb128(&p, pe); printf(" %d\t%ju\t%ju\t%ju\t%s\n", i, (uintmax_t) dirndx, (uintmax_t) mtime, (uintmax_t) fsize, pn); } #define RESET_REGISTERS \ do { \ address = 0; \ file = 1; \ line = 1; \ column = 0; \ is_stmt = defstmt; \ } while(0) #define LINE(x) (lbase + (((x) - opbase) % lrange)) #define ADDRESS(x) ((((x) - opbase) / lrange) * minlen) p++; printf("\n"); printf(" Line Number Statements:\n"); RESET_REGISTERS; while (p < pe) { if (*p == 0) { /* * Extended Opcodes. */ p++; opsize = _decode_uleb128(&p, pe); printf(" Extended opcode %u: ", *p); switch (*p) { case DW_LNE_end_sequence: p++; RESET_REGISTERS; printf("End of Sequence\n"); break; case DW_LNE_set_address: p++; address = re->dw_decode(&p, pointer_size); printf("set Address to %#jx\n", (uintmax_t) address); break; case DW_LNE_define_file: p++; pn = (char *) p; p += strlen(pn) + 1; dirndx = _decode_uleb128(&p, pe); mtime = _decode_uleb128(&p, pe); fsize = _decode_uleb128(&p, pe); printf("define new file: %s\n", pn); break; default: /* Unrecognized extened opcodes. */ p += opsize; printf("unknown opcode\n"); } } else if (*p > 0 && *p < opbase) { /* * Standard Opcodes. */ switch(*p++) { case DW_LNS_copy: printf(" Copy\n"); break; case DW_LNS_advance_pc: udelta = _decode_uleb128(&p, pe) * minlen; address += udelta; printf(" Advance PC by %ju to %#jx\n", (uintmax_t) udelta, (uintmax_t) address); break; case DW_LNS_advance_line: sdelta = _decode_sleb128(&p, pe); line += sdelta; printf(" Advance Line by %jd to %ju\n", (intmax_t) sdelta, (uintmax_t) line); break; case DW_LNS_set_file: file = _decode_uleb128(&p, pe); printf(" Set File to %ju\n", (uintmax_t) file); break; case DW_LNS_set_column: column = _decode_uleb128(&p, pe); printf(" Set Column to %ju\n", (uintmax_t) column); break; case DW_LNS_negate_stmt: is_stmt = !is_stmt; printf(" Set is_stmt to %d\n", is_stmt); break; case DW_LNS_set_basic_block: printf(" Set basic block flag\n"); break; case DW_LNS_const_add_pc: address += ADDRESS(255); printf(" Advance PC by constant %ju" " to %#jx\n", (uintmax_t) ADDRESS(255), (uintmax_t) address); break; case DW_LNS_fixed_advance_pc: udelta = re->dw_decode(&p, 2); address += udelta; printf(" Advance PC by fixed value " "%ju to %#jx\n", (uintmax_t) udelta, (uintmax_t) address); break; case DW_LNS_set_prologue_end: printf(" Set prologue end flag\n"); break; case DW_LNS_set_epilogue_begin: printf(" Set epilogue begin flag\n"); break; case DW_LNS_set_isa: isa = _decode_uleb128(&p, pe); printf(" Set isa to %ju\n", (uintmax_t) isa); break; default: /* Unrecognized extended opcodes. */ printf(" Unknown extended opcode %u\n", *(p - 1)); break; } } else { /* * Special Opcodes. */ line += LINE(*p); address += ADDRESS(*p); printf(" Special opcode %u: advance Address " "by %ju to %#jx and Line by %jd to %ju\n", *p - opbase, (uintmax_t) ADDRESS(*p), (uintmax_t) address, (intmax_t) LINE(*p), (uintmax_t) line); p++; } } } if (ret == DW_DLV_ERROR) warnx("dwarf_next_cu_header: %s", dwarf_errmsg(de)); #undef RESET_REGISTERS #undef LINE #undef ADDRESS } static void dump_dwarf_line_decoded(struct readelf *re) { Dwarf_Die die; Dwarf_Line *linebuf, ln; Dwarf_Addr lineaddr; Dwarf_Signed linecount, srccount; Dwarf_Unsigned lineno, fn; Dwarf_Error de; const char *dir, *file; char **srcfiles; int i, ret; printf("Decoded dump of debug contents of section .debug_line:\n\n"); while ((ret = dwarf_next_cu_header(re->dbg, NULL, NULL, NULL, NULL, NULL, &de)) == DW_DLV_OK) { if (dwarf_siblingof(re->dbg, NULL, &die, &de) != DW_DLV_OK) continue; if (dwarf_attrval_string(die, DW_AT_name, &file, &de) != DW_DLV_OK) file = NULL; if (dwarf_attrval_string(die, DW_AT_comp_dir, &dir, &de) != DW_DLV_OK) dir = NULL; printf("CU: "); if (dir && file && file[0] != '/') printf("%s/", dir); if (file) printf("%s", file); putchar('\n'); printf("%-37s %11s %s\n", "Filename", "Line Number", "Starting Address"); if (dwarf_srclines(die, &linebuf, &linecount, &de) != DW_DLV_OK) continue; if (dwarf_srcfiles(die, &srcfiles, &srccount, &de) != DW_DLV_OK) continue; for (i = 0; i < linecount; i++) { ln = linebuf[i]; if (dwarf_line_srcfileno(ln, &fn, &de) != DW_DLV_OK) continue; if (dwarf_lineno(ln, &lineno, &de) != DW_DLV_OK) continue; if (dwarf_lineaddr(ln, &lineaddr, &de) != DW_DLV_OK) continue; printf("%-37s %11ju %#18jx\n", basename(srcfiles[fn - 1]), (uintmax_t) lineno, (uintmax_t) lineaddr); } putchar('\n'); } } static void dump_dwarf_die(struct readelf *re, Dwarf_Die die, int level) { Dwarf_Attribute *attr_list; Dwarf_Die ret_die; Dwarf_Off dieoff, cuoff, culen, attroff; Dwarf_Unsigned ate, lang, v_udata, v_sig; Dwarf_Signed attr_count, v_sdata; Dwarf_Off v_off; Dwarf_Addr v_addr; Dwarf_Half tag, attr, form; Dwarf_Block *v_block; Dwarf_Bool v_bool, is_info; Dwarf_Sig8 v_sig8; Dwarf_Error de; Dwarf_Ptr v_expr; const char *tag_str, *attr_str, *ate_str, *lang_str; char unk_tag[32], unk_attr[32]; char *v_str; uint8_t *b, *p; int i, j, abc, ret; if (dwarf_dieoffset(die, &dieoff, &de) != DW_DLV_OK) { warnx("dwarf_dieoffset failed: %s", dwarf_errmsg(de)); goto cont_search; } printf(" <%d><%jx>: ", level, (uintmax_t) dieoff); if (dwarf_die_CU_offset_range(die, &cuoff, &culen, &de) != DW_DLV_OK) { warnx("dwarf_die_CU_offset_range failed: %s", dwarf_errmsg(de)); cuoff = 0; } abc = dwarf_die_abbrev_code(die); if (dwarf_tag(die, &tag, &de) != DW_DLV_OK) { warnx("dwarf_tag failed: %s", dwarf_errmsg(de)); goto cont_search; } if (dwarf_get_TAG_name(tag, &tag_str) != DW_DLV_OK) { snprintf(unk_tag, sizeof(unk_tag), "[Unknown Tag: %#x]", tag); tag_str = unk_tag; } printf("Abbrev Number: %d (%s)\n", abc, tag_str); if ((ret = dwarf_attrlist(die, &attr_list, &attr_count, &de)) != DW_DLV_OK) { if (ret == DW_DLV_ERROR) warnx("dwarf_attrlist failed: %s", dwarf_errmsg(de)); goto cont_search; } for (i = 0; i < attr_count; i++) { if (dwarf_whatform(attr_list[i], &form, &de) != DW_DLV_OK) { warnx("dwarf_whatform failed: %s", dwarf_errmsg(de)); continue; } if (dwarf_whatattr(attr_list[i], &attr, &de) != DW_DLV_OK) { warnx("dwarf_whatattr failed: %s", dwarf_errmsg(de)); continue; } if (dwarf_get_AT_name(attr, &attr_str) != DW_DLV_OK) { snprintf(unk_attr, sizeof(unk_attr), "[Unknown AT: %#x]", attr); attr_str = unk_attr; } if (dwarf_attroffset(attr_list[i], &attroff, &de) != DW_DLV_OK) { warnx("dwarf_attroffset failed: %s", dwarf_errmsg(de)); attroff = 0; } printf(" <%jx> %-18s: ", (uintmax_t) attroff, attr_str); switch (form) { case DW_FORM_ref_addr: case DW_FORM_sec_offset: if (dwarf_global_formref(attr_list[i], &v_off, &de) != DW_DLV_OK) { warnx("dwarf_global_formref failed: %s", dwarf_errmsg(de)); continue; } if (form == DW_FORM_ref_addr) printf("<0x%jx>", (uintmax_t) v_off); else printf("0x%jx", (uintmax_t) v_off); break; case DW_FORM_ref1: case DW_FORM_ref2: case DW_FORM_ref4: case DW_FORM_ref8: case DW_FORM_ref_udata: if (dwarf_formref(attr_list[i], &v_off, &de) != DW_DLV_OK) { warnx("dwarf_formref failed: %s", dwarf_errmsg(de)); continue; } v_off += cuoff; printf("<0x%jx>", (uintmax_t) v_off); break; case DW_FORM_addr: if (dwarf_formaddr(attr_list[i], &v_addr, &de) != DW_DLV_OK) { warnx("dwarf_formaddr failed: %s", dwarf_errmsg(de)); continue; } printf("%#jx", (uintmax_t) v_addr); break; case DW_FORM_data1: case DW_FORM_data2: case DW_FORM_data4: case DW_FORM_data8: case DW_FORM_udata: if (dwarf_formudata(attr_list[i], &v_udata, &de) != DW_DLV_OK) { warnx("dwarf_formudata failed: %s", dwarf_errmsg(de)); continue; } if (attr == DW_AT_high_pc) printf("0x%jx", (uintmax_t) v_udata); else printf("%ju", (uintmax_t) v_udata); break; case DW_FORM_sdata: if (dwarf_formsdata(attr_list[i], &v_sdata, &de) != DW_DLV_OK) { warnx("dwarf_formudata failed: %s", dwarf_errmsg(de)); continue; } printf("%jd", (intmax_t) v_sdata); break; case DW_FORM_flag: if (dwarf_formflag(attr_list[i], &v_bool, &de) != DW_DLV_OK) { warnx("dwarf_formflag failed: %s", dwarf_errmsg(de)); continue; } printf("%jd", (intmax_t) v_bool); break; case DW_FORM_flag_present: putchar('1'); break; case DW_FORM_string: case DW_FORM_strp: if (dwarf_formstring(attr_list[i], &v_str, &de) != DW_DLV_OK) { warnx("dwarf_formstring failed: %s", dwarf_errmsg(de)); continue; } if (form == DW_FORM_string) printf("%s", v_str); else printf("(indirect string) %s", v_str); break; case DW_FORM_block: case DW_FORM_block1: case DW_FORM_block2: case DW_FORM_block4: if (dwarf_formblock(attr_list[i], &v_block, &de) != DW_DLV_OK) { warnx("dwarf_formblock failed: %s", dwarf_errmsg(de)); continue; } printf("%ju byte block:", (uintmax_t) v_block->bl_len); b = v_block->bl_data; for (j = 0; (Dwarf_Unsigned) j < v_block->bl_len; j++) printf(" %x", b[j]); printf("\t("); dump_dwarf_block(re, v_block->bl_data, v_block->bl_len); putchar(')'); break; case DW_FORM_exprloc: if (dwarf_formexprloc(attr_list[i], &v_udata, &v_expr, &de) != DW_DLV_OK) { warnx("dwarf_formexprloc failed: %s", dwarf_errmsg(de)); continue; } printf("%ju byte block:", (uintmax_t) v_udata); b = v_expr; for (j = 0; (Dwarf_Unsigned) j < v_udata; j++) printf(" %x", b[j]); printf("\t("); dump_dwarf_block(re, v_expr, v_udata); putchar(')'); break; case DW_FORM_ref_sig8: if (dwarf_formsig8(attr_list[i], &v_sig8, &de) != DW_DLV_OK) { warnx("dwarf_formsig8 failed: %s", dwarf_errmsg(de)); continue; } p = (uint8_t *)(uintptr_t) &v_sig8.signature[0]; v_sig = re->dw_decode(&p, 8); printf("signature: 0x%jx", (uintmax_t) v_sig); } switch (attr) { case DW_AT_encoding: if (dwarf_attrval_unsigned(die, attr, &ate, &de) != DW_DLV_OK) break; if (dwarf_get_ATE_name(ate, &ate_str) != DW_DLV_OK) ate_str = "DW_ATE_UNKNOWN"; printf("\t(%s)", &ate_str[strlen("DW_ATE_")]); break; case DW_AT_language: if (dwarf_attrval_unsigned(die, attr, &lang, &de) != DW_DLV_OK) break; if (dwarf_get_LANG_name(lang, &lang_str) != DW_DLV_OK) break; printf("\t(%s)", &lang_str[strlen("DW_LANG_")]); break; case DW_AT_location: case DW_AT_string_length: case DW_AT_return_addr: case DW_AT_data_member_location: case DW_AT_frame_base: case DW_AT_segment: case DW_AT_static_link: case DW_AT_use_location: case DW_AT_vtable_elem_location: switch (form) { case DW_FORM_data4: case DW_FORM_data8: case DW_FORM_sec_offset: printf("\t(location list)"); break; default: break; } default: break; } putchar('\n'); } cont_search: /* Search children. */ ret = dwarf_child(die, &ret_die, &de); if (ret == DW_DLV_ERROR) warnx("dwarf_child: %s", dwarf_errmsg(de)); else if (ret == DW_DLV_OK) dump_dwarf_die(re, ret_die, level + 1); /* Search sibling. */ is_info = dwarf_get_die_infotypes_flag(die); ret = dwarf_siblingof_b(re->dbg, die, &ret_die, is_info, &de); if (ret == DW_DLV_ERROR) warnx("dwarf_siblingof: %s", dwarf_errmsg(de)); else if (ret == DW_DLV_OK) dump_dwarf_die(re, ret_die, level); dwarf_dealloc(re->dbg, die, DW_DLA_DIE); } static void set_cu_context(struct readelf *re, Dwarf_Half psize, Dwarf_Half osize, Dwarf_Half ver) { re->cu_psize = psize; re->cu_osize = osize; re->cu_ver = ver; } static void dump_dwarf_info(struct readelf *re, Dwarf_Bool is_info) { struct section *s; Dwarf_Die die; Dwarf_Error de; Dwarf_Half tag, version, pointer_size, off_size; Dwarf_Off cu_offset, cu_length; Dwarf_Off aboff; Dwarf_Unsigned typeoff; Dwarf_Sig8 sig8; Dwarf_Unsigned sig; uint8_t *p; const char *sn; int i, ret; sn = is_info ? ".debug_info" : ".debug_types"; s = NULL; for (i = 0; (size_t) i < re->shnum; i++) { s = &re->sl[i]; if (s->name != NULL && !strcmp(s->name, sn)) break; } if ((size_t) i >= re->shnum) return; do { printf("\nDump of debug contents of section %s:\n", sn); while ((ret = dwarf_next_cu_header_c(re->dbg, is_info, NULL, &version, &aboff, &pointer_size, &off_size, NULL, &sig8, &typeoff, NULL, &de)) == DW_DLV_OK) { set_cu_context(re, pointer_size, off_size, version); die = NULL; while (dwarf_siblingof_b(re->dbg, die, &die, is_info, &de) == DW_DLV_OK) { if (dwarf_tag(die, &tag, &de) != DW_DLV_OK) { warnx("dwarf_tag failed: %s", dwarf_errmsg(de)); continue; } /* XXX: What about DW_TAG_partial_unit? */ if ((is_info && tag == DW_TAG_compile_unit) || (!is_info && tag == DW_TAG_type_unit)) break; } if (die == NULL && is_info) { warnx("could not find DW_TAG_compile_unit " "die"); continue; } else if (die == NULL && !is_info) { warnx("could not find DW_TAG_type_unit die"); continue; } if (dwarf_die_CU_offset_range(die, &cu_offset, &cu_length, &de) != DW_DLV_OK) { warnx("dwarf_die_CU_offset failed: %s", dwarf_errmsg(de)); continue; } cu_length -= off_size == 4 ? 4 : 12; sig = 0; if (!is_info) { p = (uint8_t *)(uintptr_t) &sig8.signature[0]; sig = re->dw_decode(&p, 8); } printf("\n Type Unit @ offset 0x%jx:\n", (uintmax_t) cu_offset); printf(" Length:\t\t%#jx (%d-bit)\n", (uintmax_t) cu_length, off_size == 4 ? 32 : 64); printf(" Version:\t\t%u\n", version); printf(" Abbrev Offset:\t0x%jx\n", (uintmax_t) aboff); printf(" Pointer Size:\t%u\n", pointer_size); if (!is_info) { printf(" Signature:\t\t0x%016jx\n", (uintmax_t) sig); printf(" Type Offset:\t0x%jx\n", (uintmax_t) typeoff); } dump_dwarf_die(re, die, 0); } if (ret == DW_DLV_ERROR) warnx("dwarf_next_cu_header: %s", dwarf_errmsg(de)); if (is_info) break; } while (dwarf_next_types_section(re->dbg, &de) == DW_DLV_OK); } static void dump_dwarf_abbrev(struct readelf *re) { Dwarf_Abbrev ab; Dwarf_Off aboff, atoff; Dwarf_Unsigned length, attr_count; Dwarf_Signed flag, form; Dwarf_Half tag, attr; Dwarf_Error de; const char *tag_str, *attr_str, *form_str; char unk_tag[32], unk_attr[32], unk_form[32]; int i, j, ret; printf("\nContents of section .debug_abbrev:\n\n"); while ((ret = dwarf_next_cu_header(re->dbg, NULL, NULL, &aboff, NULL, NULL, &de)) == DW_DLV_OK) { printf(" Number TAG\n"); i = 0; while ((ret = dwarf_get_abbrev(re->dbg, aboff, &ab, &length, &attr_count, &de)) == DW_DLV_OK) { if (length == 1) { dwarf_dealloc(re->dbg, ab, DW_DLA_ABBREV); break; } aboff += length; printf("%4d", ++i); if (dwarf_get_abbrev_tag(ab, &tag, &de) != DW_DLV_OK) { warnx("dwarf_get_abbrev_tag failed: %s", dwarf_errmsg(de)); goto next_abbrev; } if (dwarf_get_TAG_name(tag, &tag_str) != DW_DLV_OK) { snprintf(unk_tag, sizeof(unk_tag), "[Unknown Tag: %#x]", tag); tag_str = unk_tag; } if (dwarf_get_abbrev_children_flag(ab, &flag, &de) != DW_DLV_OK) { warnx("dwarf_get_abbrev_children_flag failed:" " %s", dwarf_errmsg(de)); goto next_abbrev; } printf(" %s %s\n", tag_str, flag ? "[has children]" : "[no children]"); for (j = 0; (Dwarf_Unsigned) j < attr_count; j++) { if (dwarf_get_abbrev_entry(ab, (Dwarf_Signed) j, &attr, &form, &atoff, &de) != DW_DLV_OK) { warnx("dwarf_get_abbrev_entry failed:" " %s", dwarf_errmsg(de)); continue; } if (dwarf_get_AT_name(attr, &attr_str) != DW_DLV_OK) { snprintf(unk_attr, sizeof(unk_attr), "[Unknown AT: %#x]", attr); attr_str = unk_attr; } if (dwarf_get_FORM_name(form, &form_str) != DW_DLV_OK) { snprintf(unk_form, sizeof(unk_form), "[Unknown Form: %#x]", (Dwarf_Half) form); form_str = unk_form; } printf(" %-18s %s\n", attr_str, form_str); } next_abbrev: dwarf_dealloc(re->dbg, ab, DW_DLA_ABBREV); } if (ret != DW_DLV_OK) warnx("dwarf_get_abbrev: %s", dwarf_errmsg(de)); } if (ret == DW_DLV_ERROR) warnx("dwarf_next_cu_header: %s", dwarf_errmsg(de)); } static void dump_dwarf_pubnames(struct readelf *re) { struct section *s; Dwarf_Off die_off; Dwarf_Unsigned offset, length, nt_cu_offset, nt_cu_length; Dwarf_Signed cnt; Dwarf_Global *globs; Dwarf_Half nt_version; Dwarf_Error de; Elf_Data *d; char *glob_name; int i, dwarf_size, elferr; printf("\nContents of the .debug_pubnames section:\n"); s = NULL; for (i = 0; (size_t) i < re->shnum; i++) { s = &re->sl[i]; if (s->name != NULL && !strcmp(s->name, ".debug_pubnames")) break; } if ((size_t) i >= re->shnum) return; (void) elf_errno(); if ((d = elf_getdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_getdata failed: %s", elf_errmsg(-1)); return; } if (d->d_size <= 0) return; /* Read in .debug_pubnames section table header. */ offset = 0; length = re->dw_read(d, &offset, 4); if (length == 0xffffffff) { dwarf_size = 8; length = re->dw_read(d, &offset, 8); } else dwarf_size = 4; if (length > d->d_size - offset) { warnx("invalid .dwarf_pubnames section"); return; } nt_version = re->dw_read(d, &offset, 2); nt_cu_offset = re->dw_read(d, &offset, dwarf_size); nt_cu_length = re->dw_read(d, &offset, dwarf_size); printf(" Length:\t\t\t\t%ju\n", (uintmax_t) length); printf(" Version:\t\t\t\t%u\n", nt_version); printf(" Offset into .debug_info section:\t%ju\n", (uintmax_t) nt_cu_offset); printf(" Size of area in .debug_info section:\t%ju\n", (uintmax_t) nt_cu_length); if (dwarf_get_globals(re->dbg, &globs, &cnt, &de) != DW_DLV_OK) { warnx("dwarf_get_globals failed: %s", dwarf_errmsg(de)); return; } printf("\n Offset Name\n"); for (i = 0; i < cnt; i++) { if (dwarf_globname(globs[i], &glob_name, &de) != DW_DLV_OK) { warnx("dwarf_globname failed: %s", dwarf_errmsg(de)); continue; } if (dwarf_global_die_offset(globs[i], &die_off, &de) != DW_DLV_OK) { warnx("dwarf_global_die_offset failed: %s", dwarf_errmsg(de)); continue; } printf(" %-11ju %s\n", (uintmax_t) die_off, glob_name); } } static void dump_dwarf_aranges(struct readelf *re) { struct section *s; Dwarf_Arange *aranges; Dwarf_Addr start; Dwarf_Unsigned offset, length, as_cu_offset; Dwarf_Off die_off; Dwarf_Signed cnt; Dwarf_Half as_version, as_addrsz, as_segsz; Dwarf_Error de; Elf_Data *d; int i, dwarf_size, elferr; printf("\nContents of section .debug_aranges:\n"); s = NULL; for (i = 0; (size_t) i < re->shnum; i++) { s = &re->sl[i]; if (s->name != NULL && !strcmp(s->name, ".debug_aranges")) break; } if ((size_t) i >= re->shnum) return; (void) elf_errno(); if ((d = elf_getdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_getdata failed: %s", elf_errmsg(-1)); return; } if (d->d_size <= 0) return; /* Read in the .debug_aranges section table header. */ offset = 0; length = re->dw_read(d, &offset, 4); if (length == 0xffffffff) { dwarf_size = 8; length = re->dw_read(d, &offset, 8); } else dwarf_size = 4; if (length > d->d_size - offset) { warnx("invalid .dwarf_aranges section"); return; } as_version = re->dw_read(d, &offset, 2); as_cu_offset = re->dw_read(d, &offset, dwarf_size); as_addrsz = re->dw_read(d, &offset, 1); as_segsz = re->dw_read(d, &offset, 1); printf(" Length:\t\t\t%ju\n", (uintmax_t) length); printf(" Version:\t\t\t%u\n", as_version); printf(" Offset into .debug_info:\t%ju\n", (uintmax_t) as_cu_offset); printf(" Pointer Size:\t\t\t%u\n", as_addrsz); printf(" Segment Size:\t\t\t%u\n", as_segsz); if (dwarf_get_aranges(re->dbg, &aranges, &cnt, &de) != DW_DLV_OK) { warnx("dwarf_get_aranges failed: %s", dwarf_errmsg(de)); return; } printf("\n Address Length\n"); for (i = 0; i < cnt; i++) { if (dwarf_get_arange_info(aranges[i], &start, &length, &die_off, &de) != DW_DLV_OK) { warnx("dwarf_get_arange_info failed: %s", dwarf_errmsg(de)); continue; } printf(" %08jx %ju\n", (uintmax_t) start, (uintmax_t) length); } } static void dump_dwarf_ranges_foreach(struct readelf *re, Dwarf_Die die, Dwarf_Addr base) { Dwarf_Attribute *attr_list; Dwarf_Ranges *ranges; Dwarf_Die ret_die; Dwarf_Error de; Dwarf_Addr base0; Dwarf_Half attr; Dwarf_Signed attr_count, cnt; Dwarf_Unsigned off, bytecnt; int i, j, ret; if ((ret = dwarf_attrlist(die, &attr_list, &attr_count, &de)) != DW_DLV_OK) { if (ret == DW_DLV_ERROR) warnx("dwarf_attrlist failed: %s", dwarf_errmsg(de)); goto cont_search; } for (i = 0; i < attr_count; i++) { if (dwarf_whatattr(attr_list[i], &attr, &de) != DW_DLV_OK) { warnx("dwarf_whatattr failed: %s", dwarf_errmsg(de)); continue; } if (attr != DW_AT_ranges) continue; if (dwarf_formudata(attr_list[i], &off, &de) != DW_DLV_OK) { warnx("dwarf_formudata failed: %s", dwarf_errmsg(de)); continue; } if (dwarf_get_ranges(re->dbg, (Dwarf_Off) off, &ranges, &cnt, &bytecnt, &de) != DW_DLV_OK) continue; base0 = base; for (j = 0; j < cnt; j++) { printf(" %08jx ", (uintmax_t) off); if (ranges[j].dwr_type == DW_RANGES_END) { printf("%s\n", ""); continue; } else if (ranges[j].dwr_type == DW_RANGES_ADDRESS_SELECTION) { base0 = ranges[j].dwr_addr2; continue; } if (re->ec == ELFCLASS32) printf("%08jx %08jx\n", (uintmax_t) (ranges[j].dwr_addr1 + base0), (uintmax_t) (ranges[j].dwr_addr2 + base0)); else printf("%016jx %016jx\n", (uintmax_t) (ranges[j].dwr_addr1 + base0), (uintmax_t) (ranges[j].dwr_addr2 + base0)); } } cont_search: /* Search children. */ ret = dwarf_child(die, &ret_die, &de); if (ret == DW_DLV_ERROR) warnx("dwarf_child: %s", dwarf_errmsg(de)); else if (ret == DW_DLV_OK) dump_dwarf_ranges_foreach(re, ret_die, base); /* Search sibling. */ ret = dwarf_siblingof(re->dbg, die, &ret_die, &de); if (ret == DW_DLV_ERROR) warnx("dwarf_siblingof: %s", dwarf_errmsg(de)); else if (ret == DW_DLV_OK) dump_dwarf_ranges_foreach(re, ret_die, base); } static void dump_dwarf_ranges(struct readelf *re) { Dwarf_Ranges *ranges; Dwarf_Die die; Dwarf_Signed cnt; Dwarf_Unsigned bytecnt; Dwarf_Half tag; Dwarf_Error de; Dwarf_Unsigned lowpc; int ret; if (dwarf_get_ranges(re->dbg, 0, &ranges, &cnt, &bytecnt, &de) != DW_DLV_OK) return; printf("Contents of the .debug_ranges section:\n\n"); if (re->ec == ELFCLASS32) printf(" %-8s %-8s %s\n", "Offset", "Begin", "End"); else printf(" %-8s %-16s %s\n", "Offset", "Begin", "End"); while ((ret = dwarf_next_cu_header(re->dbg, NULL, NULL, NULL, NULL, NULL, &de)) == DW_DLV_OK) { die = NULL; if (dwarf_siblingof(re->dbg, die, &die, &de) != DW_DLV_OK) continue; if (dwarf_tag(die, &tag, &de) != DW_DLV_OK) { warnx("dwarf_tag failed: %s", dwarf_errmsg(de)); continue; } /* XXX: What about DW_TAG_partial_unit? */ lowpc = 0; if (tag == DW_TAG_compile_unit) { if (dwarf_attrval_unsigned(die, DW_AT_low_pc, &lowpc, &de) != DW_DLV_OK) lowpc = 0; } dump_dwarf_ranges_foreach(re, die, (Dwarf_Addr) lowpc); } putchar('\n'); } static void dump_dwarf_macinfo(struct readelf *re) { Dwarf_Unsigned offset; Dwarf_Signed cnt; Dwarf_Macro_Details *md; Dwarf_Error de; const char *mi_str; char unk_mi[32]; int i; #define _MAX_MACINFO_ENTRY 65535 printf("\nContents of section .debug_macinfo:\n\n"); offset = 0; while (dwarf_get_macro_details(re->dbg, offset, _MAX_MACINFO_ENTRY, &cnt, &md, &de) == DW_DLV_OK) { for (i = 0; i < cnt; i++) { offset = md[i].dmd_offset + 1; if (md[i].dmd_type == 0) break; if (dwarf_get_MACINFO_name(md[i].dmd_type, &mi_str) != DW_DLV_OK) { snprintf(unk_mi, sizeof(unk_mi), "[Unknown MACINFO: %#x]", md[i].dmd_type); mi_str = unk_mi; } printf(" %s", mi_str); switch (md[i].dmd_type) { case DW_MACINFO_define: case DW_MACINFO_undef: printf(" - lineno : %jd macro : %s\n", (intmax_t) md[i].dmd_lineno, md[i].dmd_macro); break; case DW_MACINFO_start_file: printf(" - lineno : %jd filenum : %jd\n", (intmax_t) md[i].dmd_lineno, (intmax_t) md[i].dmd_fileindex); break; default: putchar('\n'); break; } } } #undef _MAX_MACINFO_ENTRY } static void dump_dwarf_frame_inst(struct readelf *re, Dwarf_Cie cie, uint8_t *insts, Dwarf_Unsigned len, Dwarf_Unsigned caf, Dwarf_Signed daf, Dwarf_Addr pc, Dwarf_Debug dbg) { Dwarf_Frame_Op *oplist; Dwarf_Signed opcnt, delta; Dwarf_Small op; Dwarf_Error de; const char *op_str; char unk_op[32]; int i; if (dwarf_expand_frame_instructions(cie, insts, len, &oplist, &opcnt, &de) != DW_DLV_OK) { warnx("dwarf_expand_frame_instructions failed: %s", dwarf_errmsg(de)); return; } for (i = 0; i < opcnt; i++) { if (oplist[i].fp_base_op != 0) op = oplist[i].fp_base_op << 6; else op = oplist[i].fp_extended_op; if (dwarf_get_CFA_name(op, &op_str) != DW_DLV_OK) { snprintf(unk_op, sizeof(unk_op), "[Unknown CFA: %#x]", op); op_str = unk_op; } printf(" %s", op_str); switch (op) { case DW_CFA_advance_loc: delta = oplist[i].fp_offset * caf; pc += delta; printf(": %ju to %08jx", (uintmax_t) delta, (uintmax_t) pc); break; case DW_CFA_offset: case DW_CFA_offset_extended: case DW_CFA_offset_extended_sf: delta = oplist[i].fp_offset * daf; printf(": r%u (%s) at cfa%+jd", oplist[i].fp_register, dwarf_regname(re, oplist[i].fp_register), (intmax_t) delta); break; case DW_CFA_restore: printf(": r%u (%s)", oplist[i].fp_register, dwarf_regname(re, oplist[i].fp_register)); break; case DW_CFA_set_loc: pc = oplist[i].fp_offset; printf(": to %08jx", (uintmax_t) pc); break; case DW_CFA_advance_loc1: case DW_CFA_advance_loc2: case DW_CFA_advance_loc4: pc += oplist[i].fp_offset; printf(": %jd to %08jx", (intmax_t) oplist[i].fp_offset, (uintmax_t) pc); break; case DW_CFA_def_cfa: printf(": r%u (%s) ofs %ju", oplist[i].fp_register, dwarf_regname(re, oplist[i].fp_register), (uintmax_t) oplist[i].fp_offset); break; case DW_CFA_def_cfa_sf: printf(": r%u (%s) ofs %jd", oplist[i].fp_register, dwarf_regname(re, oplist[i].fp_register), (intmax_t) (oplist[i].fp_offset * daf)); break; case DW_CFA_def_cfa_register: printf(": r%u (%s)", oplist[i].fp_register, dwarf_regname(re, oplist[i].fp_register)); break; case DW_CFA_def_cfa_offset: printf(": %ju", (uintmax_t) oplist[i].fp_offset); break; case DW_CFA_def_cfa_offset_sf: printf(": %jd", (intmax_t) (oplist[i].fp_offset * daf)); break; default: break; } putchar('\n'); } dwarf_dealloc(dbg, oplist, DW_DLA_FRAME_BLOCK); } static char * get_regoff_str(struct readelf *re, Dwarf_Half reg, Dwarf_Addr off) { static char rs[16]; if (reg == DW_FRAME_UNDEFINED_VAL || reg == DW_FRAME_REG_INITIAL_VALUE) snprintf(rs, sizeof(rs), "%c", 'u'); else if (reg == DW_FRAME_CFA_COL) snprintf(rs, sizeof(rs), "c%+jd", (intmax_t) off); else snprintf(rs, sizeof(rs), "%s%+jd", dwarf_regname(re, reg), (intmax_t) off); return (rs); } static int dump_dwarf_frame_regtable(struct readelf *re, Dwarf_Fde fde, Dwarf_Addr pc, Dwarf_Unsigned func_len, Dwarf_Half cie_ra) { Dwarf_Regtable rt; Dwarf_Addr row_pc, end_pc, pre_pc, cur_pc; Dwarf_Error de; char *vec; int i; #define BIT_SET(v, n) (v[(n)>>3] |= 1U << ((n) & 7)) #define BIT_CLR(v, n) (v[(n)>>3] &= ~(1U << ((n) & 7))) #define BIT_ISSET(v, n) (v[(n)>>3] & (1U << ((n) & 7))) #define RT(x) rt.rules[(x)] vec = calloc((DW_REG_TABLE_SIZE + 7) / 8, 1); if (vec == NULL) err(EXIT_FAILURE, "calloc failed"); pre_pc = ~((Dwarf_Addr) 0); cur_pc = pc; end_pc = pc + func_len; for (; cur_pc < end_pc; cur_pc++) { if (dwarf_get_fde_info_for_all_regs(fde, cur_pc, &rt, &row_pc, &de) != DW_DLV_OK) { free(vec); warnx("dwarf_get_fde_info_for_all_regs failed: %s\n", dwarf_errmsg(de)); return (-1); } if (row_pc == pre_pc) continue; pre_pc = row_pc; for (i = 1; i < DW_REG_TABLE_SIZE; i++) { if (rt.rules[i].dw_regnum != DW_FRAME_REG_INITIAL_VALUE) BIT_SET(vec, i); } } printf(" LOC CFA "); for (i = 1; i < DW_REG_TABLE_SIZE; i++) { if (BIT_ISSET(vec, i)) { if ((Dwarf_Half) i == cie_ra) printf("ra "); else printf("%-5s", dwarf_regname(re, (unsigned int) i)); } } putchar('\n'); pre_pc = ~((Dwarf_Addr) 0); cur_pc = pc; end_pc = pc + func_len; for (; cur_pc < end_pc; cur_pc++) { if (dwarf_get_fde_info_for_all_regs(fde, cur_pc, &rt, &row_pc, &de) != DW_DLV_OK) { warnx("dwarf_get_fde_info_for_all_regs failed: %s\n", dwarf_errmsg(de)); return (-1); } if (row_pc == pre_pc) continue; pre_pc = row_pc; printf("%08jx ", (uintmax_t) row_pc); printf("%-8s ", get_regoff_str(re, RT(0).dw_regnum, RT(0).dw_offset)); for (i = 1; i < DW_REG_TABLE_SIZE; i++) { if (BIT_ISSET(vec, i)) { printf("%-5s", get_regoff_str(re, RT(i).dw_regnum, RT(i).dw_offset)); } } putchar('\n'); } free(vec); return (0); #undef BIT_SET #undef BIT_CLR #undef BIT_ISSET #undef RT } static void dump_dwarf_frame_section(struct readelf *re, struct section *s, int alt) { Dwarf_Cie *cie_list, cie, pre_cie; Dwarf_Fde *fde_list, fde; Dwarf_Off cie_offset, fde_offset; Dwarf_Unsigned cie_length, fde_instlen; Dwarf_Unsigned cie_caf, cie_daf, cie_instlen, func_len, fde_length; Dwarf_Signed cie_count, fde_count, cie_index; Dwarf_Addr low_pc; Dwarf_Half cie_ra; Dwarf_Small cie_version; Dwarf_Ptr fde_addr, fde_inst, cie_inst; char *cie_aug, c; int i, eh_frame; Dwarf_Error de; printf("\nThe section %s contains:\n\n", s->name); if (!strcmp(s->name, ".debug_frame")) { eh_frame = 0; if (dwarf_get_fde_list(re->dbg, &cie_list, &cie_count, &fde_list, &fde_count, &de) != DW_DLV_OK) { warnx("dwarf_get_fde_list failed: %s", dwarf_errmsg(de)); return; } } else if (!strcmp(s->name, ".eh_frame")) { eh_frame = 1; if (dwarf_get_fde_list_eh(re->dbg, &cie_list, &cie_count, &fde_list, &fde_count, &de) != DW_DLV_OK) { warnx("dwarf_get_fde_list_eh failed: %s", dwarf_errmsg(de)); return; } } else return; pre_cie = NULL; for (i = 0; i < fde_count; i++) { if (dwarf_get_fde_n(fde_list, i, &fde, &de) != DW_DLV_OK) { warnx("dwarf_get_fde_n failed: %s", dwarf_errmsg(de)); continue; } if (dwarf_get_cie_of_fde(fde, &cie, &de) != DW_DLV_OK) { warnx("dwarf_get_fde_n failed: %s", dwarf_errmsg(de)); continue; } if (dwarf_get_fde_range(fde, &low_pc, &func_len, &fde_addr, &fde_length, &cie_offset, &cie_index, &fde_offset, &de) != DW_DLV_OK) { warnx("dwarf_get_fde_range failed: %s", dwarf_errmsg(de)); continue; } if (dwarf_get_fde_instr_bytes(fde, &fde_inst, &fde_instlen, &de) != DW_DLV_OK) { warnx("dwarf_get_fde_instr_bytes failed: %s", dwarf_errmsg(de)); continue; } if (pre_cie == NULL || cie != pre_cie) { pre_cie = cie; if (dwarf_get_cie_info(cie, &cie_length, &cie_version, &cie_aug, &cie_caf, &cie_daf, &cie_ra, &cie_inst, &cie_instlen, &de) != DW_DLV_OK) { warnx("dwarf_get_cie_info failed: %s", dwarf_errmsg(de)); continue; } printf("%08jx %08jx %8.8jx CIE", (uintmax_t) cie_offset, (uintmax_t) cie_length, (uintmax_t) (eh_frame ? 0 : ~0U)); if (!alt) { putchar('\n'); printf(" Version:\t\t\t%u\n", cie_version); printf(" Augmentation:\t\t\t\""); while ((c = *cie_aug++) != '\0') putchar(c); printf("\"\n"); printf(" Code alignment factor:\t%ju\n", (uintmax_t) cie_caf); printf(" Data alignment factor:\t%jd\n", (intmax_t) cie_daf); printf(" Return address column:\t%ju\n", (uintmax_t) cie_ra); putchar('\n'); dump_dwarf_frame_inst(re, cie, cie_inst, cie_instlen, cie_caf, cie_daf, 0, re->dbg); putchar('\n'); } else { printf(" \""); while ((c = *cie_aug++) != '\0') putchar(c); putchar('"'); printf(" cf=%ju df=%jd ra=%ju\n", (uintmax_t) cie_caf, (uintmax_t) cie_daf, (uintmax_t) cie_ra); dump_dwarf_frame_regtable(re, fde, low_pc, 1, cie_ra); putchar('\n'); } } printf("%08jx %08jx %08jx FDE cie=%08jx pc=%08jx..%08jx\n", (uintmax_t) fde_offset, (uintmax_t) fde_length, (uintmax_t) cie_offset, (uintmax_t) (eh_frame ? fde_offset + 4 - cie_offset : cie_offset), (uintmax_t) low_pc, (uintmax_t) (low_pc + func_len)); if (!alt) dump_dwarf_frame_inst(re, cie, fde_inst, fde_instlen, cie_caf, cie_daf, low_pc, re->dbg); else dump_dwarf_frame_regtable(re, fde, low_pc, func_len, cie_ra); putchar('\n'); } } static void dump_dwarf_frame(struct readelf *re, int alt) { struct section *s; int i; (void) dwarf_set_frame_cfa_value(re->dbg, DW_FRAME_CFA_COL); for (i = 0; (size_t) i < re->shnum; i++) { s = &re->sl[i]; if (s->name != NULL && (!strcmp(s->name, ".debug_frame") || !strcmp(s->name, ".eh_frame"))) dump_dwarf_frame_section(re, s, alt); } } static void dump_dwarf_str(struct readelf *re) { struct section *s; Elf_Data *d; unsigned char *p; int elferr, end, i, j; printf("\nContents of section .debug_str:\n"); s = NULL; for (i = 0; (size_t) i < re->shnum; i++) { s = &re->sl[i]; if (s->name != NULL && !strcmp(s->name, ".debug_str")) break; } if ((size_t) i >= re->shnum) return; (void) elf_errno(); if ((d = elf_getdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_getdata failed: %s", elf_errmsg(-1)); return; } if (d->d_size <= 0) return; for (i = 0, p = d->d_buf; (size_t) i < d->d_size; i += 16) { printf(" 0x%08x", (unsigned int) i); if ((size_t) i + 16 > d->d_size) end = d->d_size; else end = i + 16; for (j = i; j < i + 16; j++) { if ((j - i) % 4 == 0) putchar(' '); if (j >= end) { printf(" "); continue; } printf("%02x", (uint8_t) p[j]); } putchar(' '); for (j = i; j < end; j++) { if (isprint(p[j])) putchar(p[j]); else if (p[j] == 0) putchar('.'); else putchar(' '); } putchar('\n'); } } static int loc_at_comparator(const void *la1, const void *la2) { const struct loc_at *left, *right; left = (const struct loc_at *)la1; right = (const struct loc_at *)la2; if (left->la_off > right->la_off) return (1); else if (left->la_off < right->la_off) return (-1); else return (0); } static void search_loclist_at(struct readelf *re, Dwarf_Die die, Dwarf_Unsigned lowpc, struct loc_at **la_list, size_t *la_list_len, size_t *la_list_cap) { struct loc_at *la; Dwarf_Attribute *attr_list; Dwarf_Die ret_die; Dwarf_Unsigned off; Dwarf_Off ref; Dwarf_Signed attr_count; Dwarf_Half attr, form; Dwarf_Bool is_info; Dwarf_Error de; int i, ret; is_info = dwarf_get_die_infotypes_flag(die); if ((ret = dwarf_attrlist(die, &attr_list, &attr_count, &de)) != DW_DLV_OK) { if (ret == DW_DLV_ERROR) warnx("dwarf_attrlist failed: %s", dwarf_errmsg(de)); goto cont_search; } for (i = 0; i < attr_count; i++) { if (dwarf_whatattr(attr_list[i], &attr, &de) != DW_DLV_OK) { warnx("dwarf_whatattr failed: %s", dwarf_errmsg(de)); continue; } if (attr != DW_AT_location && attr != DW_AT_string_length && attr != DW_AT_return_addr && attr != DW_AT_data_member_location && attr != DW_AT_frame_base && attr != DW_AT_segment && attr != DW_AT_static_link && attr != DW_AT_use_location && attr != DW_AT_vtable_elem_location) continue; if (dwarf_whatform(attr_list[i], &form, &de) != DW_DLV_OK) { warnx("dwarf_whatform failed: %s", dwarf_errmsg(de)); continue; } if (form == DW_FORM_data4 || form == DW_FORM_data8) { if (dwarf_formudata(attr_list[i], &off, &de) != DW_DLV_OK) { warnx("dwarf_formudata failed: %s", dwarf_errmsg(de)); continue; } } else if (form == DW_FORM_sec_offset) { if (dwarf_global_formref(attr_list[i], &ref, &de) != DW_DLV_OK) { warnx("dwarf_global_formref failed: %s", dwarf_errmsg(de)); continue; } off = ref; } else continue; if (*la_list_cap == *la_list_len) { *la_list = realloc(*la_list, *la_list_cap * 2 * sizeof(**la_list)); if (*la_list == NULL) err(EXIT_FAILURE, "realloc failed"); *la_list_cap *= 2; } la = &((*la_list)[*la_list_len]); la->la_at = attr_list[i]; la->la_off = off; la->la_lowpc = lowpc; la->la_cu_psize = re->cu_psize; la->la_cu_osize = re->cu_osize; la->la_cu_ver = re->cu_ver; (*la_list_len)++; } cont_search: /* Search children. */ ret = dwarf_child(die, &ret_die, &de); if (ret == DW_DLV_ERROR) warnx("dwarf_child: %s", dwarf_errmsg(de)); else if (ret == DW_DLV_OK) search_loclist_at(re, ret_die, lowpc, la_list, la_list_len, la_list_cap); /* Search sibling. */ ret = dwarf_siblingof_b(re->dbg, die, &ret_die, is_info, &de); if (ret == DW_DLV_ERROR) warnx("dwarf_siblingof: %s", dwarf_errmsg(de)); else if (ret == DW_DLV_OK) search_loclist_at(re, ret_die, lowpc, la_list, la_list_len, la_list_cap); } static void dump_dwarf_loc(struct readelf *re, Dwarf_Loc *lr) { const char *op_str; char unk_op[32]; uint8_t *b, n; int i; if (dwarf_get_OP_name(lr->lr_atom, &op_str) != DW_DLV_OK) { snprintf(unk_op, sizeof(unk_op), "[Unknown OP: %#x]", lr->lr_atom); op_str = unk_op; } printf("%s", op_str); switch (lr->lr_atom) { case DW_OP_reg0: case DW_OP_reg1: case DW_OP_reg2: case DW_OP_reg3: case DW_OP_reg4: case DW_OP_reg5: case DW_OP_reg6: case DW_OP_reg7: case DW_OP_reg8: case DW_OP_reg9: case DW_OP_reg10: case DW_OP_reg11: case DW_OP_reg12: case DW_OP_reg13: case DW_OP_reg14: case DW_OP_reg15: case DW_OP_reg16: case DW_OP_reg17: case DW_OP_reg18: case DW_OP_reg19: case DW_OP_reg20: case DW_OP_reg21: case DW_OP_reg22: case DW_OP_reg23: case DW_OP_reg24: case DW_OP_reg25: case DW_OP_reg26: case DW_OP_reg27: case DW_OP_reg28: case DW_OP_reg29: case DW_OP_reg30: case DW_OP_reg31: printf(" (%s)", dwarf_regname(re, lr->lr_atom - DW_OP_reg0)); break; case DW_OP_deref: case DW_OP_lit0: case DW_OP_lit1: case DW_OP_lit2: case DW_OP_lit3: case DW_OP_lit4: case DW_OP_lit5: case DW_OP_lit6: case DW_OP_lit7: case DW_OP_lit8: case DW_OP_lit9: case DW_OP_lit10: case DW_OP_lit11: case DW_OP_lit12: case DW_OP_lit13: case DW_OP_lit14: case DW_OP_lit15: case DW_OP_lit16: case DW_OP_lit17: case DW_OP_lit18: case DW_OP_lit19: case DW_OP_lit20: case DW_OP_lit21: case DW_OP_lit22: case DW_OP_lit23: case DW_OP_lit24: case DW_OP_lit25: case DW_OP_lit26: case DW_OP_lit27: case DW_OP_lit28: case DW_OP_lit29: case DW_OP_lit30: case DW_OP_lit31: case DW_OP_dup: case DW_OP_drop: case DW_OP_over: case DW_OP_swap: case DW_OP_rot: case DW_OP_xderef: case DW_OP_abs: case DW_OP_and: case DW_OP_div: case DW_OP_minus: case DW_OP_mod: case DW_OP_mul: case DW_OP_neg: case DW_OP_not: case DW_OP_or: case DW_OP_plus: case DW_OP_shl: case DW_OP_shr: case DW_OP_shra: case DW_OP_xor: case DW_OP_eq: case DW_OP_ge: case DW_OP_gt: case DW_OP_le: case DW_OP_lt: case DW_OP_ne: case DW_OP_nop: case DW_OP_push_object_address: case DW_OP_form_tls_address: case DW_OP_call_frame_cfa: case DW_OP_stack_value: case DW_OP_GNU_push_tls_address: case DW_OP_GNU_uninit: break; case DW_OP_const1u: case DW_OP_pick: case DW_OP_deref_size: case DW_OP_xderef_size: case DW_OP_const2u: case DW_OP_bra: case DW_OP_skip: case DW_OP_const4u: case DW_OP_const8u: case DW_OP_constu: case DW_OP_plus_uconst: case DW_OP_regx: case DW_OP_piece: printf(": %ju", (uintmax_t) lr->lr_number); break; case DW_OP_const1s: case DW_OP_const2s: case DW_OP_const4s: case DW_OP_const8s: case DW_OP_consts: printf(": %jd", (intmax_t) lr->lr_number); break; case DW_OP_breg0: case DW_OP_breg1: case DW_OP_breg2: case DW_OP_breg3: case DW_OP_breg4: case DW_OP_breg5: case DW_OP_breg6: case DW_OP_breg7: case DW_OP_breg8: case DW_OP_breg9: case DW_OP_breg10: case DW_OP_breg11: case DW_OP_breg12: case DW_OP_breg13: case DW_OP_breg14: case DW_OP_breg15: case DW_OP_breg16: case DW_OP_breg17: case DW_OP_breg18: case DW_OP_breg19: case DW_OP_breg20: case DW_OP_breg21: case DW_OP_breg22: case DW_OP_breg23: case DW_OP_breg24: case DW_OP_breg25: case DW_OP_breg26: case DW_OP_breg27: case DW_OP_breg28: case DW_OP_breg29: case DW_OP_breg30: case DW_OP_breg31: printf(" (%s): %jd", dwarf_regname(re, lr->lr_atom - DW_OP_breg0), (intmax_t) lr->lr_number); break; case DW_OP_fbreg: printf(": %jd", (intmax_t) lr->lr_number); break; case DW_OP_bregx: printf(": %ju (%s) %jd", (uintmax_t) lr->lr_number, dwarf_regname(re, (unsigned int) lr->lr_number), (intmax_t) lr->lr_number2); break; case DW_OP_addr: case DW_OP_GNU_encoded_addr: printf(": %#jx", (uintmax_t) lr->lr_number); break; case DW_OP_GNU_implicit_pointer: printf(": <0x%jx> %jd", (uintmax_t) lr->lr_number, (intmax_t) lr->lr_number2); break; case DW_OP_implicit_value: printf(": %ju byte block:", (uintmax_t) lr->lr_number); b = (uint8_t *)(uintptr_t) lr->lr_number2; for (i = 0; (Dwarf_Unsigned) i < lr->lr_number; i++) printf(" %x", b[i]); break; case DW_OP_GNU_entry_value: printf(": ("); dump_dwarf_block(re, (uint8_t *)(uintptr_t) lr->lr_number2, lr->lr_number); putchar(')'); break; case DW_OP_GNU_const_type: printf(": <0x%jx> ", (uintmax_t) lr->lr_number); b = (uint8_t *)(uintptr_t) lr->lr_number2; n = *b; for (i = 1; (uint8_t) i < n; i++) printf(" %x", b[i]); break; case DW_OP_GNU_regval_type: printf(": %ju (%s) <0x%jx>", (uintmax_t) lr->lr_number, dwarf_regname(re, (unsigned int) lr->lr_number), (uintmax_t) lr->lr_number2); break; case DW_OP_GNU_convert: case DW_OP_GNU_deref_type: case DW_OP_GNU_parameter_ref: case DW_OP_GNU_reinterpret: printf(": <0x%jx>", (uintmax_t) lr->lr_number); break; default: break; } } static void dump_dwarf_block(struct readelf *re, uint8_t *b, Dwarf_Unsigned len) { Dwarf_Locdesc *llbuf; Dwarf_Signed lcnt; Dwarf_Error de; int i; if (dwarf_loclist_from_expr_b(re->dbg, b, len, re->cu_psize, re->cu_osize, re->cu_ver, &llbuf, &lcnt, &de) != DW_DLV_OK) { warnx("dwarf_loclist_form_expr_b: %s", dwarf_errmsg(de)); return; } for (i = 0; (Dwarf_Half) i < llbuf->ld_cents; i++) { dump_dwarf_loc(re, &llbuf->ld_s[i]); if (i < llbuf->ld_cents - 1) printf("; "); } dwarf_dealloc(re->dbg, llbuf->ld_s, DW_DLA_LOC_BLOCK); dwarf_dealloc(re->dbg, llbuf, DW_DLA_LOCDESC); } static void dump_dwarf_loclist(struct readelf *re) { Dwarf_Die die; Dwarf_Locdesc **llbuf; Dwarf_Unsigned lowpc; Dwarf_Signed lcnt; Dwarf_Half tag, version, pointer_size, off_size; Dwarf_Error de; struct loc_at *la_list, *left, *right, *la; size_t la_list_len, la_list_cap; unsigned int duplicates, k; int i, j, ret, has_content; la_list_len = 0; la_list_cap = 200; if ((la_list = calloc(la_list_cap, sizeof(struct loc_at))) == NULL) errx(EXIT_FAILURE, "calloc failed"); /* Search .debug_info section. */ while ((ret = dwarf_next_cu_header_b(re->dbg, NULL, &version, NULL, &pointer_size, &off_size, NULL, NULL, &de)) == DW_DLV_OK) { set_cu_context(re, pointer_size, off_size, version); die = NULL; if (dwarf_siblingof(re->dbg, die, &die, &de) != DW_DLV_OK) continue; if (dwarf_tag(die, &tag, &de) != DW_DLV_OK) { warnx("dwarf_tag failed: %s", dwarf_errmsg(de)); continue; } /* XXX: What about DW_TAG_partial_unit? */ lowpc = 0; if (tag == DW_TAG_compile_unit) { if (dwarf_attrval_unsigned(die, DW_AT_low_pc, &lowpc, &de) != DW_DLV_OK) lowpc = 0; } /* Search attributes for reference to .debug_loc section. */ search_loclist_at(re, die, lowpc, &la_list, &la_list_len, &la_list_cap); } if (ret == DW_DLV_ERROR) warnx("dwarf_next_cu_header: %s", dwarf_errmsg(de)); /* Search .debug_types section. */ do { while ((ret = dwarf_next_cu_header_c(re->dbg, 0, NULL, &version, NULL, &pointer_size, &off_size, NULL, NULL, NULL, NULL, &de)) == DW_DLV_OK) { set_cu_context(re, pointer_size, off_size, version); die = NULL; if (dwarf_siblingof(re->dbg, die, &die, &de) != DW_DLV_OK) continue; if (dwarf_tag(die, &tag, &de) != DW_DLV_OK) { warnx("dwarf_tag failed: %s", dwarf_errmsg(de)); continue; } lowpc = 0; if (tag == DW_TAG_type_unit) { if (dwarf_attrval_unsigned(die, DW_AT_low_pc, &lowpc, &de) != DW_DLV_OK) lowpc = 0; } /* * Search attributes for reference to .debug_loc * section. */ search_loclist_at(re, die, lowpc, &la_list, &la_list_len, &la_list_cap); } if (ret == DW_DLV_ERROR) warnx("dwarf_next_cu_header: %s", dwarf_errmsg(de)); } while (dwarf_next_types_section(re->dbg, &de) == DW_DLV_OK); if (la_list_len == 0) { free(la_list); return; } /* Sort la_list using loc_at_comparator. */ qsort(la_list, la_list_len, sizeof(struct loc_at), loc_at_comparator); /* Get rid of the duplicates in la_list. */ duplicates = 0; for (k = 1; k < la_list_len; ++k) { left = &la_list[k - 1 - duplicates]; right = &la_list[k]; if (left->la_off == right->la_off) duplicates++; else la_list[k - duplicates] = *right; } la_list_len -= duplicates; has_content = 0; for (k = 0; k < la_list_len; ++k) { la = &la_list[k]; if ((ret = dwarf_loclist_n(la->la_at, &llbuf, &lcnt, &de)) != DW_DLV_OK) { if (ret != DW_DLV_NO_ENTRY) warnx("dwarf_loclist_n failed: %s", dwarf_errmsg(de)); continue; } if (!has_content) { has_content = 1; printf("\nContents of section .debug_loc:\n"); printf(" Offset Begin End Expression\n"); } set_cu_context(re, la->la_cu_psize, la->la_cu_osize, la->la_cu_ver); for (i = 0; i < lcnt; i++) { printf(" %8.8jx ", (uintmax_t) la->la_off); if (llbuf[i]->ld_lopc == 0 && llbuf[i]->ld_hipc == 0) { printf("\n"); continue; } /* TODO: handle base selection entry. */ printf("%8.8jx %8.8jx ", (uintmax_t) (la->la_lowpc + llbuf[i]->ld_lopc), (uintmax_t) (la->la_lowpc + llbuf[i]->ld_hipc)); putchar('('); for (j = 0; (Dwarf_Half) j < llbuf[i]->ld_cents; j++) { dump_dwarf_loc(re, &llbuf[i]->ld_s[j]); if (j < llbuf[i]->ld_cents - 1) printf("; "); } putchar(')'); if (llbuf[i]->ld_lopc == llbuf[i]->ld_hipc) printf(" (start == end)"); putchar('\n'); } for (i = 0; i < lcnt; i++) { dwarf_dealloc(re->dbg, llbuf[i]->ld_s, DW_DLA_LOC_BLOCK); dwarf_dealloc(re->dbg, llbuf[i], DW_DLA_LOCDESC); } dwarf_dealloc(re->dbg, llbuf, DW_DLA_LIST); } if (!has_content) printf("\nSection '.debug_loc' has no debugging data.\n"); free(la_list); } /* * Retrieve a string using string table section index and the string offset. */ static const char* get_string(struct readelf *re, int strtab, size_t off) { const char *name; if ((name = elf_strptr(re->elf, strtab, off)) == NULL) return (""); return (name); } /* * Retrieve the name of a symbol using the section index of the symbol * table and the index of the symbol within that table. */ static const char * get_symbol_name(struct readelf *re, int symtab, int i) { struct section *s; const char *name; GElf_Sym sym; Elf_Data *data; int elferr; s = &re->sl[symtab]; if (s->type != SHT_SYMTAB && s->type != SHT_DYNSYM) return (""); (void) elf_errno(); if ((data = elf_getdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_getdata failed: %s", elf_errmsg(elferr)); return (""); } if (gelf_getsym(data, i, &sym) != &sym) return (""); /* Return section name for STT_SECTION symbol. */ if (GELF_ST_TYPE(sym.st_info) == STT_SECTION) { if (sym.st_shndx < re->shnum && re->sl[sym.st_shndx].name != NULL) return (re->sl[sym.st_shndx].name); return (""); } if (s->link >= re->shnum || (name = elf_strptr(re->elf, s->link, sym.st_name)) == NULL) return (""); return (name); } static uint64_t get_symbol_value(struct readelf *re, int symtab, int i) { struct section *s; GElf_Sym sym; Elf_Data *data; int elferr; s = &re->sl[symtab]; if (s->type != SHT_SYMTAB && s->type != SHT_DYNSYM) return (0); (void) elf_errno(); if ((data = elf_getdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_getdata failed: %s", elf_errmsg(elferr)); return (0); } if (gelf_getsym(data, i, &sym) != &sym) return (0); return (sym.st_value); } static void hex_dump(struct readelf *re) { struct section *s; Elf_Data *d; uint8_t *buf; size_t sz, nbytes; uint64_t addr; int elferr, i, j; for (i = 1; (size_t) i < re->shnum; i++) { s = &re->sl[i]; if (find_dumpop(re, (size_t) i, s->name, HEX_DUMP, -1) == NULL) continue; (void) elf_errno(); if ((d = elf_getdata(s->scn, NULL)) == NULL && (d = elf_rawdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_getdata failed: %s", elf_errmsg(elferr)); continue; } (void) elf_errno(); if (d->d_size <= 0 || d->d_buf == NULL) { printf("\nSection '%s' has no data to dump.\n", s->name); continue; } buf = d->d_buf; sz = d->d_size; addr = s->addr; printf("\nHex dump of section '%s':\n", s->name); while (sz > 0) { printf(" 0x%8.8jx ", (uintmax_t)addr); nbytes = sz > 16? 16 : sz; for (j = 0; j < 16; j++) { if ((size_t)j < nbytes) printf("%2.2x", buf[j]); else printf(" "); if ((j & 3) == 3) printf(" "); } for (j = 0; (size_t)j < nbytes; j++) { if (isprint(buf[j])) printf("%c", buf[j]); else printf("."); } printf("\n"); buf += nbytes; addr += nbytes; sz -= nbytes; } } } static void str_dump(struct readelf *re) { struct section *s; Elf_Data *d; unsigned char *start, *end, *buf_end; unsigned int len; int i, j, elferr, found; for (i = 1; (size_t) i < re->shnum; i++) { s = &re->sl[i]; if (find_dumpop(re, (size_t) i, s->name, STR_DUMP, -1) == NULL) continue; (void) elf_errno(); if ((d = elf_getdata(s->scn, NULL)) == NULL && (d = elf_rawdata(s->scn, NULL)) == NULL) { elferr = elf_errno(); if (elferr != 0) warnx("elf_getdata failed: %s", elf_errmsg(elferr)); continue; } (void) elf_errno(); if (d->d_size <= 0 || d->d_buf == NULL) { printf("\nSection '%s' has no data to dump.\n", s->name); continue; } buf_end = (unsigned char *) d->d_buf + d->d_size; start = (unsigned char *) d->d_buf; found = 0; printf("\nString dump of section '%s':\n", s->name); for (;;) { while (start < buf_end && !isprint(*start)) start++; if (start >= buf_end) break; end = start + 1; while (end < buf_end && isprint(*end)) end++; printf(" [%6lx] ", (long) (start - (unsigned char *) d->d_buf)); len = end - start; for (j = 0; (unsigned int) j < len; j++) putchar(start[j]); putchar('\n'); found = 1; if (end >= buf_end) break; start = end + 1; } if (!found) printf(" No strings found in this section."); putchar('\n'); } } static void load_sections(struct readelf *re) { struct section *s; const char *name; Elf_Scn *scn; GElf_Shdr sh; size_t shstrndx, ndx; int elferr; /* Allocate storage for internal section list. */ if (!elf_getshnum(re->elf, &re->shnum)) { warnx("elf_getshnum failed: %s", elf_errmsg(-1)); return; } if (re->sl != NULL) free(re->sl); if ((re->sl = calloc(re->shnum, sizeof(*re->sl))) == NULL) err(EXIT_FAILURE, "calloc failed"); /* Get the index of .shstrtab section. */ if (!elf_getshstrndx(re->elf, &shstrndx)) { warnx("elf_getshstrndx failed: %s", elf_errmsg(-1)); return; } if ((scn = elf_getscn(re->elf, 0)) == NULL) return; (void) elf_errno(); do { if (gelf_getshdr(scn, &sh) == NULL) { warnx("gelf_getshdr failed: %s", elf_errmsg(-1)); (void) elf_errno(); continue; } if ((name = elf_strptr(re->elf, shstrndx, sh.sh_name)) == NULL) { (void) elf_errno(); name = ""; } if ((ndx = elf_ndxscn(scn)) == SHN_UNDEF) { if ((elferr = elf_errno()) != 0) { warnx("elf_ndxscn failed: %s", elf_errmsg(elferr)); continue; } } if (ndx >= re->shnum) { warnx("section index of '%s' out of range", name); continue; } if (sh.sh_link >= re->shnum) warnx("section link %llu of '%s' out of range", (unsigned long long)sh.sh_link, name); s = &re->sl[ndx]; s->name = name; s->scn = scn; s->off = sh.sh_offset; s->sz = sh.sh_size; s->entsize = sh.sh_entsize; s->align = sh.sh_addralign; s->type = sh.sh_type; s->flags = sh.sh_flags; s->addr = sh.sh_addr; s->link = sh.sh_link; s->info = sh.sh_info; } while ((scn = elf_nextscn(re->elf, scn)) != NULL); elferr = elf_errno(); if (elferr != 0) warnx("elf_nextscn failed: %s", elf_errmsg(elferr)); } static void unload_sections(struct readelf *re) { if (re->sl != NULL) { free(re->sl); re->sl = NULL; } re->shnum = 0; re->vd_s = NULL; re->vn_s = NULL; re->vs_s = NULL; re->vs = NULL; re->vs_sz = 0; if (re->ver != NULL) { free(re->ver); re->ver = NULL; re->ver_sz = 0; } } static void dump_elf(struct readelf *re) { /* Fetch ELF header. No need to continue if it fails. */ if (gelf_getehdr(re->elf, &re->ehdr) == NULL) { warnx("gelf_getehdr failed: %s", elf_errmsg(-1)); return; } if ((re->ec = gelf_getclass(re->elf)) == ELFCLASSNONE) { warnx("gelf_getclass failed: %s", elf_errmsg(-1)); return; } if (re->ehdr.e_ident[EI_DATA] == ELFDATA2MSB) { re->dw_read = _read_msb; re->dw_decode = _decode_msb; } else { re->dw_read = _read_lsb; re->dw_decode = _decode_lsb; } if (re->options & ~RE_H) load_sections(re); if ((re->options & RE_VV) || (re->options & RE_S)) search_ver(re); if (re->options & RE_H) dump_ehdr(re); if (re->options & RE_L) dump_phdr(re); if (re->options & RE_SS) dump_shdr(re); if (re->options & RE_G) dump_section_groups(re); if (re->options & RE_D) dump_dynamic(re); if (re->options & RE_R) dump_reloc(re); if (re->options & RE_S) dump_symtabs(re); if (re->options & RE_N) dump_notes(re); if (re->options & RE_II) dump_hash(re); if (re->options & RE_X) hex_dump(re); if (re->options & RE_P) str_dump(re); if (re->options & RE_VV) dump_ver(re); if (re->options & RE_AA) dump_arch_specific_info(re); if (re->options & RE_W) dump_dwarf(re); if (re->options & ~RE_H) unload_sections(re); } static void dump_dwarf(struct readelf *re) { Dwarf_Error de; int error; if (dwarf_elf_init(re->elf, DW_DLC_READ, NULL, NULL, &re->dbg, &de)) { if ((error = dwarf_errno(de)) != DW_DLE_DEBUG_INFO_NULL) errx(EXIT_FAILURE, "dwarf_elf_init failed: %s", dwarf_errmsg(de)); return; } if (re->dop & DW_A) dump_dwarf_abbrev(re); if (re->dop & DW_L) dump_dwarf_line(re); if (re->dop & DW_LL) dump_dwarf_line_decoded(re); if (re->dop & DW_I) { dump_dwarf_info(re, 0); dump_dwarf_info(re, 1); } if (re->dop & DW_P) dump_dwarf_pubnames(re); if (re->dop & DW_R) dump_dwarf_aranges(re); if (re->dop & DW_RR) dump_dwarf_ranges(re); if (re->dop & DW_M) dump_dwarf_macinfo(re); if (re->dop & DW_F) dump_dwarf_frame(re, 0); else if (re->dop & DW_FF) dump_dwarf_frame(re, 1); if (re->dop & DW_S) dump_dwarf_str(re); if (re->dop & DW_O) dump_dwarf_loclist(re); dwarf_finish(re->dbg, &de); } static void dump_ar(struct readelf *re, int fd) { Elf_Arsym *arsym; Elf_Arhdr *arhdr; Elf_Cmd cmd; Elf *e; size_t sz; off_t off; int i; re->ar = re->elf; if (re->options & RE_C) { if ((arsym = elf_getarsym(re->ar, &sz)) == NULL) { warnx("elf_getarsym() failed: %s", elf_errmsg(-1)); goto process_members; } printf("Index of archive %s: (%ju entries)\n", re->filename, (uintmax_t) sz - 1); off = 0; for (i = 0; (size_t) i < sz; i++) { if (arsym[i].as_name == NULL) break; if (arsym[i].as_off != off) { off = arsym[i].as_off; if (elf_rand(re->ar, off) != off) { warnx("elf_rand() failed: %s", elf_errmsg(-1)); continue; } if ((e = elf_begin(fd, ELF_C_READ, re->ar)) == NULL) { warnx("elf_begin() failed: %s", elf_errmsg(-1)); continue; } if ((arhdr = elf_getarhdr(e)) == NULL) { warnx("elf_getarhdr() failed: %s", elf_errmsg(-1)); elf_end(e); continue; } printf("Binary %s(%s) contains:\n", re->filename, arhdr->ar_name); } printf("\t%s\n", arsym[i].as_name); } if (elf_rand(re->ar, SARMAG) != SARMAG) { warnx("elf_rand() failed: %s", elf_errmsg(-1)); return; } } process_members: if ((re->options & ~RE_C) == 0) return; cmd = ELF_C_READ; while ((re->elf = elf_begin(fd, cmd, re->ar)) != NULL) { if ((arhdr = elf_getarhdr(re->elf)) == NULL) { warnx("elf_getarhdr() failed: %s", elf_errmsg(-1)); goto next_member; } if (strcmp(arhdr->ar_name, "/") == 0 || strcmp(arhdr->ar_name, "//") == 0 || strcmp(arhdr->ar_name, "__.SYMDEF") == 0) goto next_member; printf("\nFile: %s(%s)\n", re->filename, arhdr->ar_name); dump_elf(re); next_member: cmd = elf_next(re->elf); elf_end(re->elf); } re->elf = re->ar; } static void dump_object(struct readelf *re, int fd) { if ((re->flags & DISPLAY_FILENAME) != 0) printf("\nFile: %s\n", re->filename); if ((re->elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL) { warnx("elf_begin() failed: %s", elf_errmsg(-1)); goto done; } switch (elf_kind(re->elf)) { case ELF_K_NONE: warnx("Not an ELF file."); goto done; case ELF_K_ELF: dump_elf(re); break; case ELF_K_AR: dump_ar(re, fd); break; default: warnx("Internal: libelf returned unknown elf kind."); } done: elf_end(re->elf); } static void add_dumpop(struct readelf *re, size_t si, const char *sn, int op, int t) { struct dumpop *d; if ((d = find_dumpop(re, si, sn, -1, t)) == NULL) { if ((d = calloc(1, sizeof(*d))) == NULL) err(EXIT_FAILURE, "calloc failed"); if (t == DUMP_BY_INDEX) d->u.si = si; else d->u.sn = sn; d->type = t; d->op = op; STAILQ_INSERT_TAIL(&re->v_dumpop, d, dumpop_list); } else d->op |= op; } static struct dumpop * find_dumpop(struct readelf *re, size_t si, const char *sn, int op, int t) { struct dumpop *d; STAILQ_FOREACH(d, &re->v_dumpop, dumpop_list) { if ((op == -1 || op & d->op) && (t == -1 || (unsigned) t == d->type)) { if ((d->type == DUMP_BY_INDEX && d->u.si == si) || (d->type == DUMP_BY_NAME && !strcmp(d->u.sn, sn))) return (d); } } return (NULL); } static struct { const char *ln; char sn; int value; } dwarf_op[] = { {"rawline", 'l', DW_L}, {"decodedline", 'L', DW_LL}, {"info", 'i', DW_I}, {"abbrev", 'a', DW_A}, {"pubnames", 'p', DW_P}, {"aranges", 'r', DW_R}, {"ranges", 'r', DW_R}, {"Ranges", 'R', DW_RR}, {"macro", 'm', DW_M}, {"frames", 'f', DW_F}, {"frames-interp", 'F', DW_FF}, {"str", 's', DW_S}, {"loc", 'o', DW_O}, {NULL, 0, 0} }; static void parse_dwarf_op_short(struct readelf *re, const char *op) { int i; if (op == NULL) { re->dop |= DW_DEFAULT_OPTIONS; return; } for (; *op != '\0'; op++) { for (i = 0; dwarf_op[i].ln != NULL; i++) { if (dwarf_op[i].sn == *op) { re->dop |= dwarf_op[i].value; break; } } } } static void parse_dwarf_op_long(struct readelf *re, const char *op) { char *p, *token, *bp; int i; if (op == NULL) { re->dop |= DW_DEFAULT_OPTIONS; return; } if ((p = strdup(op)) == NULL) err(EXIT_FAILURE, "strdup failed"); bp = p; while ((token = strsep(&p, ",")) != NULL) { for (i = 0; dwarf_op[i].ln != NULL; i++) { if (!strcmp(token, dwarf_op[i].ln)) { re->dop |= dwarf_op[i].value; break; } } } free(bp); } static uint64_t _read_lsb(Elf_Data *d, uint64_t *offsetp, int bytes_to_read) { uint64_t ret; uint8_t *src; src = (uint8_t *) d->d_buf + *offsetp; ret = 0; switch (bytes_to_read) { case 8: ret |= ((uint64_t) src[4]) << 32 | ((uint64_t) src[5]) << 40; ret |= ((uint64_t) src[6]) << 48 | ((uint64_t) src[7]) << 56; /* FALLTHROUGH */ case 4: ret |= ((uint64_t) src[2]) << 16 | ((uint64_t) src[3]) << 24; /* FALLTHROUGH */ case 2: ret |= ((uint64_t) src[1]) << 8; /* FALLTHROUGH */ case 1: ret |= src[0]; break; default: return (0); } *offsetp += bytes_to_read; return (ret); } static uint64_t _read_msb(Elf_Data *d, uint64_t *offsetp, int bytes_to_read) { uint64_t ret; uint8_t *src; src = (uint8_t *) d->d_buf + *offsetp; switch (bytes_to_read) { case 1: ret = src[0]; break; case 2: ret = src[1] | ((uint64_t) src[0]) << 8; break; case 4: ret = src[3] | ((uint64_t) src[2]) << 8; ret |= ((uint64_t) src[1]) << 16 | ((uint64_t) src[0]) << 24; break; case 8: ret = src[7] | ((uint64_t) src[6]) << 8; ret |= ((uint64_t) src[5]) << 16 | ((uint64_t) src[4]) << 24; ret |= ((uint64_t) src[3]) << 32 | ((uint64_t) src[2]) << 40; ret |= ((uint64_t) src[1]) << 48 | ((uint64_t) src[0]) << 56; break; default: return (0); } *offsetp += bytes_to_read; return (ret); } static uint64_t _decode_lsb(uint8_t **data, int bytes_to_read) { uint64_t ret; uint8_t *src; src = *data; ret = 0; switch (bytes_to_read) { case 8: ret |= ((uint64_t) src[4]) << 32 | ((uint64_t) src[5]) << 40; ret |= ((uint64_t) src[6]) << 48 | ((uint64_t) src[7]) << 56; /* FALLTHROUGH */ case 4: ret |= ((uint64_t) src[2]) << 16 | ((uint64_t) src[3]) << 24; /* FALLTHROUGH */ case 2: ret |= ((uint64_t) src[1]) << 8; /* FALLTHROUGH */ case 1: ret |= src[0]; break; default: return (0); } *data += bytes_to_read; return (ret); } static uint64_t _decode_msb(uint8_t **data, int bytes_to_read) { uint64_t ret; uint8_t *src; src = *data; ret = 0; switch (bytes_to_read) { case 1: ret = src[0]; break; case 2: ret = src[1] | ((uint64_t) src[0]) << 8; break; case 4: ret = src[3] | ((uint64_t) src[2]) << 8; ret |= ((uint64_t) src[1]) << 16 | ((uint64_t) src[0]) << 24; break; case 8: ret = src[7] | ((uint64_t) src[6]) << 8; ret |= ((uint64_t) src[5]) << 16 | ((uint64_t) src[4]) << 24; ret |= ((uint64_t) src[3]) << 32 | ((uint64_t) src[2]) << 40; ret |= ((uint64_t) src[1]) << 48 | ((uint64_t) src[0]) << 56; break; default: return (0); break; } *data += bytes_to_read; return (ret); } static int64_t _decode_sleb128(uint8_t **dp, uint8_t *dpe) { int64_t ret = 0; uint8_t b = 0; int shift = 0; uint8_t *src = *dp; do { if (src >= dpe) break; b = *src++; ret |= ((b & 0x7f) << shift); shift += 7; } while ((b & 0x80) != 0); if (shift < 32 && (b & 0x40) != 0) ret |= (-1 << shift); *dp = src; return (ret); } static uint64_t _decode_uleb128(uint8_t **dp, uint8_t *dpe) { uint64_t ret = 0; uint8_t b; int shift = 0; uint8_t *src = *dp; do { if (src >= dpe) break; b = *src++; ret |= ((b & 0x7f) << shift); shift += 7; } while ((b & 0x80) != 0); *dp = src; return (ret); } static void readelf_version(void) { (void) printf("%s (%s)\n", ELFTC_GETPROGNAME(), elftc_version()); exit(EXIT_SUCCESS); } #define USAGE_MESSAGE "\ Usage: %s [options] file...\n\ Display information about ELF objects and ar(1) archives.\n\n\ Options:\n\ -a | --all Equivalent to specifying options '-dhIlrsASV'.\n\ -c | --archive-index Print the archive symbol table for archives.\n\ -d | --dynamic Print the contents of SHT_DYNAMIC sections.\n\ -e | --headers Print all headers in the object.\n\ -g | --section-groups Print the contents of the section groups.\n\ -h | --file-header Print the file header for the object.\n\ -l | --program-headers Print the PHDR table for the object.\n\ -n | --notes Print the contents of SHT_NOTE sections.\n\ -p INDEX | --string-dump=INDEX\n\ Print the contents of section at index INDEX.\n\ -r | --relocs Print relocation information.\n\ -s | --syms | --symbols Print symbol tables.\n\ -t | --section-details Print additional information about sections.\n\ -v | --version Print a version identifier and exit.\n\ -w[afilmoprsFLR] | --debug-dump={abbrev,aranges,decodedline,frames,\n\ frames-interp,info,loc,macro,pubnames,\n\ ranges,Ranges,rawline,str}\n\ Display DWARF information.\n\ -x INDEX | --hex-dump=INDEX\n\ Display contents of a section as hexadecimal.\n\ -A | --arch-specific (accepted, but ignored)\n\ -D | --use-dynamic Print the symbol table specified by the DT_SYMTAB\n\ entry in the \".dynamic\" section.\n\ -H | --help Print a help message.\n\ -I | --histogram Print information on bucket list lengths for \n\ hash sections.\n\ -N | --full-section-name (accepted, but ignored)\n\ -S | --sections | --section-headers\n\ Print information about section headers.\n\ -V | --version-info Print symbol versoning information.\n\ -W | --wide Print information without wrapping long lines.\n" static void readelf_usage(int status) { fprintf(stderr, USAGE_MESSAGE, ELFTC_GETPROGNAME()); exit(status); } int main(int argc, char **argv) { cap_rights_t rights; fileargs_t *fa; struct readelf *re, re_storage; unsigned long si; int fd, opt, i; char *ep; re = &re_storage; memset(re, 0, sizeof(*re)); STAILQ_INIT(&re->v_dumpop); while ((opt = getopt_long(argc, argv, "AacDdegHhIi:lNnp:rSstuVvWw::x:", longopts, NULL)) != -1) { switch(opt) { case '?': readelf_usage(EXIT_SUCCESS); break; case 'A': re->options |= RE_AA; break; case 'a': re->options |= RE_AA | RE_D | RE_G | RE_H | RE_II | RE_L | RE_R | RE_SS | RE_S | RE_VV; break; case 'c': re->options |= RE_C; break; case 'D': re->options |= RE_DD; break; case 'd': re->options |= RE_D; break; case 'e': re->options |= RE_H | RE_L | RE_SS; break; case 'g': re->options |= RE_G; break; case 'H': readelf_usage(EXIT_SUCCESS); break; case 'h': re->options |= RE_H; break; case 'I': re->options |= RE_II; break; case 'i': /* Not implemented yet. */ break; case 'l': re->options |= RE_L; break; case 'N': re->options |= RE_NN; break; case 'n': re->options |= RE_N; break; case 'p': re->options |= RE_P; si = strtoul(optarg, &ep, 10); if (*ep == '\0') add_dumpop(re, (size_t) si, NULL, STR_DUMP, DUMP_BY_INDEX); else add_dumpop(re, 0, optarg, STR_DUMP, DUMP_BY_NAME); break; case 'r': re->options |= RE_R; break; case 'S': re->options |= RE_SS; break; case 's': re->options |= RE_S; break; case 't': re->options |= RE_SS | RE_T; break; case 'u': re->options |= RE_U; break; case 'V': re->options |= RE_VV; break; case 'v': readelf_version(); break; case 'W': re->options |= RE_WW; break; case 'w': re->options |= RE_W; parse_dwarf_op_short(re, optarg); break; case 'x': re->options |= RE_X; si = strtoul(optarg, &ep, 10); if (*ep == '\0') add_dumpop(re, (size_t) si, NULL, HEX_DUMP, DUMP_BY_INDEX); else add_dumpop(re, 0, optarg, HEX_DUMP, DUMP_BY_NAME); break; case OPTION_DEBUG_DUMP: re->options |= RE_W; parse_dwarf_op_long(re, optarg); } } argv += optind; argc -= optind; if (argc == 0 || re->options == 0) readelf_usage(EXIT_FAILURE); if (argc > 1) re->flags |= DISPLAY_FILENAME; if (elf_version(EV_CURRENT) == EV_NONE) errx(EXIT_FAILURE, "ELF library initialization failed: %s", elf_errmsg(-1)); cap_rights_init(&rights, CAP_FCNTL, CAP_FSTAT, CAP_MMAP_R, CAP_SEEK); fa = fileargs_init(argc, argv, O_RDONLY, 0, &rights, FA_OPEN); if (fa == NULL) err(1, "Unable to initialize casper fileargs"); caph_cache_catpages(); if (caph_limit_stdio() < 0) { fileargs_free(fa); err(1, "Unable to limit stdio rights"); } if (caph_enter_casper() < 0) { fileargs_free(fa); err(1, "Unable to enter capability mode"); } for (i = 0; i < argc; i++) { re->filename = argv[i]; fd = fileargs_open(fa, re->filename); if (fd < 0) { warn("open %s failed", re->filename); } else { dump_object(re, fd); close(fd); } } exit(EXIT_SUCCESS); } diff --git a/lib/libc/powerpc64/string/bcopy_resolver.c b/lib/libc/powerpc64/string/bcopy_resolver.c index 34b88d6ccbe4..1991a2998a4d 100644 --- a/lib/libc/powerpc64/string/bcopy_resolver.c +++ b/lib/libc/powerpc64/string/bcopy_resolver.c @@ -1,73 +1,73 @@ /*- * Copyright (c) 2018 Instituto de Pesquisas Eldorado * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of its contributors may * be used to endorse or promote products derived from this software * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #define _CAT(a,b) a##b #define CAT(a,b) _CAT(a,b) #define CAT3(a,b,c) CAT(CAT(a,b),c) #ifdef MEMCOPY #define FN_NAME memcpy #define FN_RET void * #define FN_PARAMS (void *dst, const void *src, size_t len) #elif defined(MEMMOVE) #define FN_NAME memmove #define FN_RET void * #define FN_PARAMS (void *dst, const void *src, size_t len) #else #define FN_NAME bcopy #define FN_RET void #define FN_PARAMS (const void *src, void *dst, size_t len) #endif #define FN_NAME_NOVSX CAT(__, FN_NAME) #define FN_NAME_VSX CAT3(__, FN_NAME, _vsx) FN_RET FN_NAME_NOVSX FN_PARAMS; FN_RET FN_NAME_VSX FN_PARAMS; DEFINE_UIFUNC(, FN_RET, FN_NAME, FN_PARAMS) { /* VSX instructions were added in POWER ISA 2.06, * however it requires data to be word-aligned. * Since POWER ISA 2.07B this is solved transparently * by the hardware */ - if (cpu_features2 & PPC_FEATURE2_ARCH_2_07) + if (cpu_features & PPC_FEATURE_HAS_VSX) return (FN_NAME_VSX); else return (FN_NAME_NOVSX); } diff --git a/lib/libc/powerpc64/string/memcpy.S b/lib/libc/powerpc64/string/memcpy.S index 2ea676bf67a9..3e6649711382 100644 --- a/lib/libc/powerpc64/string/memcpy.S +++ b/lib/libc/powerpc64/string/memcpy.S @@ -1,122 +1,133 @@ /*- * Copyright (c) 2018 Instituto de Pesquisas Eldorado * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of its contributors may * be used to endorse or promote products derived from this software * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #ifndef FN_NAME #define FN_NAME __memcpy WEAK_REFERENCE(__memcpy, memcpy); #define BLOCK_BITS 4 #endif #define BLOCK_BYTES (1 << BLOCK_BITS) #define BLOCK_MASK (BLOCK_BYTES - 1) +/* Minimum 8 byte alignment, to avoid cache-inhibited alignment faults. */ +#ifndef ALIGN_MASK +#define ALIGN_MASK 0x7 +#endif + /* * r3: dst * r4: src * r5: len */ ENTRY(FN_NAME) cmpdi %r5, 0 /* len == 0? nothing to do */ beqlr- + /* If src and dst are relatively misaligned, do byte copies. */ + andi. %r8, %r3, ALIGN_MASK + andi. %r7, %r4, ALIGN_MASK + cmpd %r8, %r7 + mr %r7, %r5 + bne .Lcopy_remaining_fix_index_byte mr %r8, %r3 /* save dst */ /* align src */ .Lalignment_loop: lbz %r6, 0(%r4) stb %r6, 0(%r3) addi %r3, %r3, 1 addi %r4, %r4, 1 addi %r5, %r5, -1 cmpdi %r5, 0 beq .Lexit andi. %r0, %r4, BLOCK_MASK bne .Lalignment_loop /* r7 = remaining, non-block, bytes */ andi. %r7, %r5, BLOCK_MASK /* Check if there are blocks of BLOCK_BYTES to be copied */ xor. %r5, %r5, %r7 beq .Lcopy_remaining_fix_index_byte #ifdef FN_COPY_LOOP FN_COPY_LOOP #else /* Setup to copy word with ldu and stdu */ ld %r6, 0(%r4) ld %r9, 8(%r4) std %r6, 0(%r3) std %r9, 8(%r3) addi %r5, %r5, -BLOCK_BYTES cmpd %r5, 0 beq .Lcopy_remaining_fix_index_word srdi %r5, %r5, BLOCK_BITS mtctr %r5 .Lcopy_word: ldu %r6, 16(%r4) ld %r9, 8(%r4) stdu %r6, 16(%r3) std %r9, 8(%r3) bdnz .Lcopy_word .Lcopy_remaining_fix_index_word: /* Check if there are remaining bytes */ cmpd %r7, 0 beq .Lexit addi %r3, %r3, BLOCK_MASK addi %r4, %r4, BLOCK_MASK b .Lcopy_remaining #endif .Lcopy_remaining_fix_index_byte: addi %r4, %r4, -1 addi %r3, %r3, -1 /* Copy remaining bytes */ .Lcopy_remaining: mtctr %r7 .Lcopy_remaining_loop: lbzu %r6, 1(%r4) stbu %r6, 1(%r3) bdnz .Lcopy_remaining_loop .Lexit: /* Restore dst */ mr %r3, %r8 blr END(FN_NAME) .section .note.GNU-stack,"",%progbits diff --git a/lib/libc/powerpc64/string/memcpy_vsx.S b/lib/libc/powerpc64/string/memcpy_vsx.S index 708eb7d9f45d..83dd84ef7ee0 100644 --- a/lib/libc/powerpc64/string/memcpy_vsx.S +++ b/lib/libc/powerpc64/string/memcpy_vsx.S @@ -1,65 +1,66 @@ /*- * Copyright (c) 2018 Instituto de Pesquisas Eldorado * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of its contributors may * be used to endorse or promote products derived from this software * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #define FN_NAME __memcpy_vsx #define BLOCK_BITS 6 +#define ALIGN_MASK 0xf /* * r5: bytes to copy (multiple of BLOCK_BYTES) * */ #define FN_COPY_LOOP \ /* Load CTR with number of blocks */ \ srdi %r5, %r5, BLOCK_BITS ;\ mtctr %r5 ;\ /* Prepare indexes to load and store data */ \ xor %r6, %r6, %r6 ;\ li %r9, 16 ;\ li %r10, 32 ;\ li %r11, 48 ;\ .Lcopy_vsx_loop: \ lxvd2x %vs6, %r6, %r4 ;\ lxvd2x %vs7, %r9, %r4 ;\ lxvd2x %vs8, %r10, %r4 ;\ lxvd2x %vs9, %r11, %r4 ;\ stxvd2x %vs6, %r6, %r3 ;\ stxvd2x %vs7, %r9, %r3 ;\ stxvd2x %vs8, %r10, %r3 ;\ stxvd2x %vs9, %r11, %r3 ;\ \ addi %r3, %r3, BLOCK_BYTES ;\ addi %r4, %r4, BLOCK_BYTES ;\ bdnz .Lcopy_vsx_loop ;\ \ /* Check if there is remaining bytes */ \ cmpd %r7, 0 ;\ beq .Lexit ;\ #include "memcpy.S" diff --git a/libexec/tftpd/tests/Makefile b/libexec/tftpd/tests/Makefile index 06c5537372a7..9aa420cec1a5 100644 --- a/libexec/tftpd/tests/Makefile +++ b/libexec/tftpd/tests/Makefile @@ -1,14 +1,15 @@ # $FreeBSD$ .include # Skip on GCC 4.2, because it lacks __COUNTER__ .if ${COMPILER_TYPE} != "gcc" || ${COMPILER_VERSION} >= 40300 ATF_TESTS_C= functional TEST_METADATA.functional+= timeout=15 .endif LIBADD= util WARNS?= 6 +CSTD= c11 .include diff --git a/sbin/mount_nfs/mount_nfs.c b/sbin/mount_nfs/mount_nfs.c index 8d8ddac62224..e1eaf206e982 100644 --- a/sbin/mount_nfs/mount_nfs.c +++ b/sbin/mount_nfs/mount_nfs.c @@ -1,1096 +1,1096 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1992, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1992, 1993, 1994\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint static char sccsid[] = "@(#)mount_nfs.c 8.11 (Berkeley) 5/4/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mntopts.h" #include "mounttab.h" /* Table for af,sotype -> netid conversions. */ static struct nc_protos { const char *netid; int af; int sotype; } nc_protos[] = { {"udp", AF_INET, SOCK_DGRAM}, {"tcp", AF_INET, SOCK_STREAM}, {"udp6", AF_INET6, SOCK_DGRAM}, {"tcp6", AF_INET6, SOCK_STREAM}, {NULL, 0, 0} }; struct nfhret { u_long stat; long vers; long auth; long fhsize; u_char nfh[NFS3_FHSIZE]; }; #define BGRND 1 #define ISBGRND 2 #define OF_NOINET4 4 #define OF_NOINET6 8 static int retrycnt = -1; static int opflags = 0; static int nfsproto = IPPROTO_TCP; static int mnttcp_ok = 1; static int noconn = 0; /* The 'portspec' is the server nfs port; NULL means look up via rpcbind. */ static const char *portspec = NULL; static struct sockaddr *addr; static int addrlen = 0; static u_char *fh = NULL; static int fhsize = 0; static int secflavor = -1; static int got_principal = 0; static enum mountmode { ANY, V2, V3, V4 } mountmode = ANY; /* Return codes for nfs_tryproto. */ enum tryret { TRYRET_SUCCESS, TRYRET_TIMEOUT, /* No response received. */ TRYRET_REMOTEERR, /* Error received from remote server. */ TRYRET_LOCALERR /* Local failure. */ }; static int sec_name_to_num(const char *sec); static const char *sec_num_to_name(int num); static int getnfsargs(char *, struct iovec **iov, int *iovlen); /* void set_rpc_maxgrouplist(int); */ static struct netconfig *getnetconf_cached(const char *netid); static const char *netidbytype(int af, int sotype); static void usage(void) __dead2; static int xdr_dir(XDR *, char *); static int xdr_fh(XDR *, struct nfhret *); static enum tryret nfs_tryproto(struct addrinfo *ai, char *hostp, char *spec, char **errstr, struct iovec **iov, int *iovlen); static enum tryret returncode(enum clnt_stat stat, struct rpc_err *rpcerr); int main(int argc, char *argv[]) { int c; struct iovec *iov; int num, iovlen; char *mntname, *p, *spec, *tmp; char mntpath[MAXPATHLEN], errmsg[255]; char hostname[MAXHOSTNAMELEN + 1], gssn[MAXHOSTNAMELEN + 50]; const char *gssname, *nmount_errstr; iov = NULL; iovlen = 0; memset(errmsg, 0, sizeof(errmsg)); gssname = NULL; while ((c = getopt(argc, argv, "23a:bcdD:g:I:iLlNo:PR:r:sTt:w:x:U")) != -1) switch (c) { case '2': mountmode = V2; break; case '3': mountmode = V3; break; case 'a': printf("-a deprecated, use -o readahead=\n"); build_iovec(&iov, &iovlen, "readahead", optarg, (size_t)-1); break; case 'b': opflags |= BGRND; break; case 'c': printf("-c deprecated, use -o noconn\n"); build_iovec(&iov, &iovlen, "noconn", NULL, 0); noconn = 1; break; case 'D': printf("-D deprecated, use -o deadthresh=\n"); build_iovec(&iov, &iovlen, "deadthresh", optarg, (size_t)-1); break; case 'd': printf("-d deprecated, use -o dumbtimer"); build_iovec(&iov, &iovlen, "dumbtimer", NULL, 0); break; case 'g': printf("-g deprecated, use -o maxgroups"); num = strtol(optarg, &p, 10); if (*p || num <= 0) errx(1, "illegal -g value -- %s", optarg); //set_rpc_maxgrouplist(num); build_iovec(&iov, &iovlen, "maxgroups", optarg, (size_t)-1); break; case 'I': printf("-I deprecated, use -o readdirsize=\n"); build_iovec(&iov, &iovlen, "readdirsize", optarg, (size_t)-1); break; case 'i': printf("-i deprecated, use -o intr\n"); build_iovec(&iov, &iovlen, "intr", NULL, 0); break; case 'L': printf("-L deprecated, use -o nolockd\n"); build_iovec(&iov, &iovlen, "nolockd", NULL, 0); break; case 'l': printf("-l deprecated, -o rdirplus\n"); build_iovec(&iov, &iovlen, "rdirplus", NULL, 0); break; case 'N': printf("-N deprecated, do not specify -o resvport\n"); break; case 'o': { int pass_flag_to_nmount; char *opt = optarg; while (opt) { char *pval = NULL; char *pnextopt = NULL; const char *val = ""; pass_flag_to_nmount = 1; pnextopt = strchr(opt, ','); if (pnextopt != NULL) { *pnextopt = '\0'; pnextopt++; } pval = strchr(opt, '='); if (pval != NULL) { *pval = '\0'; val = pval + 1; } if (strcmp(opt, "bg") == 0) { opflags |= BGRND; pass_flag_to_nmount=0; } else if (strcmp(opt, "fg") == 0) { /* same as not specifying -o bg */ pass_flag_to_nmount=0; } else if (strcmp(opt, "gssname") == 0) { pass_flag_to_nmount = 0; gssname = val; } else if (strcmp(opt, "mntudp") == 0) { mnttcp_ok = 0; nfsproto = IPPROTO_UDP; } else if (strcmp(opt, "udp") == 0) { nfsproto = IPPROTO_UDP; } else if (strcmp(opt, "tcp") == 0) { nfsproto = IPPROTO_TCP; } else if (strcmp(opt, "noinet4") == 0) { pass_flag_to_nmount=0; opflags |= OF_NOINET4; } else if (strcmp(opt, "noinet6") == 0) { pass_flag_to_nmount=0; opflags |= OF_NOINET6; } else if (strcmp(opt, "noconn") == 0) { noconn = 1; } else if (strcmp(opt, "nfsv2") == 0) { pass_flag_to_nmount=0; mountmode = V2; } else if (strcmp(opt, "nfsv3") == 0) { mountmode = V3; } else if (strcmp(opt, "nfsv4") == 0) { pass_flag_to_nmount=0; mountmode = V4; nfsproto = IPPROTO_TCP; if (portspec == NULL) portspec = "2049"; } else if (strcmp(opt, "port") == 0) { pass_flag_to_nmount=0; asprintf(&tmp, "%d", atoi(val)); if (tmp == NULL) err(1, "asprintf"); portspec = tmp; } else if (strcmp(opt, "principal") == 0) { got_principal = 1; } else if (strcmp(opt, "proto") == 0) { pass_flag_to_nmount=0; if (strcmp(val, "tcp") == 0) { nfsproto = IPPROTO_TCP; opflags |= OF_NOINET6; build_iovec(&iov, &iovlen, "tcp", NULL, 0); } else if (strcmp(val, "udp") == 0) { mnttcp_ok = 0; nfsproto = IPPROTO_UDP; opflags |= OF_NOINET6; build_iovec(&iov, &iovlen, "udp", NULL, 0); } else if (strcmp(val, "tcp6") == 0) { nfsproto = IPPROTO_TCP; opflags |= OF_NOINET4; build_iovec(&iov, &iovlen, "tcp", NULL, 0); } else if (strcmp(val, "udp6") == 0) { mnttcp_ok = 0; nfsproto = IPPROTO_UDP; opflags |= OF_NOINET4; build_iovec(&iov, &iovlen, "udp", NULL, 0); } else { errx(1, "illegal proto value -- %s", val); } } else if (strcmp(opt, "sec") == 0) { /* * Don't add this option to * the iovec yet - we will * negotiate which sec flavor * to use with the remote * mountd. */ pass_flag_to_nmount=0; secflavor = sec_name_to_num(val); if (secflavor < 0) { errx(1, "illegal sec value -- %s", val); } } else if (strcmp(opt, "retrycnt") == 0) { pass_flag_to_nmount=0; num = strtol(val, &p, 10); if (*p || num < 0) errx(1, "illegal retrycnt value -- %s", val); retrycnt = num; } else if (strcmp(opt, "maxgroups") == 0) { num = strtol(val, &p, 10); if (*p || num <= 0) errx(1, "illegal maxgroups value -- %s", val); //set_rpc_maxgrouplist(num); } else if (strcmp(opt, "vers") == 0) { num = strtol(val, &p, 10); if (*p || num <= 0) errx(1, "illegal vers value -- " "%s", val); switch (num) { case 2: mountmode = V2; break; case 3: mountmode = V3; build_iovec(&iov, &iovlen, "nfsv3", NULL, 0); break; case 4: mountmode = V4; nfsproto = IPPROTO_TCP; if (portspec == NULL) portspec = "2049"; break; default: errx(1, "illegal nfs version " "value -- %s", val); } pass_flag_to_nmount=0; } if (pass_flag_to_nmount) { build_iovec(&iov, &iovlen, opt, __DECONST(void *, val), strlen(val) + 1); } opt = pnextopt; } } break; case 'P': /* obsolete for -o noresvport now default */ printf("-P deprecated, use -o noresvport\n"); build_iovec(&iov, &iovlen, "noresvport", NULL, 0); break; case 'R': printf("-R deprecated, use -o retrycnt=\n"); num = strtol(optarg, &p, 10); if (*p || num < 0) errx(1, "illegal -R value -- %s", optarg); retrycnt = num; break; case 'r': printf("-r deprecated, use -o rsize=\n"); build_iovec(&iov, &iovlen, "rsize", optarg, (size_t)-1); break; case 's': printf("-s deprecated, use -o soft\n"); build_iovec(&iov, &iovlen, "soft", NULL, 0); break; case 'T': nfsproto = IPPROTO_TCP; printf("-T deprecated, use -o tcp\n"); break; case 't': printf("-t deprecated, use -o timeout=\n"); build_iovec(&iov, &iovlen, "timeout", optarg, (size_t)-1); break; case 'w': printf("-w deprecated, use -o wsize=\n"); build_iovec(&iov, &iovlen, "wsize", optarg, (size_t)-1); break; case 'x': printf("-x deprecated, use -o retrans=\n"); build_iovec(&iov, &iovlen, "retrans", optarg, (size_t)-1); break; case 'U': printf("-U deprecated, use -o mntudp\n"); mnttcp_ok = 0; nfsproto = IPPROTO_UDP; build_iovec(&iov, &iovlen, "mntudp", NULL, 0); break; default: usage(); break; } argc -= optind; argv += optind; if (argc != 2) { usage(); /* NOTREACHED */ } spec = *argv++; mntname = *argv; if (retrycnt == -1) /* The default is to keep retrying forever. */ retrycnt = 0; if (modfind("nfscl") < 0) { /* Not present in kernel, try loading it */ if (kldload("nfscl") < 0 || modfind("nfscl") < 0) errx(1, "nfscl is not available"); } /* * Add the fqdn to the gssname, as required. */ if (gssname != NULL) { if (strchr(gssname, '@') == NULL && gethostname(hostname, MAXHOSTNAMELEN) == 0) { snprintf(gssn, sizeof (gssn), "%s@%s", gssname, hostname); gssname = gssn; } build_iovec(&iov, &iovlen, "gssname", __DECONST(void *, gssname), strlen(gssname) + 1); } if (!getnfsargs(spec, &iov, &iovlen)) exit(1); /* resolve the mountpoint with realpath(3) */ if (checkpath(mntname, mntpath) != 0) err(1, "%s", mntpath); build_iovec_argf(&iov, &iovlen, "fstype", "nfs"); build_iovec(&iov, &iovlen, "fspath", mntpath, (size_t)-1); build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg)); if (nmount(iov, iovlen, 0)) { nmount_errstr = nfsv4_geterrstr(errno); if (mountmode == V4 && nmount_errstr != NULL) errx(1, "nmount: %s, %s", mntpath, nmount_errstr); else err(1, "nmount: %s%s%s", mntpath, errmsg[0] ? ", " : "", errmsg); } exit(0); } static int sec_name_to_num(const char *sec) { if (!strcmp(sec, "krb5")) return (RPCSEC_GSS_KRB5); if (!strcmp(sec, "krb5i")) return (RPCSEC_GSS_KRB5I); if (!strcmp(sec, "krb5p")) return (RPCSEC_GSS_KRB5P); if (!strcmp(sec, "sys")) return (AUTH_SYS); return (-1); } static const char * sec_num_to_name(int flavor) { switch (flavor) { case RPCSEC_GSS_KRB5: return ("krb5"); case RPCSEC_GSS_KRB5I: return ("krb5i"); case RPCSEC_GSS_KRB5P: return ("krb5p"); case AUTH_SYS: return ("sys"); } return (NULL); } /* * Wait for RTM_IFINFO message with interface that is IFF_UP and with * link on, or until timeout expires. Returns seconds left. */ static time_t rtm_ifinfo_sleep(time_t sec) { - char buf[2048]; + char buf[2048] __aligned(__alignof(struct if_msghdr)); fd_set rfds; struct timeval tv, start; ssize_t nread; int n, s; s = socket(PF_ROUTE, SOCK_RAW, 0); if (s < 0) err(EX_OSERR, "socket"); (void)gettimeofday(&start, NULL); for (tv.tv_sec = sec, tv.tv_usec = 0; tv.tv_sec > 0; (void)gettimeofday(&tv, NULL), tv.tv_sec = sec - (tv.tv_sec - start.tv_sec)) { FD_ZERO(&rfds); FD_SET(s, &rfds); n = select(s + 1, &rfds, NULL, NULL, &tv); if (n == 0) continue; if (n == -1) { if (errno == EINTR) continue; else err(EX_SOFTWARE, "select"); } nread = read(s, buf, 2048); if (nread < 0) err(EX_OSERR, "read"); if ((size_t)nread >= sizeof(struct if_msghdr)) { struct if_msghdr *ifm; ifm = (struct if_msghdr *)buf; if (ifm->ifm_version == RTM_VERSION && ifm->ifm_type == RTM_IFINFO && (ifm->ifm_flags & IFF_UP) && ifm->ifm_data.ifi_link_state != LINK_STATE_DOWN) break; } } close(s); return (tv.tv_sec); } static int getnfsargs(char *spec, struct iovec **iov, int *iovlen) { struct addrinfo hints, *ai_nfs, *ai; enum tryret ret; int ecode, speclen, remoteerr, offset, have_bracket = 0; char *hostp, *delimp, *errstr; size_t len; static char nam[MNAMELEN + 1], pname[MAXHOSTNAMELEN + 5]; if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL && *(delimp + 1) == ':') { hostp = spec + 1; spec = delimp + 2; have_bracket = 1; } else if ((delimp = strrchr(spec, ':')) != NULL) { hostp = spec; spec = delimp + 1; } else if ((delimp = strrchr(spec, '@')) != NULL) { warnx("path@server syntax is deprecated, use server:path"); hostp = delimp + 1; } else { warnx("no : nfs-name"); return (0); } *delimp = '\0'; /* * If there has been a trailing slash at mounttime it seems * that some mountd implementations fail to remove the mount * entries from their mountlist while unmounting. */ for (speclen = strlen(spec); speclen > 1 && spec[speclen - 1] == '/'; speclen--) spec[speclen - 1] = '\0'; if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) { warnx("%s:%s: %s", hostp, spec, strerror(ENAMETOOLONG)); return (0); } /* Make both '@' and ':' notations equal */ if (*hostp != '\0') { len = strlen(hostp); offset = 0; if (have_bracket) nam[offset++] = '['; memmove(nam + offset, hostp, len); if (have_bracket) nam[len + offset++] = ']'; nam[len + offset++] = ':'; memmove(nam + len + offset, spec, speclen); nam[len + speclen + offset] = '\0'; } /* * Handle an internet host address. */ memset(&hints, 0, sizeof hints); hints.ai_flags = AI_NUMERICHOST; if (nfsproto == IPPROTO_TCP) hints.ai_socktype = SOCK_STREAM; else if (nfsproto == IPPROTO_UDP) hints.ai_socktype = SOCK_DGRAM; if (getaddrinfo(hostp, portspec, &hints, &ai_nfs) != 0) { hints.ai_flags = AI_CANONNAME; if ((ecode = getaddrinfo(hostp, portspec, &hints, &ai_nfs)) != 0) { if (portspec == NULL) errx(1, "%s: %s", hostp, gai_strerror(ecode)); else errx(1, "%s:%s: %s", hostp, portspec, gai_strerror(ecode)); return (0); } /* * For a Kerberized nfs mount where the "principal" * argument has not been set, add it here. */ if (got_principal == 0 && secflavor != AUTH_SYS && ai_nfs->ai_canonname != NULL) { snprintf(pname, sizeof (pname), "nfs@%s", ai_nfs->ai_canonname); build_iovec(iov, iovlen, "principal", pname, strlen(pname) + 1); } } ret = TRYRET_LOCALERR; for (;;) { /* * Try each entry returned by getaddrinfo(). Note the * occurrence of remote errors by setting `remoteerr'. */ remoteerr = 0; for (ai = ai_nfs; ai != NULL; ai = ai->ai_next) { if ((ai->ai_family == AF_INET6) && (opflags & OF_NOINET6)) continue; if ((ai->ai_family == AF_INET) && (opflags & OF_NOINET4)) continue; ret = nfs_tryproto(ai, hostp, spec, &errstr, iov, iovlen); if (ret == TRYRET_SUCCESS) break; if (ret != TRYRET_LOCALERR) remoteerr = 1; if ((opflags & ISBGRND) == 0) fprintf(stderr, "%s\n", errstr); } if (ret == TRYRET_SUCCESS) break; /* Exit if all errors were local. */ if (!remoteerr) exit(1); /* * If retrycnt == 0, we are to keep retrying forever. * Otherwise decrement it, and exit if it hits zero. */ if (retrycnt != 0 && --retrycnt == 0) exit(1); if ((opflags & (BGRND | ISBGRND)) == BGRND) { warnx("Cannot immediately mount %s:%s, backgrounding", hostp, spec); opflags |= ISBGRND; if (daemon(0, 0) != 0) err(1, "daemon"); } /* * If rtm_ifinfo_sleep() returns non-zero, don't count * that as a retry attempt. */ if (rtm_ifinfo_sleep(60) && retrycnt != 0) retrycnt++; } freeaddrinfo(ai_nfs); build_iovec(iov, iovlen, "hostname", nam, (size_t)-1); /* Add mounted file system to PATH_MOUNTTAB */ if (mountmode != V4 && !add_mtab(hostp, spec)) warnx("can't update %s for %s:%s", PATH_MOUNTTAB, hostp, spec); return (1); } /* * Try to set up the NFS arguments according to the address * family, protocol (and possibly port) specified in `ai'. * * Returns TRYRET_SUCCESS if successful, or: * TRYRET_TIMEOUT The server did not respond. * TRYRET_REMOTEERR The server reported an error. * TRYRET_LOCALERR Local failure. * * In all error cases, *errstr will be set to a statically-allocated string * describing the error. */ static enum tryret nfs_tryproto(struct addrinfo *ai, char *hostp, char *spec, char **errstr, struct iovec **iov, int *iovlen) { static char errbuf[256]; struct sockaddr_storage nfs_ss; struct netbuf nfs_nb; struct nfhret nfhret; struct timeval try; struct rpc_err rpcerr; CLIENT *clp; struct netconfig *nconf, *nconf_mnt; const char *netid, *netid_mnt, *secname; int doconnect, nfsvers, mntvers, sotype; enum clnt_stat clntstat; enum mountmode trymntmode; sotype = 0; trymntmode = mountmode; errbuf[0] = '\0'; *errstr = errbuf; if (nfsproto == IPPROTO_TCP) sotype = SOCK_STREAM; else if (nfsproto == IPPROTO_UDP) sotype = SOCK_DGRAM; if ((netid = netidbytype(ai->ai_family, sotype)) == NULL) { snprintf(errbuf, sizeof errbuf, "af %d sotype %d not supported", ai->ai_family, sotype); return (TRYRET_LOCALERR); } if ((nconf = getnetconf_cached(netid)) == NULL) { snprintf(errbuf, sizeof errbuf, "%s: %s", netid, nc_sperror()); return (TRYRET_LOCALERR); } /* The RPCPROG_MNT netid may be different. */ if (mnttcp_ok) { netid_mnt = netid; nconf_mnt = nconf; } else { if ((netid_mnt = netidbytype(ai->ai_family, SOCK_DGRAM)) == NULL) { snprintf(errbuf, sizeof errbuf, "af %d sotype SOCK_DGRAM not supported", ai->ai_family); return (TRYRET_LOCALERR); } if ((nconf_mnt = getnetconf_cached(netid_mnt)) == NULL) { snprintf(errbuf, sizeof errbuf, "%s: %s", netid_mnt, nc_sperror()); return (TRYRET_LOCALERR); } } tryagain: if (trymntmode == V4) { nfsvers = 4; mntvers = 3; /* Workaround for GCC. */ } else if (trymntmode == V2) { nfsvers = 2; mntvers = 1; } else { nfsvers = 3; mntvers = 3; } if (portspec != NULL) { /* `ai' contains the complete nfsd sockaddr. */ nfs_nb.buf = ai->ai_addr; nfs_nb.len = nfs_nb.maxlen = ai->ai_addrlen; } else { /* Ask the remote rpcbind. */ nfs_nb.buf = &nfs_ss; nfs_nb.len = nfs_nb.maxlen = sizeof nfs_ss; if (!rpcb_getaddr(NFS_PROGRAM, nfsvers, nconf, &nfs_nb, hostp)) { if (rpc_createerr.cf_stat == RPC_PROGVERSMISMATCH && trymntmode == ANY) { trymntmode = V2; goto tryagain; } snprintf(errbuf, sizeof errbuf, "[%s] %s:%s: %s", netid, hostp, spec, clnt_spcreateerror("RPCPROG_NFS")); return (returncode(rpc_createerr.cf_stat, &rpc_createerr.cf_error)); } } /* Check that the server (nfsd) responds on the port we have chosen. */ clp = clnt_tli_create(RPC_ANYFD, nconf, &nfs_nb, NFS_PROGRAM, nfsvers, 0, 0); if (clp == NULL) { snprintf(errbuf, sizeof errbuf, "[%s] %s:%s: %s", netid, hostp, spec, clnt_spcreateerror("nfsd: RPCPROG_NFS")); return (returncode(rpc_createerr.cf_stat, &rpc_createerr.cf_error)); } if (sotype == SOCK_DGRAM && noconn == 0) { /* * Use connect(), to match what the kernel does. This * catches cases where the server responds from the * wrong source address. */ doconnect = 1; if (!clnt_control(clp, CLSET_CONNECT, (char *)&doconnect)) { clnt_destroy(clp); snprintf(errbuf, sizeof errbuf, "[%s] %s:%s: CLSET_CONNECT failed", netid, hostp, spec); return (TRYRET_LOCALERR); } } try.tv_sec = 10; try.tv_usec = 0; clntstat = clnt_call(clp, NFSPROC_NULL, (xdrproc_t)xdr_void, NULL, (xdrproc_t)xdr_void, NULL, try); if (clntstat != RPC_SUCCESS) { if (clntstat == RPC_PROGVERSMISMATCH && trymntmode == ANY) { clnt_destroy(clp); trymntmode = V2; goto tryagain; } clnt_geterr(clp, &rpcerr); snprintf(errbuf, sizeof errbuf, "[%s] %s:%s: %s", netid, hostp, spec, clnt_sperror(clp, "NFSPROC_NULL")); clnt_destroy(clp); return (returncode(clntstat, &rpcerr)); } clnt_destroy(clp); /* * For NFSv4, there is no mount protocol. */ if (trymntmode == V4) { /* * Store the server address in nfsargsp, making * sure to copy any locally allocated structures. */ addrlen = nfs_nb.len; addr = malloc(addrlen); if (addr == NULL) err(1, "malloc"); bcopy(nfs_nb.buf, addr, addrlen); build_iovec(iov, iovlen, "addr", addr, addrlen); secname = sec_num_to_name(secflavor); if (secname != NULL) { build_iovec(iov, iovlen, "sec", __DECONST(void *, secname), (size_t)-1); } build_iovec(iov, iovlen, "nfsv4", NULL, 0); build_iovec(iov, iovlen, "dirpath", spec, (size_t)-1); return (TRYRET_SUCCESS); } /* Send the MOUNTPROC_MNT RPC to get the root filehandle. */ try.tv_sec = 10; try.tv_usec = 0; clp = clnt_tp_create(hostp, MOUNTPROG, mntvers, nconf_mnt); if (clp == NULL) { snprintf(errbuf, sizeof errbuf, "[%s] %s:%s: %s", netid_mnt, hostp, spec, clnt_spcreateerror("RPCMNT: clnt_create")); return (returncode(rpc_createerr.cf_stat, &rpc_createerr.cf_error)); } clp->cl_auth = authsys_create_default(); nfhret.auth = secflavor; nfhret.vers = mntvers; clntstat = clnt_call(clp, MOUNTPROC_MNT, (xdrproc_t)xdr_dir, spec, (xdrproc_t)xdr_fh, &nfhret, try); auth_destroy(clp->cl_auth); if (clntstat != RPC_SUCCESS) { if (clntstat == RPC_PROGVERSMISMATCH && trymntmode == ANY) { clnt_destroy(clp); trymntmode = V2; goto tryagain; } clnt_geterr(clp, &rpcerr); snprintf(errbuf, sizeof errbuf, "[%s] %s:%s: %s", netid_mnt, hostp, spec, clnt_sperror(clp, "RPCPROG_MNT")); clnt_destroy(clp); return (returncode(clntstat, &rpcerr)); } clnt_destroy(clp); if (nfhret.stat != 0) { snprintf(errbuf, sizeof errbuf, "[%s] %s:%s: %s", netid_mnt, hostp, spec, strerror(nfhret.stat)); return (TRYRET_REMOTEERR); } /* * Store the filehandle and server address in nfsargsp, making * sure to copy any locally allocated structures. */ addrlen = nfs_nb.len; addr = malloc(addrlen); fhsize = nfhret.fhsize; fh = malloc(fhsize); if (addr == NULL || fh == NULL) err(1, "malloc"); bcopy(nfs_nb.buf, addr, addrlen); bcopy(nfhret.nfh, fh, fhsize); build_iovec(iov, iovlen, "addr", addr, addrlen); build_iovec(iov, iovlen, "fh", fh, fhsize); secname = sec_num_to_name(nfhret.auth); if (secname) { build_iovec(iov, iovlen, "sec", __DECONST(void *, secname), (size_t)-1); } if (nfsvers == 3) build_iovec(iov, iovlen, "nfsv3", NULL, 0); return (TRYRET_SUCCESS); } /* * Catagorise a RPC return status and error into an `enum tryret' * return code. */ static enum tryret returncode(enum clnt_stat clntstat, struct rpc_err *rpcerr) { switch (clntstat) { case RPC_TIMEDOUT: return (TRYRET_TIMEOUT); case RPC_PMAPFAILURE: case RPC_PROGNOTREGISTERED: case RPC_PROGVERSMISMATCH: /* XXX, these can be local or remote. */ case RPC_CANTSEND: case RPC_CANTRECV: return (TRYRET_REMOTEERR); case RPC_SYSTEMERROR: switch (rpcerr->re_errno) { case ETIMEDOUT: return (TRYRET_TIMEOUT); case ENOMEM: break; default: return (TRYRET_REMOTEERR); } /* FALLTHROUGH */ default: break; } return (TRYRET_LOCALERR); } /* * Look up a netid based on an address family and socket type. * `af' is the address family, and `sotype' is SOCK_DGRAM or SOCK_STREAM. * * XXX there should be a library function for this. */ static const char * netidbytype(int af, int sotype) { struct nc_protos *p; for (p = nc_protos; p->netid != NULL; p++) { if (af != p->af || sotype != p->sotype) continue; return (p->netid); } return (NULL); } /* * Look up a netconfig entry based on a netid, and cache the result so * that we don't need to remember to call freenetconfigent(). * * Otherwise it behaves just like getnetconfigent(), so nc_*error() * work on failure. */ static struct netconfig * getnetconf_cached(const char *netid) { static struct nc_entry { struct netconfig *nconf; struct nc_entry *next; } *head; struct nc_entry *p; struct netconfig *nconf; for (p = head; p != NULL; p = p->next) if (strcmp(netid, p->nconf->nc_netid) == 0) return (p->nconf); if ((nconf = getnetconfigent(netid)) == NULL) return (NULL); if ((p = malloc(sizeof(*p))) == NULL) err(1, "malloc"); p->nconf = nconf; p->next = head; head = p; return (p->nconf); } /* * xdr routines for mount rpc's */ static int xdr_dir(XDR *xdrsp, char *dirp) { return (xdr_string(xdrsp, &dirp, MNTPATHLEN)); } static int xdr_fh(XDR *xdrsp, struct nfhret *np) { int i; long auth, authcnt, authfnd = 0; if (!xdr_u_long(xdrsp, &np->stat)) return (0); if (np->stat) return (1); switch (np->vers) { case 1: np->fhsize = NFS_FHSIZE; return (xdr_opaque(xdrsp, (caddr_t)np->nfh, NFS_FHSIZE)); case 3: if (!xdr_long(xdrsp, &np->fhsize)) return (0); if (np->fhsize <= 0 || np->fhsize > NFS3_FHSIZE) return (0); if (!xdr_opaque(xdrsp, (caddr_t)np->nfh, np->fhsize)) return (0); if (!xdr_long(xdrsp, &authcnt)) return (0); for (i = 0; i < authcnt; i++) { if (!xdr_long(xdrsp, &auth)) return (0); if (np->auth == -1) { np->auth = auth; authfnd++; } else if (auth == np->auth) { authfnd++; } } /* * Some servers, such as DEC's OSF/1 return a nil authenticator * list to indicate RPCAUTH_UNIX. */ if (authcnt == 0 && np->auth == -1) np->auth = AUTH_SYS; if (!authfnd && (authcnt > 0 || np->auth != AUTH_SYS)) np->stat = EAUTH; return (1); } return (0); } static void usage(void) { (void)fprintf(stderr, "%s\n%s\n%s\n%s\n", "usage: mount_nfs [-23bcdiLlNPsTU] [-a maxreadahead] [-D deadthresh]", " [-g maxgroups] [-I readdirsize] [-o options] [-R retrycnt]", " [-r readsize] [-t timeout] [-w writesize] [-x retrans]", " rhost:path node"); exit(1); } diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES index c24295629098..5d5b14d81c43 100644 --- a/sys/amd64/conf/NOTES +++ b/sys/amd64/conf/NOTES @@ -1,680 +1,676 @@ # # NOTES -- Lines that can be cut/pasted into kernel and hints configs. # # This file contains machine dependent kernel configuration notes. For # machine independent notes, look in /sys/conf/NOTES. # # $FreeBSD$ # # # We want LINT to cover profiling as well. profile 2 # # Enable the kernel DTrace hooks which are required to load the DTrace # kernel modules. # options KDTRACE_HOOKS # DTrace core # NOTE: introduces CDDL-licensed components into the kernel #device dtrace # DTrace modules #device dtrace_profile #device dtrace_sdt #device dtrace_fbt #device dtrace_systrace #device dtrace_prototype #device dtnfscl #device dtmalloc # Alternatively include all the DTrace modules #device dtraceall ##################################################################### # SMP OPTIONS: # # Notes: # # IPI_PREEMPTION instructs the kernel to preempt threads running on other # CPUS if needed. Relies on the PREEMPTION option # Optional: options IPI_PREEMPTION device atpic # Optional legacy pic support device mptable # Optional MPSPEC mptable support # # Watchdog routines. # options MP_WATCHDOG # Debugging options. # options COUNT_XINVLTLB_HITS # Counters for TLB events options COUNT_IPIS # Per-CPU IPI interrupt counters ##################################################################### # CPU OPTIONS # # You must specify at least one CPU (the one you intend to run on); # deleting the specification for CPUs you don't need to use may make # parts of the system run faster. # cpu HAMMER # aka K8, aka Opteron & Athlon64 # # Options for CPU features. # ##################################################################### # NETWORKING OPTIONS # # DEVICE_POLLING adds support for mixed interrupt-polling handling # of network device drivers, which has significant benefits in terms # of robustness to overloads and responsivity, as well as permitting # accurate scheduling of the CPU time between kernel network processing # and other activities. The drawback is a moderate (up to 1/HZ seconds) # potential increase in response times. # It is strongly recommended to use HZ=1000 or 2000 with DEVICE_POLLING # to achieve smoother behaviour. # Additionally, you can enable/disable polling at runtime with help of # the ifconfig(8) utility, and select the CPU fraction reserved to # userland with the sysctl variable kern.polling.user_frac # (default 50, range 0..100). # # Not all device drivers support this mode of operation at the time of # this writing. See polling(4) for more details. options DEVICE_POLLING # BPF_JITTER adds support for BPF just-in-time compiler. options BPF_JITTER # OpenFabrics Enterprise Distribution (Infiniband). options OFED options OFED_DEBUG_INIT # Sockets Direct Protocol options SDP options SDP_DEBUG # IP over Infiniband options IPOIB options IPOIB_DEBUG options IPOIB_CM ##################################################################### # CLOCK OPTIONS # Provide read/write access to the memory in the clock chip. device nvram # Access to rtc cmos via /dev/nvram ##################################################################### # MISCELLANEOUS DEVICES AND OPTIONS device speaker #Play IBM BASIC-style noises out your speaker hint.speaker.0.at="isa" hint.speaker.0.port="0x61" ##################################################################### # HARDWARE BUS CONFIGURATION # # ISA bus # device isa # # Options for `isa': # # AUTO_EOI_1 enables the `automatic EOI' feature for the master 8259A # interrupt controller. This saves about 0.7-1.25 usec for each interrupt. # This option breaks suspend/resume on some portables. # # AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A # interrupt controller. This saves about 0.7-1.25 usec for each interrupt. # Automatic EOI is documented not to work for for the slave with the # original i8259A, but it works for some clones and some integrated # versions. # # MAXMEM specifies the amount of RAM on the machine; if this is not # specified, FreeBSD will first read the amount of memory from the CMOS # RAM, so the amount of memory will initially be limited to 64MB or 16MB # depending on the BIOS. If the BIOS reports 64MB, a memory probe will # then attempt to detect the installed amount of RAM. If this probe # fails to detect >64MB RAM you will have to use the MAXMEM option. # The amount is in kilobytes, so for a machine with 128MB of RAM, it would # be 131072 (128 * 1024). # # BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to # reset the CPU for reboot. This is needed on some systems with broken # keyboard controllers. options AUTO_EOI_1 #options AUTO_EOI_2 options MAXMEM=(128*1024) #options BROKEN_KEYBOARD_RESET # # AGP GART support device agp # # AGP debugging. # options AGP_DEBUG ##################################################################### # HARDWARE DEVICE CONFIGURATION # To include support for VGA VESA video modes options VESA # Turn on extra debugging checks and output for VESA support. options VESA_DEBUG device dpms # DPMS suspend & resume via VESA BIOS # x86 real mode BIOS emulator, required by atkbdc/dpms/vesa options X86BIOS # # Optional devices: # # PS/2 mouse device psm hint.psm.0.at="atkbdc" hint.psm.0.irq="12" # Options for psm: options PSM_HOOKRESUME #hook the system resume event, useful #for some laptops options PSM_RESETAFTERSUSPEND #reset the device at the resume event # The keyboard controller; it controls the keyboard and the PS/2 mouse. device atkbdc hint.atkbdc.0.at="isa" hint.atkbdc.0.port="0x060" # The AT keyboard device atkbd hint.atkbd.0.at="atkbdc" hint.atkbd.0.irq="1" # Options for atkbd: options ATKBD_DFLT_KEYMAP # specify the built-in keymap makeoptions ATKBD_DFLT_KEYMAP=fr.dvorak # `flags' for atkbd: # 0x01 Force detection of keyboard, else we always assume a keyboard # 0x02 Don't reset keyboard, useful for some newer ThinkPads # 0x03 Force detection and avoid reset, might help with certain # dockingstations # 0x04 Old-style (XT) keyboard support, useful for older ThinkPads # Video card driver for VGA adapters. device vga hint.vga.0.at="isa" # Options for vga: # Try the following option if the mouse pointer is not drawn correctly # or font does not seem to be loaded properly. May cause flicker on # some systems. options VGA_ALT_SEQACCESS # If you can dispense with some vga driver features, you may want to # use the following options to save some memory. #options VGA_NO_FONT_LOADING # don't save/load font #options VGA_NO_MODE_CHANGE # don't change video modes # Older video cards may require this option for proper operation. options VGA_SLOW_IOACCESS # do byte-wide i/o's to TS and GDC regs # The following option probably won't work with the LCD displays. options VGA_WIDTH90 # support 90 column modes # Debugging. options VGA_DEBUG # vt(4) drivers. device vt_vga # VGA device vt_efifb # EFI framebuffer # Linear framebuffer driver for S3 VESA 1.2 cards. Works on top of VESA. device s3pci # 3Dfx Voodoo Graphics, Voodoo II /dev/3dfx CDEV support. This will create # the /dev/3dfx0 device to work with glide implementations. This should get # linked to /dev/3dfx and /dev/voodoo. Note that this is not the same as # the tdfx DRI module from XFree86 and is completely unrelated. # # To enable Linuxulator support, one must also include COMPAT_LINUX in the # config as well. The other option is to load both as modules. device tdfx # Enable 3Dfx Voodoo support #XXX#device tdfx_linux # Enable Linuxulator support # # ACPI support using the Intel ACPI Component Architecture reference # implementation. # # ACPI_DEBUG enables the use of the debug.acpi.level and debug.acpi.layer # kernel environment variables to select initial debugging levels for the # Intel ACPICA code. (Note that the Intel code must also have USE_DEBUGGER # defined when it is built). device acpi options ACPI_DEBUG # The cpufreq(4) driver provides support for non-ACPI CPU frequency control device cpufreq # # Network interfaces: # # bxe: Broadcom NetXtreme II (BCM5771X/BCM578XX) PCIe 10Gb Ethernet # adapters. # ipw: Intel PRO/Wireless 2100 IEEE 802.11 adapter # Requires the ipw firmware module # iwi: Intel PRO/Wireless 2200BG/2225BG/2915ABG IEEE 802.11 adapters # Requires the iwi firmware module # iwn: Intel Wireless WiFi Link 1000/105/135/2000/4965/5000/6000/6050 abgn # 802.11 network adapters # Requires the iwn firmware module # mthca: Mellanox HCA InfiniBand # mlx4ib: Mellanox ConnectX HCA InfiniBand # mlx4en: Mellanox ConnectX HCA Ethernet # nfe: nVidia nForce MCP on-board Ethernet Networking (BSD open source) # sfxge: Solarflare SFC9000 family 10Gb Ethernet adapters # vmx: VMware VMXNET3 Ethernet (BSD open source) # wpi: Intel 3945ABG Wireless LAN controller # Requires the wpi firmware module device bxe # Broadcom NetXtreme II BCM5771X/BCM578XX 10GbE options ED_3C503 options ED_HPP options ED_SIC device ipw # Intel 2100 wireless NICs. device iwi # Intel 2200BG/2225BG/2915ABG wireless NICs. device iwn # Intel 4965/1000/5000/6000 wireless NICs. device ixl # Intel 700 Series Physical Function device iavf # Intel Adaptive Virtual Function device mthca # Mellanox HCA InfiniBand device mlx4 # Shared code module between IB and Ethernet device mlx4ib # Mellanox ConnectX HCA InfiniBand device mlx4en # Mellanox ConnectX HCA Ethernet device nfe # nVidia nForce MCP on-board Ethernet device sfxge # Solarflare SFC9000 10Gb Ethernet device vmx # VMware VMXNET3 Ethernet device wpi # Intel 3945ABG wireless NICs. # IEEE 802.11 adapter firmware modules # Intel PRO/Wireless 2100 firmware: # ipwfw: BSS/IBSS/monitor mode firmware # ipwbssfw: BSS mode firmware # ipwibssfw: IBSS mode firmware # ipwmonitorfw: Monitor mode firmware # Intel PRO/Wireless 2200BG/2225BG/2915ABG firmware: # iwifw: BSS/IBSS/monitor mode firmware # iwibssfw: BSS mode firmware # iwiibssfw: IBSS mode firmware # iwimonitorfw: Monitor mode firmware # Intel Wireless WiFi Link 4965/1000/5000/6000 series firmware: # iwnfw: Single module to support all devices # iwn1000fw: Specific module for the 1000 only # iwn105fw: Specific module for the 105 only # iwn135fw: Specific module for the 135 only # iwn2000fw: Specific module for the 2000 only # iwn2030fw: Specific module for the 2030 only # iwn4965fw: Specific module for the 4965 only # iwn5000fw: Specific module for the 5000 only # iwn5150fw: Specific module for the 5150 only # iwn6000fw: Specific module for the 6000 only # iwn6000g2afw: Specific module for the 6000g2a only # iwn6000g2bfw: Specific module for the 6000g2b only # iwn6050fw: Specific module for the 6050 only # wpifw: Intel 3945ABG Wireless LAN Controller firmware device iwifw device iwibssfw device iwiibssfw device iwimonitorfw device ipwfw device ipwbssfw device ipwibssfw device ipwmonitorfw device iwnfw device iwn1000fw device iwn105fw device iwn135fw device iwn2000fw device iwn2030fw device iwn4965fw device iwn5000fw device iwn5150fw device iwn6000fw device iwn6000g2afw device iwn6000g2bfw device iwn6050fw device wpifw # # Non-Transparent Bridge (NTB) drivers # device if_ntb # Virtual NTB network interface device ntb_transport # NTB packet transport driver device ntb # NTB hardware interface device ntb_hw_amd # AMD NTB hardware driver device ntb_hw_intel # Intel NTB hardware driver device ntb_hw_plx # PLX NTB hardware driver # #XXX this stores pointers in a 32bit field that is defined by the hardware #device pst # # Areca 11xx and 12xx series of SATA II RAID controllers. # CAM is required. # device arcmsr # Areca SATA II RAID # # Microsemi smartpqi controllers. # These controllers have a SCSI-like interface, and require the # CAM infrastructure. # device smartpqi # # 3ware 9000 series PATA/SATA RAID controller driver and options. # The driver is implemented as a SIM, and so, needs the CAM infrastructure. # options TWA_DEBUG # 0-10; 10 prints the most messages. device twa # 3ware 9000 series PATA/SATA RAID # # Adaptec FSA RAID controllers, including integrated DELL controllers, # the Dell PERC 2/QC and the HP NetRAID-4M device aac device aacp # SCSI Passthrough interface (optional, CAM required) -# -# Adaptec by PMC RAID controllers, Series 6/7/8 and upcoming families -device aacraid # Container interface, CAM required - # # Highpoint RocketRAID 27xx. device hpt27xx # # Highpoint RocketRAID 182x. device hptmv # # Highpoint DC7280 and R750. device hptnr # # Highpoint RocketRAID. Supports RR172x, RR222x, RR2240, RR232x, RR2340, # RR2210, RR174x, RR2522, RR231x, RR230x. device hptrr # # Highpoint RocketRaid 3xxx series SATA RAID device hptiop # # IBM (now Adaptec) ServeRAID controllers device ips # # Intel integrated Memory Controller (iMC) SMBus controller # Sandybridge-Xeon, Ivybridge-Xeon, Haswell-Xeon, Broadwell-Xeon device imcsmb # # Intel C600 (Patsburg) integrated SAS controller device isci options ISCI_LOGGING # enable debugging in isci HAL # # NVM Express (NVMe) support device nvme # base NVMe driver device nvd # expose NVMe namespaces as disks, depends on nvme # # Intel Volume Management Device (VMD) support device vmd # base VMD device device vmd_bus # bus for VMD children # # PMC-Sierra SAS/SATA controller device pmspcv # # SafeNet crypto driver: can be moved to the MI NOTES as soon as # it's tested on a big-endian machine # device safe # SafeNet 1141 options SAFE_DEBUG # enable debugging support: hw.safe.debug options SAFE_RNDTEST # enable rndtest support # # VirtIO support # # The virtio entry provides a generic bus for use by the device drivers. # It must be combined with an interface that communicates with the host. # Multiple such interfaces are defined by the VirtIO specification. FreeBSD # only has support for PCI. Therefore, virtio_pci must be statically # compiled in or loaded as a module for the device drivers to function. # device virtio # Generic VirtIO bus (required) device virtio_pci # VirtIO PCI Interface device vtnet # VirtIO Ethernet device device virtio_blk # VirtIO Block device device virtio_scsi # VirtIO SCSI device device virtio_balloon # VirtIO Memory Balloon device device virtio_random # VirtIO Entropy device device virtio_console # VirtIO Console device # Microsoft Hyper-V enhancement support device hyperv # HyperV drivers # Xen HVM Guest Optimizations options XENHVM # Xen HVM kernel infrastructure device xenpci # Xen HVM Hypervisor services driver ##################################################################### # # Miscellaneous hardware: # # ipmi: Intelligent Platform Management Interface # pbio: Parallel (8255 PPI) basic I/O (mode 0) port (e.g. Advantech PCL-724) # smbios: DMI/SMBIOS entry point # vpd: Vital Product Data kernel interface # asmc: Apple System Management Controller # si: Specialix International SI/XIO or SX intelligent serial card # tpm: Trusted Platform Module # Notes on the Specialix SI/XIO driver: # The host card is memory, not IO mapped. # The Rev 1 host cards use a 64K chunk, on a 32K boundary. # The Rev 2 host cards use a 32K chunk, on a 32K boundary. # The cards can use an IRQ of 11, 12 or 15. device ipmi device pbio hint.pbio.0.at="isa" hint.pbio.0.port="0x360" device smbios device vpd device asmc device tpm device padlock_rng # VIA Padlock RNG device rdrand_rng # Intel Bull Mountain RNG device aesni # AES-NI OpenCrypto module device ioat # Intel I/OAT DMA engine # # Laptop/Notebook options: # # # I2C Bus # # # Hardware watchdog timers: # # ichwd: Intel ICH watchdog timer # amdsbwd: AMD SB7xx watchdog timer # viawd: VIA south bridge watchdog timer # wbwd: Winbond watchdog timer # itwd: ITE Super I/O watchdog timer # device ichwd device amdsbwd device viawd device wbwd device itwd # # Temperature sensors: # # coretemp: on-die sensor on Intel Core and newer CPUs # amdtemp: on-die sensor on AMD K8/K10/K11 CPUs # device coretemp device amdtemp # # CPU control pseudo-device. Provides access to MSRs, CPUID info and # microcode update feature. # device cpuctl # # SuperIO driver. # device superio # # System Management Bus (SMB) # options ENABLE_ALART # Control alarm on Intel intpm driver # # AMD System Management Network (SMN) # device amdsmn # # Number of initial kernel page table pages used for early bootstrap. # This number should include enough pages to map the kernel and any # modules or other data loaded with the kernel by the loader. Each # page table page maps 2MB. # options NKPT=31 # EFI Runtime Services support options EFIRT ##################################################################### # ABI Emulation #XXX keep these here for now and reactivate when support for emulating #XXX these 32 bit binaries is added. # Enable 32-bit runtime support for FreeBSD/i386 binaries. options COMPAT_FREEBSD32 # Enable (32-bit) a.out binary support options COMPAT_AOUT # Enable 32-bit runtime support for CloudABI binaries. options COMPAT_CLOUDABI32 # Enable 64-bit runtime support for CloudABI binaries. options COMPAT_CLOUDABI64 # Enable Linux ABI emulation #XXX#options COMPAT_LINUX # Enable 32-bit Linux ABI emulation (requires COMPAT_FREEBSD32). options COMPAT_LINUX32 # Enable the linux-like proc filesystem support (requires COMPAT_LINUX32 # and PSEUDOFS) options LINPROCFS #Enable the linux-like sys filesystem support (requires COMPAT_LINUX32 # and PSEUDOFS) options LINSYSFS ##################################################################### # ZFS support options ZFS ##################################################################### # VM OPTIONS # KSTACK_PAGES is the number of memory pages to assign to the kernel # stack of each thread. options KSTACK_PAGES=5 # Enable detailed accounting by the PV entry allocator. options PV_STATS ##################################################################### # More undocumented options for linting. # Note that documenting these are not considered an affront. options FB_INSTALL_CDEV # install a CDEV entry in /dev options KBDIO_DEBUG=2 options KBD_MAXRETRY=4 options KBD_MAXWAIT=6 options KBD_RESETDELAY=201 options PSM_DEBUG=1 options TIMER_FREQ=((14318182+6)/12) options VM_KMEM_SIZE options VM_KMEM_SIZE_MAX options VM_KMEM_SIZE_SCALE # Enable NDIS binary driver support options NDISAPI device ndis # GCOV (code coverage) support options LINDEBUGFS options GCOV diff --git a/sys/arm64/include/armreg.h b/sys/arm64/include/armreg.h index b2a201119fe3..338106c9dec6 100644 --- a/sys/arm64/include/armreg.h +++ b/sys/arm64/include/armreg.h @@ -1,784 +1,821 @@ /*- * Copyright (c) 2013, 2014 Andrew Turner * Copyright (c) 2015 The FreeBSD Foundation * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_ARMREG_H_ #define _MACHINE_ARMREG_H_ #define INSN_SIZE 4 #define MRS_MASK 0xfff00000 #define MRS_VALUE 0xd5300000 #define MRS_SPECIAL(insn) ((insn) & 0x000fffe0) #define MRS_REGISTER(insn) ((insn) & 0x0000001f) #define MRS_Op0_SHIFT 19 #define MRS_Op0_MASK 0x00080000 #define MRS_Op1_SHIFT 16 #define MRS_Op1_MASK 0x00070000 #define MRS_CRn_SHIFT 12 #define MRS_CRn_MASK 0x0000f000 #define MRS_CRm_SHIFT 8 #define MRS_CRm_MASK 0x00000f00 #define MRS_Op2_SHIFT 5 #define MRS_Op2_MASK 0x000000e0 #define MRS_Rt_SHIFT 0 #define MRS_Rt_MASK 0x0000001f #define MRS_REG(op0, op1, crn, crm, op2) \ (((op0) << MRS_Op0_SHIFT) | ((op1) << MRS_Op1_SHIFT) | \ ((crn) << MRS_CRn_SHIFT) | ((crm) << MRS_CRm_SHIFT) | \ ((op2) << MRS_Op2_SHIFT)) #define READ_SPECIALREG(reg) \ ({ uint64_t _val; \ __asm __volatile("mrs %0, " __STRING(reg) : "=&r" (_val)); \ _val; \ }) #define WRITE_SPECIALREG(reg, _val) \ __asm __volatile("msr " __STRING(reg) ", %0" : : "r"((uint64_t)_val)) #define UL(x) UINT64_C(x) /* CNTHCTL_EL2 - Counter-timer Hypervisor Control register */ #define CNTHCTL_EVNTI_MASK (0xf << 4) /* Bit to trigger event stream */ #define CNTHCTL_EVNTDIR (1 << 3) /* Control transition trigger bit */ #define CNTHCTL_EVNTEN (1 << 2) /* Enable event stream */ #define CNTHCTL_EL1PCEN (1 << 1) /* Allow EL0/1 physical timer access */ #define CNTHCTL_EL1PCTEN (1 << 0) /*Allow EL0/1 physical counter access*/ +/* CNTP_CTL_EL0 - Counter-timer Physical Timer Control register */ +#define CNTP_CTL_ENABLE (1 << 0) +#define CNTP_CTL_IMASK (1 << 1) +#define CNTP_CTL_ISTATUS (1 << 2) + /* CPACR_EL1 */ #define CPACR_FPEN_MASK (0x3 << 20) #define CPACR_FPEN_TRAP_ALL1 (0x0 << 20) /* Traps from EL0 and EL1 */ #define CPACR_FPEN_TRAP_EL0 (0x1 << 20) /* Traps from EL0 */ #define CPACR_FPEN_TRAP_ALL2 (0x2 << 20) /* Traps from EL0 and EL1 */ #define CPACR_FPEN_TRAP_NONE (0x3 << 20) /* No traps */ #define CPACR_TTA (0x1 << 28) /* CTR_EL0 - Cache Type Register */ #define CTR_RES1 (1 << 31) #define CTR_TminLine_SHIFT 32 #define CTR_TminLine_MASK (UL(0x3f) << CTR_TminLine_SHIFT) #define CTR_TminLine_VAL(reg) ((reg) & CTR_TminLine_MASK) #define CTR_DIC_SHIFT 29 #define CTR_DIC_MASK (0x1 << CTR_DIC_SHIFT) #define CTR_DIC_VAL(reg) ((reg) & CTR_DIC_MASK) #define CTR_IDC_SHIFT 28 #define CTR_IDC_MASK (0x1 << CTR_IDC_SHIFT) #define CTR_IDC_VAL(reg) ((reg) & CTR_IDC_MASK) #define CTR_CWG_SHIFT 24 #define CTR_CWG_MASK (0xf << CTR_CWG_SHIFT) #define CTR_CWG_VAL(reg) ((reg) & CTR_CWG_MASK) #define CTR_CWG_SIZE(reg) (4 << (CTR_CWG_VAL(reg) >> CTR_CWG_SHIFT)) #define CTR_ERG_SHIFT 20 #define CTR_ERG_MASK (0xf << CTR_ERG_SHIFT) #define CTR_ERG_VAL(reg) ((reg) & CTR_ERG_MASK) #define CTR_ERG_SIZE(reg) (4 << (CTR_ERG_VAL(reg) >> CTR_ERG_SHIFT)) #define CTR_DLINE_SHIFT 16 #define CTR_DLINE_MASK (0xf << CTR_DLINE_SHIFT) #define CTR_DLINE_VAL(reg) ((reg) & CTR_DLINE_MASK) #define CTR_DLINE_SIZE(reg) (4 << (CTR_DLINE_VAL(reg) >> CTR_DLINE_SHIFT)) #define CTR_L1IP_SHIFT 14 #define CTR_L1IP_MASK (0x3 << CTR_L1IP_SHIFT) #define CTR_L1IP_VAL(reg) ((reg) & CTR_L1IP_MASK) #define CTR_L1IP_VPIPT (0 << CTR_L1IP_SHIFT) #define CTR_L1IP_AIVIVT (1 << CTR_L1IP_SHIFT) #define CTR_L1IP_VIPT (2 << CTR_L1IP_SHIFT) #define CTR_L1IP_PIPT (3 << CTR_L1IP_SHIFT) #define CTR_ILINE_SHIFT 0 #define CTR_ILINE_MASK (0xf << CTR_ILINE_SHIFT) #define CTR_ILINE_VAL(reg) ((reg) & CTR_ILINE_MASK) #define CTR_ILINE_SIZE(reg) (4 << (CTR_ILINE_VAL(reg) >> CTR_ILINE_SHIFT)) /* DAIF - Interrupt Mask Bits */ #define DAIF_D_MASKED (1 << 9) #define DAIF_A_MASKED (1 << 8) #define DAIF_I_MASKED (1 << 7) #define DAIF_F_MASKED (1 << 6) /* DCZID_EL0 - Data Cache Zero ID register */ #define DCZID_DZP (1 << 4) /* DC ZVA prohibited if non-0 */ #define DCZID_BS_SHIFT 0 #define DCZID_BS_MASK (0xf << DCZID_BS_SHIFT) #define DCZID_BS_SIZE(reg) (((reg) & DCZID_BS_MASK) >> DCZID_BS_SHIFT) /* ESR_ELx */ -#define ESR_ELx_ISS_MASK 0x00ffffff +#define ESR_ELx_ISS_MASK 0x01ffffff #define ISS_INSN_FnV (0x01 << 10) #define ISS_INSN_EA (0x01 << 9) #define ISS_INSN_S1PTW (0x01 << 7) #define ISS_INSN_IFSC_MASK (0x1f << 0) -#define ISS_DATA_ISV (0x01 << 24) -#define ISS_DATA_SAS_MASK (0x03 << 22) -#define ISS_DATA_SSE (0x01 << 21) -#define ISS_DATA_SRT_MASK (0x1f << 16) + +#define ISS_MSR_DIR_SHIFT 0 +#define ISS_MSR_DIR (0x01 << ISS_MSR_DIR_SHIFT) +#define ISS_MSR_Rt_SHIFT 5 +#define ISS_MSR_Rt_MASK (0x1f << ISS_MSR_Rt_SHIFT) +#define ISS_MSR_Rt(x) (((x) & ISS_MSR_Rt_MASK) >> ISS_MSR_Rt_SHIFT) +#define ISS_MSR_CRm_SHIFT 1 +#define ISS_MSR_CRm_MASK (0xf << ISS_MSR_CRm_SHIFT) +#define ISS_MSR_CRm(x) (((x) & ISS_MSR_CRm_MASK) >> ISS_MSR_CRm_SHIFT) +#define ISS_MSR_CRn_SHIFT 10 +#define ISS_MSR_CRn_MASK (0xf << ISS_MSR_CRn_SHIFT) +#define ISS_MSR_CRn(x) (((x) & ISS_MSR_CRn_MASK) >> ISS_MSR_CRn_SHIFT) +#define ISS_MSR_OP1_SHIFT 14 +#define ISS_MSR_OP1_MASK (0x7 << ISS_MSR_OP1_SHIFT) +#define ISS_MSR_OP1(x) (((x) & ISS_MSR_OP1_MASK) >> ISS_MSR_OP1_SHIFT) +#define ISS_MSR_OP2_SHIFT 17 +#define ISS_MSR_OP2_MASK (0x7 << ISS_MSR_OP2_SHIFT) +#define ISS_MSR_OP2(x) (((x) & ISS_MSR_OP2_MASK) >> ISS_MSR_OP2_SHIFT) +#define ISS_MSR_OP0_SHIFT 20 +#define ISS_MSR_OP0_MASK (0x3 << ISS_MSR_OP0_SHIFT) +#define ISS_MSR_OP0(x) (((x) & ISS_MSR_OP0_MASK) >> ISS_MSR_OP0_SHIFT) +#define ISS_MSR_REG_MASK \ + (ISS_MSR_OP0_MASK | ISS_MSR_OP2_MASK | ISS_MSR_OP1_MASK | \ + ISS_MSR_CRn_MASK | ISS_MSR_CRm_MASK) + + +#define ISS_DATA_ISV_SHIFT 24 +#define ISS_DATA_ISV (0x01 << ISS_DATA_ISV_SHIFT) +#define ISS_DATA_SAS_SHIFT 22 +#define ISS_DATA_SAS_MASK (0x03 << ISS_DATA_SAS_SHIFT) +#define ISS_DATA_SSE_SHIFT 21 +#define ISS_DATA_SSE (0x01 << ISS_DATA_SSE_SHIFT) +#define ISS_DATA_SRT_SHIFT 16 +#define ISS_DATA_SRT_MASK (0x1f << ISS_DATA_SRT_SHIFT) #define ISS_DATA_SF (0x01 << 15) #define ISS_DATA_AR (0x01 << 14) #define ISS_DATA_FnV (0x01 << 10) #define ISS_DATA_EA (0x01 << 9) #define ISS_DATA_CM (0x01 << 8) #define ISS_DATA_S1PTW (0x01 << 7) -#define ISS_DATA_WnR (0x01 << 6) +#define ISS_DATA_WnR_SHIFT 6 +#define ISS_DATA_WnR (0x01 << ISS_DATA_WnR_SHIFT) #define ISS_DATA_DFSC_MASK (0x3f << 0) #define ISS_DATA_DFSC_ASF_L0 (0x00 << 0) #define ISS_DATA_DFSC_ASF_L1 (0x01 << 0) #define ISS_DATA_DFSC_ASF_L2 (0x02 << 0) #define ISS_DATA_DFSC_ASF_L3 (0x03 << 0) #define ISS_DATA_DFSC_TF_L0 (0x04 << 0) #define ISS_DATA_DFSC_TF_L1 (0x05 << 0) #define ISS_DATA_DFSC_TF_L2 (0x06 << 0) #define ISS_DATA_DFSC_TF_L3 (0x07 << 0) #define ISS_DATA_DFSC_AFF_L1 (0x09 << 0) #define ISS_DATA_DFSC_AFF_L2 (0x0a << 0) #define ISS_DATA_DFSC_AFF_L3 (0x0b << 0) #define ISS_DATA_DFSC_PF_L1 (0x0d << 0) #define ISS_DATA_DFSC_PF_L2 (0x0e << 0) #define ISS_DATA_DFSC_PF_L3 (0x0f << 0) #define ISS_DATA_DFSC_EXT (0x10 << 0) #define ISS_DATA_DFSC_EXT_L0 (0x14 << 0) #define ISS_DATA_DFSC_EXT_L1 (0x15 << 0) #define ISS_DATA_DFSC_EXT_L2 (0x16 << 0) #define ISS_DATA_DFSC_EXT_L3 (0x17 << 0) #define ISS_DATA_DFSC_ECC (0x18 << 0) #define ISS_DATA_DFSC_ECC_L0 (0x1c << 0) #define ISS_DATA_DFSC_ECC_L1 (0x1d << 0) #define ISS_DATA_DFSC_ECC_L2 (0x1e << 0) #define ISS_DATA_DFSC_ECC_L3 (0x1f << 0) #define ISS_DATA_DFSC_ALIGN (0x21 << 0) #define ISS_DATA_DFSC_TLB_CONFLICT (0x30 << 0) #define ESR_ELx_IL (0x01 << 25) #define ESR_ELx_EC_SHIFT 26 #define ESR_ELx_EC_MASK (0x3f << 26) #define ESR_ELx_EXCEPTION(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) #define EXCP_UNKNOWN 0x00 /* Unkwn exception */ +#define EXCP_TRAP_WFI_WFE 0x01 /* Trapped WFI or WFE */ #define EXCP_FP_SIMD 0x07 /* VFP/SIMD trap */ #define EXCP_ILL_STATE 0x0e /* Illegal execution state */ #define EXCP_SVC32 0x11 /* SVC trap for AArch32 */ #define EXCP_SVC64 0x15 /* SVC trap for AArch64 */ +#define EXCP_HVC 0x16 /* HVC trap */ #define EXCP_MSR 0x18 /* MSR/MRS trap */ #define EXCP_INSN_ABORT_L 0x20 /* Instruction abort, from lower EL */ #define EXCP_INSN_ABORT 0x21 /* Instruction abort, from same EL */ #define EXCP_PC_ALIGN 0x22 /* PC alignment fault */ #define EXCP_DATA_ABORT_L 0x24 /* Data abort, from lower EL */ #define EXCP_DATA_ABORT 0x25 /* Data abort, from same EL */ #define EXCP_SP_ALIGN 0x26 /* SP slignment fault */ #define EXCP_TRAP_FP 0x2c /* Trapped FP exception */ #define EXCP_SERROR 0x2f /* SError interrupt */ #define EXCP_BRKPT_EL0 0x30 /* Hardware breakpoint, from same EL */ #define EXCP_SOFTSTP_EL0 0x32 /* Software Step, from lower EL */ #define EXCP_SOFTSTP_EL1 0x33 /* Software Step, from same EL */ #define EXCP_WATCHPT_EL1 0x35 /* Watchpoint, from same EL */ #define EXCP_BRK 0x3c /* Breakpoint */ /* ICC_CTLR_EL1 */ #define ICC_CTLR_EL1_EOIMODE (1U << 1) /* ICC_IAR1_EL1 */ #define ICC_IAR1_EL1_SPUR (0x03ff) /* ICC_IGRPEN0_EL1 */ #define ICC_IGRPEN0_EL1_EN (1U << 0) /* ICC_PMR_EL1 */ #define ICC_PMR_EL1_PRIO_MASK (0xFFUL) /* ICC_SGI1R_EL1 */ #define ICC_SGI1R_EL1_TL_MASK 0xffffUL #define ICC_SGI1R_EL1_AFF1_SHIFT 16 #define ICC_SGI1R_EL1_SGIID_SHIFT 24 #define ICC_SGI1R_EL1_AFF2_SHIFT 32 #define ICC_SGI1R_EL1_AFF3_SHIFT 48 #define ICC_SGI1R_EL1_SGIID_MASK 0xfUL #define ICC_SGI1R_EL1_IRM (0x1UL << 40) /* ICC_SRE_EL1 */ #define ICC_SRE_EL1_SRE (1U << 0) -/* ICC_SRE_EL2 */ -#define ICC_SRE_EL2_SRE (1U << 0) -#define ICC_SRE_EL2_EN (1U << 3) - /* ID_AA64DFR0_EL1 */ #define ID_AA64DFR0_EL1 MRS_REG(3, 0, 0, 5, 0) #define ID_AA64DFR0_DebugVer_SHIFT 0 #define ID_AA64DFR0_DebugVer_MASK (UL(0xf) << ID_AA64DFR0_DebugVer_SHIFT) #define ID_AA64DFR0_DebugVer_VAL(x) ((x) & ID_AA64DFR0_DebugVer_MASK) #define ID_AA64DFR0_DebugVer_8 (UL(0x6) << ID_AA64DFR0_DebugVer_SHIFT) #define ID_AA64DFR0_DebugVer_8_VHE (UL(0x7) << ID_AA64DFR0_DebugVer_SHIFT) #define ID_AA64DFR0_DebugVer_8_2 (UL(0x8) << ID_AA64DFR0_DebugVer_SHIFT) #define ID_AA64DFR0_TraceVer_SHIFT 4 #define ID_AA64DFR0_TraceVer_MASK (UL(0xf) << ID_AA64DFR0_TraceVer_SHIFT) #define ID_AA64DFR0_TraceVer_VAL(x) ((x) & ID_AA64DFR0_TraceVer_MASK) #define ID_AA64DFR0_TraceVer_NONE (UL(0x0) << ID_AA64DFR0_TraceVer_SHIFT) #define ID_AA64DFR0_TraceVer_IMPL (UL(0x1) << ID_AA64DFR0_TraceVer_SHIFT) #define ID_AA64DFR0_PMUVer_SHIFT 8 #define ID_AA64DFR0_PMUVer_MASK (UL(0xf) << ID_AA64DFR0_PMUVer_SHIFT) #define ID_AA64DFR0_PMUVer_VAL(x) ((x) & ID_AA64DFR0_PMUVer_MASK) #define ID_AA64DFR0_PMUVer_NONE (UL(0x0) << ID_AA64DFR0_PMUVer_SHIFT) #define ID_AA64DFR0_PMUVer_3 (UL(0x1) << ID_AA64DFR0_PMUVer_SHIFT) #define ID_AA64DFR0_PMUVer_3_1 (UL(0x4) << ID_AA64DFR0_PMUVer_SHIFT) #define ID_AA64DFR0_PMUVer_IMPL (UL(0xf) << ID_AA64DFR0_PMUVer_SHIFT) #define ID_AA64DFR0_BRPs_SHIFT 12 #define ID_AA64DFR0_BRPs_MASK (UL(0xf) << ID_AA64DFR0_BRPs_SHIFT) #define ID_AA64DFR0_BRPs_VAL(x) \ ((((x) >> ID_AA64DFR0_BRPs_SHIFT) & 0xf) + 1) #define ID_AA64DFR0_WRPs_SHIFT 20 #define ID_AA64DFR0_WRPs_MASK (UL(0xf) << ID_AA64DFR0_WRPs_SHIFT) #define ID_AA64DFR0_WRPs_VAL(x) \ ((((x) >> ID_AA64DFR0_WRPs_SHIFT) & 0xf) + 1) #define ID_AA64DFR0_CTX_CMPs_SHIFT 28 #define ID_AA64DFR0_CTX_CMPs_MASK (UL(0xf) << ID_AA64DFR0_CTX_CMPs_SHIFT) #define ID_AA64DFR0_CTX_CMPs_VAL(x) \ ((((x) >> ID_AA64DFR0_CTX_CMPs_SHIFT) & 0xf) + 1) #define ID_AA64DFR0_PMSVer_SHIFT 32 #define ID_AA64DFR0_PMSVer_MASK (UL(0xf) << ID_AA64DFR0_PMSVer_SHIFT) #define ID_AA64DFR0_PMSVer_VAL(x) ((x) & ID_AA64DFR0_PMSVer_MASK) #define ID_AA64DFR0_PMSVer_NONE (UL(0x0) << ID_AA64DFR0_PMSVer_SHIFT) #define ID_AA64DFR0_PMSVer_V1 (UL(0x1) << ID_AA64DFR0_PMSVer_SHIFT) /* ID_AA64ISAR0_EL1 */ #define ID_AA64ISAR0_EL1 MRS_REG(3, 0, 0, 6, 0) #define ID_AA64ISAR0_AES_SHIFT 4 #define ID_AA64ISAR0_AES_MASK (UL(0xf) << ID_AA64ISAR0_AES_SHIFT) #define ID_AA64ISAR0_AES_VAL(x) ((x) & ID_AA64ISAR0_AES_MASK) #define ID_AA64ISAR0_AES_NONE (UL(0x0) << ID_AA64ISAR0_AES_SHIFT) #define ID_AA64ISAR0_AES_BASE (UL(0x1) << ID_AA64ISAR0_AES_SHIFT) #define ID_AA64ISAR0_AES_PMULL (UL(0x2) << ID_AA64ISAR0_AES_SHIFT) #define ID_AA64ISAR0_SHA1_SHIFT 8 #define ID_AA64ISAR0_SHA1_MASK (UL(0xf) << ID_AA64ISAR0_SHA1_SHIFT) #define ID_AA64ISAR0_SHA1_VAL(x) ((x) & ID_AA64ISAR0_SHA1_MASK) #define ID_AA64ISAR0_SHA1_NONE (UL(0x0) << ID_AA64ISAR0_SHA1_SHIFT) #define ID_AA64ISAR0_SHA1_BASE (UL(0x1) << ID_AA64ISAR0_SHA1_SHIFT) #define ID_AA64ISAR0_SHA2_SHIFT 12 #define ID_AA64ISAR0_SHA2_MASK (UL(0xf) << ID_AA64ISAR0_SHA2_SHIFT) #define ID_AA64ISAR0_SHA2_VAL(x) ((x) & ID_AA64ISAR0_SHA2_MASK) #define ID_AA64ISAR0_SHA2_NONE (UL(0x0) << ID_AA64ISAR0_SHA2_SHIFT) #define ID_AA64ISAR0_SHA2_BASE (UL(0x1) << ID_AA64ISAR0_SHA2_SHIFT) #define ID_AA64ISAR0_SHA2_512 (UL(0x2) << ID_AA64ISAR0_SHA2_SHIFT) #define ID_AA64ISAR0_CRC32_SHIFT 16 #define ID_AA64ISAR0_CRC32_MASK (UL(0xf) << ID_AA64ISAR0_CRC32_SHIFT) #define ID_AA64ISAR0_CRC32_VAL(x) ((x) & ID_AA64ISAR0_CRC32_MASK) #define ID_AA64ISAR0_CRC32_NONE (UL(0x0) << ID_AA64ISAR0_CRC32_SHIFT) #define ID_AA64ISAR0_CRC32_BASE (UL(0x1) << ID_AA64ISAR0_CRC32_SHIFT) #define ID_AA64ISAR0_Atomic_SHIFT 20 #define ID_AA64ISAR0_Atomic_MASK (UL(0xf) << ID_AA64ISAR0_Atomic_SHIFT) #define ID_AA64ISAR0_Atomic_VAL(x) ((x) & ID_AA64ISAR0_Atomic_MASK) #define ID_AA64ISAR0_Atomic_NONE (UL(0x0) << ID_AA64ISAR0_Atomic_SHIFT) #define ID_AA64ISAR0_Atomic_IMPL (UL(0x2) << ID_AA64ISAR0_Atomic_SHIFT) #define ID_AA64ISAR0_RDM_SHIFT 28 #define ID_AA64ISAR0_RDM_MASK (UL(0xf) << ID_AA64ISAR0_RDM_SHIFT) #define ID_AA64ISAR0_RDM_VAL(x) ((x) & ID_AA64ISAR0_RDM_MASK) #define ID_AA64ISAR0_RDM_NONE (UL(0x0) << ID_AA64ISAR0_RDM_SHIFT) #define ID_AA64ISAR0_RDM_IMPL (UL(0x1) << ID_AA64ISAR0_RDM_SHIFT) #define ID_AA64ISAR0_SHA3_SHIFT 32 #define ID_AA64ISAR0_SHA3_MASK (UL(0xf) << ID_AA64ISAR0_SHA3_SHIFT) #define ID_AA64ISAR0_SHA3_VAL(x) ((x) & ID_AA64ISAR0_SHA3_MASK) #define ID_AA64ISAR0_SHA3_NONE (UL(0x0) << ID_AA64ISAR0_SHA3_SHIFT) #define ID_AA64ISAR0_SHA3_IMPL (UL(0x1) << ID_AA64ISAR0_SHA3_SHIFT) #define ID_AA64ISAR0_SM3_SHIFT 36 #define ID_AA64ISAR0_SM3_MASK (UL(0xf) << ID_AA64ISAR0_SM3_SHIFT) #define ID_AA64ISAR0_SM3_VAL(x) ((x) & ID_AA64ISAR0_SM3_MASK) #define ID_AA64ISAR0_SM3_NONE (UL(0x0) << ID_AA64ISAR0_SM3_SHIFT) #define ID_AA64ISAR0_SM3_IMPL (UL(0x1) << ID_AA64ISAR0_SM3_SHIFT) #define ID_AA64ISAR0_SM4_SHIFT 40 #define ID_AA64ISAR0_SM4_MASK (UL(0xf) << ID_AA64ISAR0_SM4_SHIFT) #define ID_AA64ISAR0_SM4_VAL(x) ((x) & ID_AA64ISAR0_SM4_MASK) #define ID_AA64ISAR0_SM4_NONE (UL(0x0) << ID_AA64ISAR0_SM4_SHIFT) #define ID_AA64ISAR0_SM4_IMPL (UL(0x1) << ID_AA64ISAR0_SM4_SHIFT) #define ID_AA64ISAR0_DP_SHIFT 44 #define ID_AA64ISAR0_DP_MASK (UL(0xf) << ID_AA64ISAR0_DP_SHIFT) #define ID_AA64ISAR0_DP_VAL(x) ((x) & ID_AA64ISAR0_DP_MASK) #define ID_AA64ISAR0_DP_NONE (UL(0x0) << ID_AA64ISAR0_DP_SHIFT) #define ID_AA64ISAR0_DP_IMPL (UL(0x1) << ID_AA64ISAR0_DP_SHIFT) /* ID_AA64ISAR1_EL1 */ #define ID_AA64ISAR1_EL1 MRS_REG(3, 0, 0, 6, 1) #define ID_AA64ISAR1_DPB_SHIFT 0 #define ID_AA64ISAR1_DPB_MASK (UL(0xf) << ID_AA64ISAR1_DPB_SHIFT) #define ID_AA64ISAR1_DPB_VAL(x) ((x) & ID_AA64ISAR1_DPB_MASK) #define ID_AA64ISAR1_DPB_NONE (UL(0x0) << ID_AA64ISAR1_DPB_SHIFT) #define ID_AA64ISAR1_DPB_IMPL (UL(0x1) << ID_AA64ISAR1_DPB_SHIFT) #define ID_AA64ISAR1_APA_SHIFT 4 #define ID_AA64ISAR1_APA_MASK (UL(0xf) << ID_AA64ISAR1_APA_SHIFT) #define ID_AA64ISAR1_APA_VAL(x) ((x) & ID_AA64ISAR1_APA_MASK) #define ID_AA64ISAR1_APA_NONE (UL(0x0) << ID_AA64ISAR1_APA_SHIFT) #define ID_AA64ISAR1_APA_IMPL (UL(0x1) << ID_AA64ISAR1_APA_SHIFT) #define ID_AA64ISAR1_API_SHIFT 8 #define ID_AA64ISAR1_API_MASK (UL(0xf) << ID_AA64ISAR1_API_SHIFT) #define ID_AA64ISAR1_API_VAL(x) ((x) & ID_AA64ISAR1_API_MASK) #define ID_AA64ISAR1_API_NONE (UL(0x0) << ID_AA64ISAR1_API_SHIFT) #define ID_AA64ISAR1_API_IMPL (UL(0x1) << ID_AA64ISAR1_API_SHIFT) #define ID_AA64ISAR1_JSCVT_SHIFT 12 #define ID_AA64ISAR1_JSCVT_MASK (UL(0xf) << ID_AA64ISAR1_JSCVT_SHIFT) #define ID_AA64ISAR1_JSCVT_VAL(x) ((x) & ID_AA64ISAR1_JSCVT_MASK) #define ID_AA64ISAR1_JSCVT_NONE (UL(0x0) << ID_AA64ISAR1_JSCVT_SHIFT) #define ID_AA64ISAR1_JSCVT_IMPL (UL(0x1) << ID_AA64ISAR1_JSCVT_SHIFT) #define ID_AA64ISAR1_FCMA_SHIFT 16 #define ID_AA64ISAR1_FCMA_MASK (UL(0xf) << ID_AA64ISAR1_FCMA_SHIFT) #define ID_AA64ISAR1_FCMA_VAL(x) ((x) & ID_AA64ISAR1_FCMA_MASK) #define ID_AA64ISAR1_FCMA_NONE (UL(0x0) << ID_AA64ISAR1_FCMA_SHIFT) #define ID_AA64ISAR1_FCMA_IMPL (UL(0x1) << ID_AA64ISAR1_FCMA_SHIFT) #define ID_AA64ISAR1_LRCPC_SHIFT 20 #define ID_AA64ISAR1_LRCPC_MASK (UL(0xf) << ID_AA64ISAR1_LRCPC_SHIFT) #define ID_AA64ISAR1_LRCPC_VAL(x) ((x) & ID_AA64ISAR1_LRCPC_MASK) #define ID_AA64ISAR1_LRCPC_NONE (UL(0x0) << ID_AA64ISAR1_LRCPC_SHIFT) #define ID_AA64ISAR1_LRCPC_IMPL (UL(0x1) << ID_AA64ISAR1_LRCPC_SHIFT) #define ID_AA64ISAR1_GPA_SHIFT 24 #define ID_AA64ISAR1_GPA_MASK (UL(0xf) << ID_AA64ISAR1_GPA_SHIFT) #define ID_AA64ISAR1_GPA_VAL(x) ((x) & ID_AA64ISAR1_GPA_MASK) #define ID_AA64ISAR1_GPA_NONE (UL(0x0) << ID_AA64ISAR1_GPA_SHIFT) #define ID_AA64ISAR1_GPA_IMPL (UL(0x1) << ID_AA64ISAR1_GPA_SHIFT) #define ID_AA64ISAR1_GPI_SHIFT 28 #define ID_AA64ISAR1_GPI_MASK (UL(0xf) << ID_AA64ISAR1_GPI_SHIFT) #define ID_AA64ISAR1_GPI_VAL(x) ((x) & ID_AA64ISAR1_GPI_MASK) #define ID_AA64ISAR1_GPI_NONE (UL(0x0) << ID_AA64ISAR1_GPI_SHIFT) #define ID_AA64ISAR1_GPI_IMPL (UL(0x1) << ID_AA64ISAR1_GPI_SHIFT) /* ID_AA64MMFR0_EL1 */ #define ID_AA64MMFR0_EL1 MRS_REG(3, 0, 0, 7, 0) #define ID_AA64MMFR0_PARange_SHIFT 0 #define ID_AA64MMFR0_PARange_MASK (UL(0xf) << ID_AA64MMFR0_PARange_SHIFT) #define ID_AA64MMFR0_PARange_VAL(x) ((x) & ID_AA64MMFR0_PARange_MASK) #define ID_AA64MMFR0_PARange_4G (UL(0x0) << ID_AA64MMFR0_PARange_SHIFT) #define ID_AA64MMFR0_PARange_64G (UL(0x1) << ID_AA64MMFR0_PARange_SHIFT) #define ID_AA64MMFR0_PARange_1T (UL(0x2) << ID_AA64MMFR0_PARange_SHIFT) #define ID_AA64MMFR0_PARange_4T (UL(0x3) << ID_AA64MMFR0_PARange_SHIFT) #define ID_AA64MMFR0_PARange_16T (UL(0x4) << ID_AA64MMFR0_PARange_SHIFT) #define ID_AA64MMFR0_PARange_256T (UL(0x5) << ID_AA64MMFR0_PARange_SHIFT) #define ID_AA64MMFR0_PARange_4P (UL(0x6) << ID_AA64MMFR0_PARange_SHIFT) #define ID_AA64MMFR0_ASIDBits_SHIFT 4 #define ID_AA64MMFR0_ASIDBits_MASK (UL(0xf) << ID_AA64MMFR0_ASIDBits_SHIFT) #define ID_AA64MMFR0_ASIDBits_VAL(x) ((x) & ID_AA64MMFR0_ASIDBits_MASK) #define ID_AA64MMFR0_ASIDBits_8 (UL(0x0) << ID_AA64MMFR0_ASIDBits_SHIFT) #define ID_AA64MMFR0_ASIDBits_16 (UL(0x2) << ID_AA64MMFR0_ASIDBits_SHIFT) #define ID_AA64MMFR0_BigEnd_SHIFT 8 #define ID_AA64MMFR0_BigEnd_MASK (UL(0xf) << ID_AA64MMFR0_BigEnd_SHIFT) #define ID_AA64MMFR0_BigEnd_VAL(x) ((x) & ID_AA64MMFR0_BigEnd_MASK) #define ID_AA64MMFR0_BigEnd_FIXED (UL(0x0) << ID_AA64MMFR0_BigEnd_SHIFT) #define ID_AA64MMFR0_BigEnd_MIXED (UL(0x1) << ID_AA64MMFR0_BigEnd_SHIFT) #define ID_AA64MMFR0_SNSMem_SHIFT 12 #define ID_AA64MMFR0_SNSMem_MASK (UL(0xf) << ID_AA64MMFR0_SNSMem_SHIFT) #define ID_AA64MMFR0_SNSMem_VAL(x) ((x) & ID_AA64MMFR0_SNSMem_MASK) #define ID_AA64MMFR0_SNSMem_NONE (UL(0x0) << ID_AA64MMFR0_SNSMem_SHIFT) #define ID_AA64MMFR0_SNSMem_DISTINCT (UL(0x1) << ID_AA64MMFR0_SNSMem_SHIFT) #define ID_AA64MMFR0_BigEndEL0_SHIFT 16 #define ID_AA64MMFR0_BigEndEL0_MASK (UL(0xf) << ID_AA64MMFR0_BigEndEL0_SHIFT) #define ID_AA64MMFR0_BigEndEL0_VAL(x) ((x) & ID_AA64MMFR0_BigEndEL0_MASK) #define ID_AA64MMFR0_BigEndEL0_FIXED (UL(0x0) << ID_AA64MMFR0_BigEndEL0_SHIFT) #define ID_AA64MMFR0_BigEndEL0_MIXED (UL(0x1) << ID_AA64MMFR0_BigEndEL0_SHIFT) #define ID_AA64MMFR0_TGran16_SHIFT 20 #define ID_AA64MMFR0_TGran16_MASK (UL(0xf) << ID_AA64MMFR0_TGran16_SHIFT) #define ID_AA64MMFR0_TGran16_VAL(x) ((x) & ID_AA64MMFR0_TGran16_MASK) #define ID_AA64MMFR0_TGran16_NONE (UL(0x0) << ID_AA64MMFR0_TGran16_SHIFT) #define ID_AA64MMFR0_TGran16_IMPL (UL(0x1) << ID_AA64MMFR0_TGran16_SHIFT) #define ID_AA64MMFR0_TGran64_SHIFT 24 #define ID_AA64MMFR0_TGran64_MASK (UL(0xf) << ID_AA64MMFR0_TGran64_SHIFT) #define ID_AA64MMFR0_TGran64_VAL(x) ((x) & ID_AA64MMFR0_TGran64_MASK) #define ID_AA64MMFR0_TGran64_IMPL (UL(0x0) << ID_AA64MMFR0_TGran64_SHIFT) #define ID_AA64MMFR0_TGran64_NONE (UL(0xf) << ID_AA64MMFR0_TGran64_SHIFT) #define ID_AA64MMFR0_TGran4_SHIFT 28 #define ID_AA64MMFR0_TGran4_MASK (UL(0xf) << ID_AA64MMFR0_TGran4_SHIFT) #define ID_AA64MMFR0_TGran4_VAL(x) ((x) & ID_AA64MMFR0_TGran4_MASK) #define ID_AA64MMFR0_TGran4_IMPL (UL(0x0) << ID_AA64MMFR0_TGran4_SHIFT) #define ID_AA64MMFR0_TGran4_NONE (UL(0xf) << ID_AA64MMFR0_TGran4_SHIFT) /* ID_AA64MMFR1_EL1 */ #define ID_AA64MMFR1_EL1 MRS_REG(3, 0, 0, 7, 1) #define ID_AA64MMFR1_HAFDBS_SHIFT 0 #define ID_AA64MMFR1_HAFDBS_MASK (UL(0xf) << ID_AA64MMFR1_HAFDBS_SHIFT) #define ID_AA64MMFR1_HAFDBS_VAL(x) ((x) & ID_AA64MMFR1_HAFDBS_MASK) #define ID_AA64MMFR1_HAFDBS_NONE (UL(0x0) << ID_AA64MMFR1_HAFDBS_SHIFT) #define ID_AA64MMFR1_HAFDBS_AF (UL(0x1) << ID_AA64MMFR1_HAFDBS_SHIFT) #define ID_AA64MMFR1_HAFDBS_AF_DBS (UL(0x2) << ID_AA64MMFR1_HAFDBS_SHIFT) #define ID_AA64MMFR1_VMIDBits_SHIFT 4 #define ID_AA64MMFR1_VMIDBits_MASK (UL(0xf) << ID_AA64MMFR1_VMIDBits_SHIFT) #define ID_AA64MMFR1_VMIDBits_VAL(x) ((x) & ID_AA64MMFR1_VMIDBits_MASK) #define ID_AA64MMFR1_VMIDBits_8 (UL(0x0) << ID_AA64MMFR1_VMIDBits_SHIFT) #define ID_AA64MMFR1_VMIDBits_16 (UL(0x2) << ID_AA64MMFR1_VMIDBits_SHIFT) #define ID_AA64MMFR1_VH_SHIFT 8 #define ID_AA64MMFR1_VH_MASK (UL(0xf) << ID_AA64MMFR1_VH_SHIFT) #define ID_AA64MMFR1_VH_VAL(x) ((x) & ID_AA64MMFR1_VH_MASK) #define ID_AA64MMFR1_VH_NONE (UL(0x0) << ID_AA64MMFR1_VH_SHIFT) #define ID_AA64MMFR1_VH_IMPL (UL(0x1) << ID_AA64MMFR1_VH_SHIFT) #define ID_AA64MMFR1_HPDS_SHIFT 12 #define ID_AA64MMFR1_HPDS_MASK (UL(0xf) << ID_AA64MMFR1_HPDS_SHIFT) #define ID_AA64MMFR1_HPDS_VAL(x) ((x) & ID_AA64MMFR1_HPDS_MASK) #define ID_AA64MMFR1_HPDS_NONE (UL(0x0) << ID_AA64MMFR1_HPDS_SHIFT) #define ID_AA64MMFR1_HPDS_HPD (UL(0x1) << ID_AA64MMFR1_HPDS_SHIFT) #define ID_AA64MMFR1_HPDS_TTPBHA (UL(0x2) << ID_AA64MMFR1_HPDS_SHIFT) #define ID_AA64MMFR1_LO_SHIFT 16 #define ID_AA64MMFR1_LO_MASK (UL(0xf) << ID_AA64MMFR1_LO_SHIFT) #define ID_AA64MMFR1_LO_VAL(x) ((x) & ID_AA64MMFR1_LO_MASK) #define ID_AA64MMFR1_LO_NONE (UL(0x0) << ID_AA64MMFR1_LO_SHIFT) #define ID_AA64MMFR1_LO_IMPL (UL(0x1) << ID_AA64MMFR1_LO_SHIFT) #define ID_AA64MMFR1_PAN_SHIFT 20 #define ID_AA64MMFR1_PAN_MASK (UL(0xf) << ID_AA64MMFR1_PAN_SHIFT) #define ID_AA64MMFR1_PAN_VAL(x) ((x) & ID_AA64MMFR1_PAN_MASK) #define ID_AA64MMFR1_PAN_NONE (UL(0x0) << ID_AA64MMFR1_PAN_SHIFT) #define ID_AA64MMFR1_PAN_IMPL (UL(0x1) << ID_AA64MMFR1_PAN_SHIFT) #define ID_AA64MMFR1_PAN_ATS1E1 (UL(0x2) << ID_AA64MMFR1_PAN_SHIFT) #define ID_AA64MMFR1_SpecSEI_SHIFT 24 #define ID_AA64MMFR1_SpecSEI_MASK (UL(0xf) << ID_AA64MMFR1_SpecSEI_SHIFT) #define ID_AA64MMFR1_SpecSEI_VAL(x) ((x) & ID_AA64MMFR1_SpecSEI_MASK) #define ID_AA64MMFR1_SpecSEI_NONE (UL(0x0) << ID_AA64MMFR1_SpecSEI_SHIFT) #define ID_AA64MMFR1_SpecSEI_IMPL (UL(0x1) << ID_AA64MMFR1_SpecSEI_SHIFT) #define ID_AA64MMFR1_XNX_SHIFT 28 #define ID_AA64MMFR1_XNX_MASK (UL(0xf) << ID_AA64MMFR1_XNX_SHIFT) #define ID_AA64MMFR1_XNX_VAL(x) ((x) & ID_AA64MMFR1_XNX_MASK) #define ID_AA64MMFR1_XNX_NONE (UL(0x0) << ID_AA64MMFR1_XNX_SHIFT) #define ID_AA64MMFR1_XNX_IMPL (UL(0x1) << ID_AA64MMFR1_XNX_SHIFT) /* ID_AA64MMFR2_EL1 */ #define ID_AA64MMFR2_EL1 MRS_REG(3, 0, 0, 7, 2) #define ID_AA64MMFR2_CnP_SHIFT 0 #define ID_AA64MMFR2_CnP_MASK (UL(0xf) << ID_AA64MMFR2_CnP_SHIFT) #define ID_AA64MMFR2_CnP_VAL(x) ((x) & ID_AA64MMFR2_CnP_MASK) #define ID_AA64MMFR2_CnP_NONE (UL(0x0) << ID_AA64MMFR2_CnP_SHIFT) #define ID_AA64MMFR2_CnP_IMPL (UL(0x1) << ID_AA64MMFR2_CnP_SHIFT) #define ID_AA64MMFR2_UAO_SHIFT 4 #define ID_AA64MMFR2_UAO_MASK (UL(0xf) << ID_AA64MMFR2_UAO_SHIFT) #define ID_AA64MMFR2_UAO_VAL(x) ((x) & ID_AA64MMFR2_UAO_MASK) #define ID_AA64MMFR2_UAO_NONE (UL(0x0) << ID_AA64MMFR2_UAO_SHIFT) #define ID_AA64MMFR2_UAO_IMPL (UL(0x1) << ID_AA64MMFR2_UAO_SHIFT) #define ID_AA64MMFR2_LSM_SHIFT 8 #define ID_AA64MMFR2_LSM_MASK (UL(0xf) << ID_AA64MMFR2_LSM_SHIFT) #define ID_AA64MMFR2_LSM_VAL(x) ((x) & ID_AA64MMFR2_LSM_MASK) #define ID_AA64MMFR2_LSM_NONE (UL(0x0) << ID_AA64MMFR2_LSM_SHIFT) #define ID_AA64MMFR2_LSM_IMPL (UL(0x1) << ID_AA64MMFR2_LSM_SHIFT) #define ID_AA64MMFR2_IESB_SHIFT 12 #define ID_AA64MMFR2_IESB_MASK (UL(0xf) << ID_AA64MMFR2_IESB_SHIFT) #define ID_AA64MMFR2_IESB_VAL(x) ((x) & ID_AA64MMFR2_IESB_MASK) #define ID_AA64MMFR2_IESB_NONE (UL(0x0) << ID_AA64MMFR2_IESB_SHIFT) #define ID_AA64MMFR2_IESB_IMPL (UL(0x1) << ID_AA64MMFR2_IESB_SHIFT) #define ID_AA64MMFR2_VARange_SHIFT 16 #define ID_AA64MMFR2_VARange_MASK (UL(0xf) << ID_AA64MMFR2_VARange_SHIFT) #define ID_AA64MMFR2_VARange_VAL(x) ((x) & ID_AA64MMFR2_VARange_MASK) #define ID_AA64MMFR2_VARange_48 (UL(0x0) << ID_AA64MMFR2_VARange_SHIFT) #define ID_AA64MMFR2_VARange_52 (UL(0x1) << ID_AA64MMFR2_VARange_SHIFT) #define ID_AA64MMFR2_CCIDX_SHIFT 20 #define ID_AA64MMFR2_CCIDX_MASK (UL(0xf) << ID_AA64MMFR2_CCIDX_SHIFT) #define ID_AA64MMFR2_CCIDX_VAL(x) ((x) & ID_AA64MMFR2_CCIDX_MASK) #define ID_AA64MMFR2_CCIDX_32 (UL(0x0) << ID_AA64MMFR2_CCIDX_SHIFT) #define ID_AA64MMFR2_CCIDX_64 (UL(0x1) << ID_AA64MMFR2_CCIDX_SHIFT) #define ID_AA64MMFR2_NV_SHIFT 24 #define ID_AA64MMFR2_NV_MASK (UL(0xf) << ID_AA64MMFR2_NV_SHIFT) #define ID_AA64MMFR2_NV_VAL(x) ((x) & ID_AA64MMFR2_NV_MASK) #define ID_AA64MMFR2_NV_NONE (UL(0x0) << ID_AA64MMFR2_NV_SHIFT) #define ID_AA64MMFR2_NV_IMPL (UL(0x1) << ID_AA64MMFR2_NV_SHIFT) /* ID_AA64PFR0_EL1 */ #define ID_AA64PFR0_EL1 MRS_REG(3, 0, 0, 4, 0) #define ID_AA64PFR0_EL0_SHIFT 0 #define ID_AA64PFR0_EL0_MASK (UL(0xf) << ID_AA64PFR0_EL0_SHIFT) #define ID_AA64PFR0_EL0_VAL(x) ((x) & ID_AA64PFR0_EL0_MASK) #define ID_AA64PFR0_EL0_64 (UL(0x1) << ID_AA64PFR0_EL0_SHIFT) #define ID_AA64PFR0_EL0_64_32 (UL(0x2) << ID_AA64PFR0_EL0_SHIFT) #define ID_AA64PFR0_EL1_SHIFT 4 #define ID_AA64PFR0_EL1_MASK (UL(0xf) << ID_AA64PFR0_EL1_SHIFT) #define ID_AA64PFR0_EL1_VAL(x) ((x) & ID_AA64PFR0_EL1_MASK) #define ID_AA64PFR0_EL1_64 (UL(0x1) << ID_AA64PFR0_EL1_SHIFT) #define ID_AA64PFR0_EL1_64_32 (UL(0x2) << ID_AA64PFR0_EL1_SHIFT) #define ID_AA64PFR0_EL2_SHIFT 8 #define ID_AA64PFR0_EL2_MASK (UL(0xf) << ID_AA64PFR0_EL2_SHIFT) #define ID_AA64PFR0_EL2_VAL(x) ((x) & ID_AA64PFR0_EL2_MASK) #define ID_AA64PFR0_EL2_NONE (UL(0x0) << ID_AA64PFR0_EL2_SHIFT) #define ID_AA64PFR0_EL2_64 (UL(0x1) << ID_AA64PFR0_EL2_SHIFT) #define ID_AA64PFR0_EL2_64_32 (UL(0x2) << ID_AA64PFR0_EL2_SHIFT) #define ID_AA64PFR0_EL3_SHIFT 12 #define ID_AA64PFR0_EL3_MASK (UL(0xf) << ID_AA64PFR0_EL3_SHIFT) #define ID_AA64PFR0_EL3_VAL(x) ((x) & ID_AA64PFR0_EL3_MASK) #define ID_AA64PFR0_EL3_NONE (UL(0x0) << ID_AA64PFR0_EL3_SHIFT) #define ID_AA64PFR0_EL3_64 (UL(0x1) << ID_AA64PFR0_EL3_SHIFT) #define ID_AA64PFR0_EL3_64_32 (UL(0x2) << ID_AA64PFR0_EL3_SHIFT) #define ID_AA64PFR0_FP_SHIFT 16 #define ID_AA64PFR0_FP_MASK (UL(0xf) << ID_AA64PFR0_FP_SHIFT) #define ID_AA64PFR0_FP_VAL(x) ((x) & ID_AA64PFR0_FP_MASK) #define ID_AA64PFR0_FP_IMPL (UL(0x0) << ID_AA64PFR0_FP_SHIFT) #define ID_AA64PFR0_FP_HP (UL(0x1) << ID_AA64PFR0_FP_SHIFT) #define ID_AA64PFR0_FP_NONE (UL(0xf) << ID_AA64PFR0_FP_SHIFT) #define ID_AA64PFR0_AdvSIMD_SHIFT 20 #define ID_AA64PFR0_AdvSIMD_MASK (UL(0xf) << ID_AA64PFR0_AdvSIMD_SHIFT) #define ID_AA64PFR0_AdvSIMD_VAL(x) ((x) & ID_AA64PFR0_AdvSIMD_MASK) #define ID_AA64PFR0_AdvSIMD_IMPL (UL(0x0) << ID_AA64PFR0_AdvSIMD_SHIFT) #define ID_AA64PFR0_AdvSIMD_HP (UL(0x1) << ID_AA64PFR0_AdvSIMD_SHIFT) #define ID_AA64PFR0_AdvSIMD_NONE (UL(0xf) << ID_AA64PFR0_AdvSIMD_SHIFT) #define ID_AA64PFR0_GIC_BITS 0x4 /* Number of bits in GIC field */ #define ID_AA64PFR0_GIC_SHIFT 24 #define ID_AA64PFR0_GIC_MASK (UL(0xf) << ID_AA64PFR0_GIC_SHIFT) #define ID_AA64PFR0_GIC_VAL(x) ((x) & ID_AA64PFR0_GIC_MASK) #define ID_AA64PFR0_GIC_CPUIF_NONE (UL(0x0) << ID_AA64PFR0_GIC_SHIFT) #define ID_AA64PFR0_GIC_CPUIF_EN (UL(0x1) << ID_AA64PFR0_GIC_SHIFT) #define ID_AA64PFR0_RAS_SHIFT 28 #define ID_AA64PFR0_RAS_MASK (UL(0xf) << ID_AA64PFR0_RAS_SHIFT) #define ID_AA64PFR0_RAS_VAL(x) ((x) & ID_AA64PFR0_RAS_MASK) #define ID_AA64PFR0_RAS_NONE (UL(0x0) << ID_AA64PFR0_RAS_SHIFT) #define ID_AA64PFR0_RAS_V1 (UL(0x1) << ID_AA64PFR0_RAS_SHIFT) #define ID_AA64PFR0_SVE_SHIFT 32 #define ID_AA64PFR0_SVE_MASK (UL(0xf) << ID_AA64PFR0_SVE_SHIFT) #define ID_AA64PFR0_SVE_VAL(x) ((x) & ID_AA64PFR0_SVE_MASK) #define ID_AA64PFR0_SVE_NONE (UL(0x0) << ID_AA64PFR0_SVE_SHIFT) #define ID_AA64PFR0_SVE_IMPL (UL(0x1) << ID_AA64PFR0_SVE_SHIFT) #define ID_AA64PFR0_SEL2_SHIFT 36 #define ID_AA64PFR0_SEL2_MASK (UL(0xf) << ID_AA64PFR0_SEL2_SHIFT) #define ID_AA64PFR0_SEL2_VAL(x) ((x) & ID_AA64PFR0_SEL2_MASK) #define ID_AA64PFR0_SEL2_NONE (UL(0x0) << ID_AA64PFR0_SEL2_SHIFT) #define ID_AA64PFR0_SEL2_IMPL (UL(0x1) << ID_AA64PFR0_SEL2_SHIFT) #define ID_AA64PFR0_MPAM_SHIFT 40 #define ID_AA64PFR0_MPAM_MASK (UL(0xf) << ID_AA64PFR0_MPAM_SHIFT) #define ID_AA64PFR0_MPAM_VAL(x) ((x) & ID_AA64PFR0_MPAM_MASK) #define ID_AA64PFR0_MPAM_NONE (UL(0x0) << ID_AA64PFR0_MPAM_SHIFT) #define ID_AA64PFR0_MPAM_IMPL (UL(0x1) << ID_AA64PFR0_MPAM_SHIFT) #define ID_AA64PFR0_AMU_SHIFT 44 #define ID_AA64PFR0_AMU_MASK (UL(0xf) << ID_AA64PFR0_AMU_SHIFT) #define ID_AA64PFR0_AMU_VAL(x) ((x) & ID_AA64PFR0_AMU_MASK) #define ID_AA64PFR0_AMU_NONE (UL(0x0) << ID_AA64PFR0_AMU_SHIFT) #define ID_AA64PFR0_AMU_V1 (UL(0x1) << ID_AA64PFR0_AMU_SHIFT) #define ID_AA64PFR0_DIT_SHIFT 48 #define ID_AA64PFR0_DIT_MASK (UL(0xf) << ID_AA64PFR0_DIT_SHIFT) #define ID_AA64PFR0_DIT_VAL(x) ((x) & ID_AA64PFR0_DIT_MASK) #define ID_AA64PFR0_DIT_NONE (UL(0x0) << ID_AA64PFR0_DIT_SHIFT) #define ID_AA64PFR0_DIT_PSTATE (UL(0x1) << ID_AA64PFR0_DIT_SHIFT) #define ID_AA64PFR0_CSV2_SHIFT 56 #define ID_AA64PFR0_CSV2_MASK (UL(0xf) << ID_AA64PFR0_CSV2_SHIFT) #define ID_AA64PFR0_CSV2_VAL(x) ((x) & ID_AA64PFR0_CSV2_MASK) #define ID_AA64PFR0_CSV2_NONE (UL(0x0) << ID_AA64PFR0_CSV2_SHIFT) #define ID_AA64PFR0_CSV2_ISOLATED (UL(0x1) << ID_AA64PFR0_CSV2_SHIFT) #define ID_AA64PFR0_CSV2_SCXTNUM (UL(0x2) << ID_AA64PFR0_CSV2_SHIFT) #define ID_AA64PFR0_CSV3_SHIFT 60 #define ID_AA64PFR0_CSV3_MASK (UL(0xf) << ID_AA64PFR0_CSV3_SHIFT) #define ID_AA64PFR0_CSV3_VAL(x) ((x) & ID_AA64PFR0_CSV3_MASK) #define ID_AA64PFR0_CSV3_NONE (UL(0x0) << ID_AA64PFR0_CSV3_SHIFT) #define ID_AA64PFR0_CSV3_ISOLATED (UL(0x1) << ID_AA64PFR0_CSV3_SHIFT) /* ID_AA64PFR1_EL1 */ #define ID_AA64PFR1_EL1 MRS_REG(3, 0, 0, 4, 1) #define ID_AA64PFR1_BT_SHIFT 0 #define ID_AA64PFR1_BT_MASK (UL(0xf) << ID_AA64PFR1_BT_SHIFT) #define ID_AA64PFR1_BT_VAL(x) ((x) & ID_AA64PFR1_BT_MASK) #define ID_AA64PFR1_BT_NONE (UL(0x0) << ID_AA64PFR1_BT_SHIFT) #define ID_AA64PFR1_BT_IMPL (UL(0x1) << ID_AA64PFR1_BT_SHIFT) #define ID_AA64PFR1_SSBS_SHIFT 4 #define ID_AA64PFR1_SSBS_MASK (UL(0xf) << ID_AA64PFR1_SSBS_SHIFT) #define ID_AA64PFR1_SSBS_VAL(x) ((x) & ID_AA64PFR1_SSBS_MASK) #define ID_AA64PFR1_SSBS_NONE (UL(0x0) << ID_AA64PFR1_SSBS_SHIFT) #define ID_AA64PFR1_SSBS_PSTATE (UL(0x1) << ID_AA64PFR1_SSBS_SHIFT) #define ID_AA64PFR1_SSBS_PSTATE_MSR (UL(0x2) << ID_AA64PFR1_SSBS_SHIFT) #define ID_AA64PFR1_MTE_SHIFT 8 #define ID_AA64PFR1_MTE_MASK (UL(0xf) << ID_AA64PFR1_MTE_SHIFT) #define ID_AA64PFR1_MTE_VAL(x) ((x) & ID_AA64PFR1_MTE_MASK) #define ID_AA64PFR1_MTE_NONE (UL(0x0) << ID_AA64PFR1_MTE_SHIFT) #define ID_AA64PFR1_MTE_IMPL_EL0 (UL(0x1) << ID_AA64PFR1_MTE_SHIFT) #define ID_AA64PFR1_MTE_IMPL (UL(0x2) << ID_AA64PFR1_MTE_SHIFT) #define ID_AA64PFR1_RAS_frac_SHIFT 12 #define ID_AA64PFR1_RAS_frac_MASK (UL(0xf) << ID_AA64PFR1_RAS_frac_SHIFT) #define ID_AA64PFR1_RAS_frac_VAL(x) ((x) & ID_AA64PFR1_RAS_frac_MASK) #define ID_AA64PFR1_RAS_frac_V1 (UL(0x0) << ID_AA64PFR1_RAS_frac_SHIFT) #define ID_AA64PFR1_RAS_frac_V2 (UL(0x1) << ID_AA64PFR1_RAS_frac_SHIFT) /* MAIR_EL1 - Memory Attribute Indirection Register */ #define MAIR_ATTR_MASK(idx) (0xff << ((n)* 8)) #define MAIR_ATTR(attr, idx) ((attr) << ((idx) * 8)) #define MAIR_DEVICE_nGnRnE 0x00 #define MAIR_NORMAL_NC 0x44 #define MAIR_NORMAL_WT 0xbb #define MAIR_NORMAL_WB 0xff /* PAR_EL1 - Physical Address Register */ #define PAR_F_SHIFT 0 #define PAR_F (0x1 << PAR_F_SHIFT) #define PAR_SUCCESS(x) (((x) & PAR_F) == 0) /* When PAR_F == 0 (success) */ #define PAR_SH_SHIFT 7 #define PAR_SH_MASK (0x3 << PAR_SH_SHIFT) #define PAR_NS_SHIFT 9 #define PAR_NS_MASK (0x3 << PAR_NS_SHIFT) #define PAR_PA_SHIFT 12 #define PAR_PA_MASK 0x0000fffffffff000 #define PAR_ATTR_SHIFT 56 #define PAR_ATTR_MASK (0xff << PAR_ATTR_SHIFT) /* When PAR_F == 1 (aborted) */ #define PAR_FST_SHIFT 1 #define PAR_FST_MASK (0x3f << PAR_FST_SHIFT) #define PAR_PTW_SHIFT 8 #define PAR_PTW_MASK (0x1 << PAR_PTW_SHIFT) #define PAR_S_SHIFT 9 #define PAR_S_MASK (0x1 << PAR_S_SHIFT) /* SCTLR_EL1 - System Control Register */ #define SCTLR_RES0 0xc8222440 /* Reserved ARMv8.0, write 0 */ #define SCTLR_RES1 0x30d00800 /* Reserved ARMv8.0, write 1 */ #define SCTLR_M 0x00000001 #define SCTLR_A 0x00000002 #define SCTLR_C 0x00000004 #define SCTLR_SA 0x00000008 #define SCTLR_SA0 0x00000010 #define SCTLR_CP15BEN 0x00000020 /* Bit 6 is reserved */ #define SCTLR_ITD 0x00000080 #define SCTLR_SED 0x00000100 #define SCTLR_UMA 0x00000200 /* Bit 10 is reserved */ /* Bit 11 is reserved */ #define SCTLR_I 0x00001000 #define SCTLR_EnDB 0x00002000 /* ARMv8.3 */ #define SCTLR_DZE 0x00004000 #define SCTLR_UCT 0x00008000 #define SCTLR_nTWI 0x00010000 /* Bit 17 is reserved */ #define SCTLR_nTWE 0x00040000 #define SCTLR_WXN 0x00080000 /* Bit 20 is reserved */ #define SCTLR_IESB 0x00200000 /* ARMv8.2 */ /* Bit 22 is reserved */ #define SCTLR_SPAN 0x00800000 /* ARMv8.1 */ #define SCTLR_EOE 0x01000000 #define SCTLR_EE 0x02000000 #define SCTLR_UCI 0x04000000 #define SCTLR_EnDA 0x08000000 /* ARMv8.3 */ #define SCTLR_nTLSMD 0x10000000 /* ARMv8.2 */ #define SCTLR_LSMAOE 0x20000000 /* ARMv8.2 */ #define SCTLR_EnIB 0x40000000 /* ARMv8.3 */ #define SCTLR_EnIA 0x80000000 /* ARMv8.3 */ /* SPSR_EL1 */ /* * When the exception is taken in AArch64: * M[3:2] is the exception level * M[1] is unused * M[0] is the SP select: * 0: always SP0 * 1: current ELs SP */ #define PSR_M_EL0t 0x00000000 #define PSR_M_EL1t 0x00000004 #define PSR_M_EL1h 0x00000005 #define PSR_M_EL2t 0x00000008 #define PSR_M_EL2h 0x00000009 #define PSR_M_64 0x00000000 #define PSR_M_32 0x00000010 #define PSR_M_MASK 0x0000000f #define PSR_T 0x00000020 #define PSR_AARCH32 0x00000010 #define PSR_F 0x00000040 #define PSR_I 0x00000080 #define PSR_A 0x00000100 #define PSR_D 0x00000200 #define PSR_DAIF (PSR_D | PSR_A | PSR_I | PSR_F) #define PSR_IL 0x00100000 #define PSR_SS 0x00200000 #define PSR_V 0x10000000 #define PSR_C 0x20000000 #define PSR_Z 0x40000000 #define PSR_N 0x80000000 #define PSR_FLAGS 0xf0000000 /* TCR_EL1 - Translation Control Register */ #define TCR_HD_SHIFT 40 #define TCR_HD (0x1UL << TCR_HD_SHIFT) #define TCR_HA_SHIFT 39 #define TCR_HA (0x1UL << TCR_HA_SHIFT) #define TCR_ASID_SHIFT 36 #define TCR_ASID_WIDTH 1 #define TCR_ASID_16 (0x1UL << TCR_ASID_SHIFT) #define TCR_IPS_SHIFT 32 #define TCR_IPS_WIDTH 3 #define TCR_IPS_32BIT (0 << TCR_IPS_SHIFT) #define TCR_IPS_36BIT (1 << TCR_IPS_SHIFT) #define TCR_IPS_40BIT (2 << TCR_IPS_SHIFT) #define TCR_IPS_42BIT (3 << TCR_IPS_SHIFT) #define TCR_IPS_44BIT (4 << TCR_IPS_SHIFT) #define TCR_IPS_48BIT (5 << TCR_IPS_SHIFT) #define TCR_TG1_SHIFT 30 #define TCR_TG1_16K (1 << TCR_TG1_SHIFT) #define TCR_TG1_4K (2 << TCR_TG1_SHIFT) #define TCR_TG1_64K (3 << TCR_TG1_SHIFT) +#define TCR_TG0_MASK 0x000000000000c000 + #define TCR_SH1_SHIFT 28 #define TCR_SH1_IS (0x3UL << TCR_SH1_SHIFT) #define TCR_ORGN1_SHIFT 26 #define TCR_ORGN1_WBWA (0x1UL << TCR_ORGN1_SHIFT) #define TCR_IRGN1_SHIFT 24 #define TCR_IRGN1_WBWA (0x1UL << TCR_IRGN1_SHIFT) #define TCR_A1_SHIFT 22 #define TCR_A1 (0x1UL << TCR_A1_SHIFT) #define TCR_SH0_SHIFT 12 #define TCR_SH0_IS (0x3UL << TCR_SH0_SHIFT) #define TCR_ORGN0_SHIFT 10 #define TCR_ORGN0_WBWA (0x1UL << TCR_ORGN0_SHIFT) #define TCR_IRGN0_SHIFT 8 #define TCR_IRGN0_WBWA (0x1UL << TCR_IRGN0_SHIFT) #define TCR_CACHE_ATTRS ((TCR_IRGN0_WBWA | TCR_IRGN1_WBWA) |\ (TCR_ORGN0_WBWA | TCR_ORGN1_WBWA)) #ifdef SMP #define TCR_SMP_ATTRS (TCR_SH0_IS | TCR_SH1_IS) #else #define TCR_SMP_ATTRS 0 #endif #define TCR_T1SZ_SHIFT 16 #define TCR_T0SZ_SHIFT 0 +#define TCR_T0SZ_MASK 0x3f #define TCR_T1SZ(x) ((x) << TCR_T1SZ_SHIFT) #define TCR_T0SZ(x) ((x) << TCR_T0SZ_SHIFT) #define TCR_TxSZ(x) (TCR_T1SZ(x) | TCR_T0SZ(x)) /* Saved Program Status Register */ #define DBG_SPSR_SS (0x1 << 21) /* Monitor Debug System Control Register */ #define DBG_MDSCR_SS (0x1 << 0) #define DBG_MDSCR_KDE (0x1 << 13) #define DBG_MDSCR_MDE (0x1 << 15) /* Perfomance Monitoring Counters */ #define PMCR_E (1 << 0) /* Enable all counters */ #define PMCR_P (1 << 1) /* Reset all counters */ #define PMCR_C (1 << 2) /* Clock counter reset */ #define PMCR_D (1 << 3) /* CNTR counts every 64 clk cycles */ #define PMCR_X (1 << 4) /* Export to ext. monitoring (ETM) */ #define PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ #define PMCR_LC (1 << 6) /* Long cycle count enable */ #define PMCR_IMP_SHIFT 24 /* Implementer code */ #define PMCR_IMP_MASK (0xff << PMCR_IMP_SHIFT) #define PMCR_IDCODE_SHIFT 16 /* Identification code */ #define PMCR_IDCODE_MASK (0xff << PMCR_IDCODE_SHIFT) #define PMCR_IDCODE_CORTEX_A57 0x01 #define PMCR_IDCODE_CORTEX_A72 0x02 #define PMCR_IDCODE_CORTEX_A53 0x03 #define PMCR_N_SHIFT 11 /* Number of counters implemented */ #define PMCR_N_MASK (0x1f << PMCR_N_SHIFT) #endif /* !_MACHINE_ARMREG_H_ */ diff --git a/sys/arm64/include/hypervisor.h b/sys/arm64/include/hypervisor.h index 7ac0069af1be..eab43b29a89e 100644 --- a/sys/arm64/include/hypervisor.h +++ b/sys/arm64/include/hypervisor.h @@ -1,96 +1,185 @@ /*- * Copyright (c) 2013, 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_HYPERVISOR_H_ #define _MACHINE_HYPERVISOR_H_ /* * These registers are only useful when in hypervisor context, * e.g. specific to EL2, or controlling the hypervisor. */ -/* - * Architecture feature trap register - */ +/* CNTHCTL_EL2 - Counter-timer Hypervisor Control register */ +#define CNTHCTL_EVNTI_MASK (0xf << 4) /* Bit to trigger event stream */ +#define CNTHCTL_EVNTDIR (1 << 3) /* Control transition trigger bit */ +#define CNTHCTL_EVNTEN (1 << 2) /* Enable event stream */ +#define CNTHCTL_EL1PCEN (1 << 1) /* Allow EL0/1 physical timer access */ +#define CNTHCTL_EL1PCTEN (1 << 0) /*Allow EL0/1 physical counter access*/ + +/* CPTR_EL2 - Architecture feature trap register */ #define CPTR_RES0 0x7fefc800 #define CPTR_RES1 0x000033ff #define CPTR_TFP 0x00000400 #define CPTR_TTA 0x00100000 #define CPTR_TCPAC 0x80000000 -/* - * Hypervisor Config Register - */ - +/* HCR_EL2 - Hypervisor Config Register */ #define HCR_VM 0x0000000000000001 #define HCR_SWIO 0x0000000000000002 #define HCR_PTW 0x0000000000000004 #define HCR_FMO 0x0000000000000008 #define HCR_IMO 0x0000000000000010 #define HCR_AMO 0x0000000000000020 #define HCR_VF 0x0000000000000040 #define HCR_VI 0x0000000000000080 #define HCR_VSE 0x0000000000000100 #define HCR_FB 0x0000000000000200 #define HCR_BSU_MASK 0x0000000000000c00 +#define HCR_BSU_IS 0x0000000000000400 +#define HCR_BSU_OS 0x0000000000000800 +#define HCR_BSU_FS 0x0000000000000c00 #define HCR_DC 0x0000000000001000 #define HCR_TWI 0x0000000000002000 #define HCR_TWE 0x0000000000004000 #define HCR_TID0 0x0000000000008000 #define HCR_TID1 0x0000000000010000 #define HCR_TID2 0x0000000000020000 #define HCR_TID3 0x0000000000040000 #define HCR_TSC 0x0000000000080000 #define HCR_TIDCP 0x0000000000100000 #define HCR_TACR 0x0000000000200000 #define HCR_TSW 0x0000000000400000 -#define HCR_TPC 0x0000000000800000 +#define HCR_TPCP 0x0000000000800000 #define HCR_TPU 0x0000000001000000 #define HCR_TTLB 0x0000000002000000 #define HCR_TVM 0x0000000004000000 #define HCR_TGE 0x0000000008000000 #define HCR_TDZ 0x0000000010000000 #define HCR_HCD 0x0000000020000000 #define HCR_TRVM 0x0000000040000000 #define HCR_RW 0x0000000080000000 #define HCR_CD 0x0000000100000000 #define HCR_ID 0x0000000200000000 #define HCR_E2H 0x0000000400000000 #define HCR_TLOR 0x0000000800000000 #define HCR_TERR 0x0000001000000000 #define HCR_TEA 0x0000002000000000 #define HCR_MIOCNCE 0x0000004000000000 /* Bit 39 is reserved */ #define HCR_APK 0x0000010000000000 #define HCR_API 0x0000020000000000 #define HCR_NV 0x0000040000000000 #define HCR_NV1 0x0000080000000000 #define HCR_AT 0x0000100000000000 -#endif +/* HPFAR_EL2 - Hypervisor IPA Fault Address Register */ +#define HPFAR_EL2_FIPA_SHIFT 4 +#define HPFAR_EL2_FIPA_MASK 0xfffffffff0 + +/* ICC_SRE_EL2 */ +#define ICC_SRE_EL2_SRE (1U << 0) +#define ICC_SRE_EL2_EN (1U << 3) + +/* SCTLR_EL2 - System Control Register */ +#define SCTLR_EL2_RES1 0x30c50830 +#define SCTLR_EL2_M_SHIFT 0 +#define SCTLR_EL2_M (0x1 << SCTLR_EL2_M_SHIFT) +#define SCTLR_EL2_A_SHIFT 1 +#define SCTLR_EL2_A (0x1 << SCTLR_EL2_A_SHIFT) +#define SCTLR_EL2_C_SHIFT 2 +#define SCTLR_EL2_C (0x1 << SCTLR_EL2_C_SHIFT) +#define SCTLR_EL2_SA_SHIFT 3 +#define SCTLR_EL2_SA (0x1 << SCTLR_EL2_SA_SHIFT) +#define SCTLR_EL2_I_SHIFT 12 +#define SCTLR_EL2_I (0x1 << SCTLR_EL2_I_SHIFT) +#define SCTLR_EL2_WXN_SHIFT 19 +#define SCTLR_EL2_WXN (0x1 << SCTLR_EL2_WXN_SHIFT) +#define SCTLR_EL2_EE_SHIFT 25 +#define SCTLR_EL2_EE (0x1 << SCTLR_EL2_EE_SHIFT) + +/* TCR_EL2 - Translation Control Register */ +#define TCR_EL2_RES1 ((0x1UL << 31) | (0x1UL << 23)) +#define TCR_EL2_T0SZ_SHIFT 0 +#define TCR_EL2_T0SZ_MASK (0x3f << TCR_EL2_T0SZ_SHIFT) +#define TCR_EL2_T0SZ(x) ((x) << TCR_EL2_T0SZ_SHIFT) +/* Bits 7:6 are reserved */ +#define TCR_EL2_IRGN0_SHIFT 8 +#define TCR_EL2_IRGN0_MASK (0x3 << TCR_EL2_IRGN0_SHIFT) +#define TCR_EL2_ORGN0_SHIFT 10 +#define TCR_EL2_ORGN0_MASK (0x3 << TCR_EL2_ORGN0_SHIFT) +#define TCR_EL2_SH0_SHIFT 12 +#define TCR_EL2_SH0_MASK (0x3 << TCR_EL2_SH0_SHIFT) +#define TCR_EL2_TG0_SHIFT 14 +#define TCR_EL2_TG0_MASK (0x3 << TCR_EL2_TG0_SHIFT) +#define TCR_EL2_PS_SHIFT 16 +#define TCR_EL2_PS_32BITS (0 << TCR_EL2_PS_SHIFT) +#define TCR_EL2_PS_36BITS (1 << TCR_EL2_PS_SHIFT) +#define TCR_EL2_PS_40BITS (2 << TCR_EL2_PS_SHIFT) +#define TCR_EL2_PS_42BITS (3 << TCR_EL2_PS_SHIFT) +#define TCR_EL2_PS_44BITS (4 << TCR_EL2_PS_SHIFT) +#define TCR_EL2_PS_48BITS (5 << TCR_EL2_PS_SHIFT) +#define TCR_EL2_PS_52BITS (6 << TCR_EL2_PS_SHIFT) /* ARMv8.2-LPA */ + +/* VMPDIR_EL2 - Virtualization Multiprocessor ID Register */ +#define VMPIDR_EL2_U 0x0000000040000000 +#define VMPIDR_EL2_MT 0x0000000001000000 +#define VMPIDR_EL2_RES1 0x0000000080000000 + +/* VTCR_EL2 - Virtualization Translation Control Register */ +#define VTCR_EL2_RES1 (0x1 << 31) +#define VTCR_EL2_T0SZ_MASK 0x3f +#define VTCR_EL2_SL0_SHIFT 6 +#define VTCR_EL2_SL0_4K_LVL2 (0x0 << VTCR_EL2_SL0_SHIFT) +#define VTCR_EL2_SL0_4K_LVL1 (0x1 << VTCR_EL2_SL0_SHIFT) +#define VTCR_EL2_SL0_4K_LVL0 (0x2 << VTCR_EL2_SL0_SHIFT) +#define VTCR_EL2_IRGN0_SHIFT 8 +#define VTCR_EL2_IRGN0_WBWA (0x1 << VTCR_EL2_IRGN0_SHIFT) +#define VTCR_EL2_ORGN0_SHIFT 10 +#define VTCR_EL2_ORGN0_WBWA (0x1 << VTCR_EL2_ORGN0_SHIFT) +#define VTCR_EL2_SH0_SHIFT 12 +#define VTCR_EL2_SH0_NS (0x0 << VTCR_EL2_SH0_SHIFT) +#define VTCR_EL2_SH0_OS (0x2 << VTCR_EL2_SH0_SHIFT) +#define VTCR_EL2_SH0_IS (0x3 << VTCR_EL2_SH0_SHIFT) +#define VTCR_EL2_TG0_SHIFT 14 +#define VTCR_EL2_TG0_4K (0x0 << VTCR_EL2_TG0_SHIFT) +#define VTCR_EL2_TG0_64K (0x1 << VTCR_EL2_TG0_SHIFT) +#define VTCR_EL2_TG0_16K (0x2 << VTCR_EL2_TG0_SHIFT) +#define VTCR_EL2_PS_SHIFT 16 +#define VTCR_EL2_PS_32BIT (0x0 << VTCR_EL2_PS_SHIFT) +#define VTCR_EL2_PS_36BIT (0x1 << VTCR_EL2_PS_SHIFT) +#define VTCR_EL2_PS_40BIT (0x2 << VTCR_EL2_PS_SHIFT) +#define VTCR_EL2_PS_42BIT (0x3 << VTCR_EL2_PS_SHIFT) +#define VTCR_EL2_PS_44BIT (0x4 << VTCR_EL2_PS_SHIFT) +#define VTCR_EL2_PS_48BIT (0x5 << VTCR_EL2_PS_SHIFT) + +/* VTTBR_EL2 - Virtualization Translation Table Base Register */ +#define VTTBR_VMID_MASK 0xffff000000000000 +#define VTTBR_VMID_SHIFT 48 +#define VTTBR_HOST 0x0000000000000000 +#endif /* !_MACHINE_HYPERVISOR_H_ */ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c index dee78d5971ed..8e73d388a945 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c @@ -1,4007 +1,3998 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright 2016 Gary Mills * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright 2017 Joyent, Inc. * Copyright (c) 2017 Datto Inc. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef _KERNEL #include #endif /* * Grand theory statement on scan queue sorting * * Scanning is implemented by recursively traversing all indirection levels * in an object and reading all blocks referenced from said objects. This * results in us approximately traversing the object from lowest logical * offset to the highest. For best performance, we would want the logical * blocks to be physically contiguous. However, this is frequently not the * case with pools given the allocation patterns of copy-on-write filesystems. * So instead, we put the I/Os into a reordering queue and issue them in a * way that will most benefit physical disks (LBA-order). * * Queue management: * * Ideally, we would want to scan all metadata and queue up all block I/O * prior to starting to issue it, because that allows us to do an optimal * sorting job. This can however consume large amounts of memory. Therefore * we continuously monitor the size of the queues and constrain them to 5% * (zfs_scan_mem_lim_fact) of physmem. If the queues grow larger than this * limit, we clear out a few of the largest extents at the head of the queues * to make room for more scanning. Hopefully, these extents will be fairly * large and contiguous, allowing us to approach sequential I/O throughput * even without a fully sorted tree. * * Metadata scanning takes place in dsl_scan_visit(), which is called from * dsl_scan_sync() every spa_sync(). If we have either fully scanned all * metadata on the pool, or we need to make room in memory because our * queues are too large, dsl_scan_visit() is postponed and * scan_io_queues_run() is called from dsl_scan_sync() instead. This implies * that metadata scanning and queued I/O issuing are mutually exclusive. This * allows us to provide maximum sequential I/O throughput for the majority of * I/O's issued since sequential I/O performance is significantly negatively * impacted if it is interleaved with random I/O. * * Implementation Notes * * One side effect of the queued scanning algorithm is that the scanning code * needs to be notified whenever a block is freed. This is needed to allow * the scanning code to remove these I/Os from the issuing queue. Additionally, * we do not attempt to queue gang blocks to be issued sequentially since this * is very hard to do and would have an extremely limitted performance benefit. * Instead, we simply issue gang I/Os as soon as we find them using the legacy * algorithm. * * Backwards compatibility * * This new algorithm is backwards compatible with the legacy on-disk data * structures (and therefore does not require a new feature flag). * Periodically during scanning (see zfs_scan_checkpoint_intval), the scan * will stop scanning metadata (in logical order) and wait for all outstanding * sorted I/O to complete. Once this is done, we write out a checkpoint * bookmark, indicating that we have scanned everything logically before it. * If the pool is imported on a machine without the new sorting algorithm, * the scan simply resumes from the last checkpoint using the legacy algorithm. */ typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_phys_t *); static scan_cb_t dsl_scan_scrub_cb; static int scan_ds_queue_compare(const void *a, const void *b); static int scan_prefetch_queue_compare(const void *a, const void *b); static void scan_ds_queue_clear(dsl_scan_t *scn); static boolean_t scan_ds_queue_contains(dsl_scan_t *scn, uint64_t dsobj, uint64_t *txg); static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg); static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj); static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx); static uint64_t dsl_scan_count_leaves(vdev_t *vd); extern int zfs_vdev_async_write_active_min_dirty_percent; /* * By default zfs will check to ensure it is not over the hard memory * limit before each txg. If finer-grained control of this is needed * this value can be set to 1 to enable checking before scanning each * block. */ int zfs_scan_strict_mem_lim = B_FALSE; -/* - * Maximum number of parallelly executing I/Os per top-level vdev. - * Tune with care. Very high settings (hundreds) are known to trigger - * some firmware bugs and resets on certain SSDs. - */ -int zfs_top_maxinflight = 32; /* maximum I/Os per top-level */ unsigned int zfs_resilver_delay = 2; /* number of ticks to delay resilver -- 2 is a good number */ unsigned int zfs_scrub_delay = 4; /* number of ticks to delay scrub -- 4 is a good number */ unsigned int zfs_scan_idle = 50; /* idle window in clock ticks */ /* * Maximum number of parallelly executed bytes per leaf vdev. We attempt * to strike a balance here between keeping the vdev queues full of I/Os * at all times and not overflowing the queues to cause long latency, * which would cause long txg sync times. No matter what, we will not * overload the drives with I/O, since that is protected by * zfs_vdev_scrub_max_active. */ unsigned long zfs_scan_vdev_limit = 4 << 20; int zfs_scan_issue_strategy = 0; int zfs_scan_legacy = B_FALSE; /* don't queue & sort zios, go direct */ uint64_t zfs_scan_max_ext_gap = 2 << 20; /* in bytes */ unsigned int zfs_scan_checkpoint_intval = 7200; /* seconds */ #define ZFS_SCAN_CHECKPOINT_INTVAL SEC_TO_TICK(zfs_scan_checkpoint_intval) /* * fill_weight is non-tunable at runtime, so we copy it at module init from * zfs_scan_fill_weight. Runtime adjustments to zfs_scan_fill_weight would * break queue sorting. */ uint64_t zfs_scan_fill_weight = 3; static uint64_t fill_weight; /* See dsl_scan_should_clear() for details on the memory limit tunables */ uint64_t zfs_scan_mem_lim_min = 16 << 20; /* bytes */ uint64_t zfs_scan_mem_lim_soft_max = 128 << 20; /* bytes */ int zfs_scan_mem_lim_fact = 20; /* fraction of physmem */ int zfs_scan_mem_lim_soft_fact = 20; /* fraction of mem lim above */ unsigned int zfs_scrub_min_time_ms = 1000; /* min millisecs to scrub per txg */ unsigned int zfs_free_min_time_ms = 1000; /* min millisecs to free per txg */ unsigned int zfs_obsolete_min_time_ms = 500; /* min millisecs to obsolete per txg */ unsigned int zfs_resilver_min_time_ms = 3000; /* min millisecs to resilver per txg */ boolean_t zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */ boolean_t zfs_no_scrub_prefetch = B_FALSE; /* set to disable scrub prefetch */ SYSCTL_DECL(_vfs_zfs); -SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight, CTLFLAG_RWTUN, - &zfs_top_maxinflight, 0, "Maximum I/Os per top-level vdev"); SYSCTL_UINT(_vfs_zfs, OID_AUTO, resilver_delay, CTLFLAG_RWTUN, &zfs_resilver_delay, 0, "Number of ticks to delay resilver"); SYSCTL_UINT(_vfs_zfs, OID_AUTO, scrub_delay, CTLFLAG_RWTUN, &zfs_scrub_delay, 0, "Number of ticks to delay scrub"); SYSCTL_UINT(_vfs_zfs, OID_AUTO, scan_idle, CTLFLAG_RWTUN, &zfs_scan_idle, 0, "Idle scan window in clock ticks"); SYSCTL_UINT(_vfs_zfs, OID_AUTO, scan_min_time_ms, CTLFLAG_RWTUN, &zfs_scrub_min_time_ms, 0, "Min millisecs to scrub per txg"); SYSCTL_UINT(_vfs_zfs, OID_AUTO, free_min_time_ms, CTLFLAG_RWTUN, &zfs_free_min_time_ms, 0, "Min millisecs to free per txg"); SYSCTL_UINT(_vfs_zfs, OID_AUTO, resilver_min_time_ms, CTLFLAG_RWTUN, &zfs_resilver_min_time_ms, 0, "Min millisecs to resilver per txg"); SYSCTL_INT(_vfs_zfs, OID_AUTO, no_scrub_io, CTLFLAG_RWTUN, &zfs_no_scrub_io, 0, "Disable scrub I/O"); SYSCTL_INT(_vfs_zfs, OID_AUTO, no_scrub_prefetch, CTLFLAG_RWTUN, &zfs_no_scrub_prefetch, 0, "Disable scrub prefetching"); SYSCTL_UINT(_vfs_zfs, OID_AUTO, zfs_scan_legacy, CTLFLAG_RWTUN, &zfs_scan_legacy, 0, "Scrub using legacy non-sequential method"); SYSCTL_UINT(_vfs_zfs, OID_AUTO, zfs_scan_checkpoint_interval, CTLFLAG_RWTUN, &zfs_scan_checkpoint_intval, 0, "Scan progress on-disk checkpointing interval"); enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE; /* max number of blocks to free in a single TXG */ uint64_t zfs_async_block_max_blocks = UINT64_MAX; SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, free_max_blocks, CTLFLAG_RWTUN, &zfs_async_block_max_blocks, 0, "Maximum number of blocks to free in one TXG"); /* * We wait a few txgs after importing a pool to begin scanning so that * the import / mounting code isn't held up by scrub / resilver IO. * Unfortunately, it is a bit difficult to determine exactly how long * this will take since userspace will trigger fs mounts asynchronously * and the kernel will create zvol minors asynchronously. As a result, * the value provided here is a bit arbitrary, but represents a * reasonable estimate of how many txgs it will take to finish fully * importing a pool */ #define SCAN_IMPORT_WAIT_TXGS 5 #define DSL_SCAN_IS_SCRUB_RESILVER(scn) \ ((scn)->scn_phys.scn_func == POOL_SCAN_SCRUB || \ (scn)->scn_phys.scn_func == POOL_SCAN_RESILVER) extern int zfs_txg_timeout; /* * Enable/disable the processing of the free_bpobj object. */ boolean_t zfs_free_bpobj_enabled = B_TRUE; SYSCTL_INT(_vfs_zfs, OID_AUTO, free_bpobj_enabled, CTLFLAG_RWTUN, &zfs_free_bpobj_enabled, 0, "Enable free_bpobj processing"); /* the order has to match pool_scan_type */ static scan_cb_t *scan_funcs[POOL_SCAN_FUNCS] = { NULL, dsl_scan_scrub_cb, /* POOL_SCAN_SCRUB */ dsl_scan_scrub_cb, /* POOL_SCAN_RESILVER */ }; /* In core node for the scn->scn_queue. Represents a dataset to be scanned */ typedef struct { uint64_t sds_dsobj; uint64_t sds_txg; avl_node_t sds_node; } scan_ds_t; /* * This controls what conditions are placed on dsl_scan_sync_state(): * SYNC_OPTIONAL) write out scn_phys iff scn_bytes_pending == 0 * SYNC_MANDATORY) write out scn_phys always. scn_bytes_pending must be 0. * SYNC_CACHED) if scn_bytes_pending == 0, write out scn_phys. Otherwise * write out the scn_phys_cached version. * See dsl_scan_sync_state for details. */ typedef enum { SYNC_OPTIONAL, SYNC_MANDATORY, SYNC_CACHED } state_sync_type_t; /* * This struct represents the minimum information needed to reconstruct a * zio for sequential scanning. This is useful because many of these will * accumulate in the sequential IO queues before being issued, so saving * memory matters here. */ typedef struct scan_io { /* fields from blkptr_t */ uint64_t sio_offset; uint64_t sio_blk_prop; uint64_t sio_phys_birth; uint64_t sio_birth; zio_cksum_t sio_cksum; uint32_t sio_asize; /* fields from zio_t */ int sio_flags; zbookmark_phys_t sio_zb; /* members for queue sorting */ union { avl_node_t sio_addr_node; /* link into issueing queue */ list_node_t sio_list_node; /* link for issuing to disk */ } sio_nodes; } scan_io_t; struct dsl_scan_io_queue { dsl_scan_t *q_scn; /* associated dsl_scan_t */ vdev_t *q_vd; /* top-level vdev that this queue represents */ /* trees used for sorting I/Os and extents of I/Os */ range_tree_t *q_exts_by_addr; avl_tree_t q_exts_by_size; avl_tree_t q_sios_by_addr; /* members for zio rate limiting */ uint64_t q_maxinflight_bytes; uint64_t q_inflight_bytes; kcondvar_t q_zio_cv; /* used under vd->vdev_scan_io_queue_lock */ /* per txg statistics */ uint64_t q_total_seg_size_this_txg; uint64_t q_segs_this_txg; uint64_t q_total_zio_size_this_txg; uint64_t q_zios_this_txg; }; /* private data for dsl_scan_prefetch_cb() */ typedef struct scan_prefetch_ctx { zfs_refcount_t spc_refcnt; /* refcount for memory management */ dsl_scan_t *spc_scn; /* dsl_scan_t for the pool */ boolean_t spc_root; /* is this prefetch for an objset? */ uint8_t spc_indblkshift; /* dn_indblkshift of current dnode */ uint16_t spc_datablkszsec; /* dn_idatablkszsec of current dnode */ } scan_prefetch_ctx_t; /* private data for dsl_scan_prefetch() */ typedef struct scan_prefetch_issue_ctx { avl_node_t spic_avl_node; /* link into scn->scn_prefetch_queue */ scan_prefetch_ctx_t *spic_spc; /* spc for the callback */ blkptr_t spic_bp; /* bp to prefetch */ zbookmark_phys_t spic_zb; /* bookmark to prefetch */ } scan_prefetch_issue_ctx_t; static void scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags, const zbookmark_phys_t *zb, dsl_scan_io_queue_t *queue); static void scan_io_queue_insert_impl(dsl_scan_io_queue_t *queue, scan_io_t *sio); static dsl_scan_io_queue_t *scan_io_queue_create(vdev_t *vd); static void scan_io_queues_destroy(dsl_scan_t *scn); static kmem_cache_t *sio_cache; void scan_init(void) { /* * This is used in ext_size_compare() to weight segments * based on how sparse they are. This cannot be changed * mid-scan and the tree comparison functions don't currently * have a mechansim for passing additional context to the * compare functions. Thus we store this value globally and * we only allow it to be set at module intiailization time */ fill_weight = zfs_scan_fill_weight; sio_cache = kmem_cache_create("sio_cache", sizeof (scan_io_t), 0, NULL, NULL, NULL, NULL, NULL, 0); } void scan_fini(void) { kmem_cache_destroy(sio_cache); } static inline boolean_t dsl_scan_is_running(const dsl_scan_t *scn) { return (scn->scn_phys.scn_state == DSS_SCANNING); } boolean_t dsl_scan_resilvering(dsl_pool_t *dp) { return (dsl_scan_is_running(dp->dp_scan) && dp->dp_scan->scn_phys.scn_func == POOL_SCAN_RESILVER); } static inline void sio2bp(const scan_io_t *sio, blkptr_t *bp, uint64_t vdev_id) { bzero(bp, sizeof (*bp)); DVA_SET_ASIZE(&bp->blk_dva[0], sio->sio_asize); DVA_SET_VDEV(&bp->blk_dva[0], vdev_id); DVA_SET_OFFSET(&bp->blk_dva[0], sio->sio_offset); bp->blk_prop = sio->sio_blk_prop; bp->blk_phys_birth = sio->sio_phys_birth; bp->blk_birth = sio->sio_birth; bp->blk_fill = 1; /* we always only work with data pointers */ bp->blk_cksum = sio->sio_cksum; } static inline void bp2sio(const blkptr_t *bp, scan_io_t *sio, int dva_i) { /* we discard the vdev id, since we can deduce it from the queue */ sio->sio_offset = DVA_GET_OFFSET(&bp->blk_dva[dva_i]); sio->sio_asize = DVA_GET_ASIZE(&bp->blk_dva[dva_i]); sio->sio_blk_prop = bp->blk_prop; sio->sio_phys_birth = bp->blk_phys_birth; sio->sio_birth = bp->blk_birth; sio->sio_cksum = bp->blk_cksum; } void dsl_scan_global_init(void) { /* * This is used in ext_size_compare() to weight segments * based on how sparse they are. This cannot be changed * mid-scan and the tree comparison functions don't currently * have a mechansim for passing additional context to the * compare functions. Thus we store this value globally and * we only allow it to be set at module intiailization time */ fill_weight = zfs_scan_fill_weight; } int dsl_scan_init(dsl_pool_t *dp, uint64_t txg) { int err; dsl_scan_t *scn; spa_t *spa = dp->dp_spa; uint64_t f; scn = dp->dp_scan = kmem_zalloc(sizeof (dsl_scan_t), KM_SLEEP); scn->scn_dp = dp; /* * It's possible that we're resuming a scan after a reboot so * make sure that the scan_async_destroying flag is initialized * appropriately. */ ASSERT(!scn->scn_async_destroying); scn->scn_async_destroying = spa_feature_is_active(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY); /* * Calculate the max number of in-flight bytes for pool-wide * scanning operations (minimum 1MB). Limits for the issuing * phase are done per top-level vdev and are handled separately. */ scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit * dsl_scan_count_leaves(spa->spa_root_vdev), 1ULL << 20); avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t), offsetof(scan_ds_t, sds_node)); avl_create(&scn->scn_prefetch_queue, scan_prefetch_queue_compare, sizeof (scan_prefetch_issue_ctx_t), offsetof(scan_prefetch_issue_ctx_t, spic_avl_node)); err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, "scrub_func", sizeof (uint64_t), 1, &f); if (err == 0) { /* * There was an old-style scrub in progress. Restart a * new-style scrub from the beginning. */ scn->scn_restart_txg = txg; zfs_dbgmsg("old-style scrub was in progress; " "restarting new-style scrub in txg %llu", (longlong_t)scn->scn_restart_txg); /* * Load the queue obj from the old location so that it * can be freed by dsl_scan_done(). */ (void) zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, "scrub_queue", sizeof (uint64_t), 1, &scn->scn_phys.scn_queue_obj); } else { err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS, &scn->scn_phys); if (err == ENOENT) return (0); else if (err) return (err); /* * We might be restarting after a reboot, so jump the issued * counter to how far we've scanned. We know we're consistent * up to here. */ scn->scn_issued_before_pass = scn->scn_phys.scn_examined; if (dsl_scan_is_running(scn) && spa_prev_software_version(dp->dp_spa) < SPA_VERSION_SCAN) { /* * A new-type scrub was in progress on an old * pool, and the pool was accessed by old * software. Restart from the beginning, since * the old software may have changed the pool in * the meantime. */ scn->scn_restart_txg = txg; zfs_dbgmsg("new-style scrub was modified " "by old software; restarting in txg %llu", (longlong_t)scn->scn_restart_txg); } } bcopy(&scn->scn_phys, &scn->scn_phys_cached, sizeof (scn->scn_phys)); /* reload the queue into the in-core state */ if (scn->scn_phys.scn_queue_obj != 0) { zap_cursor_t zc; zap_attribute_t za; for (zap_cursor_init(&zc, dp->dp_meta_objset, scn->scn_phys.scn_queue_obj); zap_cursor_retrieve(&zc, &za) == 0; (void) zap_cursor_advance(&zc)) { scan_ds_queue_insert(scn, zfs_strtonum(za.za_name, NULL), za.za_first_integer); } zap_cursor_fini(&zc); } spa_scan_stat_init(spa); return (0); } void dsl_scan_fini(dsl_pool_t *dp) { if (dp->dp_scan != NULL) { dsl_scan_t *scn = dp->dp_scan; if (scn->scn_taskq != NULL) taskq_destroy(scn->scn_taskq); scan_ds_queue_clear(scn); avl_destroy(&scn->scn_queue); avl_destroy(&scn->scn_prefetch_queue); kmem_free(dp->dp_scan, sizeof (dsl_scan_t)); dp->dp_scan = NULL; } } static boolean_t dsl_scan_restarting(dsl_scan_t *scn, dmu_tx_t *tx) { return (scn->scn_restart_txg != 0 && scn->scn_restart_txg <= tx->tx_txg); } boolean_t dsl_scan_scrubbing(const dsl_pool_t *dp) { dsl_scan_phys_t *scn_phys = &dp->dp_scan->scn_phys; return (scn_phys->scn_state == DSS_SCANNING && scn_phys->scn_func == POOL_SCAN_SCRUB); } boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn) { return (dsl_scan_scrubbing(scn->scn_dp) && scn->scn_phys.scn_flags & DSF_SCRUB_PAUSED); } /* * Writes out a persistent dsl_scan_phys_t record to the pool directory. * Because we can be running in the block sorting algorithm, we do not always * want to write out the record, only when it is "safe" to do so. This safety * condition is achieved by making sure that the sorting queues are empty * (scn_bytes_pending == 0). When this condition is not true, the sync'd state * is inconsistent with how much actual scanning progress has been made. The * kind of sync to be performed is specified by the sync_type argument. If the * sync is optional, we only sync if the queues are empty. If the sync is * mandatory, we do a hard ASSERT to make sure that the queues are empty. The * third possible state is a "cached" sync. This is done in response to: * 1) The dataset that was in the last sync'd dsl_scan_phys_t having been * destroyed, so we wouldn't be able to restart scanning from it. * 2) The snapshot that was in the last sync'd dsl_scan_phys_t having been * superseded by a newer snapshot. * 3) The dataset that was in the last sync'd dsl_scan_phys_t having been * swapped with its clone. * In all cases, a cached sync simply rewrites the last record we've written, * just slightly modified. For the modifications that are performed to the * last written dsl_scan_phys_t, see dsl_scan_ds_destroyed, * dsl_scan_ds_snapshotted and dsl_scan_ds_clone_swapped. */ static void dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx, state_sync_type_t sync_type) { int i; spa_t *spa = scn->scn_dp->dp_spa; ASSERT(sync_type != SYNC_MANDATORY || scn->scn_bytes_pending == 0); if (scn->scn_bytes_pending == 0) { for (i = 0; i < spa->spa_root_vdev->vdev_children; i++) { vdev_t *vd = spa->spa_root_vdev->vdev_child[i]; dsl_scan_io_queue_t *q = vd->vdev_scan_io_queue; if (q == NULL) continue; mutex_enter(&vd->vdev_scan_io_queue_lock); ASSERT3P(avl_first(&q->q_sios_by_addr), ==, NULL); ASSERT3P(avl_first(&q->q_exts_by_size), ==, NULL); ASSERT3P(range_tree_first(q->q_exts_by_addr), ==, NULL); mutex_exit(&vd->vdev_scan_io_queue_lock); } if (scn->scn_phys.scn_queue_obj != 0) scan_ds_queue_sync(scn, tx); VERIFY0(zap_update(scn->scn_dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS, &scn->scn_phys, tx)); bcopy(&scn->scn_phys, &scn->scn_phys_cached, sizeof (scn->scn_phys)); if (scn->scn_checkpointing) zfs_dbgmsg("finish scan checkpoint"); scn->scn_checkpointing = B_FALSE; scn->scn_last_checkpoint = ddi_get_lbolt(); } else if (sync_type == SYNC_CACHED) { VERIFY0(zap_update(scn->scn_dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS, &scn->scn_phys_cached, tx)); } } /* ARGSUSED */ static int dsl_scan_setup_check(void *arg, dmu_tx_t *tx) { dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; if (dsl_scan_is_running(scn)) return (SET_ERROR(EBUSY)); return (0); } static void dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) { dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; pool_scan_func_t *funcp = arg; dmu_object_type_t ot = 0; dsl_pool_t *dp = scn->scn_dp; spa_t *spa = dp->dp_spa; ASSERT(!dsl_scan_is_running(scn)); ASSERT(*funcp > POOL_SCAN_NONE && *funcp < POOL_SCAN_FUNCS); bzero(&scn->scn_phys, sizeof (scn->scn_phys)); scn->scn_phys.scn_func = *funcp; scn->scn_phys.scn_state = DSS_SCANNING; scn->scn_phys.scn_min_txg = 0; scn->scn_phys.scn_max_txg = tx->tx_txg; scn->scn_phys.scn_ddt_class_max = DDT_CLASSES - 1; /* the entire DDT */ scn->scn_phys.scn_start_time = gethrestime_sec(); scn->scn_phys.scn_errors = 0; scn->scn_phys.scn_to_examine = spa->spa_root_vdev->vdev_stat.vs_alloc; scn->scn_issued_before_pass = 0; scn->scn_restart_txg = 0; scn->scn_done_txg = 0; scn->scn_last_checkpoint = 0; scn->scn_checkpointing = B_FALSE; spa_scan_stat_init(spa); if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max; /* rewrite all disk labels */ vdev_config_dirty(spa->spa_root_vdev); if (vdev_resilver_needed(spa->spa_root_vdev, &scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) { spa_event_notify(spa, NULL, NULL, ESC_ZFS_RESILVER_START); } else { spa_event_notify(spa, NULL, NULL, ESC_ZFS_SCRUB_START); } spa->spa_scrub_started = B_TRUE; /* * If this is an incremental scrub, limit the DDT scrub phase * to just the auto-ditto class (for correctness); the rest * of the scrub should go faster using top-down pruning. */ if (scn->scn_phys.scn_min_txg > TXG_INITIAL) scn->scn_phys.scn_ddt_class_max = DDT_CLASS_DITTO; } /* back to the generic stuff */ if (dp->dp_blkstats == NULL) { dp->dp_blkstats = kmem_alloc(sizeof (zfs_all_blkstats_t), KM_SLEEP); mutex_init(&dp->dp_blkstats->zab_lock, NULL, MUTEX_DEFAULT, NULL); } bzero(&dp->dp_blkstats->zab_type, sizeof (dp->dp_blkstats->zab_type)); if (spa_version(spa) < SPA_VERSION_DSL_SCRUB) ot = DMU_OT_ZAP_OTHER; scn->scn_phys.scn_queue_obj = zap_create(dp->dp_meta_objset, ot ? ot : DMU_OT_SCAN_QUEUE, DMU_OT_NONE, 0, tx); bcopy(&scn->scn_phys, &scn->scn_phys_cached, sizeof (scn->scn_phys)); dsl_scan_sync_state(scn, tx, SYNC_MANDATORY); spa_history_log_internal(spa, "scan setup", tx, "func=%u mintxg=%llu maxtxg=%llu", *funcp, scn->scn_phys.scn_min_txg, scn->scn_phys.scn_max_txg); } /* * Called by the ZFS_IOC_POOL_SCAN ioctl to start a scrub or resilver. * Can also be called to resume a paused scrub. */ int dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) { spa_t *spa = dp->dp_spa; dsl_scan_t *scn = dp->dp_scan; /* * Purge all vdev caches and probe all devices. We do this here * rather than in sync context because this requires a writer lock * on the spa_config lock, which we can't do from sync context. The * spa_scrub_reopen flag indicates that vdev_open() should not * attempt to start another scrub. */ spa_vdev_state_enter(spa, SCL_NONE); spa->spa_scrub_reopen = B_TRUE; vdev_reopen(spa->spa_root_vdev); spa->spa_scrub_reopen = B_FALSE; (void) spa_vdev_state_exit(spa, NULL, 0); if (func == POOL_SCAN_SCRUB && dsl_scan_is_paused_scrub(scn)) { /* got scrub start cmd, resume paused scrub */ int err = dsl_scrub_set_pause_resume(scn->scn_dp, POOL_SCRUB_NORMAL); if (err == 0) { spa_event_notify(spa, NULL, NULL, ESC_ZFS_SCRUB_RESUME); return (ECANCELED); } return (SET_ERROR(err)); } return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check, dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED)); } /* ARGSUSED */ static void dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) { static const char *old_names[] = { "scrub_bookmark", "scrub_ddt_bookmark", "scrub_ddt_class_max", "scrub_queue", "scrub_min_txg", "scrub_max_txg", "scrub_func", "scrub_errors", NULL }; dsl_pool_t *dp = scn->scn_dp; spa_t *spa = dp->dp_spa; int i; /* Remove any remnants of an old-style scrub. */ for (i = 0; old_names[i]; i++) { (void) zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, old_names[i], tx); } if (scn->scn_phys.scn_queue_obj != 0) { VERIFY0(dmu_object_free(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, tx)); scn->scn_phys.scn_queue_obj = 0; } scan_ds_queue_clear(scn); scn->scn_phys.scn_flags &= ~DSF_SCRUB_PAUSED; /* * If we were "restarted" from a stopped state, don't bother * with anything else. */ if (!dsl_scan_is_running(scn)) { ASSERT(!scn->scn_is_sorted); return; } if (scn->scn_is_sorted) { scan_io_queues_destroy(scn); scn->scn_is_sorted = B_FALSE; if (scn->scn_taskq != NULL) { taskq_destroy(scn->scn_taskq); scn->scn_taskq = NULL; } } scn->scn_phys.scn_state = complete ? DSS_FINISHED : DSS_CANCELED; if (dsl_scan_restarting(scn, tx)) spa_history_log_internal(spa, "scan aborted, restarting", tx, "errors=%llu", spa_get_errlog_size(spa)); else if (!complete) spa_history_log_internal(spa, "scan cancelled", tx, "errors=%llu", spa_get_errlog_size(spa)); else spa_history_log_internal(spa, "scan done", tx, "errors=%llu", spa_get_errlog_size(spa)); if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { spa->spa_scrub_started = B_FALSE; spa->spa_scrub_active = B_FALSE; /* * If the scrub/resilver completed, update all DTLs to * reflect this. Whether it succeeded or not, vacate * all temporary scrub DTLs. * * As the scrub does not currently support traversing * data that have been freed but are part of a checkpoint, * we don't mark the scrub as done in the DTLs as faults * may still exist in those vdevs. */ if (complete && !spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { vdev_dtl_reassess(spa->spa_root_vdev, tx->tx_txg, scn->scn_phys.scn_max_txg, B_TRUE); spa_event_notify(spa, NULL, NULL, scn->scn_phys.scn_min_txg ? ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH); } else { vdev_dtl_reassess(spa->spa_root_vdev, tx->tx_txg, 0, B_TRUE); } spa_errlog_rotate(spa); /* * We may have finished replacing a device. * Let the async thread assess this and handle the detach. */ spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); } scn->scn_phys.scn_end_time = gethrestime_sec(); ASSERT(!dsl_scan_is_running(scn)); } /* ARGSUSED */ static int dsl_scan_cancel_check(void *arg, dmu_tx_t *tx) { dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; if (!dsl_scan_is_running(scn)) return (SET_ERROR(ENOENT)); return (0); } /* ARGSUSED */ static void dsl_scan_cancel_sync(void *arg, dmu_tx_t *tx) { dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; dsl_scan_done(scn, B_FALSE, tx); dsl_scan_sync_state(scn, tx, SYNC_MANDATORY); spa_event_notify(scn->scn_dp->dp_spa, NULL, NULL, ESC_ZFS_SCRUB_ABORT); } int dsl_scan_cancel(dsl_pool_t *dp) { return (dsl_sync_task(spa_name(dp->dp_spa), dsl_scan_cancel_check, dsl_scan_cancel_sync, NULL, 3, ZFS_SPACE_CHECK_RESERVED)); } static int dsl_scrub_pause_resume_check(void *arg, dmu_tx_t *tx) { pool_scrub_cmd_t *cmd = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_scan_t *scn = dp->dp_scan; if (*cmd == POOL_SCRUB_PAUSE) { /* can't pause a scrub when there is no in-progress scrub */ if (!dsl_scan_scrubbing(dp)) return (SET_ERROR(ENOENT)); /* can't pause a paused scrub */ if (dsl_scan_is_paused_scrub(scn)) return (SET_ERROR(EBUSY)); } else if (*cmd != POOL_SCRUB_NORMAL) { return (SET_ERROR(ENOTSUP)); } return (0); } static void dsl_scrub_pause_resume_sync(void *arg, dmu_tx_t *tx) { pool_scrub_cmd_t *cmd = arg; dsl_pool_t *dp = dmu_tx_pool(tx); spa_t *spa = dp->dp_spa; dsl_scan_t *scn = dp->dp_scan; if (*cmd == POOL_SCRUB_PAUSE) { /* can't pause a scrub when there is no in-progress scrub */ spa->spa_scan_pass_scrub_pause = gethrestime_sec(); scn->scn_phys.scn_flags |= DSF_SCRUB_PAUSED; scn->scn_phys_cached.scn_flags |= DSF_SCRUB_PAUSED; dsl_scan_sync_state(scn, tx, SYNC_CACHED); spa_event_notify(spa, NULL, NULL, ESC_ZFS_SCRUB_PAUSED); } else { ASSERT3U(*cmd, ==, POOL_SCRUB_NORMAL); if (dsl_scan_is_paused_scrub(scn)) { /* * We need to keep track of how much time we spend * paused per pass so that we can adjust the scrub rate * shown in the output of 'zpool status' */ spa->spa_scan_pass_scrub_spent_paused += gethrestime_sec() - spa->spa_scan_pass_scrub_pause; spa->spa_scan_pass_scrub_pause = 0; scn->scn_phys.scn_flags &= ~DSF_SCRUB_PAUSED; scn->scn_phys_cached.scn_flags &= ~DSF_SCRUB_PAUSED; dsl_scan_sync_state(scn, tx, SYNC_CACHED); } } } /* * Set scrub pause/resume state if it makes sense to do so */ int dsl_scrub_set_pause_resume(const dsl_pool_t *dp, pool_scrub_cmd_t cmd) { return (dsl_sync_task(spa_name(dp->dp_spa), dsl_scrub_pause_resume_check, dsl_scrub_pause_resume_sync, &cmd, 3, ZFS_SPACE_CHECK_RESERVED)); } /* start a new scan, or restart an existing one. */ void dsl_resilver_restart(dsl_pool_t *dp, uint64_t txg) { if (txg == 0) { dmu_tx_t *tx; tx = dmu_tx_create_dd(dp->dp_mos_dir); VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT)); txg = dmu_tx_get_txg(tx); dp->dp_scan->scn_restart_txg = txg; dmu_tx_commit(tx); } else { dp->dp_scan->scn_restart_txg = txg; } zfs_dbgmsg("restarting resilver txg=%llu", txg); } void dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bp) { zio_free(dp->dp_spa, txg, bp); } void dsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp) { ASSERT(dsl_pool_sync_context(dp)); zio_nowait(zio_free_sync(pio, dp->dp_spa, txg, bpp, BP_GET_PSIZE(bpp), pio->io_flags)); } static int scan_ds_queue_compare(const void *a, const void *b) { const scan_ds_t *sds_a = a, *sds_b = b; if (sds_a->sds_dsobj < sds_b->sds_dsobj) return (-1); if (sds_a->sds_dsobj == sds_b->sds_dsobj) return (0); return (1); } static void scan_ds_queue_clear(dsl_scan_t *scn) { void *cookie = NULL; scan_ds_t *sds; while ((sds = avl_destroy_nodes(&scn->scn_queue, &cookie)) != NULL) { kmem_free(sds, sizeof (*sds)); } } static boolean_t scan_ds_queue_contains(dsl_scan_t *scn, uint64_t dsobj, uint64_t *txg) { scan_ds_t srch, *sds; srch.sds_dsobj = dsobj; sds = avl_find(&scn->scn_queue, &srch, NULL); if (sds != NULL && txg != NULL) *txg = sds->sds_txg; return (sds != NULL); } static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg) { scan_ds_t *sds; avl_index_t where; sds = kmem_zalloc(sizeof (*sds), KM_SLEEP); sds->sds_dsobj = dsobj; sds->sds_txg = txg; VERIFY3P(avl_find(&scn->scn_queue, sds, &where), ==, NULL); avl_insert(&scn->scn_queue, sds, where); } static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj) { scan_ds_t srch, *sds; srch.sds_dsobj = dsobj; sds = avl_find(&scn->scn_queue, &srch, NULL); VERIFY(sds != NULL); avl_remove(&scn->scn_queue, sds); kmem_free(sds, sizeof (*sds)); } static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx) { dsl_pool_t *dp = scn->scn_dp; spa_t *spa = dp->dp_spa; dmu_object_type_t ot = (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) ? DMU_OT_SCAN_QUEUE : DMU_OT_ZAP_OTHER; ASSERT0(scn->scn_bytes_pending); ASSERT(scn->scn_phys.scn_queue_obj != 0); VERIFY0(dmu_object_free(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, tx)); scn->scn_phys.scn_queue_obj = zap_create(dp->dp_meta_objset, ot, DMU_OT_NONE, 0, tx); for (scan_ds_t *sds = avl_first(&scn->scn_queue); sds != NULL; sds = AVL_NEXT(&scn->scn_queue, sds)) { VERIFY0(zap_add_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, sds->sds_dsobj, sds->sds_txg, tx)); } } /* * Computes the memory limit state that we're currently in. A sorted scan * needs quite a bit of memory to hold the sorting queue, so we need to * reasonably constrain the size so it doesn't impact overall system * performance. We compute two limits: * 1) Hard memory limit: if the amount of memory used by the sorting * queues on a pool gets above this value, we stop the metadata * scanning portion and start issuing the queued up and sorted * I/Os to reduce memory usage. * This limit is calculated as a fraction of physmem (by default 5%). * We constrain the lower bound of the hard limit to an absolute * minimum of zfs_scan_mem_lim_min (default: 16 MiB). We also constrain * the upper bound to 5% of the total pool size - no chance we'll * ever need that much memory, but just to keep the value in check. * 2) Soft memory limit: once we hit the hard memory limit, we start * issuing I/O to reduce queue memory usage, but we don't want to * completely empty out the queues, since we might be able to find I/Os * that will fill in the gaps of our non-sequential IOs at some point * in the future. So we stop the issuing of I/Os once the amount of * memory used drops below the soft limit (at which point we stop issuing * I/O and start scanning metadata again). * * This limit is calculated by subtracting a fraction of the hard * limit from the hard limit. By default this fraction is 5%, so * the soft limit is 95% of the hard limit. We cap the size of the * difference between the hard and soft limits at an absolute * maximum of zfs_scan_mem_lim_soft_max (default: 128 MiB) - this is * sufficient to not cause too frequent switching between the * metadata scan and I/O issue (even at 2k recordsize, 128 MiB's * worth of queues is about 1.2 GiB of on-pool data, so scanning * that should take at least a decent fraction of a second). */ static boolean_t dsl_scan_should_clear(dsl_scan_t *scn) { vdev_t *rvd = scn->scn_dp->dp_spa->spa_root_vdev; uint64_t mlim_hard, mlim_soft, mused; uint64_t alloc = metaslab_class_get_alloc(spa_normal_class( scn->scn_dp->dp_spa)); mlim_hard = MAX((physmem / zfs_scan_mem_lim_fact) * PAGESIZE, zfs_scan_mem_lim_min); mlim_hard = MIN(mlim_hard, alloc / 20); mlim_soft = mlim_hard - MIN(mlim_hard / zfs_scan_mem_lim_soft_fact, zfs_scan_mem_lim_soft_max); mused = 0; for (uint64_t i = 0; i < rvd->vdev_children; i++) { vdev_t *tvd = rvd->vdev_child[i]; dsl_scan_io_queue_t *queue; mutex_enter(&tvd->vdev_scan_io_queue_lock); queue = tvd->vdev_scan_io_queue; if (queue != NULL) { /* #extents in exts_by_size = # in exts_by_addr */ mused += avl_numnodes(&queue->q_exts_by_size) * sizeof (range_seg_t) + avl_numnodes(&queue->q_sios_by_addr) * sizeof (scan_io_t); } mutex_exit(&tvd->vdev_scan_io_queue_lock); } dprintf("current scan memory usage: %llu bytes\n", (longlong_t)mused); if (mused == 0) ASSERT0(scn->scn_bytes_pending); /* * If we are above our hard limit, we need to clear out memory. * If we are below our soft limit, we need to accumulate sequential IOs. * Otherwise, we should keep doing whatever we are currently doing. */ if (mused >= mlim_hard) return (B_TRUE); else if (mused < mlim_soft) return (B_FALSE); else return (scn->scn_clearing); } static boolean_t dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb) { /* we never skip user/group accounting objects */ if (zb && (int64_t)zb->zb_object < 0) return (B_FALSE); if (scn->scn_suspending) return (B_TRUE); /* we're already suspending */ if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark)) return (B_FALSE); /* we're resuming */ /* We only know how to resume from level-0 blocks. */ if (zb && zb->zb_level != 0) return (B_FALSE); /* * We suspend if: * - we have scanned for at least the minimum time (default 1 sec * for scrub, 3 sec for resilver), and either we have sufficient * dirty data that we are starting to write more quickly * (default 30%), or someone is explicitly waiting for this txg * to complete. * or * - the spa is shutting down because this pool is being exported * or the machine is rebooting. * or * - the scan queue has reached its memory use limit */ uint64_t elapsed_nanosecs = gethrtime(); uint64_t curr_time_ns = gethrtime(); uint64_t scan_time_ns = curr_time_ns - scn->scn_sync_start_time; uint64_t sync_time_ns = curr_time_ns - scn->scn_dp->dp_spa->spa_sync_starttime; int dirty_pct = scn->scn_dp->dp_dirty_total * 100 / zfs_dirty_data_max; int mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ? zfs_resilver_min_time_ms : zfs_scrub_min_time_ms; if ((NSEC2MSEC(scan_time_ns) > mintime && (dirty_pct >= zfs_vdev_async_write_active_min_dirty_percent || txg_sync_waiting(scn->scn_dp) || NSEC2SEC(sync_time_ns) >= zfs_txg_timeout)) || spa_shutting_down(scn->scn_dp->dp_spa) || (zfs_scan_strict_mem_lim && dsl_scan_should_clear(scn))) { if (zb) { dprintf("suspending at bookmark %llx/%llx/%llx/%llx\n", (longlong_t)zb->zb_objset, (longlong_t)zb->zb_object, (longlong_t)zb->zb_level, (longlong_t)zb->zb_blkid); scn->scn_phys.scn_bookmark = *zb; } else { dsl_scan_phys_t *scnp = &scn->scn_phys; dprintf("suspending at at DDT bookmark " "%llx/%llx/%llx/%llx\n", (longlong_t)scnp->scn_ddt_bookmark.ddb_class, (longlong_t)scnp->scn_ddt_bookmark.ddb_type, (longlong_t)scnp->scn_ddt_bookmark.ddb_checksum, (longlong_t)scnp->scn_ddt_bookmark.ddb_cursor); } scn->scn_suspending = B_TRUE; return (B_TRUE); } return (B_FALSE); } typedef struct zil_scan_arg { dsl_pool_t *zsa_dp; zil_header_t *zsa_zh; } zil_scan_arg_t; /* ARGSUSED */ static int dsl_scan_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg) { zil_scan_arg_t *zsa = arg; dsl_pool_t *dp = zsa->zsa_dp; dsl_scan_t *scn = dp->dp_scan; zil_header_t *zh = zsa->zsa_zh; zbookmark_phys_t zb; if (BP_IS_HOLE(bp) || bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) return (0); /* * One block ("stubby") can be allocated a long time ago; we * want to visit that one because it has been allocated * (on-disk) even if it hasn't been claimed (even though for * scrub there's nothing to do to it). */ if (claim_txg == 0 && bp->blk_birth >= spa_min_claim_txg(dp->dp_spa)) return (0); SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET], ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); VERIFY(0 == scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb)); return (0); } /* ARGSUSED */ static int dsl_scan_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg) { if (lrc->lrc_txtype == TX_WRITE) { zil_scan_arg_t *zsa = arg; dsl_pool_t *dp = zsa->zsa_dp; dsl_scan_t *scn = dp->dp_scan; zil_header_t *zh = zsa->zsa_zh; lr_write_t *lr = (lr_write_t *)lrc; blkptr_t *bp = &lr->lr_blkptr; zbookmark_phys_t zb; if (BP_IS_HOLE(bp) || bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) return (0); /* * birth can be < claim_txg if this record's txg is * already txg sync'ed (but this log block contains * other records that are not synced) */ if (claim_txg == 0 || bp->blk_birth < claim_txg) return (0); SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET], lr->lr_foid, ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); VERIFY(0 == scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb)); } return (0); } static void dsl_scan_zil(dsl_pool_t *dp, zil_header_t *zh) { uint64_t claim_txg = zh->zh_claim_txg; zil_scan_arg_t zsa = { dp, zh }; zilog_t *zilog; ASSERT(spa_writeable(dp->dp_spa)); /* * We only want to visit blocks that have been claimed * but not yet replayed. */ if (claim_txg == 0) return; zilog = zil_alloc(dp->dp_meta_objset, zh); (void) zil_parse(zilog, dsl_scan_zil_block, dsl_scan_zil_record, &zsa, claim_txg); zil_free(zilog); } /* * We compare scan_prefetch_issue_ctx_t's based on their bookmarks. The idea * here is to sort the AVL tree by the order each block will be needed. */ static int scan_prefetch_queue_compare(const void *a, const void *b) { const scan_prefetch_issue_ctx_t *spic_a = a, *spic_b = b; const scan_prefetch_ctx_t *spc_a = spic_a->spic_spc; const scan_prefetch_ctx_t *spc_b = spic_b->spic_spc; return (zbookmark_compare(spc_a->spc_datablkszsec, spc_a->spc_indblkshift, spc_b->spc_datablkszsec, spc_b->spc_indblkshift, &spic_a->spic_zb, &spic_b->spic_zb)); } static void scan_prefetch_ctx_rele(scan_prefetch_ctx_t *spc, void *tag) { if (zfs_refcount_remove(&spc->spc_refcnt, tag) == 0) { zfs_refcount_destroy(&spc->spc_refcnt); kmem_free(spc, sizeof (scan_prefetch_ctx_t)); } } static scan_prefetch_ctx_t * scan_prefetch_ctx_create(dsl_scan_t *scn, dnode_phys_t *dnp, void *tag) { scan_prefetch_ctx_t *spc; spc = kmem_alloc(sizeof (scan_prefetch_ctx_t), KM_SLEEP); zfs_refcount_create(&spc->spc_refcnt); zfs_refcount_add(&spc->spc_refcnt, tag); spc->spc_scn = scn; if (dnp != NULL) { spc->spc_datablkszsec = dnp->dn_datablkszsec; spc->spc_indblkshift = dnp->dn_indblkshift; spc->spc_root = B_FALSE; } else { spc->spc_datablkszsec = 0; spc->spc_indblkshift = 0; spc->spc_root = B_TRUE; } return (spc); } static void scan_prefetch_ctx_add_ref(scan_prefetch_ctx_t *spc, void *tag) { zfs_refcount_add(&spc->spc_refcnt, tag); } static boolean_t dsl_scan_check_prefetch_resume(scan_prefetch_ctx_t *spc, const zbookmark_phys_t *zb) { zbookmark_phys_t *last_zb = &spc->spc_scn->scn_prefetch_bookmark; dnode_phys_t tmp_dnp; dnode_phys_t *dnp = (spc->spc_root) ? NULL : &tmp_dnp; if (zb->zb_objset != last_zb->zb_objset) return (B_TRUE); if ((int64_t)zb->zb_object < 0) return (B_FALSE); tmp_dnp.dn_datablkszsec = spc->spc_datablkszsec; tmp_dnp.dn_indblkshift = spc->spc_indblkshift; if (zbookmark_subtree_completed(dnp, zb, last_zb)) return (B_TRUE); return (B_FALSE); } static void dsl_scan_prefetch(scan_prefetch_ctx_t *spc, blkptr_t *bp, zbookmark_phys_t *zb) { avl_index_t idx; dsl_scan_t *scn = spc->spc_scn; spa_t *spa = scn->scn_dp->dp_spa; scan_prefetch_issue_ctx_t *spic; if (zfs_no_scrub_prefetch) return; if (BP_IS_HOLE(bp) || bp->blk_birth <= scn->scn_phys.scn_cur_min_txg || (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) return; if (dsl_scan_check_prefetch_resume(spc, zb)) return; scan_prefetch_ctx_add_ref(spc, scn); spic = kmem_alloc(sizeof (scan_prefetch_issue_ctx_t), KM_SLEEP); spic->spic_spc = spc; spic->spic_bp = *bp; spic->spic_zb = *zb; /* * Add the IO to the queue of blocks to prefetch. This allows us to * prioritize blocks that we will need first for the main traversal * thread. */ mutex_enter(&spa->spa_scrub_lock); if (avl_find(&scn->scn_prefetch_queue, spic, &idx) != NULL) { /* this block is already queued for prefetch */ kmem_free(spic, sizeof (scan_prefetch_issue_ctx_t)); scan_prefetch_ctx_rele(spc, scn); mutex_exit(&spa->spa_scrub_lock); return; } avl_insert(&scn->scn_prefetch_queue, spic, idx); cv_broadcast(&spa->spa_scrub_io_cv); mutex_exit(&spa->spa_scrub_lock); } static void dsl_scan_prefetch_dnode(dsl_scan_t *scn, dnode_phys_t *dnp, uint64_t objset, uint64_t object) { int i; zbookmark_phys_t zb; scan_prefetch_ctx_t *spc; if (dnp->dn_nblkptr == 0 && !(dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) return; SET_BOOKMARK(&zb, objset, object, 0, 0); spc = scan_prefetch_ctx_create(scn, dnp, FTAG); for (i = 0; i < dnp->dn_nblkptr; i++) { zb.zb_level = BP_GET_LEVEL(&dnp->dn_blkptr[i]); zb.zb_blkid = i; dsl_scan_prefetch(spc, &dnp->dn_blkptr[i], &zb); } if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { zb.zb_level = 0; zb.zb_blkid = DMU_SPILL_BLKID; dsl_scan_prefetch(spc, &dnp->dn_spill, &zb); } scan_prefetch_ctx_rele(spc, FTAG); } void dsl_scan_prefetch_cb(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp, arc_buf_t *buf, void *private) { scan_prefetch_ctx_t *spc = private; dsl_scan_t *scn = spc->spc_scn; spa_t *spa = scn->scn_dp->dp_spa; /* broadcast that the IO has completed for rate limitting purposes */ mutex_enter(&spa->spa_scrub_lock); ASSERT3U(spa->spa_scrub_inflight, >=, BP_GET_PSIZE(bp)); spa->spa_scrub_inflight -= BP_GET_PSIZE(bp); cv_broadcast(&spa->spa_scrub_io_cv); mutex_exit(&spa->spa_scrub_lock); /* if there was an error or we are done prefetching, just cleanup */ if (buf == NULL || scn->scn_suspending) goto out; if (BP_GET_LEVEL(bp) > 0) { int i; blkptr_t *cbp; int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; zbookmark_phys_t czb; for (i = 0, cbp = buf->b_data; i < epb; i++, cbp++) { SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, zb->zb_level - 1, zb->zb_blkid * epb + i); dsl_scan_prefetch(spc, cbp, &czb); } } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) { dnode_phys_t *cdnp = buf->b_data; int i; int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; for (i = 0, cdnp = buf->b_data; i < epb; i += cdnp->dn_extra_slots + 1, cdnp += cdnp->dn_extra_slots + 1) { dsl_scan_prefetch_dnode(scn, cdnp, zb->zb_objset, zb->zb_blkid * epb + i); } } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { objset_phys_t *osp = buf->b_data; dsl_scan_prefetch_dnode(scn, &osp->os_meta_dnode, zb->zb_objset, DMU_META_DNODE_OBJECT); if (OBJSET_BUF_HAS_USERUSED(buf)) { dsl_scan_prefetch_dnode(scn, &osp->os_groupused_dnode, zb->zb_objset, DMU_GROUPUSED_OBJECT); dsl_scan_prefetch_dnode(scn, &osp->os_userused_dnode, zb->zb_objset, DMU_USERUSED_OBJECT); } } out: if (buf != NULL) arc_buf_destroy(buf, private); scan_prefetch_ctx_rele(spc, scn); } /* ARGSUSED */ static void dsl_scan_prefetch_thread(void *arg) { dsl_scan_t *scn = arg; spa_t *spa = scn->scn_dp->dp_spa; vdev_t *rvd = spa->spa_root_vdev; - uint64_t maxinflight = rvd->vdev_children * zfs_top_maxinflight; scan_prefetch_issue_ctx_t *spic; /* loop until we are told to stop */ while (!scn->scn_prefetch_stop) { arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PRESCIENT_PREFETCH | ARC_FLAG_PREFETCH; int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD; mutex_enter(&spa->spa_scrub_lock); /* * Wait until we have an IO to issue and are not above our * maximum in flight limit. */ while (!scn->scn_prefetch_stop && (avl_numnodes(&scn->scn_prefetch_queue) == 0 || spa->spa_scrub_inflight >= scn->scn_maxinflight_bytes)) { cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); } /* recheck if we should stop since we waited for the cv */ if (scn->scn_prefetch_stop) { mutex_exit(&spa->spa_scrub_lock); break; } /* remove the prefetch IO from the tree */ spic = avl_first(&scn->scn_prefetch_queue); spa->spa_scrub_inflight += BP_GET_PSIZE(&spic->spic_bp); avl_remove(&scn->scn_prefetch_queue, spic); mutex_exit(&spa->spa_scrub_lock); /* issue the prefetch asynchronously */ (void) arc_read(scn->scn_zio_root, scn->scn_dp->dp_spa, &spic->spic_bp, dsl_scan_prefetch_cb, spic->spic_spc, ZIO_PRIORITY_SCRUB, zio_flags, &flags, &spic->spic_zb); kmem_free(spic, sizeof (scan_prefetch_issue_ctx_t)); } ASSERT(scn->scn_prefetch_stop); /* free any prefetches we didn't get to complete */ mutex_enter(&spa->spa_scrub_lock); while ((spic = avl_first(&scn->scn_prefetch_queue)) != NULL) { avl_remove(&scn->scn_prefetch_queue, spic); scan_prefetch_ctx_rele(spic->spic_spc, scn); kmem_free(spic, sizeof (scan_prefetch_issue_ctx_t)); } ASSERT0(avl_numnodes(&scn->scn_prefetch_queue)); mutex_exit(&spa->spa_scrub_lock); } static boolean_t dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp, const zbookmark_phys_t *zb) { /* * We never skip over user/group accounting objects (obj<0) */ if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark) && (int64_t)zb->zb_object >= 0) { /* * If we already visited this bp & everything below (in * a prior txg sync), don't bother doing it again. */ if (zbookmark_subtree_completed(dnp, zb, &scn->scn_phys.scn_bookmark)) return (B_TRUE); /* * If we found the block we're trying to resume from, or * we went past it to a different object, zero it out to * indicate that it's OK to start checking for suspending * again. */ if (bcmp(zb, &scn->scn_phys.scn_bookmark, sizeof (*zb)) == 0 || zb->zb_object > scn->scn_phys.scn_bookmark.zb_object) { dprintf("resuming at %llx/%llx/%llx/%llx\n", (longlong_t)zb->zb_objset, (longlong_t)zb->zb_object, (longlong_t)zb->zb_level, (longlong_t)zb->zb_blkid); bzero(&scn->scn_phys.scn_bookmark, sizeof (*zb)); } } return (B_FALSE); } static void dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, dnode_phys_t *dnp, dsl_dataset_t *ds, dsl_scan_t *scn, dmu_objset_type_t ostype, dmu_tx_t *tx); static void dsl_scan_visitdnode( dsl_scan_t *, dsl_dataset_t *ds, dmu_objset_type_t ostype, dnode_phys_t *dnp, uint64_t object, dmu_tx_t *tx); /* * Return nonzero on i/o error. * Return new buf to write out in *bufp. */ static int dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_phys_t *zb, dmu_tx_t *tx) { dsl_pool_t *dp = scn->scn_dp; int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD; int err; if (BP_GET_LEVEL(bp) > 0) { arc_flags_t flags = ARC_FLAG_WAIT; int i; blkptr_t *cbp; int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; arc_buf_t *buf; err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_SCRUB, zio_flags, &flags, zb); if (err) { scn->scn_phys.scn_errors++; return (err); } for (i = 0, cbp = buf->b_data; i < epb; i++, cbp++) { zbookmark_phys_t czb; SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, zb->zb_level - 1, zb->zb_blkid * epb + i); dsl_scan_visitbp(cbp, &czb, dnp, ds, scn, ostype, tx); } arc_buf_destroy(buf, &buf); } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) { arc_flags_t flags = ARC_FLAG_WAIT; dnode_phys_t *cdnp; int i; int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; arc_buf_t *buf; err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_SCRUB, zio_flags, &flags, zb); if (err) { scn->scn_phys.scn_errors++; return (err); } for (i = 0, cdnp = buf->b_data; i < epb; i += cdnp->dn_extra_slots + 1, cdnp += cdnp->dn_extra_slots + 1) { dsl_scan_visitdnode(scn, ds, ostype, cdnp, zb->zb_blkid * epb + i, tx); } arc_buf_destroy(buf, &buf); } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { arc_flags_t flags = ARC_FLAG_WAIT; objset_phys_t *osp; arc_buf_t *buf; err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_SCRUB, zio_flags, &flags, zb); if (err) { scn->scn_phys.scn_errors++; return (err); } osp = buf->b_data; dsl_scan_visitdnode(scn, ds, osp->os_type, &osp->os_meta_dnode, DMU_META_DNODE_OBJECT, tx); if (OBJSET_BUF_HAS_USERUSED(buf)) { /* * We also always visit user/group accounting * objects, and never skip them, even if we are * suspending. This is necessary so that the space * deltas from this txg get integrated. */ dsl_scan_visitdnode(scn, ds, osp->os_type, &osp->os_groupused_dnode, DMU_GROUPUSED_OBJECT, tx); dsl_scan_visitdnode(scn, ds, osp->os_type, &osp->os_userused_dnode, DMU_USERUSED_OBJECT, tx); } arc_buf_destroy(buf, &buf); } return (0); } static void dsl_scan_visitdnode(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, dnode_phys_t *dnp, uint64_t object, dmu_tx_t *tx) { int j; for (j = 0; j < dnp->dn_nblkptr; j++) { zbookmark_phys_t czb; SET_BOOKMARK(&czb, ds ? ds->ds_object : 0, object, dnp->dn_nlevels - 1, j); dsl_scan_visitbp(&dnp->dn_blkptr[j], &czb, dnp, ds, scn, ostype, tx); } if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { zbookmark_phys_t czb; SET_BOOKMARK(&czb, ds ? ds->ds_object : 0, object, 0, DMU_SPILL_BLKID); dsl_scan_visitbp(DN_SPILL_BLKPTR(dnp), &czb, dnp, ds, scn, ostype, tx); } } /* * The arguments are in this order because mdb can only print the * first 5; we want them to be useful. */ static void dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, dnode_phys_t *dnp, dsl_dataset_t *ds, dsl_scan_t *scn, dmu_objset_type_t ostype, dmu_tx_t *tx) { dsl_pool_t *dp = scn->scn_dp; blkptr_t *bp_toread = NULL; if (dsl_scan_check_suspend(scn, zb)) return; if (dsl_scan_check_resume(scn, dnp, zb)) return; scn->scn_visited_this_txg++; dprintf_bp(bp, "visiting ds=%p/%llu zb=%llx/%llx/%llx/%llx bp=%p", ds, ds ? ds->ds_object : 0, zb->zb_objset, zb->zb_object, zb->zb_level, zb->zb_blkid, bp); if (BP_IS_HOLE(bp)) { scn->scn_holes_this_txg++; return; } if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) { scn->scn_lt_min_this_txg++; return; } bp_toread = kmem_alloc(sizeof (blkptr_t), KM_SLEEP); *bp_toread = *bp; if (dsl_scan_recurse(scn, ds, ostype, dnp, bp_toread, zb, tx) != 0) return; /* * If dsl_scan_ddt() has already visited this block, it will have * already done any translations or scrubbing, so don't call the * callback again. */ if (ddt_class_contains(dp->dp_spa, scn->scn_phys.scn_ddt_class_max, bp)) { scn->scn_ddt_contained_this_txg++; goto out; } /* * If this block is from the future (after cur_max_txg), then we * are doing this on behalf of a deleted snapshot, and we will * revisit the future block on the next pass of this dataset. * Don't scan it now unless we need to because something * under it was modified. */ if (BP_PHYSICAL_BIRTH(bp) > scn->scn_phys.scn_cur_max_txg) { scn->scn_gt_max_this_txg++; goto out; } scan_funcs[scn->scn_phys.scn_func](dp, bp, zb); out: kmem_free(bp_toread, sizeof (blkptr_t)); } static void dsl_scan_visit_rootbp(dsl_scan_t *scn, dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) { zbookmark_phys_t zb; scan_prefetch_ctx_t *spc; SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); if (ZB_IS_ZERO(&scn->scn_phys.scn_bookmark)) { SET_BOOKMARK(&scn->scn_prefetch_bookmark, zb.zb_objset, 0, 0, 0); } else { scn->scn_prefetch_bookmark = scn->scn_phys.scn_bookmark; } scn->scn_objsets_visited_this_txg++; spc = scan_prefetch_ctx_create(scn, NULL, FTAG); dsl_scan_prefetch(spc, bp, &zb); scan_prefetch_ctx_rele(spc, FTAG); dsl_scan_visitbp(bp, &zb, NULL, ds, scn, DMU_OST_NONE, tx); dprintf_ds(ds, "finished scan%s", ""); } static void ds_destroyed_scn_phys(dsl_dataset_t *ds, dsl_scan_phys_t *scn_phys) { if (scn_phys->scn_bookmark.zb_objset == ds->ds_object) { if (ds->ds_is_snapshot) { /* * Note: * - scn_cur_{min,max}_txg stays the same. * - Setting the flag is not really necessary if * scn_cur_max_txg == scn_max_txg, because there * is nothing after this snapshot that we care * about. However, we set it anyway and then * ignore it when we retraverse it in * dsl_scan_visitds(). */ scn_phys->scn_bookmark.zb_objset = dsl_dataset_phys(ds)->ds_next_snap_obj; zfs_dbgmsg("destroying ds %llu; currently traversing; " "reset zb_objset to %llu", (u_longlong_t)ds->ds_object, (u_longlong_t)dsl_dataset_phys(ds)-> ds_next_snap_obj); scn_phys->scn_flags |= DSF_VISIT_DS_AGAIN; } else { SET_BOOKMARK(&scn_phys->scn_bookmark, ZB_DESTROYED_OBJSET, 0, 0, 0); zfs_dbgmsg("destroying ds %llu; currently traversing; " "reset bookmark to -1,0,0,0", (u_longlong_t)ds->ds_object); } } } /* * Invoked when a dataset is destroyed. We need to make sure that: * * 1) If it is the dataset that was currently being scanned, we write * a new dsl_scan_phys_t and marking the objset reference in it * as destroyed. * 2) Remove it from the work queue, if it was present. * * If the dataset was actually a snapshot, instead of marking the dataset * as destroyed, we instead substitute the next snapshot in line. */ void dsl_scan_ds_destroyed(dsl_dataset_t *ds, dmu_tx_t *tx) { dsl_pool_t *dp = ds->ds_dir->dd_pool; dsl_scan_t *scn = dp->dp_scan; uint64_t mintxg; if (!dsl_scan_is_running(scn)) return; ds_destroyed_scn_phys(ds, &scn->scn_phys); ds_destroyed_scn_phys(ds, &scn->scn_phys_cached); if (scan_ds_queue_contains(scn, ds->ds_object, &mintxg)) { scan_ds_queue_remove(scn, ds->ds_object); if (ds->ds_is_snapshot) scan_ds_queue_insert(scn, dsl_dataset_phys(ds)->ds_next_snap_obj, mintxg); } if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds->ds_object, &mintxg) == 0) { ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1); VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds->ds_object, tx)); if (ds->ds_is_snapshot) { /* * We keep the same mintxg; it could be > * ds_creation_txg if the previous snapshot was * deleted too. */ VERIFY(zap_add_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, dsl_dataset_phys(ds)->ds_next_snap_obj, mintxg, tx) == 0); zfs_dbgmsg("destroying ds %llu; in queue; " "replacing with %llu", (u_longlong_t)ds->ds_object, (u_longlong_t)dsl_dataset_phys(ds)-> ds_next_snap_obj); } else { zfs_dbgmsg("destroying ds %llu; in queue; removing", (u_longlong_t)ds->ds_object); } } /* * dsl_scan_sync() should be called after this, and should sync * out our changed state, but just to be safe, do it here. */ dsl_scan_sync_state(scn, tx, SYNC_CACHED); } static void ds_snapshotted_bookmark(dsl_dataset_t *ds, zbookmark_phys_t *scn_bookmark) { if (scn_bookmark->zb_objset == ds->ds_object) { scn_bookmark->zb_objset = dsl_dataset_phys(ds)->ds_prev_snap_obj; zfs_dbgmsg("snapshotting ds %llu; currently traversing; " "reset zb_objset to %llu", (u_longlong_t)ds->ds_object, (u_longlong_t)dsl_dataset_phys(ds)->ds_prev_snap_obj); } } /* * Called when a dataset is snapshotted. If we were currently traversing * this snapshot, we reset our bookmark to point at the newly created * snapshot. We also modify our work queue to remove the old snapshot and * replace with the new one. */ void dsl_scan_ds_snapshotted(dsl_dataset_t *ds, dmu_tx_t *tx) { dsl_pool_t *dp = ds->ds_dir->dd_pool; dsl_scan_t *scn = dp->dp_scan; uint64_t mintxg; if (!dsl_scan_is_running(scn)) return; ASSERT(dsl_dataset_phys(ds)->ds_prev_snap_obj != 0); ds_snapshotted_bookmark(ds, &scn->scn_phys.scn_bookmark); ds_snapshotted_bookmark(ds, &scn->scn_phys_cached.scn_bookmark); if (scan_ds_queue_contains(scn, ds->ds_object, &mintxg)) { scan_ds_queue_remove(scn, ds->ds_object); scan_ds_queue_insert(scn, dsl_dataset_phys(ds)->ds_prev_snap_obj, mintxg); } if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds->ds_object, &mintxg) == 0) { VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds->ds_object, tx)); VERIFY(zap_add_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, dsl_dataset_phys(ds)->ds_prev_snap_obj, mintxg, tx) == 0); zfs_dbgmsg("snapshotting ds %llu; in queue; " "replacing with %llu", (u_longlong_t)ds->ds_object, (u_longlong_t)dsl_dataset_phys(ds)->ds_prev_snap_obj); } dsl_scan_sync_state(scn, tx, SYNC_CACHED); } static void ds_clone_swapped_bookmark(dsl_dataset_t *ds1, dsl_dataset_t *ds2, zbookmark_phys_t *scn_bookmark) { if (scn_bookmark->zb_objset == ds1->ds_object) { scn_bookmark->zb_objset = ds2->ds_object; zfs_dbgmsg("clone_swap ds %llu; currently traversing; " "reset zb_objset to %llu", (u_longlong_t)ds1->ds_object, (u_longlong_t)ds2->ds_object); } else if (scn_bookmark->zb_objset == ds2->ds_object) { scn_bookmark->zb_objset = ds1->ds_object; zfs_dbgmsg("clone_swap ds %llu; currently traversing; " "reset zb_objset to %llu", (u_longlong_t)ds2->ds_object, (u_longlong_t)ds1->ds_object); } } /* * Called when an origin dataset and its clone are swapped. If we were * currently traversing the dataset, we need to switch to traversing the * newly promoted clone. */ void dsl_scan_ds_clone_swapped(dsl_dataset_t *ds1, dsl_dataset_t *ds2, dmu_tx_t *tx) { dsl_pool_t *dp = ds1->ds_dir->dd_pool; dsl_scan_t *scn = dp->dp_scan; uint64_t mintxg1, mintxg2; boolean_t ds1_queued, ds2_queued; if (!dsl_scan_is_running(scn)) return; ds_clone_swapped_bookmark(ds1, ds2, &scn->scn_phys.scn_bookmark); ds_clone_swapped_bookmark(ds1, ds2, &scn->scn_phys_cached.scn_bookmark); /* * Handle the in-memory scan queue. */ ds1_queued = scan_ds_queue_contains(scn, ds1->ds_object, &mintxg1); ds2_queued = scan_ds_queue_contains(scn, ds2->ds_object, &mintxg2); /* Sanity checking. */ if (ds1_queued) { ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); } if (ds2_queued) { ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); } if (ds1_queued && ds2_queued) { /* * If both are queued, we don't need to do anything. * The swapping code below would not handle this case correctly, * since we can't insert ds2 if it is already there. That's * because scan_ds_queue_insert() prohibits a duplicate insert * and panics. */ } else if (ds1_queued) { scan_ds_queue_remove(scn, ds1->ds_object); scan_ds_queue_insert(scn, ds2->ds_object, mintxg1); } else if (ds2_queued) { scan_ds_queue_remove(scn, ds2->ds_object); scan_ds_queue_insert(scn, ds1->ds_object, mintxg2); } /* * Handle the on-disk scan queue. * The on-disk state is an out-of-date version of the in-memory state, * so the in-memory and on-disk values for ds1_queued and ds2_queued may * be different. Therefore we need to apply the swap logic to the * on-disk state independently of the in-memory state. */ ds1_queued = zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds1->ds_object, &mintxg1) == 0; ds2_queued = zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds2->ds_object, &mintxg2) == 0; /* Sanity checking. */ if (ds1_queued) { ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); } if (ds2_queued) { ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); } if (ds1_queued && ds2_queued) { /* * If both are queued, we don't need to do anything. * Alternatively, we could check for EEXIST from * zap_add_int_key() and back out to the original state, but * that would be more work than checking for this case upfront. */ } else if (ds1_queued) { VERIFY3S(0, ==, zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds1->ds_object, tx)); VERIFY3S(0, ==, zap_add_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds2->ds_object, mintxg1, tx)); zfs_dbgmsg("clone_swap ds %llu; in queue; " "replacing with %llu", (u_longlong_t)ds1->ds_object, (u_longlong_t)ds2->ds_object); } else if (ds2_queued) { VERIFY3S(0, ==, zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds2->ds_object, tx)); VERIFY3S(0, ==, zap_add_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds1->ds_object, mintxg2, tx)); zfs_dbgmsg("clone_swap ds %llu; in queue; " "replacing with %llu", (u_longlong_t)ds2->ds_object, (u_longlong_t)ds1->ds_object); } dsl_scan_sync_state(scn, tx, SYNC_CACHED); } /* ARGSUSED */ static int enqueue_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) { uint64_t originobj = *(uint64_t *)arg; dsl_dataset_t *ds; int err; dsl_scan_t *scn = dp->dp_scan; if (dsl_dir_phys(hds->ds_dir)->dd_origin_obj != originobj) return (0); err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds); if (err) return (err); while (dsl_dataset_phys(ds)->ds_prev_snap_obj != originobj) { dsl_dataset_t *prev; err = dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); dsl_dataset_rele(ds, FTAG); if (err) return (err); ds = prev; } scan_ds_queue_insert(scn, ds->ds_object, dsl_dataset_phys(ds)->ds_prev_snap_txg); dsl_dataset_rele(ds, FTAG); return (0); } static void dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) { dsl_pool_t *dp = scn->scn_dp; dsl_dataset_t *ds; VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); if (scn->scn_phys.scn_cur_min_txg >= scn->scn_phys.scn_max_txg) { /* * This can happen if this snapshot was created after the * scan started, and we already completed a previous snapshot * that was created after the scan started. This snapshot * only references blocks with: * * birth < our ds_creation_txg * cur_min_txg is no less than ds_creation_txg. * We have already visited these blocks. * or * birth > scn_max_txg * The scan requested not to visit these blocks. * * Subsequent snapshots (and clones) can reference our * blocks, or blocks with even higher birth times. * Therefore we do not need to visit them either, * so we do not add them to the work queue. * * Note that checking for cur_min_txg >= cur_max_txg * is not sufficient, because in that case we may need to * visit subsequent snapshots. This happens when min_txg > 0, * which raises cur_min_txg. In this case we will visit * this dataset but skip all of its blocks, because the * rootbp's birth time is < cur_min_txg. Then we will * add the next snapshots/clones to the work queue. */ char *dsname = kmem_alloc(MAXNAMELEN, KM_SLEEP); dsl_dataset_name(ds, dsname); zfs_dbgmsg("scanning dataset %llu (%s) is unnecessary because " "cur_min_txg (%llu) >= max_txg (%llu)", (longlong_t)dsobj, dsname, (longlong_t)scn->scn_phys.scn_cur_min_txg, (longlong_t)scn->scn_phys.scn_max_txg); kmem_free(dsname, MAXNAMELEN); goto out; } /* * Only the ZIL in the head (non-snapshot) is valid. Even though * snapshots can have ZIL block pointers (which may be the same * BP as in the head), they must be ignored. In addition, $ORIGIN * doesn't have a objset (i.e. its ds_bp is a hole) so we don't * need to look for a ZIL in it either. So we traverse the ZIL here, * rather than in scan_recurse(), because the regular snapshot * block-sharing rules don't apply to it. */ if (DSL_SCAN_IS_SCRUB_RESILVER(scn) && !dsl_dataset_is_snapshot(ds) && (dp->dp_origin_snap == NULL || ds->ds_dir != dp->dp_origin_snap->ds_dir)) { objset_t *os; if (dmu_objset_from_ds(ds, &os) != 0) { goto out; } dsl_scan_zil(dp, &os->os_zil_header); } /* * Iterate over the bps in this ds. */ dmu_buf_will_dirty(ds->ds_dbuf, tx); rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); dsl_scan_visit_rootbp(scn, ds, &dsl_dataset_phys(ds)->ds_bp, tx); rrw_exit(&ds->ds_bp_rwlock, FTAG); char *dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); dsl_dataset_name(ds, dsname); zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; " "suspending=%u", (longlong_t)dsobj, dsname, (longlong_t)scn->scn_phys.scn_cur_min_txg, (longlong_t)scn->scn_phys.scn_cur_max_txg, (int)scn->scn_suspending); kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN); if (scn->scn_suspending) goto out; /* * We've finished this pass over this dataset. */ /* * If we did not completely visit this dataset, do another pass. */ if (scn->scn_phys.scn_flags & DSF_VISIT_DS_AGAIN) { zfs_dbgmsg("incomplete pass; visiting again"); scn->scn_phys.scn_flags &= ~DSF_VISIT_DS_AGAIN; scan_ds_queue_insert(scn, ds->ds_object, scn->scn_phys.scn_cur_max_txg); goto out; } /* * Add descendent datasets to work queue. */ if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) { scan_ds_queue_insert(scn, dsl_dataset_phys(ds)->ds_next_snap_obj, dsl_dataset_phys(ds)->ds_creation_txg); } if (dsl_dataset_phys(ds)->ds_num_children > 1) { boolean_t usenext = B_FALSE; if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) { uint64_t count; /* * A bug in a previous version of the code could * cause upgrade_clones_cb() to not set * ds_next_snap_obj when it should, leading to a * missing entry. Therefore we can only use the * next_clones_obj when its count is correct. */ int err = zap_count(dp->dp_meta_objset, dsl_dataset_phys(ds)->ds_next_clones_obj, &count); if (err == 0 && count == dsl_dataset_phys(ds)->ds_num_children - 1) usenext = B_TRUE; } if (usenext) { zap_cursor_t zc; zap_attribute_t za; for (zap_cursor_init(&zc, dp->dp_meta_objset, dsl_dataset_phys(ds)->ds_next_clones_obj); zap_cursor_retrieve(&zc, &za) == 0; (void) zap_cursor_advance(&zc)) { scan_ds_queue_insert(scn, zfs_strtonum(za.za_name, NULL), dsl_dataset_phys(ds)->ds_creation_txg); } zap_cursor_fini(&zc); } else { VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, enqueue_clones_cb, &ds->ds_object, DS_FIND_CHILDREN)); } } out: dsl_dataset_rele(ds, FTAG); } /* ARGSUSED */ static int enqueue_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) { dsl_dataset_t *ds; int err; dsl_scan_t *scn = dp->dp_scan; err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds); if (err) return (err); while (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { dsl_dataset_t *prev; err = dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); if (err) { dsl_dataset_rele(ds, FTAG); return (err); } /* * If this is a clone, we don't need to worry about it for now. */ if (dsl_dataset_phys(prev)->ds_next_snap_obj != ds->ds_object) { dsl_dataset_rele(ds, FTAG); dsl_dataset_rele(prev, FTAG); return (0); } dsl_dataset_rele(ds, FTAG); ds = prev; } scan_ds_queue_insert(scn, ds->ds_object, dsl_dataset_phys(ds)->ds_prev_snap_txg); dsl_dataset_rele(ds, FTAG); return (0); } /* ARGSUSED */ void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, ddt_entry_t *dde, dmu_tx_t *tx) { const ddt_key_t *ddk = &dde->dde_key; ddt_phys_t *ddp = dde->dde_phys; blkptr_t bp; zbookmark_phys_t zb = { 0 }; int p; if (!dsl_scan_is_running(scn)) return; /* * This function is special because it is the only thing * that can add scan_io_t's to the vdev scan queues from * outside dsl_scan_sync(). For the most part this is ok * as long as it is called from within syncing context. * However, dsl_scan_sync() expects that no new sio's will * be added between when all the work for a scan is done * and the next txg when the scan is actually marked as * completed. This check ensures we do not issue new sio's * during this period. */ if (scn->scn_done_txg != 0) return; for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { if (ddp->ddp_phys_birth == 0 || ddp->ddp_phys_birth > scn->scn_phys.scn_max_txg) continue; ddt_bp_create(checksum, ddk, ddp, &bp); scn->scn_visited_this_txg++; scan_funcs[scn->scn_phys.scn_func](scn->scn_dp, &bp, &zb); } } /* * Scrub/dedup interaction. * * If there are N references to a deduped block, we don't want to scrub it * N times -- ideally, we should scrub it exactly once. * * We leverage the fact that the dde's replication class (enum ddt_class) * is ordered from highest replication class (DDT_CLASS_DITTO) to lowest * (DDT_CLASS_UNIQUE) so that we may walk the DDT in that order. * * To prevent excess scrubbing, the scrub begins by walking the DDT * to find all blocks with refcnt > 1, and scrubs each of these once. * Since there are two replication classes which contain blocks with * refcnt > 1, we scrub the highest replication class (DDT_CLASS_DITTO) first. * Finally the top-down scrub begins, only visiting blocks with refcnt == 1. * * There would be nothing more to say if a block's refcnt couldn't change * during a scrub, but of course it can so we must account for changes * in a block's replication class. * * Here's an example of what can occur: * * If a block has refcnt > 1 during the DDT scrub phase, but has refcnt == 1 * when visited during the top-down scrub phase, it will be scrubbed twice. * This negates our scrub optimization, but is otherwise harmless. * * If a block has refcnt == 1 during the DDT scrub phase, but has refcnt > 1 * on each visit during the top-down scrub phase, it will never be scrubbed. * To catch this, ddt_sync_entry() notifies the scrub code whenever a block's * reference class transitions to a higher level (i.e DDT_CLASS_UNIQUE to * DDT_CLASS_DUPLICATE); if it transitions from refcnt == 1 to refcnt > 1 * while a scrub is in progress, it scrubs the block right then. */ static void dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx) { ddt_bookmark_t *ddb = &scn->scn_phys.scn_ddt_bookmark; ddt_entry_t dde = { 0 }; int error; uint64_t n = 0; while ((error = ddt_walk(scn->scn_dp->dp_spa, ddb, &dde)) == 0) { ddt_t *ddt; if (ddb->ddb_class > scn->scn_phys.scn_ddt_class_max) break; dprintf("visiting ddb=%llu/%llu/%llu/%llx\n", (longlong_t)ddb->ddb_class, (longlong_t)ddb->ddb_type, (longlong_t)ddb->ddb_checksum, (longlong_t)ddb->ddb_cursor); /* There should be no pending changes to the dedup table */ ddt = scn->scn_dp->dp_spa->spa_ddt[ddb->ddb_checksum]; ASSERT(avl_first(&ddt->ddt_tree) == NULL); dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &dde, tx); n++; if (dsl_scan_check_suspend(scn, NULL)) break; } zfs_dbgmsg("scanned %llu ddt entries with class_max = %u; " "suspending=%u", (longlong_t)n, (int)scn->scn_phys.scn_ddt_class_max, (int)scn->scn_suspending); ASSERT(error == 0 || error == ENOENT); ASSERT(error != ENOENT || ddb->ddb_class > scn->scn_phys.scn_ddt_class_max); } static uint64_t dsl_scan_ds_maxtxg(dsl_dataset_t *ds) { uint64_t smt = ds->ds_dir->dd_pool->dp_scan->scn_phys.scn_max_txg; if (ds->ds_is_snapshot) return (MIN(smt, dsl_dataset_phys(ds)->ds_creation_txg)); return (smt); } static void dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) { scan_ds_t *sds; dsl_pool_t *dp = scn->scn_dp; if (scn->scn_phys.scn_ddt_bookmark.ddb_class <= scn->scn_phys.scn_ddt_class_max) { scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg; scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg; dsl_scan_ddt(scn, tx); if (scn->scn_suspending) return; } if (scn->scn_phys.scn_bookmark.zb_objset == DMU_META_OBJSET) { /* First do the MOS & ORIGIN */ scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg; scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg; dsl_scan_visit_rootbp(scn, NULL, &dp->dp_meta_rootbp, tx); spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp); if (scn->scn_suspending) return; if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) { VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, enqueue_cb, NULL, DS_FIND_CHILDREN)); } else { dsl_scan_visitds(scn, dp->dp_origin_snap->ds_object, tx); } ASSERT(!scn->scn_suspending); } else if (scn->scn_phys.scn_bookmark.zb_objset != ZB_DESTROYED_OBJSET) { uint64_t dsobj = scn->scn_phys.scn_bookmark.zb_objset; /* * If we were suspended, continue from here. Note if the * ds we were suspended on was deleted, the zb_objset may * be -1, so we will skip this and find a new objset * below. */ dsl_scan_visitds(scn, dsobj, tx); if (scn->scn_suspending) return; } /* * In case we suspended right at the end of the ds, zero the * bookmark so we don't think that we're still trying to resume. */ bzero(&scn->scn_phys.scn_bookmark, sizeof (zbookmark_phys_t)); /* * Keep pulling things out of the dataset avl queue. Updates to the * persistent zap-object-as-queue happen only at checkpoints. */ while ((sds = avl_first(&scn->scn_queue)) != NULL) { dsl_dataset_t *ds; uint64_t dsobj = sds->sds_dsobj; uint64_t txg = sds->sds_txg; /* dequeue and free the ds from the queue */ scan_ds_queue_remove(scn, dsobj); sds = NULL; /* must not be touched after removal */ /* Set up min / max txg */ VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); if (txg != 0) { scn->scn_phys.scn_cur_min_txg = MAX(scn->scn_phys.scn_min_txg, txg); } else { scn->scn_phys.scn_cur_min_txg = MAX(scn->scn_phys.scn_min_txg, dsl_dataset_phys(ds)->ds_prev_snap_txg); } scn->scn_phys.scn_cur_max_txg = dsl_scan_ds_maxtxg(ds); dsl_dataset_rele(ds, FTAG); dsl_scan_visitds(scn, dsobj, tx); if (scn->scn_suspending) return; } /* No more objsets to fetch, we're done */ scn->scn_phys.scn_bookmark.zb_objset = ZB_DESTROYED_OBJSET; ASSERT0(scn->scn_suspending); } static uint64_t dsl_scan_count_leaves(vdev_t *vd) { uint64_t i, leaves = 0; /* we only count leaves that belong to the main pool and are readable */ if (vd->vdev_islog || vd->vdev_isspare || vd->vdev_isl2cache || !vdev_readable(vd)) return (0); if (vd->vdev_ops->vdev_op_leaf) return (1); for (i = 0; i < vd->vdev_children; i++) { leaves += dsl_scan_count_leaves(vd->vdev_child[i]); } return (leaves); } static void scan_io_queues_update_zio_stats(dsl_scan_io_queue_t *q, const blkptr_t *bp) { int i; uint64_t cur_size = 0; for (i = 0; i < BP_GET_NDVAS(bp); i++) { cur_size += DVA_GET_ASIZE(&bp->blk_dva[i]); } q->q_total_zio_size_this_txg += cur_size; q->q_zios_this_txg++; } static void scan_io_queues_update_seg_stats(dsl_scan_io_queue_t *q, uint64_t start, uint64_t end) { q->q_total_seg_size_this_txg += end - start; q->q_segs_this_txg++; } static boolean_t scan_io_queue_check_suspend(dsl_scan_t *scn) { /* See comment in dsl_scan_check_suspend() */ uint64_t curr_time_ns = gethrtime(); uint64_t scan_time_ns = curr_time_ns - scn->scn_sync_start_time; uint64_t sync_time_ns = curr_time_ns - scn->scn_dp->dp_spa->spa_sync_starttime; int dirty_pct = scn->scn_dp->dp_dirty_total * 100 / zfs_dirty_data_max; int mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ? zfs_resilver_min_time_ms : zfs_scrub_min_time_ms; return ((NSEC2MSEC(scan_time_ns) > mintime && (dirty_pct >= zfs_vdev_async_write_active_min_dirty_percent || txg_sync_waiting(scn->scn_dp) || NSEC2SEC(sync_time_ns) >= zfs_txg_timeout)) || spa_shutting_down(scn->scn_dp->dp_spa)); } /* * Given a list of scan_io_t's in io_list, this issues the io's out to * disk. This consumes the io_list and frees the scan_io_t's. This is * called when emptying queues, either when we're up against the memory * limit or when we have finished scanning. Returns B_TRUE if we stopped * processing the list before we finished. Any zios that were not issued * will remain in the io_list. */ static boolean_t scan_io_queue_issue(dsl_scan_io_queue_t *queue, list_t *io_list) { dsl_scan_t *scn = queue->q_scn; scan_io_t *sio; int64_t bytes_issued = 0; boolean_t suspended = B_FALSE; while ((sio = list_head(io_list)) != NULL) { blkptr_t bp; if (scan_io_queue_check_suspend(scn)) { suspended = B_TRUE; break; } sio2bp(sio, &bp, queue->q_vd->vdev_id); bytes_issued += sio->sio_asize; scan_exec_io(scn->scn_dp, &bp, sio->sio_flags, &sio->sio_zb, queue); (void) list_remove_head(io_list); scan_io_queues_update_zio_stats(queue, &bp); kmem_free(sio, sizeof (*sio)); } atomic_add_64(&scn->scn_bytes_pending, -bytes_issued); return (suspended); } /* * Given a range_seg_t (extent) and a list, this function passes over a * scan queue and gathers up the appropriate ios which fit into that * scan seg (starting from lowest LBA). At the end, we remove the segment * from the q_exts_by_addr range tree. */ static boolean_t scan_io_queue_gather(dsl_scan_io_queue_t *queue, range_seg_t *rs, list_t *list) { scan_io_t srch_sio, *sio, *next_sio; avl_index_t idx; uint_t num_sios = 0; int64_t bytes_issued = 0; ASSERT(rs != NULL); ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock)); srch_sio.sio_offset = rs->rs_start; /* * The exact start of the extent might not contain any matching zios, * so if that's the case, examine the next one in the tree. */ sio = avl_find(&queue->q_sios_by_addr, &srch_sio, &idx); if (sio == NULL) sio = avl_nearest(&queue->q_sios_by_addr, idx, AVL_AFTER); while (sio != NULL && sio->sio_offset < rs->rs_end && num_sios <= 32) { ASSERT3U(sio->sio_offset, >=, rs->rs_start); ASSERT3U(sio->sio_offset + sio->sio_asize, <=, rs->rs_end); next_sio = AVL_NEXT(&queue->q_sios_by_addr, sio); avl_remove(&queue->q_sios_by_addr, sio); bytes_issued += sio->sio_asize; num_sios++; list_insert_tail(list, sio); sio = next_sio; } /* * We limit the number of sios we process at once to 32 to avoid * biting off more than we can chew. If we didn't take everything * in the segment we update it to reflect the work we were able to * complete. Otherwise, we remove it from the range tree entirely. */ if (sio != NULL && sio->sio_offset < rs->rs_end) { range_tree_adjust_fill(queue->q_exts_by_addr, rs, -bytes_issued); range_tree_resize_segment(queue->q_exts_by_addr, rs, sio->sio_offset, rs->rs_end - sio->sio_offset); return (B_TRUE); } else { range_tree_remove(queue->q_exts_by_addr, rs->rs_start, rs->rs_end - rs->rs_start); return (B_FALSE); } } /* * This is called from the queue emptying thread and selects the next * extent from which we are to issue io's. The behavior of this function * depends on the state of the scan, the current memory consumption and * whether or not we are performing a scan shutdown. * 1) We select extents in an elevator algorithm (LBA-order) if the scan * needs to perform a checkpoint * 2) We select the largest available extent if we are up against the * memory limit. * 3) Otherwise we don't select any extents. */ static const range_seg_t * scan_io_queue_fetch_ext(dsl_scan_io_queue_t *queue) { dsl_scan_t *scn = queue->q_scn; ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock)); ASSERT(scn->scn_is_sorted); /* handle tunable overrides */ if (scn->scn_checkpointing || scn->scn_clearing) { if (zfs_scan_issue_strategy == 1) { return (range_tree_first(queue->q_exts_by_addr)); } else if (zfs_scan_issue_strategy == 2) { return (avl_first(&queue->q_exts_by_size)); } } /* * During normal clearing, we want to issue our largest segments * first, keeping IO as sequential as possible, and leaving the * smaller extents for later with the hope that they might eventually * grow to larger sequential segments. However, when the scan is * checkpointing, no new extents will be added to the sorting queue, * so the way we are sorted now is as good as it will ever get. * In this case, we instead switch to issuing extents in LBA order. */ if (scn->scn_checkpointing) { return (range_tree_first(queue->q_exts_by_addr)); } else if (scn->scn_clearing) { return (avl_first(&queue->q_exts_by_size)); } else { return (NULL); } } static void scan_io_queues_run_one(void *arg) { dsl_scan_io_queue_t *queue = arg; kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock; boolean_t suspended = B_FALSE; range_seg_t *rs = NULL; scan_io_t *sio = NULL; list_t sio_list; uint64_t bytes_per_leaf = zfs_scan_vdev_limit; uint64_t nr_leaves = dsl_scan_count_leaves(queue->q_vd); ASSERT(queue->q_scn->scn_is_sorted); list_create(&sio_list, sizeof (scan_io_t), offsetof(scan_io_t, sio_nodes.sio_list_node)); mutex_enter(q_lock); /* calculate maximum in-flight bytes for this txg (min 1MB) */ queue->q_maxinflight_bytes = MAX(nr_leaves * bytes_per_leaf, 1ULL << 20); /* reset per-queue scan statistics for this txg */ queue->q_total_seg_size_this_txg = 0; queue->q_segs_this_txg = 0; queue->q_total_zio_size_this_txg = 0; queue->q_zios_this_txg = 0; /* loop until we have run out of time or sios */ while ((rs = (range_seg_t*)scan_io_queue_fetch_ext(queue)) != NULL) { uint64_t seg_start = 0, seg_end = 0; boolean_t more_left = B_TRUE; ASSERT(list_is_empty(&sio_list)); /* loop while we still have sios left to process in this rs */ while (more_left) { scan_io_t *first_sio, *last_sio; /* * We have selected which extent needs to be * processed next. Gather up the corresponding sios. */ more_left = scan_io_queue_gather(queue, rs, &sio_list); ASSERT(!list_is_empty(&sio_list)); first_sio = list_head(&sio_list); last_sio = list_tail(&sio_list); seg_end = last_sio->sio_offset + last_sio->sio_asize; if (seg_start == 0) seg_start = first_sio->sio_offset; /* * Issuing sios can take a long time so drop the * queue lock. The sio queue won't be updated by * other threads since we're in syncing context so * we can be sure that our trees will remain exactly * as we left them. */ mutex_exit(q_lock); suspended = scan_io_queue_issue(queue, &sio_list); mutex_enter(q_lock); if (suspended) break; } /* update statistics for debugging purposes */ scan_io_queues_update_seg_stats(queue, seg_start, seg_end); if (suspended) break; } /* If we were suspended in the middle of processing, * requeue any unfinished sios and exit. */ while ((sio = list_head(&sio_list)) != NULL) { list_remove(&sio_list, sio); scan_io_queue_insert_impl(queue, sio); } mutex_exit(q_lock); list_destroy(&sio_list); } /* * Performs an emptying run on all scan queues in the pool. This just * punches out one thread per top-level vdev, each of which processes * only that vdev's scan queue. We can parallelize the I/O here because * we know that each queue's io's only affect its own top-level vdev. * * This function waits for the queue runs to complete, and must be * called from dsl_scan_sync (or in general, syncing context). */ static void scan_io_queues_run(dsl_scan_t *scn) { spa_t *spa = scn->scn_dp->dp_spa; ASSERT(scn->scn_is_sorted); ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); if (scn->scn_bytes_pending == 0) return; if (scn->scn_taskq == NULL) { char *tq_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN + 16, KM_SLEEP); int nthreads = spa->spa_root_vdev->vdev_children; /* * We need to make this taskq *always* execute as many * threads in parallel as we have top-level vdevs and no * less, otherwise strange serialization of the calls to * scan_io_queues_run_one can occur during spa_sync runs * and that significantly impacts performance. */ (void) snprintf(tq_name, ZFS_MAX_DATASET_NAME_LEN + 16, "dsl_scan_tq_%s", spa->spa_name); scn->scn_taskq = taskq_create(tq_name, nthreads, minclsyspri, nthreads, nthreads, TASKQ_PREPOPULATE); kmem_free(tq_name, ZFS_MAX_DATASET_NAME_LEN + 16); } for (uint64_t i = 0; i < spa->spa_root_vdev->vdev_children; i++) { vdev_t *vd = spa->spa_root_vdev->vdev_child[i]; mutex_enter(&vd->vdev_scan_io_queue_lock); if (vd->vdev_scan_io_queue != NULL) { VERIFY(taskq_dispatch(scn->scn_taskq, scan_io_queues_run_one, vd->vdev_scan_io_queue, TQ_SLEEP) != TASKQID_INVALID); } mutex_exit(&vd->vdev_scan_io_queue_lock); } /* * Wait for the queues to finish issuing thir IOs for this run * before we return. There may still be IOs in flight at this * point. */ taskq_wait(scn->scn_taskq); } static boolean_t dsl_scan_async_block_should_pause(dsl_scan_t *scn) { uint64_t elapsed_nanosecs; if (zfs_recover) return (B_FALSE); if (scn->scn_visited_this_txg >= zfs_async_block_max_blocks) return (B_TRUE); elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time; return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout || (NSEC2MSEC(elapsed_nanosecs) > scn->scn_async_block_min_time_ms && txg_sync_waiting(scn->scn_dp)) || spa_shutting_down(scn->scn_dp->dp_spa)); } static int dsl_scan_free_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { dsl_scan_t *scn = arg; if (!scn->scn_is_bptree || (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) { if (dsl_scan_async_block_should_pause(scn)) return (SET_ERROR(ERESTART)); } zio_nowait(zio_free_sync(scn->scn_zio_root, scn->scn_dp->dp_spa, dmu_tx_get_txg(tx), bp, BP_GET_PSIZE(bp), 0)); dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, DD_USED_HEAD, -bp_get_dsize_sync(scn->scn_dp->dp_spa, bp), -BP_GET_PSIZE(bp), -BP_GET_UCSIZE(bp), tx); scn->scn_visited_this_txg++; return (0); } static void dsl_scan_update_stats(dsl_scan_t *scn) { spa_t *spa = scn->scn_dp->dp_spa; uint64_t i; uint64_t seg_size_total = 0, zio_size_total = 0; uint64_t seg_count_total = 0, zio_count_total = 0; for (i = 0; i < spa->spa_root_vdev->vdev_children; i++) { vdev_t *vd = spa->spa_root_vdev->vdev_child[i]; dsl_scan_io_queue_t *queue = vd->vdev_scan_io_queue; if (queue == NULL) continue; seg_size_total += queue->q_total_seg_size_this_txg; zio_size_total += queue->q_total_zio_size_this_txg; seg_count_total += queue->q_segs_this_txg; zio_count_total += queue->q_zios_this_txg; } if (seg_count_total == 0 || zio_count_total == 0) { scn->scn_avg_seg_size_this_txg = 0; scn->scn_avg_zio_size_this_txg = 0; scn->scn_segs_this_txg = 0; scn->scn_zios_this_txg = 0; return; } scn->scn_avg_seg_size_this_txg = seg_size_total / seg_count_total; scn->scn_avg_zio_size_this_txg = zio_size_total / zio_count_total; scn->scn_segs_this_txg = seg_count_total; scn->scn_zios_this_txg = zio_count_total; } static int dsl_scan_obsolete_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { dsl_scan_t *scn = arg; const dva_t *dva = &bp->blk_dva[0]; if (dsl_scan_async_block_should_pause(scn)) return (SET_ERROR(ERESTART)); spa_vdev_indirect_mark_obsolete(scn->scn_dp->dp_spa, DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), DVA_GET_ASIZE(dva), tx); scn->scn_visited_this_txg++; return (0); } boolean_t dsl_scan_active(dsl_scan_t *scn) { spa_t *spa = scn->scn_dp->dp_spa; uint64_t used = 0, comp, uncomp; if (spa->spa_load_state != SPA_LOAD_NONE) return (B_FALSE); if (spa_shutting_down(spa)) return (B_FALSE); if ((dsl_scan_is_running(scn) && !dsl_scan_is_paused_scrub(scn)) || (scn->scn_async_destroying && !scn->scn_async_stalled)) return (B_TRUE); if (spa_version(scn->scn_dp->dp_spa) >= SPA_VERSION_DEADLISTS) { (void) bpobj_space(&scn->scn_dp->dp_free_bpobj, &used, &comp, &uncomp); } return (used != 0); } static boolean_t dsl_scan_need_resilver(spa_t *spa, const dva_t *dva, size_t psize, uint64_t phys_birth) { vdev_t *vd; vd = vdev_lookup_top(spa, DVA_GET_VDEV(dva)); if (vd->vdev_ops == &vdev_indirect_ops) { /* * The indirect vdev can point to multiple * vdevs. For simplicity, always create * the resilver zio_t. zio_vdev_io_start() * will bypass the child resilver i/o's if * they are on vdevs that don't have DTL's. */ return (B_TRUE); } if (DVA_GET_GANG(dva)) { /* * Gang members may be spread across multiple * vdevs, so the best estimate we have is the * scrub range, which has already been checked. * XXX -- it would be better to change our * allocation policy to ensure that all * gang members reside on the same vdev. */ return (B_TRUE); } /* * Check if the txg falls within the range which must be * resilvered. DVAs outside this range can always be skipped. */ if (!vdev_dtl_contains(vd, DTL_PARTIAL, phys_birth, 1)) return (B_FALSE); /* * Check if the top-level vdev must resilver this offset. * When the offset does not intersect with a dirty leaf DTL * then it may be possible to skip the resilver IO. The psize * is provided instead of asize to simplify the check for RAIDZ. */ if (!vdev_dtl_need_resilver(vd, DVA_GET_OFFSET(dva), psize)) return (B_FALSE); return (B_TRUE); } static int dsl_process_async_destroys(dsl_pool_t *dp, dmu_tx_t *tx) { int err = 0; dsl_scan_t *scn = dp->dp_scan; spa_t *spa = dp->dp_spa; if (spa_suspend_async_destroy(spa)) return (0); if (zfs_free_bpobj_enabled && spa_version(spa) >= SPA_VERSION_DEADLISTS) { scn->scn_is_bptree = B_FALSE; scn->scn_async_block_min_time_ms = zfs_free_min_time_ms; scn->scn_zio_root = zio_root(spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); err = bpobj_iterate(&dp->dp_free_bpobj, dsl_scan_free_block_cb, scn, tx); VERIFY0(zio_wait(scn->scn_zio_root)); scn->scn_zio_root = NULL; if (err != 0 && err != ERESTART) zfs_panic_recover("error %u from bpobj_iterate()", err); } if (err == 0 && spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) { ASSERT(scn->scn_async_destroying); scn->scn_is_bptree = B_TRUE; scn->scn_zio_root = zio_root(spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); err = bptree_iterate(dp->dp_meta_objset, dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb, scn, tx); VERIFY0(zio_wait(scn->scn_zio_root)); scn->scn_zio_root = NULL; if (err == EIO || err == ECKSUM) { err = 0; } else if (err != 0 && err != ERESTART) { zfs_panic_recover("error %u from " "traverse_dataset_destroyed()", err); } if (bptree_is_empty(dp->dp_meta_objset, dp->dp_bptree_obj)) { /* finished; deactivate async destroy feature */ spa_feature_decr(spa, SPA_FEATURE_ASYNC_DESTROY, tx); ASSERT(!spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)); VERIFY0(zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_BPTREE_OBJ, tx)); VERIFY0(bptree_free(dp->dp_meta_objset, dp->dp_bptree_obj, tx)); dp->dp_bptree_obj = 0; scn->scn_async_destroying = B_FALSE; scn->scn_async_stalled = B_FALSE; } else { /* * If we didn't make progress, mark the async * destroy as stalled, so that we will not initiate * a spa_sync() on its behalf. Note that we only * check this if we are not finished, because if the * bptree had no blocks for us to visit, we can * finish without "making progress". */ scn->scn_async_stalled = (scn->scn_visited_this_txg == 0); } } if (scn->scn_visited_this_txg) { zfs_dbgmsg("freed %llu blocks in %llums from " "free_bpobj/bptree txg %llu; err=%d", (longlong_t)scn->scn_visited_this_txg, (longlong_t) NSEC2MSEC(gethrtime() - scn->scn_sync_start_time), (longlong_t)tx->tx_txg, err); scn->scn_visited_this_txg = 0; /* * Write out changes to the DDT that may be required as a * result of the blocks freed. This ensures that the DDT * is clean when a scrub/resilver runs. */ ddt_sync(spa, tx->tx_txg); } if (err != 0) return (err); if (dp->dp_free_dir != NULL && !scn->scn_async_destroying && zfs_free_leak_on_eio && (dsl_dir_phys(dp->dp_free_dir)->dd_used_bytes != 0 || dsl_dir_phys(dp->dp_free_dir)->dd_compressed_bytes != 0 || dsl_dir_phys(dp->dp_free_dir)->dd_uncompressed_bytes != 0)) { /* * We have finished background destroying, but there is still * some space left in the dp_free_dir. Transfer this leaked * space to the dp_leak_dir. */ if (dp->dp_leak_dir == NULL) { rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); (void) dsl_dir_create_sync(dp, dp->dp_root_dir, LEAK_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, LEAK_DIR_NAME, &dp->dp_leak_dir)); rrw_exit(&dp->dp_config_rwlock, FTAG); } dsl_dir_diduse_space(dp->dp_leak_dir, DD_USED_HEAD, dsl_dir_phys(dp->dp_free_dir)->dd_used_bytes, dsl_dir_phys(dp->dp_free_dir)->dd_compressed_bytes, dsl_dir_phys(dp->dp_free_dir)->dd_uncompressed_bytes, tx); dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD, -dsl_dir_phys(dp->dp_free_dir)->dd_used_bytes, -dsl_dir_phys(dp->dp_free_dir)->dd_compressed_bytes, -dsl_dir_phys(dp->dp_free_dir)->dd_uncompressed_bytes, tx); } if (dp->dp_free_dir != NULL && !scn->scn_async_destroying) { /* finished; verify that space accounting went to zero */ ASSERT0(dsl_dir_phys(dp->dp_free_dir)->dd_used_bytes); ASSERT0(dsl_dir_phys(dp->dp_free_dir)->dd_compressed_bytes); ASSERT0(dsl_dir_phys(dp->dp_free_dir)->dd_uncompressed_bytes); } EQUIV(bpobj_is_open(&dp->dp_obsolete_bpobj), 0 == zap_contains(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_OBSOLETE_BPOBJ)); if (err == 0 && bpobj_is_open(&dp->dp_obsolete_bpobj)) { ASSERT(spa_feature_is_active(dp->dp_spa, SPA_FEATURE_OBSOLETE_COUNTS)); scn->scn_is_bptree = B_FALSE; scn->scn_async_block_min_time_ms = zfs_obsolete_min_time_ms; err = bpobj_iterate(&dp->dp_obsolete_bpobj, dsl_scan_obsolete_block_cb, scn, tx); if (err != 0 && err != ERESTART) zfs_panic_recover("error %u from bpobj_iterate()", err); if (bpobj_is_empty(&dp->dp_obsolete_bpobj)) dsl_pool_destroy_obsolete_bpobj(dp, tx); } return (0); } /* * This is the primary entry point for scans that is called from syncing * context. Scans must happen entirely during syncing context so that we * cna guarantee that blocks we are currently scanning will not change out * from under us. While a scan is active, this funciton controls how quickly * transaction groups proceed, instead of the normal handling provided by * txg_sync_thread(). */ void dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) { dsl_scan_t *scn = dp->dp_scan; spa_t *spa = dp->dp_spa; int err = 0; state_sync_type_t sync_type = SYNC_OPTIONAL; /* * Check for scn_restart_txg before checking spa_load_state, so * that we can restart an old-style scan while the pool is being * imported (see dsl_scan_init). */ if (dsl_scan_restarting(scn, tx)) { pool_scan_func_t func = POOL_SCAN_SCRUB; dsl_scan_done(scn, B_FALSE, tx); if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) func = POOL_SCAN_RESILVER; zfs_dbgmsg("restarting scan func=%u txg=%llu", func, (longlong_t)tx->tx_txg); dsl_scan_setup_sync(&func, tx); } /* * Only process scans in sync pass 1. */ if (spa_sync_pass(dp->dp_spa) > 1) return; /* * If the spa is shutting down, then stop scanning. This will * ensure that the scan does not dirty any new data during the * shutdown phase. */ if (spa_shutting_down(spa)) return; /* * If the scan is inactive due to a stalled async destroy, try again. */ if (!scn->scn_async_stalled && !dsl_scan_active(scn)) return; /* reset scan statistics */ scn->scn_visited_this_txg = 0; scn->scn_holes_this_txg = 0; scn->scn_lt_min_this_txg = 0; scn->scn_gt_max_this_txg = 0; scn->scn_ddt_contained_this_txg = 0; scn->scn_objsets_visited_this_txg = 0; scn->scn_avg_seg_size_this_txg = 0; scn->scn_segs_this_txg = 0; scn->scn_avg_zio_size_this_txg = 0; scn->scn_zios_this_txg = 0; scn->scn_suspending = B_FALSE; scn->scn_sync_start_time = gethrtime(); spa->spa_scrub_active = B_TRUE; /* * First process the async destroys. If we pause, don't do * any scrubbing or resilvering. This ensures that there are no * async destroys while we are scanning, so the scan code doesn't * have to worry about traversing it. It is also faster to free the * blocks than to scrub them. */ err = dsl_process_async_destroys(dp, tx); if (err != 0) return; if (!dsl_scan_is_running(scn) || dsl_scan_is_paused_scrub(scn)) return; /* * Wait a few txgs after importing to begin scanning so that * we can get the pool imported quickly. */ if (spa->spa_syncing_txg < spa->spa_first_txg + SCAN_IMPORT_WAIT_TXGS) return; /* * It is possible to switch from unsorted to sorted at any time, * but afterwards the scan will remain sorted unless reloaded from * a checkpoint after a reboot. */ if (!zfs_scan_legacy) { scn->scn_is_sorted = B_TRUE; if (scn->scn_last_checkpoint == 0) scn->scn_last_checkpoint = ddi_get_lbolt(); } /* * For sorted scans, determine what kind of work we will be doing * this txg based on our memory limitations and whether or not we * need to perform a checkpoint. */ if (scn->scn_is_sorted) { /* * If we are over our checkpoint interval, set scn_clearing * so that we can begin checkpointing immediately. The * checkpoint allows us to save a consisent bookmark * representing how much data we have scrubbed so far. * Otherwise, use the memory limit to determine if we should * scan for metadata or start issue scrub IOs. We accumulate * metadata until we hit our hard memory limit at which point * we issue scrub IOs until we are at our soft memory limit. */ if (scn->scn_checkpointing || ddi_get_lbolt() - scn->scn_last_checkpoint > SEC_TO_TICK(zfs_scan_checkpoint_intval)) { if (!scn->scn_checkpointing) zfs_dbgmsg("begin scan checkpoint"); scn->scn_checkpointing = B_TRUE; scn->scn_clearing = B_TRUE; } else { boolean_t should_clear = dsl_scan_should_clear(scn); if (should_clear && !scn->scn_clearing) { zfs_dbgmsg("begin scan clearing"); scn->scn_clearing = B_TRUE; } else if (!should_clear && scn->scn_clearing) { zfs_dbgmsg("finish scan clearing"); scn->scn_clearing = B_FALSE; } } } else { ASSERT0(scn->scn_checkpointing); ASSERT0(scn->scn_clearing); } if (!scn->scn_clearing && scn->scn_done_txg == 0) { /* Need to scan metadata for more blocks to scrub */ dsl_scan_phys_t *scnp = &scn->scn_phys; taskqid_t prefetch_tqid; uint64_t bytes_per_leaf = zfs_scan_vdev_limit; uint64_t nr_leaves = dsl_scan_count_leaves(spa->spa_root_vdev); /* * Recalculate the max number of in-flight bytes for pool-wide * scanning operations (minimum 1MB). Limits for the issuing * phase are done per top-level vdev and are handled separately. */ scn->scn_maxinflight_bytes = MAX(nr_leaves * bytes_per_leaf, 1ULL << 20); if (scnp->scn_ddt_bookmark.ddb_class <= scnp->scn_ddt_class_max) { ASSERT(ZB_IS_ZERO(&scnp->scn_bookmark)); zfs_dbgmsg("doing scan sync txg %llu; " "ddt bm=%llu/%llu/%llu/%llx", (longlong_t)tx->tx_txg, (longlong_t)scnp->scn_ddt_bookmark.ddb_class, (longlong_t)scnp->scn_ddt_bookmark.ddb_type, (longlong_t)scnp->scn_ddt_bookmark.ddb_checksum, (longlong_t)scnp->scn_ddt_bookmark.ddb_cursor); } else { zfs_dbgmsg("doing scan sync txg %llu; " "bm=%llu/%llu/%llu/%llu", (longlong_t)tx->tx_txg, (longlong_t)scnp->scn_bookmark.zb_objset, (longlong_t)scnp->scn_bookmark.zb_object, (longlong_t)scnp->scn_bookmark.zb_level, (longlong_t)scnp->scn_bookmark.zb_blkid); } scn->scn_zio_root = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_CANFAIL); scn->scn_prefetch_stop = B_FALSE; prefetch_tqid = taskq_dispatch(dp->dp_sync_taskq, dsl_scan_prefetch_thread, scn, TQ_SLEEP); ASSERT(prefetch_tqid != TASKQID_INVALID); dsl_pool_config_enter(dp, FTAG); dsl_scan_visit(scn, tx); dsl_pool_config_exit(dp, FTAG); mutex_enter(&dp->dp_spa->spa_scrub_lock); scn->scn_prefetch_stop = B_TRUE; cv_broadcast(&spa->spa_scrub_io_cv); mutex_exit(&dp->dp_spa->spa_scrub_lock); taskq_wait_id(dp->dp_sync_taskq, prefetch_tqid); (void) zio_wait(scn->scn_zio_root); scn->scn_zio_root = NULL; zfs_dbgmsg("scan visited %llu blocks in %llums " "(%llu os's, %llu holes, %llu < mintxg, " "%llu in ddt, %llu > maxtxg)", (longlong_t)scn->scn_visited_this_txg, (longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time), (longlong_t)scn->scn_objsets_visited_this_txg, (longlong_t)scn->scn_holes_this_txg, (longlong_t)scn->scn_lt_min_this_txg, (longlong_t)scn->scn_ddt_contained_this_txg, (longlong_t)scn->scn_gt_max_this_txg); if (!scn->scn_suspending) { ASSERT0(avl_numnodes(&scn->scn_queue)); scn->scn_done_txg = tx->tx_txg + 1; if (scn->scn_is_sorted) { scn->scn_checkpointing = B_TRUE; scn->scn_clearing = B_TRUE; } zfs_dbgmsg("scan complete txg %llu", (longlong_t)tx->tx_txg); } } else if (scn->scn_is_sorted && scn->scn_bytes_pending != 0) { ASSERT(scn->scn_clearing); /* need to issue scrubbing IOs from per-vdev queues */ scn->scn_zio_root = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_CANFAIL); scan_io_queues_run(scn); (void) zio_wait(scn->scn_zio_root); scn->scn_zio_root = NULL; /* calculate and dprintf the current memory usage */ (void) dsl_scan_should_clear(scn); dsl_scan_update_stats(scn); zfs_dbgmsg("scrubbed %llu blocks (%llu segs) in %llums " "(avg_block_size = %llu, avg_seg_size = %llu)", (longlong_t)scn->scn_zios_this_txg, (longlong_t)scn->scn_segs_this_txg, (longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time), (longlong_t)scn->scn_avg_zio_size_this_txg, (longlong_t)scn->scn_avg_seg_size_this_txg); } else if (scn->scn_done_txg != 0 && scn->scn_done_txg <= tx->tx_txg) { /* Finished with everything. Mark the scrub as complete */ zfs_dbgmsg("scan issuing complete txg %llu", (longlong_t)tx->tx_txg); ASSERT3U(scn->scn_done_txg, !=, 0); ASSERT0(spa->spa_scrub_inflight); ASSERT0(scn->scn_bytes_pending); dsl_scan_done(scn, B_TRUE, tx); sync_type = SYNC_MANDATORY; } dsl_scan_sync_state(scn, tx, sync_type); } static void count_block(dsl_scan_t *scn, zfs_all_blkstats_t *zab, const blkptr_t *bp) { int i; /* update the spa's stats on how many bytes we have issued */ for (i = 0; i < BP_GET_NDVAS(bp); i++) { atomic_add_64(&scn->scn_dp->dp_spa->spa_scan_pass_issued, DVA_GET_ASIZE(&bp->blk_dva[i])); } /* * If we resume after a reboot, zab will be NULL; don't record * incomplete stats in that case. */ if (zab == NULL) return; mutex_enter(&zab->zab_lock); for (i = 0; i < 4; i++) { int l = (i < 2) ? BP_GET_LEVEL(bp) : DN_MAX_LEVELS; int t = (i & 1) ? BP_GET_TYPE(bp) : DMU_OT_TOTAL; if (t & DMU_OT_NEWTYPE) t = DMU_OT_OTHER; zfs_blkstat_t *zb = &zab->zab_type[l][t]; int equal; zb->zb_count++; zb->zb_asize += BP_GET_ASIZE(bp); zb->zb_lsize += BP_GET_LSIZE(bp); zb->zb_psize += BP_GET_PSIZE(bp); zb->zb_gangs += BP_COUNT_GANG(bp); switch (BP_GET_NDVAS(bp)) { case 2: if (DVA_GET_VDEV(&bp->blk_dva[0]) == DVA_GET_VDEV(&bp->blk_dva[1])) zb->zb_ditto_2_of_2_samevdev++; break; case 3: equal = (DVA_GET_VDEV(&bp->blk_dva[0]) == DVA_GET_VDEV(&bp->blk_dva[1])) + (DVA_GET_VDEV(&bp->blk_dva[0]) == DVA_GET_VDEV(&bp->blk_dva[2])) + (DVA_GET_VDEV(&bp->blk_dva[1]) == DVA_GET_VDEV(&bp->blk_dva[2])); if (equal == 1) zb->zb_ditto_2_of_3_samevdev++; else if (equal == 3) zb->zb_ditto_3_of_3_samevdev++; break; } } mutex_exit(&zab->zab_lock); } static void scan_io_queue_insert_impl(dsl_scan_io_queue_t *queue, scan_io_t *sio) { avl_index_t idx; int64_t asize = sio->sio_asize; dsl_scan_t *scn = queue->q_scn; ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock)); if (avl_find(&queue->q_sios_by_addr, sio, &idx) != NULL) { /* block is already scheduled for reading */ atomic_add_64(&scn->scn_bytes_pending, -asize); kmem_free(sio, sizeof (*sio)); return; } avl_insert(&queue->q_sios_by_addr, sio, idx); range_tree_add(queue->q_exts_by_addr, sio->sio_offset, asize); } /* * Given all the info we got from our metadata scanning process, we * construct a scan_io_t and insert it into the scan sorting queue. The * I/O must already be suitable for us to process. This is controlled * by dsl_scan_enqueue(). */ static void scan_io_queue_insert(dsl_scan_io_queue_t *queue, const blkptr_t *bp, int dva_i, int zio_flags, const zbookmark_phys_t *zb) { dsl_scan_t *scn = queue->q_scn; scan_io_t *sio = kmem_zalloc(sizeof (*sio), KM_SLEEP); ASSERT0(BP_IS_GANG(bp)); ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock)); bp2sio(bp, sio, dva_i); sio->sio_flags = zio_flags; sio->sio_zb = *zb; /* * Increment the bytes pending counter now so that we can't * get an integer underflow in case the worker processes the * zio before we get to incrementing this counter. */ atomic_add_64(&scn->scn_bytes_pending, sio->sio_asize); scan_io_queue_insert_impl(queue, sio); } /* * Given a set of I/O parameters as discovered by the metadata traversal * process, attempts to place the I/O into the sorted queues (if allowed), * or immediately executes the I/O. */ static void dsl_scan_enqueue(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags, const zbookmark_phys_t *zb) { spa_t *spa = dp->dp_spa; ASSERT(!BP_IS_EMBEDDED(bp)); /* * Gang blocks are hard to issue sequentially, so we just issue them * here immediately instead of queuing them. */ if (!dp->dp_scan->scn_is_sorted || BP_IS_GANG(bp)) { scan_exec_io(dp, bp, zio_flags, zb, NULL); return; } for (int i = 0; i < BP_GET_NDVAS(bp); i++) { dva_t dva; vdev_t *vdev; dva = bp->blk_dva[i]; vdev = vdev_lookup_top(spa, DVA_GET_VDEV(&dva)); ASSERT(vdev != NULL); mutex_enter(&vdev->vdev_scan_io_queue_lock); if (vdev->vdev_scan_io_queue == NULL) vdev->vdev_scan_io_queue = scan_io_queue_create(vdev); ASSERT(dp->dp_scan != NULL); scan_io_queue_insert(vdev->vdev_scan_io_queue, bp, i, zio_flags, zb); mutex_exit(&vdev->vdev_scan_io_queue_lock); } } static int dsl_scan_scrub_cb(dsl_pool_t *dp, const blkptr_t *bp, const zbookmark_phys_t *zb) { dsl_scan_t *scn = dp->dp_scan; spa_t *spa = dp->dp_spa; uint64_t phys_birth = BP_PHYSICAL_BIRTH(bp); size_t psize = BP_GET_PSIZE(bp); boolean_t needs_io; int zio_flags = ZIO_FLAG_SCAN_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL; int d; if (phys_birth <= scn->scn_phys.scn_min_txg || phys_birth >= scn->scn_phys.scn_max_txg) { count_block(scn, dp->dp_blkstats, bp); return (0); } /* Embedded BP's have phys_birth==0, so we reject them above. */ ASSERT(!BP_IS_EMBEDDED(bp)); ASSERT(DSL_SCAN_IS_SCRUB_RESILVER(scn)); if (scn->scn_phys.scn_func == POOL_SCAN_SCRUB) { zio_flags |= ZIO_FLAG_SCRUB; needs_io = B_TRUE; } else { ASSERT3U(scn->scn_phys.scn_func, ==, POOL_SCAN_RESILVER); zio_flags |= ZIO_FLAG_RESILVER; needs_io = B_FALSE; } /* If it's an intent log block, failure is expected. */ if (zb->zb_level == ZB_ZIL_LEVEL) zio_flags |= ZIO_FLAG_SPECULATIVE; for (d = 0; d < BP_GET_NDVAS(bp); d++) { const dva_t *dva = &bp->blk_dva[d]; /* * Keep track of how much data we've examined so that * zpool(1M) status can make useful progress reports. */ scn->scn_phys.scn_examined += DVA_GET_ASIZE(dva); spa->spa_scan_pass_exam += DVA_GET_ASIZE(dva); /* if it's a resilver, this may not be in the target range */ if (!needs_io) needs_io = dsl_scan_need_resilver(spa, dva, psize, phys_birth); } if (needs_io && !zfs_no_scrub_io) { dsl_scan_enqueue(dp, bp, zio_flags, zb); } else { count_block(scn, dp->dp_blkstats, bp); } /* do not relocate this block */ return (0); } static void dsl_scan_scrub_done(zio_t *zio) { spa_t *spa = zio->io_spa; blkptr_t *bp = zio->io_bp; dsl_scan_io_queue_t *queue = zio->io_private; abd_free(zio->io_abd); if (queue == NULL) { mutex_enter(&spa->spa_scrub_lock); ASSERT3U(spa->spa_scrub_inflight, >=, BP_GET_PSIZE(bp)); spa->spa_scrub_inflight -= BP_GET_PSIZE(bp); cv_broadcast(&spa->spa_scrub_io_cv); mutex_exit(&spa->spa_scrub_lock); } else { mutex_enter(&queue->q_vd->vdev_scan_io_queue_lock); ASSERT3U(queue->q_inflight_bytes, >=, BP_GET_PSIZE(bp)); queue->q_inflight_bytes -= BP_GET_PSIZE(bp); cv_broadcast(&queue->q_zio_cv); mutex_exit(&queue->q_vd->vdev_scan_io_queue_lock); } if (zio->io_error && (zio->io_error != ECKSUM || !(zio->io_flags & ZIO_FLAG_SPECULATIVE))) { atomic_inc_64(&spa->spa_dsl_pool->dp_scan->scn_phys.scn_errors); } } /* * Given a scanning zio's information, executes the zio. The zio need * not necessarily be only sortable, this function simply executes the * zio, no matter what it is. The optional queue argument allows the * caller to specify that they want per top level vdev IO rate limiting * instead of the legacy global limiting. */ static void scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags, const zbookmark_phys_t *zb, dsl_scan_io_queue_t *queue) { spa_t *spa = dp->dp_spa; dsl_scan_t *scn = dp->dp_scan; size_t size = BP_GET_PSIZE(bp); abd_t *data = abd_alloc_for_io(size, B_FALSE); unsigned int scan_delay = 0; ASSERT3U(scn->scn_maxinflight_bytes, >, 0); if (queue == NULL) { mutex_enter(&spa->spa_scrub_lock); while (spa->spa_scrub_inflight >= scn->scn_maxinflight_bytes) cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); spa->spa_scrub_inflight += BP_GET_PSIZE(bp); mutex_exit(&spa->spa_scrub_lock); } else { kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock; mutex_enter(q_lock); while (queue->q_inflight_bytes >= queue->q_maxinflight_bytes) cv_wait(&queue->q_zio_cv, q_lock); queue->q_inflight_bytes += BP_GET_PSIZE(bp); mutex_exit(q_lock); } if (zio_flags & ZIO_FLAG_RESILVER) scan_delay = zfs_resilver_delay; else { ASSERT(zio_flags & ZIO_FLAG_SCRUB); scan_delay = zfs_scrub_delay; } if (scan_delay && (ddi_get_lbolt64() - spa->spa_last_io <= zfs_scan_idle)) delay(MAX((int)scan_delay, 0)); count_block(dp->dp_scan, dp->dp_blkstats, bp); zio_nowait(zio_read(dp->dp_scan->scn_zio_root, spa, bp, data, size, dsl_scan_scrub_done, queue, ZIO_PRIORITY_SCRUB, zio_flags, zb)); } /* * This is the primary extent sorting algorithm. We balance two parameters: * 1) how many bytes of I/O are in an extent * 2) how well the extent is filled with I/O (as a fraction of its total size) * Since we allow extents to have gaps between their constituent I/Os, it's * possible to have a fairly large extent that contains the same amount of * I/O bytes than a much smaller extent, which just packs the I/O more tightly. * The algorithm sorts based on a score calculated from the extent's size, * the relative fill volume (in %) and a "fill weight" parameter that controls * the split between whether we prefer larger extents or more well populated * extents: * * SCORE = FILL_IN_BYTES + (FILL_IN_PERCENT * FILL_IN_BYTES * FILL_WEIGHT) * * Example: * 1) assume extsz = 64 MiB * 2) assume fill = 32 MiB (extent is half full) * 3) assume fill_weight = 3 * 4) SCORE = 32M + (((32M * 100) / 64M) * 3 * 32M) / 100 * SCORE = 32M + (50 * 3 * 32M) / 100 * SCORE = 32M + (4800M / 100) * SCORE = 32M + 48M * ^ ^ * | +--- final total relative fill-based score * +--------- final total fill-based score * SCORE = 80M * * As can be seen, at fill_ratio=3, the algorithm is slightly biased towards * extents that are more completely filled (in a 3:2 ratio) vs just larger. * Note that as an optimization, we replace multiplication and division by * 100 with bitshifting by 7 (which effecitvely multiplies and divides by 128). */ static int ext_size_compare(const void *x, const void *y) { const range_seg_t *rsa = x, *rsb = y; uint64_t sa = rsa->rs_end - rsa->rs_start, sb = rsb->rs_end - rsb->rs_start; uint64_t score_a, score_b; score_a = rsa->rs_fill + ((((rsa->rs_fill << 7) / sa) * fill_weight * rsa->rs_fill) >> 7); score_b = rsb->rs_fill + ((((rsb->rs_fill << 7) / sb) * fill_weight * rsb->rs_fill) >> 7); if (score_a > score_b) return (-1); if (score_a == score_b) { if (rsa->rs_start < rsb->rs_start) return (-1); if (rsa->rs_start == rsb->rs_start) return (0); return (1); } return (1); } /* * Comparator for the q_sios_by_addr tree. Sorting is simply performed * based on LBA-order (from lowest to highest). */ static int io_addr_compare(const void *x, const void *y) { const scan_io_t *a = x, *b = y; if (a->sio_offset < b->sio_offset) return (-1); if (a->sio_offset == b->sio_offset) return (0); return (1); } /* IO queues are created on demand when they are needed. */ static dsl_scan_io_queue_t * scan_io_queue_create(vdev_t *vd) { dsl_scan_t *scn = vd->vdev_spa->spa_dsl_pool->dp_scan; dsl_scan_io_queue_t *q = kmem_zalloc(sizeof (*q), KM_SLEEP); q->q_scn = scn; q->q_vd = vd; cv_init(&q->q_zio_cv, NULL, CV_DEFAULT, NULL); q->q_exts_by_addr = range_tree_create_impl(&rt_avl_ops, &q->q_exts_by_size, ext_size_compare, zfs_scan_max_ext_gap); avl_create(&q->q_sios_by_addr, io_addr_compare, sizeof (scan_io_t), offsetof(scan_io_t, sio_nodes.sio_addr_node)); return (q); } /* * Destroys a scan queue and all segments and scan_io_t's contained in it. * No further execution of I/O occurs, anything pending in the queue is * simply freed without being executed. */ void dsl_scan_io_queue_destroy(dsl_scan_io_queue_t *queue) { dsl_scan_t *scn = queue->q_scn; scan_io_t *sio; void *cookie = NULL; int64_t bytes_dequeued = 0; ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock)); while ((sio = avl_destroy_nodes(&queue->q_sios_by_addr, &cookie)) != NULL) { ASSERT(range_tree_contains(queue->q_exts_by_addr, sio->sio_offset, sio->sio_asize)); bytes_dequeued += sio->sio_asize; kmem_free(sio, sizeof (*sio)); } atomic_add_64(&scn->scn_bytes_pending, -bytes_dequeued); range_tree_vacate(queue->q_exts_by_addr, NULL, queue); range_tree_destroy(queue->q_exts_by_addr); avl_destroy(&queue->q_sios_by_addr); cv_destroy(&queue->q_zio_cv); kmem_free(queue, sizeof (*queue)); } /* * Properly transfers a dsl_scan_queue_t from `svd' to `tvd'. This is * called on behalf of vdev_top_transfer when creating or destroying * a mirror vdev due to zpool attach/detach. */ void dsl_scan_io_queue_vdev_xfer(vdev_t *svd, vdev_t *tvd) { mutex_enter(&svd->vdev_scan_io_queue_lock); mutex_enter(&tvd->vdev_scan_io_queue_lock); VERIFY3P(tvd->vdev_scan_io_queue, ==, NULL); tvd->vdev_scan_io_queue = svd->vdev_scan_io_queue; svd->vdev_scan_io_queue = NULL; if (tvd->vdev_scan_io_queue != NULL) tvd->vdev_scan_io_queue->q_vd = tvd; mutex_exit(&tvd->vdev_scan_io_queue_lock); mutex_exit(&svd->vdev_scan_io_queue_lock); } static void scan_io_queues_destroy(dsl_scan_t *scn) { vdev_t *rvd = scn->scn_dp->dp_spa->spa_root_vdev; for (uint64_t i = 0; i < rvd->vdev_children; i++) { vdev_t *tvd = rvd->vdev_child[i]; mutex_enter(&tvd->vdev_scan_io_queue_lock); if (tvd->vdev_scan_io_queue != NULL) dsl_scan_io_queue_destroy(tvd->vdev_scan_io_queue); tvd->vdev_scan_io_queue = NULL; mutex_exit(&tvd->vdev_scan_io_queue_lock); } } static void dsl_scan_freed_dva(spa_t *spa, const blkptr_t *bp, int dva_i) { dsl_pool_t *dp = spa->spa_dsl_pool; dsl_scan_t *scn = dp->dp_scan; vdev_t *vdev; kmutex_t *q_lock; dsl_scan_io_queue_t *queue; scan_io_t srch, *sio; avl_index_t idx; uint64_t start, size; vdev = vdev_lookup_top(spa, DVA_GET_VDEV(&bp->blk_dva[dva_i])); ASSERT(vdev != NULL); q_lock = &vdev->vdev_scan_io_queue_lock; queue = vdev->vdev_scan_io_queue; mutex_enter(q_lock); if (queue == NULL) { mutex_exit(q_lock); return; } bp2sio(bp, &srch, dva_i); start = srch.sio_offset; size = srch.sio_asize; /* * We can find the zio in two states: * 1) Cold, just sitting in the queue of zio's to be issued at * some point in the future. In this case, all we do is * remove the zio from the q_sios_by_addr tree, decrement * its data volume from the containing range_seg_t and * resort the q_exts_by_size tree to reflect that the * range_seg_t has lost some of its 'fill'. We don't shorten * the range_seg_t - this is usually rare enough not to be * worth the extra hassle of trying keep track of precise * extent boundaries. * 2) Hot, where the zio is currently in-flight in * dsl_scan_issue_ios. In this case, we can't simply * reach in and stop the in-flight zio's, so we instead * block the caller. Eventually, dsl_scan_issue_ios will * be done with issuing the zio's it gathered and will * signal us. */ sio = avl_find(&queue->q_sios_by_addr, &srch, &idx); if (sio != NULL) { int64_t asize = sio->sio_asize; blkptr_t tmpbp; /* Got it while it was cold in the queue */ ASSERT3U(start, ==, sio->sio_offset); ASSERT3U(size, ==, asize); avl_remove(&queue->q_sios_by_addr, sio); ASSERT(range_tree_contains(queue->q_exts_by_addr, start, size)); range_tree_remove_fill(queue->q_exts_by_addr, start, size); /* * We only update scn_bytes_pending in the cold path, * otherwise it will already have been accounted for as * part of the zio's execution. */ atomic_add_64(&scn->scn_bytes_pending, -asize); /* count the block as though we issued it */ sio2bp(sio, &tmpbp, dva_i); count_block(scn, dp->dp_blkstats, &tmpbp); kmem_free(sio, sizeof (*sio)); } mutex_exit(q_lock); } /* * Callback invoked when a zio_free() zio is executing. This needs to be * intercepted to prevent the zio from deallocating a particular portion * of disk space and it then getting reallocated and written to, while we * still have it queued up for processing. */ void dsl_scan_freed(spa_t *spa, const blkptr_t *bp) { dsl_pool_t *dp = spa->spa_dsl_pool; dsl_scan_t *scn = dp->dp_scan; ASSERT(!BP_IS_EMBEDDED(bp)); ASSERT(scn != NULL); if (!dsl_scan_is_running(scn)) return; for (int i = 0; i < BP_GET_NDVAS(bp); i++) dsl_scan_freed_dva(spa, bp, i); } diff --git a/sys/conf/NOTES b/sys/conf/NOTES index d4a48f64ffa2..5f3a22088bc7 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -1,2827 +1,2830 @@ # $FreeBSD$ # # NOTES -- Lines that can be cut/pasted into kernel and hints configs. # # Lines that begin with 'device', 'options', 'machine', 'ident', 'maxusers', # 'makeoptions', 'hints', etc. go into the kernel configuration that you # run config(8) with. # # Lines that begin with 'hint.' are NOT for config(8), they go into your # hints file. See /boot/device.hints and/or the 'hints' config(8) directive. # # Please use ``make LINT'' to create an old-style LINT file if you want to # do kernel test-builds. # # This file contains machine independent kernel configuration notes. For # machine dependent notes, look in /sys//conf/NOTES. # # # NOTES conventions and style guide: # # Large block comments should begin and end with a line containing only a # comment character. # # To describe a particular object, a block comment (if it exists) should # come first. Next should come device, options, and hints lines in that # order. All device and option lines must be described by a comment that # doesn't just expand the device or option name. Use only a concise # comment on the same line if possible. Very detailed descriptions of # devices and subsystems belong in man pages. # # A space followed by a tab separates 'options' from an option name. Two # spaces followed by a tab separate 'device' from a device name. Comments # after an option or device should use one space after the comment character. # To comment out a negative option that disables code and thus should not be # enabled for LINT builds, precede 'options' with "#!". # # # This is the ``identification'' of the kernel. Usually this should # be the same as the name of your kernel. # ident LINT # # The `maxusers' parameter controls the static sizing of a number of # internal system tables by a formula defined in subr_param.c. # Omitting this parameter or setting it to 0 will cause the system to # auto-size based on physical memory. # maxusers 10 # To statically compile in device wiring instead of /boot/device.hints #hints "LINT.hints" # Default places to look for devices. # Use the following to compile in values accessible to the kernel # through getenv() (or kenv(1) in userland). The format of the file # is 'variable=value', see kenv(1) # #env "LINT.env" # # The `makeoptions' parameter allows variables to be passed to the # generated Makefile in the build area. # # CONF_CFLAGS gives some extra compiler flags that are added to ${CFLAGS} # after most other flags. Here we use it to inhibit use of non-optimal # gcc built-in functions (e.g., memcmp). # # DEBUG happens to be magic. # The following is equivalent to 'config -g KERNELNAME' and creates # 'kernel.debug' compiled with -g debugging as well as a normal # 'kernel'. Use 'make install.debug' to install the debug kernel # but that isn't normally necessary as the debug symbols are not loaded # by the kernel and are not useful there anyway. # # KERNEL can be overridden so that you can change the default name of your # kernel. # # MODULES_OVERRIDE can be used to limit modules built to a specific list. # makeoptions CONF_CFLAGS=-fno-builtin #Don't allow use of memcmp, etc. #makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols #makeoptions KERNEL=foo #Build kernel "foo" and install "/foo" # Only build ext2fs module plus those parts of the sound system I need. #makeoptions MODULES_OVERRIDE="ext2fs sound/sound sound/driver/maestro3" makeoptions DESTDIR=/tmp # # FreeBSD processes are subject to certain limits to their consumption # of system resources. See getrlimit(2) for more details. Each # resource limit has two values, a "soft" limit and a "hard" limit. # The soft limits can be modified during normal system operation, but # the hard limits are set at boot time. Their default values are # in sys//include/vmparam.h. There are two ways to change them: # # 1. Set the values at kernel build time. The options below are one # way to allow that limit to grow to 1GB. They can be increased # further by changing the parameters: # # 2. In /boot/loader.conf, set the tunables kern.maxswzone, # kern.maxbcache, kern.maxtsiz, kern.dfldsiz, kern.maxdsiz, # kern.dflssiz, kern.maxssiz and kern.sgrowsiz. # # The options in /boot/loader.conf override anything in the kernel # configuration file. See the function init_param1 in # sys/kern/subr_param.c for more details. # options MAXDSIZ=(1024UL*1024*1024) options MAXSSIZ=(128UL*1024*1024) options DFLDSIZ=(1024UL*1024*1024) # # BLKDEV_IOSIZE sets the default block size used in user block # device I/O. Note that this value will be overridden by the label # when specifying a block device from a label with a non-0 # partition blocksize. The default is PAGE_SIZE. # options BLKDEV_IOSIZE=8192 # # MAXPHYS and DFLTPHYS # # These are the maximal and safe 'raw' I/O block device access sizes. # Reads and writes will be split into MAXPHYS chunks for known good # devices and DFLTPHYS for the rest. Some applications have better # performance with larger raw I/O access sizes. Note that certain VM # parameters are derived from these values and making them too large # can make an unbootable kernel. # # The defaults are 64K and 128K respectively. options DFLTPHYS=(64*1024) options MAXPHYS=(128*1024) # This allows you to actually store this configuration file into # the kernel binary itself. See config(8) for more details. # options INCLUDE_CONFIG_FILE # Include this file in kernel # # Compile-time defaults for various boot parameters # options BOOTVERBOSE=1 options BOOTHOWTO=RB_MULTIPLE # # Compile-time defaults for dmesg boot tagging # # Default boot tag; may use 'kern.boot_tag' loader tunable to override. The # current boot's tag is also exposed via the 'kern.boot_tag' sysctl. options BOOT_TAG=\"\" # Maximum boot tag size the kernel's static buffer should accomodate. Maximum # size for both BOOT_TAG and the assocated tunable. options BOOT_TAG_SZ=32 options GEOM_BDE # Disk encryption. options GEOM_CACHE # Disk cache. options GEOM_CONCAT # Disk concatenation. options GEOM_ELI # Disk encryption. options GEOM_GATE # Userland services. options GEOM_JOURNAL # Journaling. options GEOM_LABEL # Providers labelization. options GEOM_LINUX_LVM # Linux LVM2 volumes options GEOM_MAP # Map based partitioning options GEOM_MIRROR # Disk mirroring. options GEOM_MULTIPATH # Disk multipath options GEOM_NOP # Test class. options GEOM_PART_APM # Apple partitioning options GEOM_PART_BSD # BSD disklabel options GEOM_PART_BSD64 # BSD disklabel64 options GEOM_PART_EBR # Extended Boot Records options GEOM_PART_EBR_COMPAT # Backward compatible partition names options GEOM_PART_GPT # GPT partitioning options GEOM_PART_LDM # Logical Disk Manager options GEOM_PART_MBR # MBR partitioning options GEOM_PART_VTOC8 # SMI VTOC8 disk label options GEOM_RAID # Soft RAID functionality. options GEOM_RAID3 # RAID3 functionality. options GEOM_SHSEC # Shared secret. options GEOM_STRIPE # Disk striping. options GEOM_UZIP # Read-only compressed disks options GEOM_VINUM # Vinum logical volume manager options GEOM_VIRSTOR # Virtual storage. options GEOM_ZERO # Performance testing helper. # # The root device and filesystem type can be compiled in; # this provides a fallback option if the root device cannot # be correctly guessed by the bootstrap code, or an override if # the RB_DFLTROOT flag (-r) is specified when booting the kernel. # options ROOTDEVNAME=\"ufs:da0s2e\" ##################################################################### # Scheduler options: # # Specifying one of SCHED_4BSD or SCHED_ULE is mandatory. These options # select which scheduler is compiled in. # # SCHED_4BSD is the historical, proven, BSD scheduler. It has a global run # queue and no CPU affinity which makes it suboptimal for SMP. It has very # good interactivity and priority selection. # # SCHED_ULE provides significant performance advantages over 4BSD on many # workloads on SMP machines. It supports cpu-affinity, per-cpu runqueues # and scheduler locks. It also has a stronger notion of interactivity # which leads to better responsiveness even on uniprocessor machines. This # is the default scheduler. # # SCHED_STATS is a debugging option which keeps some stats in the sysctl # tree at 'kern.sched.stats' and is useful for debugging scheduling decisions. # options SCHED_4BSD options SCHED_STATS #options SCHED_ULE ##################################################################### # SMP OPTIONS: # # SMP enables building of a Symmetric MultiProcessor Kernel. # Mandatory: options SMP # Symmetric MultiProcessor Kernel # EARLY_AP_STARTUP releases the Application Processors earlier in the # kernel startup process (before devices are probed) rather than at the # end. This is a temporary option for use during the transition from # late to early AP startup. options EARLY_AP_STARTUP # MAXCPU defines the maximum number of CPUs that can boot in the system. # A default value should be already present, for every architecture. options MAXCPU=32 # NUMA enables use of Non-Uniform Memory Access policies in various kernel # subsystems. options NUMA # MAXMEMDOM defines the maximum number of memory domains that can boot in the # system. A default value should already be defined by every architecture. options MAXMEMDOM=2 # ADAPTIVE_MUTEXES changes the behavior of blocking mutexes to spin # if the thread that currently owns the mutex is executing on another # CPU. This behavior is enabled by default, so this option can be used # to disable it. options NO_ADAPTIVE_MUTEXES # ADAPTIVE_RWLOCKS changes the behavior of reader/writer locks to spin # if the thread that currently owns the rwlock is executing on another # CPU. This behavior is enabled by default, so this option can be used # to disable it. options NO_ADAPTIVE_RWLOCKS # ADAPTIVE_SX changes the behavior of sx locks to spin if the thread that # currently owns the sx lock is executing on another CPU. # This behavior is enabled by default, so this option can be used to # disable it. options NO_ADAPTIVE_SX # MUTEX_NOINLINE forces mutex operations to call functions to perform each # operation rather than inlining the simple cases. This can be used to # shrink the size of the kernel text segment. Note that this behavior is # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING, # and WITNESS options. options MUTEX_NOINLINE # RWLOCK_NOINLINE forces rwlock operations to call functions to perform each # operation rather than inlining the simple cases. This can be used to # shrink the size of the kernel text segment. Note that this behavior is # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING, # and WITNESS options. options RWLOCK_NOINLINE # SX_NOINLINE forces sx lock operations to call functions to perform each # operation rather than inlining the simple cases. This can be used to # shrink the size of the kernel text segment. Note that this behavior is # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING, # and WITNESS options. options SX_NOINLINE # SMP Debugging Options: # # CALLOUT_PROFILING enables rudimentary profiling of the callwheel data # structure used as backend in callout(9). # PREEMPTION allows the threads that are in the kernel to be preempted by # higher priority [interrupt] threads. It helps with interactivity # and allows interrupt threads to run sooner rather than waiting. # WARNING! Only tested on amd64 and i386. # FULL_PREEMPTION instructs the kernel to preempt non-realtime kernel # threads. Its sole use is to expose race conditions and other # bugs during development. Enabling this option will reduce # performance and increase the frequency of kernel panics by # design. If you aren't sure that you need it then you don't. # Relies on the PREEMPTION option. DON'T TURN THIS ON. # SLEEPQUEUE_PROFILING enables rudimentary profiling of the hash table # used to hold active sleep queues as well as sleep wait message # frequency. # TURNSTILE_PROFILING enables rudimentary profiling of the hash table # used to hold active lock queues. # UMTX_PROFILING enables rudimentary profiling of the hash table used # to hold active lock queues. # WITNESS enables the witness code which detects deadlocks and cycles # during locking operations. # WITNESS_KDB causes the witness code to drop into the kernel debugger if # a lock hierarchy violation occurs or if locks are held when going to # sleep. # WITNESS_SKIPSPIN disables the witness checks on spin mutexes. options PREEMPTION options FULL_PREEMPTION options WITNESS options WITNESS_KDB options WITNESS_SKIPSPIN # LOCK_PROFILING - Profiling locks. See LOCK_PROFILING(9) for details. options LOCK_PROFILING # Set the number of buffers and the hash size. The hash size MUST be larger # than the number of buffers. Hash size should be prime. options MPROF_BUFFERS="1536" options MPROF_HASH_SIZE="1543" # Profiling for the callout(9) backend. options CALLOUT_PROFILING # Profiling for internal hash tables. options SLEEPQUEUE_PROFILING options TURNSTILE_PROFILING options UMTX_PROFILING ##################################################################### # COMPATIBILITY OPTIONS # Old tty interface. options COMPAT_43TTY # Note that as a general rule, COMPAT_FREEBSD depends on # COMPAT_FREEBSD, COMPAT_FREEBSD, etc. # Enable FreeBSD4 compatibility syscalls options COMPAT_FREEBSD4 # Enable FreeBSD5 compatibility syscalls options COMPAT_FREEBSD5 # Enable FreeBSD6 compatibility syscalls options COMPAT_FREEBSD6 # Enable FreeBSD7 compatibility syscalls options COMPAT_FREEBSD7 # Enable FreeBSD9 compatibility syscalls options COMPAT_FREEBSD9 # Enable FreeBSD10 compatibility syscalls options COMPAT_FREEBSD10 # Enable FreeBSD11 compatibility syscalls options COMPAT_FREEBSD11 # Enable FreeBSD12 compatibility syscalls options COMPAT_FREEBSD12 # Enable Linux Kernel Programming Interface options COMPAT_LINUXKPI # # These three options provide support for System V Interface # Definition-style interprocess communication, in the form of shared # memory, semaphores, and message queues, respectively. # options SYSVSHM options SYSVSEM options SYSVMSG ##################################################################### # DEBUGGING OPTIONS # # Compile with kernel debugger related code. # options KDB # # Print a stack trace of the current thread on the console for a panic. # options KDB_TRACE # # Don't enter the debugger for a panic. Intended for unattended operation # where you may want to enter the debugger from the console, but still want # the machine to recover from a panic. # options KDB_UNATTENDED # # Enable the ddb debugger backend. # options DDB # # Print the numerical value of symbols in addition to the symbolic # representation. # options DDB_NUMSYM # # Enable the remote gdb debugger backend. # options GDB # # SYSCTL_DEBUG enables a 'sysctl' debug tree that can be used to dump the # contents of the registered sysctl nodes on the console. It is disabled by # default because it generates excessively verbose console output that can # interfere with serial console operation. # options SYSCTL_DEBUG # # Enable textdump by default, this disables kernel core dumps. # options TEXTDUMP_PREFERRED # # Enable extra debug messages while performing textdumps. # options TEXTDUMP_VERBOSE # # NO_SYSCTL_DESCR omits the sysctl node descriptions to save space in the # resulting kernel. options NO_SYSCTL_DESCR # # MALLOC_DEBUG_MAXZONES enables multiple uma zones for malloc(9) # allocations that are smaller than a page. The purpose is to isolate # different malloc types into hash classes, so that any buffer # overruns or use-after-free will usually only affect memory from # malloc types in that hash class. This is purely a debugging tool; # by varying the hash function and tracking which hash class was # corrupted, the intersection of the hash classes from each instance # will point to a single malloc type that is being misused. At this # point inspection or memguard(9) can be used to catch the offending # code. # options MALLOC_DEBUG_MAXZONES=8 # # DEBUG_MEMGUARD builds and enables memguard(9), a replacement allocator # for the kernel used to detect modify-after-free scenarios. See the # memguard(9) man page for more information on usage. # options DEBUG_MEMGUARD # # DEBUG_REDZONE enables buffer underflows and buffer overflows detection for # malloc(9). # options DEBUG_REDZONE # # EARLY_PRINTF enables support for calling a special printf (eprintf) # very early in the kernel (before cn_init() has been called). This # should only be used for debugging purposes early in boot. Normally, # it is not defined. It is commented out here because this feature # isn't generally available. And the required eputc() isn't defined. # #options EARLY_PRINTF # # KTRACE enables the system-call tracing facility ktrace(2). To be more # SMP-friendly, KTRACE uses a worker thread to process most trace events # asynchronously to the thread generating the event. This requires a # pre-allocated store of objects representing trace events. The # KTRACE_REQUEST_POOL option specifies the initial size of this store. # The size of the pool can be adjusted both at boottime and runtime via # the kern.ktrace_request_pool tunable and sysctl. # options KTRACE #kernel tracing options KTRACE_REQUEST_POOL=101 # # KTR is a kernel tracing facility imported from BSD/OS. It is # enabled with the KTR option. KTR_ENTRIES defines the number of # entries in the circular trace buffer; it may be an arbitrary number. # KTR_BOOT_ENTRIES defines the number of entries during the early boot, # before malloc(9) is functional. # KTR_COMPILE defines the mask of events to compile into the kernel as # defined by the KTR_* constants in . KTR_MASK defines the # initial value of the ktr_mask variable which determines at runtime # what events to trace. KTR_CPUMASK determines which CPU's log # events, with bit X corresponding to CPU X. The layout of the string # passed as KTR_CPUMASK must match a series of bitmasks each of them # separated by the "," character (ie: # KTR_CPUMASK=0xAF,0xFFFFFFFFFFFFFFFF). KTR_VERBOSE enables # dumping of KTR events to the console by default. This functionality # can be toggled via the debug.ktr_verbose sysctl and defaults to off # if KTR_VERBOSE is not defined. See ktr(4) and ktrdump(8) for details. # options KTR options KTR_BOOT_ENTRIES=1024 options KTR_ENTRIES=(128*1024) options KTR_COMPILE=(KTR_ALL) options KTR_MASK=KTR_INTR options KTR_CPUMASK=0x3 options KTR_VERBOSE # # ALQ(9) is a facility for the asynchronous queuing of records from the kernel # to a vnode, and is employed by services such as ktr(4) to produce trace # files based on a kernel event stream. Records are written asynchronously # in a worker thread. # options ALQ options KTR_ALQ # # The INVARIANTS option is used in a number of source files to enable # extra sanity checking of internal structures. This support is not # enabled by default because of the extra time it would take to check # for these conditions, which can only occur as a result of # programming errors. # options INVARIANTS # # The INVARIANT_SUPPORT option makes us compile in support for # verifying some of the internal structures. It is a prerequisite for # 'INVARIANTS', as enabling 'INVARIANTS' will make these functions be # called. The intent is that you can set 'INVARIANTS' for single # source files (by changing the source file or specifying it on the # command line) if you have 'INVARIANT_SUPPORT' enabled. Also, if you # wish to build a kernel module with 'INVARIANTS', then adding # 'INVARIANT_SUPPORT' to your kernel will provide all the necessary # infrastructure without the added overhead. # options INVARIANT_SUPPORT # # The KASSERT_PANIC_OPTIONAL option allows kasserts to fire without # necessarily inducing a panic. Panic is the default behavior, but # runtime options can configure it either entirely off, or off with a # limit. # options KASSERT_PANIC_OPTIONAL # # The DIAGNOSTIC option is used to enable extra debugging information # and invariants checking. The added checks are too expensive or noisy # for an INVARIANTS kernel and thus are disabled by default. It is # expected that a kernel configured with DIAGNOSTIC will also have the # INVARIANTS option enabled. # options DIAGNOSTIC # # REGRESSION causes optional kernel interfaces necessary only for regression # testing to be enabled. These interfaces may constitute security risks # when enabled, as they permit processes to easily modify aspects of the # run-time environment to reproduce unlikely or unusual (possibly normally # impossible) scenarios. # options REGRESSION # # This option lets some drivers co-exist that can't co-exist in a running # system. This is used to be able to compile all kernel code in one go for # quality assurance purposes (like this file, which the option takes it name # from.) # options COMPILING_LINT # # STACK enables the stack(9) facility, allowing the capture of kernel stack # for the purpose of procinfo(1), etc. stack(9) will also be compiled in # automatically if DDB(4) is compiled into the kernel. # options STACK # # The NUM_CORE_FILES option specifies the limit for the number of core # files generated by a particular process, when the core file format # specifier includes the %I pattern. Since we only have 1 character for # the core count in the format string, meaning the range will be 0-9, the # maximum value allowed for this option is 10. # This core file limit can be adjusted at runtime via the debug.ncores # sysctl. # options NUM_CORE_FILES=5 # # The TSLOG option enables timestamped logging of events, especially # function entries/exits, in order to track the time spent by the kernel. # In particular, this is useful when investigating the early boot process, # before it is possible to use more sophisticated tools like DTrace. # The TSLOGSIZE option controls the size of the (preallocated, fixed # length) buffer used for storing these events (default: 262144 records). # # For security reasons the TSLOG option should not be enabled on systems # used in production. # options TSLOG options TSLOGSIZE=262144 ##################################################################### # PERFORMANCE MONITORING OPTIONS # # The hwpmc driver that allows the use of in-CPU performance monitoring # counters for performance monitoring. The base kernel needs to be configured # with the 'options' line, while the hwpmc device can be either compiled # in or loaded as a loadable kernel module. # # Additional configuration options may be required on specific architectures, # please see hwpmc(4). device hwpmc # Driver (also a loadable module) options HWPMC_DEBUG options HWPMC_HOOKS # Other necessary kernel hooks ##################################################################### # NETWORKING OPTIONS # # Protocol families # options INET #Internet communications protocols options INET6 #IPv6 communications protocols options RATELIMIT # TX rate limiting support options ROUTETABLES=2 # allocated fibs up to 65536. default is 1. # but that would be a bad idea as they are large. options TCP_OFFLOAD # TCP offload support. options TCP_RFC7413 # TCP Fast Open options TCPHPTS # In order to enable IPSEC you MUST also add device crypto to # your kernel configuration options IPSEC #IP security (requires device crypto) # Option IPSEC_SUPPORT does not enable IPsec, but makes it possible to # load it as a kernel module. You still MUST add device crypto to your kernel # configuration. options IPSEC_SUPPORT #options IPSEC_DEBUG #debug for IP security # TLS framing and encryption of data transmitted over TCP sockets. options KERN_TLS # TLS transmit offload # # SMB/CIFS requester # NETSMB enables support for SMB protocol, it requires LIBMCHAIN and LIBICONV # options. options NETSMB #SMB/CIFS requester # mchain library. It can be either loaded as KLD or compiled into kernel options LIBMCHAIN # libalias library, performing NAT options LIBALIAS # # SCTP is a NEW transport protocol defined by # RFC2960 updated by RFC3309 and RFC3758.. and # soon to have a new base RFC and many many more # extensions. This release supports all the extensions # including many drafts (most about to become RFC's). # It is the reference implementation of SCTP # and is quite well tested. # # Note YOU MUST have both INET and INET6 defined. # You don't have to enable V6, but SCTP is # dual stacked and so far we have not torn apart # the V6 and V4.. since an association can span # both a V6 and V4 address at the SAME time :-) # options SCTP # There are bunches of options: # this one turns on all sorts of # nastily printing that you can # do. It's all controlled by a # bit mask (settable by socket opt and # by sysctl). Including will not cause # logging until you set the bits.. but it # can be quite verbose.. so without this # option we don't do any of the tests for # bits and prints.. which makes the code run # faster.. if you are not debugging don't use. options SCTP_DEBUG # # All that options after that turn on specific types of # logging. You can monitor CWND growth, flight size # and all sorts of things. Go look at the code and # see. I have used this to produce interesting # charts and graphs as well :-> # # I have not yet committed the tools to get and print # the logs, I will do that eventually .. before then # if you want them send me an email rrs@freebsd.org # You basically must have ktr(4) enabled for these # and you then set the sysctl to turn on/off various # logging bits. Use ktrdump(8) to pull the log and run # it through a display program.. and graphs and other # things too. # options SCTP_LOCK_LOGGING options SCTP_MBUF_LOGGING options SCTP_MBCNT_LOGGING options SCTP_PACKET_LOGGING options SCTP_LTRACE_CHUNKS options SCTP_LTRACE_ERRORS # altq(9). Enable the base part of the hooks with the ALTQ option. # Individual disciplines must be built into the base system and can not be # loaded as modules at this point. ALTQ requires a stable TSC so if yours is # broken or changes with CPU throttling then you must also have the ALTQ_NOPCC # option. options ALTQ options ALTQ_CBQ # Class Based Queueing options ALTQ_RED # Random Early Detection options ALTQ_RIO # RED In/Out options ALTQ_CODEL # CoDel Active Queueing options ALTQ_HFSC # Hierarchical Packet Scheduler options ALTQ_FAIRQ # Fair Packet Scheduler options ALTQ_CDNR # Traffic conditioner options ALTQ_PRIQ # Priority Queueing options ALTQ_NOPCC # Required if the TSC is unusable options ALTQ_DEBUG # netgraph(4). Enable the base netgraph code with the NETGRAPH option. # Individual node types can be enabled with the corresponding option # listed below; however, this is not strictly necessary as netgraph # will automatically load the corresponding KLD module if the node type # is not already compiled into the kernel. Each type below has a # corresponding man page, e.g., ng_async(8). options NETGRAPH # netgraph(4) system options NETGRAPH_DEBUG # enable extra debugging, this # affects netgraph(4) and nodes # Node types options NETGRAPH_ASYNC options NETGRAPH_ATMLLC options NETGRAPH_ATM_ATMPIF options NETGRAPH_BLUETOOTH # ng_bluetooth(4) options NETGRAPH_BLUETOOTH_BT3C # ng_bt3c(4) options NETGRAPH_BLUETOOTH_HCI # ng_hci(4) options NETGRAPH_BLUETOOTH_L2CAP # ng_l2cap(4) options NETGRAPH_BLUETOOTH_SOCKET # ng_btsocket(4) options NETGRAPH_BLUETOOTH_UBT # ng_ubt(4) options NETGRAPH_BLUETOOTH_UBTBCMFW # ubtbcmfw(4) options NETGRAPH_BPF options NETGRAPH_BRIDGE options NETGRAPH_CAR options NETGRAPH_CHECKSUM options NETGRAPH_CISCO options NETGRAPH_DEFLATE options NETGRAPH_DEVICE options NETGRAPH_ECHO options NETGRAPH_EIFACE options NETGRAPH_ETHER options NETGRAPH_FRAME_RELAY options NETGRAPH_GIF options NETGRAPH_GIF_DEMUX options NETGRAPH_HOLE options NETGRAPH_IFACE options NETGRAPH_IP_INPUT options NETGRAPH_IPFW options NETGRAPH_KSOCKET options NETGRAPH_L2TP options NETGRAPH_LMI options NETGRAPH_MPPC_COMPRESSION options NETGRAPH_MPPC_ENCRYPTION options NETGRAPH_NETFLOW options NETGRAPH_NAT options NETGRAPH_ONE2MANY options NETGRAPH_PATCH options NETGRAPH_PIPE options NETGRAPH_PPP options NETGRAPH_PPPOE options NETGRAPH_PPTPGRE options NETGRAPH_PRED1 options NETGRAPH_RFC1490 options NETGRAPH_SOCKET options NETGRAPH_SPLIT options NETGRAPH_SPPP options NETGRAPH_TAG options NETGRAPH_TCPMSS options NETGRAPH_TEE options NETGRAPH_UI options NETGRAPH_VJC options NETGRAPH_VLAN # NgATM - Netgraph ATM options NGATM_ATM options NGATM_ATMBASE options NGATM_SSCOP options NGATM_SSCFU options NGATM_UNI options NGATM_CCATM device mn # Munich32x/Falc54 Nx64kbit/sec cards. # Network stack virtualization. options VIMAGE options VNET_DEBUG # debug for VIMAGE # # Network interfaces: # The `loop' device is MANDATORY when networking is enabled. device loop # The `ether' device provides generic code to handle # Ethernets; it is MANDATORY when an Ethernet device driver is # configured. device ether # The `vlan' device implements the VLAN tagging of Ethernet frames # according to IEEE 802.1Q. device vlan # The `vxlan' device implements the VXLAN encapsulation of Ethernet # frames in UDP packets according to RFC7348. device vxlan # The `wlan' device provides generic code to support 802.11 # drivers, including host AP mode; it is MANDATORY for the wi, # and ath drivers and will eventually be required by all 802.11 drivers. device wlan options IEEE80211_DEBUG #enable debugging msgs options IEEE80211_SUPPORT_MESH #enable 802.11s D3.0 support options IEEE80211_SUPPORT_TDMA #enable TDMA support # The `wlan_wep', `wlan_tkip', and `wlan_ccmp' devices provide # support for WEP, TKIP, and AES-CCMP crypto protocols optionally # used with 802.11 devices that depend on the `wlan' module. device wlan_wep device wlan_ccmp device wlan_tkip # The `wlan_xauth' device provides support for external (i.e. user-mode) # authenticators for use with 802.11 drivers that use the `wlan' # module and support 802.1x and/or WPA security protocols. device wlan_xauth # The `wlan_acl' device provides a MAC-based access control mechanism # for use with 802.11 drivers operating in ap mode and using the # `wlan' module. # The 'wlan_amrr' device provides AMRR transmit rate control algorithm device wlan_acl device wlan_amrr # The `sppp' device serves a similar role for certain types # of synchronous PPP links (like `cx', `ar'). device sppp # The `bpf' device enables the Berkeley Packet Filter. Be # aware of the legal and administrative consequences of enabling this # option. DHCP requires bpf. device bpf # The `netmap' device implements memory-mapped access to network # devices from userspace, enabling wire-speed packet capture and # generation even at 10Gbit/s. Requires support in the device # driver. Supported drivers are ixgbe, e1000, re. device netmap # The `disc' device implements a minimal network interface, # which throws away all packets sent and never receives any. It is # included for testing and benchmarking purposes. device disc # The `epair' device implements a virtual back-to-back connected Ethernet # like interface pair. device epair # The `edsc' device implements a minimal Ethernet interface, # which discards all packets sent and receives none. device edsc # The `tuntap' device implements (user-)ppp, nos-tun(8) and a pty-like virtual # Ethernet interface device tuntap # The `gif' device implements IPv6 over IP4 tunneling, # IPv4 over IPv6 tunneling, IPv4 over IPv4 tunneling and # IPv6 over IPv6 tunneling. # The `gre' device implements GRE (Generic Routing Encapsulation) tunneling, # as specified in the RFC 2784 and RFC 2890. # The `me' device implements Minimal Encapsulation within IPv4 as # specified in the RFC 2004. # The XBONEHACK option allows the same pair of addresses to be configured on # multiple gif interfaces. device gif device gre device me options XBONEHACK # The `stf' device implements 6to4 encapsulation. device stf # The pf packet filter consists of three devices: # The `pf' device provides /dev/pf and the firewall code itself. # The `pflog' device provides the pflog0 interface which logs packets. # The `pfsync' device provides the pfsync0 interface used for # synchronization of firewall state tables (over the net). device pf device pflog device pfsync # Bridge interface. device if_bridge # Common Address Redundancy Protocol. See carp(4) for more details. device carp # IPsec interface. device enc # Link aggregation interface. device lagg # # Internet family options: # # MROUTING enables the kernel multicast packet forwarder, which works # with mrouted and XORP. # # IPFIREWALL enables support for IP firewall construction, in # conjunction with the `ipfw' program. IPFIREWALL_VERBOSE sends # logged packets to the system logger. IPFIREWALL_VERBOSE_LIMIT # limits the number of times a matching entry can be logged. # # WARNING: IPFIREWALL defaults to a policy of "deny ip from any to any" # and if you do not add other rules during startup to allow access, # YOU WILL LOCK YOURSELF OUT. It is suggested that you set firewall_type=open # in /etc/rc.conf when first enabling this feature, then refining the # firewall rules in /etc/rc.firewall after you've tested that the new kernel # feature works properly. # # IPFIREWALL_DEFAULT_TO_ACCEPT causes the default rule (at boot) to # allow everything. Use with care, if a cracker can crash your # firewall machine, they can get to your protected machines. However, # if you are using it as an as-needed filter for specific problems as # they arise, then this may be for you. Changing the default to 'allow' # means that you won't get stuck if the kernel and /sbin/ipfw binary get # out of sync. # # IPDIVERT enables the divert IP sockets, used by ``ipfw divert''. It # depends on IPFIREWALL if compiled into the kernel. # # IPFIREWALL_NAT adds support for in kernel nat in ipfw, and it requires # LIBALIAS. # # IPFIREWALL_NAT64 adds support for in kernel NAT64 in ipfw. # # IPFIREWALL_NPTV6 adds support for in kernel NPTv6 in ipfw. # # IPFIREWALL_PMOD adds support for protocols modification module. Currently # it supports only TCP MSS modification. # # IPSTEALTH enables code to support stealth forwarding (i.e., forwarding # packets without touching the TTL). This can be useful to hide firewalls # from traceroute and similar tools. # # PF_DEFAULT_TO_DROP causes the default pf(4) rule to deny everything. # # TCPDEBUG enables code which keeps traces of the TCP state machine # for sockets with the SO_DEBUG option set, which can then be examined # using the trpt(8) utility. # # TCPPCAP enables code which keeps the last n packets sent and received # on a TCP socket. # # TCP_BLACKBOX enables enhanced TCP event logging. # # TCP_HHOOK enables the hhook(9) framework hooks for the TCP stack. # # RADIX_MPATH provides support for equal-cost multi-path routing. # options MROUTING # Multicast routing options IPFIREWALL #firewall options IPFIREWALL_VERBOSE #enable logging to syslogd(8) options IPFIREWALL_VERBOSE_LIMIT=100 #limit verbosity options IPFIREWALL_DEFAULT_TO_ACCEPT #allow everything by default options IPFIREWALL_NAT #ipfw kernel nat support options IPFIREWALL_NAT64 #ipfw kernel NAT64 support options IPFIREWALL_NPTV6 #ipfw kernel IPv6 NPT support options IPDIVERT #divert sockets options IPFILTER #ipfilter support options IPFILTER_LOG #ipfilter logging options IPFILTER_LOOKUP #ipfilter pools options IPFILTER_DEFAULT_BLOCK #block all packets by default options IPSTEALTH #support for stealth forwarding options PF_DEFAULT_TO_DROP #drop everything by default options TCPDEBUG options TCPPCAP options TCP_BLACKBOX options TCP_HHOOK options RADIX_MPATH # The MBUF_STRESS_TEST option enables options which create # various random failures / extreme cases related to mbuf # functions. See mbuf(9) for a list of available test cases. # MBUF_PROFILING enables code to profile the mbuf chains # exiting the system (via participating interfaces) and # return a logarithmic histogram of monitored parameters # (e.g. packet size, wasted space, number of mbufs in chain). options MBUF_STRESS_TEST options MBUF_PROFILING # Statically link in accept filters options ACCEPT_FILTER_DATA options ACCEPT_FILTER_DNS options ACCEPT_FILTER_HTTP # TCP_SIGNATURE adds support for RFC 2385 (TCP-MD5) digests. These are # carried in TCP option 19. This option is commonly used to protect # TCP sessions (e.g. BGP) where IPSEC is not available nor desirable. # This is enabled on a per-socket basis using the TCP_MD5SIG socket option. # This requires the use of 'device crypto' and either 'options IPSEC' or # 'options IPSEC_SUPPORT'. options TCP_SIGNATURE #include support for RFC 2385 # DUMMYNET enables the "dummynet" bandwidth limiter. You need IPFIREWALL # as well. See dummynet(4) and ipfw(8) for more info. When you run # DUMMYNET it is advisable to also have at least "options HZ=1000" to achieve # a smooth scheduling of the traffic. options DUMMYNET # The DEBUGNET option enables a basic debug/panic-time networking API. It # is used by NETDUMP and NETGDB. options DEBUGNET # The NETDUMP option enables netdump(4) client support in the kernel. # This allows a panicking kernel to transmit a kernel dump to a remote host. options NETDUMP # The NETGDB option enables netgdb(4) support in the kernel. This allows a # panicking kernel to be debugged as a GDB remote over the network. options NETGDB ##################################################################### # FILESYSTEM OPTIONS # # Only the root filesystem needs to be statically compiled or preloaded # as module; everything else will be automatically loaded at mount # time. Some people still prefer to statically compile other # filesystems as well. # # NB: The UNION filesystem was known to be buggy in the past. It is now # being actively maintained, although there are still some issues being # resolved. # # One of these is mandatory: options FFS #Fast filesystem options NFSCL #Network File System client # The rest are optional: options AUTOFS #Automounter filesystem options CD9660 #ISO 9660 filesystem options FDESCFS #File descriptor filesystem options FUSEFS #FUSEFS support module options MSDOSFS #MS DOS File System (FAT, FAT32) options NFSLOCKD #Network Lock Manager options NFSD #Network Filesystem Server options KGSSAPI #Kernel GSSAPI implementation options NULLFS #NULL filesystem options PROCFS #Process filesystem (requires PSEUDOFS) options PSEUDOFS #Pseudo-filesystem framework options PSEUDOFS_TRACE #Debugging support for PSEUDOFS options SMBFS #SMB/CIFS filesystem options TMPFS #Efficient memory filesystem options UDF #Universal Disk Format options UNIONFS #Union filesystem # The xFS_ROOT options REQUIRE the associated ``options xFS'' options NFS_ROOT #NFS usable as root device # Soft updates is a technique for improving filesystem speed and # making abrupt shutdown less risky. # options SOFTUPDATES # Extended attributes allow additional data to be associated with files, # and is used for ACLs, Capabilities, and MAC labels. # See src/sys/ufs/ufs/README.extattr for more information. options UFS_EXTATTR options UFS_EXTATTR_AUTOSTART # Access Control List support for UFS filesystems. The current ACL # implementation requires extended attribute support, UFS_EXTATTR, # for the underlying filesystem. # See src/sys/ufs/ufs/README.acls for more information. options UFS_ACL # Directory hashing improves the speed of operations on very large # directories at the expense of some memory. options UFS_DIRHASH # Gjournal-based UFS journaling support. options UFS_GJOURNAL # Make space in the kernel for a root filesystem on a md device. # Define to the number of kilobytes to reserve for the filesystem. # This is now optional. # If not defined, the root filesystem passed in as the MFS_IMAGE makeoption # will be automatically embedded in the kernel during linking. Its exact size # will be consumed within the kernel. # If defined, the old way of embedding the filesystem in the kernel will be # used. That is to say MD_ROOT_SIZE KB will be allocated in the kernel and # later, the filesystem image passed in as the MFS_IMAGE makeoption will be # dd'd into the reserved space if it fits. options MD_ROOT_SIZE=10 # Make the md device a potential root device, either with preloaded # images of type mfs_root or md_root. options MD_ROOT # Write-protect the md root device so that it may not be mounted writeable. options MD_ROOT_READONLY # Allow to read MD image from external memory regions options MD_ROOT_MEM # Disk quotas are supported when this option is enabled. options QUOTA #enable disk quotas # If you are running a machine just as a fileserver for PC and MAC # users, using SAMBA, you may consider setting this option # and keeping all those users' directories on a filesystem that is # mounted with the suiddir option. This gives new files the same # ownership as the directory (similar to group). It's a security hole # if you let these users run programs, so confine it to file-servers # (but it'll save you lots of headaches in those cases). Root owned # directories are exempt and X bits are cleared. The suid bit must be # set on the directory as well; see chmod(1). PC owners can't see/set # ownerships so they keep getting their toes trodden on. This saves # you all the support calls as the filesystem it's used on will act as # they expect: "It's my dir so it must be my file". # options SUIDDIR # NFS options: options NFS_MINATTRTIMO=3 # VREG attrib cache timeout in sec options NFS_MAXATTRTIMO=60 options NFS_MINDIRATTRTIMO=30 # VDIR attrib cache timeout in sec options NFS_MAXDIRATTRTIMO=60 options NFS_DEBUG # Enable NFS Debugging # # Add support for the EXT2FS filesystem of Linux fame. Be a bit # careful with this - the ext2fs code has a tendency to lag behind # changes and not be exercised very much, so mounting read/write could # be dangerous (and even mounting read only could result in panics.) # options EXT2FS # The system memory devices; /dev/mem, /dev/kmem device mem # The kernel symbol table device; /dev/ksyms device ksyms # Optional character code conversion support with LIBICONV. # Each option requires their base file system and LIBICONV. options CD9660_ICONV options MSDOSFS_ICONV options UDF_ICONV ##################################################################### # POSIX P1003.1B # Real time extensions added in the 1993 POSIX # _KPOSIX_PRIORITY_SCHEDULING: Build in _POSIX_PRIORITY_SCHEDULING options _KPOSIX_PRIORITY_SCHEDULING # p1003_1b_semaphores are very experimental, # user should be ready to assist in debugging if problems arise. options P1003_1B_SEMAPHORES # POSIX message queue options P1003_1B_MQUEUE ##################################################################### # SECURITY POLICY PARAMETERS # Support for BSM audit options AUDIT # Support for Mandatory Access Control (MAC): options MAC options MAC_BIBA options MAC_BSDEXTENDED options MAC_IFOFF options MAC_LOMAC options MAC_MLS options MAC_NONE options MAC_NTPD options MAC_PARTITION options MAC_PORTACL options MAC_SEEOTHERUIDS options MAC_STUB options MAC_TEST # Support for Capsicum options CAPABILITIES # fine-grained rights on file descriptors options CAPABILITY_MODE # sandboxes with no global namespace access ##################################################################### # CLOCK OPTIONS # The granularity of operation is controlled by the kernel option HZ whose # default value (1000 on most architectures) means a granularity of 1ms # (1s/HZ). Historically, the default was 100, but finer granularity is # required for DUMMYNET and other systems on modern hardware. There are # reasonable arguments that HZ should, in fact, be 100 still; consider, # that reducing the granularity too much might cause excessive overhead in # clock interrupt processing, potentially causing ticks to be missed and thus # actually reducing the accuracy of operation. options HZ=100 # Enable support for the kernel PLL to use an external PPS signal, # under supervision of [x]ntpd(8) # More info in ntpd documentation: http://www.eecis.udel.edu/~ntp options PPS_SYNC # Enable support for generic feed-forward clocks in the kernel. # The feed-forward clock support is an alternative to the feedback oriented # ntpd/system clock approach, and is to be used with a feed-forward # synchronization algorithm such as the RADclock: # More info here: http://www.synclab.org/radclock options FFCLOCK ##################################################################### # SCSI DEVICES # SCSI DEVICE CONFIGURATION # The SCSI subsystem consists of the `base' SCSI code, a number of # high-level SCSI device `type' drivers, and the low-level host-adapter # device drivers. The host adapters are listed in the ISA and PCI # device configuration sections below. # # It is possible to wire down your SCSI devices so that a given bus, # target, and LUN always come on line as the same device unit. In # earlier versions the unit numbers were assigned in the order that # the devices were probed on the SCSI bus. This means that if you # removed a disk drive, you may have had to rewrite your /etc/fstab # file, and also that you had to be careful when adding a new disk # as it may have been probed earlier and moved your device configuration # around. # This old behavior is maintained as the default behavior. The unit # assignment begins with the first non-wired down unit for a device # type. For example, if you wire a disk as "da3" then the first # non-wired disk will be assigned da4. # The syntax for wiring down devices is: hint.scbus.0.at="ahc0" hint.scbus.1.at="ahc1" hint.scbus.1.bus="0" hint.scbus.3.at="ahc2" hint.scbus.3.bus="0" hint.scbus.2.at="ahc2" hint.scbus.2.bus="1" hint.da.0.at="scbus0" hint.da.0.target="0" hint.da.0.unit="0" hint.da.1.at="scbus3" hint.da.1.target="1" hint.da.2.at="scbus2" hint.da.2.target="3" hint.sa.1.at="scbus1" hint.sa.1.target="6" # "units" (SCSI logical unit number) that are not specified are # treated as if specified as LUN 0. # All SCSI devices allocate as many units as are required. # The ch driver drives SCSI Media Changer ("jukebox") devices. # # The da driver drives SCSI Direct Access ("disk") and Optical Media # ("WORM") devices. # # The sa driver drives SCSI Sequential Access ("tape") devices. # # The cd driver drives SCSI Read Only Direct Access ("cd") devices. # # The ses driver drives SCSI Environment Services ("ses") and # SAF-TE ("SCSI Accessible Fault-Tolerant Enclosure") devices. # # The pt driver drives SCSI Processor devices. # # The sg driver provides a passthrough API that is compatible with the # Linux SG driver. It will work in conjunction with the COMPAT_LINUX # option to run linux SG apps. It can also stand on its own and provide # source level API compatibility for porting apps to FreeBSD. # # Target Mode support is provided here but also requires that a SIM # (SCSI Host Adapter Driver) provide support as well. # # The targ driver provides target mode support as a Processor type device. # It exists to give the minimal context necessary to respond to Inquiry # commands. There is a sample user application that shows how the rest # of the command support might be done in /usr/share/examples/scsi_target. # # The targbh driver provides target mode support and exists to respond # to incoming commands that do not otherwise have a logical unit assigned # to them. # # The pass driver provides a passthrough API to access the CAM subsystem. device scbus #base SCSI code device ch #SCSI media changers device da #SCSI direct access devices (aka disks) device sa #SCSI tapes device cd #SCSI CD-ROMs device ses #Enclosure Services (SES and SAF-TE) device pt #SCSI processor device targ #SCSI Target Mode Code device targbh #SCSI Target Mode Blackhole Device device pass #CAM passthrough driver device sg #Linux SCSI passthrough device ctl #CAM Target Layer # CAM OPTIONS: # debugging options: # CAMDEBUG Compile in all possible debugging. # CAM_DEBUG_COMPILE Debug levels to compile in. # CAM_DEBUG_FLAGS Debug levels to enable on boot. # CAM_DEBUG_BUS Limit debugging to the given bus. # CAM_DEBUG_TARGET Limit debugging to the given target. # CAM_DEBUG_LUN Limit debugging to the given lun. # CAM_DEBUG_DELAY Delay in us after printing each debug line. # CAM_IO_STATS Publish additional CAM device statics by sysctl # # CAM_MAX_HIGHPOWER: Maximum number of concurrent high power (start unit) cmds # SCSI_NO_SENSE_STRINGS: When defined disables sense descriptions # SCSI_NO_OP_STRINGS: When defined disables opcode descriptions # SCSI_DELAY: The number of MILLISECONDS to freeze the SIM (scsi adapter) # queue after a bus reset, and the number of milliseconds to # freeze the device queue after a bus device reset. This # can be changed at boot and runtime with the # kern.cam.scsi_delay tunable/sysctl. options CAMDEBUG options CAM_DEBUG_COMPILE=-1 options CAM_DEBUG_FLAGS=(CAM_DEBUG_INFO|CAM_DEBUG_PROBE|CAM_DEBUG_PERIPH) options CAM_DEBUG_BUS=-1 options CAM_DEBUG_TARGET=-1 options CAM_DEBUG_LUN=-1 options CAM_DEBUG_DELAY=1 options CAM_MAX_HIGHPOWER=4 options SCSI_NO_SENSE_STRINGS options SCSI_NO_OP_STRINGS options SCSI_DELAY=5000 # Be pessimistic about Joe SCSI device options CAM_IOSCHED_DYNAMIC options CAM_IO_STATS options CAM_TEST_FAILURE # Options for the CAM CDROM driver: # CHANGER_MIN_BUSY_SECONDS: Guaranteed minimum time quantum for a changer LUN # CHANGER_MAX_BUSY_SECONDS: Maximum time quantum per changer LUN, only # enforced if there is I/O waiting for another LUN # The compiled in defaults for these variables are 2 and 10 seconds, # respectively. # # These can also be changed on the fly with the following sysctl variables: # kern.cam.cd.changer.min_busy_seconds # kern.cam.cd.changer.max_busy_seconds # options CHANGER_MIN_BUSY_SECONDS=2 options CHANGER_MAX_BUSY_SECONDS=10 # Options for the CAM sequential access driver: # SA_IO_TIMEOUT: Timeout for read/write/wfm operations, in minutes # SA_SPACE_TIMEOUT: Timeout for space operations, in minutes # SA_REWIND_TIMEOUT: Timeout for rewind operations, in minutes # SA_ERASE_TIMEOUT: Timeout for erase operations, in minutes # SA_1FM_AT_EOD: Default to model which only has a default one filemark at EOT. options SA_IO_TIMEOUT=4 options SA_SPACE_TIMEOUT=60 options SA_REWIND_TIMEOUT=(2*60) options SA_ERASE_TIMEOUT=(4*60) options SA_1FM_AT_EOD # Optional timeout for the CAM processor target (pt) device # This is specified in seconds. The default is 60 seconds. options SCSI_PT_DEFAULT_TIMEOUT=60 # Optional enable of doing SES passthrough on other devices (e.g., disks) # # Normally disabled because a lot of newer SCSI disks report themselves # as having SES capabilities, but this can then clot up attempts to build # a topology with the SES device that's on the box these drives are in.... options SES_ENABLE_PASSTHROUGH ##################################################################### # MISCELLANEOUS DEVICES AND OPTIONS device pty #BSD-style compatibility pseudo ttys device nmdm #back-to-back tty devices device md #Memory/malloc disk device snp #Snoop device - to look at pty/vty/etc.. device ccd #Concatenated disk driver device firmware #firmware(9) support # Kernel side iconv library options LIBICONV # Size of the kernel message buffer. Should be N * pagesize. options MSGBUF_SIZE=40960 ##################################################################### # HARDWARE BUS CONFIGURATION # # PCI bus & PCI options: # device pci options PCI_HP # PCI-Express native HotPlug options PCI_IOV # PCI SR-IOV support ##################################################################### # HARDWARE DEVICE CONFIGURATION # For ISA the required hints are listed. # PCI, CardBus, SD/MMC and pccard are self identifying buses, so # no hints are needed. # # Mandatory devices: # # These options are valid for other keyboard drivers as well. options KBD_DISABLE_KEYMAP_LOAD # refuse to load a keymap options KBD_INSTALL_CDEV # install a CDEV entry in /dev device kbdmux # keyboard multiplexer options KBDMUX_DFLT_KEYMAP # specify the built-in keymap makeoptions KBDMUX_DFLT_KEYMAP=it.iso options FB_DEBUG # Frame buffer debugging # Enable experimental features of the syscons terminal emulator (teken). options TEKEN_CONS25 # cons25-style terminal emulation options TEKEN_UTF8 # UTF-8 output handling # The vt video console driver. device vt options VT_ALT_TO_ESC_HACK=1 # Prepend ESC sequence to ALT keys options VT_MAXWINDOWS=16 # Number of virtual consoles options VT_TWOBUTTON_MOUSE # Use right mouse button to paste # The following options set the default framebuffer size. options VT_FB_DEFAULT_HEIGHT=480 options VT_FB_DEFAULT_WIDTH=640 # The following options will let you change the default vt terminal colors. options TERMINAL_NORM_ATTR=(FG_GREEN|BG_BLACK) options TERMINAL_KERN_ATTR=(FG_LIGHTRED|BG_BLACK) # # Optional devices: # # # SCSI host adapters: # +# aacraid: Adaptec by PMC RAID controllers, Series 6/7/8 and upcoming +# families. Container interface, CAM required. # ahc: Adaptec 274x/284x/2910/293x/294x/394x/3950x/3960x/398X/4944/ # 19160x/29160x, aic7770/aic78xx # ahd: Adaptec 29320/39320 Controllers. # esp: Emulex ESP, NCR 53C9x and QLogic FAS families based controllers # including the AMD Am53C974 (found on devices such as the Tekram # DC-390(T)) and the Sun ESP and FAS families of controllers # isp: Qlogic ISP 1020, 1040 and 1040B PCI SCSI host adapters, # ISP 1240 Dual Ultra SCSI, ISP 1080 and 1280 (Dual) Ultra2, # ISP 12160 Ultra3 SCSI, # Qlogic ISP 2100 and ISP 2200 1Gb Fibre Channel host adapters. # Qlogic ISP 2300 and ISP 2312 2Gb Fibre Channel host adapters. # Qlogic ISP 2322 and ISP 6322 2Gb Fibre Channel host adapters. # ispfw: Firmware module for Qlogic host adapters # mpr: LSI-Logic MPT/Fusion Gen 3 # mps: LSI-Logic MPT/Fusion Gen 2 # mpt: LSI-Logic MPT/Fusion 53c1020 or 53c1030 Ultra4 # or FC9x9 Fibre Channel host adapters. # sym: Symbios/Logic 53C8XX family of PCI-SCSI I/O processors: # 53C810, 53C810A, 53C815, 53C825, 53C825A, 53C860, 53C875, # 53C876, 53C885, 53C895, 53C895A, 53C896, 53C897, 53C1510D, # 53C1010-33, 53C1010-66. +device aacraid device ahc device ahd device esp device iscsi_initiator device isp hint.isp.0.disable="1" hint.isp.0.role="3" hint.isp.0.prefer_iomap="1" hint.isp.0.prefer_memmap="1" hint.isp.0.fwload_disable="1" hint.isp.0.ignore_nvram="1" hint.isp.0.fullduplex="1" hint.isp.0.topology="lport" hint.isp.0.topology="nport" hint.isp.0.topology="lport-only" hint.isp.0.topology="nport-only" # we can't get u_int64_t types, nor can we get strings if it's got # a leading 0x, hence this silly dodge. hint.isp.0.portwnn="w50000000aaaa0000" hint.isp.0.nodewnn="w50000000aaaa0001" device ispfw device mpr # LSI-Logic MPT-Fusion 3 device mps # LSI-Logic MPT-Fusion 2 device mpt # LSI-Logic MPT-Fusion device sym # The aic7xxx driver will attempt to use memory mapped I/O for all PCI # controllers that have it configured only if this option is set. Unfortunately, # this doesn't work on some motherboards, which prevents it from being the # default. options AHC_ALLOW_MEMIO # Dump the contents of the ahc controller configuration PROM. options AHC_DUMP_EEPROM # Bitmap of units to enable targetmode operations. options AHC_TMODE_ENABLE # Compile in Aic7xxx Debugging code. options AHC_DEBUG # Aic7xxx driver debugging options. See sys/dev/aic7xxx/aic7xxx.h options AHC_DEBUG_OPTS # Print register bitfields in debug output. Adds ~128k to driver # See ahc(4). options AHC_REG_PRETTY_PRINT # Compile in aic79xx debugging code. options AHD_DEBUG # Aic79xx driver debugging options. Adds ~215k to driver. See ahd(4). options AHD_DEBUG_OPTS=0xFFFFFFFF # Print human-readable register definitions when debugging options AHD_REG_PRETTY_PRINT # Bitmap of units to enable targetmode operations. options AHD_TMODE_ENABLE # Options used in dev/iscsi (Software iSCSI stack) # options ISCSI_INITIATOR_DEBUG=9 # Options used in dev/isp/ (Qlogic SCSI/FC driver). # # ISP_TARGET_MODE - enable target mode operation # options ISP_TARGET_MODE=1 # # ISP_DEFAULT_ROLES - default role # none=0 # target=1 # initiator=2 # both=3 (not supported currently) # # ISP_INTERNAL_TARGET (trivial internal disk target, for testing) # options ISP_DEFAULT_ROLES=0 #options SYM_SETUP_SCSI_DIFF #-HVD support for 825a, 875, 885 # disabled:0 (default), enabled:1 #options SYM_SETUP_PCI_PARITY #-PCI parity checking # disabled:0, enabled:1 (default) #options SYM_SETUP_MAX_LUN #-Number of LUNs supported # default:8, range:[1..64] # # Compaq "CISS" RAID controllers (SmartRAID 5* series) # These controllers have a SCSI-like interface, and require the # CAM infrastructure. # device ciss # # Intel Integrated RAID controllers. # This driver was developed and is maintained by Intel. Contacts # at Intel for this driver are # "Kannanthanam, Boji T" and # "Leubner, Achim" . # device iir # # Mylex AcceleRAID and eXtremeRAID controllers with v6 and later # firmware. These controllers have a SCSI-like interface, and require # the CAM infrastructure. # device mly # # Compaq Smart RAID, Mylex DAC960 and AMI MegaRAID controllers. Only # one entry is needed; the code will find and configure all supported # controllers. # device ida # Compaq Smart RAID device mlx # Mylex DAC960 device amr # AMI MegaRAID device amrp # SCSI Passthrough interface (optional, CAM req.) device mfi # LSI MegaRAID SAS device mfip # LSI MegaRAID SAS passthrough, requires CAM options MFI_DEBUG device mrsas # LSI/Avago MegaRAID SAS/SATA, 6Gb/s and 12Gb/s # # 3ware ATA RAID # device twe # 3ware ATA RAID # # Serial ATA host controllers: # # ahci: Advanced Host Controller Interface (AHCI) compatible # mvs: Marvell 88SX50XX/88SX60XX/88SX70XX/SoC controllers # siis: SiliconImage SiI3124/SiI3132/SiI3531 controllers # # These drivers are part of cam(4) subsystem. They supersede less featured # ata(4) subsystem drivers, supporting same hardware. device ahci device mvs device siis # # The 'ATA' driver supports all legacy ATA/ATAPI controllers, including # PC Card devices. You only need one "device ata" for it to find all # PCI and PC Card ATA/ATAPI devices on modern machines. # Alternatively, individual bus and chipset drivers may be chosen by using # the 'atacore' driver then selecting the drivers on a per vendor basis. # For example to build a system which only supports a VIA chipset, # omit 'ata' and include the 'atacore', 'atapci' and 'atavia' drivers. device ata # Modular ATA #device atacore # Core ATA functionality #device atapccard # CARDBUS support #device ataisa # ISA bus support #device atapci # PCI bus support; only generic chipset support # PCI ATA chipsets #device ataacard # ACARD #device ataacerlabs # Acer Labs Inc. (ALI) #device ataamd # American Micro Devices (AMD) #device ataati # ATI #device atacenatek # Cenatek #device atacypress # Cypress #device atacyrix # Cyrix #device atahighpoint # HighPoint #device ataintel # Intel #device ataite # Integrated Technology Inc. (ITE) #device atajmicron # JMicron #device atamarvell # Marvell #device atamicron # Micron #device atanational # National #device atanetcell # NetCell #device atanvidia # nVidia #device atapromise # Promise #device ataserverworks # ServerWorks #device atasiliconimage # Silicon Image Inc. (SiI) (formerly CMD) #device atasis # Silicon Integrated Systems Corp.(SiS) #device atavia # VIA Technologies Inc. # # For older non-PCI, non-PnPBIOS systems, these are the hints lines to add: hint.ata.0.at="isa" hint.ata.0.port="0x1f0" hint.ata.0.irq="14" hint.ata.1.at="isa" hint.ata.1.port="0x170" hint.ata.1.irq="15" # # uart: newbusified driver for serial interfaces. It consolidates the sio(4), # sab(4) and zs(4) drivers. # device uart # Options for uart(4) options UART_PPS_ON_CTS # Do time pulse capturing using CTS # instead of DCD. options UART_POLL_FREQ # Set polling rate, used when hw has # no interrupt support (50 Hz default). # The following hint should only be used for pure ISA devices. It is not # needed otherwise. Use of hints is strongly discouraged. hint.uart.0.at="isa" # The following 3 hints are used when the UART is a system device (i.e., a # console or debug port), but only on platforms that don't have any other # means to pass the information to the kernel. The unit number of the hint # is only used to bundle the hints together. There is no relation to the # unit number of the probed UART. hint.uart.0.port="0x3f8" hint.uart.0.flags="0x10" hint.uart.0.baud="115200" # `flags' for serial drivers that support consoles like sio(4) and uart(4): # 0x10 enable console support for this unit. Other console flags # (if applicable) are ignored unless this is set. Enabling # console support does not make the unit the preferred console. # Boot with -h or set boot_serial=YES in the loader. For sio(4) # specifically, the 0x20 flag can also be set (see above). # Currently, at most one unit can have console support; the # first one (in config file order) with this flag set is # preferred. Setting this flag for sio0 gives the old behavior. # 0x80 use this port for serial line gdb support in ddb. Also known # as debug port. # # Options for serial drivers that support consoles: options BREAK_TO_DEBUGGER # A BREAK/DBG on the console goes to # ddb, if available. # Solaris implements a new BREAK which is initiated by a character # sequence CR ~ ^b which is similar to a familiar pattern used on # Sun servers by the Remote Console. There are FreeBSD extensions: # CR ~ ^p requests force panic and CR ~ ^r requests a clean reboot. options ALT_BREAK_TO_DEBUGGER # Serial Communications Controller # Supports the Siemens SAB 82532 and Zilog Z8530 multi-channel # communications controllers. device scc # PCI Universal Communications driver # Supports various multi port PCI I/O cards. device puc # # Network interfaces: # # MII bus support is required for many PCI Ethernet NICs, # namely those which use MII-compliant transceivers or implement # transceiver control interfaces that operate like an MII. Adding # "device miibus" to the kernel config pulls in support for the generic # miibus API, the common support for for bit-bang'ing the MII and all # of the PHY drivers, including a generic one for PHYs that aren't # specifically handled by an individual driver. Support for specific # PHYs may be built by adding "device mii", "device mii_bitbang" if # needed by the NIC driver and then adding the appropriate PHY driver. device mii # Minimal MII support device mii_bitbang # Common module for bit-bang'ing the MII device miibus # MII support w/ bit-bang'ing and all PHYs device acphy # Altima Communications AC101 device amphy # AMD AM79c873 / Davicom DM910{1,2} device atphy # Attansic/Atheros F1 device axphy # Asix Semiconductor AX88x9x device bmtphy # Broadcom BCM5201/BCM5202 and 3Com 3c905C device bnxt # Broadcom NetXtreme-C/NetXtreme-E device brgphy # Broadcom BCM54xx/57xx 1000baseTX device ciphy # Cicada/Vitesse CS/VSC8xxx device e1000phy # Marvell 88E1000 1000/100/10-BT device gentbi # Generic 10-bit 1000BASE-{LX,SX} fiber ifaces device icsphy # ICS ICS1889-1893 device ip1000phy # IC Plus IP1000A/IP1001 device jmphy # JMicron JMP211/JMP202 device lxtphy # Level One LXT-970 device mlphy # Micro Linear 6692 device nsgphy # NatSemi DP8361/DP83865/DP83891 device nsphy # NatSemi DP83840A device nsphyter # NatSemi DP83843/DP83815 device pnaphy # HomePNA device qsphy # Quality Semiconductor QS6612 device rdcphy # RDC Semiconductor R6040 device rgephy # RealTek 8169S/8110S/8211B/8211C device rlphy # RealTek 8139 device rlswitch # RealTek 8305 device smcphy # SMSC LAN91C111 device tdkphy # TDK 89Q2120 device tlphy # Texas Instruments ThunderLAN device truephy # LSI TruePHY device xmphy # XaQti XMAC II # an: Aironet 4500/4800 802.11 wireless adapters. Supports the PCMCIA, # PCI and ISA varieties. # ae: Support for gigabit ethernet adapters based on the Attansic/Atheros # L2 PCI-Express FastEthernet controllers. # age: Support for gigabit ethernet adapters based on the Attansic/Atheros # L1 PCI express gigabit ethernet controllers. # alc: Support for Atheros AR8131/AR8132 PCIe ethernet controllers. # ale: Support for Atheros AR8121/AR8113/AR8114 PCIe ethernet controllers. # ath: Atheros a/b/g WiFi adapters (requires ath_hal and wlan) # bce: Broadcom NetXtreme II (BCM5706/BCM5708) PCI/PCIe Gigabit Ethernet # adapters. # bfe: Broadcom BCM4401 Ethernet adapter. # bge: Support for gigabit ethernet adapters based on the Broadcom # BCM570x family of controllers, including the 3Com 3c996-T, # the Netgear GA302T, the SysKonnect SK-9D21 and SK-9D41, and # the embedded gigE NICs on Dell PowerEdge 2550 servers. # bnxt: Broadcom NetXtreme-C and NetXtreme-E PCIe 10/25/50G Ethernet adapters. # bxe: Broadcom NetXtreme II (BCM5771X/BCM578XX) PCIe 10Gb Ethernet # adapters. # bwi: Broadcom BCM430* and BCM431* family of wireless adapters. # bwn: Broadcom BCM43xx family of wireless adapters. # cas: Sun Cassini/Cassini+ and National Semiconductor DP83065 Saturn # cxgb: Chelsio T3 based 1GbE/10GbE PCIe Ethernet adapters. # cxgbe:Chelsio T4, T5, and T6-based 1/10/25/40/100GbE PCIe Ethernet # adapters. # cxgbev: Chelsio T4, T5, and T6-based PCIe Virtual Functions. # dc: Support for PCI fast ethernet adapters based on the DEC/Intel 21143 # and various workalikes including: # the ADMtek AL981 Comet and AN985 Centaur, the ASIX Electronics # AX88140A and AX88141, the Davicom DM9100 and DM9102, the Lite-On # 82c168 and 82c169 PNIC, the Lite-On/Macronix LC82C115 PNIC II # and the Macronix 98713/98713A/98715/98715A/98725 PMAC. This driver # replaces the old al, ax, dm, pn and mx drivers. List of brands: # Digital DE500-BA, Kingston KNE100TX, D-Link DFE-570TX, SOHOware SFA110, # SVEC PN102-TX, CNet Pro110B, 120A, and 120B, Compex RL100-TX, # LinkSys LNE100TX, LNE100TX V2.0, Jaton XpressNet, Alfa Inc GFC2204, # KNE110TX. # em: Intel Pro/1000 Gigabit Ethernet 82542, 82543, 82544 based adapters. # fxp: Intel EtherExpress Pro/100B # (hint of prefer_iomap can be done to prefer I/O instead of Mem mapping) # gem: Apple GMAC/Sun ERI/Sun GEM # hme: Sun HME (Happy Meal Ethernet) # jme: JMicron JMC260 Fast Ethernet/JMC250 Gigabit Ethernet based adapters. # le: AMD Am7900 LANCE and Am79C9xx PCnet # lge: Support for PCI gigabit ethernet adapters based on the Level 1 # LXT1001 NetCellerator chipset. This includes the D-Link DGE-500SX, # SMC TigerCard 1000 (SMC9462SX), and some Addtron cards. # lio: Support for Cavium 23XX Ethernet adapters # malo: Marvell Libertas wireless NICs. # mwl: Marvell 88W8363 802.11n wireless NICs. # Requires the mwl firmware module # mwlfw: Marvell 88W8363 firmware # msk: Support for gigabit ethernet adapters based on the Marvell/SysKonnect # Yukon II Gigabit controllers, including 88E8021, 88E8022, 88E8061, # 88E8062, 88E8035, 88E8036, 88E8038, 88E8050, 88E8052, 88E8053, # 88E8055, 88E8056 and D-Link 560T/550SX. # mlxfw: Mellanox firmware update module. # mlx5: Mellanox ConnectX-4 and ConnectX-4 LX IB and Eth shared code module. # mlx5en:Mellanox ConnectX-4 and ConnectX-4 LX PCIe Ethernet adapters. # my: Myson Fast Ethernet (MTD80X, MTD89X) # nge: Support for PCI gigabit ethernet adapters based on the National # Semiconductor DP83820 and DP83821 chipset. This includes the # SMC EZ Card 1000 (SMC9462TX), D-Link DGE-500T, Asante FriendlyNet # GigaNIX 1000TA and 1000TPC, the Addtron AEG320T, the Surecom # EP-320G-TX and the Netgear GA622T. # oce: Emulex 10 Gbit adapters (OneConnect Ethernet) # ral: Ralink Technology IEEE 802.11 wireless adapter # re: RealTek 8139C+/8169/816xS/811xS/8101E PCI/PCIe Ethernet adapter # rl: Support for PCI fast ethernet adapters based on the RealTek 8129/8139 # chipset. Note that the RealTek driver defaults to using programmed # I/O to do register accesses because memory mapped mode seems to cause # severe lockups on SMP hardware. This driver also supports the # Accton EN1207D `Cheetah' adapter, which uses a chip called # the MPX 5030/5038, which is either a RealTek in disguise or a # RealTek workalike. Note that the D-Link DFE-530TX+ uses the RealTek # chipset and is supported by this driver, not the 'vr' driver. # rtwn: RealTek wireless adapters. # rtwnfw: RealTek wireless firmware. # sge: Silicon Integrated Systems SiS190/191 Fast/Gigabit Ethernet adapter # sis: Support for NICs based on the Silicon Integrated Systems SiS 900, # SiS 7016 and NS DP83815 PCI fast ethernet controller chips. # sk: Support for the SysKonnect SK-984x series PCI gigabit ethernet NICs. # This includes the SK-9841 and SK-9842 single port cards (single mode # and multimode fiber) and the SK-9843 and SK-9844 dual port cards # (also single mode and multimode). # The driver will autodetect the number of ports on the card and # attach each one as a separate network interface. # ste: Sundance Technologies ST201 PCI fast ethernet controller, includes # the D-Link DFE-550TX. # stge: Support for gigabit ethernet adapters based on the Sundance/Tamarack # TC9021 family of controllers, including the Sundance ST2021/ST2023, # the Sundance/Tamarack TC9021, the D-Link DL-4000 and ASUS NX1101. # ti: Support for PCI gigabit ethernet NICs based on the Alteon Networks # Tigon 1 and Tigon 2 chipsets. This includes the Alteon AceNIC, the # 3Com 3c985, the Netgear GA620 and various others. Note that you will # probably want to bump up kern.ipc.nmbclusters a lot to use this driver. # vr: Support for various fast ethernet adapters based on the VIA # Technologies VT3043 `Rhine I' and VT86C100A `Rhine II' chips, # including the D-Link DFE520TX and D-Link DFE530TX (see 'rl' for # DFE530TX+), the Hawking Technologies PN102TX, and the AOpen/Acer ALN-320. # vte: DM&P Vortex86 RDC R6040 Fast Ethernet # wi: Lucent WaveLAN/IEEE 802.11 PCMCIA adapters. Note: this supports both # the PCMCIA and ISA cards: the ISA card is really a PCMCIA to ISA # bridge with a PCMCIA adapter plugged into it. # xl: Support for the 3Com 3c900, 3c905, 3c905B and 3c905C (Fast) # Etherlink XL cards and integrated controllers. This includes the # integrated 3c905B-TX chips in certain Dell Optiplex and Dell # Precision desktop machines and the integrated 3c905-TX chips # in Dell Latitude laptop docking stations. # Also supported: 3Com 3c980(C)-TX, 3Com 3cSOHO100-TX, 3Com 3c450-TX # Order for ISA devices is important here device an device wi # PCI Ethernet NICs that use the common MII bus controller code. device ae # Attansic/Atheros L2 FastEthernet device age # Attansic/Atheros L1 Gigabit Ethernet device alc # Atheros AR8131/AR8132 Ethernet device ale # Atheros AR8121/AR8113/AR8114 Ethernet device bce # Broadcom BCM5706/BCM5708 Gigabit Ethernet device bfe # Broadcom BCM440x 10/100 Ethernet device bge # Broadcom BCM570xx Gigabit Ethernet device cas # Sun Cassini/Cassini+ and NS DP83065 Saturn device dc # DEC/Intel 21143 and various workalikes device et # Agere ET1310 10/100/Gigabit Ethernet device fxp # Intel EtherExpress PRO/100B (82557, 82558) hint.fxp.0.prefer_iomap="0" device gem # Apple GMAC/Sun ERI/Sun GEM device hme # Sun HME (Happy Meal Ethernet) device jme # JMicron JMC250 Gigabit/JMC260 Fast Ethernet device lge # Level 1 LXT1001 gigabit Ethernet device mlxfw # Mellanox firmware update module device mlx5 # Shared code module between IB and Ethernet device mlx5en # Mellanox ConnectX-4 and ConnectX-4 LX device msk # Marvell/SysKonnect Yukon II Gigabit Ethernet device my # Myson Fast Ethernet (MTD80X, MTD89X) device nge # NatSemi DP83820 gigabit Ethernet device re # RealTek 8139C+/8169/8169S/8110S device rl # RealTek 8129/8139 device sge # Silicon Integrated Systems SiS190/191 device sis # Silicon Integrated Systems SiS 900/SiS 7016 device sk # SysKonnect SK-984x & SK-982x gigabit Ethernet device ste # Sundance ST201 (D-Link DFE-550TX) device stge # Sundance/Tamarack TC9021 gigabit Ethernet device vr # VIA Rhine, Rhine II device vte # DM&P Vortex86 RDC R6040 Fast Ethernet device xl # 3Com 3c90x (``Boomerang'', ``Cyclone'') # PCI/PCI-X/PCIe Ethernet NICs that use iflib infrastructure device iflib device em # Intel Pro/1000 Gigabit Ethernet device ix # Intel Pro/10Gbe PCIE Ethernet device ixv # Intel Pro/10Gbe PCIE Ethernet VF # PCI Ethernet NICs. device cxgb # Chelsio T3 10 Gigabit Ethernet device cxgb_t3fw # Chelsio T3 10 Gigabit Ethernet firmware device cxgbe # Chelsio T4-T6 1/10/25/40/100 Gigabit Ethernet device cxgbev # Chelsio T4-T6 Virtual Functions device le # AMD Am7900 LANCE and Am79C9xx PCnet device mxge # Myricom Myri-10G 10GbE NIC device oce # Emulex 10 GbE (OneConnect Ethernet) device ti # Alteon Networks Tigon I/II gigabit Ethernet # PCI IEEE 802.11 Wireless NICs device ath # Atheros pci/cardbus NIC's device ath_hal # pci/cardbus chip support #device ath_ar5210 # AR5210 chips #device ath_ar5211 # AR5211 chips #device ath_ar5212 # AR5212 chips #device ath_rf2413 #device ath_rf2417 #device ath_rf2425 #device ath_rf5111 #device ath_rf5112 #device ath_rf5413 #device ath_ar5416 # AR5416 chips # All of the AR5212 parts have a problem when paired with the AR71xx # CPUS. These parts have a bug that triggers a fatal bus error on the AR71xx # only. Details of the exact nature of the bug are sketchy, but some can be # found at https://forum.openwrt.org/viewtopic.php?pid=70060 on pages 4, 5 and # 6. This option enables this workaround. There is a performance penalty # for this work around, but without it things don't work at all. The DMA # from the card usually bursts 128 bytes, but on the affected CPUs, only # 4 are safe. options AH_RXCFG_SDMAMW_4BYTES #device ath_ar9160 # AR9160 chips #device ath_ar9280 # AR9280 chips #device ath_ar9285 # AR9285 chips device ath_rate_sample # SampleRate tx rate control for ath device bwi # Broadcom BCM430* BCM431* device bwn # Broadcom BCM43xx device malo # Marvell Libertas wireless NICs. device mwl # Marvell 88W8363 802.11n wireless NICs. device mwlfw device ral # Ralink Technology RT2500 wireless NICs. device rtwn # Realtek wireless NICs device rtwnfw # Use sf_buf(9) interface for jumbo buffers on ti(4) controllers. #options TI_SF_BUF_JUMBO # Turn on the header splitting option for the ti(4) driver firmware. This # only works for Tigon II chips, and has no effect for Tigon I chips. # This option requires the TI_SF_BUF_JUMBO option above. #options TI_JUMBO_HDRSPLIT # These two options allow manipulating the mbuf cluster size and mbuf size, # respectively. Be very careful with NIC driver modules when changing # these from their default values, because that can potentially cause a # mismatch between the mbuf size assumed by the kernel and the mbuf size # assumed by a module. The only driver that currently has the ability to # detect a mismatch is ti(4). options MCLSHIFT=12 # mbuf cluster shift in bits, 12 == 4KB options MSIZE=512 # mbuf size in bytes # # Sound drivers # # sound: The generic sound driver. # device sound # # snd_*: Device-specific drivers. # # The flags of the device tell the device a bit more info about the # device that normally is obtained through the PnP interface. # bit 2..0 secondary DMA channel; # bit 4 set if the board uses two dma channels; # bit 15..8 board type, overrides autodetection; leave it # zero if don't know what to put in (and you don't, # since this is unsupported at the moment...). # # snd_ad1816: Analog Devices AD1816 ISA PnP/non-PnP. # snd_als4000: Avance Logic ALS4000 PCI. # snd_atiixp: ATI IXP 200/300/400 PCI. # snd_cmi: CMedia CMI8338/CMI8738 PCI. # snd_cs4281: Crystal Semiconductor CS4281 PCI. # snd_csa: Crystal Semiconductor CS461x/428x PCI. (except # 4281) # snd_ds1: Yamaha DS-1 PCI. # snd_emu10k1: Creative EMU10K1 PCI and EMU10K2 (Audigy) PCI. # snd_emu10kx: Creative SoundBlaster Live! and Audigy # snd_envy24: VIA Envy24 and compatible, needs snd_spicds. # snd_envy24ht: VIA Envy24HT and compatible, needs snd_spicds. # snd_es137x: Ensoniq AudioPCI ES137x PCI. # snd_ess: Ensoniq ESS ISA PnP/non-PnP, to be used in # conjunction with snd_sbc. # snd_fm801: Forte Media FM801 PCI. # snd_gusc: Gravis UltraSound ISA PnP/non-PnP. # snd_hda: Intel High Definition Audio (Controller) and # compatible. # snd_hdspe: RME HDSPe AIO and RayDAT. # snd_ich: Intel ICH AC'97 and some more audio controllers # embedded in a chipset, for example nVidia # nForce controllers. # snd_maestro: ESS Technology Maestro-1/2x PCI. # snd_maestro3: ESS Technology Maestro-3/Allegro PCI. # snd_mss: Microsoft Sound System ISA PnP/non-PnP. # snd_neomagic: Neomagic 256 AV/ZX PCI. # snd_sb16: Creative SoundBlaster16, to be used in # conjunction with snd_sbc. # snd_sb8: Creative SoundBlaster (pre-16), to be used in # conjunction with snd_sbc. # snd_sbc: Creative SoundBlaster ISA PnP/non-PnP. # Supports ESS and Avance ISA chips as well. # snd_solo: ESS Solo-1x PCI. # snd_spicds: SPI codec driver, needed by Envy24/Envy24HT drivers. # snd_t4dwave: Trident 4DWave DX/NX PCI, Sis 7018 PCI and Acer Labs # M5451 PCI. # snd_uaudio: USB audio. # snd_via8233: VIA VT8233x PCI. # snd_via82c686: VIA VT82C686A PCI. # snd_vibes: S3 Sonicvibes PCI. device snd_ad1816 device snd_als4000 device snd_atiixp device snd_cmi device snd_cs4281 device snd_csa device snd_ds1 device snd_emu10k1 device snd_emu10kx device snd_envy24 device snd_envy24ht device snd_es137x device snd_ess device snd_fm801 device snd_gusc device snd_hda device snd_hdspe device snd_ich device snd_maestro device snd_maestro3 device snd_mss device snd_neomagic device snd_sb16 device snd_sb8 device snd_sbc device snd_solo device snd_spicds device snd_t4dwave device snd_uaudio device snd_via8233 device snd_via82c686 device snd_vibes # For non-PnP sound cards: hint.pcm.0.at="isa" hint.pcm.0.irq="10" hint.pcm.0.drq="1" hint.pcm.0.flags="0x0" hint.sbc.0.at="isa" hint.sbc.0.port="0x220" hint.sbc.0.irq="5" hint.sbc.0.drq="1" hint.sbc.0.flags="0x15" hint.gusc.0.at="isa" hint.gusc.0.port="0x220" hint.gusc.0.irq="5" hint.gusc.0.drq="1" hint.gusc.0.flags="0x13" # # Following options are intended for debugging/testing purposes: # # SND_DEBUG Enable extra debugging code that includes # sanity checking and possible increase of # verbosity. # # SND_DIAGNOSTIC Similar in a spirit of INVARIANTS/DIAGNOSTIC, # zero tolerance against inconsistencies. # # SND_FEEDER_MULTIFORMAT By default, only 16/32 bit feeders are compiled # in. This options enable most feeder converters # except for 8bit. WARNING: May bloat the kernel. # # SND_FEEDER_FULL_MULTIFORMAT Ditto, but includes 8bit feeders as well. # # SND_FEEDER_RATE_HP (feeder_rate) High precision 64bit arithmetic # as much as possible (the default trying to # avoid it). Possible slowdown. # # SND_PCM_64 (Only applicable for i386/32bit arch) # Process 32bit samples through 64bit # integer/arithmetic. Slight increase of dynamic # range at a cost of possible slowdown. # # SND_OLDSTEREO Only 2 channels are allowed, effectively # disabling multichannel processing. # options SND_DEBUG options SND_DIAGNOSTIC options SND_FEEDER_MULTIFORMAT options SND_FEEDER_FULL_MULTIFORMAT options SND_FEEDER_RATE_HP options SND_PCM_64 options SND_OLDSTEREO # # Miscellaneous hardware: # # cmx: OmniKey CardMan 4040 pccard smartcard reader device cmx # # PC Card/PCMCIA and Cardbus # # cbb: pci/cardbus bridge implementing YENTA interface # pccard: pccard slots # cardbus: cardbus slots device cbb device pccard device cardbus # # MMC/SD # # mmc MMC/SD bus # mmcsd MMC/SD memory card # sdhci Generic PCI SD Host Controller # device mmc device mmcsd device sdhci # # SMB bus # # System Management Bus support is provided by the 'smbus' device. # Access to the SMBus device is via the 'smb' device (/dev/smb*), # which is a child of the 'smbus' device. # # Supported devices: # smb standard I/O through /dev/smb* # # Supported SMB interfaces: # iicsmb I2C to SMB bridge with any iicbus interface # intpm Intel PIIX4 (82371AB, 82443MX) Power Management Unit # alpm Acer Aladdin-IV/V/Pro2 Power Management Unit # ichsmb Intel ICH SMBus controller chips (82801AA, 82801AB, 82801BA) # viapm VIA VT82C586B/596B/686A and VT8233 Power Management Unit # amdpm AMD 756 Power Management Unit # amdsmb AMD 8111 SMBus 2.0 Controller # nfpm NVIDIA nForce Power Management Unit # nfsmb NVIDIA nForce2/3/4 MCP SMBus 2.0 Controller # ismt Intel SMBus 2.0 controller chips (on Atom S1200, C2000) # device smbus # Bus support, required for smb below. device intpm device alpm device ichsmb device viapm device amdpm device amdsmb device nfpm device nfsmb device ismt device smb # SMBus peripheral devices # # jedec_dimm Asset and temperature reporting for DDR3 and DDR4 DIMMs # device jedec_dimm # I2C Bus # # Philips i2c bus support is provided by the `iicbus' device. # # Supported devices: # ic i2c network interface # iic i2c standard io # iicsmb i2c to smb bridge. Allow i2c i/o with smb commands. # iicoc simple polling driver for OpenCores I2C controller # # Other: # iicbb generic I2C bit-banging code (needed by lpbb) # device iicbus # Bus support, required for ic/iic/iicsmb below. device iicbb # bitbang driver; implements i2c on a pair of gpio pins device ic device iic # userland access to i2c slave devices via ioctl(8) device iicsmb # smb over i2c bridge device iicoc # OpenCores I2C controller support # I2C bus multiplexer (mux) devices device iicmux # i2c mux core driver device iic_gpiomux # i2c mux hardware controlled via gpio pins device ltc430x # LTC4305 and LTC4306 i2c mux chips # I2C peripheral devices # device ad7418 # Analog Devices temp and voltage sensor device ads111x # Texas Instruments ADS101x and ADS111x ADCs device ds1307 # Dallas DS1307 RTC and compatible device ds13rtc # All Dallas/Maxim ds13xx chips device ds1672 # Dallas DS1672 RTC device ds3231 # Dallas DS3231 RTC + temperature device icee # AT24Cxxx and compatible EEPROMs device isl12xx # Intersil ISL12xx RTC device lm75 # LM75 compatible temperature sensor device nxprtc # NXP RTCs: PCA/PFC212x PCA/PCF85xx device rtc8583 # Epson RTC-8583 device s35390a # Seiko Instruments S-35390A RTC device sy8106a # Silergy Corp. SY8106A buck regulator device syr827 # Silergy Corp. DC/DC regulator # Parallel-Port Bus # # Parallel port bus support is provided by the `ppbus' device. # Multiple devices may be attached to the parallel port, devices # are automatically probed and attached when found. # # Supported devices: # lpt Parallel Printer # plip Parallel network interface # ppi General-purpose I/O ("Geek Port") + IEEE1284 I/O # pps Pulse per second Timing Interface # lpbb Philips official parallel port I2C bit-banging interface # pcfclock Parallel port clock driver. # # Supported interfaces: # ppc ISA-bus parallel port interfaces. # options PPC_PROBE_CHIPSET # Enable chipset specific detection # (see flags in ppc(4)) options DEBUG_1284 # IEEE1284 signaling protocol debug options PERIPH_1284 # Makes your computer act as an IEEE1284 # compliant peripheral options DONTPROBE_1284 # Avoid boot detection of PnP parallel devices options LPT_DEBUG # Printer driver debug options PPC_DEBUG # Parallel chipset level debug options PLIP_DEBUG # Parallel network IP interface debug options PCFCLOCK_VERBOSE # Verbose pcfclock driver options PCFCLOCK_MAX_RETRIES=5 # Maximum read tries (default 10) device ppc hint.ppc.0.at="isa" hint.ppc.0.irq="7" device ppbus device lpt device plip device ppi device pps device lpbb device pcfclock # General Purpose I/O pins device dwgpio # Synopsys DesignWare APB GPIO Controller device gpio # gpio interfaces and bus support device gpiobacklight # sysctl control of gpio-based backlight device gpioiic # i2c via gpio bitbang device gpiokeys # kbd(4) glue for gpio-based key input device gpioled # led(4) gpio glue device gpiopower # event handler for gpio-based powerdown device gpiopps # Pulse per second input from gpio pin device gpioregulator # extres/regulator glue for gpio pin device gpiospi # SPI via gpio bitbang device gpioths # 1-wire temp/humidity sensor on gpio pin # Pulse width modulation device pwmbus # pwm interface and bus support device pwmc # userland control access to pwm outputs # # Etherswitch framework and drivers # # etherswitch The etherswitch(4) framework # miiproxy Proxy device for miibus(4) functionality # # Switch hardware support: # arswitch Atheros switches # ip17x IC+ 17x family switches # rtl8366r Realtek RTL8366 switches # ukswitch Multi-PHY switches # device etherswitch device miiproxy device arswitch device ip17x device rtl8366rb device ukswitch # Kernel BOOTP support options BOOTP # Use BOOTP to obtain IP address/hostname # Requires NFSCL and NFS_ROOT options BOOTP_NFSROOT # NFS mount root filesystem using BOOTP info options BOOTP_NFSV3 # Use NFS v3 to NFS mount root options BOOTP_COMPAT # Workaround for broken bootp daemons. options BOOTP_WIRED_TO=fxp0 # Use interface fxp0 for BOOTP options BOOTP_BLOCKSIZE=8192 # Override NFS block size # # Enable software watchdog routines, even if hardware watchdog is present. # By default, software watchdog timer is enabled only if no hardware watchdog # is present. # options SW_WATCHDOG # # Add the software deadlock resolver thread. # options DEADLKRES # # Disable swapping of stack pages. This option removes all # code which actually performs swapping, so it's not possible to turn # it back on at run-time. # # This is sometimes usable for systems which don't have any swap space # (see also sysctl "vm.disable_swapspace_pageouts") # #options NO_SWAPPING # Set the number of sf_bufs to allocate. sf_bufs are virtual buffers # for sendfile(2) that are used to map file VM pages, and normally # default to a quantity that is roughly 16*MAXUSERS+512. You would # typically want about 4 of these for each simultaneous file send. # options NSFBUFS=1024 # # Enable extra debugging code for locks. This stores the filename and # line of whatever acquired the lock in the lock itself, and changes a # number of function calls to pass around the relevant data. This is # not at all useful unless you are debugging lock code. Note that # modules should be recompiled as this option modifies KBI. # options DEBUG_LOCKS ##################################################################### # USB support # UHCI controller device uhci # OHCI controller device ohci # EHCI controller device ehci # XHCI controller device xhci # SL811 Controller #device slhci # General USB code (mandatory for USB) device usb # # USB Double Bulk Pipe devices device udbp # USB Fm Radio device ufm # USB temperature meter device ugold # USB LED device uled # Human Interface Device (anything with buttons and dials) device uhid # USB keyboard device ukbd # USB printer device ulpt # USB mass storage driver (Requires scbus and da) device umass # USB mass storage driver for device-side mode device usfs # USB support for Belkin F5U109 and Magic Control Technology serial adapters device umct # USB modem support device umodem # USB mouse device ums # USB touchpad(s) device atp device wsp # eGalax USB touch screen device uep # Diamond Rio 500 MP3 player device urio # # USB serial support device ucom # USB support for 3G modem cards by Option, Novatel, Huawei and Sierra device u3g # USB support for Technologies ARK3116 based serial adapters device uark # USB support for Belkin F5U103 and compatible serial adapters device ubsa # USB support for serial adapters based on the FT8U100AX and FT8U232AM device uftdi # USB support for some Windows CE based serial communication. device uipaq # USB support for Prolific PL-2303 serial adapters device uplcom # USB support for Silicon Laboratories CP2101/CP2102 based USB serial adapters device uslcom # USB Visor and Palm devices device uvisor # USB serial support for DDI pocket's PHS device uvscom # # USB ethernet support device uether # ADMtek USB ethernet. Supports the LinkSys USB100TX, # the Billionton USB100, the Melco LU-ATX, the D-Link DSB-650TX # and the SMC 2202USB. Also works with the ADMtek AN986 Pegasus # eval board. device aue # ASIX Electronics AX88172 USB 2.0 ethernet driver. Used in the # LinkSys USB200M and various other adapters. device axe # ASIX Electronics AX88178A/AX88179 USB 2.0/3.0 gigabit ethernet driver. device axge # # Devices which communicate using Ethernet over USB, particularly # Communication Device Class (CDC) Ethernet specification. Supports # Sharp Zaurus PDAs, some DOCSIS cable modems and so on. device cdce # # CATC USB-EL1201A USB ethernet. Supports the CATC Netmate # and Netmate II, and the Belkin F5U111. device cue # # Kawasaki LSI ethernet. Supports the LinkSys USB10T, # Entrega USB-NET-E45, Peracom Ethernet Adapter, the # 3Com 3c19250, the ADS Technologies USB-10BT, the ATen UC10T, # the Netgear EA101, the D-Link DSB-650, the SMC 2102USB # and 2104USB, and the Corega USB-T. device kue # # RealTek RTL8150 USB to fast ethernet. Supports the Melco LUA-KTX # and the GREEN HOUSE GH-USB100B. device rue # # Davicom DM9601E USB to fast ethernet. Supports the Corega FEther USB-TXC. device udav # # RealTek RTL8152/RTL8153 USB Ethernet driver device ure # # Moschip MCS7730/MCS7840 USB to fast ethernet. Supports the Sitecom LN030. device mos # # HSxPA devices from Option N.V device uhso # Realtek RTL8188SU/RTL8191SU/RTL8192SU wireless driver device rsu # # Ralink Technology RT2501USB/RT2601USB wireless driver device rum # Ralink Technology RT2700U/RT2800U/RT3000U wireless driver device run # # Atheros AR5523 wireless driver device uath # # Conexant/Intersil PrismGT wireless driver device upgt # # Ralink Technology RT2500USB wireless driver device ural # # RNDIS USB ethernet driver device urndis # Realtek RTL8187B/L wireless driver device urtw # # ZyDas ZD1211/ZD1211B wireless driver device zyd # # Sierra USB wireless driver device usie # # debugging options for the USB subsystem # options USB_DEBUG options U3G_DEBUG # options for ukbd: options UKBD_DFLT_KEYMAP # specify the built-in keymap makeoptions UKBD_DFLT_KEYMAP=jp.106 # options for uplcom: options UPLCOM_INTR_INTERVAL=100 # interrupt pipe interval # in milliseconds # options for uvscom: options UVSCOM_DEFAULT_OPKTSIZE=8 # default output packet size options UVSCOM_INTR_INTERVAL=100 # interrupt pipe interval # in milliseconds ##################################################################### # FireWire support device firewire # FireWire bus code device sbp # SCSI over Firewire (Requires scbus and da) device sbp_targ # SBP-2 Target mode (Requires scbus and targ) device fwe # Ethernet over FireWire (non-standard!) device fwip # IP over FireWire (RFC2734 and RFC3146) ##################################################################### # dcons support (Dumb Console Device) device dcons # dumb console driver device dcons_crom # FireWire attachment options DCONS_BUF_SIZE=16384 # buffer size options DCONS_POLL_HZ=100 # polling rate options DCONS_FORCE_CONSOLE=0 # force to be the primary console options DCONS_FORCE_GDB=1 # force to be the gdb device ##################################################################### # crypto subsystem # # This is a port of the OpenBSD crypto framework. Include this when # configuring IPSEC and when you have a h/w crypto device to accelerate # user applications that link to OpenSSL. # # Drivers are ports from OpenBSD with some simple enhancements that have # been fed back to OpenBSD. device crypto # core crypto support # Only install the cryptodev device if you are running tests, or know # specifically why you need it. In most cases, it is not needed and # will make things slower. device cryptodev # /dev/crypto for access to h/w device rndtest # FIPS 140-2 entropy tester device ccr # Chelsio T6 device hifn # Hifn 7951, 7781, etc. options HIFN_DEBUG # enable debugging support: hw.hifn.debug options HIFN_RNDTEST # enable rndtest support device ubsec # Broadcom 5501, 5601, 58xx options UBSEC_DEBUG # enable debugging support: hw.ubsec.debug options UBSEC_RNDTEST # enable rndtest support ##################################################################### # # Embedded system options: # # An embedded system might want to run something other than init. options INIT_PATH=/sbin/init:/rescue/init # Debug options options BUS_DEBUG # enable newbus debugging options DEBUG_VFS_LOCKS # enable VFS lock debugging options SOCKBUF_DEBUG # enable sockbuf last record/mb tail checking options IFMEDIA_DEBUG # enable debugging in net/if_media.c # # Verbose SYSINIT # # Make the SYSINIT process performed by mi_startup() verbose. This is very # useful when porting to a new architecture. If DDB is also enabled, this # will print function names instead of addresses. If defined with a value # of zero, the verbose code is compiled-in but disabled by default, and can # be enabled with the debug.verbose_sysinit=1 tunable. options VERBOSE_SYSINIT ##################################################################### # SYSV IPC KERNEL PARAMETERS # # Maximum number of System V semaphores that can be used on the system at # one time. options SEMMNI=11 # Total number of semaphores system wide options SEMMNS=61 # Total number of undo structures in system options SEMMNU=31 # Maximum number of System V semaphores that can be used by a single process # at one time. options SEMMSL=61 # Maximum number of operations that can be outstanding on a single System V # semaphore at one time. options SEMOPM=101 # Maximum number of undo operations that can be outstanding on a single # System V semaphore at one time. options SEMUME=11 # Maximum number of shared memory pages system wide. options SHMALL=1025 # Maximum size, in bytes, of a single System V shared memory region. options SHMMAX=(SHMMAXPGS*PAGE_SIZE+1) options SHMMAXPGS=1025 # Minimum size, in bytes, of a single System V shared memory region. options SHMMIN=2 # Maximum number of shared memory regions that can be used on the system # at one time. options SHMMNI=33 # Maximum number of System V shared memory regions that can be attached to # a single process at one time. options SHMSEG=9 # Set the amount of time (in seconds) the system will wait before # rebooting automatically when a kernel panic occurs. If set to (-1), # the system will wait indefinitely until a key is pressed on the # console. options PANIC_REBOOT_WAIT_TIME=16 # Attempt to bypass the buffer cache and put data directly into the # userland buffer for read operation when O_DIRECT flag is set on the # file. Both offset and length of the read operation must be # multiples of the physical media sector size. # options DIRECTIO # Specify a lower limit for the number of swap I/O buffers. They are # (among other things) used when bypassing the buffer cache due to # DIRECTIO kernel option enabled and O_DIRECT flag set on file. # options NSWBUF_MIN=120 ##################################################################### # More undocumented options for linting. # Note that documenting these is not considered an affront. options CAM_DEBUG_DELAY # VFS cluster debugging. options CLUSTERDEBUG options DEBUG # Kernel filelock debugging. options LOCKF_DEBUG # System V compatible message queues # Please note that the values provided here are used to test kernel # building. The defaults in the sources provide almost the same numbers. # MSGSSZ must be a power of 2 between 8 and 1024. options MSGMNB=2049 # Max number of chars in queue options MSGMNI=41 # Max number of message queue identifiers options MSGSEG=2049 # Max number of message segments options MSGSSZ=16 # Size of a message segment options MSGTQL=41 # Max number of messages in system options NBUF=512 # Number of buffer headers options SC_DEBUG_LEVEL=5 # Syscons debug level options SC_RENDER_DEBUG # syscons rendering debugging options VFS_BIO_DEBUG # VFS buffer I/O debugging options KSTACK_MAX_PAGES=32 # Maximum pages to give the kernel stack options KSTACK_USAGE_PROF # Adaptec Array Controller driver options options AAC_DEBUG # Debugging levels: # 0 - quiet, only emit warnings # 1 - noisy, emit major function # points and things done # 2 - extremely noisy, emit trace # items in loops, etc. # Resource Accounting options RACCT # Resource Limits options RCTL # Yet more undocumented options for linting. options MAXFILES=999 # Random number generator # Allow the CSPRNG algorithm to be loaded as a module. #options RANDOM_LOADABLE # Select this to allow high-rate but potentially expensive # harvesting of Slab-Allocator entropy. In very high-rate # situations the value of doing this is dubious at best. options RANDOM_ENABLE_UMA # slab allocator # Select this to allow high-rate but potentially expensive # harvesting of of the m_next pointer in the mbuf. Note that # the m_next pointer is NULL except when receiving > 4K # jumbo frames or sustained bursts by way of LRO. Thus in # the common case it is stirring zero in to the entropy # pool. In cases where it is not NULL it is pointing to one # of a small (in the thousands to 10s of thousands) number # of 256 byte aligned mbufs. Hence it is, even in the best # case, a poor source of entropy. And in the absence of actual # runtime analysis of entropy collection may mislead the user in # to believe that substantially more entropy is being collected # than in fact is - leading to a different class of security # risk. In high packet rate situations ethernet entropy # collection is also very expensive, possibly leading to as # much as a 50% drop in packets received. # This option is present to maintain backwards compatibility # if desired, however it cannot be recommended for use in any # environment. options RANDOM_ENABLE_ETHER # ether_input # Module to enable execution of application via emulators like QEMU options IMAGACT_BINMISC # zlib I/O stream support # This enables support for compressed core dumps. options GZIO # zstd support # This enables support for Zstd compressed core dumps and GEOM_UZIP images. options ZSTDIO # BHND(4) drivers options BHND_LOGLEVEL # Logging threshold level # evdev interface device evdev # input event device support options EVDEV_SUPPORT # evdev support in legacy drivers options EVDEV_DEBUG # enable event debug msgs device uinput # install /dev/uinput cdev options UINPUT_DEBUG # enable uinput debug msgs # Encrypted kernel crash dumps. options EKCD # Serial Peripheral Interface (SPI) support. device spibus # Bus support. device at45d # DataFlash driver device cqspi # device mx25l # SPIFlash driver device n25q # device spigen # Generic access to SPI devices from userland. # Enable legacy /dev/spigenN name aliases for /dev/spigenX.Y devices. options SPIGEN_LEGACY_CDEVNAME # legacy device names for spigen # Compression supports. device zlib # gzip/zlib compression/decompression library device xz # xz_embedded LZMA de-compression library # Kernel support for stats(3). options STATS diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc index e2136ee1416d..4013eae2836f 100644 --- a/sys/conf/files.powerpc +++ b/sys/conf/files.powerpc @@ -1,274 +1,275 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. # # $FreeBSD$ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and # dependency lines other than the first are silently ignored. # # # There is only an asm version on ppc64. cddl/compat/opensolaris/kern/opensolaris_atomic.c optional zfs powerpc | dtrace powerpc | zfs powerpcspe | dtrace powerpcspe compile-with "${ZFS_C}" cddl/dev/dtrace/powerpc/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/powerpc/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/powerpc/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" crypto/blowfish/bf_enc.c optional crypto | ipsec | ipsec_support crypto/des/des_enc.c optional crypto | ipsec | ipsec_support | netsmb +dev/aacraid/aacraid_endian.c optional aacraid dev/adb/adb_bus.c optional adb dev/adb/adb_kbd.c optional adb dev/adb/adb_mouse.c optional adb dev/adb/adb_hb_if.m optional adb dev/adb/adb_if.m optional adb dev/adb/adb_buttons.c optional adb dev/agp/agp_apple.c optional agp powermac dev/fb/fb.c optional sc dev/hwpmc/hwpmc_e500.c optional hwpmc dev/hwpmc/hwpmc_mpc7xxx.c optional hwpmc dev/hwpmc/hwpmc_powerpc.c optional hwpmc dev/hwpmc/hwpmc_ppc970.c optional hwpmc dev/iicbus/ad7417.c optional ad7417 powermac dev/iicbus/adm1030.c optional powermac windtunnel | adm1030 powermac dev/iicbus/adt746x.c optional adt746x powermac dev/iicbus/ds1631.c optional ds1631 powermac dev/iicbus/ds1775.c optional ds1775 powermac dev/iicbus/max6690.c optional max6690 powermac dev/iicbus/ofw_iicbus.c optional iicbus aim dev/ipmi/ipmi.c optional ipmi dev/ipmi/ipmi_opal.c optional powernv ipmi # Most ofw stuff below is brought in by conf/files for options FDT, but # we always want it, even on non-FDT platforms. dev/fdt/simplebus.c standard dev/ofw/openfirm.c standard dev/ofw/openfirmio.c standard dev/ofw/ofw_bus_if.m standard dev/ofw/ofw_cpu.c standard dev/ofw/ofw_if.m standard dev/ofw/ofw_bus_subr.c standard dev/ofw/ofw_console.c optional aim dev/ofw/ofw_disk.c optional ofwd aim dev/ofw/ofwbus.c standard dev/ofw/ofwpci.c optional pci dev/ofw/ofw_standard.c optional aim powerpc dev/ofw/ofw_subr.c standard dev/powermac_nvram/powermac_nvram.c optional powermac_nvram powermac dev/quicc/quicc_bfe_fdt.c optional quicc mpc85xx dev/random/darn.c optional powerpc64 !random_loadable dev/scc/scc_bfe_macio.c optional scc powermac dev/sdhci/fsl_sdhci.c optional mpc85xx sdhci dev/sec/sec.c optional sec mpc85xx dev/sound/macio/aoa.c optional snd_davbus | snd_ai2s powermac dev/sound/macio/davbus.c optional snd_davbus powermac dev/sound/macio/i2s.c optional snd_ai2s powermac dev/sound/macio/onyx.c optional snd_ai2s iicbus powermac dev/sound/macio/snapper.c optional snd_ai2s iicbus powermac dev/sound/macio/tumbler.c optional snd_ai2s iicbus powermac dev/syscons/scgfbrndr.c optional sc dev/tsec/if_tsec.c optional tsec dev/tsec/if_tsec_fdt.c optional tsec dev/uart/uart_cpu_powerpc.c optional uart dev/usb/controller/ehci_fsl.c optional ehci mpc85xx dev/vt/hw/ofwfb/ofwfb.c optional vt aim kern/kern_clocksource.c standard kern/subr_atomic64.c optional powerpc | powerpcspe kern/subr_dummy_vdso_tc.c standard kern/syscalls.c optional ktr kern/subr_sfbuf.c standard libkern/ashldi3.c optional powerpc | powerpcspe libkern/ashrdi3.c optional powerpc | powerpcspe libkern/bcmp.c standard libkern/bcopy.c standard libkern/cmpdi2.c optional powerpc | powerpcspe libkern/divdi3.c optional powerpc | powerpcspe libkern/ffs.c standard libkern/ffsl.c standard libkern/ffsll.c standard libkern/flsll.c standard libkern/lshrdi3.c optional powerpc | powerpcspe libkern/memcmp.c standard libkern/memset.c standard libkern/moddi3.c optional powerpc | powerpcspe libkern/qdivrem.c optional powerpc | powerpcspe libkern/ucmpdi2.c optional powerpc | powerpcspe libkern/udivdi3.c optional powerpc | powerpcspe libkern/umoddi3.c optional powerpc | powerpcspe powerpc/aim/locore.S optional aim no-obj powerpc/aim/aim_machdep.c optional aim powerpc/aim/mmu_oea.c optional aim powerpc powerpc/aim/mmu_oea64.c optional aim powerpc/aim/moea64_if.m optional aim powerpc/aim/moea64_native.c optional aim powerpc/aim/mp_cpudep.c optional aim powerpc/aim/slb.c optional aim powerpc64 powerpc/amigaone/platform_amigaone.c optional amigaone powerpc/amigaone/cpld_x5000.c optional powerpc amigaone | powerpc64 amigaone powerpc/booke/locore.S optional booke no-obj powerpc/booke/booke_machdep.c optional booke powerpc/booke/machdep_e500.c optional booke_e500 powerpc/booke/mp_cpudep.c optional booke smp powerpc/booke/platform_bare.c optional booke powerpc/booke/pmap.c optional booke powerpc/booke/spe.c optional powerpcspe powerpc/cpufreq/dfs.c optional cpufreq powerpc/cpufreq/mpc85xx_jog.c optional cpufreq mpc85xx powerpc/cpufreq/pcr.c optional cpufreq aim powerpc/cpufreq/pmcr.c optional cpufreq aim powerpc64 powerpc/cpufreq/pmufreq.c optional cpufreq aim pmu powerpc/fpu/fpu_add.c optional fpu_emu | powerpcspe powerpc/fpu/fpu_compare.c optional fpu_emu | powerpcspe powerpc/fpu/fpu_div.c optional fpu_emu | powerpcspe powerpc/fpu/fpu_emu.c optional fpu_emu powerpc/fpu/fpu_explode.c optional fpu_emu | powerpcspe powerpc/fpu/fpu_implode.c optional fpu_emu | powerpcspe powerpc/fpu/fpu_mul.c optional fpu_emu | powerpcspe powerpc/fpu/fpu_sqrt.c optional fpu_emu powerpc/fpu/fpu_subr.c optional fpu_emu | powerpcspe powerpc/mambo/mambocall.S optional mambo powerpc/mambo/mambo.c optional mambo powerpc/mambo/mambo_console.c optional mambo powerpc/mambo/mambo_disk.c optional mambo powerpc/mikrotik/platform_rb.c optional mikrotik powerpc/mikrotik/rb_led.c optional mikrotik powerpc/mpc85xx/atpic.c optional mpc85xx isa powerpc/mpc85xx/ds1553_bus_fdt.c optional ds1553 powerpc/mpc85xx/ds1553_core.c optional ds1553 powerpc/mpc85xx/fsl_diu.c optional mpc85xx diu powerpc/mpc85xx/fsl_espi.c optional mpc85xx spibus powerpc/mpc85xx/fsl_sata.c optional mpc85xx ata powerpc/mpc85xx/i2c.c optional mpc85xx iicbus powerpc/mpc85xx/isa.c optional mpc85xx isa powerpc/mpc85xx/lbc.c optional mpc85xx powerpc/mpc85xx/mpc85xx.c optional mpc85xx powerpc/mpc85xx/mpc85xx_cache.c optional mpc85xx powerpc/mpc85xx/mpc85xx_gpio.c optional mpc85xx gpio powerpc/mpc85xx/platform_mpc85xx.c optional mpc85xx powerpc/mpc85xx/pci_mpc85xx.c optional pci mpc85xx powerpc/mpc85xx/pci_mpc85xx_pcib.c optional pci mpc85xx powerpc/mpc85xx/qoriq_gpio.c optional mpc85xx gpio powerpc/ofw/ofw_machdep.c standard powerpc/ofw/ofw_pcibus.c optional pci powerpc/ofw/ofw_pcib_pci.c optional pci powerpc/ofw/ofw_real.c optional aim powerpc/ofw/ofw_syscons.c optional sc aim powerpc/ofw/ofwcall32.S optional aim powerpc powerpc/ofw/ofwcall64.S optional aim powerpc64 powerpc/ofw/openpic_ofw.c standard powerpc/ofw/rtas.c optional aim powerpc/ofw/ofw_initrd.c optional md_root_mem powerpc64 powerpc/powermac/ata_kauai.c optional powermac ata | powermac atamacio powerpc/powermac/ata_macio.c optional powermac ata | powermac atamacio powerpc/powermac/ata_dbdma.c optional powermac ata | powermac atamacio powerpc/powermac/atibl.c optional powermac atibl powerpc/powermac/cuda.c optional powermac cuda powerpc/powermac/cpcht.c optional powermac pci powerpc/powermac/dbdma.c optional powermac pci powerpc/powermac/fcu.c optional powermac fcu powerpc/powermac/grackle.c optional powermac pci powerpc/powermac/hrowpic.c optional powermac pci powerpc/powermac/kiic.c optional powermac kiic powerpc/powermac/macgpio.c optional powermac pci powerpc/powermac/macio.c optional powermac pci powerpc/powermac/nvbl.c optional powermac nvbl powerpc/powermac/platform_powermac.c optional powermac powerpc/powermac/powermac_thermal.c optional powermac powerpc/powermac/pswitch.c optional powermac pswitch powerpc/powermac/pmu.c optional powermac pmu powerpc/powermac/smu.c optional powermac smu powerpc/powermac/smusat.c optional powermac smu powerpc/powermac/uninorth.c optional powermac powerpc/powermac/uninorthpci.c optional powermac pci powerpc/powermac/vcoregpio.c optional powermac powerpc/powernv/opal.c optional powernv powerpc/powernv/opal_async.c optional powernv powerpc/powernv/opal_console.c optional powernv powerpc/powernv/opal_dbg.c optional powernv gdb powerpc/powernv/opal_dev.c optional powernv powerpc/powernv/opal_flash.c optional powernv opalflash powerpc/powernv/opal_hmi.c optional powernv powerpc/powernv/opal_i2c.c optional iicbus fdt powernv powerpc/powernv/opal_i2cm.c optional iicbus fdt powernv powerpc/powernv/opal_nvram.c optional powernv nvram powerpc/powernv/opal_pci.c optional powernv pci powerpc/powernv/opal_sensor.c optional powernv powerpc/powernv/opalcall.S optional powernv powerpc/powernv/platform_powernv.c optional powernv powerpc/powernv/powernv_centaur.c optional powernv powerpc/powernv/powernv_xscom.c optional powernv powerpc/powernv/xive.c optional powernv powerpc/powerpc/altivec.c optional powerpc | powerpc64 powerpc/powerpc/autoconf.c standard powerpc/powerpc/bus_machdep.c standard powerpc/powerpc/busdma_machdep.c standard powerpc/powerpc/clock.c standard powerpc/powerpc/copyinout.c standard powerpc/powerpc/copystr.c standard powerpc/powerpc/cpu.c standard powerpc/powerpc/cpu_subr64.S optional powerpc64 powerpc/powerpc/db_disasm.c optional ddb powerpc/powerpc/db_hwwatch.c optional ddb powerpc/powerpc/db_interface.c optional ddb powerpc/powerpc/db_trace.c optional ddb powerpc/powerpc/dump_machdep.c standard powerpc/powerpc/elf32_machdep.c optional powerpc | powerpcspe | compat_freebsd32 powerpc/powerpc/elf64_machdep.c optional powerpc64 powerpc/powerpc/exec_machdep.c standard powerpc/powerpc/fpu.c standard powerpc/powerpc/gdb_machdep.c optional gdb powerpc/powerpc/in_cksum.c optional inet | inet6 powerpc/powerpc/interrupt.c standard powerpc/powerpc/intr_machdep.c standard powerpc/powerpc/iommu_if.m standard powerpc/powerpc/machdep.c standard powerpc/powerpc/mem.c optional mem powerpc/powerpc/minidump_machdep.c optional powerpc64 powerpc/powerpc/mmu_if.m standard powerpc/powerpc/mp_machdep.c optional smp powerpc/powerpc/nexus.c standard powerpc/powerpc/openpic.c standard powerpc/powerpc/pic_if.m standard powerpc/powerpc/pmap_dispatch.c standard powerpc/powerpc/platform.c standard powerpc/powerpc/platform_if.m standard powerpc/powerpc/ptrace_machdep.c standard powerpc/powerpc/sc_machdep.c optional sc powerpc/powerpc/setjmp.S standard powerpc/powerpc/sigcode32.S optional powerpc | powerpcspe | compat_freebsd32 powerpc/powerpc/sigcode64.S optional powerpc64 powerpc/powerpc/swtch32.S optional powerpc | powerpcspe powerpc/powerpc/swtch64.S optional powerpc64 powerpc/powerpc/stack_machdep.c optional ddb | stack powerpc/powerpc/syncicache.c standard powerpc/powerpc/sys_machdep.c standard powerpc/powerpc/trap.c standard powerpc/powerpc/uio_machdep.c standard powerpc/powerpc/uma_machdep.c standard powerpc/powerpc/vm_machdep.c standard powerpc/ps3/ehci_ps3.c optional ps3 ehci powerpc/ps3/ohci_ps3.c optional ps3 ohci powerpc/ps3/if_glc.c optional ps3 glc powerpc/ps3/mmu_ps3.c optional ps3 powerpc/ps3/platform_ps3.c optional ps3 powerpc/ps3/ps3bus.c optional ps3 powerpc/ps3/ps3cdrom.c optional ps3 scbus powerpc/ps3/ps3disk.c optional ps3 powerpc/ps3/ps3pic.c optional ps3 powerpc/ps3/ps3_syscons.c optional ps3 vt powerpc/ps3/ps3-hvcall.S optional ps3 powerpc/pseries/phyp-hvcall.S optional pseries powerpc64 powerpc/pseries/mmu_phyp.c optional pseries powerpc64 powerpc/pseries/phyp_console.c optional pseries powerpc64 uart powerpc/pseries/phyp_dbg.c optional pseries powerpc64 gdb powerpc/pseries/phyp_llan.c optional llan powerpc/pseries/phyp_vscsi.c optional pseries powerpc64 scbus powerpc/pseries/platform_chrp.c optional pseries powerpc/pseries/plpar_iommu.c optional pseries powerpc64 powerpc/pseries/plpar_pcibus.c optional pseries powerpc64 pci powerpc/pseries/rtas_dev.c optional pseries powerpc/pseries/rtas_pci.c optional pseries pci powerpc/pseries/vdevice.c optional pseries powerpc64 powerpc/pseries/xics.c optional pseries powerpc64 powerpc/psim/iobus.c optional psim powerpc/psim/ata_iobus.c optional ata psim powerpc/psim/openpic_iobus.c optional psim powerpc/psim/uart_iobus.c optional uart psim diff --git a/sys/dev/aacraid/aacraid.c b/sys/dev/aacraid/aacraid.c index c8184fe5497b..cd7d27558e8a 100644 --- a/sys/dev/aacraid/aacraid.c +++ b/sys/dev/aacraid/aacraid.c @@ -1,3867 +1,3927 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2000 Michael Smith * Copyright (c) 2001 Scott Long * Copyright (c) 2000 BSDi * Copyright (c) 2001-2010 Adaptec, Inc. * Copyright (c) 2010-2012 PMC-Sierra, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Driver for the Adaptec by PMC Series 6,7,8,... families of RAID controllers */ #define AAC_DRIVERNAME "aacraid" #include "opt_aacraid.h" /* #include */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #ifndef FILTER_HANDLED #define FILTER_HANDLED 0x02 #endif static void aac_add_container(struct aac_softc *sc, struct aac_mntinforesp *mir, int f, u_int32_t uid); static void aac_get_bus_info(struct aac_softc *sc); static void aac_container_bus(struct aac_softc *sc); static void aac_daemon(void *arg); static int aac_convert_sgraw2(struct aac_softc *sc, struct aac_raw_io2 *raw, int pages, int nseg, int nseg_new); /* Command Processing */ static void aac_timeout(struct aac_softc *sc); static void aac_command_thread(struct aac_softc *sc); static int aac_sync_fib(struct aac_softc *sc, u_int32_t command, u_int32_t xferstate, struct aac_fib *fib, u_int16_t datasize); /* Command Buffer Management */ static void aac_map_command_helper(void *arg, bus_dma_segment_t *segs, int nseg, int error); static int aac_alloc_commands(struct aac_softc *sc); static void aac_free_commands(struct aac_softc *sc); static void aac_unmap_command(struct aac_command *cm); /* Hardware Interface */ static int aac_alloc(struct aac_softc *sc); static void aac_common_map(void *arg, bus_dma_segment_t *segs, int nseg, int error); static int aac_check_firmware(struct aac_softc *sc); static void aac_define_int_mode(struct aac_softc *sc); static int aac_init(struct aac_softc *sc); static int aac_find_pci_capability(struct aac_softc *sc, int cap); static int aac_setup_intr(struct aac_softc *sc); static int aac_check_config(struct aac_softc *sc); /* PMC SRC interface */ static int aac_src_get_fwstatus(struct aac_softc *sc); static void aac_src_qnotify(struct aac_softc *sc, int qbit); static int aac_src_get_istatus(struct aac_softc *sc); static void aac_src_clear_istatus(struct aac_softc *sc, int mask); static void aac_src_set_mailbox(struct aac_softc *sc, u_int32_t command, u_int32_t arg0, u_int32_t arg1, u_int32_t arg2, u_int32_t arg3); static int aac_src_get_mailbox(struct aac_softc *sc, int mb); static void aac_src_access_devreg(struct aac_softc *sc, int mode); static int aac_src_send_command(struct aac_softc *sc, struct aac_command *cm); static int aac_src_get_outb_queue(struct aac_softc *sc); static void aac_src_set_outb_queue(struct aac_softc *sc, int index); struct aac_interface aacraid_src_interface = { aac_src_get_fwstatus, aac_src_qnotify, aac_src_get_istatus, aac_src_clear_istatus, aac_src_set_mailbox, aac_src_get_mailbox, aac_src_access_devreg, aac_src_send_command, aac_src_get_outb_queue, aac_src_set_outb_queue }; /* PMC SRCv interface */ static void aac_srcv_set_mailbox(struct aac_softc *sc, u_int32_t command, u_int32_t arg0, u_int32_t arg1, u_int32_t arg2, u_int32_t arg3); static int aac_srcv_get_mailbox(struct aac_softc *sc, int mb); struct aac_interface aacraid_srcv_interface = { aac_src_get_fwstatus, aac_src_qnotify, aac_src_get_istatus, aac_src_clear_istatus, aac_srcv_set_mailbox, aac_srcv_get_mailbox, aac_src_access_devreg, aac_src_send_command, aac_src_get_outb_queue, aac_src_set_outb_queue }; /* Debugging and Diagnostics */ static struct aac_code_lookup aac_cpu_variant[] = { {"i960JX", CPUI960_JX}, {"i960CX", CPUI960_CX}, {"i960HX", CPUI960_HX}, {"i960RX", CPUI960_RX}, {"i960 80303", CPUI960_80303}, {"StrongARM SA110", CPUARM_SA110}, {"PPC603e", CPUPPC_603e}, {"XScale 80321", CPU_XSCALE_80321}, {"MIPS 4KC", CPU_MIPS_4KC}, {"MIPS 5KC", CPU_MIPS_5KC}, {"Unknown StrongARM", CPUARM_xxx}, {"Unknown PowerPC", CPUPPC_xxx}, {NULL, 0}, {"Unknown processor", 0} }; static struct aac_code_lookup aac_battery_platform[] = { {"required battery present", PLATFORM_BAT_REQ_PRESENT}, {"REQUIRED BATTERY NOT PRESENT", PLATFORM_BAT_REQ_NOTPRESENT}, {"optional battery present", PLATFORM_BAT_OPT_PRESENT}, {"optional battery not installed", PLATFORM_BAT_OPT_NOTPRESENT}, {"no battery support", PLATFORM_BAT_NOT_SUPPORTED}, {NULL, 0}, {"unknown battery platform", 0} }; static void aac_describe_controller(struct aac_softc *sc); static char *aac_describe_code(struct aac_code_lookup *table, u_int32_t code); /* Management Interface */ static d_open_t aac_open; static d_ioctl_t aac_ioctl; static d_poll_t aac_poll; static void aac_cdevpriv_dtor(void *arg); static int aac_ioctl_sendfib(struct aac_softc *sc, caddr_t ufib); static int aac_ioctl_send_raw_srb(struct aac_softc *sc, caddr_t arg); static void aac_handle_aif(struct aac_softc *sc, struct aac_fib *fib); static void aac_request_aif(struct aac_softc *sc); static int aac_rev_check(struct aac_softc *sc, caddr_t udata); static int aac_open_aif(struct aac_softc *sc, caddr_t arg); static int aac_close_aif(struct aac_softc *sc, caddr_t arg); static int aac_getnext_aif(struct aac_softc *sc, caddr_t arg); static int aac_return_aif(struct aac_softc *sc, struct aac_fib_context *ctx, caddr_t uptr); static int aac_query_disk(struct aac_softc *sc, caddr_t uptr); static int aac_get_pci_info(struct aac_softc *sc, caddr_t uptr); static int aac_supported_features(struct aac_softc *sc, caddr_t uptr); static void aac_ioctl_event(struct aac_softc *sc, struct aac_event *event, void *arg); static int aac_reset_adapter(struct aac_softc *sc); static int aac_get_container_info(struct aac_softc *sc, struct aac_fib *fib, int cid, struct aac_mntinforesp *mir, u_int32_t *uid); static u_int32_t aac_check_adapter_health(struct aac_softc *sc, u_int8_t *bled); static struct cdevsw aacraid_cdevsw = { .d_version = D_VERSION, .d_flags = 0, .d_open = aac_open, .d_ioctl = aac_ioctl, .d_poll = aac_poll, .d_name = "aacraid", }; MALLOC_DEFINE(M_AACRAIDBUF, "aacraid_buf", "Buffers for the AACRAID driver"); /* sysctl node */ SYSCTL_NODE(_hw, OID_AUTO, aacraid, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "AACRAID driver parameters"); /* * Device Interface */ /* * Initialize the controller and softc */ int aacraid_attach(struct aac_softc *sc) { int error, unit; struct aac_fib *fib; struct aac_mntinforesp mir; int count = 0, i = 0; u_int32_t uid; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); sc->hint_flags = device_get_flags(sc->aac_dev); /* * Initialize per-controller queues. */ aac_initq_free(sc); aac_initq_ready(sc); aac_initq_busy(sc); /* mark controller as suspended until we get ourselves organised */ sc->aac_state |= AAC_STATE_SUSPEND; /* * Check that the firmware on the card is supported. */ sc->msi_enabled = sc->msi_tupelo = FALSE; if ((error = aac_check_firmware(sc)) != 0) return(error); /* * Initialize locks */ mtx_init(&sc->aac_io_lock, "AACRAID I/O lock", NULL, MTX_DEF); TAILQ_INIT(&sc->aac_container_tqh); TAILQ_INIT(&sc->aac_ev_cmfree); /* Initialize the clock daemon callout. */ callout_init_mtx(&sc->aac_daemontime, &sc->aac_io_lock, 0); /* * Initialize the adapter. */ if ((error = aac_alloc(sc)) != 0) return(error); aac_define_int_mode(sc); if (!(sc->flags & AAC_FLAGS_SYNC_MODE)) { if ((error = aac_init(sc)) != 0) return(error); } /* * Allocate and connect our interrupt. */ if ((error = aac_setup_intr(sc)) != 0) return(error); /* * Print a little information about the controller. */ aac_describe_controller(sc); /* * Make the control device. */ unit = device_get_unit(sc->aac_dev); sc->aac_dev_t = make_dev(&aacraid_cdevsw, unit, UID_ROOT, GID_OPERATOR, 0640, "aacraid%d", unit); sc->aac_dev_t->si_drv1 = sc; /* Create the AIF thread */ if (aac_kthread_create((void(*)(void *))aac_command_thread, sc, &sc->aifthread, 0, 0, "aacraid%daif", unit)) panic("Could not create AIF thread"); /* Register the shutdown method to only be called post-dump */ if ((sc->eh = EVENTHANDLER_REGISTER(shutdown_final, aacraid_shutdown, sc->aac_dev, SHUTDOWN_PRI_DEFAULT)) == NULL) device_printf(sc->aac_dev, "shutdown event registration failed\n"); /* Find containers */ mtx_lock(&sc->aac_io_lock); aac_alloc_sync_fib(sc, &fib); /* loop over possible containers */ do { if ((aac_get_container_info(sc, fib, i, &mir, &uid)) != 0) continue; if (i == 0) count = mir.MntRespCount; aac_add_container(sc, &mir, 0, uid); i++; } while ((i < count) && (i < AAC_MAX_CONTAINERS)); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); /* Register with CAM for the containers */ TAILQ_INIT(&sc->aac_sim_tqh); aac_container_bus(sc); /* Register with CAM for the non-DASD devices */ if ((sc->flags & AAC_FLAGS_ENABLE_CAM) != 0) aac_get_bus_info(sc); /* poke the bus to actually attach the child devices */ bus_generic_attach(sc->aac_dev); /* mark the controller up */ sc->aac_state &= ~AAC_STATE_SUSPEND; /* enable interrupts now */ AAC_ACCESS_DEVREG(sc, AAC_ENABLE_INTERRUPT); mtx_lock(&sc->aac_io_lock); callout_reset(&sc->aac_daemontime, 60 * hz, aac_daemon, sc); mtx_unlock(&sc->aac_io_lock); return(0); } static void aac_daemon(void *arg) { struct aac_softc *sc; struct timeval tv; struct aac_command *cm; struct aac_fib *fib; sc = arg; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_assert(&sc->aac_io_lock, MA_OWNED); if (callout_pending(&sc->aac_daemontime) || callout_active(&sc->aac_daemontime) == 0) return; getmicrotime(&tv); if (!aacraid_alloc_command(sc, &cm)) { fib = cm->cm_fib; cm->cm_timestamp = time_uptime; cm->cm_datalen = 0; cm->cm_flags |= AAC_CMD_WAIT; fib->Header.Size = sizeof(struct aac_fib_header) + sizeof(u_int32_t); fib->Header.XferState = AAC_FIBSTATE_HOSTOWNED | AAC_FIBSTATE_INITIALISED | AAC_FIBSTATE_EMPTY | AAC_FIBSTATE_FROMHOST | AAC_FIBSTATE_REXPECTED | AAC_FIBSTATE_NORM | AAC_FIBSTATE_ASYNC | AAC_FIBSTATE_FAST_RESPONSE; fib->Header.Command = SendHostTime; - *(uint32_t *)fib->data = tv.tv_sec; + *(uint32_t *)fib->data = htole32(tv.tv_sec); aacraid_map_command_sg(cm, NULL, 0, 0); aacraid_release_command(cm); } callout_schedule(&sc->aac_daemontime, 30 * 60 * hz); } void aacraid_add_event(struct aac_softc *sc, struct aac_event *event) { switch (event->ev_type & AAC_EVENT_MASK) { case AAC_EVENT_CMFREE: TAILQ_INSERT_TAIL(&sc->aac_ev_cmfree, event, ev_links); break; default: device_printf(sc->aac_dev, "aac_add event: unknown event %d\n", event->ev_type); break; } return; } /* * Request information of container #cid */ static int aac_get_container_info(struct aac_softc *sc, struct aac_fib *sync_fib, int cid, struct aac_mntinforesp *mir, u_int32_t *uid) { struct aac_command *cm; struct aac_fib *fib; struct aac_mntinfo *mi; struct aac_cnt_config *ccfg; int rval; if (sync_fib == NULL) { if (aacraid_alloc_command(sc, &cm)) { device_printf(sc->aac_dev, "Warning, no free command available\n"); return (-1); } fib = cm->cm_fib; } else { fib = sync_fib; } mi = (struct aac_mntinfo *)&fib->data[0]; /* 4KB support?, 64-bit LBA? */ if (sc->aac_support_opt2 & AAC_SUPPORTED_VARIABLE_BLOCK_SIZE) mi->Command = VM_NameServeAllBlk; else if (sc->flags & AAC_FLAGS_LBA_64BIT) mi->Command = VM_NameServe64; else mi->Command = VM_NameServe; mi->MntType = FT_FILESYS; mi->MntCount = cid; + aac_mntinfo_tole(mi); if (sync_fib) { if (aac_sync_fib(sc, ContainerCommand, 0, fib, sizeof(struct aac_mntinfo))) { device_printf(sc->aac_dev, "Error probing container %d\n", cid); return (-1); } } else { cm->cm_timestamp = time_uptime; cm->cm_datalen = 0; fib->Header.Size = sizeof(struct aac_fib_header) + sizeof(struct aac_mntinfo); fib->Header.XferState = AAC_FIBSTATE_HOSTOWNED | AAC_FIBSTATE_INITIALISED | AAC_FIBSTATE_EMPTY | AAC_FIBSTATE_FROMHOST | AAC_FIBSTATE_REXPECTED | AAC_FIBSTATE_NORM | AAC_FIBSTATE_ASYNC | AAC_FIBSTATE_FAST_RESPONSE; fib->Header.Command = ContainerCommand; if (aacraid_wait_command(cm) != 0) { device_printf(sc->aac_dev, "Error probing container %d\n", cid); aacraid_release_command(cm); return (-1); } } bcopy(&fib->data[0], mir, sizeof(struct aac_mntinforesp)); + aac_mntinforesp_toh(mir); /* UID */ *uid = cid; if (mir->MntTable[0].VolType != CT_NONE && !(mir->MntTable[0].ContentState & AAC_FSCS_HIDDEN)) { if (!(sc->aac_support_opt2 & AAC_SUPPORTED_VARIABLE_BLOCK_SIZE)) { mir->MntTable[0].ObjExtension.BlockDevice.BlockSize = 0x200; mir->MntTable[0].ObjExtension.BlockDevice.bdLgclPhysMap = 0; } ccfg = (struct aac_cnt_config *)&fib->data[0]; bzero(ccfg, sizeof (*ccfg) - CT_PACKET_SIZE); ccfg->Command = VM_ContainerConfig; ccfg->CTCommand.command = CT_CID_TO_32BITS_UID; ccfg->CTCommand.param[0] = cid; + aac_cnt_config_tole(ccfg); if (sync_fib) { rval = aac_sync_fib(sc, ContainerCommand, 0, fib, sizeof(struct aac_cnt_config)); + aac_cnt_config_toh(ccfg); if (rval == 0 && ccfg->Command == ST_OK && ccfg->CTCommand.param[0] == CT_OK && mir->MntTable[0].VolType != CT_PASSTHRU) *uid = ccfg->CTCommand.param[1]; } else { fib->Header.Size = sizeof(struct aac_fib_header) + sizeof(struct aac_cnt_config); fib->Header.XferState = AAC_FIBSTATE_HOSTOWNED | AAC_FIBSTATE_INITIALISED | AAC_FIBSTATE_EMPTY | AAC_FIBSTATE_FROMHOST | AAC_FIBSTATE_REXPECTED | AAC_FIBSTATE_NORM | AAC_FIBSTATE_ASYNC | AAC_FIBSTATE_FAST_RESPONSE; fib->Header.Command = ContainerCommand; rval = aacraid_wait_command(cm); + aac_cnt_config_toh(ccfg); if (rval == 0 && ccfg->Command == ST_OK && ccfg->CTCommand.param[0] == CT_OK && mir->MntTable[0].VolType != CT_PASSTHRU) *uid = ccfg->CTCommand.param[1]; aacraid_release_command(cm); } } return (0); } /* * Create a device to represent a new container */ static void aac_add_container(struct aac_softc *sc, struct aac_mntinforesp *mir, int f, u_int32_t uid) { struct aac_container *co; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* * Check container volume type for validity. Note that many of * the possible types may never show up. */ if ((mir->Status == ST_OK) && (mir->MntTable[0].VolType != CT_NONE)) { co = (struct aac_container *)malloc(sizeof *co, M_AACRAIDBUF, M_NOWAIT | M_ZERO); if (co == NULL) { panic("Out of memory?!"); } co->co_found = f; bcopy(&mir->MntTable[0], &co->co_mntobj, sizeof(struct aac_mntobj)); co->co_uid = uid; TAILQ_INSERT_TAIL(&sc->aac_container_tqh, co, co_link); } } /* * Allocate resources associated with (sc) */ static int aac_alloc(struct aac_softc *sc) { bus_size_t maxsize; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* * Create DMA tag for mapping buffers into controller-addressable space. */ if (bus_dma_tag_create(sc->aac_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ (sc->flags & AAC_FLAGS_SG_64BIT) ? BUS_SPACE_MAXADDR : BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sc->aac_max_sectors << 9, /* maxsize */ sc->aac_sg_tablesize, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ busdma_lock_mutex, /* lockfunc */ &sc->aac_io_lock, /* lockfuncarg */ &sc->aac_buffer_dmat)) { device_printf(sc->aac_dev, "can't allocate buffer DMA tag\n"); return (ENOMEM); } /* * Create DMA tag for mapping FIBs into controller-addressable space.. */ if (sc->flags & AAC_FLAGS_NEW_COMM_TYPE1) maxsize = sc->aac_max_fibs_alloc * (sc->aac_max_fib_size + sizeof(struct aac_fib_xporthdr) + 31); else maxsize = sc->aac_max_fibs_alloc * (sc->aac_max_fib_size + 31); if (bus_dma_tag_create(sc->aac_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ (sc->flags & AAC_FLAGS_4GB_WINDOW) ? BUS_SPACE_MAXADDR_32BIT : 0x7fffffff, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ maxsize, /* maxsize */ 1, /* nsegments */ maxsize, /* maxsize */ 0, /* flags */ NULL, NULL, /* No locking needed */ &sc->aac_fib_dmat)) { device_printf(sc->aac_dev, "can't allocate FIB DMA tag\n"); return (ENOMEM); } /* * Create DMA tag for the common structure and allocate it. */ maxsize = sizeof(struct aac_common); maxsize += sc->aac_max_fibs * sizeof(u_int32_t); if (bus_dma_tag_create(sc->aac_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ (sc->flags & AAC_FLAGS_4GB_WINDOW) ? BUS_SPACE_MAXADDR_32BIT : 0x7fffffff, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ maxsize, /* maxsize */ 1, /* nsegments */ maxsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* No locking needed */ &sc->aac_common_dmat)) { device_printf(sc->aac_dev, "can't allocate common structure DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->aac_common_dmat, (void **)&sc->aac_common, BUS_DMA_NOWAIT, &sc->aac_common_dmamap)) { device_printf(sc->aac_dev, "can't allocate common structure\n"); return (ENOMEM); } (void)bus_dmamap_load(sc->aac_common_dmat, sc->aac_common_dmamap, sc->aac_common, maxsize, aac_common_map, sc, 0); bzero(sc->aac_common, maxsize); /* Allocate some FIBs and associated command structs */ TAILQ_INIT(&sc->aac_fibmap_tqh); sc->aac_commands = malloc(sc->aac_max_fibs * sizeof(struct aac_command), M_AACRAIDBUF, M_WAITOK|M_ZERO); mtx_lock(&sc->aac_io_lock); while (sc->total_fibs < sc->aac_max_fibs) { if (aac_alloc_commands(sc) != 0) break; } mtx_unlock(&sc->aac_io_lock); if (sc->total_fibs == 0) return (ENOMEM); return (0); } /* * Free all of the resources associated with (sc) * * Should not be called if the controller is active. */ void aacraid_free(struct aac_softc *sc) { int i; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* remove the control device */ if (sc->aac_dev_t != NULL) destroy_dev(sc->aac_dev_t); /* throw away any FIB buffers, discard the FIB DMA tag */ aac_free_commands(sc); if (sc->aac_fib_dmat) bus_dma_tag_destroy(sc->aac_fib_dmat); free(sc->aac_commands, M_AACRAIDBUF); /* destroy the common area */ if (sc->aac_common) { bus_dmamap_unload(sc->aac_common_dmat, sc->aac_common_dmamap); bus_dmamem_free(sc->aac_common_dmat, sc->aac_common, sc->aac_common_dmamap); } if (sc->aac_common_dmat) bus_dma_tag_destroy(sc->aac_common_dmat); /* disconnect the interrupt handler */ for (i = 0; i < AAC_MAX_MSIX; ++i) { if (sc->aac_intr[i]) bus_teardown_intr(sc->aac_dev, sc->aac_irq[i], sc->aac_intr[i]); if (sc->aac_irq[i]) bus_release_resource(sc->aac_dev, SYS_RES_IRQ, sc->aac_irq_rid[i], sc->aac_irq[i]); else break; } if (sc->msi_enabled || sc->msi_tupelo) pci_release_msi(sc->aac_dev); /* destroy data-transfer DMA tag */ if (sc->aac_buffer_dmat) bus_dma_tag_destroy(sc->aac_buffer_dmat); /* destroy the parent DMA tag */ if (sc->aac_parent_dmat) bus_dma_tag_destroy(sc->aac_parent_dmat); /* release the register window mapping */ if (sc->aac_regs_res0 != NULL) bus_release_resource(sc->aac_dev, SYS_RES_MEMORY, sc->aac_regs_rid0, sc->aac_regs_res0); if (sc->aac_regs_res1 != NULL) bus_release_resource(sc->aac_dev, SYS_RES_MEMORY, sc->aac_regs_rid1, sc->aac_regs_res1); } /* * Disconnect from the controller completely, in preparation for unload. */ int aacraid_detach(device_t dev) { struct aac_softc *sc; struct aac_container *co; struct aac_sim *sim; int error; sc = device_get_softc(dev); fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); callout_drain(&sc->aac_daemontime); /* Remove the child containers */ while ((co = TAILQ_FIRST(&sc->aac_container_tqh)) != NULL) { TAILQ_REMOVE(&sc->aac_container_tqh, co, co_link); free(co, M_AACRAIDBUF); } /* Remove the CAM SIMs */ while ((sim = TAILQ_FIRST(&sc->aac_sim_tqh)) != NULL) { TAILQ_REMOVE(&sc->aac_sim_tqh, sim, sim_link); error = device_delete_child(dev, sim->sim_dev); if (error) return (error); free(sim, M_AACRAIDBUF); } if (sc->aifflags & AAC_AIFFLAGS_RUNNING) { sc->aifflags |= AAC_AIFFLAGS_EXIT; wakeup(sc->aifthread); tsleep(sc->aac_dev, PUSER | PCATCH, "aac_dch", 30 * hz); } if (sc->aifflags & AAC_AIFFLAGS_RUNNING) panic("Cannot shutdown AIF thread"); if ((error = aacraid_shutdown(dev))) return(error); EVENTHANDLER_DEREGISTER(shutdown_final, sc->eh); aacraid_free(sc); mtx_destroy(&sc->aac_io_lock); return(0); } /* * Bring the controller down to a dormant state and detach all child devices. * * This function is called before detach or system shutdown. * * Note that we can assume that the bioq on the controller is empty, as we won't * allow shutdown if any device is open. */ int aacraid_shutdown(device_t dev) { struct aac_softc *sc; struct aac_fib *fib; struct aac_close_command *cc; sc = device_get_softc(dev); fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); sc->aac_state |= AAC_STATE_SUSPEND; /* * Send a Container shutdown followed by a HostShutdown FIB to the * controller to convince it that we don't want to talk to it anymore. * We've been closed and all I/O completed already */ device_printf(sc->aac_dev, "shutting down controller..."); mtx_lock(&sc->aac_io_lock); aac_alloc_sync_fib(sc, &fib); cc = (struct aac_close_command *)&fib->data[0]; bzero(cc, sizeof(struct aac_close_command)); - cc->Command = VM_CloseAll; - cc->ContainerId = 0xfffffffe; + cc->Command = htole32(VM_CloseAll); + cc->ContainerId = htole32(0xfffffffe); if (aac_sync_fib(sc, ContainerCommand, 0, fib, sizeof(struct aac_close_command))) printf("FAILED.\n"); else printf("done\n"); AAC_ACCESS_DEVREG(sc, AAC_DISABLE_INTERRUPT); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); return(0); } /* * Bring the controller to a quiescent state, ready for system suspend. */ int aacraid_suspend(device_t dev) { struct aac_softc *sc; sc = device_get_softc(dev); fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); sc->aac_state |= AAC_STATE_SUSPEND; AAC_ACCESS_DEVREG(sc, AAC_DISABLE_INTERRUPT); return(0); } /* * Bring the controller back to a state ready for operation. */ int aacraid_resume(device_t dev) { struct aac_softc *sc; sc = device_get_softc(dev); fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); sc->aac_state &= ~AAC_STATE_SUSPEND; AAC_ACCESS_DEVREG(sc, AAC_ENABLE_INTERRUPT); return(0); } /* * Interrupt handler for NEW_COMM_TYPE1, NEW_COMM_TYPE2, NEW_COMM_TYPE34 interface. */ void aacraid_new_intr_type1(void *arg) { struct aac_msix_ctx *ctx; struct aac_softc *sc; int vector_no; struct aac_command *cm; struct aac_fib *fib; u_int32_t bellbits, bellbits_shifted, index, handle; int isFastResponse, isAif, noMoreAif, mode; ctx = (struct aac_msix_ctx *)arg; sc = ctx->sc; vector_no = ctx->vector_no; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_lock(&sc->aac_io_lock); if (sc->msi_enabled) { mode = AAC_INT_MODE_MSI; if (vector_no == 0) { bellbits = AAC_MEM0_GETREG4(sc, AAC_SRC_ODBR_MSI); if (bellbits & 0x40000) mode |= AAC_INT_MODE_AIF; else if (bellbits & 0x1000) mode |= AAC_INT_MODE_SYNC; } } else { mode = AAC_INT_MODE_INTX; bellbits = AAC_MEM0_GETREG4(sc, AAC_SRC_ODBR_R); if (bellbits & AAC_DB_RESPONSE_SENT_NS) { bellbits = AAC_DB_RESPONSE_SENT_NS; AAC_MEM0_SETREG4(sc, AAC_SRC_ODBR_C, bellbits); } else { bellbits_shifted = (bellbits >> AAC_SRC_ODR_SHIFT); AAC_MEM0_SETREG4(sc, AAC_SRC_ODBR_C, bellbits); if (bellbits_shifted & AAC_DB_AIF_PENDING) mode |= AAC_INT_MODE_AIF; else if (bellbits_shifted & AAC_DB_SYNC_COMMAND) mode |= AAC_INT_MODE_SYNC; } /* ODR readback, Prep #238630 */ AAC_MEM0_GETREG4(sc, AAC_SRC_ODBR_R); } if (mode & AAC_INT_MODE_SYNC) { if (sc->aac_sync_cm) { cm = sc->aac_sync_cm; aac_unmap_command(cm); cm->cm_flags |= AAC_CMD_COMPLETED; + aac_fib_header_toh(&cm->cm_fib->Header); + /* is there a completion handler? */ if (cm->cm_complete != NULL) { cm->cm_complete(cm); } else { /* assume that someone is sleeping on this command */ wakeup(cm); } sc->flags &= ~AAC_QUEUE_FRZN; sc->aac_sync_cm = NULL; } mode = 0; } if (mode & AAC_INT_MODE_AIF) { if (mode & AAC_INT_MODE_INTX) { aac_request_aif(sc); mode = 0; } } if (mode) { /* handle async. status */ index = sc->aac_host_rrq_idx[vector_no]; for (;;) { isFastResponse = isAif = noMoreAif = 0; /* remove toggle bit (31) */ - handle = (sc->aac_common->ac_host_rrq[index] & 0x7fffffff); + handle = (le32toh(sc->aac_common->ac_host_rrq[index]) & + 0x7fffffff); /* check fast response bit (30) */ if (handle & 0x40000000) isFastResponse = 1; /* check AIF bit (23) */ else if (handle & 0x00800000) isAif = TRUE; handle &= 0x0000ffff; if (handle == 0) break; cm = sc->aac_commands + (handle - 1); fib = cm->cm_fib; + aac_fib_header_toh(&fib->Header); sc->aac_rrq_outstanding[vector_no]--; if (isAif) { noMoreAif = (fib->Header.XferState & AAC_FIBSTATE_NOMOREAIF) ? 1:0; if (!noMoreAif) aac_handle_aif(sc, fib); aac_remove_busy(cm); aacraid_release_command(cm); } else { if (isFastResponse) { fib->Header.XferState |= AAC_FIBSTATE_DONEADAP; - *((u_int32_t *)(fib->data)) = ST_OK; + *((u_int32_t *)(fib->data)) = htole32(ST_OK); cm->cm_flags |= AAC_CMD_FASTRESP; } aac_remove_busy(cm); aac_unmap_command(cm); cm->cm_flags |= AAC_CMD_COMPLETED; /* is there a completion handler? */ if (cm->cm_complete != NULL) { cm->cm_complete(cm); } else { /* assume that someone is sleeping on this command */ wakeup(cm); } sc->flags &= ~AAC_QUEUE_FRZN; } sc->aac_common->ac_host_rrq[index++] = 0; if (index == (vector_no + 1) * sc->aac_vector_cap) index = vector_no * sc->aac_vector_cap; sc->aac_host_rrq_idx[vector_no] = index; if ((isAif && !noMoreAif) || sc->aif_pending) aac_request_aif(sc); } } if (mode & AAC_INT_MODE_AIF) { aac_request_aif(sc); AAC_ACCESS_DEVREG(sc, AAC_CLEAR_AIF_BIT); mode = 0; } /* see if we can start some more I/O */ if ((sc->flags & AAC_QUEUE_FRZN) == 0) aacraid_startio(sc); mtx_unlock(&sc->aac_io_lock); } /* * Handle notification of one or more FIBs coming from the controller. */ static void aac_command_thread(struct aac_softc *sc) { int retval; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_lock(&sc->aac_io_lock); sc->aifflags = AAC_AIFFLAGS_RUNNING; while ((sc->aifflags & AAC_AIFFLAGS_EXIT) == 0) { retval = 0; if ((sc->aifflags & AAC_AIFFLAGS_PENDING) == 0) retval = msleep(sc->aifthread, &sc->aac_io_lock, PRIBIO, "aacraid_aifthd", AAC_PERIODIC_INTERVAL * hz); /* * First see if any FIBs need to be allocated. */ if ((sc->aifflags & AAC_AIFFLAGS_ALLOCFIBS) != 0) { aac_alloc_commands(sc); sc->aifflags &= ~AAC_AIFFLAGS_ALLOCFIBS; aacraid_startio(sc); } /* * While we're here, check to see if any commands are stuck. * This is pretty low-priority, so it's ok if it doesn't * always fire. */ if (retval == EWOULDBLOCK) aac_timeout(sc); /* Check the hardware printf message buffer */ if (sc->aac_common->ac_printf[0] != 0) aac_print_printf(sc); } sc->aifflags &= ~AAC_AIFFLAGS_RUNNING; mtx_unlock(&sc->aac_io_lock); wakeup(sc->aac_dev); aac_kthread_exit(0); } /* * Submit a command to the controller, return when it completes. * XXX This is very dangerous! If the card has gone out to lunch, we could * be stuck here forever. At the same time, signals are not caught * because there is a risk that a signal could wakeup the sleep before * the card has a chance to complete the command. Since there is no way * to cancel a command that is in progress, we can't protect against the * card completing a command late and spamming the command and data * memory. So, we are held hostage until the command completes. */ int aacraid_wait_command(struct aac_command *cm) { struct aac_softc *sc; int error; sc = cm->cm_sc; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_assert(&sc->aac_io_lock, MA_OWNED); /* Put the command on the ready queue and get things going */ aac_enqueue_ready(cm); aacraid_startio(sc); error = msleep(cm, &sc->aac_io_lock, PRIBIO, "aacraid_wait", 0); return(error); } /* *Command Buffer Management */ /* * Allocate a command. */ int aacraid_alloc_command(struct aac_softc *sc, struct aac_command **cmp) { struct aac_command *cm; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); if ((cm = aac_dequeue_free(sc)) == NULL) { if (sc->total_fibs < sc->aac_max_fibs) { sc->aifflags |= AAC_AIFFLAGS_ALLOCFIBS; wakeup(sc->aifthread); } return (EBUSY); } *cmp = cm; return(0); } /* * Release a command back to the freelist. */ void aacraid_release_command(struct aac_command *cm) { struct aac_event *event; struct aac_softc *sc; sc = cm->cm_sc; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_assert(&sc->aac_io_lock, MA_OWNED); /* (re)initialize the command/FIB */ cm->cm_sgtable = NULL; cm->cm_flags = 0; cm->cm_complete = NULL; cm->cm_ccb = NULL; cm->cm_passthr_dmat = 0; cm->cm_fib->Header.XferState = AAC_FIBSTATE_EMPTY; cm->cm_fib->Header.StructType = AAC_FIBTYPE_TFIB; cm->cm_fib->Header.Unused = 0; cm->cm_fib->Header.SenderSize = cm->cm_sc->aac_max_fib_size; /* * These are duplicated in aac_start to cover the case where an * intermediate stage may have destroyed them. They're left * initialized here for debugging purposes only. */ cm->cm_fib->Header.u.ReceiverFibAddress = (u_int32_t)cm->cm_fibphys; cm->cm_fib->Header.Handle = 0; aac_enqueue_free(cm); /* * Dequeue all events so that there's no risk of events getting * stranded. */ while ((event = TAILQ_FIRST(&sc->aac_ev_cmfree)) != NULL) { TAILQ_REMOVE(&sc->aac_ev_cmfree, event, ev_links); event->ev_callback(sc, event, event->ev_arg); } } /* * Map helper for command/FIB allocation. */ static void aac_map_command_helper(void *arg, bus_dma_segment_t *segs, int nseg, int error) { uint64_t *fibphys; fibphys = (uint64_t *)arg; *fibphys = segs[0].ds_addr; } /* * Allocate and initialize commands/FIBs for this adapter. */ static int aac_alloc_commands(struct aac_softc *sc) { struct aac_command *cm; struct aac_fibmap *fm; uint64_t fibphys; int i, error; u_int32_t maxsize; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_assert(&sc->aac_io_lock, MA_OWNED); if (sc->total_fibs + sc->aac_max_fibs_alloc > sc->aac_max_fibs) return (ENOMEM); fm = malloc(sizeof(struct aac_fibmap), M_AACRAIDBUF, M_NOWAIT|M_ZERO); if (fm == NULL) return (ENOMEM); mtx_unlock(&sc->aac_io_lock); /* allocate the FIBs in DMAable memory and load them */ if (bus_dmamem_alloc(sc->aac_fib_dmat, (void **)&fm->aac_fibs, BUS_DMA_NOWAIT, &fm->aac_fibmap)) { device_printf(sc->aac_dev, "Not enough contiguous memory available.\n"); free(fm, M_AACRAIDBUF); mtx_lock(&sc->aac_io_lock); return (ENOMEM); } maxsize = sc->aac_max_fib_size + 31; if (sc->flags & AAC_FLAGS_NEW_COMM_TYPE1) maxsize += sizeof(struct aac_fib_xporthdr); /* Ignore errors since this doesn't bounce */ (void)bus_dmamap_load(sc->aac_fib_dmat, fm->aac_fibmap, fm->aac_fibs, sc->aac_max_fibs_alloc * maxsize, aac_map_command_helper, &fibphys, 0); mtx_lock(&sc->aac_io_lock); /* initialize constant fields in the command structure */ bzero(fm->aac_fibs, sc->aac_max_fibs_alloc * maxsize); for (i = 0; i < sc->aac_max_fibs_alloc; i++) { cm = sc->aac_commands + sc->total_fibs; fm->aac_commands = cm; cm->cm_sc = sc; cm->cm_fib = (struct aac_fib *) ((u_int8_t *)fm->aac_fibs + i * maxsize); cm->cm_fibphys = fibphys + i * maxsize; if (sc->flags & AAC_FLAGS_NEW_COMM_TYPE1) { u_int64_t fibphys_aligned; fibphys_aligned = (cm->cm_fibphys + sizeof(struct aac_fib_xporthdr) + 31) & ~31; cm->cm_fib = (struct aac_fib *) ((u_int8_t *)cm->cm_fib + (fibphys_aligned - cm->cm_fibphys)); cm->cm_fibphys = fibphys_aligned; } else { u_int64_t fibphys_aligned; fibphys_aligned = (cm->cm_fibphys + 31) & ~31; cm->cm_fib = (struct aac_fib *) ((u_int8_t *)cm->cm_fib + (fibphys_aligned - cm->cm_fibphys)); cm->cm_fibphys = fibphys_aligned; } cm->cm_index = sc->total_fibs; if ((error = bus_dmamap_create(sc->aac_buffer_dmat, 0, &cm->cm_datamap)) != 0) break; if (sc->aac_max_fibs <= 1 || sc->aac_max_fibs - sc->total_fibs > 1) aacraid_release_command(cm); sc->total_fibs++; } if (i > 0) { TAILQ_INSERT_TAIL(&sc->aac_fibmap_tqh, fm, fm_link); fwprintf(sc, HBA_FLAGS_DBG_COMM_B, "total_fibs= %d\n", sc->total_fibs); return (0); } bus_dmamap_unload(sc->aac_fib_dmat, fm->aac_fibmap); bus_dmamem_free(sc->aac_fib_dmat, fm->aac_fibs, fm->aac_fibmap); free(fm, M_AACRAIDBUF); return (ENOMEM); } /* * Free FIBs owned by this adapter. */ static void aac_free_commands(struct aac_softc *sc) { struct aac_fibmap *fm; struct aac_command *cm; int i; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); while ((fm = TAILQ_FIRST(&sc->aac_fibmap_tqh)) != NULL) { TAILQ_REMOVE(&sc->aac_fibmap_tqh, fm, fm_link); /* * We check against total_fibs to handle partially * allocated blocks. */ for (i = 0; i < sc->aac_max_fibs_alloc && sc->total_fibs--; i++) { cm = fm->aac_commands + i; bus_dmamap_destroy(sc->aac_buffer_dmat, cm->cm_datamap); } bus_dmamap_unload(sc->aac_fib_dmat, fm->aac_fibmap); bus_dmamem_free(sc->aac_fib_dmat, fm->aac_fibs, fm->aac_fibmap); free(fm, M_AACRAIDBUF); } } /* * Command-mapping helper function - populate this command's s/g table. */ void aacraid_map_command_sg(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct aac_softc *sc; struct aac_command *cm; struct aac_fib *fib; int i; cm = (struct aac_command *)arg; sc = cm->cm_sc; fib = cm->cm_fib; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, "nseg %d", nseg); mtx_assert(&sc->aac_io_lock, MA_OWNED); if ((sc->flags & AAC_FLAGS_SYNC_MODE) && sc->aac_sync_cm) return; /* copy into the FIB */ if (cm->cm_sgtable != NULL) { if (fib->Header.Command == RawIo2) { struct aac_raw_io2 *raw; struct aac_sge_ieee1212 *sg; u_int32_t min_size = PAGE_SIZE, cur_size; int conformable = TRUE; raw = (struct aac_raw_io2 *)&fib->data[0]; sg = (struct aac_sge_ieee1212 *)cm->cm_sgtable; raw->sgeCnt = nseg; for (i = 0; i < nseg; i++) { cur_size = segs[i].ds_len; sg[i].addrHigh = 0; *(bus_addr_t *)&sg[i].addrLow = segs[i].ds_addr; sg[i].length = cur_size; sg[i].flags = 0; if (i == 0) { raw->sgeFirstSize = cur_size; } else if (i == 1) { raw->sgeNominalSize = cur_size; min_size = cur_size; } else if ((i+1) < nseg && cur_size != raw->sgeNominalSize) { conformable = FALSE; if (cur_size < min_size) min_size = cur_size; } } /* not conformable: evaluate required sg elements */ if (!conformable) { int j, err_found, nseg_new = nseg; for (i = min_size / PAGE_SIZE; i >= 1; --i) { err_found = FALSE; nseg_new = 2; for (j = 1; j < nseg - 1; ++j) { if (sg[j].length % (i*PAGE_SIZE)) { err_found = TRUE; break; } nseg_new += (sg[j].length / (i*PAGE_SIZE)); } if (!err_found) break; } if (i>0 && nseg_new<=sc->aac_sg_tablesize && !(sc->hint_flags & 4)) nseg = aac_convert_sgraw2(sc, raw, i, nseg, nseg_new); } else { raw->flags |= RIO2_SGL_CONFORMANT; } + for (i = 0; i < nseg; i++) + aac_sge_ieee1212_tole(sg + i); + aac_raw_io2_tole(raw); + /* update the FIB size for the s/g count */ fib->Header.Size += nseg * sizeof(struct aac_sge_ieee1212); } else if (fib->Header.Command == RawIo) { struct aac_sg_tableraw *sg; sg = (struct aac_sg_tableraw *)cm->cm_sgtable; - sg->SgCount = nseg; + sg->SgCount = htole32(nseg); for (i = 0; i < nseg; i++) { sg->SgEntryRaw[i].SgAddress = segs[i].ds_addr; sg->SgEntryRaw[i].SgByteCount = segs[i].ds_len; sg->SgEntryRaw[i].Next = 0; sg->SgEntryRaw[i].Prev = 0; sg->SgEntryRaw[i].Flags = 0; + aac_sg_entryraw_tole(&sg->SgEntryRaw[i]); } + aac_raw_io_tole((struct aac_raw_io *)&fib->data[0]); /* update the FIB size for the s/g count */ fib->Header.Size += nseg*sizeof(struct aac_sg_entryraw); } else if ((cm->cm_sc->flags & AAC_FLAGS_SG_64BIT) == 0) { struct aac_sg_table *sg; sg = cm->cm_sgtable; - sg->SgCount = nseg; + sg->SgCount = htole32(nseg); for (i = 0; i < nseg; i++) { sg->SgEntry[i].SgAddress = segs[i].ds_addr; sg->SgEntry[i].SgByteCount = segs[i].ds_len; + aac_sg_entry_tole(&sg->SgEntry[i]); } /* update the FIB size for the s/g count */ fib->Header.Size += nseg*sizeof(struct aac_sg_entry); } else { struct aac_sg_table64 *sg; sg = (struct aac_sg_table64 *)cm->cm_sgtable; - sg->SgCount = nseg; + sg->SgCount = htole32(nseg); for (i = 0; i < nseg; i++) { sg->SgEntry64[i].SgAddress = segs[i].ds_addr; sg->SgEntry64[i].SgByteCount = segs[i].ds_len; + aac_sg_entry64_tole(&sg->SgEntry64[i]); } /* update the FIB size for the s/g count */ fib->Header.Size += nseg*sizeof(struct aac_sg_entry64); } } /* Fix up the address values in the FIB. Use the command array index * instead of a pointer since these fields are only 32 bits. Shift * the SenderFibAddress over to make room for the fast response bit * and for the AIF bit */ cm->cm_fib->Header.SenderFibAddress = (cm->cm_index << 2); cm->cm_fib->Header.u.ReceiverFibAddress = (u_int32_t)cm->cm_fibphys; /* save a pointer to the command for speedy reverse-lookup */ cm->cm_fib->Header.Handle += cm->cm_index + 1; if (cm->cm_passthr_dmat == 0) { if (cm->cm_flags & AAC_CMD_DATAIN) bus_dmamap_sync(sc->aac_buffer_dmat, cm->cm_datamap, BUS_DMASYNC_PREREAD); if (cm->cm_flags & AAC_CMD_DATAOUT) bus_dmamap_sync(sc->aac_buffer_dmat, cm->cm_datamap, BUS_DMASYNC_PREWRITE); } cm->cm_flags |= AAC_CMD_MAPPED; if (cm->cm_flags & AAC_CMD_WAIT) { + aac_fib_header_tole(&fib->Header); aacraid_sync_command(sc, AAC_MONKER_SYNCFIB, cm->cm_fibphys, 0, 0, 0, NULL, NULL); } else if (sc->flags & AAC_FLAGS_SYNC_MODE) { u_int32_t wait = 0; sc->aac_sync_cm = cm; + aac_fib_header_tole(&fib->Header); aacraid_sync_command(sc, AAC_MONKER_SYNCFIB, cm->cm_fibphys, 0, 0, 0, &wait, NULL); } else { int count = 10000000L; while (AAC_SEND_COMMAND(sc, cm) != 0) { if (--count == 0) { aac_unmap_command(cm); sc->flags |= AAC_QUEUE_FRZN; aac_requeue_ready(cm); } DELAY(5); /* wait 5 usec. */ } } } static int aac_convert_sgraw2(struct aac_softc *sc, struct aac_raw_io2 *raw, int pages, int nseg, int nseg_new) { struct aac_sge_ieee1212 *sge; int i, j, pos; u_int32_t addr_low; sge = malloc(nseg_new * sizeof(struct aac_sge_ieee1212), M_AACRAIDBUF, M_NOWAIT|M_ZERO); if (sge == NULL) return nseg; for (i = 1, pos = 1; i < nseg - 1; ++i) { for (j = 0; j < raw->sge[i].length / (pages*PAGE_SIZE); ++j) { addr_low = raw->sge[i].addrLow + j * pages * PAGE_SIZE; sge[pos].addrLow = addr_low; sge[pos].addrHigh = raw->sge[i].addrHigh; if (addr_low < raw->sge[i].addrLow) sge[pos].addrHigh++; sge[pos].length = pages * PAGE_SIZE; sge[pos].flags = 0; pos++; } } sge[pos] = raw->sge[nseg-1]; for (i = 1; i < nseg_new; ++i) raw->sge[i] = sge[i]; free(sge, M_AACRAIDBUF); raw->sgeCnt = nseg_new; raw->flags |= RIO2_SGL_CONFORMANT; raw->sgeNominalSize = pages * PAGE_SIZE; return nseg_new; } /* * Unmap a command from controller-visible space. */ static void aac_unmap_command(struct aac_command *cm) { struct aac_softc *sc; sc = cm->cm_sc; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); if (!(cm->cm_flags & AAC_CMD_MAPPED)) return; if (cm->cm_datalen != 0 && cm->cm_passthr_dmat == 0) { if (cm->cm_flags & AAC_CMD_DATAIN) bus_dmamap_sync(sc->aac_buffer_dmat, cm->cm_datamap, BUS_DMASYNC_POSTREAD); if (cm->cm_flags & AAC_CMD_DATAOUT) bus_dmamap_sync(sc->aac_buffer_dmat, cm->cm_datamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->aac_buffer_dmat, cm->cm_datamap); } cm->cm_flags &= ~AAC_CMD_MAPPED; } /* * Hardware Interface */ /* * Initialize the adapter. */ static void aac_common_map(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct aac_softc *sc; sc = (struct aac_softc *)arg; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); sc->aac_common_busaddr = segs[0].ds_addr; } static int aac_check_firmware(struct aac_softc *sc) { u_int32_t code, major, minor, maxsize; u_int32_t options = 0, atu_size = 0, status, waitCount; time_t then; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* check if flash update is running */ if (AAC_GET_FWSTATUS(sc) & AAC_FLASH_UPD_PENDING) { then = time_uptime; do { code = AAC_GET_FWSTATUS(sc); if (time_uptime > (then + AAC_FWUPD_TIMEOUT)) { device_printf(sc->aac_dev, "FATAL: controller not coming ready, " "status %x\n", code); return(ENXIO); } } while (!(code & AAC_FLASH_UPD_SUCCESS) && !(code & AAC_FLASH_UPD_FAILED)); /* * Delay 10 seconds. Because right now FW is doing a soft reset, * do not read scratch pad register at this time */ waitCount = 10 * 10000; while (waitCount) { DELAY(100); /* delay 100 microseconds */ waitCount--; } } /* * Wait for the adapter to come ready. */ then = time_uptime; do { code = AAC_GET_FWSTATUS(sc); if (time_uptime > (then + AAC_BOOT_TIMEOUT)) { device_printf(sc->aac_dev, "FATAL: controller not coming ready, " "status %x\n", code); return(ENXIO); } } while (!(code & AAC_UP_AND_RUNNING) || code == 0xffffffff); /* * Retrieve the firmware version numbers. Dell PERC2/QC cards with * firmware version 1.x are not compatible with this driver. */ if (sc->flags & AAC_FLAGS_PERC2QC) { if (aacraid_sync_command(sc, AAC_MONKER_GETKERNVER, 0, 0, 0, 0, NULL, NULL)) { device_printf(sc->aac_dev, "Error reading firmware version\n"); return (EIO); } /* These numbers are stored as ASCII! */ major = (AAC_GET_MAILBOX(sc, 1) & 0xff) - 0x30; minor = (AAC_GET_MAILBOX(sc, 2) & 0xff) - 0x30; if (major == 1) { device_printf(sc->aac_dev, "Firmware version %d.%d is not supported.\n", major, minor); return (EINVAL); } } /* * Retrieve the capabilities/supported options word so we know what * work-arounds to enable. Some firmware revs don't support this * command. */ if (aacraid_sync_command(sc, AAC_MONKER_GETINFO, 0, 0, 0, 0, &status, NULL)) { if (status != AAC_SRB_STS_INVALID_REQUEST) { device_printf(sc->aac_dev, "RequestAdapterInfo failed\n"); return (EIO); } } else { options = AAC_GET_MAILBOX(sc, 1); atu_size = AAC_GET_MAILBOX(sc, 2); sc->supported_options = options; sc->doorbell_mask = AAC_GET_MAILBOX(sc, 3); if ((options & AAC_SUPPORTED_4GB_WINDOW) != 0 && (sc->flags & AAC_FLAGS_NO4GB) == 0) sc->flags |= AAC_FLAGS_4GB_WINDOW; if (options & AAC_SUPPORTED_NONDASD) sc->flags |= AAC_FLAGS_ENABLE_CAM; if ((options & AAC_SUPPORTED_SGMAP_HOST64) != 0 && (sizeof(bus_addr_t) > 4) && (sc->hint_flags & 0x1)) { device_printf(sc->aac_dev, "Enabling 64-bit address support\n"); sc->flags |= AAC_FLAGS_SG_64BIT; } if (sc->aac_if.aif_send_command) { if (options & AAC_SUPPORTED_NEW_COMM_TYPE2) sc->flags |= AAC_FLAGS_NEW_COMM | AAC_FLAGS_NEW_COMM_TYPE2; else if (options & AAC_SUPPORTED_NEW_COMM_TYPE1) sc->flags |= AAC_FLAGS_NEW_COMM | AAC_FLAGS_NEW_COMM_TYPE1; else if ((options & AAC_SUPPORTED_NEW_COMM_TYPE3) || (options & AAC_SUPPORTED_NEW_COMM_TYPE4)) sc->flags |= AAC_FLAGS_NEW_COMM | AAC_FLAGS_NEW_COMM_TYPE34; } if (options & AAC_SUPPORTED_64BIT_ARRAYSIZE) sc->flags |= AAC_FLAGS_ARRAY_64BIT; } if (!(sc->flags & AAC_FLAGS_NEW_COMM)) { device_printf(sc->aac_dev, "Communication interface not supported!\n"); return (ENXIO); } if (sc->hint_flags & 2) { device_printf(sc->aac_dev, "Sync. mode enforced by driver parameter. This will cause a significant performance decrease!\n"); sc->flags |= AAC_FLAGS_SYNC_MODE; } else if (sc->flags & AAC_FLAGS_NEW_COMM_TYPE34) { device_printf(sc->aac_dev, "Async. mode not supported by current driver, sync. mode enforced.\nPlease update driver to get full performance.\n"); sc->flags |= AAC_FLAGS_SYNC_MODE; } /* Check for broken hardware that does a lower number of commands */ sc->aac_max_fibs = (sc->flags & AAC_FLAGS_256FIBS ? 256:512); /* Remap mem. resource, if required */ if (atu_size > rman_get_size(sc->aac_regs_res0)) { bus_release_resource( sc->aac_dev, SYS_RES_MEMORY, sc->aac_regs_rid0, sc->aac_regs_res0); sc->aac_regs_res0 = bus_alloc_resource_anywhere( sc->aac_dev, SYS_RES_MEMORY, &sc->aac_regs_rid0, atu_size, RF_ACTIVE); if (sc->aac_regs_res0 == NULL) { sc->aac_regs_res0 = bus_alloc_resource_any( sc->aac_dev, SYS_RES_MEMORY, &sc->aac_regs_rid0, RF_ACTIVE); if (sc->aac_regs_res0 == NULL) { device_printf(sc->aac_dev, "couldn't allocate register window\n"); return (ENXIO); } } sc->aac_btag0 = rman_get_bustag(sc->aac_regs_res0); sc->aac_bhandle0 = rman_get_bushandle(sc->aac_regs_res0); } /* Read preferred settings */ sc->aac_max_fib_size = sizeof(struct aac_fib); sc->aac_max_sectors = 128; /* 64KB */ sc->aac_max_aif = 1; if (sc->flags & AAC_FLAGS_SG_64BIT) sc->aac_sg_tablesize = (AAC_FIB_DATASIZE - sizeof(struct aac_blockwrite64)) / sizeof(struct aac_sg_entry64); else sc->aac_sg_tablesize = (AAC_FIB_DATASIZE - sizeof(struct aac_blockwrite)) / sizeof(struct aac_sg_entry); if (!aacraid_sync_command(sc, AAC_MONKER_GETCOMMPREF, 0, 0, 0, 0, NULL, NULL)) { options = AAC_GET_MAILBOX(sc, 1); sc->aac_max_fib_size = (options & 0xFFFF); sc->aac_max_sectors = (options >> 16) << 1; options = AAC_GET_MAILBOX(sc, 2); sc->aac_sg_tablesize = (options >> 16); options = AAC_GET_MAILBOX(sc, 3); sc->aac_max_fibs = ((options >> 16) & 0xFFFF); if (sc->aac_max_fibs == 0 || sc->aac_hwif != AAC_HWIF_SRCV) sc->aac_max_fibs = (options & 0xFFFF); options = AAC_GET_MAILBOX(sc, 4); sc->aac_max_aif = (options & 0xFFFF); options = AAC_GET_MAILBOX(sc, 5); sc->aac_max_msix =(sc->flags & AAC_FLAGS_NEW_COMM_TYPE2) ? options : 0; } maxsize = sc->aac_max_fib_size + 31; if (sc->flags & AAC_FLAGS_NEW_COMM_TYPE1) maxsize += sizeof(struct aac_fib_xporthdr); if (maxsize > PAGE_SIZE) { sc->aac_max_fib_size -= (maxsize - PAGE_SIZE); maxsize = PAGE_SIZE; } sc->aac_max_fibs_alloc = PAGE_SIZE / maxsize; if (sc->aac_max_fib_size > sizeof(struct aac_fib)) { sc->flags |= AAC_FLAGS_RAW_IO; device_printf(sc->aac_dev, "Enable Raw I/O\n"); } if ((sc->flags & AAC_FLAGS_RAW_IO) && (sc->flags & AAC_FLAGS_ARRAY_64BIT)) { sc->flags |= AAC_FLAGS_LBA_64BIT; device_printf(sc->aac_dev, "Enable 64-bit array\n"); } #ifdef AACRAID_DEBUG aacraid_get_fw_debug_buffer(sc); #endif return (0); } static int aac_init(struct aac_softc *sc) { struct aac_adapter_init *ip; int i, error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* reset rrq index */ sc->aac_fibs_pushed_no = 0; for (i = 0; i < sc->aac_max_msix; i++) sc->aac_host_rrq_idx[i] = i * sc->aac_vector_cap; /* * Fill in the init structure. This tells the adapter about the * physical location of various important shared data structures. */ ip = &sc->aac_common->ac_init; ip->InitStructRevision = AAC_INIT_STRUCT_REVISION; if (sc->aac_max_fib_size > sizeof(struct aac_fib)) { ip->InitStructRevision = AAC_INIT_STRUCT_REVISION_4; sc->flags |= AAC_FLAGS_RAW_IO; } ip->NoOfMSIXVectors = sc->aac_max_msix; ip->AdapterFibsPhysicalAddress = sc->aac_common_busaddr + offsetof(struct aac_common, ac_fibs); ip->AdapterFibsVirtualAddress = 0; ip->AdapterFibsSize = AAC_ADAPTER_FIBS * sizeof(struct aac_fib); ip->AdapterFibAlign = sizeof(struct aac_fib); ip->PrintfBufferAddress = sc->aac_common_busaddr + offsetof(struct aac_common, ac_printf); ip->PrintfBufferSize = AAC_PRINTF_BUFSIZE; /* * The adapter assumes that pages are 4K in size, except on some * broken firmware versions that do the page->byte conversion twice, * therefore 'assuming' that this value is in 16MB units (2^24). * Round up since the granularity is so high. */ ip->HostPhysMemPages = ctob(physmem) / AAC_PAGE_SIZE; if (sc->flags & AAC_FLAGS_BROKEN_MEMMAP) { ip->HostPhysMemPages = (ip->HostPhysMemPages + AAC_PAGE_SIZE) / AAC_PAGE_SIZE; } ip->HostElapsedSeconds = time_uptime; /* reset later if invalid */ ip->InitFlags = AAC_INITFLAGS_NEW_COMM_SUPPORTED; if (sc->flags & AAC_FLAGS_NEW_COMM_TYPE1) { ip->InitStructRevision = AAC_INIT_STRUCT_REVISION_6; ip->InitFlags |= (AAC_INITFLAGS_NEW_COMM_TYPE1_SUPPORTED | AAC_INITFLAGS_FAST_JBOD_SUPPORTED); device_printf(sc->aac_dev, "New comm. interface type1 enabled\n"); } else if (sc->flags & AAC_FLAGS_NEW_COMM_TYPE2) { ip->InitStructRevision = AAC_INIT_STRUCT_REVISION_7; ip->InitFlags |= (AAC_INITFLAGS_NEW_COMM_TYPE2_SUPPORTED | AAC_INITFLAGS_FAST_JBOD_SUPPORTED); device_printf(sc->aac_dev, "New comm. interface type2 enabled\n"); } ip->MaxNumAif = sc->aac_max_aif; ip->HostRRQ_AddrLow = sc->aac_common_busaddr + offsetof(struct aac_common, ac_host_rrq); /* always 32-bit address */ ip->HostRRQ_AddrHigh = 0; if (sc->aac_support_opt2 & AAC_SUPPORTED_POWER_MANAGEMENT) { ip->InitFlags |= AAC_INITFLAGS_DRIVER_SUPPORTS_PM; ip->InitFlags |= AAC_INITFLAGS_DRIVER_USES_UTC_TIME; device_printf(sc->aac_dev, "Power Management enabled\n"); } ip->MaxIoCommands = sc->aac_max_fibs; ip->MaxIoSize = sc->aac_max_sectors << 9; ip->MaxFibSize = sc->aac_max_fib_size; + aac_adapter_init_tole(ip); + /* * Do controller-type-specific initialisation */ AAC_MEM0_SETREG4(sc, AAC_SRC_ODBR_C, ~0); /* * Give the init structure to the controller. */ if (aacraid_sync_command(sc, AAC_MONKER_INITSTRUCT, sc->aac_common_busaddr + offsetof(struct aac_common, ac_init), 0, 0, 0, NULL, NULL)) { device_printf(sc->aac_dev, "error establishing init structure\n"); error = EIO; goto out; } /* * Check configuration issues */ if ((error = aac_check_config(sc)) != 0) goto out; error = 0; out: return(error); } static void aac_define_int_mode(struct aac_softc *sc) { device_t dev; int cap, msi_count, error = 0; uint32_t val; dev = sc->aac_dev; if (sc->flags & AAC_FLAGS_SYNC_MODE) { device_printf(dev, "using line interrupts\n"); sc->aac_max_msix = 1; sc->aac_vector_cap = sc->aac_max_fibs; return; } /* max. vectors from AAC_MONKER_GETCOMMPREF */ if (sc->aac_max_msix == 0) { if (sc->aac_hwif == AAC_HWIF_SRC) { msi_count = 1; if ((error = pci_alloc_msi(dev, &msi_count)) != 0) { device_printf(dev, "alloc msi failed - err=%d; " "will use INTx\n", error); pci_release_msi(dev); } else { sc->msi_tupelo = TRUE; } } if (sc->msi_tupelo) device_printf(dev, "using MSI interrupts\n"); else device_printf(dev, "using line interrupts\n"); sc->aac_max_msix = 1; sc->aac_vector_cap = sc->aac_max_fibs; return; } /* OS capability */ msi_count = pci_msix_count(dev); if (msi_count > AAC_MAX_MSIX) msi_count = AAC_MAX_MSIX; if (msi_count > sc->aac_max_msix) msi_count = sc->aac_max_msix; if (msi_count == 0 || (error = pci_alloc_msix(dev, &msi_count)) != 0) { device_printf(dev, "alloc msix failed - msi_count=%d, err=%d; " "will try MSI\n", msi_count, error); pci_release_msi(dev); } else { sc->msi_enabled = TRUE; device_printf(dev, "using MSI-X interrupts (%u vectors)\n", msi_count); } if (!sc->msi_enabled) { msi_count = 1; if ((error = pci_alloc_msi(dev, &msi_count)) != 0) { device_printf(dev, "alloc msi failed - err=%d; " "will use INTx\n", error); pci_release_msi(dev); } else { sc->msi_enabled = TRUE; device_printf(dev, "using MSI interrupts\n"); } } if (sc->msi_enabled) { /* now read controller capability from PCI config. space */ cap = aac_find_pci_capability(sc, PCIY_MSIX); val = (cap != 0 ? pci_read_config(dev, cap + 2, 2) : 0); if (!(val & AAC_PCI_MSI_ENABLE)) { pci_release_msi(dev); sc->msi_enabled = FALSE; } } if (!sc->msi_enabled) { device_printf(dev, "using legacy interrupts\n"); sc->aac_max_msix = 1; } else { AAC_ACCESS_DEVREG(sc, AAC_ENABLE_MSIX); if (sc->aac_max_msix > msi_count) sc->aac_max_msix = msi_count; } sc->aac_vector_cap = sc->aac_max_fibs / sc->aac_max_msix; fwprintf(sc, HBA_FLAGS_DBG_DEBUG_B, "msi_enabled %d vector_cap %d max_fibs %d max_msix %d", sc->msi_enabled,sc->aac_vector_cap, sc->aac_max_fibs, sc->aac_max_msix); } static int aac_find_pci_capability(struct aac_softc *sc, int cap) { device_t dev; uint32_t status; uint8_t ptr; dev = sc->aac_dev; status = pci_read_config(dev, PCIR_STATUS, 2); if (!(status & PCIM_STATUS_CAPPRESENT)) return (0); status = pci_read_config(dev, PCIR_HDRTYPE, 1); switch (status & PCIM_HDRTYPE) { case 0: case 1: ptr = PCIR_CAP_PTR; break; case 2: ptr = PCIR_CAP_PTR_2; break; default: return (0); break; } ptr = pci_read_config(dev, ptr, 1); while (ptr != 0) { int next, val; next = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1); val = pci_read_config(dev, ptr + PCICAP_ID, 1); if (val == cap) return (ptr); ptr = next; } return (0); } static int aac_setup_intr(struct aac_softc *sc) { int i, msi_count, rid; struct resource *res; void *tag; msi_count = sc->aac_max_msix; rid = ((sc->msi_enabled || sc->msi_tupelo)? 1:0); for (i = 0; i < msi_count; i++, rid++) { if ((res = bus_alloc_resource_any(sc->aac_dev,SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) { device_printf(sc->aac_dev,"can't allocate interrupt\n"); return (EINVAL); } sc->aac_irq_rid[i] = rid; sc->aac_irq[i] = res; if (aac_bus_setup_intr(sc->aac_dev, res, INTR_MPSAFE | INTR_TYPE_BIO, NULL, aacraid_new_intr_type1, &sc->aac_msix[i], &tag)) { device_printf(sc->aac_dev, "can't set up interrupt\n"); return (EINVAL); } sc->aac_msix[i].vector_no = i; sc->aac_msix[i].sc = sc; sc->aac_intr[i] = tag; } return (0); } static int aac_check_config(struct aac_softc *sc) { struct aac_fib *fib; struct aac_cnt_config *ccfg; struct aac_cf_status_hdr *cf_shdr; int rval; mtx_lock(&sc->aac_io_lock); aac_alloc_sync_fib(sc, &fib); ccfg = (struct aac_cnt_config *)&fib->data[0]; bzero(ccfg, sizeof (*ccfg) - CT_PACKET_SIZE); ccfg->Command = VM_ContainerConfig; ccfg->CTCommand.command = CT_GET_CONFIG_STATUS; ccfg->CTCommand.param[CNT_SIZE] = sizeof(struct aac_cf_status_hdr); + aac_cnt_config_tole(ccfg); rval = aac_sync_fib(sc, ContainerCommand, 0, fib, sizeof (struct aac_cnt_config)); + aac_cnt_config_toh(ccfg); + cf_shdr = (struct aac_cf_status_hdr *)ccfg->CTCommand.data; if (rval == 0 && ccfg->Command == ST_OK && ccfg->CTCommand.param[0] == CT_OK) { - if (cf_shdr->action <= CFACT_PAUSE) { + if (le32toh(cf_shdr->action) <= CFACT_PAUSE) { bzero(ccfg, sizeof (*ccfg) - CT_PACKET_SIZE); ccfg->Command = VM_ContainerConfig; ccfg->CTCommand.command = CT_COMMIT_CONFIG; + aac_cnt_config_tole(ccfg); rval = aac_sync_fib(sc, ContainerCommand, 0, fib, sizeof (struct aac_cnt_config)); + aac_cnt_config_toh(ccfg); + if (rval == 0 && ccfg->Command == ST_OK && ccfg->CTCommand.param[0] == CT_OK) { /* successful completion */ rval = 0; } else { /* auto commit aborted due to error(s) */ rval = -2; } } else { /* auto commit aborted due to adapter indicating config. issues too dangerous to auto commit */ rval = -3; } } else { /* error */ rval = -1; } aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); return(rval); } /* * Send a synchronous command to the controller and wait for a result. * Indicate if the controller completed the command with an error status. */ int aacraid_sync_command(struct aac_softc *sc, u_int32_t command, u_int32_t arg0, u_int32_t arg1, u_int32_t arg2, u_int32_t arg3, u_int32_t *sp, u_int32_t *r1) { time_t then; u_int32_t status; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* populate the mailbox */ AAC_SET_MAILBOX(sc, command, arg0, arg1, arg2, arg3); /* ensure the sync command doorbell flag is cleared */ if (!sc->msi_enabled) AAC_CLEAR_ISTATUS(sc, AAC_DB_SYNC_COMMAND); /* then set it to signal the adapter */ AAC_QNOTIFY(sc, AAC_DB_SYNC_COMMAND); if ((command != AAC_MONKER_SYNCFIB) || (sp == NULL) || (*sp != 0)) { /* spin waiting for the command to complete */ then = time_uptime; do { if (time_uptime > (then + AAC_SYNC_TIMEOUT)) { fwprintf(sc, HBA_FLAGS_DBG_ERROR_B, "timed out"); return(EIO); } } while (!(AAC_GET_ISTATUS(sc) & AAC_DB_SYNC_COMMAND)); /* clear the completion flag */ AAC_CLEAR_ISTATUS(sc, AAC_DB_SYNC_COMMAND); /* get the command status */ status = AAC_GET_MAILBOX(sc, 0); if (sp != NULL) *sp = status; /* return parameter */ if (r1 != NULL) *r1 = AAC_GET_MAILBOX(sc, 1); if (status != AAC_SRB_STS_SUCCESS) return (-1); } return(0); } static int aac_sync_fib(struct aac_softc *sc, u_int32_t command, u_int32_t xferstate, struct aac_fib *fib, u_int16_t datasize) { + uint32_t ReceiverFibAddress; + fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_assert(&sc->aac_io_lock, MA_OWNED); if (datasize > AAC_FIB_DATASIZE) return(EINVAL); /* * Set up the sync FIB */ fib->Header.XferState = AAC_FIBSTATE_HOSTOWNED | AAC_FIBSTATE_INITIALISED | AAC_FIBSTATE_EMPTY; fib->Header.XferState |= xferstate; fib->Header.Command = command; fib->Header.StructType = AAC_FIBTYPE_TFIB; fib->Header.Size = sizeof(struct aac_fib_header) + datasize; fib->Header.SenderSize = sizeof(struct aac_fib); fib->Header.SenderFibAddress = 0; /* Not needed */ - fib->Header.u.ReceiverFibAddress = sc->aac_common_busaddr + + ReceiverFibAddress = sc->aac_common_busaddr + offsetof(struct aac_common, ac_sync_fib); + fib->Header.u.ReceiverFibAddress = ReceiverFibAddress; + aac_fib_header_tole(&fib->Header); /* * Give the FIB to the controller, wait for a response. */ if (aacraid_sync_command(sc, AAC_MONKER_SYNCFIB, - fib->Header.u.ReceiverFibAddress, 0, 0, 0, NULL, NULL)) { + ReceiverFibAddress, 0, 0, 0, NULL, NULL)) { fwprintf(sc, HBA_FLAGS_DBG_ERROR_B, "IO error"); + aac_fib_header_toh(&fib->Header); return(EIO); } + aac_fib_header_toh(&fib->Header); return (0); } /* * Check for commands that have been outstanding for a suspiciously long time, * and complain about them. */ static void aac_timeout(struct aac_softc *sc) { struct aac_command *cm; time_t deadline; int timedout; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* * Traverse the busy command list, bitch about late commands once * only. */ timedout = 0; deadline = time_uptime - AAC_CMD_TIMEOUT; TAILQ_FOREACH(cm, &sc->aac_busy, cm_link) { if (cm->cm_timestamp < deadline) { device_printf(sc->aac_dev, "COMMAND %p TIMEOUT AFTER %d SECONDS\n", cm, (int)(time_uptime-cm->cm_timestamp)); AAC_PRINT_FIB(sc, cm->cm_fib); timedout++; } } if (timedout) aac_reset_adapter(sc); aacraid_print_queues(sc); } /* * Interface Function Vectors */ /* * Read the current firmware status word. */ static int aac_src_get_fwstatus(struct aac_softc *sc) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return(AAC_MEM0_GETREG4(sc, AAC_SRC_OMR)); } /* * Notify the controller of a change in a given queue */ static void aac_src_qnotify(struct aac_softc *sc, int qbit) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); AAC_MEM0_SETREG4(sc, AAC_SRC_IDBR, qbit << AAC_SRC_IDR_SHIFT); } /* * Get the interrupt reason bits */ static int aac_src_get_istatus(struct aac_softc *sc) { int val; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); if (sc->msi_enabled) { val = AAC_MEM0_GETREG4(sc, AAC_SRC_ODBR_MSI); if (val & AAC_MSI_SYNC_STATUS) val = AAC_DB_SYNC_COMMAND; else val = 0; } else { val = AAC_MEM0_GETREG4(sc, AAC_SRC_ODBR_R) >> AAC_SRC_ODR_SHIFT; } return(val); } /* * Clear some interrupt reason bits */ static void aac_src_clear_istatus(struct aac_softc *sc, int mask) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); if (sc->msi_enabled) { if (mask == AAC_DB_SYNC_COMMAND) AAC_ACCESS_DEVREG(sc, AAC_CLEAR_SYNC_BIT); } else { AAC_MEM0_SETREG4(sc, AAC_SRC_ODBR_C, mask << AAC_SRC_ODR_SHIFT); } } /* * Populate the mailbox and set the command word */ static void aac_src_set_mailbox(struct aac_softc *sc, u_int32_t command, u_int32_t arg0, u_int32_t arg1, u_int32_t arg2, u_int32_t arg3) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); AAC_MEM0_SETREG4(sc, AAC_SRC_MAILBOX, command); AAC_MEM0_SETREG4(sc, AAC_SRC_MAILBOX + 4, arg0); AAC_MEM0_SETREG4(sc, AAC_SRC_MAILBOX + 8, arg1); AAC_MEM0_SETREG4(sc, AAC_SRC_MAILBOX + 12, arg2); AAC_MEM0_SETREG4(sc, AAC_SRC_MAILBOX + 16, arg3); } static void aac_srcv_set_mailbox(struct aac_softc *sc, u_int32_t command, u_int32_t arg0, u_int32_t arg1, u_int32_t arg2, u_int32_t arg3) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); AAC_MEM0_SETREG4(sc, AAC_SRCV_MAILBOX, command); AAC_MEM0_SETREG4(sc, AAC_SRCV_MAILBOX + 4, arg0); AAC_MEM0_SETREG4(sc, AAC_SRCV_MAILBOX + 8, arg1); AAC_MEM0_SETREG4(sc, AAC_SRCV_MAILBOX + 12, arg2); AAC_MEM0_SETREG4(sc, AAC_SRCV_MAILBOX + 16, arg3); } /* * Fetch the immediate command status word */ static int aac_src_get_mailbox(struct aac_softc *sc, int mb) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return(AAC_MEM0_GETREG4(sc, AAC_SRC_MAILBOX + (mb * 4))); } static int aac_srcv_get_mailbox(struct aac_softc *sc, int mb) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return(AAC_MEM0_GETREG4(sc, AAC_SRCV_MAILBOX + (mb * 4))); } /* * Set/clear interrupt masks */ static void aac_src_access_devreg(struct aac_softc *sc, int mode) { u_int32_t val; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); switch (mode) { case AAC_ENABLE_INTERRUPT: AAC_MEM0_SETREG4(sc, AAC_SRC_OIMR, (sc->msi_enabled ? AAC_INT_ENABLE_TYPE1_MSIX : AAC_INT_ENABLE_TYPE1_INTX)); break; case AAC_DISABLE_INTERRUPT: AAC_MEM0_SETREG4(sc, AAC_SRC_OIMR, AAC_INT_DISABLE_ALL); break; case AAC_ENABLE_MSIX: /* set bit 6 */ val = AAC_MEM0_GETREG4(sc, AAC_SRC_IDBR); val |= 0x40; AAC_MEM0_SETREG4(sc, AAC_SRC_IDBR, val); AAC_MEM0_GETREG4(sc, AAC_SRC_IDBR); /* unmask int. */ val = PMC_ALL_INTERRUPT_BITS; AAC_MEM0_SETREG4(sc, AAC_SRC_IOAR, val); val = AAC_MEM0_GETREG4(sc, AAC_SRC_OIMR); AAC_MEM0_SETREG4(sc, AAC_SRC_OIMR, val & (~(PMC_GLOBAL_INT_BIT2 | PMC_GLOBAL_INT_BIT0))); break; case AAC_DISABLE_MSIX: /* reset bit 6 */ val = AAC_MEM0_GETREG4(sc, AAC_SRC_IDBR); val &= ~0x40; AAC_MEM0_SETREG4(sc, AAC_SRC_IDBR, val); AAC_MEM0_GETREG4(sc, AAC_SRC_IDBR); break; case AAC_CLEAR_AIF_BIT: /* set bit 5 */ val = AAC_MEM0_GETREG4(sc, AAC_SRC_IDBR); val |= 0x20; AAC_MEM0_SETREG4(sc, AAC_SRC_IDBR, val); AAC_MEM0_GETREG4(sc, AAC_SRC_IDBR); break; case AAC_CLEAR_SYNC_BIT: /* set bit 4 */ val = AAC_MEM0_GETREG4(sc, AAC_SRC_IDBR); val |= 0x10; AAC_MEM0_SETREG4(sc, AAC_SRC_IDBR, val); AAC_MEM0_GETREG4(sc, AAC_SRC_IDBR); break; case AAC_ENABLE_INTX: /* set bit 7 */ val = AAC_MEM0_GETREG4(sc, AAC_SRC_IDBR); val |= 0x80; AAC_MEM0_SETREG4(sc, AAC_SRC_IDBR, val); AAC_MEM0_GETREG4(sc, AAC_SRC_IDBR); /* unmask int. */ val = PMC_ALL_INTERRUPT_BITS; AAC_MEM0_SETREG4(sc, AAC_SRC_IOAR, val); val = AAC_MEM0_GETREG4(sc, AAC_SRC_OIMR); AAC_MEM0_SETREG4(sc, AAC_SRC_OIMR, val & (~(PMC_GLOBAL_INT_BIT2))); break; default: break; } } /* * New comm. interface: Send command functions */ static int aac_src_send_command(struct aac_softc *sc, struct aac_command *cm) { struct aac_fib_xporthdr *pFibX; u_int32_t fibsize, high_addr; u_int64_t address; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, "send command (new comm. type1)"); if (sc->msi_enabled && cm->cm_fib->Header.Command != AifRequest && sc->aac_max_msix > 1) { u_int16_t vector_no, first_choice = 0xffff; vector_no = sc->aac_fibs_pushed_no % sc->aac_max_msix; do { vector_no += 1; if (vector_no == sc->aac_max_msix) vector_no = 1; if (sc->aac_rrq_outstanding[vector_no] < sc->aac_vector_cap) break; if (0xffff == first_choice) first_choice = vector_no; else if (vector_no == first_choice) break; } while (1); if (vector_no == first_choice) vector_no = 0; sc->aac_rrq_outstanding[vector_no]++; if (sc->aac_fibs_pushed_no == 0xffffffff) sc->aac_fibs_pushed_no = 0; else sc->aac_fibs_pushed_no++; cm->cm_fib->Header.Handle += (vector_no << 16); } if (sc->flags & AAC_FLAGS_NEW_COMM_TYPE2) { /* Calculate the amount to the fibsize bits */ fibsize = (cm->cm_fib->Header.Size + 127) / 128 - 1; /* Fill new FIB header */ address = cm->cm_fibphys; high_addr = (u_int32_t)(address >> 32); if (high_addr == 0L) { cm->cm_fib->Header.StructType = AAC_FIBTYPE_TFIB2; cm->cm_fib->Header.u.TimeStamp = 0L; } else { cm->cm_fib->Header.StructType = AAC_FIBTYPE_TFIB2_64; cm->cm_fib->Header.u.SenderFibAddressHigh = high_addr; } cm->cm_fib->Header.SenderFibAddress = (u_int32_t)address; } else { /* Calculate the amount to the fibsize bits */ fibsize = (sizeof(struct aac_fib_xporthdr) + cm->cm_fib->Header.Size + 127) / 128 - 1; /* Fill XPORT header */ pFibX = (struct aac_fib_xporthdr *) ((unsigned char *)cm->cm_fib - sizeof(struct aac_fib_xporthdr)); pFibX->Handle = cm->cm_fib->Header.Handle; pFibX->HostAddress = cm->cm_fibphys; pFibX->Size = cm->cm_fib->Header.Size; + aac_fib_xporthdr_tole(pFibX); address = cm->cm_fibphys - sizeof(struct aac_fib_xporthdr); high_addr = (u_int32_t)(address >> 32); } + aac_fib_header_tole(&cm->cm_fib->Header); + if (fibsize > 31) fibsize = 31; aac_enqueue_busy(cm); if (high_addr) { AAC_MEM0_SETREG4(sc, AAC_SRC_IQUE64_H, high_addr); AAC_MEM0_SETREG4(sc, AAC_SRC_IQUE64_L, (u_int32_t)address + fibsize); } else { AAC_MEM0_SETREG4(sc, AAC_SRC_IQUE32, (u_int32_t)address + fibsize); } return 0; } /* * New comm. interface: get, set outbound queue index */ static int aac_src_get_outb_queue(struct aac_softc *sc) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return(-1); } static void aac_src_set_outb_queue(struct aac_softc *sc, int index) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); } /* * Debugging and Diagnostics */ /* * Print some information about the controller. */ static void aac_describe_controller(struct aac_softc *sc) { struct aac_fib *fib; struct aac_adapter_info *info; char *adapter_type = "Adaptec RAID controller"; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_lock(&sc->aac_io_lock); aac_alloc_sync_fib(sc, &fib); if (sc->supported_options & AAC_SUPPORTED_SUPPLEMENT_ADAPTER_INFO) { fib->data[0] = 0; if (aac_sync_fib(sc, RequestSupplementAdapterInfo, 0, fib, 1)) device_printf(sc->aac_dev, "RequestSupplementAdapterInfo failed\n"); else { struct aac_supplement_adapter_info *supp_info; supp_info = ((struct aac_supplement_adapter_info *)&fib->data[0]); adapter_type = (char *)supp_info->AdapterTypeText; - sc->aac_feature_bits = supp_info->FeatureBits; - sc->aac_support_opt2 = supp_info->SupportedOptions2; + sc->aac_feature_bits = le32toh(supp_info->FeatureBits); + sc->aac_support_opt2 = le32toh(supp_info->SupportedOptions2); } } device_printf(sc->aac_dev, "%s, aacraid driver %d.%d.%d-%d\n", adapter_type, AAC_DRIVER_MAJOR_VERSION, AAC_DRIVER_MINOR_VERSION, AAC_DRIVER_BUGFIX_LEVEL, AAC_DRIVER_BUILD); fib->data[0] = 0; if (aac_sync_fib(sc, RequestAdapterInfo, 0, fib, 1)) { device_printf(sc->aac_dev, "RequestAdapterInfo failed\n"); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); return; } /* save the kernel revision structure for later use */ info = (struct aac_adapter_info *)&fib->data[0]; + aac_adapter_info_toh(info); sc->aac_revision = info->KernelRevision; if (bootverbose) { device_printf(sc->aac_dev, "%s %dMHz, %dMB memory " "(%dMB cache, %dMB execution), %s\n", aac_describe_code(aac_cpu_variant, info->CpuVariant), info->ClockSpeed, info->TotalMem / (1024 * 1024), info->BufferMem / (1024 * 1024), info->ExecutionMem / (1024 * 1024), aac_describe_code(aac_battery_platform, info->batteryPlatform)); device_printf(sc->aac_dev, "Kernel %d.%d-%d, Build %d, S/N %6X\n", info->KernelRevision.external.comp.major, info->KernelRevision.external.comp.minor, info->KernelRevision.external.comp.dash, info->KernelRevision.buildNumber, (u_int32_t)(info->SerialNumber & 0xffffff)); device_printf(sc->aac_dev, "Supported Options=%b\n", sc->supported_options, "\20" "\1SNAPSHOT" "\2CLUSTERS" "\3WCACHE" "\4DATA64" "\5HOSTTIME" "\6RAID50" "\7WINDOW4GB" "\10SCSIUPGD" "\11SOFTERR" "\12NORECOND" "\13SGMAP64" "\14ALARM" "\15NONDASD" "\16SCSIMGT" "\17RAIDSCSI" "\21ADPTINFO" "\22NEWCOMM" "\23ARRAY64BIT" "\24HEATSENSOR"); } aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); } /* * Look up a text description of a numeric error code and return a pointer to * same. */ static char * aac_describe_code(struct aac_code_lookup *table, u_int32_t code) { int i; for (i = 0; table[i].string != NULL; i++) if (table[i].code == code) return(table[i].string); return(table[i + 1].string); } /* * Management Interface */ static int aac_open(struct cdev *dev, int flags, int fmt, struct thread *td) { struct aac_softc *sc; sc = dev->si_drv1; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); device_busy(sc->aac_dev); devfs_set_cdevpriv(sc, aac_cdevpriv_dtor); return 0; } static int aac_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int flag, struct thread *td) { union aac_statrequest *as; struct aac_softc *sc; int error = 0; as = (union aac_statrequest *)arg; sc = dev->si_drv1; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); switch (cmd) { case AACIO_STATS: switch (as->as_item) { case AACQ_FREE: case AACQ_READY: case AACQ_BUSY: bcopy(&sc->aac_qstat[as->as_item], &as->as_qstat, sizeof(struct aac_qstat)); break; default: error = ENOENT; break; } break; case FSACTL_SENDFIB: case FSACTL_SEND_LARGE_FIB: arg = *(caddr_t*)arg; case FSACTL_LNX_SENDFIB: case FSACTL_LNX_SEND_LARGE_FIB: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_SENDFIB"); error = aac_ioctl_sendfib(sc, arg); break; case FSACTL_SEND_RAW_SRB: arg = *(caddr_t*)arg; case FSACTL_LNX_SEND_RAW_SRB: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_SEND_RAW_SRB"); error = aac_ioctl_send_raw_srb(sc, arg); break; case FSACTL_AIF_THREAD: case FSACTL_LNX_AIF_THREAD: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_AIF_THREAD"); error = EINVAL; break; case FSACTL_OPEN_GET_ADAPTER_FIB: arg = *(caddr_t*)arg; case FSACTL_LNX_OPEN_GET_ADAPTER_FIB: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_OPEN_GET_ADAPTER_FIB"); error = aac_open_aif(sc, arg); break; case FSACTL_GET_NEXT_ADAPTER_FIB: arg = *(caddr_t*)arg; case FSACTL_LNX_GET_NEXT_ADAPTER_FIB: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_GET_NEXT_ADAPTER_FIB"); error = aac_getnext_aif(sc, arg); break; case FSACTL_CLOSE_GET_ADAPTER_FIB: arg = *(caddr_t*)arg; case FSACTL_LNX_CLOSE_GET_ADAPTER_FIB: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_CLOSE_GET_ADAPTER_FIB"); error = aac_close_aif(sc, arg); break; case FSACTL_MINIPORT_REV_CHECK: arg = *(caddr_t*)arg; case FSACTL_LNX_MINIPORT_REV_CHECK: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_MINIPORT_REV_CHECK"); error = aac_rev_check(sc, arg); break; case FSACTL_QUERY_DISK: arg = *(caddr_t*)arg; case FSACTL_LNX_QUERY_DISK: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_QUERY_DISK"); error = aac_query_disk(sc, arg); break; case FSACTL_DELETE_DISK: case FSACTL_LNX_DELETE_DISK: /* * We don't trust the underland to tell us when to delete a * container, rather we rely on an AIF coming from the * controller */ error = 0; break; case FSACTL_GET_PCI_INFO: arg = *(caddr_t*)arg; case FSACTL_LNX_GET_PCI_INFO: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_GET_PCI_INFO"); error = aac_get_pci_info(sc, arg); break; case FSACTL_GET_FEATURES: arg = *(caddr_t*)arg; case FSACTL_LNX_GET_FEATURES: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_GET_FEATURES"); error = aac_supported_features(sc, arg); break; default: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "unsupported cmd 0x%lx\n", cmd); error = EINVAL; break; } return(error); } static int aac_poll(struct cdev *dev, int poll_events, struct thread *td) { struct aac_softc *sc; struct aac_fib_context *ctx; int revents; sc = dev->si_drv1; revents = 0; mtx_lock(&sc->aac_io_lock); if ((poll_events & (POLLRDNORM | POLLIN)) != 0) { for (ctx = sc->fibctx; ctx; ctx = ctx->next) { if (ctx->ctx_idx != sc->aifq_idx || ctx->ctx_wrap) { revents |= poll_events & (POLLIN | POLLRDNORM); break; } } } mtx_unlock(&sc->aac_io_lock); if (revents == 0) { if (poll_events & (POLLIN | POLLRDNORM)) selrecord(td, &sc->rcv_select); } return (revents); } static void aac_ioctl_event(struct aac_softc *sc, struct aac_event *event, void *arg) { switch (event->ev_type) { case AAC_EVENT_CMFREE: mtx_assert(&sc->aac_io_lock, MA_OWNED); if (aacraid_alloc_command(sc, (struct aac_command **)arg)) { aacraid_add_event(sc, event); return; } free(event, M_AACRAIDBUF); wakeup(arg); break; default: break; } } /* * Send a FIB supplied from userspace + * + * Currently, sending a FIB from userspace in BE hosts is not supported. + * There are several things that need to be considered in order to + * support this, such as: + * - At least the FIB data part from userspace should already be in LE, + * or else the kernel would need to know all FIB types to be able to + * correctly convert it to BE. + * - SG tables are converted to BE by aacraid_map_command_sg(). This + * conversion should be supressed if the FIB comes from userspace. + * - aacraid_wait_command() calls functions that convert the FIB header + * to LE. But if the header is already in LE, the conversion should not + * be performed. */ static int aac_ioctl_sendfib(struct aac_softc *sc, caddr_t ufib) { struct aac_command *cm; int size, error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); cm = NULL; /* * Get a command */ mtx_lock(&sc->aac_io_lock); if (aacraid_alloc_command(sc, &cm)) { struct aac_event *event; event = malloc(sizeof(struct aac_event), M_AACRAIDBUF, M_NOWAIT | M_ZERO); if (event == NULL) { error = EBUSY; mtx_unlock(&sc->aac_io_lock); goto out; } event->ev_type = AAC_EVENT_CMFREE; event->ev_callback = aac_ioctl_event; event->ev_arg = &cm; aacraid_add_event(sc, event); msleep(cm, &sc->aac_io_lock, 0, "aacraid_ctlsfib", 0); } mtx_unlock(&sc->aac_io_lock); /* * Fetch the FIB header, then re-copy to get data as well. */ if ((error = copyin(ufib, cm->cm_fib, sizeof(struct aac_fib_header))) != 0) goto out; size = cm->cm_fib->Header.Size + sizeof(struct aac_fib_header); if (size > sc->aac_max_fib_size) { device_printf(sc->aac_dev, "incoming FIB oversized (%d > %d)\n", size, sc->aac_max_fib_size); size = sc->aac_max_fib_size; } if ((error = copyin(ufib, cm->cm_fib, size)) != 0) goto out; cm->cm_fib->Header.Size = size; cm->cm_timestamp = time_uptime; cm->cm_datalen = 0; /* * Pass the FIB to the controller, wait for it to complete. */ mtx_lock(&sc->aac_io_lock); error = aacraid_wait_command(cm); mtx_unlock(&sc->aac_io_lock); if (error != 0) { device_printf(sc->aac_dev, "aacraid_wait_command return %d\n", error); goto out; } /* * Copy the FIB and data back out to the caller. */ size = cm->cm_fib->Header.Size; if (size > sc->aac_max_fib_size) { device_printf(sc->aac_dev, "outbound FIB oversized (%d > %d)\n", size, sc->aac_max_fib_size); size = sc->aac_max_fib_size; } error = copyout(cm->cm_fib, ufib, size); out: if (cm != NULL) { mtx_lock(&sc->aac_io_lock); aacraid_release_command(cm); mtx_unlock(&sc->aac_io_lock); } return(error); } /* * Send a passthrough FIB supplied from userspace */ static int aac_ioctl_send_raw_srb(struct aac_softc *sc, caddr_t arg) { struct aac_command *cm; struct aac_fib *fib; struct aac_srb *srbcmd; struct aac_srb *user_srb = (struct aac_srb *)arg; void *user_reply; int error, transfer_data = 0; bus_dmamap_t orig_map = 0; u_int32_t fibsize = 0; u_int64_t srb_sg_address; u_int32_t srb_sg_bytecount; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); cm = NULL; mtx_lock(&sc->aac_io_lock); if (aacraid_alloc_command(sc, &cm)) { struct aac_event *event; event = malloc(sizeof(struct aac_event), M_AACRAIDBUF, M_NOWAIT | M_ZERO); if (event == NULL) { error = EBUSY; mtx_unlock(&sc->aac_io_lock); goto out; } event->ev_type = AAC_EVENT_CMFREE; event->ev_callback = aac_ioctl_event; event->ev_arg = &cm; aacraid_add_event(sc, event); msleep(cm, &sc->aac_io_lock, 0, "aacraid_ctlsraw", 0); } mtx_unlock(&sc->aac_io_lock); cm->cm_data = NULL; /* save original dma map */ orig_map = cm->cm_datamap; fib = cm->cm_fib; srbcmd = (struct aac_srb *)fib->data; if ((error = copyin((void *)&user_srb->data_len, &fibsize, sizeof (u_int32_t))) != 0) goto out; if (fibsize > (sc->aac_max_fib_size-sizeof(struct aac_fib_header))) { error = EINVAL; goto out; } if ((error = copyin((void *)user_srb, srbcmd, fibsize)) != 0) goto out; srbcmd->function = 0; /* SRBF_ExecuteScsi */ srbcmd->retry_limit = 0; /* obsolete */ /* only one sg element from userspace supported */ if (srbcmd->sg_map.SgCount > 1) { error = EINVAL; goto out; } /* check fibsize */ if (fibsize == (sizeof(struct aac_srb) + srbcmd->sg_map.SgCount * sizeof(struct aac_sg_entry))) { struct aac_sg_entry *sgp = srbcmd->sg_map.SgEntry; struct aac_sg_entry sg; if ((error = copyin(sgp, &sg, sizeof(sg))) != 0) goto out; srb_sg_bytecount = sg.SgByteCount; srb_sg_address = (u_int64_t)sg.SgAddress; } else if (fibsize == (sizeof(struct aac_srb) + srbcmd->sg_map.SgCount * sizeof(struct aac_sg_entry64))) { #ifdef __LP64__ struct aac_sg_entry64 *sgp = (struct aac_sg_entry64 *)srbcmd->sg_map.SgEntry; struct aac_sg_entry64 sg; if ((error = copyin(sgp, &sg, sizeof(sg))) != 0) goto out; srb_sg_bytecount = sg.SgByteCount; srb_sg_address = sg.SgAddress; #else error = EINVAL; goto out; #endif } else { error = EINVAL; goto out; } user_reply = (char *)arg + fibsize; srbcmd->data_len = srb_sg_bytecount; if (srbcmd->sg_map.SgCount == 1) transfer_data = 1; if (transfer_data) { /* * Create DMA tag for the passthr. data buffer and allocate it. */ if (bus_dma_tag_create(sc->aac_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ (sc->flags & AAC_FLAGS_SG_64BIT) ? BUS_SPACE_MAXADDR_32BIT : 0x7fffffff, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ srb_sg_bytecount, /* size */ sc->aac_sg_tablesize, /* nsegments */ srb_sg_bytecount, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* No locking needed */ &cm->cm_passthr_dmat)) { error = ENOMEM; goto out; } if (bus_dmamem_alloc(cm->cm_passthr_dmat, (void **)&cm->cm_data, BUS_DMA_NOWAIT, &cm->cm_datamap)) { error = ENOMEM; goto out; } /* fill some cm variables */ cm->cm_datalen = srb_sg_bytecount; if (srbcmd->flags & AAC_SRB_FLAGS_DATA_IN) cm->cm_flags |= AAC_CMD_DATAIN; if (srbcmd->flags & AAC_SRB_FLAGS_DATA_OUT) cm->cm_flags |= AAC_CMD_DATAOUT; if (srbcmd->flags & AAC_SRB_FLAGS_DATA_OUT) { if ((error = copyin((void *)(uintptr_t)srb_sg_address, cm->cm_data, cm->cm_datalen)) != 0) goto out; /* sync required for bus_dmamem_alloc() alloc. mem.? */ bus_dmamap_sync(cm->cm_passthr_dmat, cm->cm_datamap, BUS_DMASYNC_PREWRITE); } } /* build the FIB */ fib->Header.Size = sizeof(struct aac_fib_header) + sizeof(struct aac_srb); fib->Header.XferState = AAC_FIBSTATE_HOSTOWNED | AAC_FIBSTATE_INITIALISED | AAC_FIBSTATE_EMPTY | AAC_FIBSTATE_FROMHOST | AAC_FIBSTATE_REXPECTED | AAC_FIBSTATE_NORM | AAC_FIBSTATE_ASYNC; fib->Header.Command = (sc->flags & AAC_FLAGS_SG_64BIT) ? ScsiPortCommandU64 : ScsiPortCommand; cm->cm_sgtable = (struct aac_sg_table *)&srbcmd->sg_map; + aac_srb_tole(srbcmd); + /* send command */ if (transfer_data) { bus_dmamap_load(cm->cm_passthr_dmat, cm->cm_datamap, cm->cm_data, cm->cm_datalen, aacraid_map_command_sg, cm, 0); } else { aacraid_map_command_sg(cm, NULL, 0, 0); } /* wait for completion */ mtx_lock(&sc->aac_io_lock); while (!(cm->cm_flags & AAC_CMD_COMPLETED)) msleep(cm, &sc->aac_io_lock, 0, "aacraid_ctlsrw2", 0); mtx_unlock(&sc->aac_io_lock); /* copy data */ - if (transfer_data && (srbcmd->flags & AAC_SRB_FLAGS_DATA_IN)) { + if (transfer_data && (le32toh(srbcmd->flags) & AAC_SRB_FLAGS_DATA_IN)) { if ((error = copyout(cm->cm_data, (void *)(uintptr_t)srb_sg_address, cm->cm_datalen)) != 0) goto out; /* sync required for bus_dmamem_alloc() allocated mem.? */ bus_dmamap_sync(cm->cm_passthr_dmat, cm->cm_datamap, BUS_DMASYNC_POSTREAD); } /* status */ + aac_srb_response_toh((struct aac_srb_response *)fib->data); error = copyout(fib->data, user_reply, sizeof(struct aac_srb_response)); out: if (cm && cm->cm_data) { if (transfer_data) bus_dmamap_unload(cm->cm_passthr_dmat, cm->cm_datamap); bus_dmamem_free(cm->cm_passthr_dmat, cm->cm_data, cm->cm_datamap); cm->cm_datamap = orig_map; } if (cm && cm->cm_passthr_dmat) bus_dma_tag_destroy(cm->cm_passthr_dmat); if (cm) { mtx_lock(&sc->aac_io_lock); aacraid_release_command(cm); mtx_unlock(&sc->aac_io_lock); } return(error); } /* * Request an AIF from the controller (new comm. type1) */ static void aac_request_aif(struct aac_softc *sc) { struct aac_command *cm; struct aac_fib *fib; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); if (aacraid_alloc_command(sc, &cm)) { sc->aif_pending = 1; return; } sc->aif_pending = 0; /* build the FIB */ fib = cm->cm_fib; fib->Header.Size = sizeof(struct aac_fib); fib->Header.XferState = AAC_FIBSTATE_HOSTOWNED | AAC_FIBSTATE_INITIALISED | AAC_FIBSTATE_EMPTY | AAC_FIBSTATE_FROMHOST | AAC_FIBSTATE_REXPECTED | AAC_FIBSTATE_NORM | AAC_FIBSTATE_ASYNC; /* set AIF marker */ fib->Header.Handle = 0x00800000; fib->Header.Command = AifRequest; - ((struct aac_aif_command *)fib->data)->command = AifReqEvent; + ((struct aac_aif_command *)fib->data)->command = htole32(AifReqEvent); aacraid_map_command_sg(cm, NULL, 0, 0); } /* * cdevpriv interface private destructor. */ static void aac_cdevpriv_dtor(void *arg) { struct aac_softc *sc; sc = arg; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); device_unbusy(sc->aac_dev); } /* * Handle an AIF sent to us by the controller; queue it for later reference. * If the queue fills up, then drop the older entries. */ static void aac_handle_aif(struct aac_softc *sc, struct aac_fib *fib) { struct aac_aif_command *aif; struct aac_container *co, *co_next; struct aac_fib_context *ctx; struct aac_fib *sync_fib; struct aac_mntinforesp mir; int next, current, found; int count = 0, changed = 0, i = 0; u_int32_t channel, uid; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); aif = (struct aac_aif_command*)&fib->data[0]; aacraid_print_aif(sc, aif); /* Is it an event that we should care about? */ - switch (aif->command) { + switch (le32toh(aif->command)) { case AifCmdEventNotify: - switch (aif->data.EN.type) { + switch (le32toh(aif->data.EN.type)) { case AifEnAddContainer: case AifEnDeleteContainer: /* * A container was added or deleted, but the message * doesn't tell us anything else! Re-enumerate the * containers and sort things out. */ aac_alloc_sync_fib(sc, &sync_fib); do { /* * Ask the controller for its containers one at * a time. * XXX What if the controller's list changes * midway through this enumaration? * XXX This should be done async. */ if (aac_get_container_info(sc, sync_fib, i, &mir, &uid) != 0) continue; if (i == 0) count = mir.MntRespCount; /* * Check the container against our list. * co->co_found was already set to 0 in a * previous run. */ if ((mir.Status == ST_OK) && (mir.MntTable[0].VolType != CT_NONE)) { found = 0; TAILQ_FOREACH(co, &sc->aac_container_tqh, co_link) { if (co->co_mntobj.ObjectId == mir.MntTable[0].ObjectId) { co->co_found = 1; found = 1; break; } } /* * If the container matched, continue * in the list. */ if (found) { i++; continue; } /* * This is a new container. Do all the * appropriate things to set it up. */ aac_add_container(sc, &mir, 1, uid); changed = 1; } i++; } while ((i < count) && (i < AAC_MAX_CONTAINERS)); aac_release_sync_fib(sc); /* * Go through our list of containers and see which ones * were not marked 'found'. Since the controller didn't * list them they must have been deleted. Do the * appropriate steps to destroy the device. Also reset * the co->co_found field. */ co = TAILQ_FIRST(&sc->aac_container_tqh); while (co != NULL) { if (co->co_found == 0) { co_next = TAILQ_NEXT(co, co_link); TAILQ_REMOVE(&sc->aac_container_tqh, co, co_link); free(co, M_AACRAIDBUF); changed = 1; co = co_next; } else { co->co_found = 0; co = TAILQ_NEXT(co, co_link); } } /* Attach the newly created containers */ if (changed) { if (sc->cam_rescan_cb != NULL) sc->cam_rescan_cb(sc, 0, AAC_CAM_TARGET_WILDCARD); } break; case AifEnEnclosureManagement: - switch (aif->data.EN.data.EEE.eventType) { + switch (le32toh(aif->data.EN.data.EEE.eventType)) { case AIF_EM_DRIVE_INSERTION: case AIF_EM_DRIVE_REMOVAL: - channel = aif->data.EN.data.EEE.unitID; + channel = le32toh(aif->data.EN.data.EEE.unitID); if (sc->cam_rescan_cb != NULL) sc->cam_rescan_cb(sc, ((channel>>24) & 0xF) + 1, (channel & 0xFFFF)); break; } break; case AifEnAddJBOD: case AifEnDeleteJBOD: case AifRawDeviceRemove: - channel = aif->data.EN.data.ECE.container; + channel = le32toh(aif->data.EN.data.ECE.container); if (sc->cam_rescan_cb != NULL) sc->cam_rescan_cb(sc, ((channel>>24) & 0xF) + 1, AAC_CAM_TARGET_WILDCARD); break; default: break; } default: break; } /* Copy the AIF data to the AIF queue for ioctl retrieval */ current = sc->aifq_idx; next = (current + 1) % AAC_AIFQ_LENGTH; if (next == 0) sc->aifq_filled = 1; bcopy(fib, &sc->aac_aifq[current], sizeof(struct aac_fib)); + /* Make aifq's FIB header and data LE */ + aac_fib_header_tole(&sc->aac_aifq[current].Header); /* modify AIF contexts */ if (sc->aifq_filled) { for (ctx = sc->fibctx; ctx; ctx = ctx->next) { if (next == ctx->ctx_idx) ctx->ctx_wrap = 1; else if (current == ctx->ctx_idx && ctx->ctx_wrap) ctx->ctx_idx = next; } } sc->aifq_idx = next; /* On the off chance that someone is sleeping for an aif... */ if (sc->aac_state & AAC_STATE_AIF_SLEEPER) wakeup(sc->aac_aifq); /* Wakeup any poll()ers */ selwakeuppri(&sc->rcv_select, PRIBIO); return; } /* * Return the Revision of the driver to userspace and check to see if the * userspace app is possibly compatible. This is extremely bogus since * our driver doesn't follow Adaptec's versioning system. Cheat by just * returning what the card reported. */ static int aac_rev_check(struct aac_softc *sc, caddr_t udata) { struct aac_rev_check rev_check; struct aac_rev_check_resp rev_check_resp; int error = 0; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* * Copyin the revision struct from userspace */ if ((error = copyin(udata, (caddr_t)&rev_check, sizeof(struct aac_rev_check))) != 0) { return error; } fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "Userland revision= %d\n", rev_check.callingRevision.buildNumber); /* * Doctor up the response struct. */ rev_check_resp.possiblyCompatible = 1; rev_check_resp.adapterSWRevision.external.comp.major = AAC_DRIVER_MAJOR_VERSION; rev_check_resp.adapterSWRevision.external.comp.minor = AAC_DRIVER_MINOR_VERSION; rev_check_resp.adapterSWRevision.external.comp.type = AAC_DRIVER_TYPE; rev_check_resp.adapterSWRevision.external.comp.dash = AAC_DRIVER_BUGFIX_LEVEL; rev_check_resp.adapterSWRevision.buildNumber = AAC_DRIVER_BUILD; return(copyout((caddr_t)&rev_check_resp, udata, sizeof(struct aac_rev_check_resp))); } /* * Pass the fib context to the caller */ static int aac_open_aif(struct aac_softc *sc, caddr_t arg) { struct aac_fib_context *fibctx, *ctx; int error = 0; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); fibctx = malloc(sizeof(struct aac_fib_context), M_AACRAIDBUF, M_NOWAIT|M_ZERO); if (fibctx == NULL) return (ENOMEM); mtx_lock(&sc->aac_io_lock); /* all elements are already 0, add to queue */ if (sc->fibctx == NULL) sc->fibctx = fibctx; else { for (ctx = sc->fibctx; ctx->next; ctx = ctx->next) ; ctx->next = fibctx; fibctx->prev = ctx; } /* evaluate unique value */ fibctx->unique = (*(u_int32_t *)&fibctx & 0xffffffff); ctx = sc->fibctx; while (ctx != fibctx) { if (ctx->unique == fibctx->unique) { fibctx->unique++; ctx = sc->fibctx; } else { ctx = ctx->next; } } error = copyout(&fibctx->unique, (void *)arg, sizeof(u_int32_t)); mtx_unlock(&sc->aac_io_lock); if (error) aac_close_aif(sc, (caddr_t)ctx); return error; } /* * Close the caller's fib context */ static int aac_close_aif(struct aac_softc *sc, caddr_t arg) { struct aac_fib_context *ctx; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_lock(&sc->aac_io_lock); for (ctx = sc->fibctx; ctx; ctx = ctx->next) { if (ctx->unique == *(uint32_t *)&arg) { if (ctx == sc->fibctx) sc->fibctx = NULL; else { ctx->prev->next = ctx->next; if (ctx->next) ctx->next->prev = ctx->prev; } break; } } if (ctx) free(ctx, M_AACRAIDBUF); mtx_unlock(&sc->aac_io_lock); return 0; } /* * Pass the caller the next AIF in their queue */ static int aac_getnext_aif(struct aac_softc *sc, caddr_t arg) { struct get_adapter_fib_ioctl agf; struct aac_fib_context *ctx; int error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_lock(&sc->aac_io_lock); #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { struct get_adapter_fib_ioctl32 agf32; error = copyin(arg, &agf32, sizeof(agf32)); if (error == 0) { agf.AdapterFibContext = agf32.AdapterFibContext; agf.Wait = agf32.Wait; agf.AifFib = (caddr_t)(uintptr_t)agf32.AifFib; } } else #endif error = copyin(arg, &agf, sizeof(agf)); if (error == 0) { for (ctx = sc->fibctx; ctx; ctx = ctx->next) { if (agf.AdapterFibContext == ctx->unique) break; } if (!ctx) { mtx_unlock(&sc->aac_io_lock); return (EFAULT); } error = aac_return_aif(sc, ctx, agf.AifFib); if (error == EAGAIN && agf.Wait) { fwprintf(sc, HBA_FLAGS_DBG_AIF_B, "aac_getnext_aif(): waiting for AIF"); sc->aac_state |= AAC_STATE_AIF_SLEEPER; while (error == EAGAIN) { mtx_unlock(&sc->aac_io_lock); error = tsleep(sc->aac_aifq, PRIBIO | PCATCH, "aacaif", 0); mtx_lock(&sc->aac_io_lock); if (error == 0) error = aac_return_aif(sc, ctx, agf.AifFib); } sc->aac_state &= ~AAC_STATE_AIF_SLEEPER; } } mtx_unlock(&sc->aac_io_lock); return(error); } /* * Hand the next AIF off the top of the queue out to userspace. */ static int aac_return_aif(struct aac_softc *sc, struct aac_fib_context *ctx, caddr_t uptr) { int current, error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); current = ctx->ctx_idx; if (current == sc->aifq_idx && !ctx->ctx_wrap) { /* empty */ return (EAGAIN); } error = copyout(&sc->aac_aifq[current], (void *)uptr, sizeof(struct aac_fib)); if (error) device_printf(sc->aac_dev, "aac_return_aif: copyout returned %d\n", error); else { ctx->ctx_wrap = 0; ctx->ctx_idx = (current + 1) % AAC_AIFQ_LENGTH; } return(error); } static int aac_get_pci_info(struct aac_softc *sc, caddr_t uptr) { struct aac_pci_info { u_int32_t bus; u_int32_t slot; } pciinf; int error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); pciinf.bus = pci_get_bus(sc->aac_dev); pciinf.slot = pci_get_slot(sc->aac_dev); error = copyout((caddr_t)&pciinf, uptr, sizeof(struct aac_pci_info)); return (error); } static int aac_supported_features(struct aac_softc *sc, caddr_t uptr) { struct aac_features f; int error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); if ((error = copyin(uptr, &f, sizeof (f))) != 0) return (error); /* * When the management driver receives FSACTL_GET_FEATURES ioctl with * ALL zero in the featuresState, the driver will return the current * state of all the supported features, the data field will not be * valid. * When the management driver receives FSACTL_GET_FEATURES ioctl with * a specific bit set in the featuresState, the driver will return the * current state of this specific feature and whatever data that are * associated with the feature in the data field or perform whatever * action needed indicates in the data field. */ if (f.feat.fValue == 0) { f.feat.fBits.largeLBA = (sc->flags & AAC_FLAGS_LBA_64BIT) ? 1 : 0; f.feat.fBits.JBODSupport = 1; /* TODO: In the future, add other features state here as well */ } else { if (f.feat.fBits.largeLBA) f.feat.fBits.largeLBA = (sc->flags & AAC_FLAGS_LBA_64BIT) ? 1 : 0; /* TODO: Add other features state and data in the future */ } error = copyout(&f, uptr, sizeof (f)); return (error); } /* * Give the userland some information about the container. The AAC arch * expects the driver to be a SCSI passthrough type driver, so it expects * the containers to have b:t:l numbers. Fake it. */ static int aac_query_disk(struct aac_softc *sc, caddr_t uptr) { struct aac_query_disk query_disk; struct aac_container *co; int error, id; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_lock(&sc->aac_io_lock); error = copyin(uptr, (caddr_t)&query_disk, sizeof(struct aac_query_disk)); if (error) { mtx_unlock(&sc->aac_io_lock); return (error); } id = query_disk.ContainerNumber; if (id == -1) { mtx_unlock(&sc->aac_io_lock); return (EINVAL); } TAILQ_FOREACH(co, &sc->aac_container_tqh, co_link) { if (co->co_mntobj.ObjectId == id) break; } if (co == NULL) { query_disk.Valid = 0; query_disk.Locked = 0; query_disk.Deleted = 1; /* XXX is this right? */ } else { query_disk.Valid = 1; query_disk.Locked = 1; query_disk.Deleted = 0; query_disk.Bus = device_get_unit(sc->aac_dev); query_disk.Target = 0; query_disk.Lun = 0; query_disk.UnMapped = 0; } error = copyout((caddr_t)&query_disk, uptr, sizeof(struct aac_query_disk)); mtx_unlock(&sc->aac_io_lock); return (error); } static void aac_container_bus(struct aac_softc *sc) { struct aac_sim *sim; device_t child; sim =(struct aac_sim *)malloc(sizeof(struct aac_sim), M_AACRAIDBUF, M_NOWAIT | M_ZERO); if (sim == NULL) { device_printf(sc->aac_dev, "No memory to add container bus\n"); panic("Out of memory?!"); } child = device_add_child(sc->aac_dev, "aacraidp", -1); if (child == NULL) { device_printf(sc->aac_dev, "device_add_child failed for container bus\n"); free(sim, M_AACRAIDBUF); panic("Out of memory?!"); } sim->TargetsPerBus = AAC_MAX_CONTAINERS; sim->BusNumber = 0; sim->BusType = CONTAINER_BUS; sim->InitiatorBusId = -1; sim->aac_sc = sc; sim->sim_dev = child; sim->aac_cam = NULL; device_set_ivars(child, sim); device_set_desc(child, "Container Bus"); TAILQ_INSERT_TAIL(&sc->aac_sim_tqh, sim, sim_link); /* device_set_desc(child, aac_describe_code(aac_container_types, mir->MntTable[0].VolType)); */ bus_generic_attach(sc->aac_dev); } static void aac_get_bus_info(struct aac_softc *sc) { struct aac_fib *fib; struct aac_ctcfg *c_cmd; struct aac_ctcfg_resp *c_resp; struct aac_vmioctl *vmi; struct aac_vmi_businf_resp *vmi_resp; struct aac_getbusinf businfo; struct aac_sim *caminf; device_t child; int i, error; mtx_lock(&sc->aac_io_lock); aac_alloc_sync_fib(sc, &fib); c_cmd = (struct aac_ctcfg *)&fib->data[0]; bzero(c_cmd, sizeof(struct aac_ctcfg)); c_cmd->Command = VM_ContainerConfig; c_cmd->cmd = CT_GET_SCSI_METHOD; c_cmd->param = 0; + aac_ctcfg_tole(c_cmd); error = aac_sync_fib(sc, ContainerCommand, 0, fib, sizeof(struct aac_ctcfg)); if (error) { device_printf(sc->aac_dev, "Error %d sending " "VM_ContainerConfig command\n", error); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); return; } c_resp = (struct aac_ctcfg_resp *)&fib->data[0]; + aac_ctcfg_resp_toh(c_resp); if (c_resp->Status != ST_OK) { device_printf(sc->aac_dev, "VM_ContainerConfig returned 0x%x\n", c_resp->Status); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); return; } sc->scsi_method_id = c_resp->param; vmi = (struct aac_vmioctl *)&fib->data[0]; bzero(vmi, sizeof(struct aac_vmioctl)); vmi->Command = VM_Ioctl; vmi->ObjType = FT_DRIVE; vmi->MethId = sc->scsi_method_id; vmi->ObjId = 0; vmi->IoctlCmd = GetBusInfo; + aac_vmioctl_tole(vmi); error = aac_sync_fib(sc, ContainerCommand, 0, fib, sizeof(struct aac_vmi_businf_resp)); if (error) { device_printf(sc->aac_dev, "Error %d sending VMIoctl command\n", error); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); return; } vmi_resp = (struct aac_vmi_businf_resp *)&fib->data[0]; + aac_vmi_businf_resp_toh(vmi_resp); if (vmi_resp->Status != ST_OK) { device_printf(sc->aac_dev, "VM_Ioctl returned %d\n", vmi_resp->Status); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); return; } bcopy(&vmi_resp->BusInf, &businfo, sizeof(struct aac_getbusinf)); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); for (i = 0; i < businfo.BusCount; i++) { if (businfo.BusValid[i] != AAC_BUS_VALID) continue; caminf = (struct aac_sim *)malloc( sizeof(struct aac_sim), M_AACRAIDBUF, M_NOWAIT | M_ZERO); if (caminf == NULL) { device_printf(sc->aac_dev, "No memory to add passthrough bus %d\n", i); break; } child = device_add_child(sc->aac_dev, "aacraidp", -1); if (child == NULL) { device_printf(sc->aac_dev, "device_add_child failed for passthrough bus %d\n", i); free(caminf, M_AACRAIDBUF); break; } caminf->TargetsPerBus = businfo.TargetsPerBus; caminf->BusNumber = i+1; caminf->BusType = PASSTHROUGH_BUS; caminf->InitiatorBusId = -1; caminf->aac_sc = sc; caminf->sim_dev = child; caminf->aac_cam = NULL; device_set_ivars(child, caminf); device_set_desc(child, "SCSI Passthrough Bus"); TAILQ_INSERT_TAIL(&sc->aac_sim_tqh, caminf, sim_link); } } /* * Check to see if the kernel is up and running. If we are in a * BlinkLED state, return the BlinkLED code. */ static u_int32_t aac_check_adapter_health(struct aac_softc *sc, u_int8_t *bled) { u_int32_t ret; ret = AAC_GET_FWSTATUS(sc); if (ret & AAC_UP_AND_RUNNING) ret = 0; else if (ret & AAC_KERNEL_PANIC && bled) *bled = (ret >> 16) & 0xff; return (ret); } /* * Once do an IOP reset, basically have to re-initialize the card as * if coming up from a cold boot, and the driver is responsible for * any IO that was outstanding to the adapter at the time of the IOP * RESET. And prepare the driver for IOP RESET by making the init code * modular with the ability to call it from multiple places. */ static int aac_reset_adapter(struct aac_softc *sc) { struct aac_command *cm; struct aac_fib *fib; struct aac_pause_command *pc; u_int32_t status, reset_mask, waitCount, max_msix_orig; int ret, msi_enabled_orig; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_assert(&sc->aac_io_lock, MA_OWNED); if (sc->aac_state & AAC_STATE_RESET) { device_printf(sc->aac_dev, "aac_reset_adapter() already in progress\n"); return (EINVAL); } sc->aac_state |= AAC_STATE_RESET; /* disable interrupt */ AAC_ACCESS_DEVREG(sc, AAC_DISABLE_INTERRUPT); /* * Abort all pending commands: * a) on the controller */ while ((cm = aac_dequeue_busy(sc)) != NULL) { cm->cm_flags |= AAC_CMD_RESET; /* is there a completion handler? */ if (cm->cm_complete != NULL) { cm->cm_complete(cm); } else { /* assume that someone is sleeping on this * command */ wakeup(cm); } } /* b) in the waiting queues */ while ((cm = aac_dequeue_ready(sc)) != NULL) { cm->cm_flags |= AAC_CMD_RESET; /* is there a completion handler? */ if (cm->cm_complete != NULL) { cm->cm_complete(cm); } else { /* assume that someone is sleeping on this * command */ wakeup(cm); } } /* flush drives */ if (aac_check_adapter_health(sc, NULL) == 0) { mtx_unlock(&sc->aac_io_lock); (void) aacraid_shutdown(sc->aac_dev); mtx_lock(&sc->aac_io_lock); } /* execute IOP reset */ if (sc->aac_support_opt2 & AAC_SUPPORTED_MU_RESET) { AAC_MEM0_SETREG4(sc, AAC_IRCSR, AAC_IRCSR_CORES_RST); /* We need to wait for 5 seconds before accessing the MU again * 10000 * 100us = 1000,000us = 1000ms = 1s */ waitCount = 5 * 10000; while (waitCount) { DELAY(100); /* delay 100 microseconds */ waitCount--; } } else { ret = aacraid_sync_command(sc, AAC_IOP_RESET_ALWAYS, 0, 0, 0, 0, &status, &reset_mask); if (ret && !sc->doorbell_mask) { /* call IOP_RESET for older firmware */ if ((aacraid_sync_command(sc, AAC_IOP_RESET, 0,0,0,0, &status, NULL)) != 0) { if (status == AAC_SRB_STS_INVALID_REQUEST) { device_printf(sc->aac_dev, "IOP_RESET not supported\n"); } else { /* probably timeout */ device_printf(sc->aac_dev, "IOP_RESET failed\n"); } /* unwind aac_shutdown() */ aac_alloc_sync_fib(sc, &fib); pc = (struct aac_pause_command *)&fib->data[0]; pc->Command = VM_ContainerConfig; pc->Type = CT_PAUSE_IO; pc->Timeout = 1; pc->Min = 1; pc->NoRescan = 1; + aac_pause_command_tole(pc); (void) aac_sync_fib(sc, ContainerCommand, 0, fib, sizeof (struct aac_pause_command)); aac_release_sync_fib(sc); goto finish; } } else if (sc->doorbell_mask) { ret = 0; reset_mask = sc->doorbell_mask; } if (!ret && (sc->aac_support_opt2 & AAC_SUPPORTED_DOORBELL_RESET)) { AAC_MEM0_SETREG4(sc, AAC_SRC_IDBR, reset_mask); /* * We need to wait for 5 seconds before accessing the * doorbell again; * 10000 * 100us = 1000,000us = 1000ms = 1s */ waitCount = 5 * 10000; while (waitCount) { DELAY(100); /* delay 100 microseconds */ waitCount--; } } } /* * Initialize the adapter. */ max_msix_orig = sc->aac_max_msix; msi_enabled_orig = sc->msi_enabled; sc->msi_enabled = FALSE; if (aac_check_firmware(sc) != 0) goto finish; if (!(sc->flags & AAC_FLAGS_SYNC_MODE)) { sc->aac_max_msix = max_msix_orig; if (msi_enabled_orig) { sc->msi_enabled = msi_enabled_orig; AAC_ACCESS_DEVREG(sc, AAC_ENABLE_MSIX); } mtx_unlock(&sc->aac_io_lock); aac_init(sc); mtx_lock(&sc->aac_io_lock); } finish: sc->aac_state &= ~AAC_STATE_RESET; AAC_ACCESS_DEVREG(sc, AAC_ENABLE_INTERRUPT); aacraid_startio(sc); return (0); } diff --git a/sys/dev/aacraid/aacraid_cam.c b/sys/dev/aacraid/aacraid_cam.c index f35b631daad6..48da7e8d757f 100644 --- a/sys/dev/aacraid/aacraid_cam.c +++ b/sys/dev/aacraid/aacraid_cam.c @@ -1,1393 +1,1404 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002-2010 Adaptec, Inc. * Copyright (c) 2010-2012 PMC-Sierra, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * CAM front-end for communicating with non-DASD devices */ #include "opt_aacraid.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #ifndef CAM_NEW_TRAN_CODE #define CAM_NEW_TRAN_CODE 1 #endif #ifndef SVPD_SUPPORTED_PAGE_LIST struct scsi_vpd_supported_page_list { u_int8_t device; u_int8_t page_code; #define SVPD_SUPPORTED_PAGE_LIST 0x00 u_int8_t reserved; u_int8_t length; /* number of VPD entries */ #define SVPD_SUPPORTED_PAGES_SIZE 251 u_int8_t list[SVPD_SUPPORTED_PAGES_SIZE]; }; #endif /************************** Version Compatibility *************************/ #define aac_sim_alloc cam_sim_alloc struct aac_cam { device_t dev; struct aac_sim *inf; struct cam_sim *sim; struct cam_path *path; }; static int aac_cam_probe(device_t dev); static int aac_cam_attach(device_t dev); static int aac_cam_detach(device_t dev); static void aac_cam_action(struct cam_sim *, union ccb *); static void aac_cam_poll(struct cam_sim *); static void aac_cam_complete(struct aac_command *); static void aac_container_complete(struct aac_command *); static void aac_cam_rescan(struct aac_softc *sc, uint32_t channel, uint32_t target_id); static void aac_set_scsi_error(struct aac_softc *sc, union ccb *ccb, u_int8_t status, u_int8_t key, u_int8_t asc, u_int8_t ascq); static int aac_load_map_command_sg(struct aac_softc *, struct aac_command *); static u_int64_t aac_eval_blockno(u_int8_t *); static void aac_container_rw_command(struct cam_sim *, union ccb *, u_int8_t *); static void aac_container_special_command(struct cam_sim *, union ccb *, u_int8_t *); static void aac_passthrough_command(struct cam_sim *, union ccb *); static u_int32_t aac_cam_reset_bus(struct cam_sim *, union ccb *); static u_int32_t aac_cam_abort_ccb(struct cam_sim *, union ccb *); static u_int32_t aac_cam_term_io(struct cam_sim *, union ccb *); static devclass_t aacraid_pass_devclass; static device_method_t aacraid_pass_methods[] = { DEVMETHOD(device_probe, aac_cam_probe), DEVMETHOD(device_attach, aac_cam_attach), DEVMETHOD(device_detach, aac_cam_detach), { 0, 0 } }; static driver_t aacraid_pass_driver = { "aacraidp", aacraid_pass_methods, sizeof(struct aac_cam) }; DRIVER_MODULE(aacraidp, aacraid, aacraid_pass_driver, aacraid_pass_devclass, 0, 0); MODULE_DEPEND(aacraidp, cam, 1, 1, 1); MALLOC_DEFINE(M_AACRAIDCAM, "aacraidcam", "AACRAID CAM info"); static void aac_set_scsi_error(struct aac_softc *sc, union ccb *ccb, u_int8_t status, u_int8_t key, u_int8_t asc, u_int8_t ascq) { struct scsi_sense_data_fixed *sense = (struct scsi_sense_data_fixed *)&ccb->csio.sense_data; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, "Error %d!", status); ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR; ccb->csio.scsi_status = status; if (status == SCSI_STATUS_CHECK_COND) { ccb->ccb_h.status |= CAM_AUTOSNS_VALID; bzero(&ccb->csio.sense_data, ccb->csio.sense_len); ccb->csio.sense_data.error_code = SSD_CURRENT_ERROR | SSD_ERRCODE_VALID; sense->flags = key; if (ccb->csio.sense_len >= 14) { sense->extra_len = 6; sense->add_sense_code = asc; sense->add_sense_code_qual = ascq; } } } static void aac_cam_rescan(struct aac_softc *sc, uint32_t channel, uint32_t target_id) { union ccb *ccb; struct aac_sim *sim; struct aac_cam *camsc; if (target_id == AAC_CAM_TARGET_WILDCARD) target_id = CAM_TARGET_WILDCARD; TAILQ_FOREACH(sim, &sc->aac_sim_tqh, sim_link) { camsc = sim->aac_cam; if (camsc == NULL || camsc->inf == NULL || camsc->inf->BusNumber != channel) continue; ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { device_printf(sc->aac_dev, "Cannot allocate ccb for bus rescan.\n"); return; } if (xpt_create_path(&ccb->ccb_h.path, xpt_periph, cam_sim_path(camsc->sim), target_id, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_free_ccb(ccb); device_printf(sc->aac_dev, "Cannot create path for bus rescan.\n"); return; } xpt_rescan(ccb); break; } } static void aac_cam_event(struct aac_softc *sc, struct aac_event *event, void *arg) { union ccb *ccb; struct aac_cam *camsc; switch (event->ev_type) { case AAC_EVENT_CMFREE: ccb = arg; camsc = ccb->ccb_h.sim_priv.entries[0].ptr; free(event, M_AACRAIDCAM); xpt_release_simq(camsc->sim, 1); ccb->ccb_h.status = CAM_REQUEUE_REQ; xpt_done(ccb); break; default: device_printf(sc->aac_dev, "unknown event %d in aac_cam\n", event->ev_type); break; } return; } static int aac_cam_probe(device_t dev) { struct aac_cam *camsc; camsc = (struct aac_cam *)device_get_softc(dev); if (!camsc->inf) return (0); fwprintf(camsc->inf->aac_sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return (0); } static int aac_cam_detach(device_t dev) { struct aac_softc *sc; struct aac_cam *camsc; camsc = (struct aac_cam *)device_get_softc(dev); if (!camsc->inf) return (0); sc = camsc->inf->aac_sc; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); camsc->inf->aac_cam = NULL; mtx_lock(&sc->aac_io_lock); xpt_async(AC_LOST_DEVICE, camsc->path, NULL); xpt_free_path(camsc->path); xpt_bus_deregister(cam_sim_path(camsc->sim)); cam_sim_free(camsc->sim, /*free_devq*/TRUE); sc->cam_rescan_cb = NULL; mtx_unlock(&sc->aac_io_lock); return (0); } /* * Register the driver as a CAM SIM */ static int aac_cam_attach(device_t dev) { struct cam_devq *devq; struct cam_sim *sim; struct cam_path *path; struct aac_cam *camsc; struct aac_sim *inf; camsc = (struct aac_cam *)device_get_softc(dev); inf = (struct aac_sim *)device_get_ivars(dev); if (!inf) return (EIO); fwprintf(inf->aac_sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); camsc->inf = inf; camsc->inf->aac_cam = camsc; devq = cam_simq_alloc(inf->TargetsPerBus); if (devq == NULL) return (EIO); sim = aac_sim_alloc(aac_cam_action, aac_cam_poll, "aacraidp", camsc, device_get_unit(dev), &inf->aac_sc->aac_io_lock, 1, 1, devq); if (sim == NULL) { cam_simq_free(devq); return (EIO); } /* Since every bus has it's own sim, every bus 'appears' as bus 0 */ mtx_lock(&inf->aac_sc->aac_io_lock); if (aac_xpt_bus_register(sim, dev, 0) != CAM_SUCCESS) { cam_sim_free(sim, TRUE); mtx_unlock(&inf->aac_sc->aac_io_lock); return (EIO); } if (xpt_create_path(&path, NULL, cam_sim_path(sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_bus_deregister(cam_sim_path(sim)); cam_sim_free(sim, TRUE); mtx_unlock(&inf->aac_sc->aac_io_lock); return (EIO); } inf->aac_sc->cam_rescan_cb = aac_cam_rescan; mtx_unlock(&inf->aac_sc->aac_io_lock); camsc->sim = sim; camsc->path = path; return (0); } static u_int64_t aac_eval_blockno(u_int8_t *cmdp) { u_int64_t blockno; switch (cmdp[0]) { case READ_6: case WRITE_6: blockno = scsi_3btoul(((struct scsi_rw_6 *)cmdp)->addr); break; case READ_10: case WRITE_10: blockno = scsi_4btoul(((struct scsi_rw_10 *)cmdp)->addr); break; case READ_12: case WRITE_12: blockno = scsi_4btoul(((struct scsi_rw_12 *)cmdp)->addr); break; case READ_16: case WRITE_16: blockno = scsi_8btou64(((struct scsi_rw_16 *)cmdp)->addr); break; default: blockno = 0; break; } return(blockno); } static void aac_container_rw_command(struct cam_sim *sim, union ccb *ccb, u_int8_t *cmdp) { struct aac_cam *camsc; struct aac_softc *sc; struct aac_command *cm; struct aac_fib *fib; u_int64_t blockno; camsc = (struct aac_cam *)cam_sim_softc(sim); sc = camsc->inf->aac_sc; mtx_assert(&sc->aac_io_lock, MA_OWNED); if (aacraid_alloc_command(sc, &cm)) { struct aac_event *event; xpt_freeze_simq(sim, 1); ccb->ccb_h.status = CAM_RESRC_UNAVAIL; ccb->ccb_h.sim_priv.entries[0].ptr = camsc; event = malloc(sizeof(struct aac_event), M_AACRAIDCAM, M_NOWAIT | M_ZERO); if (event == NULL) { device_printf(sc->aac_dev, "Warning, out of memory for event\n"); return; } event->ev_callback = aac_cam_event; event->ev_arg = ccb; event->ev_type = AAC_EVENT_CMFREE; aacraid_add_event(sc, event); return; } fib = cm->cm_fib; switch (ccb->ccb_h.flags & CAM_DIR_MASK) { case CAM_DIR_IN: cm->cm_flags |= AAC_CMD_DATAIN; break; case CAM_DIR_OUT: cm->cm_flags |= AAC_CMD_DATAOUT; break; case CAM_DIR_NONE: break; default: cm->cm_flags |= AAC_CMD_DATAIN | AAC_CMD_DATAOUT; break; } blockno = aac_eval_blockno(cmdp); cm->cm_complete = aac_container_complete; cm->cm_ccb = ccb; cm->cm_timestamp = time_uptime; cm->cm_data = (void *)ccb->csio.data_ptr; cm->cm_datalen = ccb->csio.dxfer_len; fib->Header.Size = sizeof(struct aac_fib_header); fib->Header.XferState = AAC_FIBSTATE_HOSTOWNED | AAC_FIBSTATE_INITIALISED | AAC_FIBSTATE_EMPTY | AAC_FIBSTATE_FROMHOST | AAC_FIBSTATE_REXPECTED | AAC_FIBSTATE_NORM | AAC_FIBSTATE_ASYNC | AAC_FIBSTATE_FAST_RESPONSE; if (sc->flags & AAC_FLAGS_NEW_COMM_TYPE2) { struct aac_raw_io2 *raw; + /* NOTE: LE conversion handled at aacraid_map_command_sg() */ raw = (struct aac_raw_io2 *)&fib->data[0]; bzero(raw, sizeof(struct aac_raw_io2)); fib->Header.Command = RawIo2; raw->strtBlkLow = (u_int32_t)blockno; raw->strtBlkHigh = (u_int32_t)(blockno >> 32); raw->byteCnt = cm->cm_datalen; raw->ldNum = ccb->ccb_h.target_id; fib->Header.Size += sizeof(struct aac_raw_io2); cm->cm_sgtable = (struct aac_sg_table *)raw->sge; if (cm->cm_flags & AAC_CMD_DATAIN) raw->flags = RIO2_IO_TYPE_READ | RIO2_SG_FORMAT_IEEE1212; else raw->flags = RIO2_IO_TYPE_WRITE | RIO2_SG_FORMAT_IEEE1212; } else if (sc->flags & AAC_FLAGS_RAW_IO) { struct aac_raw_io *raw; + /* NOTE: LE conversion handled at aacraid_map_command_sg() */ raw = (struct aac_raw_io *)&fib->data[0]; bzero(raw, sizeof(struct aac_raw_io)); fib->Header.Command = RawIo; raw->BlockNumber = blockno; raw->ByteCount = cm->cm_datalen; raw->ContainerId = ccb->ccb_h.target_id; fib->Header.Size += sizeof(struct aac_raw_io); cm->cm_sgtable = (struct aac_sg_table *) &raw->SgMapRaw; if (cm->cm_flags & AAC_CMD_DATAIN) raw->Flags = 1; } else if ((sc->flags & AAC_FLAGS_SG_64BIT) == 0) { fib->Header.Command = ContainerCommand; if (cm->cm_flags & AAC_CMD_DATAIN) { struct aac_blockread *br; br = (struct aac_blockread *)&fib->data[0]; br->Command = VM_CtBlockRead; br->ContainerId = ccb->ccb_h.target_id; br->BlockNumber = blockno; br->ByteCount = cm->cm_datalen; + aac_blockread_tole(br); fib->Header.Size += sizeof(struct aac_blockread); cm->cm_sgtable = &br->SgMap; } else { struct aac_blockwrite *bw; bw = (struct aac_blockwrite *)&fib->data[0]; bw->Command = VM_CtBlockWrite; bw->ContainerId = ccb->ccb_h.target_id; bw->BlockNumber = blockno; bw->ByteCount = cm->cm_datalen; bw->Stable = CUNSTABLE; + aac_blockwrite_tole(bw); fib->Header.Size += sizeof(struct aac_blockwrite); cm->cm_sgtable = &bw->SgMap; } } else { fib->Header.Command = ContainerCommand64; if (cm->cm_flags & AAC_CMD_DATAIN) { struct aac_blockread64 *br; br = (struct aac_blockread64 *)&fib->data[0]; br->Command = VM_CtHostRead64; br->ContainerId = ccb->ccb_h.target_id; br->SectorCount = cm->cm_datalen/AAC_BLOCK_SIZE; br->BlockNumber = blockno; br->Pad = 0; br->Flags = 0; + aac_blockread64_tole(br); fib->Header.Size += sizeof(struct aac_blockread64); cm->cm_sgtable = (struct aac_sg_table *)&br->SgMap64; } else { struct aac_blockwrite64 *bw; bw = (struct aac_blockwrite64 *)&fib->data[0]; bw->Command = VM_CtHostWrite64; bw->ContainerId = ccb->ccb_h.target_id; bw->SectorCount = cm->cm_datalen/AAC_BLOCK_SIZE; bw->BlockNumber = blockno; bw->Pad = 0; bw->Flags = 0; + aac_blockwrite64_tole(bw); fib->Header.Size += sizeof(struct aac_blockwrite64); cm->cm_sgtable = (struct aac_sg_table *)&bw->SgMap64; } } aac_enqueue_ready(cm); aacraid_startio(cm->cm_sc); } static void aac_container_special_command(struct cam_sim *sim, union ccb *ccb, u_int8_t *cmdp) { struct aac_cam *camsc; struct aac_softc *sc; struct aac_container *co; camsc = (struct aac_cam *)cam_sim_softc(sim); sc = camsc->inf->aac_sc; mtx_assert(&sc->aac_io_lock, MA_OWNED); TAILQ_FOREACH(co, &sc->aac_container_tqh, co_link) { fwprintf(sc, HBA_FLAGS_DBG_ERROR_B, "found container %d search for %d", co->co_mntobj.ObjectId, ccb->ccb_h.target_id); if (co->co_mntobj.ObjectId == ccb->ccb_h.target_id) break; } if (co == NULL || ccb->ccb_h.target_lun != 0) { fwprintf(sc, HBA_FLAGS_DBG_ERROR_B, "Container not present: cmd 0x%x id %d lun %d len %d", *cmdp, ccb->ccb_h.target_id, ccb->ccb_h.target_lun, ccb->csio.dxfer_len); ccb->ccb_h.status = CAM_DEV_NOT_THERE; xpt_done(ccb); return; } if (ccb->csio.dxfer_len) bzero(ccb->csio.data_ptr, ccb->csio.dxfer_len); switch (*cmdp) { case INQUIRY: { struct scsi_inquiry *inq = (struct scsi_inquiry *)cmdp; fwprintf(sc, HBA_FLAGS_DBG_COMM_B, "Container INQUIRY id %d lun %d len %d VPD 0x%x Page 0x%x", ccb->ccb_h.target_id, ccb->ccb_h.target_lun, ccb->csio.dxfer_len, inq->byte2, inq->page_code); if (!(inq->byte2 & SI_EVPD)) { struct scsi_inquiry_data *p = (struct scsi_inquiry_data *)ccb->csio.data_ptr; if (inq->page_code != 0) { aac_set_scsi_error(sc, ccb, SCSI_STATUS_CHECK_COND, SSD_KEY_ILLEGAL_REQUEST, 0x24, 0x00); xpt_done(ccb); return; } p->device = T_DIRECT; p->version = SCSI_REV_SPC2; p->response_format = 2; if (ccb->csio.dxfer_len >= 36) { p->additional_length = 31; p->flags = SID_WBus16|SID_Sync|SID_CmdQue; /* OEM Vendor defines */ strncpy(p->vendor, "Adaptec ", sizeof(p->vendor)); strncpy(p->product, "Array ", sizeof(p->product)); strncpy(p->revision, "V1.0", sizeof(p->revision)); } } else { if (inq->page_code == SVPD_SUPPORTED_PAGE_LIST) { struct scsi_vpd_supported_page_list *p = (struct scsi_vpd_supported_page_list *) ccb->csio.data_ptr; p->device = T_DIRECT; p->page_code = SVPD_SUPPORTED_PAGE_LIST; p->length = 2; p->list[0] = SVPD_SUPPORTED_PAGE_LIST; p->list[1] = SVPD_UNIT_SERIAL_NUMBER; } else if (inq->page_code == SVPD_UNIT_SERIAL_NUMBER) { struct scsi_vpd_unit_serial_number *p = (struct scsi_vpd_unit_serial_number *) ccb->csio.data_ptr; p->device = T_DIRECT; p->page_code = SVPD_UNIT_SERIAL_NUMBER; p->length = sprintf((char *)p->serial_num, "%08X%02X", co->co_uid, ccb->ccb_h.target_id); } else { aac_set_scsi_error(sc, ccb, SCSI_STATUS_CHECK_COND, SSD_KEY_ILLEGAL_REQUEST, 0x24, 0x00); xpt_done(ccb); return; } } ccb->ccb_h.status = CAM_REQ_CMP; break; } case REPORT_LUNS: fwprintf(sc, HBA_FLAGS_DBG_COMM_B, "Container REPORT_LUNS id %d lun %d len %d", ccb->ccb_h.target_id, ccb->ccb_h.target_lun, ccb->csio.dxfer_len); ccb->ccb_h.status = CAM_REQ_CMP; break; case START_STOP: { struct scsi_start_stop_unit *ss = (struct scsi_start_stop_unit *)cmdp; fwprintf(sc, HBA_FLAGS_DBG_COMM_B, "Container START_STOP id %d lun %d len %d", ccb->ccb_h.target_id, ccb->ccb_h.target_lun, ccb->csio.dxfer_len); if (sc->aac_support_opt2 & AAC_SUPPORTED_POWER_MANAGEMENT) { struct aac_command *cm; struct aac_fib *fib; struct aac_cnt_config *ccfg; if (aacraid_alloc_command(sc, &cm)) { struct aac_event *event; xpt_freeze_simq(sim, 1); ccb->ccb_h.status = CAM_RESRC_UNAVAIL; ccb->ccb_h.sim_priv.entries[0].ptr = camsc; event = malloc(sizeof(struct aac_event), M_AACRAIDCAM, M_NOWAIT | M_ZERO); if (event == NULL) { device_printf(sc->aac_dev, "Warning, out of memory for event\n"); return; } event->ev_callback = aac_cam_event; event->ev_arg = ccb; event->ev_type = AAC_EVENT_CMFREE; aacraid_add_event(sc, event); return; } fib = cm->cm_fib; cm->cm_timestamp = time_uptime; cm->cm_datalen = 0; fib->Header.Size = sizeof(struct aac_fib_header) + sizeof(struct aac_cnt_config); fib->Header.XferState = AAC_FIBSTATE_HOSTOWNED | AAC_FIBSTATE_INITIALISED | AAC_FIBSTATE_EMPTY | AAC_FIBSTATE_FROMHOST | AAC_FIBSTATE_REXPECTED | AAC_FIBSTATE_NORM | AAC_FIBSTATE_ASYNC | AAC_FIBSTATE_FAST_RESPONSE; fib->Header.Command = ContainerCommand; /* Start unit */ ccfg = (struct aac_cnt_config *)&fib->data[0]; bzero(ccfg, sizeof (*ccfg) - CT_PACKET_SIZE); ccfg->Command = VM_ContainerConfig; ccfg->CTCommand.command = CT_PM_DRIVER_SUPPORT; ccfg->CTCommand.param[0] = (ss->how & SSS_START ? AAC_PM_DRIVERSUP_START_UNIT : AAC_PM_DRIVERSUP_STOP_UNIT); ccfg->CTCommand.param[1] = co->co_mntobj.ObjectId; ccfg->CTCommand.param[2] = 0; /* 1 - immediate */ + aac_cnt_config_tole(ccfg); if (aacraid_wait_command(cm) != 0 || - *(u_int32_t *)&fib->data[0] != 0) { + le32toh(*(u_int32_t *)&fib->data[0]) != 0) { printf("Power Management: Error start/stop container %d\n", co->co_mntobj.ObjectId); } aacraid_release_command(cm); } ccb->ccb_h.status = CAM_REQ_CMP; break; } case TEST_UNIT_READY: fwprintf(sc, HBA_FLAGS_DBG_COMM_B, "Container TEST_UNIT_READY id %d lun %d len %d", ccb->ccb_h.target_id, ccb->ccb_h.target_lun, ccb->csio.dxfer_len); ccb->ccb_h.status = CAM_REQ_CMP; break; case REQUEST_SENSE: fwprintf(sc, HBA_FLAGS_DBG_COMM_B, "Container REQUEST_SENSE id %d lun %d len %d", ccb->ccb_h.target_id, ccb->ccb_h.target_lun, ccb->csio.dxfer_len); ccb->ccb_h.status = CAM_REQ_CMP; break; case READ_CAPACITY: { struct scsi_read_capacity_data *p = (struct scsi_read_capacity_data *)ccb->csio.data_ptr; fwprintf(sc, HBA_FLAGS_DBG_COMM_B, "Container READ_CAPACITY id %d lun %d len %d", ccb->ccb_h.target_id, ccb->ccb_h.target_lun, ccb->csio.dxfer_len); scsi_ulto4b(co->co_mntobj.ObjExtension.BlockDevice.BlockSize, p->length); /* check if greater than 2TB */ if (co->co_mntobj.CapacityHigh) { if (sc->flags & AAC_FLAGS_LBA_64BIT) scsi_ulto4b(0xffffffff, p->addr); } else { scsi_ulto4b(co->co_mntobj.Capacity-1, p->addr); } ccb->ccb_h.status = CAM_REQ_CMP; break; } case SERVICE_ACTION_IN: { struct scsi_read_capacity_data_long *p = (struct scsi_read_capacity_data_long *) ccb->csio.data_ptr; fwprintf(sc, HBA_FLAGS_DBG_COMM_B, "Container SERVICE_ACTION_IN id %d lun %d len %d", ccb->ccb_h.target_id, ccb->ccb_h.target_lun, ccb->csio.dxfer_len); if (((struct scsi_read_capacity_16 *)cmdp)->service_action != SRC16_SERVICE_ACTION) { aac_set_scsi_error(sc, ccb, SCSI_STATUS_CHECK_COND, SSD_KEY_ILLEGAL_REQUEST, 0x24, 0x00); xpt_done(ccb); return; } scsi_ulto4b(co->co_mntobj.ObjExtension.BlockDevice.BlockSize, p->length); scsi_ulto4b(co->co_mntobj.CapacityHigh, p->addr); scsi_ulto4b(co->co_mntobj.Capacity-1, &p->addr[4]); if (ccb->csio.dxfer_len >= 14) { u_int32_t mapping = co->co_mntobj.ObjExtension.BlockDevice.bdLgclPhysMap; p->prot_lbppbe = 0; while (mapping > 1) { mapping >>= 1; p->prot_lbppbe++; } p->prot_lbppbe &= 0x0f; } ccb->ccb_h.status = CAM_REQ_CMP; break; } case MODE_SENSE_6: { struct scsi_mode_sense_6 *msp =(struct scsi_mode_sense_6 *)cmdp; struct ms6_data { struct scsi_mode_hdr_6 hd; struct scsi_mode_block_descr bd; char pages; } *p = (struct ms6_data *)ccb->csio.data_ptr; char *pagep; int return_all_pages = FALSE; fwprintf(sc, HBA_FLAGS_DBG_COMM_B, "Container MODE_SENSE id %d lun %d len %d page %d", ccb->ccb_h.target_id, ccb->ccb_h.target_lun, ccb->csio.dxfer_len, msp->page); p->hd.datalen = sizeof(struct scsi_mode_hdr_6) - 1; if (co->co_mntobj.ContentState & AAC_FSCS_READONLY) p->hd.dev_specific = 0x80; /* WP */ p->hd.dev_specific |= 0x10; /* DPOFUA */ if (msp->byte2 & SMS_DBD) { p->hd.block_descr_len = 0; } else { p->hd.block_descr_len = sizeof(struct scsi_mode_block_descr); p->hd.datalen += p->hd.block_descr_len; scsi_ulto3b(co->co_mntobj.ObjExtension.BlockDevice.BlockSize, p->bd.block_len); if (co->co_mntobj.Capacity > 0xffffff || co->co_mntobj.CapacityHigh) { p->bd.num_blocks[0] = 0xff; p->bd.num_blocks[1] = 0xff; p->bd.num_blocks[2] = 0xff; } else { p->bd.num_blocks[0] = (u_int8_t) (co->co_mntobj.Capacity >> 16); p->bd.num_blocks[1] = (u_int8_t) (co->co_mntobj.Capacity >> 8); p->bd.num_blocks[2] = (u_int8_t) (co->co_mntobj.Capacity); } } pagep = &p->pages; switch (msp->page & SMS_PAGE_CODE) { case SMS_ALL_PAGES_PAGE: return_all_pages = TRUE; case SMS_CONTROL_MODE_PAGE: { struct scsi_control_page *cp = (struct scsi_control_page *)pagep; if (ccb->csio.dxfer_len <= p->hd.datalen + 8) { aac_set_scsi_error(sc, ccb, SCSI_STATUS_CHECK_COND, SSD_KEY_ILLEGAL_REQUEST, 0x24, 0x00); xpt_done(ccb); return; } cp->page_code = SMS_CONTROL_MODE_PAGE; cp->page_length = 6; p->hd.datalen += 8; pagep += 8; if (!return_all_pages) break; } case SMS_VENDOR_SPECIFIC_PAGE: break; default: aac_set_scsi_error(sc, ccb, SCSI_STATUS_CHECK_COND, SSD_KEY_ILLEGAL_REQUEST, 0x24, 0x00); xpt_done(ccb); return; } ccb->ccb_h.status = CAM_REQ_CMP; break; } case SYNCHRONIZE_CACHE: fwprintf(sc, HBA_FLAGS_DBG_COMM_B, "Container SYNCHRONIZE_CACHE id %d lun %d len %d", ccb->ccb_h.target_id, ccb->ccb_h.target_lun, ccb->csio.dxfer_len); ccb->ccb_h.status = CAM_REQ_CMP; break; default: fwprintf(sc, HBA_FLAGS_DBG_ERROR_B, "Container unsupp. cmd 0x%x id %d lun %d len %d", *cmdp, ccb->ccb_h.target_id, ccb->ccb_h.target_lun, ccb->csio.dxfer_len); ccb->ccb_h.status = CAM_REQ_CMP; /*CAM_REQ_INVALID*/ break; } xpt_done(ccb); } static void aac_passthrough_command(struct cam_sim *sim, union ccb *ccb) { struct aac_cam *camsc; struct aac_softc *sc; struct aac_command *cm; struct aac_fib *fib; struct aac_srb *srb; camsc = (struct aac_cam *)cam_sim_softc(sim); sc = camsc->inf->aac_sc; mtx_assert(&sc->aac_io_lock, MA_OWNED); if (aacraid_alloc_command(sc, &cm)) { struct aac_event *event; xpt_freeze_simq(sim, 1); ccb->ccb_h.status = CAM_RESRC_UNAVAIL; ccb->ccb_h.sim_priv.entries[0].ptr = camsc; event = malloc(sizeof(struct aac_event), M_AACRAIDCAM, M_NOWAIT | M_ZERO); if (event == NULL) { device_printf(sc->aac_dev, "Warning, out of memory for event\n"); return; } event->ev_callback = aac_cam_event; event->ev_arg = ccb; event->ev_type = AAC_EVENT_CMFREE; aacraid_add_event(sc, event); return; } fib = cm->cm_fib; switch (ccb->ccb_h.flags & CAM_DIR_MASK) { case CAM_DIR_IN: cm->cm_flags |= AAC_CMD_DATAIN; break; case CAM_DIR_OUT: cm->cm_flags |= AAC_CMD_DATAOUT; break; case CAM_DIR_NONE: break; default: cm->cm_flags |= AAC_CMD_DATAIN | AAC_CMD_DATAOUT; break; } srb = (struct aac_srb *)&fib->data[0]; srb->function = AAC_SRB_FUNC_EXECUTE_SCSI; if (cm->cm_flags & (AAC_CMD_DATAIN|AAC_CMD_DATAOUT)) srb->flags = AAC_SRB_FLAGS_UNSPECIFIED_DIRECTION; if (cm->cm_flags & AAC_CMD_DATAIN) srb->flags = AAC_SRB_FLAGS_DATA_IN; else if (cm->cm_flags & AAC_CMD_DATAOUT) srb->flags = AAC_SRB_FLAGS_DATA_OUT; else srb->flags = AAC_SRB_FLAGS_NO_DATA_XFER; /* * Copy the CDB into the SRB. It's only 6-16 bytes, * so a copy is not too expensive. */ srb->cdb_len = ccb->csio.cdb_len; if (ccb->ccb_h.flags & CAM_CDB_POINTER) bcopy(ccb->csio.cdb_io.cdb_ptr, (u_int8_t *)&srb->cdb[0], srb->cdb_len); else bcopy(ccb->csio.cdb_io.cdb_bytes, (u_int8_t *)&srb->cdb[0], srb->cdb_len); /* Set command */ fib->Header.Command = (sc->flags & AAC_FLAGS_SG_64BIT) ? ScsiPortCommandU64 : ScsiPortCommand; fib->Header.Size = sizeof(struct aac_fib_header) + sizeof(struct aac_srb); /* Map the s/g list */ cm->cm_sgtable = &srb->sg_map; if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) { /* * Arrange things so that the S/G * map will get set up automagically */ cm->cm_data = (void *)ccb->csio.data_ptr; cm->cm_datalen = ccb->csio.dxfer_len; srb->data_len = ccb->csio.dxfer_len; } else { cm->cm_data = NULL; cm->cm_datalen = 0; srb->data_len = 0; } srb->bus = camsc->inf->BusNumber - 1; /* Bus no. rel. to the card */ srb->target = ccb->ccb_h.target_id; srb->lun = ccb->ccb_h.target_lun; srb->timeout = ccb->ccb_h.timeout; /* XXX */ srb->retry_limit = 0; + aac_srb_tole(srb); cm->cm_complete = aac_cam_complete; cm->cm_ccb = ccb; cm->cm_timestamp = time_uptime; fib->Header.XferState = AAC_FIBSTATE_HOSTOWNED | AAC_FIBSTATE_INITIALISED | AAC_FIBSTATE_FROMHOST | AAC_FIBSTATE_REXPECTED | AAC_FIBSTATE_NORM | AAC_FIBSTATE_ASYNC | AAC_FIBSTATE_FAST_RESPONSE; aac_enqueue_ready(cm); aacraid_startio(cm->cm_sc); } static void aac_cam_action(struct cam_sim *sim, union ccb *ccb) { struct aac_cam *camsc; struct aac_softc *sc; camsc = (struct aac_cam *)cam_sim_softc(sim); sc = camsc->inf->aac_sc; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_assert(&sc->aac_io_lock, MA_OWNED); /* Synchronous ops, and ops that don't require communication with the * controller */ switch(ccb->ccb_h.func_code) { case XPT_SCSI_IO: /* This is handled down below */ break; case XPT_CALC_GEOMETRY: { struct ccb_calc_geometry *ccg; u_int32_t size_mb; u_int32_t secs_per_cylinder; ccg = &ccb->ccg; size_mb = ccg->volume_size / ((1024L * 1024L) / ccg->block_size); if (size_mb >= (2 * 1024)) { /* 2GB */ ccg->heads = 255; ccg->secs_per_track = 63; } else if (size_mb >= (1 * 1024)) { /* 1GB */ ccg->heads = 128; ccg->secs_per_track = 32; } else { ccg->heads = 64; ccg->secs_per_track = 32; } secs_per_cylinder = ccg->heads * ccg->secs_per_track; ccg->cylinders = ccg->volume_size / secs_per_cylinder; ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_PATH_INQ: { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; cpi->target_sprt = 0; cpi->hba_eng_cnt = 0; cpi->max_target = camsc->inf->TargetsPerBus - 1; cpi->max_lun = 7; /* Per the controller spec */ cpi->initiator_id = camsc->inf->InitiatorBusId; cpi->bus_id = camsc->inf->BusNumber; cpi->maxio = sc->aac_max_sectors << 9; /* * Resetting via the passthrough or parallel bus scan * causes problems. */ cpi->hba_misc = PIM_NOBUSRESET; cpi->hba_inquiry = PI_TAG_ABLE; cpi->base_transfer_speed = 300000; #ifdef CAM_NEW_TRAN_CODE cpi->hba_misc |= PIM_SEQSCAN; cpi->protocol = PROTO_SCSI; cpi->transport = XPORT_SAS; cpi->transport_version = 0; cpi->protocol_version = SCSI_REV_SPC2; #endif strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strlcpy(cpi->hba_vid, "PMC-Sierra", HBA_IDLEN); strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_GET_TRAN_SETTINGS: { #ifdef CAM_NEW_TRAN_CODE struct ccb_trans_settings_scsi *scsi = &ccb->cts.proto_specific.scsi; struct ccb_trans_settings_spi *spi = &ccb->cts.xport_specific.spi; ccb->cts.protocol = PROTO_SCSI; ccb->cts.protocol_version = SCSI_REV_SPC2; ccb->cts.transport = XPORT_SAS; ccb->cts.transport_version = 0; scsi->valid = CTS_SCSI_VALID_TQ; scsi->flags = CTS_SCSI_FLAGS_TAG_ENB; spi->valid |= CTS_SPI_VALID_DISC; spi->flags |= CTS_SPI_FLAGS_DISC_ENB; #else ccb->cts.flags = ~(CCB_TRANS_DISC_ENB | CCB_TRANS_TAG_ENB); ccb->cts.valid = CCB_TRANS_DISC_VALID | CCB_TRANS_TQ_VALID; #endif ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_SET_TRAN_SETTINGS: ccb->ccb_h.status = CAM_FUNC_NOTAVAIL; xpt_done(ccb); return; case XPT_RESET_BUS: if (!(sc->flags & AAC_FLAGS_CAM_NORESET) && camsc->inf->BusType != CONTAINER_BUS) { ccb->ccb_h.status = aac_cam_reset_bus(sim, ccb); } else { ccb->ccb_h.status = CAM_REQ_CMP; } xpt_done(ccb); return; case XPT_RESET_DEV: ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; case XPT_ABORT: ccb->ccb_h.status = aac_cam_abort_ccb(sim, ccb); xpt_done(ccb); return; case XPT_TERM_IO: ccb->ccb_h.status = aac_cam_term_io(sim, ccb); xpt_done(ccb); return; default: device_printf(sc->aac_dev, "Unsupported command 0x%x\n", ccb->ccb_h.func_code); ccb->ccb_h.status = CAM_PROVIDE_FAIL; xpt_done(ccb); return; } /* Async ops that require communcation with the controller */ if (camsc->inf->BusType == CONTAINER_BUS) { u_int8_t *cmdp; if (ccb->ccb_h.flags & CAM_CDB_POINTER) cmdp = ccb->csio.cdb_io.cdb_ptr; else cmdp = &ccb->csio.cdb_io.cdb_bytes[0]; if (*cmdp==READ_6 || *cmdp==WRITE_6 || *cmdp==READ_10 || *cmdp==WRITE_10 || *cmdp==READ_12 || *cmdp==WRITE_12 || *cmdp==READ_16 || *cmdp==WRITE_16) aac_container_rw_command(sim, ccb, cmdp); else aac_container_special_command(sim, ccb, cmdp); } else { aac_passthrough_command(sim, ccb); } } static void aac_cam_poll(struct cam_sim *sim) { /* * Pinging the interrupt routine isn't very safe, nor is it * really necessary. Do nothing. */ } static void aac_container_complete(struct aac_command *cm) { union ccb *ccb; u_int32_t status; fwprintf(cm->cm_sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); ccb = cm->cm_ccb; - status = ((u_int32_t *)cm->cm_fib->data)[0]; + status = le32toh(((u_int32_t *)cm->cm_fib->data)[0]); if (cm->cm_flags & AAC_CMD_RESET) { ccb->ccb_h.status = CAM_SCSI_BUS_RESET; } else if (status == ST_OK) { ccb->ccb_h.status = CAM_REQ_CMP; } else if (status == ST_NOT_READY) { ccb->ccb_h.status = CAM_BUSY; } else { ccb->ccb_h.status = CAM_REQ_CMP_ERR; } aacraid_release_command(cm); xpt_done(ccb); } static void aac_cam_complete(struct aac_command *cm) { union ccb *ccb; struct aac_srb_response *srbr; struct aac_softc *sc; sc = cm->cm_sc; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); ccb = cm->cm_ccb; srbr = (struct aac_srb_response *)&cm->cm_fib->data[0]; + aac_srb_response_toh(srbr); if (cm->cm_flags & AAC_CMD_FASTRESP) { /* fast response */ srbr->srb_status = CAM_REQ_CMP; srbr->scsi_status = SCSI_STATUS_OK; srbr->sense_len = 0; } if (cm->cm_flags & AAC_CMD_RESET) { ccb->ccb_h.status = CAM_SCSI_BUS_RESET; } else if (srbr->fib_status != 0) { device_printf(sc->aac_dev, "Passthru FIB failed!\n"); ccb->ccb_h.status = CAM_REQ_ABORTED; } else { /* * The SRB error codes just happen to match the CAM error * codes. How convenient! */ ccb->ccb_h.status = srbr->srb_status; /* Take care of SCSI_IO ops. */ if (ccb->ccb_h.func_code == XPT_SCSI_IO) { u_int8_t command, device; ccb->csio.scsi_status = srbr->scsi_status; /* Take care of autosense */ if (srbr->sense_len) { int sense_len, scsi_sense_len; scsi_sense_len = sizeof(struct scsi_sense_data); bzero(&ccb->csio.sense_data, scsi_sense_len); sense_len = (srbr->sense_len > scsi_sense_len) ? scsi_sense_len : srbr->sense_len; bcopy(&srbr->sense[0], &ccb->csio.sense_data, sense_len); ccb->csio.sense_len = sense_len; ccb->ccb_h.status |= CAM_AUTOSNS_VALID; // scsi_sense_print(&ccb->csio); } /* If this is an inquiry command, fake things out */ if (ccb->ccb_h.flags & CAM_CDB_POINTER) command = ccb->csio.cdb_io.cdb_ptr[0]; else command = ccb->csio.cdb_io.cdb_bytes[0]; if (command == INQUIRY) { if (ccb->ccb_h.status == CAM_REQ_CMP) { device = ccb->csio.data_ptr[0] & 0x1f; /* * We want DASD and PROC devices to only be * visible through the pass device. */ if ((device == T_DIRECT && !(sc->aac_feature_bits & AAC_SUPPL_SUPPORTED_JBOD)) || (device == T_PROCESSOR)) ccb->csio.data_ptr[0] = ((device & 0xe0) | T_NODEVICE); /* handle phys. components of a log. drive */ if (ccb->csio.data_ptr[0] & 0x20) { if (sc->hint_flags & 8) { /* expose phys. device (daXX) */ ccb->csio.data_ptr[0] &= 0xdf; } else { /* phys. device only visible through pass device (passXX) */ ccb->csio.data_ptr[0] |= 0x10; } } } else if (ccb->ccb_h.status == CAM_SEL_TIMEOUT && ccb->ccb_h.target_lun != 0) { /* fix for INQUIRYs on Lun>0 */ ccb->ccb_h.status = CAM_DEV_NOT_THERE; } } } } aacraid_release_command(cm); xpt_done(ccb); } static u_int32_t aac_cam_reset_bus(struct cam_sim *sim, union ccb *ccb) { struct aac_command *cm; struct aac_fib *fib; struct aac_softc *sc; struct aac_cam *camsc; struct aac_vmioctl *vmi; struct aac_resetbus *rbc; u_int32_t rval; camsc = (struct aac_cam *)cam_sim_softc(sim); sc = camsc->inf->aac_sc; if (sc == NULL) { printf("aac: Null sc?\n"); return (CAM_REQ_ABORTED); } if (aacraid_alloc_command(sc, &cm)) { struct aac_event *event; xpt_freeze_simq(sim, 1); ccb->ccb_h.status = CAM_RESRC_UNAVAIL; ccb->ccb_h.sim_priv.entries[0].ptr = camsc; event = malloc(sizeof(struct aac_event), M_AACRAIDCAM, M_NOWAIT | M_ZERO); if (event == NULL) { device_printf(sc->aac_dev, "Warning, out of memory for event\n"); return (CAM_REQ_ABORTED); } event->ev_callback = aac_cam_event; event->ev_arg = ccb; event->ev_type = AAC_EVENT_CMFREE; aacraid_add_event(sc, event); return (CAM_REQ_ABORTED); } fib = cm->cm_fib; cm->cm_timestamp = time_uptime; cm->cm_datalen = 0; fib->Header.Size = sizeof(struct aac_fib_header) + sizeof(struct aac_vmioctl); fib->Header.XferState = AAC_FIBSTATE_HOSTOWNED | AAC_FIBSTATE_INITIALISED | AAC_FIBSTATE_EMPTY | AAC_FIBSTATE_FROMHOST | AAC_FIBSTATE_REXPECTED | AAC_FIBSTATE_NORM | AAC_FIBSTATE_ASYNC | AAC_FIBSTATE_FAST_RESPONSE; fib->Header.Command = ContainerCommand; vmi = (struct aac_vmioctl *)&fib->data[0]; bzero(vmi, sizeof(struct aac_vmioctl)); vmi->Command = VM_Ioctl; vmi->ObjType = FT_DRIVE; vmi->MethId = sc->scsi_method_id; vmi->ObjId = 0; vmi->IoctlCmd = ResetBus; rbc = (struct aac_resetbus *)&vmi->IoctlBuf[0]; rbc->BusNumber = camsc->inf->BusNumber - 1; + aac_vmioctl_tole(vmi); if (aacraid_wait_command(cm) != 0) { device_printf(sc->aac_dev,"Error sending ResetBus command\n"); rval = CAM_REQ_ABORTED; } else { rval = CAM_REQ_CMP; } aacraid_release_command(cm); return (rval); } static u_int32_t aac_cam_abort_ccb(struct cam_sim *sim, union ccb *ccb) { return (CAM_UA_ABORT); } static u_int32_t aac_cam_term_io(struct cam_sim *sim, union ccb *ccb) { return (CAM_UA_TERMIO); } static int aac_load_map_command_sg(struct aac_softc *sc, struct aac_command *cm) { int error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); error = bus_dmamap_load(sc->aac_buffer_dmat, cm->cm_datamap, cm->cm_data, cm->cm_datalen, aacraid_map_command_sg, cm, 0); if (error == EINPROGRESS) { fwprintf(sc, HBA_FLAGS_DBG_INIT_B, "freezing queue\n"); sc->flags |= AAC_QUEUE_FRZN; error = 0; } else if (error != 0) { panic("aac_load_map_command_sg: unexpected error %d from " "busdma", error); } return(error); } /* * Start as much queued I/O as possible on the controller */ void aacraid_startio(struct aac_softc *sc) { struct aac_command *cm; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); for (;;) { if (sc->aac_state & AAC_STATE_RESET) { fwprintf(sc, HBA_FLAGS_DBG_ERROR_B, "AAC_STATE_RESET"); break; } /* * This flag might be set if the card is out of resources. * Checking it here prevents an infinite loop of deferrals. */ if (sc->flags & AAC_QUEUE_FRZN) { fwprintf(sc, HBA_FLAGS_DBG_ERROR_B, "AAC_QUEUE_FRZN"); break; } /* * Try to get a command that's been put off for lack of * resources */ if ((sc->flags & AAC_FLAGS_SYNC_MODE) && sc->aac_sync_cm) break; cm = aac_dequeue_ready(sc); /* nothing to do? */ if (cm == NULL) break; /* don't map more than once */ if (cm->cm_flags & AAC_CMD_MAPPED) panic("aac: command %p already mapped", cm); /* * Set up the command to go to the controller. If there are no * data buffers associated with the command then it can bypass * busdma. */ if (cm->cm_datalen) aac_load_map_command_sg(sc, cm); else aacraid_map_command_sg(cm, NULL, 0, 0); } } diff --git a/sys/dev/aacraid/aacraid_endian.c b/sys/dev/aacraid/aacraid_endian.c new file mode 100644 index 000000000000..29da6c4d4d4f --- /dev/null +++ b/sys/dev/aacraid/aacraid_endian.c @@ -0,0 +1,389 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Leandro Lupori + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include + +#if _BYTE_ORDER != _LITTLE_ENDIAN + +#define TOH2(field, bits) field = le##bits##toh(field) +#define TOH(field, bits) TOH2(field, bits) + +#define TOLE2(field, bits) field = htole##bits(field) +#define TOLE(field, bits) TOLE2(field, bits) + +/* Convert from Little-Endian to host order (TOH) */ + +void +aac_fib_header_toh(struct aac_fib_header *ptr) +{ + TOH(ptr->XferState, 32); + TOH(ptr->Command, 16); + TOH(ptr->Size, 16); + TOH(ptr->SenderSize, 16); + TOH(ptr->SenderFibAddress, 32); + TOH(ptr->u.ReceiverFibAddress, 32); + TOH(ptr->Handle, 32); + TOH(ptr->Previous, 32); + TOH(ptr->Next, 32); +} + +void +aac_adapter_info_toh(struct aac_adapter_info *ptr) +{ + TOH(ptr->PlatformBase, 32); + TOH(ptr->CpuArchitecture, 32); + TOH(ptr->CpuVariant, 32); + TOH(ptr->ClockSpeed, 32); + TOH(ptr->ExecutionMem, 32); + TOH(ptr->BufferMem, 32); + TOH(ptr->TotalMem, 32); + + TOH(ptr->KernelRevision.buildNumber, 32); + TOH(ptr->MonitorRevision.buildNumber, 32); + TOH(ptr->HardwareRevision.buildNumber, 32); + TOH(ptr->BIOSRevision.buildNumber, 32); + + TOH(ptr->ClusteringEnabled, 32); + TOH(ptr->ClusterChannelMask, 32); + TOH(ptr->SerialNumber, 64); + TOH(ptr->batteryPlatform, 32); + TOH(ptr->SupportedOptions, 32); + TOH(ptr->OemVariant, 32); +} + +void +aac_container_creation_toh(struct aac_container_creation *ptr) +{ + u_int32_t *date = (u_int32_t *)ptr + 1; + + *date = le32toh(*date); + TOH(ptr->ViaAdapterSerialNumber, 64); +} + +void +aac_mntobj_toh(struct aac_mntobj *ptr) +{ + TOH(ptr->ObjectId, 32); + aac_container_creation_toh(&ptr->CreateInfo); + TOH(ptr->Capacity, 32); + TOH(ptr->VolType, 32); + TOH(ptr->ObjType, 32); + TOH(ptr->ContentState, 32); + TOH(ptr->ObjExtension.BlockDevice.BlockSize, 32); + TOH(ptr->ObjExtension.BlockDevice.bdLgclPhysMap, 32); + TOH(ptr->AlterEgoId, 32); + TOH(ptr->CapacityHigh, 32); +} + +void +aac_mntinforesp_toh(struct aac_mntinforesp *ptr) +{ + TOH(ptr->Status, 32); + TOH(ptr->MntType, 32); + TOH(ptr->MntRespCount, 32); + aac_mntobj_toh(&ptr->MntTable[0]); +} + +void +aac_fsa_ctm_toh(struct aac_fsa_ctm *ptr) +{ + int i; + + TOH(ptr->command, 32); + for (i = 0; i < CT_FIB_PARAMS; i++) + TOH(ptr->param[i], 32); +} + +void +aac_cnt_config_toh(struct aac_cnt_config *ptr) +{ + TOH(ptr->Command, 32); + aac_fsa_ctm_toh(&ptr->CTCommand); +} + +void +aac_ctcfg_resp_toh(struct aac_ctcfg_resp *ptr) +{ + TOH(ptr->Status, 32); + TOH(ptr->resp, 32); + TOH(ptr->param, 32); +} + +void +aac_getbusinf_toh(struct aac_getbusinf *ptr) +{ + TOH(ptr->ProbeComplete, 32); + TOH(ptr->BusCount, 32); + TOH(ptr->TargetsPerBus, 32); +} + +void +aac_vmi_businf_resp_toh(struct aac_vmi_businf_resp *ptr) +{ + TOH(ptr->Status, 32); + TOH(ptr->ObjType, 32); + TOH(ptr->MethId, 32); + TOH(ptr->ObjId, 32); + TOH(ptr->IoctlCmd, 32); + aac_getbusinf_toh(&ptr->BusInf); +} + +void +aac_srb_response_toh(struct aac_srb_response *ptr) +{ + TOH(ptr->fib_status, 32); + TOH(ptr->srb_status, 32); + TOH(ptr->scsi_status, 32); + TOH(ptr->data_len, 32); + TOH(ptr->sense_len, 32); +} + +/* Convert from host order to Little-Endian (TOLE) */ + +void +aac_adapter_init_tole(struct aac_adapter_init *ptr) +{ + TOLE(ptr->InitStructRevision, 32); + TOLE(ptr->NoOfMSIXVectors, 32); + TOLE(ptr->FilesystemRevision, 32); + TOLE(ptr->CommHeaderAddress, 32); + TOLE(ptr->FastIoCommAreaAddress, 32); + TOLE(ptr->AdapterFibsPhysicalAddress, 32); + TOLE(ptr->AdapterFibsVirtualAddress, 32); + TOLE(ptr->AdapterFibsSize, 32); + TOLE(ptr->AdapterFibAlign, 32); + TOLE(ptr->PrintfBufferAddress, 32); + TOLE(ptr->PrintfBufferSize, 32); + TOLE(ptr->HostPhysMemPages, 32); + TOLE(ptr->HostElapsedSeconds, 32); + TOLE(ptr->InitFlags, 32); + TOLE(ptr->MaxIoCommands, 32); + TOLE(ptr->MaxIoSize, 32); + TOLE(ptr->MaxFibSize, 32); + TOLE(ptr->MaxNumAif, 32); + TOLE(ptr->HostRRQ_AddrLow, 32); + TOLE(ptr->HostRRQ_AddrHigh, 32); +} + +void +aac_fib_header_tole(struct aac_fib_header *ptr) +{ + TOLE(ptr->XferState, 32); + TOLE(ptr->Command, 16); + TOLE(ptr->Size, 16); + TOLE(ptr->SenderSize, 16); + TOLE(ptr->SenderFibAddress, 32); + TOLE(ptr->u.ReceiverFibAddress, 32); + TOLE(ptr->Handle, 32); + TOLE(ptr->Previous, 32); + TOLE(ptr->Next, 32); +} + +void +aac_mntinfo_tole(struct aac_mntinfo *ptr) +{ + TOLE(ptr->Command, 32); + TOLE(ptr->MntType, 32); + TOLE(ptr->MntCount, 32); +} + +void +aac_fsa_ctm_tole(struct aac_fsa_ctm *ptr) +{ + int i; + + TOLE(ptr->command, 32); + for (i = 0; i < CT_FIB_PARAMS; i++) + TOLE(ptr->param[i], 32); +} + +void +aac_cnt_config_tole(struct aac_cnt_config *ptr) +{ + TOLE(ptr->Command, 32); + aac_fsa_ctm_tole(&ptr->CTCommand); +} + +void +aac_raw_io_tole(struct aac_raw_io *ptr) +{ + TOLE(ptr->BlockNumber, 64); + TOLE(ptr->ByteCount, 32); + TOLE(ptr->ContainerId, 16); + TOLE(ptr->Flags, 16); + TOLE(ptr->BpTotal, 16); + TOLE(ptr->BpComplete, 16); +} + +void +aac_raw_io2_tole(struct aac_raw_io2 *ptr) +{ + TOLE(ptr->strtBlkLow, 32); + TOLE(ptr->strtBlkHigh, 32); + TOLE(ptr->byteCnt, 32); + TOLE(ptr->ldNum, 16); + TOLE(ptr->flags, 16); + TOLE(ptr->sgeFirstSize, 32); + TOLE(ptr->sgeNominalSize, 32); +} + +void +aac_fib_xporthdr_tole(struct aac_fib_xporthdr *ptr) +{ + TOLE(ptr->HostAddress, 64); + TOLE(ptr->Size, 32); + TOLE(ptr->Handle, 32); +} + +void +aac_ctcfg_tole(struct aac_ctcfg *ptr) +{ + TOLE(ptr->Command, 32); + TOLE(ptr->cmd, 32); + TOLE(ptr->param, 32); +} + +void +aac_vmioctl_tole(struct aac_vmioctl *ptr) +{ + TOLE(ptr->Command, 32); + TOLE(ptr->ObjType, 32); + TOLE(ptr->MethId, 32); + TOLE(ptr->ObjId, 32); + TOLE(ptr->IoctlCmd, 32); + TOLE(ptr->IoctlBuf[0], 32); +} + +void +aac_pause_command_tole(struct aac_pause_command *ptr) +{ + TOLE(ptr->Command, 32); + TOLE(ptr->Type, 32); + TOLE(ptr->Timeout, 32); + TOLE(ptr->Min, 32); + TOLE(ptr->NoRescan, 32); + TOLE(ptr->Parm3, 32); + TOLE(ptr->Parm4, 32); + TOLE(ptr->Count, 32); +} + +void +aac_srb_tole(struct aac_srb *ptr) +{ + TOLE(ptr->function, 32); + TOLE(ptr->bus, 32); + TOLE(ptr->target, 32); + TOLE(ptr->lun, 32); + TOLE(ptr->timeout, 32); + TOLE(ptr->flags, 32); + TOLE(ptr->data_len, 32); + TOLE(ptr->retry_limit, 32); + TOLE(ptr->cdb_len, 32); +} + +void +aac_sge_ieee1212_tole(struct aac_sge_ieee1212 *ptr) +{ + TOLE(ptr->addrLow, 32); + TOLE(ptr->addrHigh, 32); + TOLE(ptr->length, 32); + TOLE(ptr->flags, 32); +} + +void +aac_sg_entryraw_tole(struct aac_sg_entryraw *ptr) +{ + TOLE(ptr->Next, 32); + TOLE(ptr->Prev, 32); + TOLE(ptr->SgAddress, 64); + TOLE(ptr->SgByteCount, 32); + TOLE(ptr->Flags, 32); +} + +void +aac_sg_entry_tole(struct aac_sg_entry *ptr) +{ + TOLE(ptr->SgAddress, 32); + TOLE(ptr->SgByteCount, 32); +} + +void +aac_sg_entry64_tole(struct aac_sg_entry64 *ptr) +{ + TOLE(ptr->SgAddress, 64); + TOLE(ptr->SgByteCount, 32); +} + +void +aac_blockread_tole(struct aac_blockread *ptr) +{ + TOLE(ptr->Command, 32); + TOLE(ptr->ContainerId, 32); + TOLE(ptr->BlockNumber, 32); + TOLE(ptr->ByteCount, 32); +} + +void +aac_blockwrite_tole(struct aac_blockwrite *ptr) +{ + TOLE(ptr->Command, 32); + TOLE(ptr->ContainerId, 32); + TOLE(ptr->BlockNumber, 32); + TOLE(ptr->ByteCount, 32); + TOLE(ptr->Stable, 32); +} + +void +aac_blockread64_tole(struct aac_blockread64 *ptr) +{ + TOLE(ptr->Command, 32); + TOLE(ptr->ContainerId, 16); + TOLE(ptr->SectorCount, 16); + TOLE(ptr->BlockNumber, 32); + TOLE(ptr->Pad, 16); + TOLE(ptr->Flags, 16); +} + +void +aac_blockwrite64_tole(struct aac_blockwrite64 *ptr) +{ + TOLE(ptr->Command, 32); + TOLE(ptr->ContainerId, 16); + TOLE(ptr->SectorCount, 16); + TOLE(ptr->BlockNumber, 32); + TOLE(ptr->Pad, 16); + TOLE(ptr->Flags, 16); +} + +#endif diff --git a/sys/dev/aacraid/aacraid_endian.h b/sys/dev/aacraid/aacraid_endian.h new file mode 100644 index 000000000000..c57585db8310 --- /dev/null +++ b/sys/dev/aacraid/aacraid_endian.h @@ -0,0 +1,114 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Leandro Lupori + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef AACRAID_ENDIAN_H +#define AACRAID_ENDIAN_H + +#include + +#if _BYTE_ORDER == _LITTLE_ENDIAN + +/* On Little-Endian (LE) hosts, make all FIB data conversion functions empty. */ + +/* Convert from Little-Endian to host order (TOH) */ +#define aac_fib_header_toh(ptr) +#define aac_adapter_info_toh(ptr) +#define aac_container_creation_toh(ptr) +#define aac_mntobj_toh(ptr) +#define aac_mntinforesp_toh(ptr) +#define aac_fsa_ctm_toh(ptr) +#define aac_cnt_config_toh(ptr) +#define aac_ctcfg_resp_toh(ptr) +#define aac_getbusinf_toh(ptr) +#define aac_vmi_businf_resp_toh(ptr) +#define aac_srb_response_toh(ptr) + +/* Convert from host order to Little-Endian (TOLE) */ +#define aac_adapter_init_tole(ptr) +#define aac_fib_header_tole(ptr) +#define aac_mntinfo_tole(ptr) +#define aac_fsa_ctm_tole(ptr) +#define aac_cnt_config_tole(ptr) +#define aac_raw_io_tole(ptr) +#define aac_raw_io2_tole(ptr) +#define aac_fib_xporthdr_tole(ptr) +#define aac_ctcfg_tole(ptr) +#define aac_vmioctl_tole(ptr) +#define aac_pause_command_tole(ptr) +#define aac_srb_tole(ptr) +#define aac_sge_ieee1212_tole(ptr) +#define aac_sg_entryraw_tole(ptr) +#define aac_sg_entry_tole(ptr) +#define aac_sg_entry64_tole(ptr) +#define aac_blockread_tole(ptr) +#define aac_blockwrite_tole(ptr) +#define aac_blockread64_tole(ptr) +#define aac_blockwrite64_tole(ptr) + + +#else /* _BYTE_ORDER != _LITTLE_ENDIAN */ + +/* Convert from Little-Endian to host order (TOH) */ +void aac_fib_header_toh(struct aac_fib_header *ptr); +void aac_adapter_info_toh(struct aac_adapter_info *ptr); +void aac_container_creation_toh(struct aac_container_creation *ptr); +void aac_mntobj_toh(struct aac_mntobj *ptr); +void aac_mntinforesp_toh(struct aac_mntinforesp *ptr); +void aac_fsa_ctm_toh(struct aac_fsa_ctm *ptr); +void aac_cnt_config_toh(struct aac_cnt_config *ptr); +void aac_ctcfg_resp_toh(struct aac_ctcfg_resp *ptr); +void aac_getbusinf_toh(struct aac_getbusinf *ptr); +void aac_vmi_businf_resp_toh(struct aac_vmi_businf_resp *ptr); +void aac_srb_response_toh(struct aac_srb_response *ptr); + +/* Convert from host order to Little-Endian (TOLE) */ +void aac_adapter_init_tole(struct aac_adapter_init *ptr); +void aac_fib_header_tole(struct aac_fib_header *ptr); +void aac_mntinfo_tole(struct aac_mntinfo *ptr); +void aac_fsa_ctm_tole(struct aac_fsa_ctm *ptr); +void aac_cnt_config_tole(struct aac_cnt_config *ptr); +void aac_raw_io_tole(struct aac_raw_io *ptr); +void aac_raw_io2_tole(struct aac_raw_io2 *ptr); +void aac_fib_xporthdr_tole(struct aac_fib_xporthdr *ptr); +void aac_ctcfg_tole(struct aac_ctcfg *ptr); +void aac_vmioctl_tole(struct aac_vmioctl *ptr); +void aac_pause_command_tole(struct aac_pause_command *ptr); +void aac_srb_tole(struct aac_srb *ptr); +void aac_sge_ieee1212_tole(struct aac_sge_ieee1212 *ptr); +void aac_sg_entryraw_tole(struct aac_sg_entryraw *ptr); +void aac_sg_entry_tole(struct aac_sg_entry *ptr); +void aac_sg_entry64_tole(struct aac_sg_entry64 *ptr); +void aac_blockread_tole(struct aac_blockread *ptr); +void aac_blockwrite_tole(struct aac_blockwrite *ptr); +void aac_blockread64_tole(struct aac_blockread64 *ptr); +void aac_blockwrite64_tole(struct aac_blockwrite64 *ptr); + +#endif + +#endif diff --git a/sys/dev/ixl/if_ixl.c b/sys/dev/ixl/if_ixl.c index d84a55d6d99e..e6adade93498 100644 --- a/sys/dev/ixl/if_ixl.c +++ b/sys/dev/ixl/if_ixl.c @@ -1,1711 +1,1712 @@ /****************************************************************************** Copyright (c) 2013-2018, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "ixl.h" #include "ixl_pf.h" #ifdef IXL_IW #include "ixl_iw.h" #include "ixl_iw_int.h" #endif #ifdef PCI_IOV #include "ixl_pf_iov.h" #endif /********************************************************************* * Driver version *********************************************************************/ #define IXL_DRIVER_VERSION_MAJOR 2 #define IXL_DRIVER_VERSION_MINOR 1 #define IXL_DRIVER_VERSION_BUILD 0 #define IXL_DRIVER_VERSION_STRING \ __XSTRING(IXL_DRIVER_VERSION_MAJOR) "." \ __XSTRING(IXL_DRIVER_VERSION_MINOR) "." \ __XSTRING(IXL_DRIVER_VERSION_BUILD) "-k" /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * * ( Vendor ID, Device ID, Branding String ) *********************************************************************/ static pci_vendor_info_t ixl_vendor_info_array[] = { PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_XL710, "Intel(R) Ethernet Controller X710 for 10GbE SFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_KX_B, "Intel(R) Ethernet Controller XL710 for 40GbE backplane"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_KX_C, "Intel(R) Ethernet Controller X710 for 10GbE backplane"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_A, "Intel(R) Ethernet Controller XL710 for 40GbE QSFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_B, "Intel(R) Ethernet Controller XL710 for 40GbE QSFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_C, "Intel(R) Ethernet Controller X710 for 10GbE QSFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T, "Intel(R) Ethernet Controller X710 for 10GBASE-T"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T4, "Intel(R) Ethernet Controller X710/X557-AT 10GBASE-T"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_KX_X722, "Intel(R) Ethernet Connection X722 for 10GbE backplane"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_X722, "Intel(R) Ethernet Connection X722 for 10GbE QSFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_X722, "Intel(R) Ethernet Connection X722 for 10GbE SFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_1G_BASE_T_X722, "Intel(R) Ethernet Connection X722 for 1GbE"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T_X722, "Intel(R) Ethernet Connection X722 for 10GBASE-T"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_I_X722, "Intel(R) Ethernet Connection X722 for 10GbE SFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_25G_B, "Intel(R) Ethernet Controller XXV710 for 25GbE backplane"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_25G_SFP28, "Intel(R) Ethernet Controller XXV710 for 25GbE SFP28"), /* required last entry */ PVID_END }; /********************************************************************* * Function prototypes *********************************************************************/ /*** IFLIB interface ***/ static void *ixl_register(device_t dev); static int ixl_if_attach_pre(if_ctx_t ctx); static int ixl_if_attach_post(if_ctx_t ctx); static int ixl_if_detach(if_ctx_t ctx); static int ixl_if_shutdown(if_ctx_t ctx); static int ixl_if_suspend(if_ctx_t ctx); static int ixl_if_resume(if_ctx_t ctx); static int ixl_if_msix_intr_assign(if_ctx_t ctx, int msix); static void ixl_if_enable_intr(if_ctx_t ctx); static void ixl_if_disable_intr(if_ctx_t ctx); static int ixl_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); static int ixl_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid); static int ixl_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); static int ixl_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets); static void ixl_if_queues_free(if_ctx_t ctx); static void ixl_if_update_admin_status(if_ctx_t ctx); static void ixl_if_multi_set(if_ctx_t ctx); static int ixl_if_mtu_set(if_ctx_t ctx, uint32_t mtu); static void ixl_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr); static int ixl_if_media_change(if_ctx_t ctx); static int ixl_if_promisc_set(if_ctx_t ctx, int flags); static void ixl_if_timer(if_ctx_t ctx, uint16_t qid); static void ixl_if_vlan_register(if_ctx_t ctx, u16 vtag); static void ixl_if_vlan_unregister(if_ctx_t ctx, u16 vtag); static uint64_t ixl_if_get_counter(if_ctx_t ctx, ift_counter cnt); static int ixl_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req); static int ixl_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data); #ifdef PCI_IOV static void ixl_if_vflr_handle(if_ctx_t ctx); #endif /*** Other ***/ static u_int ixl_mc_filter_apply(void *, struct sockaddr_dl *, u_int); static void ixl_save_pf_tunables(struct ixl_pf *); static int ixl_allocate_pci_resources(struct ixl_pf *); /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t ixl_methods[] = { /* Device interface */ DEVMETHOD(device_register, ixl_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), #ifdef PCI_IOV DEVMETHOD(pci_iov_init, iflib_device_iov_init), DEVMETHOD(pci_iov_uninit, iflib_device_iov_uninit), DEVMETHOD(pci_iov_add_vf, iflib_device_iov_add_vf), #endif DEVMETHOD_END }; static driver_t ixl_driver = { "ixl", ixl_methods, sizeof(struct ixl_pf), }; devclass_t ixl_devclass; DRIVER_MODULE(ixl, pci, ixl_driver, ixl_devclass, 0, 0); IFLIB_PNP_INFO(pci, ixl, ixl_vendor_info_array); MODULE_VERSION(ixl, 3); MODULE_DEPEND(ixl, pci, 1, 1, 1); MODULE_DEPEND(ixl, ether, 1, 1, 1); MODULE_DEPEND(ixl, iflib, 1, 1, 1); static device_method_t ixl_if_methods[] = { DEVMETHOD(ifdi_attach_pre, ixl_if_attach_pre), DEVMETHOD(ifdi_attach_post, ixl_if_attach_post), DEVMETHOD(ifdi_detach, ixl_if_detach), DEVMETHOD(ifdi_shutdown, ixl_if_shutdown), DEVMETHOD(ifdi_suspend, ixl_if_suspend), DEVMETHOD(ifdi_resume, ixl_if_resume), DEVMETHOD(ifdi_init, ixl_if_init), DEVMETHOD(ifdi_stop, ixl_if_stop), DEVMETHOD(ifdi_msix_intr_assign, ixl_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, ixl_if_enable_intr), DEVMETHOD(ifdi_intr_disable, ixl_if_disable_intr), DEVMETHOD(ifdi_rx_queue_intr_enable, ixl_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, ixl_if_tx_queue_intr_enable), DEVMETHOD(ifdi_tx_queues_alloc, ixl_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, ixl_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, ixl_if_queues_free), DEVMETHOD(ifdi_update_admin_status, ixl_if_update_admin_status), DEVMETHOD(ifdi_multi_set, ixl_if_multi_set), DEVMETHOD(ifdi_mtu_set, ixl_if_mtu_set), DEVMETHOD(ifdi_media_status, ixl_if_media_status), DEVMETHOD(ifdi_media_change, ixl_if_media_change), DEVMETHOD(ifdi_promisc_set, ixl_if_promisc_set), DEVMETHOD(ifdi_timer, ixl_if_timer), DEVMETHOD(ifdi_vlan_register, ixl_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, ixl_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, ixl_if_get_counter), DEVMETHOD(ifdi_i2c_req, ixl_if_i2c_req), DEVMETHOD(ifdi_priv_ioctl, ixl_if_priv_ioctl), #ifdef PCI_IOV DEVMETHOD(ifdi_iov_init, ixl_if_iov_init), DEVMETHOD(ifdi_iov_uninit, ixl_if_iov_uninit), DEVMETHOD(ifdi_iov_vf_add, ixl_if_iov_vf_add), DEVMETHOD(ifdi_vflr_handle, ixl_if_vflr_handle), #endif // ifdi_led_func // ifdi_debug DEVMETHOD_END }; static driver_t ixl_if_driver = { "ixl_if", ixl_if_methods, sizeof(struct ixl_pf) }; /* ** TUNEABLE PARAMETERS: */ static SYSCTL_NODE(_hw, OID_AUTO, ixl, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "ixl driver parameters"); /* * Leave this on unless you need to send flow control * frames (or other control frames) from software */ static int ixl_enable_tx_fc_filter = 1; TUNABLE_INT("hw.ixl.enable_tx_fc_filter", &ixl_enable_tx_fc_filter); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_tx_fc_filter, CTLFLAG_RDTUN, &ixl_enable_tx_fc_filter, 0, "Filter out packets with Ethertype 0x8808 from being sent out by non-HW sources"); static int ixl_i2c_access_method = 0; TUNABLE_INT("hw.ixl.i2c_access_method", &ixl_i2c_access_method); SYSCTL_INT(_hw_ixl, OID_AUTO, i2c_access_method, CTLFLAG_RDTUN, &ixl_i2c_access_method, 0, IXL_SYSCTL_HELP_I2C_METHOD); static int ixl_enable_vf_loopback = 1; TUNABLE_INT("hw.ixl.enable_vf_loopback", &ixl_enable_vf_loopback); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_vf_loopback, CTLFLAG_RDTUN, &ixl_enable_vf_loopback, 0, IXL_SYSCTL_HELP_VF_LOOPBACK); /* * Different method for processing TX descriptor * completion. */ static int ixl_enable_head_writeback = 1; TUNABLE_INT("hw.ixl.enable_head_writeback", &ixl_enable_head_writeback); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_head_writeback, CTLFLAG_RDTUN, &ixl_enable_head_writeback, 0, "For detecting last completed TX descriptor by hardware, use value written by HW instead of checking descriptors"); static int ixl_core_debug_mask = 0; TUNABLE_INT("hw.ixl.core_debug_mask", &ixl_core_debug_mask); SYSCTL_INT(_hw_ixl, OID_AUTO, core_debug_mask, CTLFLAG_RDTUN, &ixl_core_debug_mask, 0, "Display debug statements that are printed in non-shared code"); static int ixl_shared_debug_mask = 0; TUNABLE_INT("hw.ixl.shared_debug_mask", &ixl_shared_debug_mask); SYSCTL_INT(_hw_ixl, OID_AUTO, shared_debug_mask, CTLFLAG_RDTUN, &ixl_shared_debug_mask, 0, "Display debug statements that are printed in shared code"); #if 0 /* ** Controls for Interrupt Throttling ** - true/false for dynamic adjustment ** - default values for static ITR */ static int ixl_dynamic_rx_itr = 0; TUNABLE_INT("hw.ixl.dynamic_rx_itr", &ixl_dynamic_rx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, dynamic_rx_itr, CTLFLAG_RDTUN, &ixl_dynamic_rx_itr, 0, "Dynamic RX Interrupt Rate"); static int ixl_dynamic_tx_itr = 0; TUNABLE_INT("hw.ixl.dynamic_tx_itr", &ixl_dynamic_tx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, dynamic_tx_itr, CTLFLAG_RDTUN, &ixl_dynamic_tx_itr, 0, "Dynamic TX Interrupt Rate"); #endif static int ixl_rx_itr = IXL_ITR_8K; TUNABLE_INT("hw.ixl.rx_itr", &ixl_rx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, rx_itr, CTLFLAG_RDTUN, &ixl_rx_itr, 0, "RX Interrupt Rate"); static int ixl_tx_itr = IXL_ITR_4K; TUNABLE_INT("hw.ixl.tx_itr", &ixl_tx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, tx_itr, CTLFLAG_RDTUN, &ixl_tx_itr, 0, "TX Interrupt Rate"); #ifdef IXL_IW int ixl_enable_iwarp = 0; TUNABLE_INT("hw.ixl.enable_iwarp", &ixl_enable_iwarp); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_iwarp, CTLFLAG_RDTUN, &ixl_enable_iwarp, 0, "iWARP enabled"); #if __FreeBSD_version < 1100000 int ixl_limit_iwarp_msix = 1; #else int ixl_limit_iwarp_msix = IXL_IW_MAX_MSIX; #endif TUNABLE_INT("hw.ixl.limit_iwarp_msix", &ixl_limit_iwarp_msix); SYSCTL_INT(_hw_ixl, OID_AUTO, limit_iwarp_msix, CTLFLAG_RDTUN, &ixl_limit_iwarp_msix, 0, "Limit MSI-X vectors assigned to iWARP"); #endif extern struct if_txrx ixl_txrx_hwb; extern struct if_txrx ixl_txrx_dwb; static struct if_shared_ctx ixl_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = IXL_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tx_maxsegsize = IXL_MAX_DMA_SEG_SIZE, .isc_tso_maxsize = IXL_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = IXL_MAX_DMA_SEG_SIZE, .isc_rx_maxsize = 16384, .isc_rx_nsegments = IXL_MAX_RX_SEGS, .isc_rx_maxsegsize = IXL_MAX_DMA_SEG_SIZE, .isc_nfl = 1, .isc_ntxqs = 1, .isc_nrxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = ixl_vendor_info_array, .isc_driver_version = IXL_DRIVER_VERSION_STRING, .isc_driver = &ixl_if_driver, .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_NEED_ZERO_CSUM | IFLIB_TSO_INIT_IP | IFLIB_ADMIN_ALWAYS_RUN, .isc_nrxd_min = {IXL_MIN_RING}, .isc_ntxd_min = {IXL_MIN_RING}, .isc_nrxd_max = {IXL_MAX_RING}, .isc_ntxd_max = {IXL_MAX_RING}, .isc_nrxd_default = {IXL_DEFAULT_RING}, .isc_ntxd_default = {IXL_DEFAULT_RING}, }; if_shared_ctx_t ixl_sctx = &ixl_sctx_init; /*** Functions ***/ static void * ixl_register(device_t dev) { return (ixl_sctx); } static int ixl_allocate_pci_resources(struct ixl_pf *pf) { device_t dev = iflib_get_dev(pf->vsi.ctx); struct i40e_hw *hw = &pf->hw; int rid; /* Map BAR0 */ rid = PCIR_BAR(0); pf->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(pf->pci_mem)) { device_printf(dev, "Unable to allocate bus resource: PCI memory\n"); return (ENXIO); } /* Save off the PCI information */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); hw->bus.device = pci_get_slot(dev); hw->bus.func = pci_get_function(dev); /* Save off register access information */ pf->osdep.mem_bus_space_tag = rman_get_bustag(pf->pci_mem); pf->osdep.mem_bus_space_handle = rman_get_bushandle(pf->pci_mem); pf->osdep.mem_bus_space_size = rman_get_size(pf->pci_mem); pf->osdep.flush_reg = I40E_GLGEN_STAT; pf->osdep.dev = dev; pf->hw.hw_addr = (u8 *) &pf->osdep.mem_bus_space_handle; pf->hw.back = &pf->osdep; return (0); } static int ixl_if_attach_pre(if_ctx_t ctx) { device_t dev; struct ixl_pf *pf; struct i40e_hw *hw; struct ixl_vsi *vsi; if_softc_ctx_t scctx; struct i40e_filter_control_settings filter; enum i40e_status_code status; int error = 0; - INIT_DBG_DEV(dev, "begin"); - dev = iflib_get_dev(ctx); pf = iflib_get_softc(ctx); + INIT_DBG_DEV(dev, "begin"); + vsi = &pf->vsi; vsi->back = pf; pf->dev = dev; hw = &pf->hw; vsi->dev = dev; vsi->hw = &pf->hw; vsi->id = 0; vsi->num_vlans = 0; vsi->ctx = ctx; vsi->media = iflib_get_media(ctx); vsi->shared = scctx = iflib_get_softc_ctx(ctx); /* Save tunable values */ ixl_save_pf_tunables(pf); /* Do PCI setup - map BAR0, etc */ if (ixl_allocate_pci_resources(pf)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci_res; } /* Establish a clean starting point */ i40e_clear_hw(hw); status = i40e_pf_reset(hw); if (status) { device_printf(dev, "PF reset failure %s\n", i40e_stat_str(hw, status)); error = EIO; goto err_out; } /* Initialize the shared code */ status = i40e_init_shared_code(hw); if (status) { device_printf(dev, "Unable to initialize shared code, error %s\n", i40e_stat_str(hw, status)); error = EIO; goto err_out; } /* Set up the admin queue */ hw->aq.num_arq_entries = IXL_AQ_LEN; hw->aq.num_asq_entries = IXL_AQ_LEN; hw->aq.arq_buf_size = IXL_AQ_BUF_SZ; hw->aq.asq_buf_size = IXL_AQ_BUF_SZ; status = i40e_init_adminq(hw); if (status != 0 && status != I40E_ERR_FIRMWARE_API_VERSION) { device_printf(dev, "Unable to initialize Admin Queue, error %s\n", i40e_stat_str(hw, status)); error = EIO; goto err_out; } ixl_print_nvm_version(pf); if (status == I40E_ERR_FIRMWARE_API_VERSION) { device_printf(dev, "The driver for the device stopped " "because the NVM image is newer than expected.\n"); device_printf(dev, "You must install the most recent version of " "the network driver.\n"); error = EIO; goto err_out; } if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw)) { device_printf(dev, "The driver for the device detected " "a newer version of the NVM image than expected.\n"); device_printf(dev, "Please install the most recent version " "of the network driver.\n"); } else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4) { device_printf(dev, "The driver for the device detected " "an older version of the NVM image than expected.\n"); device_printf(dev, "Please update the NVM image.\n"); } /* Clear PXE mode */ i40e_clear_pxe_mode(hw); /* Get capabilities from the device */ error = ixl_get_hw_capabilities(pf); if (error) { device_printf(dev, "get_hw_capabilities failed: %d\n", error); goto err_get_cap; } /* Set up host memory cache */ status = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, 0, 0); if (status) { device_printf(dev, "init_lan_hmc failed: %s\n", i40e_stat_str(hw, status)); goto err_get_cap; } status = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY); if (status) { device_printf(dev, "configure_lan_hmc failed: %s\n", i40e_stat_str(hw, status)); goto err_mac_hmc; } /* Disable LLDP from the firmware for certain NVM versions */ if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) || (pf->hw.aq.fw_maj_ver < 4)) { i40e_aq_stop_lldp(hw, TRUE, NULL); pf->state |= IXL_PF_STATE_FW_LLDP_DISABLED; } /* Get MAC addresses from hardware */ i40e_get_mac_addr(hw, hw->mac.addr); error = i40e_validate_mac_addr(hw->mac.addr); if (error) { device_printf(dev, "validate_mac_addr failed: %d\n", error); goto err_mac_hmc; } bcopy(hw->mac.addr, hw->mac.perm_addr, ETHER_ADDR_LEN); iflib_set_mac(ctx, hw->mac.addr); i40e_get_port_mac_addr(hw, hw->mac.port_addr); /* Set up the device filtering */ bzero(&filter, sizeof(filter)); filter.enable_ethtype = TRUE; filter.enable_macvlan = TRUE; filter.enable_fdir = FALSE; filter.hash_lut_size = I40E_HASH_LUT_SIZE_512; if (i40e_set_filter_control(hw, &filter)) device_printf(dev, "i40e_set_filter_control() failed\n"); /* Query device FW LLDP status */ ixl_get_fw_lldp_status(pf); /* Tell FW to apply DCB config on link up */ i40e_aq_set_dcb_parameters(hw, true, NULL); /* Fill out iflib parameters */ if (hw->mac.type == I40E_MAC_X722) scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 128; else scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 64; if (vsi->enable_head_writeback) { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(struct i40e_tx_desc) + sizeof(u32), DBA_ALIGN); scctx->isc_txrx = &ixl_txrx_hwb; } else { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(struct i40e_tx_desc), DBA_ALIGN); scctx->isc_txrx = &ixl_txrx_dwb; } scctx->isc_txrx->ift_legacy_intr = ixl_intr; scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union i40e_32byte_rx_desc), DBA_ALIGN); scctx->isc_msix_bar = PCIR_BAR(IXL_MSIX_BAR); scctx->isc_tx_nsegments = IXL_MAX_TX_SEGS; scctx->isc_tx_tso_segments_max = IXL_MAX_TSO_SEGS; scctx->isc_tx_tso_size_max = IXL_TSO_SIZE; scctx->isc_tx_tso_segsize_max = IXL_MAX_DMA_SEG_SIZE; scctx->isc_rss_table_size = pf->hw.func_caps.rss_table_size; scctx->isc_tx_csum_flags = CSUM_OFFLOAD; scctx->isc_capabilities = scctx->isc_capenable = IXL_CAPS; INIT_DBG_DEV(dev, "end"); return (0); err_mac_hmc: i40e_shutdown_lan_hmc(hw); err_get_cap: i40e_shutdown_adminq(hw); err_out: ixl_free_pci_resources(pf); err_pci_res: return (error); } static int ixl_if_attach_post(if_ctx_t ctx) { device_t dev; struct ixl_pf *pf; struct i40e_hw *hw; struct ixl_vsi *vsi; int error = 0; enum i40e_status_code status; - INIT_DBG_DEV(dev, "begin"); - dev = iflib_get_dev(ctx); pf = iflib_get_softc(ctx); + + INIT_DBG_DEV(dev, "begin"); + vsi = &pf->vsi; vsi->ifp = iflib_get_ifp(ctx); hw = &pf->hw; /* Save off determined number of queues for interface */ vsi->num_rx_queues = vsi->shared->isc_nrxqsets; vsi->num_tx_queues = vsi->shared->isc_ntxqsets; /* Setup OS network interface / ifnet */ if (ixl_setup_interface(dev, pf)) { device_printf(dev, "interface setup failed!\n"); error = EIO; goto err; } /* Determine link state */ if (ixl_attach_get_link_status(pf)) { error = EINVAL; goto err; } error = ixl_switch_config(pf); if (error) { device_printf(dev, "Initial ixl_switch_config() failed: %d\n", error); goto err; } /* Add protocol filters to list */ ixl_init_filters(vsi); /* Init queue allocation manager */ error = ixl_pf_qmgr_init(&pf->qmgr, hw->func_caps.num_tx_qp); if (error) { device_printf(dev, "Failed to init queue manager for PF queues, error %d\n", error); goto err; } /* reserve a contiguous allocation for the PF's VSI */ error = ixl_pf_qmgr_alloc_contiguous(&pf->qmgr, max(vsi->num_rx_queues, vsi->num_tx_queues), &pf->qtag); if (error) { device_printf(dev, "Failed to reserve queues for PF LAN VSI, error %d\n", error); goto err; } device_printf(dev, "Allocating %d queues for PF LAN VSI; %d queues active\n", pf->qtag.num_allocated, pf->qtag.num_active); /* Limit PHY interrupts to link, autoneg, and modules failure */ status = i40e_aq_set_phy_int_mask(hw, IXL_DEFAULT_PHY_INT_MASK, NULL); if (status) { device_printf(dev, "i40e_aq_set_phy_mask() failed: err %s," " aq_err %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); goto err; } /* Get the bus configuration and set the shared code */ ixl_get_bus_info(pf); /* Keep admin queue interrupts active while driver is loaded */ if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { ixl_configure_intr0_msix(pf); ixl_enable_intr0(hw); } /* Set initial advertised speed sysctl value */ ixl_set_initial_advertised_speeds(pf); /* Initialize statistics & add sysctls */ ixl_add_device_sysctls(pf); ixl_pf_reset_stats(pf); ixl_update_stats_counters(pf); ixl_add_hw_stats(pf); hw->phy.get_link_info = true; i40e_get_link_status(hw, &pf->link_up); ixl_update_link_status(pf); #ifdef PCI_IOV ixl_initialize_sriov(pf); #endif #ifdef IXL_IW if (hw->func_caps.iwarp && ixl_enable_iwarp) { pf->iw_enabled = (pf->iw_msix > 0) ? true : false; if (pf->iw_enabled) { error = ixl_iw_pf_attach(pf); if (error) { device_printf(dev, "interfacing to iWARP driver failed: %d\n", error); goto err; } else device_printf(dev, "iWARP ready\n"); } else device_printf(dev, "iWARP disabled on this device " "(no MSI-X vectors)\n"); } else { pf->iw_enabled = false; device_printf(dev, "The device is not iWARP enabled\n"); } #endif INIT_DBG_DEV(dev, "end"); return (0); err: INIT_DEBUGOUT("end: error %d", error); /* ixl_if_detach() is called on error from this */ return (error); } /** * XXX: iflib always ignores the return value of detach() * -> This means that this isn't allowed to fail */ static int ixl_if_detach(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; enum i40e_status_code status; #ifdef IXL_IW int error; #endif INIT_DBG_DEV(dev, "begin"); #ifdef IXL_IW if (ixl_enable_iwarp && pf->iw_enabled) { error = ixl_iw_pf_detach(pf); if (error == EBUSY) { device_printf(dev, "iwarp in use; stop it first.\n"); //return (error); } } #endif /* Remove all previously allocated media types */ ifmedia_removeall(vsi->media); /* Shutdown LAN HMC */ if (hw->hmc.hmc_obj) { status = i40e_shutdown_lan_hmc(hw); if (status) device_printf(dev, "i40e_shutdown_lan_hmc() failed with status %s\n", i40e_stat_str(hw, status)); } /* Shutdown admin queue */ ixl_disable_intr0(hw); status = i40e_shutdown_adminq(hw); if (status) device_printf(dev, "i40e_shutdown_adminq() failed with status %s\n", i40e_stat_str(hw, status)); ixl_pf_qmgr_destroy(&pf->qmgr); ixl_free_pci_resources(pf); ixl_free_mac_filters(vsi); INIT_DBG_DEV(dev, "end"); return (0); } static int ixl_if_shutdown(if_ctx_t ctx) { int error = 0; INIT_DEBUGOUT("ixl_if_shutdown: begin"); /* TODO: Call ixl_if_stop()? */ /* TODO: Then setup low power mode */ return (error); } static int ixl_if_suspend(if_ctx_t ctx) { int error = 0; INIT_DEBUGOUT("ixl_if_suspend: begin"); /* TODO: Call ixl_if_stop()? */ /* TODO: Then setup low power mode */ return (error); } static int ixl_if_resume(if_ctx_t ctx) { struct ifnet *ifp = iflib_get_ifp(ctx); INIT_DEBUGOUT("ixl_if_resume: begin"); /* Read & clear wake-up registers */ /* Required after D3->D0 transition */ if (ifp->if_flags & IFF_UP) ixl_if_init(ctx); return (0); } void ixl_if_init(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = &pf->hw; struct ifnet *ifp = iflib_get_ifp(ctx); device_t dev = iflib_get_dev(ctx); u8 tmpaddr[ETHER_ADDR_LEN]; int ret; /* * If the aq is dead here, it probably means something outside of the driver * did something to the adapter, like a PF reset. * So, rebuild the driver's state here if that occurs. */ if (!i40e_check_asq_alive(&pf->hw)) { device_printf(dev, "Admin Queue is down; resetting...\n"); ixl_teardown_hw_structs(pf); ixl_rebuild_hw_structs_after_reset(pf); } /* Get the latest mac address... User might use a LAA */ bcopy(IF_LLADDR(vsi->ifp), tmpaddr, ETH_ALEN); if (!cmp_etheraddr(hw->mac.addr, tmpaddr) && (i40e_validate_mac_addr(tmpaddr) == I40E_SUCCESS)) { ixl_del_filter(vsi, hw->mac.addr, IXL_VLAN_ANY); bcopy(tmpaddr, hw->mac.addr, ETH_ALEN); ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_ONLY, hw->mac.addr, NULL); if (ret) { device_printf(dev, "LLA address change failed!!\n"); return; } ixl_add_filter(vsi, hw->mac.addr, IXL_VLAN_ANY); } iflib_set_mac(ctx, hw->mac.addr); /* Prepare the VSI: rings, hmc contexts, etc... */ if (ixl_initialize_vsi(vsi)) { device_printf(dev, "initialize vsi failed!!\n"); return; } /* Reconfigure multicast filters in HW */ ixl_if_multi_set(ctx); /* Set up RSS */ ixl_config_rss(pf); /* Set up MSI-X routing and the ITR settings */ if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { ixl_configure_queue_intr_msix(pf); ixl_configure_itr(pf); } else ixl_configure_legacy(pf); if (vsi->enable_head_writeback) ixl_init_tx_cidx(vsi); else ixl_init_tx_rsqs(vsi); ixl_enable_rings(vsi); i40e_aq_set_default_vsi(hw, vsi->seid, NULL); /* Re-add configure filters to HW */ ixl_reconfigure_filters(vsi); /* Configure promiscuous mode */ ixl_if_promisc_set(ctx, if_getflags(ifp)); #ifdef IXL_IW if (ixl_enable_iwarp && pf->iw_enabled) { ret = ixl_iw_pf_init(pf); if (ret) device_printf(dev, "initialize iwarp failed, code %d\n", ret); } #endif } void ixl_if_stop(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; INIT_DEBUGOUT("ixl_if_stop: begin\n"); // TODO: This may need to be reworked #ifdef IXL_IW /* Stop iWARP device */ if (ixl_enable_iwarp && pf->iw_enabled) ixl_iw_pf_stop(pf); #endif ixl_disable_rings_intr(vsi); ixl_disable_rings(pf, vsi, &pf->qtag); } static int ixl_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct ixl_rx_queue *rx_que = vsi->rx_queues; struct ixl_tx_queue *tx_que = vsi->tx_queues; int err, i, rid, vector = 0; char buf[16]; MPASS(vsi->shared->isc_nrxqsets > 0); MPASS(vsi->shared->isc_ntxqsets > 0); /* Admin Que must use vector 0*/ rid = vector + 1; err = iflib_irq_alloc_generic(ctx, &vsi->irq, rid, IFLIB_INTR_ADMIN, ixl_msix_adminq, pf, 0, "aq"); if (err) { iflib_irq_free(ctx, &vsi->irq); device_printf(iflib_get_dev(ctx), "Failed to register Admin Que handler"); return (err); } /* Create soft IRQ for handling VFLRs */ iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_IOV, pf, 0, "iov"); /* Now set up the stations */ for (i = 0, vector = 1; i < vsi->shared->isc_nrxqsets; i++, vector++, rx_que++) { rid = vector + 1; snprintf(buf, sizeof(buf), "rxq%d", i); err = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RX, ixl_msix_que, rx_que, rx_que->rxr.me, buf); /* XXX: Does the driver work as expected if there are fewer num_rx_queues than * what's expected in the iflib context? */ if (err) { device_printf(iflib_get_dev(ctx), "Failed to allocate queue RX int vector %d, err: %d\n", i, err); vsi->num_rx_queues = i + 1; goto fail; } rx_que->msix = vector; } bzero(buf, sizeof(buf)); for (i = 0; i < vsi->shared->isc_ntxqsets; i++, tx_que++) { snprintf(buf, sizeof(buf), "txq%d", i); iflib_softirq_alloc_generic(ctx, &vsi->rx_queues[i % vsi->shared->isc_nrxqsets].que_irq, IFLIB_INTR_TX, tx_que, tx_que->txr.me, buf); /* TODO: Maybe call a strategy function for this to figure out which * interrupts to map Tx queues to. I don't know if there's an immediately * better way than this other than a user-supplied map, though. */ tx_que->msix = (i % vsi->shared->isc_nrxqsets) + 1; } return (0); fail: iflib_irq_free(ctx, &vsi->irq); rx_que = vsi->rx_queues; for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) iflib_irq_free(ctx, &rx_que->que_irq); return (err); } /* * Enable all interrupts * * Called in: * iflib_init_locked, after ixl_if_init() */ static void ixl_if_enable_intr(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *que = vsi->rx_queues; ixl_enable_intr0(hw); /* Enable queue interrupts */ for (int i = 0; i < vsi->num_rx_queues; i++, que++) /* TODO: Queue index parameter is probably wrong */ ixl_enable_queue(hw, que->rxr.me); } /* * Disable queue interrupts * * Other interrupt causes need to remain active. */ static void ixl_if_disable_intr(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *rx_que = vsi->rx_queues; if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) ixl_disable_queue(hw, rx_que->msix - 1); } else { // Set PFINT_LNKLST0 FIRSTQ_INDX to 0x7FF // stops queues from triggering interrupts wr32(hw, I40E_PFINT_LNKLST0, 0x7FF); } } static int ixl_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *rx_que = &vsi->rx_queues[rxqid]; ixl_enable_queue(hw, rx_que->msix - 1); return (0); } static int ixl_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; struct ixl_tx_queue *tx_que = &vsi->tx_queues[txqid]; ixl_enable_queue(hw, tx_que->msix - 1); return (0); } static int ixl_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; if_softc_ctx_t scctx = vsi->shared; struct ixl_tx_queue *que; int i, j, error = 0; MPASS(scctx->isc_ntxqsets > 0); MPASS(ntxqs == 1); MPASS(scctx->isc_ntxqsets == ntxqsets); /* Allocate queue structure memory */ if (!(vsi->tx_queues = (struct ixl_tx_queue *) malloc(sizeof(struct ixl_tx_queue) *ntxqsets, M_IXL, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate TX ring memory\n"); return (ENOMEM); } for (i = 0, que = vsi->tx_queues; i < ntxqsets; i++, que++) { struct tx_ring *txr = &que->txr; txr->me = i; que->vsi = vsi; if (!vsi->enable_head_writeback) { /* Allocate report status array */ if (!(txr->tx_rsq = malloc(sizeof(qidx_t) * scctx->isc_ntxd[0], M_IXL, M_NOWAIT))) { device_printf(iflib_get_dev(ctx), "failed to allocate tx_rsq memory\n"); error = ENOMEM; goto fail; } /* Init report status array */ for (j = 0; j < scctx->isc_ntxd[0]; j++) txr->tx_rsq[j] = QIDX_INVALID; } /* get the virtual and physical address of the hardware queues */ txr->tail = I40E_QTX_TAIL(txr->me); txr->tx_base = (struct i40e_tx_desc *)vaddrs[i * ntxqs]; txr->tx_paddr = paddrs[i * ntxqs]; txr->que = que; } return (0); fail: ixl_if_queues_free(ctx); return (error); } static int ixl_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct ixl_rx_queue *que; int i, error = 0; #ifdef INVARIANTS if_softc_ctx_t scctx = vsi->shared; MPASS(scctx->isc_nrxqsets > 0); MPASS(nrxqs == 1); MPASS(scctx->isc_nrxqsets == nrxqsets); #endif /* Allocate queue structure memory */ if (!(vsi->rx_queues = (struct ixl_rx_queue *) malloc(sizeof(struct ixl_rx_queue) * nrxqsets, M_IXL, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate RX ring memory\n"); error = ENOMEM; goto fail; } for (i = 0, que = vsi->rx_queues; i < nrxqsets; i++, que++) { struct rx_ring *rxr = &que->rxr; rxr->me = i; que->vsi = vsi; /* get the virtual and physical address of the hardware queues */ rxr->tail = I40E_QRX_TAIL(rxr->me); rxr->rx_base = (union i40e_rx_desc *)vaddrs[i * nrxqs]; rxr->rx_paddr = paddrs[i * nrxqs]; rxr->que = que; } return (0); fail: ixl_if_queues_free(ctx); return (error); } static void ixl_if_queues_free(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; if (!vsi->enable_head_writeback) { struct ixl_tx_queue *que; int i = 0; for (i = 0, que = vsi->tx_queues; i < vsi->num_tx_queues; i++, que++) { struct tx_ring *txr = &que->txr; if (txr->tx_rsq != NULL) { free(txr->tx_rsq, M_IXL); txr->tx_rsq = NULL; } } } if (vsi->tx_queues != NULL) { free(vsi->tx_queues, M_IXL); vsi->tx_queues = NULL; } if (vsi->rx_queues != NULL) { free(vsi->rx_queues, M_IXL); vsi->rx_queues = NULL; } } void ixl_update_link_status(struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = &pf->hw; u64 baudrate; if (pf->link_up) { if (vsi->link_active == FALSE) { vsi->link_active = TRUE; baudrate = ixl_max_aq_speed_to_value(hw->phy.link_info.link_speed); iflib_link_state_change(vsi->ctx, LINK_STATE_UP, baudrate); ixl_link_up_msg(pf); #ifdef PCI_IOV ixl_broadcast_link_state(pf); #endif } } else { /* Link down */ if (vsi->link_active == TRUE) { vsi->link_active = FALSE; iflib_link_state_change(vsi->ctx, LINK_STATE_DOWN, 0); #ifdef PCI_IOV ixl_broadcast_link_state(pf); #endif } } } static void ixl_handle_lan_overflow_event(struct ixl_pf *pf, struct i40e_arq_event_info *e) { device_t dev = pf->dev; u32 rxq_idx, qtx_ctl; rxq_idx = (e->desc.params.external.param0 & I40E_PRTDCB_RUPTQ_RXQNUM_MASK) >> I40E_PRTDCB_RUPTQ_RXQNUM_SHIFT; qtx_ctl = e->desc.params.external.param1; device_printf(dev, "LAN overflow event: global rxq_idx %d\n", rxq_idx); device_printf(dev, "LAN overflow event: QTX_CTL 0x%08x\n", qtx_ctl); } static int ixl_process_adminq(struct ixl_pf *pf, u16 *pending) { enum i40e_status_code status = I40E_SUCCESS; struct i40e_arq_event_info event; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u16 opcode; u32 loop = 0, reg; event.buf_len = IXL_AQ_BUF_SZ; event.msg_buf = malloc(event.buf_len, M_IXL, M_NOWAIT | M_ZERO); if (!event.msg_buf) { device_printf(dev, "%s: Unable to allocate memory for Admin" " Queue event!\n", __func__); return (ENOMEM); } /* clean and process any events */ do { status = i40e_clean_arq_element(hw, &event, pending); if (status) break; opcode = LE16_TO_CPU(event.desc.opcode); ixl_dbg(pf, IXL_DBG_AQ, "Admin Queue event: %#06x\n", opcode); switch (opcode) { case i40e_aqc_opc_get_link_status: ixl_link_event(pf, &event); break; case i40e_aqc_opc_send_msg_to_pf: #ifdef PCI_IOV ixl_handle_vf_msg(pf, &event); #endif break; /* * This should only occur on no-drop queues, which * aren't currently configured. */ case i40e_aqc_opc_event_lan_overflow: ixl_handle_lan_overflow_event(pf, &event); break; default: break; } } while (*pending && (loop++ < IXL_ADM_LIMIT)); free(event.msg_buf, M_IXL); /* Re-enable admin queue interrupt cause */ reg = rd32(hw, I40E_PFINT_ICR0_ENA); reg |= I40E_PFINT_ICR0_ENA_ADMINQ_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); return (status); } static void ixl_if_update_admin_status(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct i40e_hw *hw = &pf->hw; u16 pending; if (pf->state & IXL_PF_STATE_ADAPTER_RESETTING) ixl_handle_empr_reset(pf); if (pf->state & IXL_PF_STATE_MDD_PENDING) ixl_handle_mdd_event(pf); ixl_process_adminq(pf, &pending); ixl_update_link_status(pf); ixl_update_stats_counters(pf); /* * If there are still messages to process, reschedule ourselves. * Otherwise, re-enable our interrupt and go to sleep. */ if (pending > 0) iflib_admin_intr_deferred(ctx); else ixl_enable_intr0(hw); } static void ixl_if_multi_set(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; int mcnt, flags; int del_mcnt; IOCTL_DEBUGOUT("ixl_if_multi_set: begin"); mcnt = min(if_llmaddr_count(iflib_get_ifp(ctx)), MAX_MULTICAST_ADDR); /* Delete filters for removed multicast addresses */ del_mcnt = ixl_del_multi(vsi); vsi->num_macs -= del_mcnt; if (__predict_false(mcnt == MAX_MULTICAST_ADDR)) { i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, TRUE, NULL); return; } /* (re-)install filters for all mcast addresses */ /* XXX: This bypasses filter count tracking code! */ mcnt = if_foreach_llmaddr(iflib_get_ifp(ctx), ixl_mc_filter_apply, vsi); if (mcnt > 0) { vsi->num_macs += mcnt; flags = (IXL_FILTER_ADD | IXL_FILTER_USED | IXL_FILTER_MC); ixl_add_hw_filters(vsi, flags, mcnt); } ixl_dbg_filter(pf, "%s: filter mac total: %d\n", __func__, vsi->num_macs); IOCTL_DEBUGOUT("ixl_if_multi_set: end"); } static int ixl_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (mtu > IXL_MAX_FRAME - ETHER_HDR_LEN - ETHER_CRC_LEN - ETHER_VLAN_ENCAP_LEN) return (EINVAL); vsi->shared->isc_max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; return (0); } static void ixl_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr) { struct ixl_pf *pf = iflib_get_softc(ctx); struct i40e_hw *hw = &pf->hw; INIT_DEBUGOUT("ixl_media_status: begin"); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!pf->link_up) { return; } ifmr->ifm_status |= IFM_ACTIVE; /* Hardware is always full-duplex */ ifmr->ifm_active |= IFM_FDX; switch (hw->phy.link_info.phy_type) { /* 100 M */ case I40E_PHY_TYPE_100BASE_TX: ifmr->ifm_active |= IFM_100_TX; break; /* 1 G */ case I40E_PHY_TYPE_1000BASE_T: ifmr->ifm_active |= IFM_1000_T; break; case I40E_PHY_TYPE_1000BASE_SX: ifmr->ifm_active |= IFM_1000_SX; break; case I40E_PHY_TYPE_1000BASE_LX: ifmr->ifm_active |= IFM_1000_LX; break; case I40E_PHY_TYPE_1000BASE_T_OPTICAL: ifmr->ifm_active |= IFM_1000_T; break; /* 10 G */ case I40E_PHY_TYPE_10GBASE_SFPP_CU: ifmr->ifm_active |= IFM_10G_TWINAX; break; case I40E_PHY_TYPE_10GBASE_SR: ifmr->ifm_active |= IFM_10G_SR; break; case I40E_PHY_TYPE_10GBASE_LR: ifmr->ifm_active |= IFM_10G_LR; break; case I40E_PHY_TYPE_10GBASE_T: ifmr->ifm_active |= IFM_10G_T; break; case I40E_PHY_TYPE_XAUI: case I40E_PHY_TYPE_XFI: ifmr->ifm_active |= IFM_10G_TWINAX; break; case I40E_PHY_TYPE_10GBASE_AOC: ifmr->ifm_active |= IFM_10G_AOC; break; /* 25 G */ case I40E_PHY_TYPE_25GBASE_KR: ifmr->ifm_active |= IFM_25G_KR; break; case I40E_PHY_TYPE_25GBASE_CR: ifmr->ifm_active |= IFM_25G_CR; break; case I40E_PHY_TYPE_25GBASE_SR: ifmr->ifm_active |= IFM_25G_SR; break; case I40E_PHY_TYPE_25GBASE_LR: ifmr->ifm_active |= IFM_25G_LR; break; case I40E_PHY_TYPE_25GBASE_AOC: ifmr->ifm_active |= IFM_25G_AOC; break; case I40E_PHY_TYPE_25GBASE_ACC: ifmr->ifm_active |= IFM_25G_ACC; break; /* 40 G */ case I40E_PHY_TYPE_40GBASE_CR4: case I40E_PHY_TYPE_40GBASE_CR4_CU: ifmr->ifm_active |= IFM_40G_CR4; break; case I40E_PHY_TYPE_40GBASE_SR4: ifmr->ifm_active |= IFM_40G_SR4; break; case I40E_PHY_TYPE_40GBASE_LR4: ifmr->ifm_active |= IFM_40G_LR4; break; case I40E_PHY_TYPE_XLAUI: ifmr->ifm_active |= IFM_OTHER; break; case I40E_PHY_TYPE_1000BASE_KX: ifmr->ifm_active |= IFM_1000_KX; break; case I40E_PHY_TYPE_SGMII: ifmr->ifm_active |= IFM_1000_SGMII; break; /* ERJ: What's the difference between these? */ case I40E_PHY_TYPE_10GBASE_CR1_CU: case I40E_PHY_TYPE_10GBASE_CR1: ifmr->ifm_active |= IFM_10G_CR1; break; case I40E_PHY_TYPE_10GBASE_KX4: ifmr->ifm_active |= IFM_10G_KX4; break; case I40E_PHY_TYPE_10GBASE_KR: ifmr->ifm_active |= IFM_10G_KR; break; case I40E_PHY_TYPE_SFI: ifmr->ifm_active |= IFM_10G_SFI; break; /* Our single 20G media type */ case I40E_PHY_TYPE_20GBASE_KR2: ifmr->ifm_active |= IFM_20G_KR2; break; case I40E_PHY_TYPE_40GBASE_KR4: ifmr->ifm_active |= IFM_40G_KR4; break; case I40E_PHY_TYPE_XLPPI: case I40E_PHY_TYPE_40GBASE_AOC: ifmr->ifm_active |= IFM_40G_XLPPI; break; /* Unknown to driver */ default: ifmr->ifm_active |= IFM_UNKNOWN; break; } /* Report flow control status as well */ if (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; if (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; } static int ixl_if_media_change(if_ctx_t ctx) { struct ifmedia *ifm = iflib_get_media(ctx); INIT_DEBUGOUT("ixl_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if_printf(iflib_get_ifp(ctx), "Media change is not supported.\n"); return (ENODEV); } static int ixl_if_promisc_set(if_ctx_t ctx, int flags) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct ifnet *ifp = iflib_get_ifp(ctx); struct i40e_hw *hw = vsi->hw; int err; bool uni = FALSE, multi = FALSE; if (flags & IFF_PROMISC) uni = multi = TRUE; else if (flags & IFF_ALLMULTI || if_llmaddr_count(ifp) >= MAX_MULTICAST_ADDR) multi = TRUE; err = i40e_aq_set_vsi_unicast_promiscuous(hw, vsi->seid, uni, NULL, true); if (err) return (err); err = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, multi, NULL); return (err); } static void ixl_if_timer(if_ctx_t ctx, uint16_t qid) { if (qid != 0) return; /* Fire off the adminq task */ iflib_admin_intr_deferred(ctx); } static void ixl_if_vlan_register(if_ctx_t ctx, u16 vtag) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; ++vsi->num_vlans; ixl_add_filter(vsi, hw->mac.addr, vtag); } static void ixl_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; --vsi->num_vlans; ixl_del_filter(vsi, hw->mac.addr, vtag); } static uint64_t ixl_if_get_counter(if_ctx_t ctx, ift_counter cnt) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; if_t ifp = iflib_get_ifp(ctx); switch (cnt) { case IFCOUNTER_IPACKETS: return (vsi->ipackets); case IFCOUNTER_IERRORS: return (vsi->ierrors); case IFCOUNTER_OPACKETS: return (vsi->opackets); case IFCOUNTER_OERRORS: return (vsi->oerrors); case IFCOUNTER_COLLISIONS: /* Collisions are by standard impossible in 40G/10G Ethernet */ return (0); case IFCOUNTER_IBYTES: return (vsi->ibytes); case IFCOUNTER_OBYTES: return (vsi->obytes); case IFCOUNTER_IMCASTS: return (vsi->imcasts); case IFCOUNTER_OMCASTS: return (vsi->omcasts); case IFCOUNTER_IQDROPS: return (vsi->iqdrops); case IFCOUNTER_OQDROPS: return (vsi->oqdrops); case IFCOUNTER_NOPROTO: return (vsi->noproto); default: return (if_get_counter_default(ifp, cnt)); } } #ifdef PCI_IOV static void ixl_if_vflr_handle(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); ixl_handle_vflr(pf); } #endif static int ixl_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req) { struct ixl_pf *pf = iflib_get_softc(ctx); if (pf->read_i2c_byte == NULL) return (EINVAL); for (int i = 0; i < req->len; i++) if (pf->read_i2c_byte(pf, req->offset + i, req->dev_addr, &req->data[i])) return (EIO); return (0); } static int ixl_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ifdrv *ifd = (struct ifdrv *)data; int error = 0; /* * The iflib_if_ioctl forwards SIOCxDRVSPEC and SIOGPRIVATE_0 without * performing privilege checks. It is important that this function * perform the necessary checks for commands which should only be * executed by privileged threads. */ switch(command) { case SIOCGDRVSPEC: case SIOCSDRVSPEC: /* NVM update command */ if (ifd->ifd_cmd == I40E_NVM_ACCESS) { error = priv_check(curthread, PRIV_DRIVER); if (error) break; error = ixl_handle_nvmupd_cmd(pf, ifd); } else { error = EINVAL; } break; default: error = EOPNOTSUPP; } return (error); } static u_int ixl_mc_filter_apply(void *arg, struct sockaddr_dl *sdl, u_int count __unused) { struct ixl_vsi *vsi = arg; ixl_add_mc_filter(vsi, (u8*)LLADDR(sdl)); return (1); } /* * Sanity check and save off tunable values. */ static void ixl_save_pf_tunables(struct ixl_pf *pf) { device_t dev = pf->dev; /* Save tunable information */ pf->enable_tx_fc_filter = ixl_enable_tx_fc_filter; pf->dbg_mask = ixl_core_debug_mask; pf->hw.debug_mask = ixl_shared_debug_mask; pf->vsi.enable_head_writeback = !!(ixl_enable_head_writeback); pf->enable_vf_loopback = !!(ixl_enable_vf_loopback); #if 0 pf->dynamic_rx_itr = ixl_dynamic_rx_itr; pf->dynamic_tx_itr = ixl_dynamic_tx_itr; #endif if (ixl_i2c_access_method > 3 || ixl_i2c_access_method < 0) pf->i2c_access_method = 0; else pf->i2c_access_method = ixl_i2c_access_method; if (ixl_tx_itr < 0 || ixl_tx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid tx_itr value of %d set!\n", ixl_tx_itr); device_printf(dev, "tx_itr must be between %d and %d, " "inclusive\n", 0, IXL_MAX_ITR); device_printf(dev, "Using default value of %d instead\n", IXL_ITR_4K); pf->tx_itr = IXL_ITR_4K; } else pf->tx_itr = ixl_tx_itr; if (ixl_rx_itr < 0 || ixl_rx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid rx_itr value of %d set!\n", ixl_rx_itr); device_printf(dev, "rx_itr must be between %d and %d, " "inclusive\n", 0, IXL_MAX_ITR); device_printf(dev, "Using default value of %d instead\n", IXL_ITR_8K); pf->rx_itr = IXL_ITR_8K; } else pf->rx_itr = ixl_rx_itr; } diff --git a/sys/dev/ixl/ixl_pf_main.c b/sys/dev/ixl/ixl_pf_main.c index eefdd22ec828..7979aa0a69d9 100644 --- a/sys/dev/ixl/ixl_pf_main.c +++ b/sys/dev/ixl/ixl_pf_main.c @@ -1,5101 +1,5105 @@ /****************************************************************************** Copyright (c) 2013-2018, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "ixl_pf.h" #ifdef PCI_IOV #include "ixl_pf_iov.h" #endif #ifdef IXL_IW #include "ixl_iw.h" #include "ixl_iw_int.h" #endif static u8 ixl_convert_sysctl_aq_link_speed(u8, bool); static void ixl_sbuf_print_bytes(struct sbuf *, u8 *, int, int, bool); /* Sysctls */ static int ixl_sysctl_set_flowcntl(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_set_advertise(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_supported_speeds(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_current_speed(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_show_fw(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_unallocated_queues(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_pf_tx_itr(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_pf_rx_itr(SYSCTL_HANDLER_ARGS); /* Debug Sysctls */ static int ixl_sysctl_link_status(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_phy_abilities(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_sw_filter_list(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hw_res_alloc(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_switch_config(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hkey(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hena(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hlut(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fw_link_management(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_read_i2c_byte(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_write_i2c_byte(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_fc_ability(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_rs_ability(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_fc_request(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_rs_request(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_auto_enable(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_dump_debug_data(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fw_lldp(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_do_pf_reset(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_do_core_reset(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_do_global_reset(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_do_emp_reset(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_queue_interrupt_table(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS); #ifdef IXL_DEBUG static int ixl_sysctl_qtx_tail_handler(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_qrx_tail_handler(SYSCTL_HANDLER_ARGS); #endif #ifdef IXL_IW extern int ixl_enable_iwarp; extern int ixl_limit_iwarp_msix; #endif const uint8_t ixl_bcast_addr[ETHER_ADDR_LEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; const char * const ixl_fc_string[6] = { "None", "Rx", "Tx", "Full", "Priority", "Default" }; static char *ixl_fec_string[3] = { "CL108 RS-FEC", "CL74 FC-FEC/BASE-R", "None" }; MALLOC_DEFINE(M_IXL, "ixl", "ixl driver allocations"); /* ** Put the FW, API, NVM, EEtrackID, and OEM version information into a string */ void ixl_nvm_version_str(struct i40e_hw *hw, struct sbuf *buf) { u8 oem_ver = (u8)(hw->nvm.oem_ver >> 24); u16 oem_build = (u16)((hw->nvm.oem_ver >> 16) & 0xFFFF); u8 oem_patch = (u8)(hw->nvm.oem_ver & 0xFF); sbuf_printf(buf, "fw %d.%d.%05d api %d.%d nvm %x.%02x etid %08x oem %d.%d.%d", hw->aq.fw_maj_ver, hw->aq.fw_min_ver, hw->aq.fw_build, hw->aq.api_maj_ver, hw->aq.api_min_ver, (hw->nvm.version & IXL_NVM_VERSION_HI_MASK) >> IXL_NVM_VERSION_HI_SHIFT, (hw->nvm.version & IXL_NVM_VERSION_LO_MASK) >> IXL_NVM_VERSION_LO_SHIFT, hw->nvm.eetrack, oem_ver, oem_build, oem_patch); } void ixl_print_nvm_version(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *sbuf; sbuf = sbuf_new_auto(); ixl_nvm_version_str(hw, sbuf); sbuf_finish(sbuf); device_printf(dev, "%s\n", sbuf_data(sbuf)); sbuf_delete(sbuf); } static void ixl_configure_tx_itr(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_tx_queue *que = vsi->tx_queues; vsi->tx_itr_setting = pf->tx_itr; for (int i = 0; i < vsi->num_tx_queues; i++, que++) { struct tx_ring *txr = &que->txr; wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR, i), vsi->tx_itr_setting); txr->itr = vsi->tx_itr_setting; txr->latency = IXL_AVE_LATENCY; } } static void ixl_configure_rx_itr(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_rx_queue *que = vsi->rx_queues; vsi->rx_itr_setting = pf->rx_itr; for (int i = 0; i < vsi->num_rx_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR, i), vsi->rx_itr_setting); rxr->itr = vsi->rx_itr_setting; rxr->latency = IXL_AVE_LATENCY; } } /* * Write PF ITR values to queue ITR registers. */ void ixl_configure_itr(struct ixl_pf *pf) { ixl_configure_tx_itr(pf); ixl_configure_rx_itr(pf); } /********************************************************************* * * Get the hardware capabilities * **********************************************************************/ int ixl_get_hw_capabilities(struct ixl_pf *pf) { struct i40e_aqc_list_capabilities_element_resp *buf; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; enum i40e_status_code status; int len, i2c_intfc_num; bool again = TRUE; u16 needed; len = 40 * sizeof(struct i40e_aqc_list_capabilities_element_resp); retry: if (!(buf = (struct i40e_aqc_list_capabilities_element_resp *) malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate cap memory\n"); return (ENOMEM); } /* This populates the hw struct */ status = i40e_aq_discover_capabilities(hw, buf, len, &needed, i40e_aqc_opc_list_func_capabilities, NULL); free(buf, M_DEVBUF); if ((pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOMEM) && (again == TRUE)) { /* retry once with a larger buffer */ again = FALSE; len = needed; goto retry; } else if (status != I40E_SUCCESS) { device_printf(dev, "capability discovery failed; status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (ENODEV); } /* * Some devices have both MDIO and I2C; since this isn't reported * by the FW, check registers to see if an I2C interface exists. */ i2c_intfc_num = ixl_find_i2c_interface(pf); if (i2c_intfc_num != -1) pf->has_i2c = true; /* Determine functions to use for driver I2C accesses */ switch (pf->i2c_access_method) { case 0: { if (hw->mac.type == I40E_MAC_XL710 && hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver >= 7) { pf->read_i2c_byte = ixl_read_i2c_byte_aq; pf->write_i2c_byte = ixl_write_i2c_byte_aq; } else { pf->read_i2c_byte = ixl_read_i2c_byte_reg; pf->write_i2c_byte = ixl_write_i2c_byte_reg; } break; } case 3: pf->read_i2c_byte = ixl_read_i2c_byte_aq; pf->write_i2c_byte = ixl_write_i2c_byte_aq; break; case 2: pf->read_i2c_byte = ixl_read_i2c_byte_reg; pf->write_i2c_byte = ixl_write_i2c_byte_reg; break; case 1: pf->read_i2c_byte = ixl_read_i2c_byte_bb; pf->write_i2c_byte = ixl_write_i2c_byte_bb; break; default: /* Should not happen */ device_printf(dev, "Error setting I2C access functions\n"); break; } /* Print a subset of the capability information. */ device_printf(dev, "PF-ID[%d]: VFs %d, MSI-X %d, VF MSI-X %d, QPs %d, %s\n", hw->pf_id, hw->func_caps.num_vfs, hw->func_caps.num_msix_vectors, hw->func_caps.num_msix_vectors_vf, hw->func_caps.num_tx_qp, (hw->func_caps.mdio_port_mode == 2) ? "I2C" : (hw->func_caps.mdio_port_mode == 1 && pf->has_i2c) ? "MDIO & I2C" : (hw->func_caps.mdio_port_mode == 1) ? "MDIO dedicated" : "MDIO shared"); return (0); } /* For the set_advertise sysctl */ void ixl_set_initial_advertised_speeds(struct ixl_pf *pf) { device_t dev = pf->dev; int err; /* Make sure to initialize the device to the complete list of * supported speeds on driver load, to ensure unloading and * reloading the driver will restore this value. */ err = ixl_set_advertised_speeds(pf, pf->supported_speeds, true); if (err) { /* Non-fatal error */ device_printf(dev, "%s: ixl_set_advertised_speeds() error %d\n", __func__, err); return; } pf->advertised_speed = ixl_convert_sysctl_aq_link_speed(pf->supported_speeds, false); } int ixl_teardown_hw_structs(struct ixl_pf *pf) { enum i40e_status_code status = 0; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; /* Shutdown LAN HMC */ if (hw->hmc.hmc_obj) { status = i40e_shutdown_lan_hmc(hw); if (status) { device_printf(dev, "init: LAN HMC shutdown failure; status %s\n", i40e_stat_str(hw, status)); goto err_out; } } /* Shutdown admin queue */ ixl_disable_intr0(hw); status = i40e_shutdown_adminq(hw); if (status) device_printf(dev, "init: Admin Queue shutdown failure; status %s\n", i40e_stat_str(hw, status)); ixl_pf_qmgr_release(&pf->qmgr, &pf->qtag); err_out: return (status); } int ixl_reset(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u32 reg; int error = 0; // XXX: clear_hw() actually writes to hw registers -- maybe this isn't necessary i40e_clear_hw(hw); error = i40e_pf_reset(hw); if (error) { device_printf(dev, "init: PF reset failure\n"); error = EIO; goto err_out; } error = i40e_init_adminq(hw); if (error) { device_printf(dev, "init: Admin queue init failure;" " status code %d\n", error); error = EIO; goto err_out; } i40e_clear_pxe_mode(hw); #if 0 error = ixl_get_hw_capabilities(pf); if (error) { device_printf(dev, "init: Error retrieving HW capabilities;" " status code %d\n", error); goto err_out; } error = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, 0, 0); if (error) { device_printf(dev, "init: LAN HMC init failed; status code %d\n", error); error = EIO; goto err_out; } error = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY); if (error) { device_printf(dev, "init: LAN HMC config failed; status code %d\n", error); error = EIO; goto err_out; } // XXX: possible fix for panic, but our failure recovery is still broken error = ixl_switch_config(pf); if (error) { device_printf(dev, "init: ixl_switch_config() failed: %d\n", error); goto err_out; } error = i40e_aq_set_phy_int_mask(hw, IXL_DEFAULT_PHY_INT_MASK, NULL); if (error) { device_printf(dev, "init: i40e_aq_set_phy_mask() failed: err %d," " aq_err %d\n", error, hw->aq.asq_last_status); error = EIO; goto err_out; } error = i40e_set_fc(hw, &set_fc_err_mask, true); if (error) { device_printf(dev, "init: setting link flow control failed; retcode %d," " fc_err_mask 0x%02x\n", error, set_fc_err_mask); goto err_out; } // XXX: (Rebuild VSIs?) /* Firmware delay workaround */ if (((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) || (hw->aq.fw_maj_ver < 4)) { i40e_msec_delay(75); error = i40e_aq_set_link_restart_an(hw, TRUE, NULL); if (error) { device_printf(dev, "init: link restart failed, aq_err %d\n", hw->aq.asq_last_status); goto err_out; } } /* Re-enable admin queue interrupt */ if (pf->msix > 1) { ixl_configure_intr0_msix(pf); ixl_enable_intr0(hw); } err_out: return (error); #endif ixl_rebuild_hw_structs_after_reset(pf); /* The PF reset should have cleared any critical errors */ atomic_clear_32(&pf->state, IXL_PF_STATE_PF_CRIT_ERR); atomic_clear_32(&pf->state, IXL_PF_STATE_PF_RESET_REQ); reg = rd32(hw, I40E_PFINT_ICR0_ENA); reg |= IXL_ICR0_CRIT_ERR_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); err_out: return (error); } /* * TODO: Make sure this properly handles admin queue / single rx queue intr */ int ixl_intr(void *arg) { struct ixl_pf *pf = arg; struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_rx_queue *que = vsi->rx_queues; u32 icr0; // pf->admin_irq++ ++que->irqs; // TODO: Check against proper field #if 0 /* Clear PBA at start of ISR if using legacy interrupts */ if (pf->msix == 0) wr32(hw, I40E_PFINT_DYN_CTL0, I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (IXL_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT)); #endif icr0 = rd32(hw, I40E_PFINT_ICR0); #ifdef PCI_IOV if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) iflib_iov_intr_deferred(vsi->ctx); #endif // TODO!: Do the stuff that's done in ixl_msix_adminq here, too! if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) iflib_admin_intr_deferred(vsi->ctx); // TODO: Is intr0 enabled somewhere else? ixl_enable_intr0(hw); if (icr0 & I40E_PFINT_ICR0_QUEUE_0_MASK) return (FILTER_SCHEDULE_THREAD); else return (FILTER_HANDLED); } /********************************************************************* * * MSI-X VSI Interrupt Service routine * **********************************************************************/ int ixl_msix_que(void *arg) { struct ixl_rx_queue *rx_que = arg; ++rx_que->irqs; ixl_set_queue_rx_itr(rx_que); // ixl_set_queue_tx_itr(que); return (FILTER_SCHEDULE_THREAD); } /********************************************************************* * * MSI-X Admin Queue Interrupt Service routine * **********************************************************************/ int ixl_msix_adminq(void *arg) { struct ixl_pf *pf = arg; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u32 reg, mask, rstat_reg; bool do_task = FALSE; DDPRINTF(dev, "begin"); ++pf->admin_irq; reg = rd32(hw, I40E_PFINT_ICR0); /* * For masking off interrupt causes that need to be handled before * they can be re-enabled */ mask = rd32(hw, I40E_PFINT_ICR0_ENA); /* Check on the cause */ if (reg & I40E_PFINT_ICR0_ADMINQ_MASK) { mask &= ~I40E_PFINT_ICR0_ENA_ADMINQ_MASK; do_task = TRUE; } if (reg & I40E_PFINT_ICR0_MAL_DETECT_MASK) { mask &= ~I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK; atomic_set_32(&pf->state, IXL_PF_STATE_MDD_PENDING); do_task = TRUE; } if (reg & I40E_PFINT_ICR0_GRST_MASK) { mask &= ~I40E_PFINT_ICR0_ENA_GRST_MASK; device_printf(dev, "Reset Requested!\n"); rstat_reg = rd32(hw, I40E_GLGEN_RSTAT); rstat_reg = (rstat_reg & I40E_GLGEN_RSTAT_RESET_TYPE_MASK) >> I40E_GLGEN_RSTAT_RESET_TYPE_SHIFT; device_printf(dev, "Reset type: "); switch (rstat_reg) { /* These others might be handled similarly to an EMPR reset */ case I40E_RESET_CORER: printf("CORER\n"); break; case I40E_RESET_GLOBR: printf("GLOBR\n"); break; case I40E_RESET_EMPR: printf("EMPR\n"); break; default: printf("POR\n"); break; } /* overload admin queue task to check reset progress */ atomic_set_int(&pf->state, IXL_PF_STATE_ADAPTER_RESETTING); do_task = TRUE; } /* * PE / PCI / ECC exceptions are all handled in the same way: * mask out these three causes, then request a PF reset * * TODO: I think at least ECC error requires a GLOBR, not PFR */ if (reg & I40E_PFINT_ICR0_ECC_ERR_MASK) device_printf(dev, "ECC Error detected!\n"); if (reg & I40E_PFINT_ICR0_PCI_EXCEPTION_MASK) device_printf(dev, "PCI Exception detected!\n"); if (reg & I40E_PFINT_ICR0_PE_CRITERR_MASK) device_printf(dev, "Critical Protocol Engine Error detected!\n"); /* Checks against the conditions above */ if (reg & IXL_ICR0_CRIT_ERR_MASK) { mask &= ~IXL_ICR0_CRIT_ERR_MASK; atomic_set_32(&pf->state, IXL_PF_STATE_PF_RESET_REQ | IXL_PF_STATE_PF_CRIT_ERR); do_task = TRUE; } // TODO: Linux driver never re-enables this interrupt once it has been detected // Then what is supposed to happen? A PF reset? Should it never happen? // TODO: Parse out this error into something human readable if (reg & I40E_PFINT_ICR0_HMC_ERR_MASK) { reg = rd32(hw, I40E_PFHMC_ERRORINFO); if (reg & I40E_PFHMC_ERRORINFO_ERROR_DETECTED_MASK) { device_printf(dev, "HMC Error detected!\n"); device_printf(dev, "INFO 0x%08x\n", reg); reg = rd32(hw, I40E_PFHMC_ERRORDATA); device_printf(dev, "DATA 0x%08x\n", reg); wr32(hw, I40E_PFHMC_ERRORINFO, 0); } } #ifdef PCI_IOV if (reg & I40E_PFINT_ICR0_VFLR_MASK) { mask &= ~I40E_PFINT_ICR0_ENA_VFLR_MASK; iflib_iov_intr_deferred(pf->vsi.ctx); } #endif wr32(hw, I40E_PFINT_ICR0_ENA, mask); ixl_enable_intr0(hw); if (do_task) return (FILTER_SCHEDULE_THREAD); else return (FILTER_HANDLED); } static u_int ixl_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) { struct ixl_vsi *vsi = arg; ixl_add_mc_filter(vsi, (u8*)LLADDR(sdl)); return (1); } /********************************************************************* * Filter Routines * * Routines for multicast and vlan filter management. * *********************************************************************/ void ixl_add_multi(struct ixl_vsi *vsi) { struct ifnet *ifp = vsi->ifp; struct i40e_hw *hw = vsi->hw; int mcnt = 0, flags; IOCTL_DEBUGOUT("ixl_add_multi: begin"); /* ** First just get a count, to decide if we ** we simply use multicast promiscuous. */ mcnt = if_llmaddr_count(ifp); if (__predict_false(mcnt >= MAX_MULTICAST_ADDR)) { /* delete existing MC filters */ ixl_del_hw_filters(vsi, mcnt); i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, TRUE, NULL); return; } mcnt = if_foreach_llmaddr(ifp, ixl_add_maddr, vsi); if (mcnt > 0) { flags = (IXL_FILTER_ADD | IXL_FILTER_USED | IXL_FILTER_MC); ixl_add_hw_filters(vsi, flags, mcnt); } IOCTL_DEBUGOUT("ixl_add_multi: end"); } static u_int ixl_match_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) { struct ixl_mac_filter *f = arg; if (cmp_etheraddr(f->macaddr, (u8 *)LLADDR(sdl))) return (1); else return (0); } int ixl_del_multi(struct ixl_vsi *vsi) { struct ifnet *ifp = vsi->ifp; struct ixl_mac_filter *f; int mcnt = 0; IOCTL_DEBUGOUT("ixl_del_multi: begin"); SLIST_FOREACH(f, &vsi->ftl, next) if ((f->flags & IXL_FILTER_USED) && (f->flags & IXL_FILTER_MC) && (if_foreach_llmaddr(ifp, ixl_match_maddr, f) == 0)) { f->flags |= IXL_FILTER_DEL; mcnt++; } if (mcnt > 0) ixl_del_hw_filters(vsi, mcnt); return (mcnt); } void ixl_link_up_msg(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ifnet *ifp = pf->vsi.ifp; char *req_fec_string, *neg_fec_string; u8 fec_abilities; fec_abilities = hw->phy.link_info.req_fec_info; /* If both RS and KR are requested, only show RS */ if (fec_abilities & I40E_AQ_REQUEST_FEC_RS) req_fec_string = ixl_fec_string[0]; else if (fec_abilities & I40E_AQ_REQUEST_FEC_KR) req_fec_string = ixl_fec_string[1]; else req_fec_string = ixl_fec_string[2]; if (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_RS_ENA) neg_fec_string = ixl_fec_string[0]; else if (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_KR_ENA) neg_fec_string = ixl_fec_string[1]; else neg_fec_string = ixl_fec_string[2]; log(LOG_NOTICE, "%s: Link is up, %s Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n", ifp->if_xname, ixl_aq_speed_to_str(hw->phy.link_info.link_speed), req_fec_string, neg_fec_string, (hw->phy.link_info.an_info & I40E_AQ_AN_COMPLETED) ? "True" : "False", (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX && hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX) ? ixl_fc_string[3] : (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX) ? ixl_fc_string[2] : (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX) ? ixl_fc_string[1] : ixl_fc_string[0]); } /* * Configure admin queue/misc interrupt cause registers in hardware. */ void ixl_configure_intr0_msix(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; u32 reg; /* First set up the adminq - vector 0 */ wr32(hw, I40E_PFINT_ICR0_ENA, 0); /* disable all */ rd32(hw, I40E_PFINT_ICR0); /* read to clear */ reg = I40E_PFINT_ICR0_ENA_ECC_ERR_MASK | I40E_PFINT_ICR0_ENA_GRST_MASK | I40E_PFINT_ICR0_ENA_HMC_ERR_MASK | I40E_PFINT_ICR0_ENA_ADMINQ_MASK | I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK | I40E_PFINT_ICR0_ENA_VFLR_MASK | I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK | I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); /* * 0x7FF is the end of the queue list. * This means we won't use MSI-X vector 0 for a queue interrupt * in MSI-X mode. */ wr32(hw, I40E_PFINT_LNKLST0, 0x7FF); /* Value is in 2 usec units, so 0x3E is 62*2 = 124 usecs. */ wr32(hw, I40E_PFINT_ITR0(IXL_RX_ITR), 0x3E); wr32(hw, I40E_PFINT_DYN_CTL0, I40E_PFINT_DYN_CTL0_SW_ITR_INDX_MASK | I40E_PFINT_DYN_CTL0_INTENA_MSK_MASK); wr32(hw, I40E_PFINT_STAT_CTL0, 0); } /* * Configure queue interrupt cause registers in hardware. * * Linked list for each vector LNKLSTN(i) -> RQCTL(i) -> TQCTL(i) -> EOL */ void ixl_configure_queue_intr_msix(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; u32 reg; u16 vector = 1; // TODO: See if max is really necessary for (int i = 0; i < max(vsi->num_rx_queues, vsi->num_tx_queues); i++, vector++) { /* Make sure interrupt is disabled */ wr32(hw, I40E_PFINT_DYN_CTLN(i), 0); /* Set linked list head to point to corresponding RX queue * e.g. vector 1 (LNKLSTN register 0) points to queue pair 0's RX queue */ reg = ((i << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) & I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK) | ((I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT) & I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_MASK); wr32(hw, I40E_PFINT_LNKLSTN(i), reg); reg = I40E_QINT_RQCTL_CAUSE_ENA_MASK | (IXL_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | (i << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_RQCTL(i), reg); reg = I40E_QINT_TQCTL_CAUSE_ENA_MASK | (IXL_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | (IXL_QUEUE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_TQCTL(i), reg); } } /* * Configure for single interrupt vector operation */ void ixl_configure_legacy(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; u32 reg; // TODO: Fix #if 0 /* Configure ITR */ vsi->tx_itr_setting = pf->tx_itr; wr32(hw, I40E_PFINT_ITR0(IXL_TX_ITR), vsi->tx_itr_setting); txr->itr = vsi->tx_itr_setting; vsi->rx_itr_setting = pf->rx_itr; wr32(hw, I40E_PFINT_ITR0(IXL_RX_ITR), vsi->rx_itr_setting); rxr->itr = vsi->rx_itr_setting; /* XXX: Assuming only 1 queue in single interrupt mode */ #endif vsi->rx_queues[0].rxr.itr = vsi->rx_itr_setting; /* Setup "other" causes */ reg = I40E_PFINT_ICR0_ENA_ECC_ERR_MASK | I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK | I40E_PFINT_ICR0_ENA_GRST_MASK | I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK | I40E_PFINT_ICR0_ENA_HMC_ERR_MASK | I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK | I40E_PFINT_ICR0_ENA_VFLR_MASK | I40E_PFINT_ICR0_ENA_ADMINQ_MASK ; wr32(hw, I40E_PFINT_ICR0_ENA, reg); /* No ITR for non-queue interrupts */ wr32(hw, I40E_PFINT_STAT_CTL0, IXL_ITR_NONE << I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT); /* FIRSTQ_INDX = 0, FIRSTQ_TYPE = 0 (rx) */ wr32(hw, I40E_PFINT_LNKLST0, 0); /* Associate the queue pair to the vector and enable the q int */ reg = I40E_QINT_RQCTL_CAUSE_ENA_MASK | (IXL_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_RQCTL(0), reg); reg = I40E_QINT_TQCTL_CAUSE_ENA_MASK | (IXL_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (IXL_QUEUE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT); wr32(hw, I40E_QINT_TQCTL(0), reg); } void ixl_free_pci_resources(struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; device_t dev = iflib_get_dev(vsi->ctx); struct ixl_rx_queue *rx_que = vsi->rx_queues; /* We may get here before stations are set up */ if (rx_que == NULL) goto early; /* ** Release all MSI-X VSI resources: */ iflib_irq_free(vsi->ctx, &vsi->irq); for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) iflib_irq_free(vsi->ctx, &rx_que->que_irq); early: if (pf->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(pf->pci_mem), pf->pci_mem); } void ixl_add_ifmedia(struct ixl_vsi *vsi, u64 phy_types) { /* Display supported media types */ if (phy_types & (I40E_CAP_PHY_TYPE_100BASE_TX)) ifmedia_add(vsi->media, IFM_ETHER | IFM_100_TX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_1000BASE_T)) ifmedia_add(vsi->media, IFM_ETHER | IFM_1000_T, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_1000BASE_SX)) ifmedia_add(vsi->media, IFM_ETHER | IFM_1000_SX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_1000BASE_LX)) ifmedia_add(vsi->media, IFM_ETHER | IFM_1000_LX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_XAUI) || phy_types & (I40E_CAP_PHY_TYPE_XFI) || phy_types & (I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU)) ifmedia_add(vsi->media, IFM_ETHER | IFM_10G_TWINAX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_SR)) ifmedia_add(vsi->media, IFM_ETHER | IFM_10G_SR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_LR)) ifmedia_add(vsi->media, IFM_ETHER | IFM_10G_LR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_T)) ifmedia_add(vsi->media, IFM_ETHER | IFM_10G_T, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_40GBASE_CR4) || phy_types & (I40E_CAP_PHY_TYPE_40GBASE_CR4_CU) || phy_types & (I40E_CAP_PHY_TYPE_40GBASE_AOC) || phy_types & (I40E_CAP_PHY_TYPE_XLAUI) || phy_types & (I40E_CAP_PHY_TYPE_40GBASE_KR4)) ifmedia_add(vsi->media, IFM_ETHER | IFM_40G_CR4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_40GBASE_SR4)) ifmedia_add(vsi->media, IFM_ETHER | IFM_40G_SR4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_40GBASE_LR4)) ifmedia_add(vsi->media, IFM_ETHER | IFM_40G_LR4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_1000BASE_KX)) ifmedia_add(vsi->media, IFM_ETHER | IFM_1000_KX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_CR1_CU) || phy_types & (I40E_CAP_PHY_TYPE_10GBASE_CR1)) ifmedia_add(vsi->media, IFM_ETHER | IFM_10G_CR1, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_AOC)) ifmedia_add(vsi->media, IFM_ETHER | IFM_10G_AOC, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_SFI)) ifmedia_add(vsi->media, IFM_ETHER | IFM_10G_SFI, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_KX4)) ifmedia_add(vsi->media, IFM_ETHER | IFM_10G_KX4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_KR)) ifmedia_add(vsi->media, IFM_ETHER | IFM_10G_KR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_20GBASE_KR2)) ifmedia_add(vsi->media, IFM_ETHER | IFM_20G_KR2, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_40GBASE_KR4)) ifmedia_add(vsi->media, IFM_ETHER | IFM_40G_KR4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_XLPPI)) ifmedia_add(vsi->media, IFM_ETHER | IFM_40G_XLPPI, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_KR)) ifmedia_add(vsi->media, IFM_ETHER | IFM_25G_KR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_CR)) ifmedia_add(vsi->media, IFM_ETHER | IFM_25G_CR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_SR)) ifmedia_add(vsi->media, IFM_ETHER | IFM_25G_SR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_LR)) ifmedia_add(vsi->media, IFM_ETHER | IFM_25G_LR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_AOC)) ifmedia_add(vsi->media, IFM_ETHER | IFM_25G_AOC, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_ACC)) ifmedia_add(vsi->media, IFM_ETHER | IFM_25G_ACC, 0, NULL); } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ int ixl_setup_interface(device_t dev, struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; if_ctx_t ctx = vsi->ctx; struct i40e_hw *hw = &pf->hw; struct ifnet *ifp = iflib_get_ifp(ctx); struct i40e_aq_get_phy_abilities_resp abilities; enum i40e_status_code aq_error = 0; INIT_DBG_DEV(dev, "begin"); vsi->shared->isc_max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, TRUE, &abilities, NULL); /* May need delay to detect fiber correctly */ if (aq_error == I40E_ERR_UNKNOWN_PHY) { /* TODO: Maybe just retry this in a task... */ i40e_msec_delay(200); aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, TRUE, &abilities, NULL); } if (aq_error) { if (aq_error == I40E_ERR_UNKNOWN_PHY) device_printf(dev, "Unknown PHY type detected!\n"); else device_printf(dev, "Error getting supported media types, err %d," " AQ error %d\n", aq_error, hw->aq.asq_last_status); } else { pf->supported_speeds = abilities.link_speed; #if __FreeBSD_version >= 1100000 if_setbaudrate(ifp, ixl_max_aq_speed_to_value(pf->supported_speeds)); #else if_initbaudrate(ifp, ixl_max_aq_speed_to_value(pf->supported_speeds)); #endif ixl_add_ifmedia(vsi, hw->phy.phy_types); } /* Use autoselect media by default */ ifmedia_add(vsi->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(vsi->media, IFM_ETHER | IFM_AUTO); return (0); } /* * Input: bitmap of enum i40e_aq_link_speed */ u64 ixl_max_aq_speed_to_value(u8 link_speeds) { if (link_speeds & I40E_LINK_SPEED_40GB) return IF_Gbps(40); if (link_speeds & I40E_LINK_SPEED_25GB) return IF_Gbps(25); if (link_speeds & I40E_LINK_SPEED_20GB) return IF_Gbps(20); if (link_speeds & I40E_LINK_SPEED_10GB) return IF_Gbps(10); if (link_speeds & I40E_LINK_SPEED_1GB) return IF_Gbps(1); if (link_speeds & I40E_LINK_SPEED_100MB) return IF_Mbps(100); else /* Minimum supported link speed */ return IF_Mbps(100); } /* ** Run when the Admin Queue gets a link state change interrupt. */ void ixl_link_event(struct ixl_pf *pf, struct i40e_arq_event_info *e) { struct i40e_hw *hw = &pf->hw; device_t dev = iflib_get_dev(pf->vsi.ctx); struct i40e_aqc_get_link_status *status = (struct i40e_aqc_get_link_status *)&e->desc.params.raw; /* Request link status from adapter */ hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); /* Print out message if an unqualified module is found */ if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) && (pf->advertised_speed) && (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) && (!(status->link_info & I40E_AQ_LINK_UP))) device_printf(dev, "Link failed because " "an unqualified module was detected!\n"); /* OS link info is updated elsewhere */ } /********************************************************************* * * Get Firmware Switch configuration * - this will need to be more robust when more complex * switch configurations are enabled. * **********************************************************************/ int ixl_switch_config(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; device_t dev = iflib_get_dev(vsi->ctx); struct i40e_aqc_get_switch_config_resp *sw_config; u8 aq_buf[I40E_AQ_LARGE_BUF]; int ret; u16 next = 0; memset(&aq_buf, 0, sizeof(aq_buf)); sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf; ret = i40e_aq_get_switch_config(hw, sw_config, sizeof(aq_buf), &next, NULL); if (ret) { device_printf(dev, "aq_get_switch_config() failed, error %d," " aq_error %d\n", ret, pf->hw.aq.asq_last_status); return (ret); } if (pf->dbg_mask & IXL_DBG_SWITCH_INFO) { device_printf(dev, "Switch config: header reported: %d in structure, %d total\n", - sw_config->header.num_reported, sw_config->header.num_total); - for (int i = 0; i < sw_config->header.num_reported; i++) { + LE16_TO_CPU(sw_config->header.num_reported), + LE16_TO_CPU(sw_config->header.num_total)); + for (int i = 0; + i < LE16_TO_CPU(sw_config->header.num_reported); i++) { device_printf(dev, "-> %d: type=%d seid=%d uplink=%d downlink=%d\n", i, sw_config->element[i].element_type, - sw_config->element[i].seid, - sw_config->element[i].uplink_seid, - sw_config->element[i].downlink_seid); + LE16_TO_CPU(sw_config->element[i].seid), + LE16_TO_CPU(sw_config->element[i].uplink_seid), + LE16_TO_CPU(sw_config->element[i].downlink_seid)); } } /* Simplified due to a single VSI */ - vsi->uplink_seid = sw_config->element[0].uplink_seid; - vsi->downlink_seid = sw_config->element[0].downlink_seid; - vsi->seid = sw_config->element[0].seid; + vsi->uplink_seid = LE16_TO_CPU(sw_config->element[0].uplink_seid); + vsi->downlink_seid = LE16_TO_CPU(sw_config->element[0].downlink_seid); + vsi->seid = LE16_TO_CPU(sw_config->element[0].seid); return (ret); } /********************************************************************* * * Initialize the VSI: this handles contexts, which means things * like the number of descriptors, buffer size, * plus we init the rings thru this function. * **********************************************************************/ int ixl_initialize_vsi(struct ixl_vsi *vsi) { struct ixl_pf *pf = vsi->back; if_softc_ctx_t scctx = iflib_get_softc_ctx(vsi->ctx); struct ixl_tx_queue *tx_que = vsi->tx_queues; struct ixl_rx_queue *rx_que = vsi->rx_queues; device_t dev = iflib_get_dev(vsi->ctx); struct i40e_hw *hw = vsi->hw; struct i40e_vsi_context ctxt; int tc_queues; int err = 0; memset(&ctxt, 0, sizeof(ctxt)); ctxt.seid = vsi->seid; if (pf->veb_seid != 0) ctxt.uplink_seid = pf->veb_seid; ctxt.pf_num = hw->pf_id; err = i40e_aq_get_vsi_params(hw, &ctxt, NULL); if (err) { device_printf(dev, "i40e_aq_get_vsi_params() failed, error %d" " aq_error %d\n", err, hw->aq.asq_last_status); return (err); } ixl_dbg(pf, IXL_DBG_SWITCH_INFO, "get_vsi_params: seid: %d, uplinkseid: %d, vsi_number: %d, " "vsis_allocated: %d, vsis_unallocated: %d, flags: 0x%x, " "pfnum: %d, vfnum: %d, stat idx: %d, enabled: %d\n", ctxt.seid, ctxt.uplink_seid, ctxt.vsi_number, ctxt.vsis_allocated, ctxt.vsis_unallocated, ctxt.flags, ctxt.pf_num, ctxt.vf_num, ctxt.info.stat_counter_idx, ctxt.info.up_enable_bits); /* ** Set the queue and traffic class bits ** - when multiple traffic classes are supported ** this will need to be more robust. */ ctxt.info.valid_sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; ctxt.info.mapping_flags |= I40E_AQ_VSI_QUE_MAP_CONTIG; /* In contig mode, que_mapping[0] is first queue index used by this VSI */ ctxt.info.queue_mapping[0] = 0; /* * This VSI will only use traffic class 0; start traffic class 0's * queue allocation at queue 0, and assign it 2^tc_queues queues (though * the driver may not use all of them). */ tc_queues = fls(pf->qtag.num_allocated) - 1; ctxt.info.tc_mapping[0] = ((pf->qtag.first_qidx << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) & I40E_AQ_VSI_TC_QUE_OFFSET_MASK) | ((tc_queues << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT) & I40E_AQ_VSI_TC_QUE_NUMBER_MASK); /* Set VLAN receive stripping mode */ ctxt.info.valid_sections |= I40E_AQ_VSI_PROP_VLAN_VALID; ctxt.info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL; if (if_getcapenable(vsi->ifp) & IFCAP_VLAN_HWTAGGING) ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH; else ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_EMOD_NOTHING; #ifdef IXL_IW /* Set TCP Enable for iWARP capable VSI */ if (ixl_enable_iwarp && pf->iw_enabled) { ctxt.info.valid_sections |= htole16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA; } #endif /* Save VSI number and info for use later */ vsi->vsi_num = ctxt.vsi_number; bcopy(&ctxt.info, &vsi->info, sizeof(vsi->info)); /* Reset VSI statistics */ ixl_vsi_reset_stats(vsi); vsi->hw_filters_add = 0; vsi->hw_filters_del = 0; ctxt.flags = htole16(I40E_AQ_VSI_TYPE_PF); err = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (err) { device_printf(dev, "i40e_aq_update_vsi_params() failed, error %d," " aq_error %d\n", err, hw->aq.asq_last_status); return (err); } for (int i = 0; i < vsi->num_tx_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; struct i40e_hmc_obj_txq tctx; u32 txctl; /* Setup the HMC TX Context */ bzero(&tctx, sizeof(tctx)); tctx.new_context = 1; tctx.base = (txr->tx_paddr/IXL_TX_CTX_BASE_UNITS); tctx.qlen = scctx->isc_ntxd[0]; tctx.fc_ena = 0; /* Disable FCoE */ /* * This value needs to pulled from the VSI that this queue * is assigned to. Index into array is traffic class. */ tctx.rdylist = vsi->info.qs_handle[0]; /* * Set these to enable Head Writeback * - Address is last entry in TX ring (reserved for HWB index) * Leave these as 0 for Descriptor Writeback */ if (vsi->enable_head_writeback) { tctx.head_wb_ena = 1; tctx.head_wb_addr = txr->tx_paddr + (scctx->isc_ntxd[0] * sizeof(struct i40e_tx_desc)); } else { tctx.head_wb_ena = 0; tctx.head_wb_addr = 0; } tctx.rdylist_act = 0; err = i40e_clear_lan_tx_queue_context(hw, i); if (err) { device_printf(dev, "Unable to clear TX context\n"); break; } err = i40e_set_lan_tx_queue_context(hw, i, &tctx); if (err) { device_printf(dev, "Unable to set TX context\n"); break; } /* Associate the ring with this PF */ txctl = I40E_QTX_CTL_PF_QUEUE; txctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) & I40E_QTX_CTL_PF_INDX_MASK); wr32(hw, I40E_QTX_CTL(i), txctl); ixl_flush(hw); /* Do ring (re)init */ ixl_init_tx_ring(vsi, tx_que); } for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) { struct rx_ring *rxr = &rx_que->rxr; struct i40e_hmc_obj_rxq rctx; /* Next setup the HMC RX Context */ rxr->mbuf_sz = iflib_get_rx_mbuf_sz(vsi->ctx); u16 max_rxmax = rxr->mbuf_sz * hw->func_caps.rx_buf_chain_len; /* Set up an RX context for the HMC */ memset(&rctx, 0, sizeof(struct i40e_hmc_obj_rxq)); rctx.dbuff = rxr->mbuf_sz >> I40E_RXQ_CTX_DBUFF_SHIFT; /* ignore header split for now */ rctx.hbuff = 0 >> I40E_RXQ_CTX_HBUFF_SHIFT; rctx.rxmax = (scctx->isc_max_frame_size < max_rxmax) ? scctx->isc_max_frame_size : max_rxmax; rctx.dtype = 0; rctx.dsize = 1; /* do 32byte descriptors */ rctx.hsplit_0 = 0; /* no header split */ rctx.base = (rxr->rx_paddr/IXL_RX_CTX_BASE_UNITS); rctx.qlen = scctx->isc_nrxd[0]; rctx.tphrdesc_ena = 1; rctx.tphwdesc_ena = 1; rctx.tphdata_ena = 0; /* Header Split related */ rctx.tphhead_ena = 0; /* Header Split related */ rctx.lrxqthresh = 1; /* Interrupt at <64 desc avail */ rctx.crcstrip = 1; rctx.l2tsel = 1; rctx.showiv = 1; /* Strip inner VLAN header */ rctx.fc_ena = 0; /* Disable FCoE */ rctx.prefena = 1; /* Prefetch descriptors */ err = i40e_clear_lan_rx_queue_context(hw, i); if (err) { device_printf(dev, "Unable to clear RX context %d\n", i); break; } err = i40e_set_lan_rx_queue_context(hw, i, &rctx); if (err) { device_printf(dev, "Unable to set RX context %d\n", i); break; } wr32(vsi->hw, I40E_QRX_TAIL(i), 0); } return (err); } void ixl_free_mac_filters(struct ixl_vsi *vsi) { struct ixl_mac_filter *f; while (!SLIST_EMPTY(&vsi->ftl)) { f = SLIST_FIRST(&vsi->ftl); SLIST_REMOVE_HEAD(&vsi->ftl, next); free(f, M_DEVBUF); } } /* ** Provide a update to the queue RX ** interrupt moderation value. */ void ixl_set_queue_rx_itr(struct ixl_rx_queue *que) { struct ixl_vsi *vsi = que->vsi; struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct i40e_hw *hw = vsi->hw; struct rx_ring *rxr = &que->rxr; u16 rx_itr; u16 rx_latency = 0; int rx_bytes; /* Idle, do nothing */ if (rxr->bytes == 0) return; if (pf->dynamic_rx_itr) { rx_bytes = rxr->bytes/rxr->itr; rx_itr = rxr->itr; /* Adjust latency range */ switch (rxr->latency) { case IXL_LOW_LATENCY: if (rx_bytes > 10) { rx_latency = IXL_AVE_LATENCY; rx_itr = IXL_ITR_20K; } break; case IXL_AVE_LATENCY: if (rx_bytes > 20) { rx_latency = IXL_BULK_LATENCY; rx_itr = IXL_ITR_8K; } else if (rx_bytes <= 10) { rx_latency = IXL_LOW_LATENCY; rx_itr = IXL_ITR_100K; } break; case IXL_BULK_LATENCY: if (rx_bytes <= 20) { rx_latency = IXL_AVE_LATENCY; rx_itr = IXL_ITR_20K; } break; } rxr->latency = rx_latency; if (rx_itr != rxr->itr) { /* do an exponential smoothing */ rx_itr = (10 * rx_itr * rxr->itr) / ((9 * rx_itr) + rxr->itr); rxr->itr = min(rx_itr, IXL_MAX_ITR); wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR, rxr->me), rxr->itr); } } else { /* We may have have toggled to non-dynamic */ if (vsi->rx_itr_setting & IXL_ITR_DYNAMIC) vsi->rx_itr_setting = pf->rx_itr; /* Update the hardware if needed */ if (rxr->itr != vsi->rx_itr_setting) { rxr->itr = vsi->rx_itr_setting; wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR, rxr->me), rxr->itr); } } rxr->bytes = 0; rxr->packets = 0; } /* ** Provide a update to the queue TX ** interrupt moderation value. */ void ixl_set_queue_tx_itr(struct ixl_tx_queue *que) { struct ixl_vsi *vsi = que->vsi; struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct i40e_hw *hw = vsi->hw; struct tx_ring *txr = &que->txr; u16 tx_itr; u16 tx_latency = 0; int tx_bytes; /* Idle, do nothing */ if (txr->bytes == 0) return; if (pf->dynamic_tx_itr) { tx_bytes = txr->bytes/txr->itr; tx_itr = txr->itr; switch (txr->latency) { case IXL_LOW_LATENCY: if (tx_bytes > 10) { tx_latency = IXL_AVE_LATENCY; tx_itr = IXL_ITR_20K; } break; case IXL_AVE_LATENCY: if (tx_bytes > 20) { tx_latency = IXL_BULK_LATENCY; tx_itr = IXL_ITR_8K; } else if (tx_bytes <= 10) { tx_latency = IXL_LOW_LATENCY; tx_itr = IXL_ITR_100K; } break; case IXL_BULK_LATENCY: if (tx_bytes <= 20) { tx_latency = IXL_AVE_LATENCY; tx_itr = IXL_ITR_20K; } break; } txr->latency = tx_latency; if (tx_itr != txr->itr) { /* do an exponential smoothing */ tx_itr = (10 * tx_itr * txr->itr) / ((9 * tx_itr) + txr->itr); txr->itr = min(tx_itr, IXL_MAX_ITR); wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR, txr->me), txr->itr); } } else { /* We may have have toggled to non-dynamic */ if (vsi->tx_itr_setting & IXL_ITR_DYNAMIC) vsi->tx_itr_setting = pf->tx_itr; /* Update the hardware if needed */ if (txr->itr != vsi->tx_itr_setting) { txr->itr = vsi->tx_itr_setting; wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR, txr->me), txr->itr); } } txr->bytes = 0; txr->packets = 0; return; } #ifdef IXL_DEBUG /** * ixl_sysctl_qtx_tail_handler * Retrieves I40E_QTX_TAIL value from hardware * for a sysctl. */ int ixl_sysctl_qtx_tail_handler(SYSCTL_HANDLER_ARGS) { struct ixl_tx_queue *tx_que; int error; u32 val; tx_que = ((struct ixl_tx_queue *)oidp->oid_arg1); if (!tx_que) return 0; val = rd32(tx_que->vsi->hw, tx_que->txr.tail); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } /** * ixl_sysctl_qrx_tail_handler * Retrieves I40E_QRX_TAIL value from hardware * for a sysctl. */ int ixl_sysctl_qrx_tail_handler(SYSCTL_HANDLER_ARGS) { struct ixl_rx_queue *rx_que; int error; u32 val; rx_que = ((struct ixl_rx_queue *)oidp->oid_arg1); if (!rx_que) return 0; val = rd32(rx_que->vsi->hw, rx_que->rxr.tail); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } #endif /* * Used to set the Tx ITR value for all of the PF LAN VSI's queues. * Writes to the ITR registers immediately. */ static int ixl_sysctl_pf_tx_itr(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; int error = 0; int requested_tx_itr; requested_tx_itr = pf->tx_itr; error = sysctl_handle_int(oidp, &requested_tx_itr, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (pf->dynamic_tx_itr) { device_printf(dev, "Cannot set TX itr value while dynamic TX itr is enabled\n"); return (EINVAL); } if (requested_tx_itr < 0 || requested_tx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid TX itr value; value must be between 0 and %d\n", IXL_MAX_ITR); return (EINVAL); } pf->tx_itr = requested_tx_itr; ixl_configure_tx_itr(pf); return (error); } /* * Used to set the Rx ITR value for all of the PF LAN VSI's queues. * Writes to the ITR registers immediately. */ static int ixl_sysctl_pf_rx_itr(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; int error = 0; int requested_rx_itr; requested_rx_itr = pf->rx_itr; error = sysctl_handle_int(oidp, &requested_rx_itr, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (pf->dynamic_rx_itr) { device_printf(dev, "Cannot set RX itr value while dynamic RX itr is enabled\n"); return (EINVAL); } if (requested_rx_itr < 0 || requested_rx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid RX itr value; value must be between 0 and %d\n", IXL_MAX_ITR); return (EINVAL); } pf->rx_itr = requested_rx_itr; ixl_configure_rx_itr(pf); return (error); } void ixl_add_hw_stats(struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; device_t dev = iflib_get_dev(vsi->ctx); struct i40e_hw_port_stats *pf_stats = &pf->stats; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); /* Driver statistics */ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "admin_irq", CTLFLAG_RD, &pf->admin_irq, "Admin Queue IRQs received"); ixl_add_vsi_sysctls(dev, vsi, ctx, "pf"); ixl_add_queues_sysctls(dev, vsi); ixl_add_sysctls_mac_stats(ctx, child, pf_stats); } void ixl_add_sysctls_mac_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct i40e_hw_port_stats *stats) { struct sysctl_oid *stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Mac Statistics"); struct sysctl_oid_list *stat_list = SYSCTL_CHILDREN(stat_node); struct i40e_eth_stats *eth_stats = &stats->eth; ixl_add_sysctls_eth_stats(ctx, stat_list, eth_stats); struct ixl_sysctl_info ctls[] = { {&stats->crc_errors, "crc_errors", "CRC Errors"}, {&stats->illegal_bytes, "illegal_bytes", "Illegal Byte Errors"}, {&stats->mac_local_faults, "local_faults", "MAC Local Faults"}, {&stats->mac_remote_faults, "remote_faults", "MAC Remote Faults"}, {&stats->rx_length_errors, "rx_length_errors", "Receive Length Errors"}, /* Packet Reception Stats */ {&stats->rx_size_64, "rx_frames_64", "64 byte frames received"}, {&stats->rx_size_127, "rx_frames_65_127", "65-127 byte frames received"}, {&stats->rx_size_255, "rx_frames_128_255", "128-255 byte frames received"}, {&stats->rx_size_511, "rx_frames_256_511", "256-511 byte frames received"}, {&stats->rx_size_1023, "rx_frames_512_1023", "512-1023 byte frames received"}, {&stats->rx_size_1522, "rx_frames_1024_1522", "1024-1522 byte frames received"}, {&stats->rx_size_big, "rx_frames_big", "1523-9522 byte frames received"}, {&stats->rx_undersize, "rx_undersize", "Undersized packets received"}, {&stats->rx_fragments, "rx_fragmented", "Fragmented packets received"}, {&stats->rx_oversize, "rx_oversized", "Oversized packets received"}, {&stats->rx_jabber, "rx_jabber", "Received Jabber"}, {&stats->checksum_error, "checksum_errors", "Checksum Errors"}, /* Packet Transmission Stats */ {&stats->tx_size_64, "tx_frames_64", "64 byte frames transmitted"}, {&stats->tx_size_127, "tx_frames_65_127", "65-127 byte frames transmitted"}, {&stats->tx_size_255, "tx_frames_128_255", "128-255 byte frames transmitted"}, {&stats->tx_size_511, "tx_frames_256_511", "256-511 byte frames transmitted"}, {&stats->tx_size_1023, "tx_frames_512_1023", "512-1023 byte frames transmitted"}, {&stats->tx_size_1522, "tx_frames_1024_1522", "1024-1522 byte frames transmitted"}, {&stats->tx_size_big, "tx_frames_big", "1523-9522 byte frames transmitted"}, /* Flow control */ {&stats->link_xon_tx, "xon_txd", "Link XON transmitted"}, {&stats->link_xon_rx, "xon_recvd", "Link XON received"}, {&stats->link_xoff_tx, "xoff_txd", "Link XOFF transmitted"}, {&stats->link_xoff_rx, "xoff_recvd", "Link XOFF received"}, /* End */ {0,0,0} }; struct ixl_sysctl_info *entry = ctls; while (entry->stat != 0) { SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, entry->name, CTLFLAG_RD, entry->stat, entry->description); entry++; } } void ixl_set_rss_key(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; device_t dev = pf->dev; u32 rss_seed[IXL_RSS_KEY_SIZE_REG]; enum i40e_status_code status; #ifdef RSS /* Fetch the configured RSS key */ rss_getkey((uint8_t *) &rss_seed); #else ixl_get_default_rss_key(rss_seed); #endif /* Fill out hash function seed */ if (hw->mac.type == I40E_MAC_X722) { struct i40e_aqc_get_set_rss_key_data key_data; bcopy(rss_seed, &key_data, 52); status = i40e_aq_set_rss_key(hw, vsi->vsi_num, &key_data); if (status) device_printf(dev, "i40e_aq_set_rss_key status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); } else { for (int i = 0; i < IXL_RSS_KEY_SIZE_REG; i++) i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i), rss_seed[i]); } } /* * Configure enabled PCTYPES for RSS. */ void ixl_set_rss_pctypes(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; u64 set_hena = 0, hena; #ifdef RSS u32 rss_hash_config; rss_hash_config = rss_gethashconfig(); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP); #else if (hw->mac.type == I40E_MAC_X722) set_hena = IXL_DEFAULT_RSS_HENA_X722; else set_hena = IXL_DEFAULT_RSS_HENA_XL710; #endif hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); hena |= set_hena; i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); } void ixl_set_rss_hlut(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; device_t dev = iflib_get_dev(vsi->ctx); int i, que_id; int lut_entry_width; u32 lut = 0; enum i40e_status_code status; lut_entry_width = pf->hw.func_caps.rss_table_entry_width; /* Populate the LUT with max no. of queues in round robin fashion */ u8 hlut_buf[512]; for (i = 0; i < pf->hw.func_caps.rss_table_size; i++) { #ifdef RSS /* * Fetch the RSS bucket id for the given indirection entry. * Cap it at the number of configured buckets (which is * num_queues.) */ que_id = rss_get_indirection_to_bucket(i); que_id = que_id % vsi->num_rx_queues; #else que_id = i % vsi->num_rx_queues; #endif lut = (que_id & ((0x1 << lut_entry_width) - 1)); hlut_buf[i] = lut; } if (hw->mac.type == I40E_MAC_X722) { status = i40e_aq_set_rss_lut(hw, vsi->vsi_num, TRUE, hlut_buf, sizeof(hlut_buf)); if (status) device_printf(dev, "i40e_aq_set_rss_lut status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); } else { for (i = 0; i < pf->hw.func_caps.rss_table_size >> 2; i++) wr32(hw, I40E_PFQF_HLUT(i), ((u32 *)hlut_buf)[i]); ixl_flush(hw); } } /* ** Setup the PF's RSS parameters. */ void ixl_config_rss(struct ixl_pf *pf) { ixl_set_rss_key(pf); ixl_set_rss_pctypes(pf); ixl_set_rss_hlut(pf); } /* ** This routine updates vlan filters, called by init ** it scans the filter table and then updates the hw ** after a soft reset. */ void ixl_setup_vlan_filters(struct ixl_vsi *vsi) { struct ixl_mac_filter *f; int cnt = 0, flags; if (vsi->num_vlans == 0) return; /* ** Scan the filter list for vlan entries, ** mark them for addition and then call ** for the AQ update. */ SLIST_FOREACH(f, &vsi->ftl, next) { if (f->flags & IXL_FILTER_VLAN) { f->flags |= (IXL_FILTER_ADD | IXL_FILTER_USED); cnt++; } } if (cnt == 0) { printf("setup vlan: no filters found!\n"); return; } flags = IXL_FILTER_VLAN; flags |= (IXL_FILTER_ADD | IXL_FILTER_USED); ixl_add_hw_filters(vsi, flags, cnt); } /* * In some firmware versions there is default MAC/VLAN filter * configured which interferes with filters managed by driver. * Make sure it's removed. */ void ixl_del_default_hw_filters(struct ixl_vsi *vsi) { struct i40e_aqc_remove_macvlan_element_data e; bzero(&e, sizeof(e)); bcopy(vsi->hw->mac.perm_addr, e.mac_addr, ETHER_ADDR_LEN); e.vlan_tag = 0; e.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH; i40e_aq_remove_macvlan(vsi->hw, vsi->seid, &e, 1, NULL); bzero(&e, sizeof(e)); bcopy(vsi->hw->mac.perm_addr, e.mac_addr, ETHER_ADDR_LEN); e.vlan_tag = 0; e.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH | I40E_AQC_MACVLAN_DEL_IGNORE_VLAN; i40e_aq_remove_macvlan(vsi->hw, vsi->seid, &e, 1, NULL); } /* ** Initialize filter list and add filters that the hardware ** needs to know about. ** ** Requires VSI's filter list & seid to be set before calling. */ void ixl_init_filters(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; /* Initialize mac filter list for VSI */ SLIST_INIT(&vsi->ftl); /* Receive broadcast Ethernet frames */ i40e_aq_set_vsi_broadcast(&pf->hw, vsi->seid, TRUE, NULL); ixl_del_default_hw_filters(vsi); ixl_add_filter(vsi, vsi->hw->mac.addr, IXL_VLAN_ANY); /* * Prevent Tx flow control frames from being sent out by * non-firmware transmitters. * This affects every VSI in the PF. */ if (pf->enable_tx_fc_filter) i40e_add_filter_to_drop_tx_flow_control_frames(vsi->hw, vsi->seid); } /* ** This routine adds mulicast filters */ void ixl_add_mc_filter(struct ixl_vsi *vsi, u8 *macaddr) { struct ixl_mac_filter *f; /* Does one already exist */ f = ixl_find_filter(vsi, macaddr, IXL_VLAN_ANY); if (f != NULL) return; f = ixl_new_filter(vsi, macaddr, IXL_VLAN_ANY); if (f != NULL) f->flags |= IXL_FILTER_MC; else printf("WARNING: no filter available!!\n"); } void ixl_reconfigure_filters(struct ixl_vsi *vsi) { ixl_add_hw_filters(vsi, IXL_FILTER_USED, vsi->num_macs); } /* * This routine adds a MAC/VLAN filter to the software filter * list, then adds that new filter to the HW if it doesn't already * exist in the SW filter list. */ void ixl_add_filter(struct ixl_vsi *vsi, const u8 *macaddr, s16 vlan) { struct ixl_mac_filter *f, *tmp; struct ixl_pf *pf; device_t dev; DEBUGOUT("ixl_add_filter: begin"); pf = vsi->back; dev = pf->dev; /* Does one already exist */ f = ixl_find_filter(vsi, macaddr, vlan); if (f != NULL) return; /* ** Is this the first vlan being registered, if so we ** need to remove the ANY filter that indicates we are ** not in a vlan, and replace that with a 0 filter. */ if ((vlan != IXL_VLAN_ANY) && (vsi->num_vlans == 1)) { tmp = ixl_find_filter(vsi, macaddr, IXL_VLAN_ANY); if (tmp != NULL) { ixl_del_filter(vsi, macaddr, IXL_VLAN_ANY); ixl_add_filter(vsi, macaddr, 0); } } f = ixl_new_filter(vsi, macaddr, vlan); if (f == NULL) { device_printf(dev, "WARNING: no filter available!!\n"); return; } if (f->vlan != IXL_VLAN_ANY) f->flags |= IXL_FILTER_VLAN; else vsi->num_macs++; f->flags |= IXL_FILTER_USED; ixl_add_hw_filters(vsi, f->flags, 1); } void ixl_del_filter(struct ixl_vsi *vsi, const u8 *macaddr, s16 vlan) { struct ixl_mac_filter *f; f = ixl_find_filter(vsi, macaddr, vlan); if (f == NULL) return; f->flags |= IXL_FILTER_DEL; ixl_del_hw_filters(vsi, 1); if (f->vlan == IXL_VLAN_ANY && (f->flags & IXL_FILTER_VLAN) != 0) vsi->num_macs--; /* Check if this is the last vlan removal */ if (vlan != IXL_VLAN_ANY && vsi->num_vlans == 0) { /* Switch back to a non-vlan filter */ ixl_del_filter(vsi, macaddr, 0); ixl_add_filter(vsi, macaddr, IXL_VLAN_ANY); } return; } /* ** Find the filter with both matching mac addr and vlan id */ struct ixl_mac_filter * ixl_find_filter(struct ixl_vsi *vsi, const u8 *macaddr, s16 vlan) { struct ixl_mac_filter *f; SLIST_FOREACH(f, &vsi->ftl, next) { if ((cmp_etheraddr(f->macaddr, macaddr) != 0) && (f->vlan == vlan)) { return (f); } } return (NULL); } /* ** This routine takes additions to the vsi filter ** table and creates an Admin Queue call to create ** the filters in the hardware. */ void ixl_add_hw_filters(struct ixl_vsi *vsi, int flags, int cnt) { struct i40e_aqc_add_macvlan_element_data *a, *b; struct ixl_mac_filter *f; struct ixl_pf *pf; struct i40e_hw *hw; device_t dev; enum i40e_status_code status; int j = 0; pf = vsi->back; dev = vsi->dev; hw = &pf->hw; if (cnt < 1) { ixl_dbg_info(pf, "ixl_add_hw_filters: cnt == 0\n"); return; } a = malloc(sizeof(struct i40e_aqc_add_macvlan_element_data) * cnt, M_DEVBUF, M_NOWAIT | M_ZERO); if (a == NULL) { device_printf(dev, "add_hw_filters failed to get memory\n"); return; } /* ** Scan the filter list, each time we find one ** we add it to the admin queue array and turn off ** the add bit. */ SLIST_FOREACH(f, &vsi->ftl, next) { if ((f->flags & flags) == flags) { b = &a[j]; // a pox on fvl long names :) bcopy(f->macaddr, b->mac_addr, ETHER_ADDR_LEN); if (f->vlan == IXL_VLAN_ANY) { b->vlan_tag = 0; - b->flags = I40E_AQC_MACVLAN_ADD_IGNORE_VLAN; + b->flags = CPU_TO_LE16( + I40E_AQC_MACVLAN_ADD_IGNORE_VLAN); } else { - b->vlan_tag = f->vlan; + b->vlan_tag = CPU_TO_LE16(f->vlan); b->flags = 0; } - b->flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH; + b->flags |= CPU_TO_LE16( + I40E_AQC_MACVLAN_ADD_PERFECT_MATCH); f->flags &= ~IXL_FILTER_ADD; j++; ixl_dbg_filter(pf, "ADD: " MAC_FORMAT "\n", MAC_FORMAT_ARGS(f->macaddr)); } if (j == cnt) break; } if (j > 0) { status = i40e_aq_add_macvlan(hw, vsi->seid, a, j, NULL); if (status) device_printf(dev, "i40e_aq_add_macvlan status %s, " "error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); else vsi->hw_filters_add += j; } free(a, M_DEVBUF); return; } /* ** This routine takes removals in the vsi filter ** table and creates an Admin Queue call to delete ** the filters in the hardware. */ void ixl_del_hw_filters(struct ixl_vsi *vsi, int cnt) { struct i40e_aqc_remove_macvlan_element_data *d, *e; struct ixl_pf *pf; struct i40e_hw *hw; device_t dev; struct ixl_mac_filter *f, *f_temp; enum i40e_status_code status; int j = 0; pf = vsi->back; hw = &pf->hw; dev = vsi->dev; d = malloc(sizeof(struct i40e_aqc_remove_macvlan_element_data) * cnt, M_DEVBUF, M_NOWAIT | M_ZERO); if (d == NULL) { device_printf(dev, "%s: failed to get memory\n", __func__); return; } SLIST_FOREACH_SAFE(f, &vsi->ftl, next, f_temp) { if (f->flags & IXL_FILTER_DEL) { e = &d[j]; // a pox on fvl long names :) bcopy(f->macaddr, e->mac_addr, ETHER_ADDR_LEN); e->flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH; if (f->vlan == IXL_VLAN_ANY) { e->vlan_tag = 0; e->flags |= I40E_AQC_MACVLAN_DEL_IGNORE_VLAN; } else { e->vlan_tag = f->vlan; } ixl_dbg_filter(pf, "DEL: " MAC_FORMAT "\n", MAC_FORMAT_ARGS(f->macaddr)); /* delete entry from vsi list */ SLIST_REMOVE(&vsi->ftl, f, ixl_mac_filter, next); free(f, M_DEVBUF); j++; } if (j == cnt) break; } if (j > 0) { status = i40e_aq_remove_macvlan(hw, vsi->seid, d, j, NULL); if (status) { int sc = 0; for (int i = 0; i < j; i++) sc += (!d[i].error_code); vsi->hw_filters_del += sc; device_printf(dev, "Failed to remove %d/%d filters, error %s\n", j - sc, j, i40e_aq_str(hw, hw->aq.asq_last_status)); } else vsi->hw_filters_del += j; } free(d, M_DEVBUF); return; } int ixl_enable_tx_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { struct i40e_hw *hw = &pf->hw; int error = 0; u32 reg; u16 pf_qidx; pf_qidx = ixl_pf_qidx_from_vsi_qidx(qtag, vsi_qidx); ixl_dbg(pf, IXL_DBG_EN_DIS, "Enabling PF TX ring %4d / VSI TX ring %4d...\n", pf_qidx, vsi_qidx); i40e_pre_tx_queue_cfg(hw, pf_qidx, TRUE); reg = rd32(hw, I40E_QTX_ENA(pf_qidx)); reg |= I40E_QTX_ENA_QENA_REQ_MASK | I40E_QTX_ENA_QENA_STAT_MASK; wr32(hw, I40E_QTX_ENA(pf_qidx), reg); /* Verify the enable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QTX_ENA(pf_qidx)); if (reg & I40E_QTX_ENA_QENA_STAT_MASK) break; i40e_usec_delay(10); } if ((reg & I40E_QTX_ENA_QENA_STAT_MASK) == 0) { device_printf(pf->dev, "TX queue %d still disabled!\n", pf_qidx); error = ETIMEDOUT; } return (error); } int ixl_enable_rx_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { struct i40e_hw *hw = &pf->hw; int error = 0; u32 reg; u16 pf_qidx; pf_qidx = ixl_pf_qidx_from_vsi_qidx(qtag, vsi_qidx); ixl_dbg(pf, IXL_DBG_EN_DIS, "Enabling PF RX ring %4d / VSI RX ring %4d...\n", pf_qidx, vsi_qidx); reg = rd32(hw, I40E_QRX_ENA(pf_qidx)); reg |= I40E_QRX_ENA_QENA_REQ_MASK | I40E_QRX_ENA_QENA_STAT_MASK; wr32(hw, I40E_QRX_ENA(pf_qidx), reg); /* Verify the enable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QRX_ENA(pf_qidx)); if (reg & I40E_QRX_ENA_QENA_STAT_MASK) break; i40e_usec_delay(10); } if ((reg & I40E_QRX_ENA_QENA_STAT_MASK) == 0) { device_printf(pf->dev, "RX queue %d still disabled!\n", pf_qidx); error = ETIMEDOUT; } return (error); } int ixl_enable_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { int error = 0; error = ixl_enable_tx_ring(pf, qtag, vsi_qidx); /* Called function already prints error message */ if (error) return (error); error = ixl_enable_rx_ring(pf, qtag, vsi_qidx); return (error); } /* For PF VSI only */ int ixl_enable_rings(struct ixl_vsi *vsi) { struct ixl_pf *pf = vsi->back; int error = 0; for (int i = 0; i < vsi->num_tx_queues; i++) error = ixl_enable_tx_ring(pf, &pf->qtag, i); for (int i = 0; i < vsi->num_rx_queues; i++) error = ixl_enable_rx_ring(pf, &pf->qtag, i); return (error); } /* * Returns error on first ring that is detected hung. */ int ixl_disable_tx_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { struct i40e_hw *hw = &pf->hw; int error = 0; u32 reg; u16 pf_qidx; pf_qidx = ixl_pf_qidx_from_vsi_qidx(qtag, vsi_qidx); i40e_pre_tx_queue_cfg(hw, pf_qidx, FALSE); i40e_usec_delay(500); reg = rd32(hw, I40E_QTX_ENA(pf_qidx)); reg &= ~I40E_QTX_ENA_QENA_REQ_MASK; wr32(hw, I40E_QTX_ENA(pf_qidx), reg); /* Verify the disable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QTX_ENA(pf_qidx)); if (!(reg & I40E_QTX_ENA_QENA_STAT_MASK)) break; i40e_msec_delay(10); } if (reg & I40E_QTX_ENA_QENA_STAT_MASK) { device_printf(pf->dev, "TX queue %d still enabled!\n", pf_qidx); error = ETIMEDOUT; } return (error); } /* * Returns error on first ring that is detected hung. */ int ixl_disable_rx_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { struct i40e_hw *hw = &pf->hw; int error = 0; u32 reg; u16 pf_qidx; pf_qidx = ixl_pf_qidx_from_vsi_qidx(qtag, vsi_qidx); reg = rd32(hw, I40E_QRX_ENA(pf_qidx)); reg &= ~I40E_QRX_ENA_QENA_REQ_MASK; wr32(hw, I40E_QRX_ENA(pf_qidx), reg); /* Verify the disable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QRX_ENA(pf_qidx)); if (!(reg & I40E_QRX_ENA_QENA_STAT_MASK)) break; i40e_msec_delay(10); } if (reg & I40E_QRX_ENA_QENA_STAT_MASK) { device_printf(pf->dev, "RX queue %d still enabled!\n", pf_qidx); error = ETIMEDOUT; } return (error); } int ixl_disable_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { int error = 0; error = ixl_disable_tx_ring(pf, qtag, vsi_qidx); /* Called function already prints error message */ if (error) return (error); error = ixl_disable_rx_ring(pf, qtag, vsi_qidx); return (error); } int ixl_disable_rings(struct ixl_pf *pf, struct ixl_vsi *vsi, struct ixl_pf_qtag *qtag) { int error = 0; for (int i = 0; i < vsi->num_tx_queues; i++) error = ixl_disable_tx_ring(pf, qtag, i); for (int i = 0; i < vsi->num_rx_queues; i++) error = ixl_disable_rx_ring(pf, qtag, i); return (error); } static void ixl_handle_tx_mdd_event(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct ixl_vf *vf; bool mdd_detected = false; bool pf_mdd_detected = false; bool vf_mdd_detected = false; u16 vf_num, queue; u8 pf_num, event; u8 pf_mdet_num, vp_mdet_num; u32 reg; /* find what triggered the MDD event */ reg = rd32(hw, I40E_GL_MDET_TX); if (reg & I40E_GL_MDET_TX_VALID_MASK) { pf_num = (reg & I40E_GL_MDET_TX_PF_NUM_MASK) >> I40E_GL_MDET_TX_PF_NUM_SHIFT; vf_num = (reg & I40E_GL_MDET_TX_VF_NUM_MASK) >> I40E_GL_MDET_TX_VF_NUM_SHIFT; event = (reg & I40E_GL_MDET_TX_EVENT_MASK) >> I40E_GL_MDET_TX_EVENT_SHIFT; queue = (reg & I40E_GL_MDET_TX_QUEUE_MASK) >> I40E_GL_MDET_TX_QUEUE_SHIFT; wr32(hw, I40E_GL_MDET_TX, 0xffffffff); mdd_detected = true; } if (!mdd_detected) return; reg = rd32(hw, I40E_PF_MDET_TX); if (reg & I40E_PF_MDET_TX_VALID_MASK) { wr32(hw, I40E_PF_MDET_TX, 0xFFFF); pf_mdet_num = hw->pf_id; pf_mdd_detected = true; } /* Check if MDD was caused by a VF */ for (int i = 0; i < pf->num_vfs; i++) { vf = &(pf->vfs[i]); reg = rd32(hw, I40E_VP_MDET_TX(i)); if (reg & I40E_VP_MDET_TX_VALID_MASK) { wr32(hw, I40E_VP_MDET_TX(i), 0xFFFF); vp_mdet_num = i; vf->num_mdd_events++; vf_mdd_detected = true; } } /* Print out an error message */ if (vf_mdd_detected && pf_mdd_detected) device_printf(dev, "Malicious Driver Detection event %d" " on TX queue %d, pf number %d (PF-%d), vf number %d (VF-%d)\n", event, queue, pf_num, pf_mdet_num, vf_num, vp_mdet_num); else if (vf_mdd_detected && !pf_mdd_detected) device_printf(dev, "Malicious Driver Detection event %d" " on TX queue %d, pf number %d, vf number %d (VF-%d)\n", event, queue, pf_num, vf_num, vp_mdet_num); else if (!vf_mdd_detected && pf_mdd_detected) device_printf(dev, "Malicious Driver Detection event %d" " on TX queue %d, pf number %d (PF-%d)\n", event, queue, pf_num, pf_mdet_num); /* Theoretically shouldn't happen */ else device_printf(dev, "TX Malicious Driver Detection event (unknown)\n"); } static void ixl_handle_rx_mdd_event(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct ixl_vf *vf; bool mdd_detected = false; bool pf_mdd_detected = false; bool vf_mdd_detected = false; u16 queue; u8 pf_num, event; u8 pf_mdet_num, vp_mdet_num; u32 reg; /* * GL_MDET_RX doesn't contain VF number information, unlike * GL_MDET_TX. */ reg = rd32(hw, I40E_GL_MDET_RX); if (reg & I40E_GL_MDET_RX_VALID_MASK) { pf_num = (reg & I40E_GL_MDET_RX_FUNCTION_MASK) >> I40E_GL_MDET_RX_FUNCTION_SHIFT; event = (reg & I40E_GL_MDET_RX_EVENT_MASK) >> I40E_GL_MDET_RX_EVENT_SHIFT; queue = (reg & I40E_GL_MDET_RX_QUEUE_MASK) >> I40E_GL_MDET_RX_QUEUE_SHIFT; wr32(hw, I40E_GL_MDET_RX, 0xffffffff); mdd_detected = true; } if (!mdd_detected) return; reg = rd32(hw, I40E_PF_MDET_RX); if (reg & I40E_PF_MDET_RX_VALID_MASK) { wr32(hw, I40E_PF_MDET_RX, 0xFFFF); pf_mdet_num = hw->pf_id; pf_mdd_detected = true; } /* Check if MDD was caused by a VF */ for (int i = 0; i < pf->num_vfs; i++) { vf = &(pf->vfs[i]); reg = rd32(hw, I40E_VP_MDET_RX(i)); if (reg & I40E_VP_MDET_RX_VALID_MASK) { wr32(hw, I40E_VP_MDET_RX(i), 0xFFFF); vp_mdet_num = i; vf->num_mdd_events++; vf_mdd_detected = true; } } /* Print out an error message */ if (vf_mdd_detected && pf_mdd_detected) device_printf(dev, "Malicious Driver Detection event %d" " on RX queue %d, pf number %d (PF-%d), (VF-%d)\n", event, queue, pf_num, pf_mdet_num, vp_mdet_num); else if (vf_mdd_detected && !pf_mdd_detected) device_printf(dev, "Malicious Driver Detection event %d" " on RX queue %d, pf number %d, (VF-%d)\n", event, queue, pf_num, vp_mdet_num); else if (!vf_mdd_detected && pf_mdd_detected) device_printf(dev, "Malicious Driver Detection event %d" " on RX queue %d, pf number %d (PF-%d)\n", event, queue, pf_num, pf_mdet_num); /* Theoretically shouldn't happen */ else device_printf(dev, "RX Malicious Driver Detection event (unknown)\n"); } /** * ixl_handle_mdd_event * * Called from interrupt handler to identify possibly malicious vfs * (But also detects events from the PF, as well) **/ void ixl_handle_mdd_event(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; u32 reg; /* * Handle both TX/RX because it's possible they could * both trigger in the same interrupt. */ ixl_handle_tx_mdd_event(pf); ixl_handle_rx_mdd_event(pf); atomic_clear_32(&pf->state, IXL_PF_STATE_MDD_PENDING); /* re-enable mdd interrupt cause */ reg = rd32(hw, I40E_PFINT_ICR0_ENA); reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); ixl_flush(hw); } void ixl_enable_intr(struct ixl_vsi *vsi) { struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *que = vsi->rx_queues; if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { for (int i = 0; i < vsi->num_rx_queues; i++, que++) ixl_enable_queue(hw, que->rxr.me); } else ixl_enable_intr0(hw); } void ixl_disable_rings_intr(struct ixl_vsi *vsi) { struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *que = vsi->rx_queues; for (int i = 0; i < vsi->num_rx_queues; i++, que++) ixl_disable_queue(hw, que->rxr.me); } void ixl_enable_intr0(struct i40e_hw *hw) { u32 reg; /* Use IXL_ITR_NONE so ITR isn't updated here */ reg = I40E_PFINT_DYN_CTL0_INTENA_MASK | I40E_PFINT_DYN_CTL0_CLEARPBA_MASK | (IXL_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTL0, reg); } void ixl_disable_intr0(struct i40e_hw *hw) { u32 reg; reg = IXL_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT; wr32(hw, I40E_PFINT_DYN_CTL0, reg); ixl_flush(hw); } void ixl_enable_queue(struct i40e_hw *hw, int id) { u32 reg; reg = I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (IXL_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTLN(id), reg); } void ixl_disable_queue(struct i40e_hw *hw, int id) { u32 reg; reg = IXL_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT; wr32(hw, I40E_PFINT_DYN_CTLN(id), reg); } void ixl_update_stats_counters(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_vf *vf; u64 prev_link_xoff_rx = pf->stats.link_xoff_rx; struct i40e_hw_port_stats *nsd = &pf->stats; struct i40e_hw_port_stats *osd = &pf->stats_offsets; /* Update hw stats */ ixl_stat_update32(hw, I40E_GLPRT_CRCERRS(hw->port), pf->stat_offsets_loaded, &osd->crc_errors, &nsd->crc_errors); ixl_stat_update32(hw, I40E_GLPRT_ILLERRC(hw->port), pf->stat_offsets_loaded, &osd->illegal_bytes, &nsd->illegal_bytes); ixl_stat_update48(hw, I40E_GLPRT_GORCH(hw->port), I40E_GLPRT_GORCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_bytes, &nsd->eth.rx_bytes); ixl_stat_update48(hw, I40E_GLPRT_GOTCH(hw->port), I40E_GLPRT_GOTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_bytes, &nsd->eth.tx_bytes); ixl_stat_update32(hw, I40E_GLPRT_RDPC(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_discards, &nsd->eth.rx_discards); ixl_stat_update48(hw, I40E_GLPRT_UPRCH(hw->port), I40E_GLPRT_UPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_unicast, &nsd->eth.rx_unicast); ixl_stat_update48(hw, I40E_GLPRT_UPTCH(hw->port), I40E_GLPRT_UPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_unicast, &nsd->eth.tx_unicast); ixl_stat_update48(hw, I40E_GLPRT_MPRCH(hw->port), I40E_GLPRT_MPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_multicast, &nsd->eth.rx_multicast); ixl_stat_update48(hw, I40E_GLPRT_MPTCH(hw->port), I40E_GLPRT_MPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_multicast, &nsd->eth.tx_multicast); ixl_stat_update48(hw, I40E_GLPRT_BPRCH(hw->port), I40E_GLPRT_BPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_broadcast, &nsd->eth.rx_broadcast); ixl_stat_update48(hw, I40E_GLPRT_BPTCH(hw->port), I40E_GLPRT_BPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_broadcast, &nsd->eth.tx_broadcast); ixl_stat_update32(hw, I40E_GLPRT_TDOLD(hw->port), pf->stat_offsets_loaded, &osd->tx_dropped_link_down, &nsd->tx_dropped_link_down); ixl_stat_update32(hw, I40E_GLPRT_MLFC(hw->port), pf->stat_offsets_loaded, &osd->mac_local_faults, &nsd->mac_local_faults); ixl_stat_update32(hw, I40E_GLPRT_MRFC(hw->port), pf->stat_offsets_loaded, &osd->mac_remote_faults, &nsd->mac_remote_faults); ixl_stat_update32(hw, I40E_GLPRT_RLEC(hw->port), pf->stat_offsets_loaded, &osd->rx_length_errors, &nsd->rx_length_errors); /* Flow control (LFC) stats */ ixl_stat_update32(hw, I40E_GLPRT_LXONRXC(hw->port), pf->stat_offsets_loaded, &osd->link_xon_rx, &nsd->link_xon_rx); ixl_stat_update32(hw, I40E_GLPRT_LXONTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xon_tx, &nsd->link_xon_tx); ixl_stat_update32(hw, I40E_GLPRT_LXOFFRXC(hw->port), pf->stat_offsets_loaded, &osd->link_xoff_rx, &nsd->link_xoff_rx); ixl_stat_update32(hw, I40E_GLPRT_LXOFFTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xoff_tx, &nsd->link_xoff_tx); /* * For watchdog management we need to know if we have been paused * during the last interval, so capture that here. */ if (pf->stats.link_xoff_rx != prev_link_xoff_rx) vsi->shared->isc_pause_frames = 1; /* Packet size stats rx */ ixl_stat_update48(hw, I40E_GLPRT_PRC64H(hw->port), I40E_GLPRT_PRC64L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_64, &nsd->rx_size_64); ixl_stat_update48(hw, I40E_GLPRT_PRC127H(hw->port), I40E_GLPRT_PRC127L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_127, &nsd->rx_size_127); ixl_stat_update48(hw, I40E_GLPRT_PRC255H(hw->port), I40E_GLPRT_PRC255L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_255, &nsd->rx_size_255); ixl_stat_update48(hw, I40E_GLPRT_PRC511H(hw->port), I40E_GLPRT_PRC511L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_511, &nsd->rx_size_511); ixl_stat_update48(hw, I40E_GLPRT_PRC1023H(hw->port), I40E_GLPRT_PRC1023L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_1023, &nsd->rx_size_1023); ixl_stat_update48(hw, I40E_GLPRT_PRC1522H(hw->port), I40E_GLPRT_PRC1522L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_1522, &nsd->rx_size_1522); ixl_stat_update48(hw, I40E_GLPRT_PRC9522H(hw->port), I40E_GLPRT_PRC9522L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_big, &nsd->rx_size_big); /* Packet size stats tx */ ixl_stat_update48(hw, I40E_GLPRT_PTC64H(hw->port), I40E_GLPRT_PTC64L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_64, &nsd->tx_size_64); ixl_stat_update48(hw, I40E_GLPRT_PTC127H(hw->port), I40E_GLPRT_PTC127L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_127, &nsd->tx_size_127); ixl_stat_update48(hw, I40E_GLPRT_PTC255H(hw->port), I40E_GLPRT_PTC255L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_255, &nsd->tx_size_255); ixl_stat_update48(hw, I40E_GLPRT_PTC511H(hw->port), I40E_GLPRT_PTC511L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_511, &nsd->tx_size_511); ixl_stat_update48(hw, I40E_GLPRT_PTC1023H(hw->port), I40E_GLPRT_PTC1023L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_1023, &nsd->tx_size_1023); ixl_stat_update48(hw, I40E_GLPRT_PTC1522H(hw->port), I40E_GLPRT_PTC1522L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_1522, &nsd->tx_size_1522); ixl_stat_update48(hw, I40E_GLPRT_PTC9522H(hw->port), I40E_GLPRT_PTC9522L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_big, &nsd->tx_size_big); ixl_stat_update32(hw, I40E_GLPRT_RUC(hw->port), pf->stat_offsets_loaded, &osd->rx_undersize, &nsd->rx_undersize); ixl_stat_update32(hw, I40E_GLPRT_RFC(hw->port), pf->stat_offsets_loaded, &osd->rx_fragments, &nsd->rx_fragments); ixl_stat_update32(hw, I40E_GLPRT_ROC(hw->port), pf->stat_offsets_loaded, &osd->rx_oversize, &nsd->rx_oversize); ixl_stat_update32(hw, I40E_GLPRT_RJC(hw->port), pf->stat_offsets_loaded, &osd->rx_jabber, &nsd->rx_jabber); pf->stat_offsets_loaded = true; /* End hw stats */ /* Update vsi stats */ ixl_update_vsi_stats(vsi); for (int i = 0; i < pf->num_vfs; i++) { vf = &pf->vfs[i]; if (vf->vf_flags & VF_FLAG_ENABLED) ixl_update_eth_stats(&pf->vfs[i].vsi); } } int ixl_prepare_for_reset(struct ixl_pf *pf, bool is_up) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int error = 0; error = i40e_shutdown_lan_hmc(hw); if (error) device_printf(dev, "Shutdown LAN HMC failed with code %d\n", error); ixl_disable_intr0(hw); error = i40e_shutdown_adminq(hw); if (error) device_printf(dev, "Shutdown Admin queue failed with code %d\n", error); ixl_pf_qmgr_release(&pf->qmgr, &pf->qtag); return (error); } int ixl_rebuild_hw_structs_after_reset(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; device_t dev = pf->dev; int error = 0; device_printf(dev, "Rebuilding driver state...\n"); error = i40e_pf_reset(hw); if (error) { device_printf(dev, "PF reset failure %s\n", i40e_stat_str(hw, error)); goto ixl_rebuild_hw_structs_after_reset_err; } /* Setup */ error = i40e_init_adminq(hw); if (error != 0 && error != I40E_ERR_FIRMWARE_API_VERSION) { device_printf(dev, "Unable to initialize Admin Queue, error %d\n", error); goto ixl_rebuild_hw_structs_after_reset_err; } i40e_clear_pxe_mode(hw); error = ixl_get_hw_capabilities(pf); if (error) { device_printf(dev, "ixl_get_hw_capabilities failed: %d\n", error); goto ixl_rebuild_hw_structs_after_reset_err; } error = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, 0, 0); if (error) { device_printf(dev, "init_lan_hmc failed: %d\n", error); goto ixl_rebuild_hw_structs_after_reset_err; } error = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY); if (error) { device_printf(dev, "configure_lan_hmc failed: %d\n", error); goto ixl_rebuild_hw_structs_after_reset_err; } /* reserve a contiguous allocation for the PF's VSI */ error = ixl_pf_qmgr_alloc_contiguous(&pf->qmgr, vsi->num_tx_queues, &pf->qtag); if (error) { device_printf(dev, "Failed to reserve queues for PF LAN VSI, error %d\n", error); /* TODO: error handling */ } error = ixl_switch_config(pf); if (error) { device_printf(dev, "ixl_rebuild_hw_structs_after_reset: ixl_switch_config() failed: %d\n", error); error = EIO; goto ixl_rebuild_hw_structs_after_reset_err; } error = i40e_aq_set_phy_int_mask(hw, IXL_DEFAULT_PHY_INT_MASK, NULL); if (error) { device_printf(dev, "init: i40e_aq_set_phy_mask() failed: err %d," " aq_err %d\n", error, hw->aq.asq_last_status); error = EIO; goto ixl_rebuild_hw_structs_after_reset_err; } u8 set_fc_err_mask; error = i40e_set_fc(hw, &set_fc_err_mask, true); if (error) { device_printf(dev, "init: setting link flow control failed; retcode %d," " fc_err_mask 0x%02x\n", error, set_fc_err_mask); error = EIO; goto ixl_rebuild_hw_structs_after_reset_err; } /* Remove default filters reinstalled by FW on reset */ ixl_del_default_hw_filters(vsi); /* Determine link state */ if (ixl_attach_get_link_status(pf)) { error = EINVAL; /* TODO: error handling */ } i40e_aq_set_dcb_parameters(hw, TRUE, NULL); ixl_get_fw_lldp_status(pf); /* Keep admin queue interrupts active while driver is loaded */ if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { ixl_configure_intr0_msix(pf); ixl_enable_intr0(hw); } device_printf(dev, "Rebuilding driver state done.\n"); return (0); ixl_rebuild_hw_structs_after_reset_err: device_printf(dev, "Reload the driver to recover\n"); return (error); } void ixl_handle_empr_reset(struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = &pf->hw; bool is_up = !!(vsi->ifp->if_drv_flags & IFF_DRV_RUNNING); int count = 0; u32 reg; ixl_prepare_for_reset(pf, is_up); /* Typically finishes within 3-4 seconds */ while (count++ < 100) { reg = rd32(hw, I40E_GLGEN_RSTAT) & I40E_GLGEN_RSTAT_DEVSTATE_MASK; if (reg) i40e_msec_delay(100); else break; } ixl_dbg(pf, IXL_DBG_INFO, "Reset wait count: %d\n", count); ixl_rebuild_hw_structs_after_reset(pf); atomic_clear_int(&pf->state, IXL_PF_STATE_ADAPTER_RESETTING); } /** * Update VSI-specific ethernet statistics counters. **/ void ixl_update_eth_stats(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_eth_stats *es; struct i40e_eth_stats *oes; struct i40e_hw_port_stats *nsd; u16 stat_idx = vsi->info.stat_counter_idx; es = &vsi->eth_stats; oes = &vsi->eth_stats_offsets; nsd = &pf->stats; /* Gather up the stats that the hw collects */ ixl_stat_update32(hw, I40E_GLV_TEPC(stat_idx), vsi->stat_offsets_loaded, &oes->tx_errors, &es->tx_errors); ixl_stat_update32(hw, I40E_GLV_RDPC(stat_idx), vsi->stat_offsets_loaded, &oes->rx_discards, &es->rx_discards); ixl_stat_update48(hw, I40E_GLV_GORCH(stat_idx), I40E_GLV_GORCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_bytes, &es->rx_bytes); ixl_stat_update48(hw, I40E_GLV_UPRCH(stat_idx), I40E_GLV_UPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_unicast, &es->rx_unicast); ixl_stat_update48(hw, I40E_GLV_MPRCH(stat_idx), I40E_GLV_MPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_multicast, &es->rx_multicast); ixl_stat_update48(hw, I40E_GLV_BPRCH(stat_idx), I40E_GLV_BPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_broadcast, &es->rx_broadcast); ixl_stat_update48(hw, I40E_GLV_GOTCH(stat_idx), I40E_GLV_GOTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_bytes, &es->tx_bytes); ixl_stat_update48(hw, I40E_GLV_UPTCH(stat_idx), I40E_GLV_UPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_unicast, &es->tx_unicast); ixl_stat_update48(hw, I40E_GLV_MPTCH(stat_idx), I40E_GLV_MPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_multicast, &es->tx_multicast); ixl_stat_update48(hw, I40E_GLV_BPTCH(stat_idx), I40E_GLV_BPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_broadcast, &es->tx_broadcast); vsi->stat_offsets_loaded = true; } void ixl_update_vsi_stats(struct ixl_vsi *vsi) { struct ixl_pf *pf; struct ifnet *ifp; struct i40e_eth_stats *es; u64 tx_discards; struct i40e_hw_port_stats *nsd; pf = vsi->back; ifp = vsi->ifp; es = &vsi->eth_stats; nsd = &pf->stats; ixl_update_eth_stats(vsi); tx_discards = es->tx_discards + nsd->tx_dropped_link_down; /* Update ifnet stats */ IXL_SET_IPACKETS(vsi, es->rx_unicast + es->rx_multicast + es->rx_broadcast); IXL_SET_OPACKETS(vsi, es->tx_unicast + es->tx_multicast + es->tx_broadcast); IXL_SET_IBYTES(vsi, es->rx_bytes); IXL_SET_OBYTES(vsi, es->tx_bytes); IXL_SET_IMCASTS(vsi, es->rx_multicast); IXL_SET_OMCASTS(vsi, es->tx_multicast); IXL_SET_IERRORS(vsi, nsd->crc_errors + nsd->illegal_bytes + nsd->rx_undersize + nsd->rx_oversize + nsd->rx_fragments + nsd->rx_jabber); IXL_SET_OERRORS(vsi, es->tx_errors); IXL_SET_IQDROPS(vsi, es->rx_discards + nsd->eth.rx_discards); IXL_SET_OQDROPS(vsi, tx_discards); IXL_SET_NOPROTO(vsi, es->rx_unknown_protocol); IXL_SET_COLLISIONS(vsi, 0); } /** * Reset all of the stats for the given pf **/ void ixl_pf_reset_stats(struct ixl_pf *pf) { bzero(&pf->stats, sizeof(struct i40e_hw_port_stats)); bzero(&pf->stats_offsets, sizeof(struct i40e_hw_port_stats)); pf->stat_offsets_loaded = false; } /** * Resets all stats of the given vsi **/ void ixl_vsi_reset_stats(struct ixl_vsi *vsi) { bzero(&vsi->eth_stats, sizeof(struct i40e_eth_stats)); bzero(&vsi->eth_stats_offsets, sizeof(struct i40e_eth_stats)); vsi->stat_offsets_loaded = false; } /** * Read and update a 48 bit stat from the hw * * Since the device stats are not reset at PFReset, they likely will not * be zeroed when the driver starts. We'll save the first values read * and use them as offsets to be subtracted from the raw values in order * to report stats that count from zero. **/ void ixl_stat_update48(struct i40e_hw *hw, u32 hireg, u32 loreg, bool offset_loaded, u64 *offset, u64 *stat) { u64 new_data; #if defined(__FreeBSD__) && (__FreeBSD_version >= 1000000) && defined(__amd64__) new_data = rd64(hw, loreg); #else /* * Use two rd32's instead of one rd64; FreeBSD versions before * 10 don't support 64-bit bus reads/writes. */ new_data = rd32(hw, loreg); new_data |= ((u64)(rd32(hw, hireg) & 0xFFFF)) << 32; #endif if (!offset_loaded) *offset = new_data; if (new_data >= *offset) *stat = new_data - *offset; else *stat = (new_data + ((u64)1 << 48)) - *offset; *stat &= 0xFFFFFFFFFFFFULL; } /** * Read and update a 32 bit stat from the hw **/ void ixl_stat_update32(struct i40e_hw *hw, u32 reg, bool offset_loaded, u64 *offset, u64 *stat) { u32 new_data; new_data = rd32(hw, reg); if (!offset_loaded) *offset = new_data; if (new_data >= *offset) *stat = (u32)(new_data - *offset); else *stat = (u32)((new_data + ((u64)1 << 32)) - *offset); } void ixl_add_device_sysctls(struct ixl_pf *pf) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid_list *ctx_list = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); struct sysctl_oid *debug_node; struct sysctl_oid_list *debug_list; struct sysctl_oid *fec_node; struct sysctl_oid_list *fec_list; /* Set up sysctls */ SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_set_flowcntl, "I", IXL_SYSCTL_HELP_FC); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_set_advertise, "I", IXL_SYSCTL_HELP_SET_ADVERTISE); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "supported_speeds", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_supported_speeds, "I", IXL_SYSCTL_HELP_SUPPORTED_SPEED); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "current_speed", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_current_speed, "A", "Current Port Speed"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_show_fw, "A", "Firmware version"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "unallocated_queues", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_unallocated_queues, "I", "Queues not allocated to a PF or VF"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "tx_itr", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_pf_tx_itr, "I", "Immediately set TX ITR value for all queues"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "rx_itr", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_pf_rx_itr, "I", "Immediately set RX ITR value for all queues"); SYSCTL_ADD_INT(ctx, ctx_list, OID_AUTO, "dynamic_rx_itr", CTLFLAG_RW, &pf->dynamic_rx_itr, 0, "Enable dynamic RX ITR"); SYSCTL_ADD_INT(ctx, ctx_list, OID_AUTO, "dynamic_tx_itr", CTLFLAG_RW, &pf->dynamic_tx_itr, 0, "Enable dynamic TX ITR"); /* Add FEC sysctls for 25G adapters */ if (i40e_is_25G_device(hw->device_id)) { fec_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "fec", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "FEC Sysctls"); fec_list = SYSCTL_CHILDREN(fec_node); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "fc_ability", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_fec_fc_ability, "I", "FC FEC ability enabled"); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "rs_ability", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_fec_rs_ability, "I", "RS FEC ability enabled"); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "fc_requested", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_fec_fc_request, "I", "FC FEC mode requested on link"); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "rs_requested", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_fec_rs_request, "I", "RS FEC mode requested on link"); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "auto_fec_enabled", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_fec_auto_enable, "I", "Let FW decide FEC ability/request modes"); } SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "fw_lldp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_fw_lldp, "I", IXL_SYSCTL_HELP_FW_LLDP); /* Add sysctls meant to print debug information, but don't list them * in "sysctl -a" output. */ debug_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, NULL, "Debug Sysctls"); debug_list = SYSCTL_CHILDREN(debug_node); SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "shared_debug_mask", CTLFLAG_RW, &pf->hw.debug_mask, 0, "Shared code debug message level"); SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "core_debug_mask", CTLFLAG_RW, &pf->dbg_mask, 0, "Non-shared code debug message level"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "link_status", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_link_status, "A", IXL_SYSCTL_HELP_LINK_STATUS); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_abilities", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_phy_abilities, "A", "PHY Abilities"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "filter_list", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_sw_filter_list, "A", "SW Filter List"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "hw_res_alloc", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_hw_res_alloc, "A", "HW Resource Allocation"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "switch_config", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_switch_config, "A", "HW Switch Configuration"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "rss_key", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_hkey, "A", "View RSS key"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "rss_lut", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_hlut, "A", "View RSS lookup table"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "rss_hena", CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_hena, "LU", "View enabled packet types for RSS"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "disable_fw_link_management", CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_fw_link_management, "I", "Disable FW Link Management"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "dump_debug_data", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_dump_debug_data, "A", "Dump Debug Data from FW"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "do_pf_reset", CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_do_pf_reset, "I", "Tell HW to initiate a PF reset"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "do_core_reset", CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_do_core_reset, "I", "Tell HW to initiate a CORE reset"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "do_global_reset", CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_do_global_reset, "I", "Tell HW to initiate a GLOBAL reset"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "do_emp_reset", CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_do_emp_reset, "I", "(This doesn't work) Tell HW to initiate a EMP (entire firmware) reset"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "queue_interrupt_table", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_queue_interrupt_table, "A", "View MSI-X indices for TX/RX queues"); if (pf->has_i2c) { SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "read_i2c_byte", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_read_i2c_byte, "I", IXL_SYSCTL_HELP_READ_I2C); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "write_i2c_byte", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_write_i2c_byte, "I", IXL_SYSCTL_HELP_WRITE_I2C); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "read_i2c_diag_data", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_read_i2c_diag_data, "A", "Dump selected diagnostic data from FW"); } } /* * Primarily for finding out how many queues can be assigned to VFs, * at runtime. */ static int ixl_sysctl_unallocated_queues(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int queues; queues = (int)ixl_pf_qmgr_get_num_free(&pf->qmgr); return sysctl_handle_int(oidp, NULL, queues, req); } /* ** Set flow control using sysctl: ** 0 - off ** 1 - rx pause ** 2 - tx pause ** 3 - full */ int ixl_sysctl_set_flowcntl(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int requested_fc, error = 0; enum i40e_status_code aq_error = 0; u8 fc_aq_err = 0; /* Get request */ requested_fc = pf->fc; error = sysctl_handle_int(oidp, &requested_fc, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (requested_fc < 0 || requested_fc > 3) { device_printf(dev, "Invalid fc mode; valid modes are 0 through 3\n"); return (EINVAL); } /* Set fc ability for port */ hw->fc.requested_mode = requested_fc; aq_error = i40e_set_fc(hw, &fc_aq_err, TRUE); if (aq_error) { device_printf(dev, "%s: Error setting new fc mode %d; fc_err %#x\n", __func__, aq_error, fc_aq_err); return (EIO); } pf->fc = requested_fc; return (0); } char * ixl_aq_speed_to_str(enum i40e_aq_link_speed link_speed) { int index; char *speeds[] = { "Unknown", "100 Mbps", "1 Gbps", "10 Gbps", "40 Gbps", "20 Gbps", "25 Gbps", }; switch (link_speed) { case I40E_LINK_SPEED_100MB: index = 1; break; case I40E_LINK_SPEED_1GB: index = 2; break; case I40E_LINK_SPEED_10GB: index = 3; break; case I40E_LINK_SPEED_40GB: index = 4; break; case I40E_LINK_SPEED_20GB: index = 5; break; case I40E_LINK_SPEED_25GB: index = 6; break; case I40E_LINK_SPEED_UNKNOWN: default: index = 0; break; } return speeds[index]; } int ixl_sysctl_current_speed(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; int error = 0; ixl_update_link_status(pf); error = sysctl_handle_string(oidp, ixl_aq_speed_to_str(hw->phy.link_info.link_speed), 8, req); return (error); } /* * Converts 8-bit speeds value to and from sysctl flags and * Admin Queue flags. */ static u8 ixl_convert_sysctl_aq_link_speed(u8 speeds, bool to_aq) { static u16 speedmap[6] = { (I40E_LINK_SPEED_100MB | (0x1 << 8)), (I40E_LINK_SPEED_1GB | (0x2 << 8)), (I40E_LINK_SPEED_10GB | (0x4 << 8)), (I40E_LINK_SPEED_20GB | (0x8 << 8)), (I40E_LINK_SPEED_25GB | (0x10 << 8)), (I40E_LINK_SPEED_40GB | (0x20 << 8)) }; u8 retval = 0; for (int i = 0; i < 6; i++) { if (to_aq) retval |= (speeds & (speedmap[i] >> 8)) ? (speedmap[i] & 0xff) : 0; else retval |= (speeds & speedmap[i]) ? (speedmap[i] >> 8) : 0; } return (retval); } int ixl_set_advertised_speeds(struct ixl_pf *pf, int speeds, bool from_aq) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct i40e_aq_get_phy_abilities_resp abilities; struct i40e_aq_set_phy_config config; enum i40e_status_code aq_error = 0; /* Get current capability information */ aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, FALSE, &abilities, NULL); if (aq_error) { device_printf(dev, "%s: Error getting phy capabilities %d," " aq error: %d\n", __func__, aq_error, hw->aq.asq_last_status); return (EIO); } /* Prepare new config */ bzero(&config, sizeof(config)); if (from_aq) config.link_speed = speeds; else config.link_speed = ixl_convert_sysctl_aq_link_speed(speeds, true); config.phy_type = abilities.phy_type; config.phy_type_ext = abilities.phy_type_ext; config.abilities = abilities.abilities | I40E_AQ_PHY_ENABLE_ATOMIC_LINK; config.eee_capability = abilities.eee_capability; config.eeer = abilities.eeer_val; config.low_power_ctrl = abilities.d3_lpan; config.fec_config = (abilities.fec_cfg_curr_mod_ext_info & 0x1e); /* Do aq command & restart link */ aq_error = i40e_aq_set_phy_config(hw, &config, NULL); if (aq_error) { device_printf(dev, "%s: Error setting new phy config %d," " aq error: %d\n", __func__, aq_error, hw->aq.asq_last_status); return (EIO); } return (0); } /* ** Supported link speedsL ** Flags: ** 0x1 - 100 Mb ** 0x2 - 1G ** 0x4 - 10G ** 0x8 - 20G ** 0x10 - 25G ** 0x20 - 40G */ static int ixl_sysctl_supported_speeds(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int supported = ixl_convert_sysctl_aq_link_speed(pf->supported_speeds, false); return sysctl_handle_int(oidp, NULL, supported, req); } /* ** Control link advertise speed: ** Flags: ** 0x1 - advertise 100 Mb ** 0x2 - advertise 1G ** 0x4 - advertise 10G ** 0x8 - advertise 20G ** 0x10 - advertise 25G ** 0x20 - advertise 40G ** ** Set to 0 to disable link */ int ixl_sysctl_set_advertise(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; u8 converted_speeds; int requested_ls = 0; int error = 0; /* Read in new mode */ requested_ls = pf->advertised_speed; error = sysctl_handle_int(oidp, &requested_ls, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Error out if bits outside of possible flag range are set */ if ((requested_ls & ~((u8)0x3F)) != 0) { device_printf(dev, "Input advertised speed out of range; " "valid flags are: 0x%02x\n", ixl_convert_sysctl_aq_link_speed(pf->supported_speeds, false)); return (EINVAL); } /* Check if adapter supports input value */ converted_speeds = ixl_convert_sysctl_aq_link_speed((u8)requested_ls, true); if ((converted_speeds | pf->supported_speeds) != pf->supported_speeds) { device_printf(dev, "Invalid advertised speed; " "valid flags are: 0x%02x\n", ixl_convert_sysctl_aq_link_speed(pf->supported_speeds, false)); return (EINVAL); } error = ixl_set_advertised_speeds(pf, requested_ls, false); if (error) return (error); pf->advertised_speed = requested_ls; ixl_update_link_status(pf); return (0); } /* ** Get the width and transaction speed of ** the bus this adapter is plugged into. */ void ixl_get_bus_info(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u16 link; u32 offset, num_ports; u64 max_speed; /* Some devices don't use PCIE */ if (hw->mac.type == I40E_MAC_X722) return; /* Read PCI Express Capabilities Link Status Register */ pci_find_cap(dev, PCIY_EXPRESS, &offset); link = pci_read_config(dev, offset + PCIER_LINK_STA, 2); /* Fill out hw struct with PCIE info */ i40e_set_pci_config_data(hw, link); /* Use info to print out bandwidth messages */ device_printf(dev,"PCI Express Bus: Speed %s %s\n", ((hw->bus.speed == i40e_bus_speed_8000) ? "8.0GT/s": (hw->bus.speed == i40e_bus_speed_5000) ? "5.0GT/s": (hw->bus.speed == i40e_bus_speed_2500) ? "2.5GT/s":"Unknown"), (hw->bus.width == i40e_bus_width_pcie_x8) ? "Width x8" : (hw->bus.width == i40e_bus_width_pcie_x4) ? "Width x4" : (hw->bus.width == i40e_bus_width_pcie_x2) ? "Width x2" : (hw->bus.width == i40e_bus_width_pcie_x1) ? "Width x1" : ("Unknown")); /* * If adapter is in slot with maximum supported speed, * no warning message needs to be printed out. */ if (hw->bus.speed >= i40e_bus_speed_8000 && hw->bus.width >= i40e_bus_width_pcie_x8) return; num_ports = bitcount32(hw->func_caps.valid_functions); max_speed = ixl_max_aq_speed_to_value(pf->supported_speeds) / 1000000; if ((num_ports * max_speed) > hw->bus.speed * hw->bus.width) { device_printf(dev, "PCI-Express bandwidth available" " for this device may be insufficient for" " optimal performance.\n"); device_printf(dev, "Please move the device to a different" " PCI-e link with more lanes and/or higher" " transfer rate.\n"); } } static int ixl_sysctl_show_fw(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; struct sbuf *sbuf; sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); ixl_nvm_version_str(hw, sbuf); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } void ixl_print_nvm_cmd(device_t dev, struct i40e_nvm_access *nvma) { if ((nvma->command == I40E_NVM_READ) && ((nvma->config & 0xFF) == 0xF) && (((nvma->config & 0xF00) >> 8) == 0xF) && (nvma->offset == 0) && (nvma->data_size == 1)) { // device_printf(dev, "- Get Driver Status Command\n"); } else if (nvma->command == I40E_NVM_READ) { } else { switch (nvma->command) { case 0xB: device_printf(dev, "- command: I40E_NVM_READ\n"); break; case 0xC: device_printf(dev, "- command: I40E_NVM_WRITE\n"); break; default: device_printf(dev, "- command: unknown 0x%08x\n", nvma->command); break; } device_printf(dev, "- config (ptr) : 0x%02x\n", nvma->config & 0xFF); device_printf(dev, "- config (flags): 0x%01x\n", (nvma->config & 0xF00) >> 8); device_printf(dev, "- offset : 0x%08x\n", nvma->offset); device_printf(dev, "- data_s : 0x%08x\n", nvma->data_size); } } int ixl_handle_nvmupd_cmd(struct ixl_pf *pf, struct ifdrv *ifd) { struct i40e_hw *hw = &pf->hw; struct i40e_nvm_access *nvma; device_t dev = pf->dev; enum i40e_status_code status = 0; size_t nvma_size, ifd_len, exp_len; int err, perrno; DEBUGFUNC("ixl_handle_nvmupd_cmd"); /* Sanity checks */ nvma_size = sizeof(struct i40e_nvm_access); ifd_len = ifd->ifd_len; if (ifd_len < nvma_size || ifd->ifd_data == NULL) { device_printf(dev, "%s: incorrect ifdrv length or data pointer\n", __func__); device_printf(dev, "%s: ifdrv length: %zu, sizeof(struct i40e_nvm_access): %zu\n", __func__, ifd_len, nvma_size); device_printf(dev, "%s: data pointer: %p\n", __func__, ifd->ifd_data); return (EINVAL); } nvma = malloc(ifd_len, M_DEVBUF, M_WAITOK); err = copyin(ifd->ifd_data, nvma, ifd_len); if (err) { device_printf(dev, "%s: Cannot get request from user space\n", __func__); free(nvma, M_DEVBUF); return (err); } if (pf->dbg_mask & IXL_DBG_NVMUPD) ixl_print_nvm_cmd(dev, nvma); if (pf->state & IXL_PF_STATE_ADAPTER_RESETTING) { int count = 0; while (count++ < 100) { i40e_msec_delay(100); if (!(pf->state & IXL_PF_STATE_ADAPTER_RESETTING)) break; } } if (pf->state & IXL_PF_STATE_ADAPTER_RESETTING) { free(nvma, M_DEVBUF); return (-EBUSY); } if (nvma->data_size < 1 || nvma->data_size > 4096) { device_printf(dev, "%s: invalid request, data size not in supported range\n", __func__); free(nvma, M_DEVBUF); return (EINVAL); } /* * Older versions of the NVM update tool don't set ifd_len to the size * of the entire buffer passed to the ioctl. Check the data_size field * in the contained i40e_nvm_access struct and ensure everything is * copied in from userspace. */ exp_len = nvma_size + nvma->data_size - 1; /* One byte is kept in struct */ if (ifd_len < exp_len) { ifd_len = exp_len; nvma = realloc(nvma, ifd_len, M_DEVBUF, M_WAITOK); err = copyin(ifd->ifd_data, nvma, ifd_len); if (err) { device_printf(dev, "%s: Cannot get request from user space\n", __func__); free(nvma, M_DEVBUF); return (err); } } // TODO: Might need a different lock here // IXL_PF_LOCK(pf); status = i40e_nvmupd_command(hw, nvma, nvma->data, &perrno); // IXL_PF_UNLOCK(pf); err = copyout(nvma, ifd->ifd_data, ifd_len); free(nvma, M_DEVBUF); if (err) { device_printf(dev, "%s: Cannot return data to user space\n", __func__); return (err); } /* Let the nvmupdate report errors, show them only when debug is enabled */ if (status != 0 && (pf->dbg_mask & IXL_DBG_NVMUPD) != 0) device_printf(dev, "i40e_nvmupd_command status %s, perrno %d\n", i40e_stat_str(hw, status), perrno); /* * -EPERM is actually ERESTART, which the kernel interprets as it needing * to run this ioctl again. So use -EACCES for -EPERM instead. */ if (perrno == -EPERM) return (-EACCES); else return (perrno); } int ixl_find_i2c_interface(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; bool i2c_en, port_matched; u32 reg; for (int i = 0; i < 4; i++) { reg = rd32(hw, I40E_GLGEN_MDIO_I2C_SEL(i)); i2c_en = (reg & I40E_GLGEN_MDIO_I2C_SEL_MDIO_I2C_SEL_MASK); port_matched = ((reg & I40E_GLGEN_MDIO_I2C_SEL_PHY_PORT_NUM_MASK) >> I40E_GLGEN_MDIO_I2C_SEL_PHY_PORT_NUM_SHIFT) & BIT(hw->port); if (i2c_en && port_matched) return (i); } return (-1); } static char * ixl_phy_type_string(u32 bit_pos, bool ext) { static char * phy_types_str[32] = { "SGMII", "1000BASE-KX", "10GBASE-KX4", "10GBASE-KR", "40GBASE-KR4", "XAUI", "XFI", "SFI", "XLAUI", "XLPPI", "40GBASE-CR4", "10GBASE-CR1", "SFP+ Active DA", "QSFP+ Active DA", "Reserved (14)", "Reserved (15)", "Reserved (16)", "100BASE-TX", "1000BASE-T", "10GBASE-T", "10GBASE-SR", "10GBASE-LR", "10GBASE-SFP+Cu", "10GBASE-CR1", "40GBASE-CR4", "40GBASE-SR4", "40GBASE-LR4", "1000BASE-SX", "1000BASE-LX", "1000BASE-T Optical", "20GBASE-KR2", "Reserved (31)" }; static char * ext_phy_types_str[8] = { "25GBASE-KR", "25GBASE-CR", "25GBASE-SR", "25GBASE-LR", "25GBASE-AOC", "25GBASE-ACC", "Reserved (6)", "Reserved (7)" }; if (ext && bit_pos > 7) return "Invalid_Ext"; if (bit_pos > 31) return "Invalid"; return (ext) ? ext_phy_types_str[bit_pos] : phy_types_str[bit_pos]; } /* TODO: ERJ: I don't this is necessary anymore. */ int ixl_aq_get_link_status(struct ixl_pf *pf, struct i40e_aqc_get_link_status *link_status) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; struct i40e_aq_desc desc; enum i40e_status_code status; struct i40e_aqc_get_link_status *aq_link_status = (struct i40e_aqc_get_link_status *)&desc.params.raw; i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_get_link_status); link_status->command_flags = CPU_TO_LE16(I40E_AQ_LSE_ENABLE); status = i40e_asq_send_command(hw, &desc, NULL, 0, NULL); if (status) { device_printf(dev, "%s: i40e_aqc_opc_get_link_status status %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EIO); } bcopy(aq_link_status, link_status, sizeof(struct i40e_aqc_get_link_status)); return (0); } static char * ixl_phy_type_string_ls(u8 val) { if (val >= 0x1F) return ixl_phy_type_string(val - 0x1F, true); else return ixl_phy_type_string(val, false); } static int ixl_sysctl_link_status(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; struct sbuf *buf; int error = 0; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for sysctl output.\n"); return (ENOMEM); } struct i40e_aqc_get_link_status link_status; error = ixl_aq_get_link_status(pf, &link_status); if (error) { sbuf_delete(buf); return (error); } sbuf_printf(buf, "\n" "PHY Type : 0x%02x<%s>\n" "Speed : 0x%02x\n" "Link info: 0x%02x\n" "AN info : 0x%02x\n" "Ext info : 0x%02x\n" "Loopback : 0x%02x\n" "Max Frame: %d\n" "Config : 0x%02x\n" "Power : 0x%02x", link_status.phy_type, ixl_phy_type_string_ls(link_status.phy_type), link_status.link_speed, link_status.link_info, link_status.an_info, link_status.ext_info, link_status.loopback, link_status.max_frame_size, link_status.config, link_status.power_desc); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_phy_abilities(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; enum i40e_status_code status; struct i40e_aq_get_phy_abilities_resp abilities; struct sbuf *buf; int error = 0; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for sysctl output.\n"); return (ENOMEM); } status = i40e_aq_get_phy_capabilities(hw, FALSE, FALSE, &abilities, NULL); if (status) { device_printf(dev, "%s: i40e_aq_get_phy_capabilities() status %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_delete(buf); return (EIO); } sbuf_printf(buf, "\n" "PHY Type : %08x", abilities.phy_type); if (abilities.phy_type != 0) { sbuf_printf(buf, "<"); for (int i = 0; i < 32; i++) if ((1 << i) & abilities.phy_type) sbuf_printf(buf, "%s,", ixl_phy_type_string(i, false)); sbuf_printf(buf, ">\n"); } sbuf_printf(buf, "PHY Ext : %02x", abilities.phy_type_ext); if (abilities.phy_type_ext != 0) { sbuf_printf(buf, "<"); for (int i = 0; i < 4; i++) if ((1 << i) & abilities.phy_type_ext) sbuf_printf(buf, "%s,", ixl_phy_type_string(i, true)); sbuf_printf(buf, ">"); } sbuf_printf(buf, "\n"); sbuf_printf(buf, "Speed : %02x\n" "Abilities: %02x\n" "EEE cap : %04x\n" "EEER reg : %08x\n" "D3 Lpan : %02x\n" "ID : %02x %02x %02x %02x\n" "ModType : %02x %02x %02x\n" "ModType E: %01x\n" "FEC Cfg : %02x\n" "Ext CC : %02x", abilities.link_speed, abilities.abilities, abilities.eee_capability, abilities.eeer_val, abilities.d3_lpan, abilities.phy_id[0], abilities.phy_id[1], abilities.phy_id[2], abilities.phy_id[3], abilities.module_type[0], abilities.module_type[1], abilities.module_type[2], (abilities.fec_cfg_curr_mod_ext_info & 0xe0) >> 5, abilities.fec_cfg_curr_mod_ext_info & 0x1F, abilities.ext_comp_code); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_sw_filter_list(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct ixl_vsi *vsi = &pf->vsi; struct ixl_mac_filter *f; device_t dev = pf->dev; int error = 0, ftl_len = 0, ftl_counter = 0; struct sbuf *buf; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } sbuf_printf(buf, "\n"); /* Print MAC filters */ sbuf_printf(buf, "PF Filters:\n"); SLIST_FOREACH(f, &vsi->ftl, next) ftl_len++; if (ftl_len < 1) sbuf_printf(buf, "(none)\n"); else { SLIST_FOREACH(f, &vsi->ftl, next) { sbuf_printf(buf, MAC_FORMAT ", vlan %4d, flags %#06x", MAC_FORMAT_ARGS(f->macaddr), f->vlan, f->flags); /* don't print '\n' for last entry */ if (++ftl_counter != ftl_len) sbuf_printf(buf, "\n"); } } #ifdef PCI_IOV /* TODO: Give each VF its own filter list sysctl */ struct ixl_vf *vf; if (pf->num_vfs > 0) { sbuf_printf(buf, "\n\n"); for (int i = 0; i < pf->num_vfs; i++) { vf = &pf->vfs[i]; if (!(vf->vf_flags & VF_FLAG_ENABLED)) continue; vsi = &vf->vsi; ftl_len = 0, ftl_counter = 0; sbuf_printf(buf, "VF-%d Filters:\n", vf->vf_num); SLIST_FOREACH(f, &vsi->ftl, next) ftl_len++; if (ftl_len < 1) sbuf_printf(buf, "(none)\n"); else { SLIST_FOREACH(f, &vsi->ftl, next) { sbuf_printf(buf, MAC_FORMAT ", vlan %4d, flags %#06x\n", MAC_FORMAT_ARGS(f->macaddr), f->vlan, f->flags); } } } } #endif error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } #define IXL_SW_RES_SIZE 0x14 int ixl_res_alloc_cmp(const void *a, const void *b) { const struct i40e_aqc_switch_resource_alloc_element_resp *one, *two; one = (const struct i40e_aqc_switch_resource_alloc_element_resp *)a; two = (const struct i40e_aqc_switch_resource_alloc_element_resp *)b; return ((int)one->resource_type - (int)two->resource_type); } /* * Longest string length: 25 */ char * ixl_switch_res_type_string(u8 type) { // TODO: This should be changed to static const char * ixl_switch_res_type_strings[0x14] = { "VEB", "VSI", "Perfect Match MAC address", "S-tag", "(Reserved)", "Multicast hash entry", "Unicast hash entry", "VLAN", "VSI List entry", "(Reserved)", "VLAN Statistic Pool", "Mirror Rule", "Queue Set", "Inner VLAN Forward filter", "(Reserved)", "Inner MAC", "IP", "GRE/VN1 Key", "VN2 Key", "Tunneling Port" }; if (type < 0x14) return ixl_switch_res_type_strings[type]; else return "(Reserved)"; } static int ixl_sysctl_hw_res_alloc(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; enum i40e_status_code status; int error = 0; u8 num_entries; struct i40e_aqc_switch_resource_alloc_element_resp resp[IXL_SW_RES_SIZE]; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } bzero(resp, sizeof(resp)); status = i40e_aq_get_switch_resource_alloc(hw, &num_entries, resp, IXL_SW_RES_SIZE, NULL); if (status) { device_printf(dev, "%s: get_switch_resource_alloc() error %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_delete(buf); return (error); } /* Sort entries by type for display */ qsort(resp, num_entries, sizeof(struct i40e_aqc_switch_resource_alloc_element_resp), &ixl_res_alloc_cmp); sbuf_cat(buf, "\n"); sbuf_printf(buf, "# of entries: %d\n", num_entries); sbuf_printf(buf, " Type | Guaranteed | Total | Used | Un-allocated\n" " | (this) | (all) | (this) | (all) \n"); for (int i = 0; i < num_entries; i++) { sbuf_printf(buf, "%25s | %10d %5d %6d %12d", ixl_switch_res_type_string(resp[i].resource_type), resp[i].guaranteed, resp[i].total, resp[i].used, resp[i].total_unalloced); if (i < num_entries - 1) sbuf_cat(buf, "\n"); } error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } /* ** Caller must init and delete sbuf; this function will clear and ** finish it for caller. */ char * ixl_switch_element_string(struct sbuf *s, struct i40e_aqc_switch_config_element_resp *element) { sbuf_clear(s); switch (element->element_type) { case I40E_AQ_SW_ELEM_TYPE_MAC: sbuf_printf(s, "MAC %3d", element->element_info); break; case I40E_AQ_SW_ELEM_TYPE_PF: sbuf_printf(s, "PF %3d", element->element_info); break; case I40E_AQ_SW_ELEM_TYPE_VF: sbuf_printf(s, "VF %3d", element->element_info); break; case I40E_AQ_SW_ELEM_TYPE_EMP: sbuf_cat(s, "EMP"); break; case I40E_AQ_SW_ELEM_TYPE_BMC: sbuf_cat(s, "BMC"); break; case I40E_AQ_SW_ELEM_TYPE_PV: sbuf_cat(s, "PV"); break; case I40E_AQ_SW_ELEM_TYPE_VEB: sbuf_cat(s, "VEB"); break; case I40E_AQ_SW_ELEM_TYPE_PA: sbuf_cat(s, "PA"); break; case I40E_AQ_SW_ELEM_TYPE_VSI: sbuf_printf(s, "VSI %3d", element->element_info); break; default: sbuf_cat(s, "?"); break; } sbuf_finish(s); return sbuf_data(s); } static int ixl_sysctl_switch_config(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; struct sbuf *nmbuf; enum i40e_status_code status; int error = 0; u16 next = 0; u8 aq_buf[I40E_AQ_LARGE_BUF]; struct i40e_aqc_get_switch_config_resp *sw_config; sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for sysctl output.\n"); return (ENOMEM); } status = i40e_aq_get_switch_config(hw, sw_config, sizeof(aq_buf), &next, NULL); if (status) { device_printf(dev, "%s: aq_get_switch_config() error %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_delete(buf); return error; } if (next) device_printf(dev, "%s: TODO: get more config with SEID %d\n", __func__, next); nmbuf = sbuf_new_auto(); if (!nmbuf) { device_printf(dev, "Could not allocate sbuf for name output.\n"); sbuf_delete(buf); return (ENOMEM); } sbuf_cat(buf, "\n"); /* Assuming <= 255 elements in switch */ sbuf_printf(buf, "# of reported elements: %d\n", sw_config->header.num_reported); sbuf_printf(buf, "total # of elements: %d\n", sw_config->header.num_total); /* Exclude: ** Revision -- all elements are revision 1 for now */ sbuf_printf(buf, "SEID ( Name ) | Uplink | Downlink | Conn Type\n" " | | | (uplink)\n"); for (int i = 0; i < sw_config->header.num_reported; i++) { // "%4d (%8s) | %8s %8s %#8x", sbuf_printf(buf, "%4d", sw_config->element[i].seid); sbuf_cat(buf, " "); sbuf_printf(buf, "(%8s)", ixl_switch_element_string(nmbuf, &sw_config->element[i])); sbuf_cat(buf, " | "); sbuf_printf(buf, "%8d", sw_config->element[i].uplink_seid); sbuf_cat(buf, " "); sbuf_printf(buf, "%8d", sw_config->element[i].downlink_seid); sbuf_cat(buf, " "); sbuf_printf(buf, "%#8x", sw_config->element[i].connection_type); if (i < sw_config->header.num_reported - 1) sbuf_cat(buf, "\n"); } sbuf_delete(nmbuf); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_hkey(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; int error = 0; enum i40e_status_code status; u32 reg; struct i40e_aqc_get_set_rss_key_data key_data; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } bzero(key_data.standard_rss_key, sizeof(key_data.standard_rss_key)); sbuf_cat(buf, "\n"); if (hw->mac.type == I40E_MAC_X722) { status = i40e_aq_get_rss_key(hw, pf->vsi.vsi_num, &key_data); if (status) device_printf(dev, "i40e_aq_get_rss_key status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); } else { for (int i = 0; i < IXL_RSS_KEY_SIZE_REG; i++) { reg = i40e_read_rx_ctl(hw, I40E_PFQF_HKEY(i)); bcopy(®, ((caddr_t)&key_data) + (i << 2), 4); } } ixl_sbuf_print_bytes(buf, (u8 *)&key_data, sizeof(key_data), 0, true); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static void ixl_sbuf_print_bytes(struct sbuf *sb, u8 *buf, int length, int label_offset, bool text) { int i, j, k, width; char c; if (length < 1 || buf == NULL) return; int byte_stride = 16; int lines = length / byte_stride; int rem = length % byte_stride; if (rem > 0) lines++; for (i = 0; i < lines; i++) { width = (rem > 0 && i == lines - 1) ? rem : byte_stride; sbuf_printf(sb, "%4d | ", label_offset + i * byte_stride); for (j = 0; j < width; j++) sbuf_printf(sb, "%02x ", buf[i * byte_stride + j]); if (width < byte_stride) { for (k = 0; k < (byte_stride - width); k++) sbuf_printf(sb, " "); } if (!text) { sbuf_printf(sb, "\n"); continue; } for (j = 0; j < width; j++) { c = (char)buf[i * byte_stride + j]; if (c < 32 || c > 126) sbuf_printf(sb, "."); else sbuf_printf(sb, "%c", c); if (j == width - 1) sbuf_printf(sb, "\n"); } } } static int ixl_sysctl_hlut(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; int error = 0; enum i40e_status_code status; u8 hlut[512]; u32 reg; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } bzero(hlut, sizeof(hlut)); sbuf_cat(buf, "\n"); if (hw->mac.type == I40E_MAC_X722) { status = i40e_aq_get_rss_lut(hw, pf->vsi.vsi_num, TRUE, hlut, sizeof(hlut)); if (status) device_printf(dev, "i40e_aq_get_rss_lut status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); } else { for (int i = 0; i < hw->func_caps.rss_table_size >> 2; i++) { reg = rd32(hw, I40E_PFQF_HLUT(i)); bcopy(®, &hlut[i << 2], 4); } } ixl_sbuf_print_bytes(buf, hlut, 512, 0, false); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_hena(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; u64 hena; hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); return sysctl_handle_long(oidp, NULL, hena, req); } /* * Sysctl to disable firmware's link management * * 1 - Disable link management on this port * 0 - Re-enable link management * * On normal NVMs, firmware manages link by default. */ static int ixl_sysctl_fw_link_management(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int requested_mode = -1; enum i40e_status_code status = 0; int error = 0; /* Read in new mode */ error = sysctl_handle_int(oidp, &requested_mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Check for sane value */ if (requested_mode < 0 || requested_mode > 1) { device_printf(dev, "Valid modes are 0 or 1\n"); return (EINVAL); } /* Set new mode */ status = i40e_aq_set_phy_debug(hw, !!(requested_mode) << 4, NULL); if (status) { device_printf(dev, "%s: Error setting new phy debug mode %s," " aq error: %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EIO); } return (0); } /* * Read some diagnostic data from an SFP module * Bytes 96-99, 102-105 from device address 0xA2 */ static int ixl_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; struct sbuf *sbuf; int error = 0; u8 output; error = pf->read_i2c_byte(pf, 0, 0xA0, &output); if (error) { device_printf(dev, "Error reading from i2c\n"); return (error); } if (output != 0x3) { device_printf(dev, "Module is not SFP/SFP+/SFP28 (%02X)\n", output); return (EIO); } pf->read_i2c_byte(pf, 92, 0xA0, &output); if (!(output & 0x60)) { device_printf(dev, "Module doesn't support diagnostics: %02X\n", output); return (EIO); } sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); for (u8 offset = 96; offset < 100; offset++) { pf->read_i2c_byte(pf, offset, 0xA2, &output); sbuf_printf(sbuf, "%02X ", output); } for (u8 offset = 102; offset < 106; offset++) { pf->read_i2c_byte(pf, offset, 0xA2, &output); sbuf_printf(sbuf, "%02X ", output); } sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /* * Sysctl to read a byte from I2C bus. * * Input: 32-bit value: * bits 0-7: device address (0xA0 or 0xA2) * bits 8-15: offset (0-255) * bits 16-31: unused * Output: 8-bit value read */ static int ixl_sysctl_read_i2c_byte(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; int input = -1, error = 0; u8 dev_addr, offset, output; /* Read in I2C read parameters */ error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Validate device address */ dev_addr = input & 0xFF; if (dev_addr != 0xA0 && dev_addr != 0xA2) { return (EINVAL); } offset = (input >> 8) & 0xFF; error = pf->read_i2c_byte(pf, offset, dev_addr, &output); if (error) return (error); device_printf(dev, "%02X\n", output); return (0); } /* * Sysctl to write a byte to the I2C bus. * * Input: 32-bit value: * bits 0-7: device address (0xA0 or 0xA2) * bits 8-15: offset (0-255) * bits 16-23: value to write * bits 24-31: unused * Output: 8-bit value written */ static int ixl_sysctl_write_i2c_byte(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; int input = -1, error = 0; u8 dev_addr, offset, value; /* Read in I2C write parameters */ error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Validate device address */ dev_addr = input & 0xFF; if (dev_addr != 0xA0 && dev_addr != 0xA2) { return (EINVAL); } offset = (input >> 8) & 0xFF; value = (input >> 16) & 0xFF; error = pf->write_i2c_byte(pf, offset, dev_addr, value); if (error) return (error); device_printf(dev, "%02X written\n", value); return (0); } static int ixl_get_fec_config(struct ixl_pf *pf, struct i40e_aq_get_phy_abilities_resp *abilities, u8 bit_pos, int *is_set) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; enum i40e_status_code status; status = i40e_aq_get_phy_capabilities(hw, FALSE, FALSE, abilities, NULL); if (status) { device_printf(dev, "%s: i40e_aq_get_phy_capabilities() status %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EIO); } *is_set = !!(abilities->fec_cfg_curr_mod_ext_info & bit_pos); return (0); } static int ixl_set_fec_config(struct ixl_pf *pf, struct i40e_aq_get_phy_abilities_resp *abilities, u8 bit_pos, int set) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; struct i40e_aq_set_phy_config config; enum i40e_status_code status; /* Set new PHY config */ memset(&config, 0, sizeof(config)); config.fec_config = abilities->fec_cfg_curr_mod_ext_info & ~(bit_pos); if (set) config.fec_config |= bit_pos; if (config.fec_config != abilities->fec_cfg_curr_mod_ext_info) { config.abilities |= I40E_AQ_PHY_ENABLE_ATOMIC_LINK; config.phy_type = abilities->phy_type; config.phy_type_ext = abilities->phy_type_ext; config.link_speed = abilities->link_speed; config.eee_capability = abilities->eee_capability; config.eeer = abilities->eeer_val; config.low_power_ctrl = abilities->d3_lpan; status = i40e_aq_set_phy_config(hw, &config, NULL); if (status) { device_printf(dev, "%s: i40e_aq_set_phy_config() status %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EIO); } } return (0); } static int ixl_sysctl_fec_fc_ability(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int mode, error = 0; struct i40e_aq_get_phy_abilities_resp abilities; error = ixl_get_fec_config(pf, &abilities, I40E_AQ_ENABLE_FEC_KR, &mode); if (error) return (error); /* Read in new mode */ error = sysctl_handle_int(oidp, &mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_ABILITY_KR, !!(mode)); } static int ixl_sysctl_fec_rs_ability(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int mode, error = 0; struct i40e_aq_get_phy_abilities_resp abilities; error = ixl_get_fec_config(pf, &abilities, I40E_AQ_ENABLE_FEC_RS, &mode); if (error) return (error); /* Read in new mode */ error = sysctl_handle_int(oidp, &mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_ABILITY_RS, !!(mode)); } static int ixl_sysctl_fec_fc_request(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int mode, error = 0; struct i40e_aq_get_phy_abilities_resp abilities; error = ixl_get_fec_config(pf, &abilities, I40E_AQ_REQUEST_FEC_KR, &mode); if (error) return (error); /* Read in new mode */ error = sysctl_handle_int(oidp, &mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_REQUEST_KR, !!(mode)); } static int ixl_sysctl_fec_rs_request(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int mode, error = 0; struct i40e_aq_get_phy_abilities_resp abilities; error = ixl_get_fec_config(pf, &abilities, I40E_AQ_REQUEST_FEC_RS, &mode); if (error) return (error); /* Read in new mode */ error = sysctl_handle_int(oidp, &mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_REQUEST_RS, !!(mode)); } static int ixl_sysctl_fec_auto_enable(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int mode, error = 0; struct i40e_aq_get_phy_abilities_resp abilities; error = ixl_get_fec_config(pf, &abilities, I40E_AQ_ENABLE_FEC_AUTO, &mode); if (error) return (error); /* Read in new mode */ error = sysctl_handle_int(oidp, &mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_AUTO, !!(mode)); } static int ixl_sysctl_dump_debug_data(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; int error = 0; enum i40e_status_code status; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } u8 *final_buff; /* This amount is only necessary if reading the entire cluster into memory */ #define IXL_FINAL_BUFF_SIZE (1280 * 1024) final_buff = malloc(IXL_FINAL_BUFF_SIZE, M_DEVBUF, M_WAITOK); if (final_buff == NULL) { device_printf(dev, "Could not allocate memory for output.\n"); goto out; } int final_buff_len = 0; u8 cluster_id = 1; bool more = true; u8 dump_buf[4096]; u16 curr_buff_size = 4096; u8 curr_next_table = 0; u32 curr_next_index = 0; u16 ret_buff_size; u8 ret_next_table; u32 ret_next_index; sbuf_cat(buf, "\n"); while (more) { status = i40e_aq_debug_dump(hw, cluster_id, curr_next_table, curr_next_index, curr_buff_size, dump_buf, &ret_buff_size, &ret_next_table, &ret_next_index, NULL); if (status) { device_printf(dev, "i40e_aq_debug_dump status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); goto free_out; } /* copy info out of temp buffer */ bcopy(dump_buf, (caddr_t)final_buff + final_buff_len, ret_buff_size); final_buff_len += ret_buff_size; if (ret_next_table != curr_next_table) { /* We're done with the current table; we can dump out read data. */ sbuf_printf(buf, "%d:", curr_next_table); int bytes_printed = 0; while (bytes_printed <= final_buff_len) { sbuf_printf(buf, "%16D", ((caddr_t)final_buff + bytes_printed), ""); bytes_printed += 16; } sbuf_cat(buf, "\n"); /* The entire cluster has been read; we're finished */ if (ret_next_table == 0xFF) break; /* Otherwise clear the output buffer and continue reading */ bzero(final_buff, IXL_FINAL_BUFF_SIZE); final_buff_len = 0; } if (ret_next_index == 0xFFFFFFFF) ret_next_index = 0; bzero(dump_buf, sizeof(dump_buf)); curr_next_table = ret_next_table; curr_next_index = ret_next_index; } free_out: free(final_buff, M_DEVBUF); out: error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_fw_lldp(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int error = 0; int state, new_state; enum i40e_status_code status; state = new_state = ((pf->state & IXL_PF_STATE_FW_LLDP_DISABLED) == 0); /* Read in new mode */ error = sysctl_handle_int(oidp, &new_state, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Already in requested state */ if (new_state == state) return (error); if (new_state == 0) { if (hw->mac.type == I40E_MAC_X722 || hw->func_caps.npar_enable != 0) { device_printf(dev, "Disabling FW LLDP agent is not supported on this device\n"); return (EINVAL); } if (pf->hw.aq.api_maj_ver < 1 || (pf->hw.aq.api_maj_ver == 1 && pf->hw.aq.api_min_ver < 7)) { device_printf(dev, "Disabling FW LLDP agent is not supported in this FW version. Please update FW to enable this feature.\n"); return (EINVAL); } i40e_aq_stop_lldp(&pf->hw, true, NULL); i40e_aq_set_dcb_parameters(&pf->hw, true, NULL); atomic_set_int(&pf->state, IXL_PF_STATE_FW_LLDP_DISABLED); } else { status = i40e_aq_start_lldp(&pf->hw, NULL); if (status != I40E_SUCCESS && hw->aq.asq_last_status == I40E_AQ_RC_EEXIST) device_printf(dev, "FW LLDP agent is already running\n"); atomic_clear_int(&pf->state, IXL_PF_STATE_FW_LLDP_DISABLED); } return (0); } /* * Get FW LLDP Agent status */ int ixl_get_fw_lldp_status(struct ixl_pf *pf) { enum i40e_status_code ret = I40E_SUCCESS; struct i40e_lldp_variables lldp_cfg; struct i40e_hw *hw = &pf->hw; u8 adminstatus = 0; ret = i40e_read_lldp_cfg(hw, &lldp_cfg); if (ret) return ret; /* Get the LLDP AdminStatus for the current port */ adminstatus = lldp_cfg.adminstatus >> (hw->port * 4); adminstatus &= 0xf; /* Check if LLDP agent is disabled */ if (!adminstatus) { device_printf(pf->dev, "FW LLDP agent is disabled for this PF.\n"); atomic_set_int(&pf->state, IXL_PF_STATE_FW_LLDP_DISABLED); } else atomic_clear_int(&pf->state, IXL_PF_STATE_FW_LLDP_DISABLED); return (0); } int ixl_attach_get_link_status(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int error = 0; if (((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) || (hw->aq.fw_maj_ver < 4)) { i40e_msec_delay(75); error = i40e_aq_set_link_restart_an(hw, TRUE, NULL); if (error) { device_printf(dev, "link restart failed, aq_err=%d\n", pf->hw.aq.asq_last_status); return error; } } /* Determine link state */ hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); return (0); } static int ixl_sysctl_do_pf_reset(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int requested = 0, error = 0; /* Read in new mode */ error = sysctl_handle_int(oidp, &requested, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Initiate the PF reset later in the admin task */ atomic_set_32(&pf->state, IXL_PF_STATE_PF_RESET_REQ); return (error); } static int ixl_sysctl_do_core_reset(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; int requested = 0, error = 0; /* Read in new mode */ error = sysctl_handle_int(oidp, &requested, 0, req); if ((error) || (req->newptr == NULL)) return (error); wr32(hw, I40E_GLGEN_RTRIG, I40E_GLGEN_RTRIG_CORER_MASK); return (error); } static int ixl_sysctl_do_global_reset(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; int requested = 0, error = 0; /* Read in new mode */ error = sysctl_handle_int(oidp, &requested, 0, req); if ((error) || (req->newptr == NULL)) return (error); wr32(hw, I40E_GLGEN_RTRIG, I40E_GLGEN_RTRIG_GLOBR_MASK); return (error); } static int ixl_sysctl_do_emp_reset(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; int requested = 0, error = 0; /* Read in new mode */ error = sysctl_handle_int(oidp, &requested, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* TODO: Find out how to bypass this */ if (!(rd32(hw, 0x000B818C) & 0x1)) { device_printf(pf->dev, "SW not allowed to initiate EMPR\n"); error = EINVAL; } else wr32(hw, I40E_GLGEN_RTRIG, I40E_GLGEN_RTRIG_EMPFWR_MASK); return (error); } /* * Print out mapping of TX queue indexes and Rx queue indexes * to MSI-X vectors. */ static int ixl_sysctl_queue_interrupt_table(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct ixl_vsi *vsi = &pf->vsi; device_t dev = pf->dev; struct sbuf *buf; int error = 0; struct ixl_rx_queue *rx_que = vsi->rx_queues; struct ixl_tx_queue *tx_que = vsi->tx_queues; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } sbuf_cat(buf, "\n"); for (int i = 0; i < vsi->num_rx_queues; i++) { rx_que = &vsi->rx_queues[i]; sbuf_printf(buf, "(rxq %3d): %d\n", i, rx_que->msix); } for (int i = 0; i < vsi->num_tx_queues; i++) { tx_que = &vsi->tx_queues[i]; sbuf_printf(buf, "(txq %3d): %d\n", i, tx_que->msix); } error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } diff --git a/sys/dev/mfi/mfi_tbolt.c b/sys/dev/mfi/mfi_tbolt.c index d7397d2a2ccc..40ba160205ac 100644 --- a/sys/dev/mfi/mfi_tbolt.c +++ b/sys/dev/mfi/mfi_tbolt.c @@ -1,1500 +1,1500 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Copyright 1994-2009 The FreeBSD Project. * All rights reserved. * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE FREEBSD PROJECT``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FREEBSD PROJECT OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION)HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * The views and conclusions contained in the software and documentation * are those of the authors and should not be interpreted as representing * official policies,either expressed or implied, of the FreeBSD Project. */ #include __FBSDID("$FreeBSD$"); #include "opt_mfi.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct mfi_cmd_tbolt *mfi_tbolt_get_cmd(struct mfi_softc *sc, struct mfi_command *); union mfi_mpi2_request_descriptor * mfi_tbolt_get_request_descriptor(struct mfi_softc *sc, uint16_t index); void mfi_tbolt_complete_cmd(struct mfi_softc *sc); int mfi_tbolt_build_io(struct mfi_softc *sc, struct mfi_command *mfi_cmd, struct mfi_cmd_tbolt *cmd); union mfi_mpi2_request_descriptor *mfi_tbolt_build_mpt_cmd(struct mfi_softc *sc, struct mfi_command *cmd); uint8_t mfi_build_mpt_pass_thru(struct mfi_softc *sc, struct mfi_command *mfi_cmd); union mfi_mpi2_request_descriptor *mfi_build_and_issue_cmd(struct mfi_softc *sc, struct mfi_command *mfi_cmd); void mfi_tbolt_build_ldio(struct mfi_softc *sc, struct mfi_command *mfi_cmd, struct mfi_cmd_tbolt *cmd); static int mfi_tbolt_make_sgl(struct mfi_softc *sc, struct mfi_command *mfi_cmd, pMpi25IeeeSgeChain64_t sgl_ptr, struct mfi_cmd_tbolt *cmd); void map_tbolt_cmd_status(struct mfi_command *mfi_cmd, uint8_t status, uint8_t ext_status); static void mfi_issue_pending_cmds_again (struct mfi_softc *sc); static void mfi_kill_hba (struct mfi_softc *sc); static void mfi_process_fw_state_chg_isr(void *arg); static void mfi_sync_map_complete(struct mfi_command *); static void mfi_queue_map_sync(struct mfi_softc *sc); #define MFI_FUSION_ENABLE_INTERRUPT_MASK (0x00000008) extern int mfi_polled_cmd_timeout; static int mfi_fw_reset_test = 0; #ifdef MFI_DEBUG SYSCTL_INT(_hw_mfi, OID_AUTO, fw_reset_test, CTLFLAG_RWTUN, &mfi_fw_reset_test, 0, "Force a firmware reset condition"); #endif void mfi_tbolt_enable_intr_ppc(struct mfi_softc *sc) { MFI_WRITE4(sc, MFI_OMSK, ~MFI_FUSION_ENABLE_INTERRUPT_MASK); MFI_READ4(sc, MFI_OMSK); } void mfi_tbolt_disable_intr_ppc(struct mfi_softc *sc) { MFI_WRITE4(sc, MFI_OMSK, 0xFFFFFFFF); MFI_READ4(sc, MFI_OMSK); } int32_t mfi_tbolt_read_fw_status_ppc(struct mfi_softc *sc) { return MFI_READ4(sc, MFI_OSP0); } int32_t mfi_tbolt_check_clear_intr_ppc(struct mfi_softc *sc) { int32_t status, mfi_status = 0; status = MFI_READ4(sc, MFI_OSTS); if (status & 1) { MFI_WRITE4(sc, MFI_OSTS, status); MFI_READ4(sc, MFI_OSTS); if (status & MFI_STATE_CHANGE_INTERRUPT) { mfi_status |= MFI_FIRMWARE_STATE_CHANGE; } return mfi_status; } if (!(status & MFI_FUSION_ENABLE_INTERRUPT_MASK)) return 1; MFI_READ4(sc, MFI_OSTS); return 0; } void mfi_tbolt_issue_cmd_ppc(struct mfi_softc *sc, bus_addr_t bus_add, uint32_t frame_cnt) { bus_add |= (MFI_REQ_DESCRIPT_FLAGS_MFA << MFI_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); MFI_WRITE4(sc, MFI_IQPL, (uint32_t)bus_add); MFI_WRITE4(sc, MFI_IQPH, (uint32_t)((uint64_t)bus_add >> 32)); } /* * mfi_tbolt_adp_reset - For controller reset * @regs: MFI register set */ int mfi_tbolt_adp_reset(struct mfi_softc *sc) { int retry = 0, i = 0; int HostDiag; MFI_WRITE4(sc, MFI_WSR, 0xF); MFI_WRITE4(sc, MFI_WSR, 4); MFI_WRITE4(sc, MFI_WSR, 0xB); MFI_WRITE4(sc, MFI_WSR, 2); MFI_WRITE4(sc, MFI_WSR, 7); MFI_WRITE4(sc, MFI_WSR, 0xD); for (i = 0; i < 10000; i++) ; HostDiag = (uint32_t)MFI_READ4(sc, MFI_HDR); while (!( HostDiag & DIAG_WRITE_ENABLE)) { for (i = 0; i < 1000; i++); HostDiag = (uint32_t)MFI_READ4(sc, MFI_HDR); device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: retry time=%d, " "hostdiag=%#x\n", retry, HostDiag); if (retry++ >= 100) return 1; } device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: HostDiag=%#x\n", HostDiag); MFI_WRITE4(sc, MFI_HDR, (HostDiag | DIAG_RESET_ADAPTER)); for (i=0; i < 10; i++) { for (i = 0; i < 10000; i++); } HostDiag = (uint32_t)MFI_READ4(sc, MFI_RSR); while (HostDiag & DIAG_RESET_ADAPTER) { for (i = 0; i < 1000; i++) ; HostDiag = (uint32_t)MFI_READ4(sc, MFI_RSR); device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: retry time=%d, " "hostdiag=%#x\n", retry, HostDiag); if (retry++ >= 1000) return 1; } return 0; } /* * This routine initialize Thunderbolt specific device information */ void mfi_tbolt_init_globals(struct mfi_softc *sc) { /* Initialize single reply size and Message size */ sc->reply_size = MEGASAS_THUNDERBOLT_REPLY_SIZE; sc->raid_io_msg_size = MEGASAS_THUNDERBOLT_NEW_MSG_SIZE; /* * Calculating how many SGEs allowed in a allocated main message * (size of the Message - Raid SCSI IO message size(except SGE)) * / size of SGE * (0x100 - (0x90 - 0x10)) / 0x10 = 8 */ sc->max_SGEs_in_main_message = (uint8_t)((sc->raid_io_msg_size - (sizeof(struct mfi_mpi2_request_raid_scsi_io) - sizeof(MPI2_SGE_IO_UNION))) / sizeof(MPI2_SGE_IO_UNION)); /* * (Command frame size allocaed in SRB ext - Raid SCSI IO message size) * / size of SGL ; * (1280 - 256) / 16 = 64 */ sc->max_SGEs_in_chain_message = (MR_COMMAND_SIZE - sc->raid_io_msg_size) / sizeof(MPI2_SGE_IO_UNION); /* * (0x08-1) + 0x40 = 0x47 - 0x01 = 0x46 one is left for command * colscing */ sc->mfi_max_sge = (sc->max_SGEs_in_main_message - 1) + sc->max_SGEs_in_chain_message - 1; /* * This is the offset in number of 4 * 32bit words to the next chain * (0x100 - 0x10)/0x10 = 0xF(15) */ sc->chain_offset_value_for_main_message = (sc->raid_io_msg_size - sizeof(MPI2_SGE_IO_UNION))/16; sc->chain_offset_value_for_mpt_ptmsg = offsetof(struct mfi_mpi2_request_raid_scsi_io, SGL)/16; sc->mfi_cmd_pool_tbolt = NULL; sc->request_desc_pool = NULL; } /* * This function calculates the memory requirement for Thunderbolt * controller, returns the total required memory in bytes */ uint32_t mfi_tbolt_get_memory_requirement(struct mfi_softc *sc) { uint32_t size; size = MEGASAS_THUNDERBOLT_MSG_ALLIGNMENT; /* for Alignment */ size += sc->raid_io_msg_size * (sc->mfi_max_fw_cmds + 1); size += sc->reply_size * sc->mfi_max_fw_cmds; /* this is for SGL's */ size += MEGASAS_MAX_SZ_CHAIN_FRAME * sc->mfi_max_fw_cmds; return size; } /* * Description: * This function will prepare message pools for the Thunderbolt controller * Arguments: * DevExt - HBA miniport driver's adapter data storage structure * pMemLocation - start of the memory allocated for Thunderbolt. * Return Value: * TRUE if successful * FALSE if failed */ int mfi_tbolt_init_desc_pool(struct mfi_softc *sc, uint8_t* mem_location, uint32_t tbolt_contg_length) { uint32_t offset = 0; uint8_t *addr = mem_location; /* Request Descriptor Base physical Address */ /* For Request Decriptors Virtual Memory */ /* Initialise the aligned IO Frames Virtual Memory Pointer */ if (((uintptr_t)addr) & (0xFF)) { addr = &addr[sc->raid_io_msg_size]; addr = (uint8_t *)((uintptr_t)addr & (~0xFF)); sc->request_message_pool_align = addr; } else sc->request_message_pool_align = addr; offset = sc->request_message_pool_align - sc->request_message_pool; sc->request_msg_busaddr = sc->mfi_tb_busaddr + offset; /* DJA XXX should this be bus dma ??? */ /* Skip request message pool */ addr = &addr[sc->raid_io_msg_size * (sc->mfi_max_fw_cmds + 1)]; /* Reply Frame Pool is initialized */ sc->reply_frame_pool = (struct mfi_mpi2_reply_header *) addr; if (((uintptr_t)addr) & (0xFF)) { addr = &addr[sc->reply_size]; addr = (uint8_t *)((uintptr_t)addr & (~0xFF)); } sc->reply_frame_pool_align = (struct mfi_mpi2_reply_header *)addr; offset = (uintptr_t)sc->reply_frame_pool_align - (uintptr_t)sc->request_message_pool; sc->reply_frame_busaddr = sc->mfi_tb_busaddr + offset; /* Skip Reply Frame Pool */ addr += sc->reply_size * sc->mfi_max_fw_cmds; sc->reply_pool_limit = addr; /* initializing reply address to 0xFFFFFFFF */ memset((uint8_t *)sc->reply_frame_pool, 0xFF, (sc->reply_size * sc->mfi_max_fw_cmds)); offset = sc->reply_size * sc->mfi_max_fw_cmds; sc->sg_frame_busaddr = sc->reply_frame_busaddr + offset; /* initialize the last_reply_idx to 0 */ sc->last_reply_idx = 0; MFI_WRITE4(sc, MFI_RFPI, sc->mfi_max_fw_cmds - 1); MFI_WRITE4(sc, MFI_RPI, sc->last_reply_idx); offset = (sc->sg_frame_busaddr + (MEGASAS_MAX_SZ_CHAIN_FRAME * sc->mfi_max_fw_cmds)) - sc->mfi_tb_busaddr; if (offset > tbolt_contg_length) device_printf(sc->mfi_dev, "Error:Initialized more than " "allocated\n"); return 0; } /* * This routine prepare and issue INIT2 frame to the Firmware */ int mfi_tbolt_init_MFI_queue(struct mfi_softc *sc) { struct MPI2_IOC_INIT_REQUEST *mpi2IocInit; struct mfi_init_frame *mfi_init; uintptr_t offset = 0; bus_addr_t phyAddress; MFI_ADDRESS *mfiAddressTemp; struct mfi_command *cm, cmd_tmp; int error; mtx_assert(&sc->mfi_io_lock, MA_OWNED); /* Check if initialization is already completed */ if (sc->MFA_enabled) { device_printf(sc->mfi_dev, "tbolt_init already initialised!\n"); return 1; } if ((cm = mfi_dequeue_free(sc)) == NULL) { device_printf(sc->mfi_dev, "tbolt_init failed to get command " " entry!\n"); return (EBUSY); } cmd_tmp.cm_frame = cm->cm_frame; cmd_tmp.cm_frame_busaddr = cm->cm_frame_busaddr; cmd_tmp.cm_dmamap = cm->cm_dmamap; cm->cm_frame = (union mfi_frame *)((uintptr_t)sc->mfi_tb_init); cm->cm_frame_busaddr = sc->mfi_tb_init_busaddr; cm->cm_dmamap = sc->mfi_tb_init_dmamap; cm->cm_frame->header.context = 0; /* * Abuse the SG list area of the frame to hold the init_qinfo * object; */ mfi_init = &cm->cm_frame->init; mpi2IocInit = (struct MPI2_IOC_INIT_REQUEST *)sc->mfi_tb_ioc_init_desc; bzero(mpi2IocInit, sizeof(struct MPI2_IOC_INIT_REQUEST)); mpi2IocInit->Function = MPI2_FUNCTION_IOC_INIT; mpi2IocInit->WhoInit = MPI2_WHOINIT_HOST_DRIVER; /* set MsgVersion and HeaderVersion host driver was built with */ mpi2IocInit->MsgVersion = MPI2_VERSION; mpi2IocInit->HeaderVersion = MPI2_HEADER_VERSION; mpi2IocInit->SystemRequestFrameSize = sc->raid_io_msg_size/4; mpi2IocInit->ReplyDescriptorPostQueueDepth = (uint16_t)sc->mfi_max_fw_cmds; mpi2IocInit->ReplyFreeQueueDepth = 0; /* Not supported by MR. */ /* Get physical address of reply frame pool */ offset = (uintptr_t) sc->reply_frame_pool_align - (uintptr_t)sc->request_message_pool; phyAddress = sc->mfi_tb_busaddr + offset; mfiAddressTemp = (MFI_ADDRESS *)&mpi2IocInit->ReplyDescriptorPostQueueAddress; mfiAddressTemp->u.addressLow = (uint32_t)phyAddress; mfiAddressTemp->u.addressHigh = (uint32_t)((uint64_t)phyAddress >> 32); /* Get physical address of request message pool */ offset = sc->request_message_pool_align - sc->request_message_pool; phyAddress = sc->mfi_tb_busaddr + offset; mfiAddressTemp = (MFI_ADDRESS *)&mpi2IocInit->SystemRequestFrameBaseAddress; mfiAddressTemp->u.addressLow = (uint32_t)phyAddress; mfiAddressTemp->u.addressHigh = (uint32_t)((uint64_t)phyAddress >> 32); mpi2IocInit->ReplyFreeQueueAddress = 0; /* Not supported by MR. */ mpi2IocInit->TimeStamp = time_uptime; if (sc->verbuf) { snprintf((char *)sc->verbuf, strlen(MEGASAS_VERSION) + 2, "%s\n", MEGASAS_VERSION); mfi_init->driver_ver_lo = (uint32_t)sc->verbuf_h_busaddr; mfi_init->driver_ver_hi = (uint32_t)((uint64_t)sc->verbuf_h_busaddr >> 32); } /* Get the physical address of the mpi2 ioc init command */ phyAddress = sc->mfi_tb_ioc_init_busaddr; mfi_init->qinfo_new_addr_lo = (uint32_t)phyAddress; mfi_init->qinfo_new_addr_hi = (uint32_t)((uint64_t)phyAddress >> 32); mfi_init->header.flags |= MFI_FRAME_DONT_POST_IN_REPLY_QUEUE; mfi_init->header.cmd = MFI_CMD_INIT; mfi_init->header.data_len = sizeof(struct MPI2_IOC_INIT_REQUEST); mfi_init->header.cmd_status = MFI_STAT_INVALID_STATUS; cm->cm_data = NULL; cm->cm_flags |= MFI_CMD_POLLED; cm->cm_timestamp = time_uptime; if ((error = mfi_mapcmd(sc, cm)) != 0) { device_printf(sc->mfi_dev, "failed to send IOC init2 " "command %d at %lx\n", error, (long)cm->cm_frame_busaddr); goto out; } if (mfi_init->header.cmd_status == MFI_STAT_OK) { sc->MFA_enabled = 1; } else { device_printf(sc->mfi_dev, "Init command Failed %#x\n", mfi_init->header.cmd_status); error = mfi_init->header.cmd_status; goto out; } out: cm->cm_frame = cmd_tmp.cm_frame; cm->cm_frame_busaddr = cmd_tmp.cm_frame_busaddr; cm->cm_dmamap = cmd_tmp.cm_dmamap; mfi_release_command(cm); return (error); } int mfi_tbolt_alloc_cmd(struct mfi_softc *sc) { struct mfi_cmd_tbolt *cmd; bus_addr_t io_req_base_phys; uint8_t *io_req_base; int i = 0, j = 0, offset = 0; /* * sc->mfi_cmd_pool_tbolt is an array of struct mfi_cmd_tbolt pointers. * Allocate the dynamic array first and then allocate individual * commands. */ sc->request_desc_pool = malloc(sizeof( union mfi_mpi2_request_descriptor) * sc->mfi_max_fw_cmds, M_MFIBUF, M_NOWAIT|M_ZERO); if (sc->request_desc_pool == NULL) { device_printf(sc->mfi_dev, "Could not alloc " "memory for request_desc_pool\n"); return (ENOMEM); } sc->mfi_cmd_pool_tbolt = malloc(sizeof(struct mfi_cmd_tbolt*) * sc->mfi_max_fw_cmds, M_MFIBUF, M_NOWAIT|M_ZERO); if (sc->mfi_cmd_pool_tbolt == NULL) { free(sc->request_desc_pool, M_MFIBUF); device_printf(sc->mfi_dev, "Could not alloc " "memory for cmd_pool_tbolt\n"); return (ENOMEM); } for (i = 0; i < sc->mfi_max_fw_cmds; i++) { sc->mfi_cmd_pool_tbolt[i] = malloc(sizeof( struct mfi_cmd_tbolt),M_MFIBUF, M_NOWAIT|M_ZERO); if (!sc->mfi_cmd_pool_tbolt[i]) { device_printf(sc->mfi_dev, "Could not alloc " "cmd_pool_tbolt entry\n"); for (j = 0; j < i; j++) free(sc->mfi_cmd_pool_tbolt[j], M_MFIBUF); free(sc->request_desc_pool, M_MFIBUF); sc->request_desc_pool = NULL; free(sc->mfi_cmd_pool_tbolt, M_MFIBUF); sc->mfi_cmd_pool_tbolt = NULL; return (ENOMEM); } } /* * The first 256 bytes (SMID 0) is not used. Don't add to the cmd * list */ io_req_base = sc->request_message_pool_align + MEGASAS_THUNDERBOLT_NEW_MSG_SIZE; io_req_base_phys = sc->request_msg_busaddr + MEGASAS_THUNDERBOLT_NEW_MSG_SIZE; /* * Add all the commands to command pool (instance->cmd_pool) */ /* SMID 0 is reserved. Set SMID/index from 1 */ for (i = 0; i < sc->mfi_max_fw_cmds; i++) { cmd = sc->mfi_cmd_pool_tbolt[i]; offset = MEGASAS_THUNDERBOLT_NEW_MSG_SIZE * i; cmd->index = i + 1; cmd->request_desc = (union mfi_mpi2_request_descriptor *) (sc->request_desc_pool + i); cmd->io_request = (struct mfi_mpi2_request_raid_scsi_io *) (io_req_base + offset); cmd->io_request_phys_addr = io_req_base_phys + offset; cmd->sg_frame = (MPI2_SGE_IO_UNION *)(sc->reply_pool_limit + i * MEGASAS_MAX_SZ_CHAIN_FRAME); cmd->sg_frame_phys_addr = sc->sg_frame_busaddr + i * MEGASAS_MAX_SZ_CHAIN_FRAME; cmd->sync_cmd_idx = sc->mfi_max_fw_cmds; TAILQ_INSERT_TAIL(&(sc->mfi_cmd_tbolt_tqh), cmd, next); } return 0; } int mfi_tbolt_reset(struct mfi_softc *sc) { uint32_t fw_state; mtx_lock(&sc->mfi_io_lock); if (sc->hw_crit_error) { device_printf(sc->mfi_dev, "HW CRITICAL ERROR\n"); mtx_unlock(&sc->mfi_io_lock); return 1; } if (sc->mfi_flags & MFI_FLAGS_TBOLT) { fw_state = sc->mfi_read_fw_status(sc); if ((fw_state & MFI_FWSTATE_FAULT) == MFI_FWSTATE_FAULT || mfi_fw_reset_test) { if ((sc->disableOnlineCtrlReset == 0) && (sc->adpreset == 0)) { device_printf(sc->mfi_dev, "Adapter RESET " "condition is detected\n"); sc->adpreset = 1; sc->issuepend_done = 0; sc->MFA_enabled = 0; sc->last_reply_idx = 0; mfi_process_fw_state_chg_isr((void *) sc); } mtx_unlock(&sc->mfi_io_lock); return 0; } } mtx_unlock(&sc->mfi_io_lock); return 1; } /* * mfi_intr_tbolt - isr entry point */ void mfi_intr_tbolt(void *arg) { struct mfi_softc *sc = (struct mfi_softc *)arg; if (sc->mfi_check_clear_intr(sc) == 1) { return; } if (sc->mfi_detaching) return; mtx_lock(&sc->mfi_io_lock); mfi_tbolt_complete_cmd(sc); sc->mfi_flags &= ~MFI_FLAGS_QFRZN; mfi_startio(sc); mtx_unlock(&sc->mfi_io_lock); return; } /* * map_cmd_status - Maps FW cmd status to OS cmd status * @cmd : Pointer to cmd * @status : status of cmd returned by FW * @ext_status : ext status of cmd returned by FW */ void map_tbolt_cmd_status(struct mfi_command *mfi_cmd, uint8_t status, uint8_t ext_status) { switch (status) { case MFI_STAT_OK: mfi_cmd->cm_frame->header.cmd_status = MFI_STAT_OK; mfi_cmd->cm_frame->dcmd.header.cmd_status = MFI_STAT_OK; mfi_cmd->cm_error = MFI_STAT_OK; break; case MFI_STAT_SCSI_IO_FAILED: case MFI_STAT_LD_INIT_IN_PROGRESS: mfi_cmd->cm_frame->header.cmd_status = status; mfi_cmd->cm_frame->header.scsi_status = ext_status; mfi_cmd->cm_frame->dcmd.header.cmd_status = status; mfi_cmd->cm_frame->dcmd.header.scsi_status = ext_status; break; case MFI_STAT_SCSI_DONE_WITH_ERROR: mfi_cmd->cm_frame->header.cmd_status = ext_status; mfi_cmd->cm_frame->dcmd.header.cmd_status = ext_status; break; case MFI_STAT_LD_OFFLINE: case MFI_STAT_DEVICE_NOT_FOUND: mfi_cmd->cm_frame->header.cmd_status = status; mfi_cmd->cm_frame->dcmd.header.cmd_status = status; break; default: mfi_cmd->cm_frame->header.cmd_status = status; mfi_cmd->cm_frame->dcmd.header.cmd_status = status; break; } } /* * mfi_tbolt_return_cmd - Return a cmd to free command pool * @instance: Adapter soft state * @tbolt_cmd: Tbolt command packet to be returned to free command pool * @mfi_cmd: Oning MFI command packe */ void mfi_tbolt_return_cmd(struct mfi_softc *sc, struct mfi_cmd_tbolt *tbolt_cmd, struct mfi_command *mfi_cmd) { mtx_assert(&sc->mfi_io_lock, MA_OWNED); mfi_cmd->cm_flags &= ~MFI_CMD_TBOLT; mfi_cmd->cm_extra_frames = 0; tbolt_cmd->sync_cmd_idx = sc->mfi_max_fw_cmds; TAILQ_INSERT_TAIL(&sc->mfi_cmd_tbolt_tqh, tbolt_cmd, next); } void mfi_tbolt_complete_cmd(struct mfi_softc *sc) { struct mfi_mpi2_reply_header *desc, *reply_desc; struct mfi_command *cmd_mfi; /* For MFA Cmds */ struct mfi_cmd_tbolt *cmd_tbolt; uint16_t smid; uint8_t reply_descript_type; struct mfi_mpi2_request_raid_scsi_io *scsi_io_req; uint32_t status, extStatus; uint16_t num_completed; union desc_value val; mtx_assert(&sc->mfi_io_lock, MA_OWNED); desc = (struct mfi_mpi2_reply_header *) ((uintptr_t)sc->reply_frame_pool_align + sc->last_reply_idx * sc->reply_size); reply_desc = desc; if (reply_desc == NULL) { device_printf(sc->mfi_dev, "reply desc is NULL!!\n"); return; } reply_descript_type = reply_desc->ReplyFlags & MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK; if (reply_descript_type == MPI2_RPY_DESCRIPT_FLAGS_UNUSED) return; num_completed = 0; val.word = ((union mfi_mpi2_reply_descriptor *)desc)->words; /* Read Reply descriptor */ while ((val.u.low != 0xFFFFFFFF) && (val.u.high != 0xFFFFFFFF)) { smid = reply_desc->SMID; if (smid == 0 || smid > sc->mfi_max_fw_cmds) { device_printf(sc->mfi_dev, "smid is %d cannot " "proceed - skipping\n", smid); goto next; } cmd_tbolt = sc->mfi_cmd_pool_tbolt[smid - 1]; if (cmd_tbolt->sync_cmd_idx == sc->mfi_max_fw_cmds) { device_printf(sc->mfi_dev, "cmd_tbolt %p " "has invalid sync_cmd_idx=%d - skipping\n", cmd_tbolt, cmd_tbolt->sync_cmd_idx); goto next; } cmd_mfi = &sc->mfi_commands[cmd_tbolt->sync_cmd_idx]; scsi_io_req = cmd_tbolt->io_request; status = cmd_mfi->cm_frame->dcmd.header.cmd_status; extStatus = cmd_mfi->cm_frame->dcmd.header.scsi_status; map_tbolt_cmd_status(cmd_mfi, status, extStatus); /* mfi_tbolt_return_cmd is handled by mfi complete / return */ if ((cmd_mfi->cm_flags & MFI_CMD_SCSI) != 0 && (cmd_mfi->cm_flags & MFI_CMD_POLLED) != 0) { /* polled LD/SYSPD IO command */ /* XXX mark okay for now DJA */ cmd_mfi->cm_frame->header.cmd_status = MFI_STAT_OK; } else { /* remove command from busy queue if not polled */ if ((cmd_mfi->cm_flags & MFI_ON_MFIQ_BUSY) != 0) mfi_remove_busy(cmd_mfi); /* complete the command */ mfi_complete(sc, cmd_mfi); } next: sc->last_reply_idx++; if (sc->last_reply_idx >= sc->mfi_max_fw_cmds) { MFI_WRITE4(sc, MFI_RPI, sc->last_reply_idx); sc->last_reply_idx = 0; } /* Set it back to all 0xfff */ ((union mfi_mpi2_reply_descriptor*)desc)->words = ~((uint64_t)0x00); num_completed++; /* Get the next reply descriptor */ desc = (struct mfi_mpi2_reply_header *) ((uintptr_t)sc->reply_frame_pool_align + sc->last_reply_idx * sc->reply_size); reply_desc = desc; val.word = ((union mfi_mpi2_reply_descriptor*)desc)->words; reply_descript_type = reply_desc->ReplyFlags & MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK; if (reply_descript_type == MPI2_RPY_DESCRIPT_FLAGS_UNUSED) break; } if (!num_completed) return; /* update replyIndex to FW */ if (sc->last_reply_idx) MFI_WRITE4(sc, MFI_RPI, sc->last_reply_idx); return; } /* * mfi_get_cmd - Get a command from the free pool * @instance: Adapter soft state * * Returns a free command from the pool */ struct mfi_cmd_tbolt * mfi_tbolt_get_cmd(struct mfi_softc *sc, struct mfi_command *mfi_cmd) { struct mfi_cmd_tbolt *cmd = NULL; mtx_assert(&sc->mfi_io_lock, MA_OWNED); if ((cmd = TAILQ_FIRST(&sc->mfi_cmd_tbolt_tqh)) == NULL) return (NULL); TAILQ_REMOVE(&sc->mfi_cmd_tbolt_tqh, cmd, next); memset((uint8_t *)cmd->sg_frame, 0, MEGASAS_MAX_SZ_CHAIN_FRAME); memset((uint8_t *)cmd->io_request, 0, MEGASAS_THUNDERBOLT_NEW_MSG_SIZE); cmd->sync_cmd_idx = mfi_cmd->cm_index; mfi_cmd->cm_extra_frames = cmd->index; /* Frame count used as SMID */ mfi_cmd->cm_flags |= MFI_CMD_TBOLT; return cmd; } union mfi_mpi2_request_descriptor * mfi_tbolt_get_request_descriptor(struct mfi_softc *sc, uint16_t index) { uint8_t *p; if (index >= sc->mfi_max_fw_cmds) { device_printf(sc->mfi_dev, "Invalid SMID (0x%x)request " "for descriptor\n", index); return NULL; } p = sc->request_desc_pool + sizeof(union mfi_mpi2_request_descriptor) * index; memset(p, 0, sizeof(union mfi_mpi2_request_descriptor)); return (union mfi_mpi2_request_descriptor *)p; } /* Used to build IOCTL cmd */ uint8_t mfi_build_mpt_pass_thru(struct mfi_softc *sc, struct mfi_command *mfi_cmd) { MPI25_IEEE_SGE_CHAIN64 *mpi25_ieee_chain; struct mfi_mpi2_request_raid_scsi_io *io_req; struct mfi_cmd_tbolt *cmd; cmd = mfi_tbolt_get_cmd(sc, mfi_cmd); if (!cmd) return EBUSY; io_req = cmd->io_request; mpi25_ieee_chain = (MPI25_IEEE_SGE_CHAIN64 *)&io_req->SGL.IeeeChain; io_req->Function = MPI2_FUNCTION_PASSTHRU_IO_REQUEST; io_req->SGLOffset0 = offsetof(struct mfi_mpi2_request_raid_scsi_io, SGL) / 4; io_req->ChainOffset = sc->chain_offset_value_for_mpt_ptmsg; mpi25_ieee_chain->Address = mfi_cmd->cm_frame_busaddr; /* In MFI pass thru, nextChainOffset will always be zero to indicate the end of the chain. */ mpi25_ieee_chain->Flags= MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT | MPI2_IEEE_SGE_FLAGS_IOCPLBNTA_ADDR; /* setting the length to the maximum length */ mpi25_ieee_chain->Length = 1024; return 0; } void mfi_tbolt_build_ldio(struct mfi_softc *sc, struct mfi_command *mfi_cmd, struct mfi_cmd_tbolt *cmd) { uint32_t start_lba_lo = 0, start_lba_hi = 0, device_id; struct mfi_mpi2_request_raid_scsi_io *io_request; struct IO_REQUEST_INFO io_info; device_id = mfi_cmd->cm_frame->io.header.target_id; io_request = cmd->io_request; io_request->RaidContext.TargetID = device_id; io_request->RaidContext.Status = 0; io_request->RaidContext.exStatus = 0; io_request->RaidContext.regLockFlags = 0; start_lba_lo = mfi_cmd->cm_frame->io.lba_lo; start_lba_hi = mfi_cmd->cm_frame->io.lba_hi; memset(&io_info, 0, sizeof(struct IO_REQUEST_INFO)); io_info.ldStartBlock = ((uint64_t)start_lba_hi << 32) | start_lba_lo; io_info.numBlocks = mfi_cmd->cm_frame->io.header.data_len; io_info.ldTgtId = device_id; if ((mfi_cmd->cm_frame->header.flags & MFI_FRAME_DIR_READ) == MFI_FRAME_DIR_READ) io_info.isRead = 1; io_request->RaidContext.timeoutValue = MFI_FUSION_FP_DEFAULT_TIMEOUT; io_request->Function = MPI2_FUNCTION_LD_IO_REQUEST; io_request->DevHandle = device_id; cmd->request_desc->header.RequestFlags = (MFI_REQ_DESCRIPT_FLAGS_LD_IO << MFI_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); if ((io_request->IoFlags == 6) && (io_info.numBlocks == 0)) io_request->RaidContext.RegLockLength = 0x100; io_request->DataLength = mfi_cmd->cm_frame->io.header.data_len * MFI_SECTOR_LEN; } int mfi_tbolt_build_io(struct mfi_softc *sc, struct mfi_command *mfi_cmd, struct mfi_cmd_tbolt *cmd) { struct mfi_mpi2_request_raid_scsi_io *io_request; uint32_t sge_count; uint8_t cdb_len; int readop; u_int64_t lba; io_request = cmd->io_request; if (!(mfi_cmd->cm_frame->header.cmd == MFI_CMD_LD_READ || mfi_cmd->cm_frame->header.cmd == MFI_CMD_LD_WRITE)) return 1; mfi_tbolt_build_ldio(sc, mfi_cmd, cmd); /* Convert to SCSI command CDB */ bzero(io_request->CDB.CDB32, sizeof(io_request->CDB.CDB32)); if (mfi_cmd->cm_frame->header.cmd == MFI_CMD_LD_WRITE) readop = 0; else readop = 1; lba = mfi_cmd->cm_frame->io.lba_hi; lba = (lba << 32) + mfi_cmd->cm_frame->io.lba_lo; cdb_len = mfi_build_cdb(readop, 0, lba, mfi_cmd->cm_frame->io.header.data_len, io_request->CDB.CDB32); /* Just the CDB length, rest of the Flags are zero */ io_request->IoFlags = cdb_len; /* * Construct SGL */ sge_count = mfi_tbolt_make_sgl(sc, mfi_cmd, (pMpi25IeeeSgeChain64_t) &io_request->SGL, cmd); if (sge_count > sc->mfi_max_sge) { device_printf(sc->mfi_dev, "Error. sge_count (0x%x) exceeds " "max (0x%x) allowed\n", sge_count, sc->mfi_max_sge); return 1; } io_request->RaidContext.numSGE = sge_count; io_request->SGLFlags = MPI2_SGE_FLAGS_64_BIT_ADDRESSING; if (mfi_cmd->cm_frame->header.cmd == MFI_CMD_LD_WRITE) io_request->Control = MPI2_SCSIIO_CONTROL_WRITE; else io_request->Control = MPI2_SCSIIO_CONTROL_READ; io_request->SGLOffset0 = offsetof( struct mfi_mpi2_request_raid_scsi_io, SGL)/4; io_request->SenseBufferLowAddress = mfi_cmd->cm_sense_busaddr; io_request->SenseBufferLength = MFI_SENSE_LEN; io_request->RaidContext.Status = MFI_STAT_INVALID_STATUS; io_request->RaidContext.exStatus = MFI_STAT_INVALID_STATUS; return 0; } static int mfi_tbolt_make_sgl(struct mfi_softc *sc, struct mfi_command *mfi_cmd, pMpi25IeeeSgeChain64_t sgl_ptr, struct mfi_cmd_tbolt *cmd) { uint8_t i, sg_processed, sg_to_process; uint8_t sge_count, sge_idx; union mfi_sgl *os_sgl; pMpi25IeeeSgeChain64_t sgl_end; /* * Return 0 if there is no data transfer */ if (!mfi_cmd->cm_sg || !mfi_cmd->cm_len) { device_printf(sc->mfi_dev, "Buffer empty \n"); return 0; } os_sgl = mfi_cmd->cm_sg; sge_count = mfi_cmd->cm_frame->header.sg_count; if (sge_count > sc->mfi_max_sge) { device_printf(sc->mfi_dev, "sgl ptr %p sg_cnt %d \n", os_sgl, sge_count); return sge_count; } if (sge_count > sc->max_SGEs_in_main_message) /* One element to store the chain info */ sge_idx = sc->max_SGEs_in_main_message - 1; else sge_idx = sge_count; if (sc->mfi_flags & (MFI_FLAGS_INVADER | MFI_FLAGS_FURY)) { sgl_end = sgl_ptr + (sc->max_SGEs_in_main_message - 1); sgl_end->Flags = 0; } for (i = 0; i < sge_idx; i++) { /* * For 32bit BSD we are getting 32 bit SGL's from OS * but FW only take 64 bit SGL's so copying from 32 bit * SGL's to 64. */ if (sc->mfi_flags & MFI_FLAGS_SKINNY) { sgl_ptr->Length = os_sgl->sg_skinny[i].len; sgl_ptr->Address = os_sgl->sg_skinny[i].addr; } else { sgl_ptr->Length = os_sgl->sg32[i].len; sgl_ptr->Address = os_sgl->sg32[i].addr; } if (i == sge_count - 1 && (sc->mfi_flags & (MFI_FLAGS_INVADER | MFI_FLAGS_FURY))) sgl_ptr->Flags = MPI25_IEEE_SGE_FLAGS_END_OF_LIST; else sgl_ptr->Flags = 0; sgl_ptr++; cmd->io_request->ChainOffset = 0; } sg_processed = i; if (sg_processed < sge_count) { pMpi25IeeeSgeChain64_t sg_chain; sg_to_process = sge_count - sg_processed; cmd->io_request->ChainOffset = sc->chain_offset_value_for_main_message; sg_chain = sgl_ptr; /* Prepare chain element */ sg_chain->NextChainOffset = 0; if (sc->mfi_flags & (MFI_FLAGS_INVADER | MFI_FLAGS_FURY)) sg_chain->Flags = MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT; else sg_chain->Flags = MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT | MPI2_IEEE_SGE_FLAGS_IOCPLBNTA_ADDR; sg_chain->Length = (sizeof(MPI2_SGE_IO_UNION) * (sge_count - sg_processed)); sg_chain->Address = cmd->sg_frame_phys_addr; sgl_ptr = (pMpi25IeeeSgeChain64_t)cmd->sg_frame; for (; i < sge_count; i++) { if (sc->mfi_flags & MFI_FLAGS_SKINNY) { sgl_ptr->Length = os_sgl->sg_skinny[i].len; sgl_ptr->Address = os_sgl->sg_skinny[i].addr; } else { sgl_ptr->Length = os_sgl->sg32[i].len; sgl_ptr->Address = os_sgl->sg32[i].addr; } if (i == sge_count - 1 && (sc->mfi_flags & (MFI_FLAGS_INVADER | MFI_FLAGS_FURY))) sgl_ptr->Flags = MPI25_IEEE_SGE_FLAGS_END_OF_LIST; else sgl_ptr->Flags = 0; sgl_ptr++; } } return sge_count; } union mfi_mpi2_request_descriptor * mfi_build_and_issue_cmd(struct mfi_softc *sc, struct mfi_command *mfi_cmd) { struct mfi_cmd_tbolt *cmd; union mfi_mpi2_request_descriptor *req_desc = NULL; uint16_t index; cmd = mfi_tbolt_get_cmd(sc, mfi_cmd); if (cmd == NULL) return (NULL); index = cmd->index; req_desc = mfi_tbolt_get_request_descriptor(sc, index-1); if (req_desc == NULL) { mfi_tbolt_return_cmd(sc, cmd, mfi_cmd); return (NULL); } if (mfi_tbolt_build_io(sc, mfi_cmd, cmd) != 0) { mfi_tbolt_return_cmd(sc, cmd, mfi_cmd); return (NULL); } req_desc->header.SMID = index; return req_desc; } union mfi_mpi2_request_descriptor * mfi_tbolt_build_mpt_cmd(struct mfi_softc *sc, struct mfi_command *cmd) { union mfi_mpi2_request_descriptor *req_desc = NULL; uint16_t index; if (mfi_build_mpt_pass_thru(sc, cmd)) { device_printf(sc->mfi_dev, "Couldn't build MFI pass thru " "cmd\n"); return NULL; } /* For fusion the frame_count variable is used for SMID */ index = cmd->cm_extra_frames; req_desc = mfi_tbolt_get_request_descriptor(sc, index - 1); if (req_desc == NULL) return NULL; bzero(req_desc, sizeof(*req_desc)); req_desc->header.RequestFlags = (MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO << MFI_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); req_desc->header.SMID = index; return req_desc; } int mfi_tbolt_send_frame(struct mfi_softc *sc, struct mfi_command *cm) { struct mfi_frame_header *hdr; uint8_t *cdb; union mfi_mpi2_request_descriptor *req_desc = NULL; int tm = mfi_polled_cmd_timeout * 1000; hdr = &cm->cm_frame->header; cdb = cm->cm_frame->pass.cdb; if (sc->adpreset) return 1; if ((cm->cm_flags & MFI_CMD_POLLED) == 0) { cm->cm_timestamp = time_uptime; mfi_enqueue_busy(cm); } else { /* still get interrupts for it */ hdr->cmd_status = MFI_STAT_INVALID_STATUS; hdr->flags |= MFI_FRAME_DONT_POST_IN_REPLY_QUEUE; } if (hdr->cmd == MFI_CMD_PD_SCSI_IO) { /* check for inquiry commands coming from CLI */ - if (cdb[0] != 0x28 || cdb[0] != 0x2A) { + if (cdb[0] != 0x28 && cdb[0] != 0x2A) { if ((req_desc = mfi_tbolt_build_mpt_cmd(sc, cm)) == NULL) { device_printf(sc->mfi_dev, "Mapping from MFI " "to MPT Failed \n"); return 1; } } else device_printf(sc->mfi_dev, "DJA NA XXX SYSPDIO\n"); } else if (hdr->cmd == MFI_CMD_LD_SCSI_IO || hdr->cmd == MFI_CMD_LD_READ || hdr->cmd == MFI_CMD_LD_WRITE) { cm->cm_flags |= MFI_CMD_SCSI; if ((req_desc = mfi_build_and_issue_cmd(sc, cm)) == NULL) { device_printf(sc->mfi_dev, "LDIO Failed \n"); return 1; } } else if ((req_desc = mfi_tbolt_build_mpt_cmd(sc, cm)) == NULL) { device_printf(sc->mfi_dev, "Mapping from MFI to MPT Failed\n"); return (1); } if (cm->cm_flags & MFI_CMD_SCSI) { /* * LD IO needs to be posted since it doesn't get * acknowledged via a status update so have the * controller reply via mfi_tbolt_complete_cmd. */ hdr->flags &= ~MFI_FRAME_DONT_POST_IN_REPLY_QUEUE; } MFI_WRITE4(sc, MFI_ILQP, (req_desc->words & 0xFFFFFFFF)); MFI_WRITE4(sc, MFI_IHQP, (req_desc->words >>0x20)); if ((cm->cm_flags & MFI_CMD_POLLED) == 0) return 0; /* * This is a polled command, so busy-wait for it to complete. * * The value of hdr->cmd_status is updated directly by the hardware * so there is no guarantee that mfi_tbolt_complete_cmd is called * prior to this value changing. */ while (hdr->cmd_status == MFI_STAT_INVALID_STATUS) { DELAY(1000); tm -= 1; if (tm <= 0) break; if (cm->cm_flags & MFI_CMD_SCSI) { /* * Force check reply queue. * This ensures that dump works correctly */ mfi_tbolt_complete_cmd(sc); } } /* ensure the command cleanup has been processed before returning */ mfi_tbolt_complete_cmd(sc); if (hdr->cmd_status == MFI_STAT_INVALID_STATUS) { device_printf(sc->mfi_dev, "Frame %p timed out " "command 0x%X\n", hdr, cm->cm_frame->dcmd.opcode); return (ETIMEDOUT); } return 0; } static void mfi_issue_pending_cmds_again(struct mfi_softc *sc) { struct mfi_command *cm, *tmp; struct mfi_cmd_tbolt *cmd; mtx_assert(&sc->mfi_io_lock, MA_OWNED); TAILQ_FOREACH_REVERSE_SAFE(cm, &sc->mfi_busy, BUSYQ, cm_link, tmp) { cm->retry_for_fw_reset++; /* * If a command has continuously been tried multiple times * and causing a FW reset condition, no further recoveries * should be performed on the controller */ if (cm->retry_for_fw_reset == 3) { device_printf(sc->mfi_dev, "megaraid_sas: command %p " "index=%d was tried multiple times during adapter " "reset - Shutting down the HBA\n", cm, cm->cm_index); mfi_kill_hba(sc); sc->hw_crit_error = 1; return; } mfi_remove_busy(cm); if ((cm->cm_flags & MFI_CMD_TBOLT) != 0) { if (cm->cm_extra_frames != 0 && cm->cm_extra_frames <= sc->mfi_max_fw_cmds) { cmd = sc->mfi_cmd_pool_tbolt[cm->cm_extra_frames - 1]; mfi_tbolt_return_cmd(sc, cmd, cm); } else { device_printf(sc->mfi_dev, "Invalid extra_frames: %d detected\n", cm->cm_extra_frames); } } if (cm->cm_frame->dcmd.opcode != MFI_DCMD_CTRL_EVENT_WAIT) { device_printf(sc->mfi_dev, "APJ ****requeue command %p index=%d\n", cm, cm->cm_index); mfi_requeue_ready(cm); } else mfi_release_command(cm); } mfi_startio(sc); } static void mfi_kill_hba(struct mfi_softc *sc) { if (sc->mfi_flags & MFI_FLAGS_TBOLT) MFI_WRITE4(sc, 0x00, MFI_STOP_ADP); else MFI_WRITE4(sc, MFI_IDB, MFI_STOP_ADP); } static void mfi_process_fw_state_chg_isr(void *arg) { struct mfi_softc *sc= (struct mfi_softc *)arg; int error, status; if (sc->adpreset == 1) { device_printf(sc->mfi_dev, "First stage of FW reset " "initiated...\n"); sc->mfi_adp_reset(sc); sc->mfi_enable_intr(sc); device_printf(sc->mfi_dev, "First stage of reset complete, " "second stage initiated...\n"); sc->adpreset = 2; /* waiting for about 20 second before start the second init */ for (int wait = 0; wait < 20000; wait++) DELAY(1000); device_printf(sc->mfi_dev, "Second stage of FW reset " "initiated...\n"); while ((status = MFI_READ4(sc, MFI_RSR)) & 0x04); sc->mfi_disable_intr(sc); /* We expect the FW state to be READY */ if (mfi_transition_firmware(sc)) { device_printf(sc->mfi_dev, "controller is not in " "ready state\n"); mfi_kill_hba(sc); sc->hw_crit_error = 1; return; } if ((error = mfi_tbolt_init_MFI_queue(sc)) != 0) { device_printf(sc->mfi_dev, "Failed to initialise MFI " "queue\n"); mfi_kill_hba(sc); sc->hw_crit_error = 1; return; } /* Init last reply index and max */ MFI_WRITE4(sc, MFI_RFPI, sc->mfi_max_fw_cmds - 1); MFI_WRITE4(sc, MFI_RPI, sc->last_reply_idx); sc->mfi_enable_intr(sc); sc->adpreset = 0; if (sc->mfi_aen_cm != NULL) { free(sc->mfi_aen_cm->cm_data, M_MFIBUF); mfi_remove_busy(sc->mfi_aen_cm); mfi_release_command(sc->mfi_aen_cm); sc->mfi_aen_cm = NULL; } if (sc->mfi_map_sync_cm != NULL) { mfi_remove_busy(sc->mfi_map_sync_cm); mfi_release_command(sc->mfi_map_sync_cm); sc->mfi_map_sync_cm = NULL; } mfi_issue_pending_cmds_again(sc); /* * Issue pending command can result in adapter being marked * dead because of too many re-tries. Check for that * condition before clearing the reset condition on the FW */ if (!sc->hw_crit_error) { /* * Initiate AEN (Asynchronous Event Notification) & * Sync Map */ mfi_aen_setup(sc, sc->last_seq_num); mfi_tbolt_sync_map_info(sc); sc->issuepend_done = 1; device_printf(sc->mfi_dev, "second stage of reset " "complete, FW is ready now.\n"); } else { device_printf(sc->mfi_dev, "second stage of reset " "never completed, hba was marked offline.\n"); } } else { device_printf(sc->mfi_dev, "mfi_process_fw_state_chg_isr " "called with unhandled value:%d\n", sc->adpreset); } } /* * The ThunderBolt HW has an option for the driver to directly * access the underlying disks and operate on the RAID. To * do this there needs to be a capability to keep the RAID controller * and driver in sync. The FreeBSD driver does not take advantage * of this feature since it adds a lot of complexity and slows down * performance. Performance is gained by using the controller's * cache etc. * * Even though this driver doesn't access the disks directly, an * AEN like command is used to inform the RAID firmware to "sync" * with all LD's via the MFI_DCMD_LD_MAP_GET_INFO command. This * command in write mode will return when the RAID firmware has * detected a change to the RAID state. Examples of this type * of change are removing a disk. Once the command returns then * the driver needs to acknowledge this and "sync" all LD's again. * This repeats until we shutdown. Then we need to cancel this * pending command. * * If this is not done right the RAID firmware will not remove a * pulled drive and the RAID won't go degraded etc. Effectively, * stopping any RAID mangement to functions. * * Doing another LD sync, requires the use of an event since the * driver needs to do a mfi_wait_command and can't do that in an * interrupt thread. * * The driver could get the RAID state via the MFI_DCMD_LD_MAP_GET_INFO * That requires a bunch of structure and it is simpler to just do * the MFI_DCMD_LD_GET_LIST versus walking the RAID map. */ void mfi_tbolt_sync_map_info(struct mfi_softc *sc) { int error = 0, i; struct mfi_command *cmd = NULL; struct mfi_dcmd_frame *dcmd = NULL; uint32_t context = 0; union mfi_ld_ref *ld_sync = NULL; size_t ld_size; struct mfi_frame_header *hdr; struct mfi_command *cm = NULL; struct mfi_ld_list *list = NULL; mtx_assert(&sc->mfi_io_lock, MA_OWNED); if (sc->mfi_map_sync_cm != NULL || sc->cm_map_abort) return; error = mfi_dcmd_command(sc, &cm, MFI_DCMD_LD_GET_LIST, (void **)&list, sizeof(*list)); if (error) goto out; cm->cm_flags = MFI_CMD_POLLED | MFI_CMD_DATAIN; if (mfi_wait_command(sc, cm) != 0) { device_printf(sc->mfi_dev, "Failed to get device listing\n"); goto out; } hdr = &cm->cm_frame->header; if (hdr->cmd_status != MFI_STAT_OK) { device_printf(sc->mfi_dev, "MFI_DCMD_LD_GET_LIST failed %x\n", hdr->cmd_status); goto out; } ld_size = sizeof(*ld_sync) * list->ld_count; ld_sync = (union mfi_ld_ref *) malloc(ld_size, M_MFIBUF, M_NOWAIT | M_ZERO); if (ld_sync == NULL) { device_printf(sc->mfi_dev, "Failed to allocate sync\n"); goto out; } for (i = 0; i < list->ld_count; i++) ld_sync[i].ref = list->ld_list[i].ld.ref; if ((cmd = mfi_dequeue_free(sc)) == NULL) { device_printf(sc->mfi_dev, "Failed to get command\n"); free(ld_sync, M_MFIBUF); goto out; } context = cmd->cm_frame->header.context; bzero(cmd->cm_frame, sizeof(union mfi_frame)); cmd->cm_frame->header.context = context; dcmd = &cmd->cm_frame->dcmd; bzero(dcmd->mbox, MFI_MBOX_SIZE); dcmd->header.cmd = MFI_CMD_DCMD; dcmd->header.flags = MFI_FRAME_DIR_WRITE; dcmd->header.timeout = 0; dcmd->header.data_len = ld_size; dcmd->header.scsi_status = 0; dcmd->opcode = MFI_DCMD_LD_MAP_GET_INFO; cmd->cm_sg = &dcmd->sgl; cmd->cm_total_frame_size = MFI_DCMD_FRAME_SIZE; cmd->cm_data = ld_sync; cmd->cm_private = ld_sync; cmd->cm_len = ld_size; cmd->cm_complete = mfi_sync_map_complete; sc->mfi_map_sync_cm = cmd; cmd->cm_flags = MFI_CMD_DATAOUT; cmd->cm_frame->dcmd.mbox[0] = list->ld_count; cmd->cm_frame->dcmd.mbox[1] = MFI_DCMD_MBOX_PEND_FLAG; if ((error = mfi_mapcmd(sc, cmd)) != 0) { device_printf(sc->mfi_dev, "failed to send map sync\n"); free(ld_sync, M_MFIBUF); sc->mfi_map_sync_cm = NULL; mfi_release_command(cmd); goto out; } out: if (list) free(list, M_MFIBUF); if (cm) mfi_release_command(cm); } static void mfi_sync_map_complete(struct mfi_command *cm) { struct mfi_frame_header *hdr; struct mfi_softc *sc; int aborted = 0; sc = cm->cm_sc; mtx_assert(&sc->mfi_io_lock, MA_OWNED); hdr = &cm->cm_frame->header; if (sc->mfi_map_sync_cm == NULL) return; if (sc->cm_map_abort || hdr->cmd_status == MFI_STAT_INVALID_STATUS) { sc->cm_map_abort = 0; aborted = 1; } free(cm->cm_data, M_MFIBUF); wakeup(&sc->mfi_map_sync_cm); sc->mfi_map_sync_cm = NULL; mfi_release_command(cm); /* set it up again so the driver can catch more events */ if (!aborted) mfi_queue_map_sync(sc); } static void mfi_queue_map_sync(struct mfi_softc *sc) { mtx_assert(&sc->mfi_io_lock, MA_OWNED); taskqueue_enqueue(taskqueue_swi, &sc->mfi_map_sync_task); } void mfi_handle_map_sync(void *context, int pending) { struct mfi_softc *sc; sc = context; mtx_lock(&sc->mfi_io_lock); mfi_tbolt_sync_map_info(sc); mtx_unlock(&sc->mfi_io_lock); } diff --git a/sys/dev/rt/if_rt.c b/sys/dev/rt/if_rt.c index 2bb81fe98c79..2c9c503ccf91 100644 --- a/sys/dev/rt/if_rt.c +++ b/sys/dev/rt/if_rt.c @@ -1,2951 +1,2968 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2015-2016, Stanislav Galabov * Copyright (c) 2014, Aleksandr A. Mityaev * Copyright (c) 2011, Aleksandr Rybalko * based on hard work * by Alexander Egorenkov * and by Damien Bergamini * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "if_rtvar.h" #include "if_rtreg.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_platform.h" #include "opt_rt305x.h" #ifdef FDT #include #include #include #endif #include #include #ifdef RT_MDIO #include #include #include "mdio_if.h" #endif #if 0 #include #include #endif #ifdef IF_RT_PHY_SUPPORT #include "miibus_if.h" #endif /* * Defines and macros */ #define RT_MAX_AGG_SIZE 3840 #define RT_TX_DATA_SEG0_SIZE MJUMPAGESIZE #define RT_MS(_v, _f) (((_v) & _f) >> _f##_S) #define RT_SM(_v, _f) (((_v) << _f##_S) & _f) #define RT_TX_WATCHDOG_TIMEOUT 5 #define RT_CHIPID_RT2880 0x2880 #define RT_CHIPID_RT3050 0x3050 +#define RT_CHIPID_RT3883 0x3883 #define RT_CHIPID_RT5350 0x5350 #define RT_CHIPID_MT7620 0x7620 #define RT_CHIPID_MT7621 0x7621 #ifdef FDT /* more specific and new models should go first */ static const struct ofw_compat_data rt_compat_data[] = { { "ralink,rt2880-eth", RT_CHIPID_RT2880 }, { "ralink,rt3050-eth", RT_CHIPID_RT3050 }, { "ralink,rt3352-eth", RT_CHIPID_RT3050 }, - { "ralink,rt3883-eth", RT_CHIPID_RT3050 }, + { "ralink,rt3883-eth", RT_CHIPID_RT3883 }, { "ralink,rt5350-eth", RT_CHIPID_RT5350 }, { "ralink,mt7620a-eth", RT_CHIPID_MT7620 }, { "mediatek,mt7620-eth", RT_CHIPID_MT7620 }, { "ralink,mt7621-eth", RT_CHIPID_MT7621 }, { "mediatek,mt7621-eth", RT_CHIPID_MT7621 }, { NULL, 0 } }; #endif /* * Static function prototypes */ static int rt_probe(device_t dev); static int rt_attach(device_t dev); static int rt_detach(device_t dev); static int rt_shutdown(device_t dev); static int rt_suspend(device_t dev); static int rt_resume(device_t dev); static void rt_init_locked(void *priv); static void rt_init(void *priv); static void rt_stop_locked(void *priv); static void rt_stop(void *priv); static void rt_start(struct ifnet *ifp); static int rt_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static void rt_periodic(void *arg); static void rt_tx_watchdog(void *arg); static void rt_intr(void *arg); static void rt_rt5350_intr(void *arg); static void rt_tx_coherent_intr(struct rt_softc *sc); static void rt_rx_coherent_intr(struct rt_softc *sc); static void rt_rx_delay_intr(struct rt_softc *sc); static void rt_tx_delay_intr(struct rt_softc *sc); static void rt_rx_intr(struct rt_softc *sc, int qid); static void rt_tx_intr(struct rt_softc *sc, int qid); static void rt_rx_done_task(void *context, int pending); static void rt_tx_done_task(void *context, int pending); static void rt_periodic_task(void *context, int pending); static int rt_rx_eof(struct rt_softc *sc, struct rt_softc_rx_ring *ring, int limit); static void rt_tx_eof(struct rt_softc *sc, struct rt_softc_tx_ring *ring); static void rt_update_stats(struct rt_softc *sc); static void rt_watchdog(struct rt_softc *sc); static void rt_update_raw_counters(struct rt_softc *sc); static void rt_intr_enable(struct rt_softc *sc, uint32_t intr_mask); static void rt_intr_disable(struct rt_softc *sc, uint32_t intr_mask); static int rt_txrx_enable(struct rt_softc *sc); static int rt_alloc_rx_ring(struct rt_softc *sc, struct rt_softc_rx_ring *ring, int qid); static void rt_reset_rx_ring(struct rt_softc *sc, struct rt_softc_rx_ring *ring); static void rt_free_rx_ring(struct rt_softc *sc, struct rt_softc_rx_ring *ring); static int rt_alloc_tx_ring(struct rt_softc *sc, struct rt_softc_tx_ring *ring, int qid); static void rt_reset_tx_ring(struct rt_softc *sc, struct rt_softc_tx_ring *ring); static void rt_free_tx_ring(struct rt_softc *sc, struct rt_softc_tx_ring *ring); static void rt_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error); static void rt_sysctl_attach(struct rt_softc *sc); #ifdef IF_RT_PHY_SUPPORT void rt_miibus_statchg(device_t); #endif #if defined(IF_RT_PHY_SUPPORT) || defined(RT_MDIO) static int rt_miibus_readreg(device_t, int, int); static int rt_miibus_writereg(device_t, int, int, int); #endif static int rt_ifmedia_upd(struct ifnet *); static void rt_ifmedia_sts(struct ifnet *, struct ifmediareq *); static SYSCTL_NODE(_hw, OID_AUTO, rt, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "RT driver parameters"); #ifdef IF_RT_DEBUG static int rt_debug = 0; SYSCTL_INT(_hw_rt, OID_AUTO, debug, CTLFLAG_RWTUN, &rt_debug, 0, "RT debug level"); #endif static int rt_probe(device_t dev) { struct rt_softc *sc = device_get_softc(dev); char buf[80]; #ifdef FDT const struct ofw_compat_data * cd; cd = ofw_bus_search_compatible(dev, rt_compat_data); if (cd->ocd_data == 0) return (ENXIO); sc->rt_chipid = (unsigned int)(cd->ocd_data); #else #if defined(MT7620) sc->rt_chipid = RT_CHIPID_MT7620; #elif defined(MT7621) sc->rt_chipid = RT_CHIPID_MT7621; #elif defined(RT5350) sc->rt_chipid = RT_CHIPID_RT5350; #else sc->rt_chipid = RT_CHIPID_RT3050; #endif #endif snprintf(buf, sizeof(buf), "Ralink %cT%x onChip Ethernet driver", sc->rt_chipid >= 0x7600 ? 'M' : 'R', sc->rt_chipid); device_set_desc_copy(dev, buf); return (BUS_PROBE_GENERIC); } /* * macaddr_atoi - translate string MAC address to uint8_t array */ static int macaddr_atoi(const char *str, uint8_t *mac) { int count, i; unsigned int amac[ETHER_ADDR_LEN]; /* Aligned version */ count = sscanf(str, "%x%*c%x%*c%x%*c%x%*c%x%*c%x", &amac[0], &amac[1], &amac[2], &amac[3], &amac[4], &amac[5]); if (count < ETHER_ADDR_LEN) { memset(mac, 0, ETHER_ADDR_LEN); return (1); } /* Copy aligned to result */ for (i = 0; i < ETHER_ADDR_LEN; i ++) mac[i] = (amac[i] & 0xff); return (0); } #ifdef USE_GENERATED_MAC_ADDRESS /* * generate_mac(uin8_t *mac) * This is MAC address generator for cases when real device MAC address * unknown or not yet accessible. * Use 'b','s','d' signature and 3 octets from CRC32 on kenv. * MAC = 'b', 's', 'd', CRC[3]^CRC[2], CRC[1], CRC[0] * * Output - MAC address, that do not change between reboots, if hints or * bootloader info unchange. */ static void generate_mac(uint8_t *mac) { unsigned char *cp; int i = 0; uint32_t crc = 0xffffffff; /* Generate CRC32 on kenv */ for (cp = kenvp[0]; cp != NULL; cp = kenvp[++i]) { crc = calculate_crc32c(crc, cp, strlen(cp) + 1); } crc = ~crc; mac[0] = 'b'; mac[1] = 's'; mac[2] = 'd'; mac[3] = (crc >> 24) ^ ((crc >> 16) & 0xff); mac[4] = (crc >> 8) & 0xff; mac[5] = crc & 0xff; } #endif /* * ether_request_mac - try to find usable MAC address. */ static int ether_request_mac(device_t dev, uint8_t *mac) { char *var; /* * "ethaddr" is passed via envp on RedBoot platforms * "kmac" is passed via argv on RouterBOOT platforms */ #if defined(RT305X_UBOOT) || defined(__REDBOOT__) || defined(__ROUTERBOOT__) if ((var = kern_getenv("ethaddr")) != NULL || (var = kern_getenv("kmac")) != NULL ) { if(!macaddr_atoi(var, mac)) { printf("%s: use %s macaddr from KENV\n", device_get_nameunit(dev), var); freeenv(var); return (0); } freeenv(var); } #endif /* * Try from hints * hint.[dev].[unit].macaddr */ if (!resource_string_value(device_get_name(dev), device_get_unit(dev), "macaddr", (const char **)&var)) { if(!macaddr_atoi(var, mac)) { printf("%s: use %s macaddr from hints\n", device_get_nameunit(dev), var); return (0); } } #ifdef USE_GENERATED_MAC_ADDRESS generate_mac(mac); device_printf(dev, "use generated %02x:%02x:%02x:%02x:%02x:%02x " "macaddr\n", mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); #else /* Hardcoded */ mac[0] = 0x00; mac[1] = 0x18; mac[2] = 0xe7; mac[3] = 0xd5; mac[4] = 0x83; mac[5] = 0x90; device_printf(dev, "use hardcoded 00:18:e7:d5:83:90 macaddr\n"); #endif return (0); } /* * Reset hardware */ static void reset_freng(struct rt_softc *sc) { /* XXX hard reset kills everything so skip it ... */ return; } static int rt_attach(device_t dev) { struct rt_softc *sc; struct ifnet *ifp; int error, i; +#ifdef FDT + phandle_t node; + char fdtval[32]; +#endif sc = device_get_softc(dev); sc->dev = dev; +#ifdef FDT + node = ofw_bus_get_node(sc->dev); +#endif + mtx_init(&sc->lock, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF | MTX_RECURSE); sc->mem_rid = 0; sc->mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->mem_rid, RF_ACTIVE | RF_SHAREABLE); if (sc->mem == NULL) { device_printf(dev, "could not allocate memory resource\n"); error = ENXIO; goto fail; } sc->bst = rman_get_bustag(sc->mem); sc->bsh = rman_get_bushandle(sc->mem); sc->irq_rid = 0; sc->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irq_rid, RF_ACTIVE); if (sc->irq == NULL) { device_printf(dev, "could not allocate interrupt resource\n"); error = ENXIO; goto fail; } #ifdef IF_RT_DEBUG sc->debug = rt_debug; SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLFLAG_RW, &sc->debug, 0, "rt debug level"); #endif /* Reset hardware */ reset_freng(sc); if (sc->rt_chipid == RT_CHIPID_MT7620) { sc->csum_fail_ip = MT7620_RXD_SRC_IP_CSUM_FAIL; sc->csum_fail_l4 = MT7620_RXD_SRC_L4_CSUM_FAIL; } else if (sc->rt_chipid == RT_CHIPID_MT7621) { sc->csum_fail_ip = MT7621_RXD_SRC_IP_CSUM_FAIL; sc->csum_fail_l4 = MT7621_RXD_SRC_L4_CSUM_FAIL; } else { sc->csum_fail_ip = RT305X_RXD_SRC_IP_CSUM_FAIL; sc->csum_fail_l4 = RT305X_RXD_SRC_L4_CSUM_FAIL; } /* Fill in soc-specific registers map */ switch(sc->rt_chipid) { case RT_CHIPID_MT7620: case RT_CHIPID_MT7621: sc->gdma1_base = MT7620_GDMA1_BASE; /* fallthrough */ case RT_CHIPID_RT5350: device_printf(dev, "%cT%x Ethernet MAC (rev 0x%08x)\n", sc->rt_chipid >= 0x7600 ? 'M' : 'R', sc->rt_chipid, sc->mac_rev); /* RT5350: No GDMA, PSE, CDMA, PPE */ RT_WRITE(sc, GE_PORT_BASE + 0x0C00, // UDPCS, TCPCS, IPCS=1 RT_READ(sc, GE_PORT_BASE + 0x0C00) | (0x7<<16)); sc->delay_int_cfg=RT5350_PDMA_BASE+RT5350_DELAY_INT_CFG; sc->fe_int_status=RT5350_FE_INT_STATUS; sc->fe_int_enable=RT5350_FE_INT_ENABLE; sc->pdma_glo_cfg=RT5350_PDMA_BASE+RT5350_PDMA_GLO_CFG; sc->pdma_rst_idx=RT5350_PDMA_BASE+RT5350_PDMA_RST_IDX; for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) { sc->tx_base_ptr[i]=RT5350_PDMA_BASE+RT5350_TX_BASE_PTR(i); sc->tx_max_cnt[i]=RT5350_PDMA_BASE+RT5350_TX_MAX_CNT(i); sc->tx_ctx_idx[i]=RT5350_PDMA_BASE+RT5350_TX_CTX_IDX(i); sc->tx_dtx_idx[i]=RT5350_PDMA_BASE+RT5350_TX_DTX_IDX(i); } sc->rx_ring_count=2; sc->rx_base_ptr[0]=RT5350_PDMA_BASE+RT5350_RX_BASE_PTR0; sc->rx_max_cnt[0]=RT5350_PDMA_BASE+RT5350_RX_MAX_CNT0; sc->rx_calc_idx[0]=RT5350_PDMA_BASE+RT5350_RX_CALC_IDX0; sc->rx_drx_idx[0]=RT5350_PDMA_BASE+RT5350_RX_DRX_IDX0; sc->rx_base_ptr[1]=RT5350_PDMA_BASE+RT5350_RX_BASE_PTR1; sc->rx_max_cnt[1]=RT5350_PDMA_BASE+RT5350_RX_MAX_CNT1; sc->rx_calc_idx[1]=RT5350_PDMA_BASE+RT5350_RX_CALC_IDX1; sc->rx_drx_idx[1]=RT5350_PDMA_BASE+RT5350_RX_DRX_IDX1; sc->int_rx_done_mask=RT5350_INT_RXQ0_DONE; sc->int_tx_done_mask=RT5350_INT_TXQ0_DONE; break; default: device_printf(dev, "RT305XF Ethernet MAC (rev 0x%08x)\n", sc->mac_rev); sc->gdma1_base = GDMA1_BASE; sc->delay_int_cfg=PDMA_BASE+DELAY_INT_CFG; sc->fe_int_status=GE_PORT_BASE+FE_INT_STATUS; sc->fe_int_enable=GE_PORT_BASE+FE_INT_ENABLE; sc->pdma_glo_cfg=PDMA_BASE+PDMA_GLO_CFG; sc->pdma_rst_idx=PDMA_BASE+PDMA_RST_IDX; for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) { sc->tx_base_ptr[i]=PDMA_BASE+TX_BASE_PTR(i); sc->tx_max_cnt[i]=PDMA_BASE+TX_MAX_CNT(i); sc->tx_ctx_idx[i]=PDMA_BASE+TX_CTX_IDX(i); sc->tx_dtx_idx[i]=PDMA_BASE+TX_DTX_IDX(i); } sc->rx_ring_count=1; sc->rx_base_ptr[0]=PDMA_BASE+RX_BASE_PTR0; sc->rx_max_cnt[0]=PDMA_BASE+RX_MAX_CNT0; sc->rx_calc_idx[0]=PDMA_BASE+RX_CALC_IDX0; sc->rx_drx_idx[0]=PDMA_BASE+RX_DRX_IDX0; sc->int_rx_done_mask=INT_RX_DONE; sc->int_tx_done_mask=INT_TXQ0_DONE; } if (sc->gdma1_base != 0) RT_WRITE(sc, sc->gdma1_base + GDMA_FWD_CFG, ( GDM_ICS_EN | /* Enable IP Csum */ GDM_TCS_EN | /* Enable TCP Csum */ GDM_UCS_EN | /* Enable UDP Csum */ GDM_STRPCRC | /* Strip CRC from packet */ GDM_DST_PORT_CPU << GDM_UFRC_P_SHIFT | /* fwd UCast to CPU */ GDM_DST_PORT_CPU << GDM_BFRC_P_SHIFT | /* fwd BCast to CPU */ GDM_DST_PORT_CPU << GDM_MFRC_P_SHIFT | /* fwd MCast to CPU */ GDM_DST_PORT_CPU << GDM_OFRC_P_SHIFT /* fwd Other to CPU */ )); - if (sc->rt_chipid == RT_CHIPID_RT2880) - RT_WRITE(sc, MDIO_CFG, MDIO_2880_100T_INIT); +#ifdef FDT + if (sc->rt_chipid == RT_CHIPID_RT2880 || + sc->rt_chipid == RT_CHIPID_RT3883) { + if (OF_getprop(node, "port-mode", fdtval, sizeof(fdtval)) > 0 && + strcmp(fdtval, "gigasw") == 0) + RT_WRITE(sc, MDIO_CFG, MDIO_2880_GIGA_INIT); + else + RT_WRITE(sc, MDIO_CFG, MDIO_2880_100T_INIT); + } +#endif /* allocate Tx and Rx rings */ for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) { error = rt_alloc_tx_ring(sc, &sc->tx_ring[i], i); if (error != 0) { device_printf(dev, "could not allocate Tx ring #%d\n", i); goto fail; } } sc->tx_ring_mgtqid = 5; for (i = 0; i < sc->rx_ring_count; i++) { error = rt_alloc_rx_ring(sc, &sc->rx_ring[i], i); if (error != 0) { device_printf(dev, "could not allocate Rx ring\n"); goto fail; } } callout_init(&sc->periodic_ch, 0); callout_init_mtx(&sc->tx_watchdog_ch, &sc->lock, 0); ifp = sc->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "could not if_alloc()\n"); error = ENOMEM; goto fail; } ifp->if_softc = sc; if_initname(ifp, device_get_name(sc->dev), device_get_unit(sc->dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = rt_init; ifp->if_ioctl = rt_ioctl; ifp->if_start = rt_start; #define RT_TX_QLEN 256 IFQ_SET_MAXLEN(&ifp->if_snd, RT_TX_QLEN); ifp->if_snd.ifq_drv_maxlen = RT_TX_QLEN; IFQ_SET_READY(&ifp->if_snd); #ifdef IF_RT_PHY_SUPPORT error = mii_attach(dev, &sc->rt_miibus, ifp, rt_ifmedia_upd, rt_ifmedia_sts, BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, 0); if (error != 0) { device_printf(dev, "attaching PHYs failed\n"); error = ENXIO; goto fail; } #else ifmedia_init(&sc->rt_ifmedia, 0, rt_ifmedia_upd, rt_ifmedia_sts); ifmedia_add(&sc->rt_ifmedia, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); ifmedia_set(&sc->rt_ifmedia, IFM_ETHER | IFM_100_TX | IFM_FDX); #endif /* IF_RT_PHY_SUPPORT */ ether_request_mac(dev, sc->mac_addr); ether_ifattach(ifp, sc->mac_addr); /* * Tell the upper layer(s) we support long frames. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_MTU; ifp->if_capenable |= IFCAP_VLAN_MTU; ifp->if_capabilities |= IFCAP_RXCSUM|IFCAP_TXCSUM; ifp->if_capenable |= IFCAP_RXCSUM|IFCAP_TXCSUM; /* init task queue */ NET_TASK_INIT(&sc->rx_done_task, 0, rt_rx_done_task, sc); TASK_INIT(&sc->tx_done_task, 0, rt_tx_done_task, sc); TASK_INIT(&sc->periodic_task, 0, rt_periodic_task, sc); sc->rx_process_limit = 100; sc->taskqueue = taskqueue_create("rt_taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->taskqueue); taskqueue_start_threads(&sc->taskqueue, 1, PI_NET, "%s taskq", device_get_nameunit(sc->dev)); rt_sysctl_attach(sc); /* set up interrupt */ error = bus_setup_intr(dev, sc->irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, (sc->rt_chipid == RT_CHIPID_RT5350 || sc->rt_chipid == RT_CHIPID_MT7620 || sc->rt_chipid == RT_CHIPID_MT7621) ? rt_rt5350_intr : rt_intr, sc, &sc->irqh); if (error != 0) { printf("%s: could not set up interrupt\n", device_get_nameunit(dev)); goto fail; } #ifdef IF_RT_DEBUG device_printf(dev, "debug var at %#08x\n", (u_int)&(sc->debug)); #endif return (0); fail: /* free Tx and Rx rings */ for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) rt_free_tx_ring(sc, &sc->tx_ring[i]); for (i = 0; i < sc->rx_ring_count; i++) rt_free_rx_ring(sc, &sc->rx_ring[i]); mtx_destroy(&sc->lock); if (sc->mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid, sc->mem); if (sc->irq != NULL) bus_release_resource(dev, SYS_RES_IRQ, sc->irq_rid, sc->irq); return (error); } /* * Set media options. */ static int rt_ifmedia_upd(struct ifnet *ifp) { struct rt_softc *sc; #ifdef IF_RT_PHY_SUPPORT struct mii_data *mii; struct mii_softc *miisc; int error = 0; sc = ifp->if_softc; RT_SOFTC_LOCK(sc); mii = device_get_softc(sc->rt_miibus); LIST_FOREACH(miisc, &mii->mii_phys, mii_list) PHY_RESET(miisc); error = mii_mediachg(mii); RT_SOFTC_UNLOCK(sc); return (error); #else /* !IF_RT_PHY_SUPPORT */ struct ifmedia *ifm; struct ifmedia_entry *ife; sc = ifp->if_softc; ifm = &sc->rt_ifmedia; ife = ifm->ifm_cur; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if (IFM_SUBTYPE(ife->ifm_media) == IFM_AUTO) { device_printf(sc->dev, "AUTO is not supported for multiphy MAC"); return (EINVAL); } /* * Ignore everything */ return (0); #endif /* IF_RT_PHY_SUPPORT */ } /* * Report current media status. */ static void rt_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { #ifdef IF_RT_PHY_SUPPORT struct rt_softc *sc; struct mii_data *mii; sc = ifp->if_softc; RT_SOFTC_LOCK(sc); mii = device_get_softc(sc->rt_miibus); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; ifmr->ifm_active = IFM_ETHER | IFM_100_TX | IFM_FDX; ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE; RT_SOFTC_UNLOCK(sc); #else /* !IF_RT_PHY_SUPPORT */ ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE; ifmr->ifm_active = IFM_ETHER | IFM_100_TX | IFM_FDX; #endif /* IF_RT_PHY_SUPPORT */ } static int rt_detach(device_t dev) { struct rt_softc *sc; struct ifnet *ifp; int i; sc = device_get_softc(dev); ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_ANY, "detaching\n"); RT_SOFTC_LOCK(sc); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); callout_stop(&sc->periodic_ch); callout_stop(&sc->tx_watchdog_ch); taskqueue_drain(sc->taskqueue, &sc->rx_done_task); taskqueue_drain(sc->taskqueue, &sc->tx_done_task); taskqueue_drain(sc->taskqueue, &sc->periodic_task); /* free Tx and Rx rings */ for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) rt_free_tx_ring(sc, &sc->tx_ring[i]); for (i = 0; i < sc->rx_ring_count; i++) rt_free_rx_ring(sc, &sc->rx_ring[i]); RT_SOFTC_UNLOCK(sc); #ifdef IF_RT_PHY_SUPPORT if (sc->rt_miibus != NULL) device_delete_child(dev, sc->rt_miibus); #endif ether_ifdetach(ifp); if_free(ifp); taskqueue_free(sc->taskqueue); mtx_destroy(&sc->lock); bus_generic_detach(dev); bus_teardown_intr(dev, sc->irq, sc->irqh); bus_release_resource(dev, SYS_RES_IRQ, sc->irq_rid, sc->irq); bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid, sc->mem); return (0); } static int rt_shutdown(device_t dev) { struct rt_softc *sc; sc = device_get_softc(dev); RT_DPRINTF(sc, RT_DEBUG_ANY, "shutting down\n"); rt_stop(sc); return (0); } static int rt_suspend(device_t dev) { struct rt_softc *sc; sc = device_get_softc(dev); RT_DPRINTF(sc, RT_DEBUG_ANY, "suspending\n"); rt_stop(sc); return (0); } static int rt_resume(device_t dev) { struct rt_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_ANY, "resuming\n"); if (ifp->if_flags & IFF_UP) rt_init(sc); return (0); } /* * rt_init_locked - Run initialization process having locked mtx. */ static void rt_init_locked(void *priv) { struct rt_softc *sc; struct ifnet *ifp; #ifdef IF_RT_PHY_SUPPORT struct mii_data *mii; #endif int i, ntries; uint32_t tmp; sc = priv; ifp = sc->ifp; #ifdef IF_RT_PHY_SUPPORT mii = device_get_softc(sc->rt_miibus); #endif RT_DPRINTF(sc, RT_DEBUG_ANY, "initializing\n"); RT_SOFTC_ASSERT_LOCKED(sc); /* hardware reset */ //RT_WRITE(sc, GE_PORT_BASE + FE_RST_GLO, PSE_RESET); //rt305x_sysctl_set(SYSCTL_RSTCTRL, SYSCTL_RSTCTRL_FRENG); /* Fwd to CPU (uni|broad|multi)cast and Unknown */ if (sc->gdma1_base != 0) RT_WRITE(sc, sc->gdma1_base + GDMA_FWD_CFG, ( GDM_ICS_EN | /* Enable IP Csum */ GDM_TCS_EN | /* Enable TCP Csum */ GDM_UCS_EN | /* Enable UDP Csum */ GDM_STRPCRC | /* Strip CRC from packet */ GDM_DST_PORT_CPU << GDM_UFRC_P_SHIFT | /* fwd UCast to CPU */ GDM_DST_PORT_CPU << GDM_BFRC_P_SHIFT | /* fwd BCast to CPU */ GDM_DST_PORT_CPU << GDM_MFRC_P_SHIFT | /* fwd MCast to CPU */ GDM_DST_PORT_CPU << GDM_OFRC_P_SHIFT /* fwd Other to CPU */ )); /* disable DMA engine */ RT_WRITE(sc, sc->pdma_glo_cfg, 0); RT_WRITE(sc, sc->pdma_rst_idx, 0xffffffff); /* wait while DMA engine is busy */ for (ntries = 0; ntries < 100; ntries++) { tmp = RT_READ(sc, sc->pdma_glo_cfg); if (!(tmp & (FE_TX_DMA_BUSY | FE_RX_DMA_BUSY))) break; DELAY(1000); } if (ntries == 100) { device_printf(sc->dev, "timeout waiting for DMA engine\n"); goto fail; } /* reset Rx and Tx rings */ tmp = FE_RST_DRX_IDX0 | FE_RST_DTX_IDX3 | FE_RST_DTX_IDX2 | FE_RST_DTX_IDX1 | FE_RST_DTX_IDX0; RT_WRITE(sc, sc->pdma_rst_idx, tmp); /* XXX switch set mac address */ for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) rt_reset_tx_ring(sc, &sc->tx_ring[i]); for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) { /* update TX_BASE_PTRx */ RT_WRITE(sc, sc->tx_base_ptr[i], sc->tx_ring[i].desc_phys_addr); RT_WRITE(sc, sc->tx_max_cnt[i], RT_SOFTC_TX_RING_DESC_COUNT); RT_WRITE(sc, sc->tx_ctx_idx[i], 0); } /* init Rx ring */ for (i = 0; i < sc->rx_ring_count; i++) rt_reset_rx_ring(sc, &sc->rx_ring[i]); /* update RX_BASE_PTRx */ for (i = 0; i < sc->rx_ring_count; i++) { RT_WRITE(sc, sc->rx_base_ptr[i], sc->rx_ring[i].desc_phys_addr); RT_WRITE(sc, sc->rx_max_cnt[i], RT_SOFTC_RX_RING_DATA_COUNT); RT_WRITE(sc, sc->rx_calc_idx[i], RT_SOFTC_RX_RING_DATA_COUNT - 1); } /* write back DDONE, 16byte burst enable RX/TX DMA */ tmp = FE_TX_WB_DDONE | FE_DMA_BT_SIZE16 | FE_RX_DMA_EN | FE_TX_DMA_EN; if (sc->rt_chipid == RT_CHIPID_MT7620 || sc->rt_chipid == RT_CHIPID_MT7621) tmp |= (1<<31); RT_WRITE(sc, sc->pdma_glo_cfg, tmp); /* disable interrupts mitigation */ RT_WRITE(sc, sc->delay_int_cfg, 0); /* clear pending interrupts */ RT_WRITE(sc, sc->fe_int_status, 0xffffffff); /* enable interrupts */ if (sc->rt_chipid == RT_CHIPID_RT5350 || sc->rt_chipid == RT_CHIPID_MT7620 || sc->rt_chipid == RT_CHIPID_MT7621) tmp = RT5350_INT_TX_COHERENT | RT5350_INT_RX_COHERENT | RT5350_INT_TXQ3_DONE | RT5350_INT_TXQ2_DONE | RT5350_INT_TXQ1_DONE | RT5350_INT_TXQ0_DONE | RT5350_INT_RXQ1_DONE | RT5350_INT_RXQ0_DONE; else tmp = CNT_PPE_AF | CNT_GDM_AF | PSE_P2_FC | GDM_CRC_DROP | PSE_BUF_DROP | GDM_OTHER_DROP | PSE_P1_FC | PSE_P0_FC | PSE_FQ_EMPTY | INT_TX_COHERENT | INT_RX_COHERENT | INT_TXQ3_DONE | INT_TXQ2_DONE | INT_TXQ1_DONE | INT_TXQ0_DONE | INT_RX_DONE; sc->intr_enable_mask = tmp; RT_WRITE(sc, sc->fe_int_enable, tmp); if (rt_txrx_enable(sc) != 0) goto fail; #ifdef IF_RT_PHY_SUPPORT if (mii) mii_mediachg(mii); #endif /* IF_RT_PHY_SUPPORT */ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; ifp->if_drv_flags |= IFF_DRV_RUNNING; sc->periodic_round = 0; callout_reset(&sc->periodic_ch, hz / 10, rt_periodic, sc); return; fail: rt_stop_locked(sc); } /* * rt_init - lock and initialize device. */ static void rt_init(void *priv) { struct rt_softc *sc; sc = priv; RT_SOFTC_LOCK(sc); rt_init_locked(sc); RT_SOFTC_UNLOCK(sc); } /* * rt_stop_locked - stop TX/RX w/ lock */ static void rt_stop_locked(void *priv) { struct rt_softc *sc; struct ifnet *ifp; sc = priv; ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_ANY, "stopping\n"); RT_SOFTC_ASSERT_LOCKED(sc); sc->tx_timer = 0; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); callout_stop(&sc->periodic_ch); callout_stop(&sc->tx_watchdog_ch); RT_SOFTC_UNLOCK(sc); taskqueue_block(sc->taskqueue); /* * Sometime rt_stop_locked called from isr and we get panic * When found, I fix it */ #ifdef notyet taskqueue_drain(sc->taskqueue, &sc->rx_done_task); taskqueue_drain(sc->taskqueue, &sc->tx_done_task); taskqueue_drain(sc->taskqueue, &sc->periodic_task); #endif RT_SOFTC_LOCK(sc); /* disable interrupts */ RT_WRITE(sc, sc->fe_int_enable, 0); if(sc->rt_chipid != RT_CHIPID_RT5350 && sc->rt_chipid != RT_CHIPID_MT7620 && sc->rt_chipid != RT_CHIPID_MT7621) { /* reset adapter */ RT_WRITE(sc, GE_PORT_BASE + FE_RST_GLO, PSE_RESET); } if (sc->gdma1_base != 0) RT_WRITE(sc, sc->gdma1_base + GDMA_FWD_CFG, ( GDM_ICS_EN | /* Enable IP Csum */ GDM_TCS_EN | /* Enable TCP Csum */ GDM_UCS_EN | /* Enable UDP Csum */ GDM_STRPCRC | /* Strip CRC from packet */ GDM_DST_PORT_CPU << GDM_UFRC_P_SHIFT | /* fwd UCast to CPU */ GDM_DST_PORT_CPU << GDM_BFRC_P_SHIFT | /* fwd BCast to CPU */ GDM_DST_PORT_CPU << GDM_MFRC_P_SHIFT | /* fwd MCast to CPU */ GDM_DST_PORT_CPU << GDM_OFRC_P_SHIFT /* fwd Other to CPU */ )); } static void rt_stop(void *priv) { struct rt_softc *sc; sc = priv; RT_SOFTC_LOCK(sc); rt_stop_locked(sc); RT_SOFTC_UNLOCK(sc); } /* * rt_tx_data - transmit packet. */ static int rt_tx_data(struct rt_softc *sc, struct mbuf *m, int qid) { struct ifnet *ifp; struct rt_softc_tx_ring *ring; struct rt_softc_tx_data *data; struct rt_txdesc *desc; struct mbuf *m_d; bus_dma_segment_t dma_seg[RT_SOFTC_MAX_SCATTER]; int error, ndmasegs, ndescs, i; KASSERT(qid >= 0 && qid < RT_SOFTC_TX_RING_COUNT, ("%s: Tx data: invalid qid=%d\n", device_get_nameunit(sc->dev), qid)); RT_SOFTC_TX_RING_ASSERT_LOCKED(&sc->tx_ring[qid]); ifp = sc->ifp; ring = &sc->tx_ring[qid]; desc = &ring->desc[ring->desc_cur]; data = &ring->data[ring->data_cur]; error = bus_dmamap_load_mbuf_sg(ring->data_dma_tag, data->dma_map, m, dma_seg, &ndmasegs, 0); if (error != 0) { /* too many fragments, linearize */ RT_DPRINTF(sc, RT_DEBUG_TX, "could not load mbuf DMA map, trying to linearize " "mbuf: ndmasegs=%d, len=%d, error=%d\n", ndmasegs, m->m_pkthdr.len, error); m_d = m_collapse(m, M_NOWAIT, 16); if (m_d == NULL) { m_freem(m); m = NULL; return (ENOMEM); } m = m_d; sc->tx_defrag_packets++; error = bus_dmamap_load_mbuf_sg(ring->data_dma_tag, data->dma_map, m, dma_seg, &ndmasegs, 0); if (error != 0) { device_printf(sc->dev, "could not load mbuf DMA map: " "ndmasegs=%d, len=%d, error=%d\n", ndmasegs, m->m_pkthdr.len, error); m_freem(m); return (error); } } if (m->m_pkthdr.len == 0) ndmasegs = 0; /* determine how many Tx descs are required */ ndescs = 1 + ndmasegs / 2; if ((ring->desc_queued + ndescs) > (RT_SOFTC_TX_RING_DESC_COUNT - 2)) { RT_DPRINTF(sc, RT_DEBUG_TX, "there are not enough Tx descs\n"); sc->no_tx_desc_avail++; bus_dmamap_unload(ring->data_dma_tag, data->dma_map); m_freem(m); return (EFBIG); } data->m = m; /* set up Tx descs */ for (i = 0; i < ndmasegs; i += 2) { /* TODO: this needs to be refined as MT7620 for example has * a different word3 layout than RT305x and RT5350 (the last * one doesn't use word3 at all). And so does MT7621... */ if (sc->rt_chipid != RT_CHIPID_MT7621) { /* Set destination */ if (sc->rt_chipid != RT_CHIPID_MT7620) desc->dst = (TXDSCR_DST_PORT_GDMA1); if ((ifp->if_capenable & IFCAP_TXCSUM) != 0) desc->dst |= (TXDSCR_IP_CSUM_GEN | TXDSCR_UDP_CSUM_GEN | TXDSCR_TCP_CSUM_GEN); /* Set queue id */ desc->qn = qid; /* No PPPoE */ desc->pppoe = 0; /* No VLAN */ desc->vid = 0; } else { desc->vid = 0; desc->pppoe = 0; desc->qn = 0; desc->dst = 2; } desc->sdp0 = htole32(dma_seg[i].ds_addr); desc->sdl0 = htole16(dma_seg[i].ds_len | ( ((i+1) == ndmasegs )?RT_TXDESC_SDL0_LASTSEG:0 )); if ((i+1) < ndmasegs) { desc->sdp1 = htole32(dma_seg[i+1].ds_addr); desc->sdl1 = htole16(dma_seg[i+1].ds_len | ( ((i+2) == ndmasegs )?RT_TXDESC_SDL1_LASTSEG:0 )); } else { desc->sdp1 = 0; desc->sdl1 = 0; } if ((i+2) < ndmasegs) { ring->desc_queued++; ring->desc_cur = (ring->desc_cur + 1) % RT_SOFTC_TX_RING_DESC_COUNT; } desc = &ring->desc[ring->desc_cur]; } RT_DPRINTF(sc, RT_DEBUG_TX, "sending data: len=%d, ndmasegs=%d, " "DMA ds_len=%d/%d/%d/%d/%d\n", m->m_pkthdr.len, ndmasegs, (int) dma_seg[0].ds_len, (int) dma_seg[1].ds_len, (int) dma_seg[2].ds_len, (int) dma_seg[3].ds_len, (int) dma_seg[4].ds_len); bus_dmamap_sync(ring->seg0_dma_tag, ring->seg0_dma_map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ring->data_dma_tag, data->dma_map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_PREWRITE); ring->desc_queued++; ring->desc_cur = (ring->desc_cur + 1) % RT_SOFTC_TX_RING_DESC_COUNT; ring->data_queued++; ring->data_cur = (ring->data_cur + 1) % RT_SOFTC_TX_RING_DATA_COUNT; /* kick Tx */ RT_WRITE(sc, sc->tx_ctx_idx[qid], ring->desc_cur); return (0); } /* * rt_start - start Transmit/Receive */ static void rt_start(struct ifnet *ifp) { struct rt_softc *sc; struct mbuf *m; int qid = 0 /* XXX must check QoS priority */; sc = ifp->if_softc; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; for (;;) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m); if (m == NULL) break; m->m_pkthdr.rcvif = NULL; RT_SOFTC_TX_RING_LOCK(&sc->tx_ring[qid]); if (sc->tx_ring[qid].data_queued >= RT_SOFTC_TX_RING_DATA_COUNT) { RT_SOFTC_TX_RING_UNLOCK(&sc->tx_ring[qid]); RT_DPRINTF(sc, RT_DEBUG_TX, "if_start: Tx ring with qid=%d is full\n", qid); m_freem(m); ifp->if_drv_flags |= IFF_DRV_OACTIVE; if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); sc->tx_data_queue_full[qid]++; break; } if (rt_tx_data(sc, m, qid) != 0) { RT_SOFTC_TX_RING_UNLOCK(&sc->tx_ring[qid]); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); break; } RT_SOFTC_TX_RING_UNLOCK(&sc->tx_ring[qid]); sc->tx_timer = RT_TX_WATCHDOG_TIMEOUT; callout_reset(&sc->tx_watchdog_ch, hz, rt_tx_watchdog, sc); } } /* * rt_update_promisc - set/clear promiscuous mode. Unused yet, because * filtering done by attached Ethernet switch. */ static void rt_update_promisc(struct ifnet *ifp) { struct rt_softc *sc; sc = ifp->if_softc; printf("%s: %s promiscuous mode\n", device_get_nameunit(sc->dev), (ifp->if_flags & IFF_PROMISC) ? "entering" : "leaving"); } /* * rt_ioctl - ioctl handler. */ static int rt_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct rt_softc *sc; struct ifreq *ifr; #ifdef IF_RT_PHY_SUPPORT struct mii_data *mii; #endif /* IF_RT_PHY_SUPPORT */ int error, startall; sc = ifp->if_softc; ifr = (struct ifreq *) data; error = 0; switch (cmd) { case SIOCSIFFLAGS: startall = 0; RT_SOFTC_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if ((ifp->if_flags ^ sc->if_flags) & IFF_PROMISC) rt_update_promisc(ifp); } else { rt_init_locked(sc); startall = 1; } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) rt_stop_locked(sc); } sc->if_flags = ifp->if_flags; RT_SOFTC_UNLOCK(sc); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: #ifdef IF_RT_PHY_SUPPORT mii = device_get_softc(sc->rt_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, cmd); #else error = ifmedia_ioctl(ifp, ifr, &sc->rt_ifmedia, cmd); #endif /* IF_RT_PHY_SUPPORT */ break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } /* * rt_periodic - Handler of PERIODIC interrupt */ static void rt_periodic(void *arg) { struct rt_softc *sc; sc = arg; RT_DPRINTF(sc, RT_DEBUG_PERIODIC, "periodic\n"); taskqueue_enqueue(sc->taskqueue, &sc->periodic_task); } /* * rt_tx_watchdog - Handler of TX Watchdog */ static void rt_tx_watchdog(void *arg) { struct rt_softc *sc; struct ifnet *ifp; sc = arg; ifp = sc->ifp; if (sc->tx_timer == 0) return; if (--sc->tx_timer == 0) { device_printf(sc->dev, "Tx watchdog timeout: resetting\n"); #ifdef notyet /* * XXX: Commented out, because reset break input. */ rt_stop_locked(sc); rt_init_locked(sc); #endif if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); sc->tx_watchdog_timeouts++; } callout_reset(&sc->tx_watchdog_ch, hz, rt_tx_watchdog, sc); } /* * rt_cnt_ppe_af - Handler of PPE Counter Table Almost Full interrupt */ static void rt_cnt_ppe_af(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "PPE Counter Table Almost Full\n"); } /* * rt_cnt_gdm_af - Handler of GDMA 1 & 2 Counter Table Almost Full interrupt */ static void rt_cnt_gdm_af(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "GDMA 1 & 2 Counter Table Almost Full\n"); } /* * rt_pse_p2_fc - Handler of PSE port2 (GDMA 2) flow control interrupt */ static void rt_pse_p2_fc(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "PSE port2 (GDMA 2) flow control asserted.\n"); } /* * rt_gdm_crc_drop - Handler of GDMA 1/2 discard a packet due to CRC error * interrupt */ static void rt_gdm_crc_drop(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "GDMA 1 & 2 discard a packet due to CRC error\n"); } /* * rt_pse_buf_drop - Handler of buffer sharing limitation interrupt */ static void rt_pse_buf_drop(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "PSE discards a packet due to buffer sharing limitation\n"); } /* * rt_gdm_other_drop - Handler of discard on other reason interrupt */ static void rt_gdm_other_drop(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "GDMA 1 & 2 discard a packet due to other reason\n"); } /* * rt_pse_p1_fc - Handler of PSE port1 (GDMA 1) flow control interrupt */ static void rt_pse_p1_fc(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "PSE port1 (GDMA 1) flow control asserted.\n"); } /* * rt_pse_p0_fc - Handler of PSE port0 (CDMA) flow control interrupt */ static void rt_pse_p0_fc(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "PSE port0 (CDMA) flow control asserted.\n"); } /* * rt_pse_fq_empty - Handler of PSE free Q empty threshold reached interrupt */ static void rt_pse_fq_empty(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "PSE free Q empty threshold reached & forced drop " "condition occurred.\n"); } /* * rt_intr - main ISR */ static void rt_intr(void *arg) { struct rt_softc *sc; struct ifnet *ifp; uint32_t status; sc = arg; ifp = sc->ifp; /* acknowledge interrupts */ status = RT_READ(sc, sc->fe_int_status); RT_WRITE(sc, sc->fe_int_status, status); RT_DPRINTF(sc, RT_DEBUG_INTR, "interrupt: status=0x%08x\n", status); if (status == 0xffffffff || /* device likely went away */ status == 0) /* not for us */ return; sc->interrupts++; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; if (status & CNT_PPE_AF) rt_cnt_ppe_af(sc); if (status & CNT_GDM_AF) rt_cnt_gdm_af(sc); if (status & PSE_P2_FC) rt_pse_p2_fc(sc); if (status & GDM_CRC_DROP) rt_gdm_crc_drop(sc); if (status & PSE_BUF_DROP) rt_pse_buf_drop(sc); if (status & GDM_OTHER_DROP) rt_gdm_other_drop(sc); if (status & PSE_P1_FC) rt_pse_p1_fc(sc); if (status & PSE_P0_FC) rt_pse_p0_fc(sc); if (status & PSE_FQ_EMPTY) rt_pse_fq_empty(sc); if (status & INT_TX_COHERENT) rt_tx_coherent_intr(sc); if (status & INT_RX_COHERENT) rt_rx_coherent_intr(sc); if (status & RX_DLY_INT) rt_rx_delay_intr(sc); if (status & TX_DLY_INT) rt_tx_delay_intr(sc); if (status & INT_RX_DONE) rt_rx_intr(sc, 0); if (status & INT_TXQ3_DONE) rt_tx_intr(sc, 3); if (status & INT_TXQ2_DONE) rt_tx_intr(sc, 2); if (status & INT_TXQ1_DONE) rt_tx_intr(sc, 1); if (status & INT_TXQ0_DONE) rt_tx_intr(sc, 0); } /* * rt_rt5350_intr - main ISR for Ralink 5350 SoC */ static void rt_rt5350_intr(void *arg) { struct rt_softc *sc; struct ifnet *ifp; uint32_t status; sc = arg; ifp = sc->ifp; /* acknowledge interrupts */ status = RT_READ(sc, sc->fe_int_status); RT_WRITE(sc, sc->fe_int_status, status); RT_DPRINTF(sc, RT_DEBUG_INTR, "interrupt: status=0x%08x\n", status); if (status == 0xffffffff || /* device likely went away */ status == 0) /* not for us */ return; sc->interrupts++; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; if (status & RT5350_INT_TX_COHERENT) rt_tx_coherent_intr(sc); if (status & RT5350_INT_RX_COHERENT) rt_rx_coherent_intr(sc); if (status & RT5350_RX_DLY_INT) rt_rx_delay_intr(sc); if (status & RT5350_TX_DLY_INT) rt_tx_delay_intr(sc); if (status & RT5350_INT_RXQ1_DONE) rt_rx_intr(sc, 1); if (status & RT5350_INT_RXQ0_DONE) rt_rx_intr(sc, 0); if (status & RT5350_INT_TXQ3_DONE) rt_tx_intr(sc, 3); if (status & RT5350_INT_TXQ2_DONE) rt_tx_intr(sc, 2); if (status & RT5350_INT_TXQ1_DONE) rt_tx_intr(sc, 1); if (status & RT5350_INT_TXQ0_DONE) rt_tx_intr(sc, 0); } static void rt_tx_coherent_intr(struct rt_softc *sc) { uint32_t tmp; int i; RT_DPRINTF(sc, RT_DEBUG_INTR, "Tx coherent interrupt\n"); sc->tx_coherent_interrupts++; /* restart DMA engine */ tmp = RT_READ(sc, sc->pdma_glo_cfg); tmp &= ~(FE_TX_WB_DDONE | FE_TX_DMA_EN); RT_WRITE(sc, sc->pdma_glo_cfg, tmp); for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) rt_reset_tx_ring(sc, &sc->tx_ring[i]); for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) { RT_WRITE(sc, sc->tx_base_ptr[i], sc->tx_ring[i].desc_phys_addr); RT_WRITE(sc, sc->tx_max_cnt[i], RT_SOFTC_TX_RING_DESC_COUNT); RT_WRITE(sc, sc->tx_ctx_idx[i], 0); } rt_txrx_enable(sc); } /* * rt_rx_coherent_intr */ static void rt_rx_coherent_intr(struct rt_softc *sc) { uint32_t tmp; int i; RT_DPRINTF(sc, RT_DEBUG_INTR, "Rx coherent interrupt\n"); sc->rx_coherent_interrupts++; /* restart DMA engine */ tmp = RT_READ(sc, sc->pdma_glo_cfg); tmp &= ~(FE_RX_DMA_EN); RT_WRITE(sc, sc->pdma_glo_cfg, tmp); /* init Rx ring */ for (i = 0; i < sc->rx_ring_count; i++) rt_reset_rx_ring(sc, &sc->rx_ring[i]); for (i = 0; i < sc->rx_ring_count; i++) { RT_WRITE(sc, sc->rx_base_ptr[i], sc->rx_ring[i].desc_phys_addr); RT_WRITE(sc, sc->rx_max_cnt[i], RT_SOFTC_RX_RING_DATA_COUNT); RT_WRITE(sc, sc->rx_calc_idx[i], RT_SOFTC_RX_RING_DATA_COUNT - 1); } rt_txrx_enable(sc); } /* * rt_rx_intr - a packet received */ static void rt_rx_intr(struct rt_softc *sc, int qid) { KASSERT(qid >= 0 && qid < sc->rx_ring_count, ("%s: Rx interrupt: invalid qid=%d\n", device_get_nameunit(sc->dev), qid)); RT_DPRINTF(sc, RT_DEBUG_INTR, "Rx interrupt\n"); sc->rx_interrupts[qid]++; RT_SOFTC_LOCK(sc); if (!(sc->intr_disable_mask & (sc->int_rx_done_mask << qid))) { rt_intr_disable(sc, (sc->int_rx_done_mask << qid)); taskqueue_enqueue(sc->taskqueue, &sc->rx_done_task); } sc->intr_pending_mask |= (sc->int_rx_done_mask << qid); RT_SOFTC_UNLOCK(sc); } static void rt_rx_delay_intr(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "Rx delay interrupt\n"); sc->rx_delay_interrupts++; } static void rt_tx_delay_intr(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "Tx delay interrupt\n"); sc->tx_delay_interrupts++; } /* * rt_tx_intr - Transsmition of packet done */ static void rt_tx_intr(struct rt_softc *sc, int qid) { KASSERT(qid >= 0 && qid < RT_SOFTC_TX_RING_COUNT, ("%s: Tx interrupt: invalid qid=%d\n", device_get_nameunit(sc->dev), qid)); RT_DPRINTF(sc, RT_DEBUG_INTR, "Tx interrupt: qid=%d\n", qid); sc->tx_interrupts[qid]++; RT_SOFTC_LOCK(sc); if (!(sc->intr_disable_mask & (sc->int_tx_done_mask << qid))) { rt_intr_disable(sc, (sc->int_tx_done_mask << qid)); taskqueue_enqueue(sc->taskqueue, &sc->tx_done_task); } sc->intr_pending_mask |= (sc->int_tx_done_mask << qid); RT_SOFTC_UNLOCK(sc); } /* * rt_rx_done_task - run RX task */ static void rt_rx_done_task(void *context, int pending) { struct rt_softc *sc; struct ifnet *ifp; int again; sc = context; ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_RX, "Rx done task\n"); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; sc->intr_pending_mask &= ~sc->int_rx_done_mask; again = rt_rx_eof(sc, &sc->rx_ring[0], sc->rx_process_limit); RT_SOFTC_LOCK(sc); if ((sc->intr_pending_mask & sc->int_rx_done_mask) || again) { RT_DPRINTF(sc, RT_DEBUG_RX, "Rx done task: scheduling again\n"); taskqueue_enqueue(sc->taskqueue, &sc->rx_done_task); } else { rt_intr_enable(sc, sc->int_rx_done_mask); } RT_SOFTC_UNLOCK(sc); } /* * rt_tx_done_task - check for pending TX task in all queues */ static void rt_tx_done_task(void *context, int pending) { struct rt_softc *sc; struct ifnet *ifp; uint32_t intr_mask; int i; sc = context; ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_TX, "Tx done task\n"); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; for (i = RT_SOFTC_TX_RING_COUNT - 1; i >= 0; i--) { if (sc->intr_pending_mask & (sc->int_tx_done_mask << i)) { sc->intr_pending_mask &= ~(sc->int_tx_done_mask << i); rt_tx_eof(sc, &sc->tx_ring[i]); } } sc->tx_timer = 0; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if(sc->rt_chipid == RT_CHIPID_RT5350 || sc->rt_chipid == RT_CHIPID_MT7620 || sc->rt_chipid == RT_CHIPID_MT7621) intr_mask = ( RT5350_INT_TXQ3_DONE | RT5350_INT_TXQ2_DONE | RT5350_INT_TXQ1_DONE | RT5350_INT_TXQ0_DONE); else intr_mask = ( INT_TXQ3_DONE | INT_TXQ2_DONE | INT_TXQ1_DONE | INT_TXQ0_DONE); RT_SOFTC_LOCK(sc); rt_intr_enable(sc, ~sc->intr_pending_mask & (sc->intr_disable_mask & intr_mask)); if (sc->intr_pending_mask & intr_mask) { RT_DPRINTF(sc, RT_DEBUG_TX, "Tx done task: scheduling again\n"); taskqueue_enqueue(sc->taskqueue, &sc->tx_done_task); } RT_SOFTC_UNLOCK(sc); if (!IFQ_IS_EMPTY(&ifp->if_snd)) rt_start(ifp); } /* * rt_periodic_task - run periodic task */ static void rt_periodic_task(void *context, int pending) { struct rt_softc *sc; struct ifnet *ifp; sc = context; ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_PERIODIC, "periodic task: round=%lu\n", sc->periodic_round); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; RT_SOFTC_LOCK(sc); sc->periodic_round++; rt_update_stats(sc); if ((sc->periodic_round % 10) == 0) { rt_update_raw_counters(sc); rt_watchdog(sc); } RT_SOFTC_UNLOCK(sc); callout_reset(&sc->periodic_ch, hz / 10, rt_periodic, sc); } /* * rt_rx_eof - check for frames that done by DMA engine and pass it into * network subsystem. */ static int rt_rx_eof(struct rt_softc *sc, struct rt_softc_rx_ring *ring, int limit) { struct ifnet *ifp; /* struct rt_softc_rx_ring *ring; */ struct rt_rxdesc *desc; struct rt_softc_rx_data *data; struct mbuf *m, *mnew; bus_dma_segment_t segs[1]; bus_dmamap_t dma_map; uint32_t index, desc_flags; int error, nsegs, len, nframes; ifp = sc->ifp; /* ring = &sc->rx_ring[0]; */ nframes = 0; while (limit != 0) { index = RT_READ(sc, sc->rx_drx_idx[0]); if (ring->cur == index) break; desc = &ring->desc[ring->cur]; data = &ring->data[ring->cur]; bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); #ifdef IF_RT_DEBUG if ( sc->debug & RT_DEBUG_RX ) { printf("\nRX Descriptor[%#08x] dump:\n", (u_int)desc); hexdump(desc, 16, 0, 0); printf("-----------------------------------\n"); } #endif /* XXX Sometime device don`t set DDONE bit */ #ifdef DDONE_FIXED if (!(desc->sdl0 & htole16(RT_RXDESC_SDL0_DDONE))) { RT_DPRINTF(sc, RT_DEBUG_RX, "DDONE=0, try next\n"); break; } #endif len = le16toh(desc->sdl0) & 0x3fff; RT_DPRINTF(sc, RT_DEBUG_RX, "new frame len=%d\n", len); nframes++; mnew = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); if (mnew == NULL) { sc->rx_mbuf_alloc_errors++; if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto skip; } mnew->m_len = mnew->m_pkthdr.len = MJUMPAGESIZE; error = bus_dmamap_load_mbuf_sg(ring->data_dma_tag, ring->spare_dma_map, mnew, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { RT_DPRINTF(sc, RT_DEBUG_RX, "could not load Rx mbuf DMA map: " "error=%d, nsegs=%d\n", error, nsegs); m_freem(mnew); sc->rx_mbuf_dmamap_errors++; if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto skip; } KASSERT(nsegs == 1, ("%s: too many DMA segments", device_get_nameunit(sc->dev))); bus_dmamap_sync(ring->data_dma_tag, data->dma_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(ring->data_dma_tag, data->dma_map); dma_map = data->dma_map; data->dma_map = ring->spare_dma_map; ring->spare_dma_map = dma_map; bus_dmamap_sync(ring->data_dma_tag, data->dma_map, BUS_DMASYNC_PREREAD); m = data->m; desc_flags = desc->word3; data->m = mnew; /* Add 2 for proper align of RX IP header */ desc->sdp0 = htole32(segs[0].ds_addr+2); desc->sdl0 = htole32(segs[0].ds_len-2); desc->word3 = 0; RT_DPRINTF(sc, RT_DEBUG_RX, "Rx frame: rxdesc flags=0x%08x\n", desc_flags); m->m_pkthdr.rcvif = ifp; /* Add 2 to fix data align, after sdp0 = addr + 2 */ m->m_data += 2; m->m_pkthdr.len = m->m_len = len; /* check for crc errors */ if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) { /*check for valid checksum*/ if (desc_flags & (sc->csum_fail_ip|sc->csum_fail_l4)) { RT_DPRINTF(sc, RT_DEBUG_RX, "rxdesc: crc error\n"); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); if (!(ifp->if_flags & IFF_PROMISC)) { m_freem(m); goto skip; } } if ((desc_flags & sc->csum_fail_ip) == 0) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; m->m_pkthdr.csum_flags |= CSUM_IP_VALID; m->m_pkthdr.csum_data = 0xffff; } m->m_flags &= ~M_HASFCS; } (*ifp->if_input)(ifp, m); skip: desc->sdl0 &= ~htole16(RT_RXDESC_SDL0_DDONE); bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ring->cur = (ring->cur + 1) % RT_SOFTC_RX_RING_DATA_COUNT; limit--; } if (ring->cur == 0) RT_WRITE(sc, sc->rx_calc_idx[0], RT_SOFTC_RX_RING_DATA_COUNT - 1); else RT_WRITE(sc, sc->rx_calc_idx[0], ring->cur - 1); RT_DPRINTF(sc, RT_DEBUG_RX, "Rx eof: nframes=%d\n", nframes); sc->rx_packets += nframes; return (limit == 0); } /* * rt_tx_eof - check for successful transmitted frames and mark their * descriptor as free. */ static void rt_tx_eof(struct rt_softc *sc, struct rt_softc_tx_ring *ring) { struct ifnet *ifp; struct rt_txdesc *desc; struct rt_softc_tx_data *data; uint32_t index; int ndescs, nframes; ifp = sc->ifp; ndescs = 0; nframes = 0; for (;;) { index = RT_READ(sc, sc->tx_dtx_idx[ring->qid]); if (ring->desc_next == index) break; ndescs++; desc = &ring->desc[ring->desc_next]; bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); if (desc->sdl0 & htole16(RT_TXDESC_SDL0_LASTSEG) || desc->sdl1 & htole16(RT_TXDESC_SDL1_LASTSEG)) { nframes++; data = &ring->data[ring->data_next]; bus_dmamap_sync(ring->data_dma_tag, data->dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dma_tag, data->dma_map); m_freem(data->m); data->m = NULL; if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); RT_SOFTC_TX_RING_LOCK(ring); ring->data_queued--; ring->data_next = (ring->data_next + 1) % RT_SOFTC_TX_RING_DATA_COUNT; RT_SOFTC_TX_RING_UNLOCK(ring); } desc->sdl0 &= ~htole16(RT_TXDESC_SDL0_DDONE); bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); RT_SOFTC_TX_RING_LOCK(ring); ring->desc_queued--; ring->desc_next = (ring->desc_next + 1) % RT_SOFTC_TX_RING_DESC_COUNT; RT_SOFTC_TX_RING_UNLOCK(ring); } RT_DPRINTF(sc, RT_DEBUG_TX, "Tx eof: qid=%d, ndescs=%d, nframes=%d\n", ring->qid, ndescs, nframes); } /* * rt_update_stats - query statistics counters and update related variables. */ static void rt_update_stats(struct rt_softc *sc) { struct ifnet *ifp; ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_STATS, "update statistic: \n"); /* XXX do update stats here */ } /* * rt_watchdog - reinit device on watchdog event. */ static void rt_watchdog(struct rt_softc *sc) { uint32_t tmp; #ifdef notyet int ntries; #endif if(sc->rt_chipid != RT_CHIPID_RT5350 && sc->rt_chipid != RT_CHIPID_MT7620 && sc->rt_chipid != RT_CHIPID_MT7621) { tmp = RT_READ(sc, PSE_BASE + CDMA_OQ_STA); RT_DPRINTF(sc, RT_DEBUG_WATCHDOG, "watchdog: PSE_IQ_STA=0x%08x\n", tmp); } /* XXX: do not reset */ #ifdef notyet if (((tmp >> P0_IQ_PCNT_SHIFT) & 0xff) != 0) { sc->tx_queue_not_empty[0]++; for (ntries = 0; ntries < 10; ntries++) { tmp = RT_READ(sc, PSE_BASE + PSE_IQ_STA); if (((tmp >> P0_IQ_PCNT_SHIFT) & 0xff) == 0) break; DELAY(1); } } if (((tmp >> P1_IQ_PCNT_SHIFT) & 0xff) != 0) { sc->tx_queue_not_empty[1]++; for (ntries = 0; ntries < 10; ntries++) { tmp = RT_READ(sc, PSE_BASE + PSE_IQ_STA); if (((tmp >> P1_IQ_PCNT_SHIFT) & 0xff) == 0) break; DELAY(1); } } #endif } /* * rt_update_raw_counters - update counters. */ static void rt_update_raw_counters(struct rt_softc *sc) { sc->tx_bytes += RT_READ(sc, CNTR_BASE + GDMA_TX_GBCNT0); sc->tx_packets += RT_READ(sc, CNTR_BASE + GDMA_TX_GPCNT0); sc->tx_skip += RT_READ(sc, CNTR_BASE + GDMA_TX_SKIPCNT0); sc->tx_collision+= RT_READ(sc, CNTR_BASE + GDMA_TX_COLCNT0); sc->rx_bytes += RT_READ(sc, CNTR_BASE + GDMA_RX_GBCNT0); sc->rx_packets += RT_READ(sc, CNTR_BASE + GDMA_RX_GPCNT0); sc->rx_crc_err += RT_READ(sc, CNTR_BASE + GDMA_RX_CSUM_ERCNT0); sc->rx_short_err+= RT_READ(sc, CNTR_BASE + GDMA_RX_SHORT_ERCNT0); sc->rx_long_err += RT_READ(sc, CNTR_BASE + GDMA_RX_LONG_ERCNT0); sc->rx_phy_err += RT_READ(sc, CNTR_BASE + GDMA_RX_FERCNT0); sc->rx_fifo_overflows+= RT_READ(sc, CNTR_BASE + GDMA_RX_OERCNT0); } static void rt_intr_enable(struct rt_softc *sc, uint32_t intr_mask) { uint32_t tmp; sc->intr_disable_mask &= ~intr_mask; tmp = sc->intr_enable_mask & ~sc->intr_disable_mask; RT_WRITE(sc, sc->fe_int_enable, tmp); } static void rt_intr_disable(struct rt_softc *sc, uint32_t intr_mask) { uint32_t tmp; sc->intr_disable_mask |= intr_mask; tmp = sc->intr_enable_mask & ~sc->intr_disable_mask; RT_WRITE(sc, sc->fe_int_enable, tmp); } /* * rt_txrx_enable - enable TX/RX DMA */ static int rt_txrx_enable(struct rt_softc *sc) { struct ifnet *ifp; uint32_t tmp; int ntries; ifp = sc->ifp; /* enable Tx/Rx DMA engine */ for (ntries = 0; ntries < 200; ntries++) { tmp = RT_READ(sc, sc->pdma_glo_cfg); if (!(tmp & (FE_TX_DMA_BUSY | FE_RX_DMA_BUSY))) break; DELAY(1000); } if (ntries == 200) { device_printf(sc->dev, "timeout waiting for DMA engine\n"); return (-1); } DELAY(50); tmp |= FE_TX_WB_DDONE | FE_RX_DMA_EN | FE_TX_DMA_EN; RT_WRITE(sc, sc->pdma_glo_cfg, tmp); /* XXX set Rx filter */ return (0); } /* * rt_alloc_rx_ring - allocate RX DMA ring buffer */ static int rt_alloc_rx_ring(struct rt_softc *sc, struct rt_softc_rx_ring *ring, int qid) { struct rt_rxdesc *desc; struct rt_softc_rx_data *data; bus_dma_segment_t segs[1]; int i, nsegs, error; error = bus_dma_tag_create(bus_get_dma_tag(sc->dev), PAGE_SIZE, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, RT_SOFTC_RX_RING_DATA_COUNT * sizeof(struct rt_rxdesc), 1, RT_SOFTC_RX_RING_DATA_COUNT * sizeof(struct rt_rxdesc), 0, NULL, NULL, &ring->desc_dma_tag); if (error != 0) { device_printf(sc->dev, "could not create Rx desc DMA tag\n"); goto fail; } error = bus_dmamem_alloc(ring->desc_dma_tag, (void **) &ring->desc, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &ring->desc_dma_map); if (error != 0) { device_printf(sc->dev, "could not allocate Rx desc DMA memory\n"); goto fail; } error = bus_dmamap_load(ring->desc_dma_tag, ring->desc_dma_map, ring->desc, RT_SOFTC_RX_RING_DATA_COUNT * sizeof(struct rt_rxdesc), rt_dma_map_addr, &ring->desc_phys_addr, 0); if (error != 0) { device_printf(sc->dev, "could not load Rx desc DMA map\n"); goto fail; } error = bus_dma_tag_create(bus_get_dma_tag(sc->dev), PAGE_SIZE, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE, 0, NULL, NULL, &ring->data_dma_tag); if (error != 0) { device_printf(sc->dev, "could not create Rx data DMA tag\n"); goto fail; } for (i = 0; i < RT_SOFTC_RX_RING_DATA_COUNT; i++) { desc = &ring->desc[i]; data = &ring->data[i]; error = bus_dmamap_create(ring->data_dma_tag, 0, &data->dma_map); if (error != 0) { device_printf(sc->dev, "could not create Rx data DMA " "map\n"); goto fail; } data->m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); if (data->m == NULL) { device_printf(sc->dev, "could not allocate Rx mbuf\n"); error = ENOMEM; goto fail; } data->m->m_len = data->m->m_pkthdr.len = MJUMPAGESIZE; error = bus_dmamap_load_mbuf_sg(ring->data_dma_tag, data->dma_map, data->m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->dev, "could not load Rx mbuf DMA map\n"); goto fail; } KASSERT(nsegs == 1, ("%s: too many DMA segments", device_get_nameunit(sc->dev))); /* Add 2 for proper align of RX IP header */ desc->sdp0 = htole32(segs[0].ds_addr+2); desc->sdl0 = htole32(segs[0].ds_len-2); } error = bus_dmamap_create(ring->data_dma_tag, 0, &ring->spare_dma_map); if (error != 0) { device_printf(sc->dev, "could not create Rx spare DMA map\n"); goto fail; } bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ring->qid = qid; return (0); fail: rt_free_rx_ring(sc, ring); return (error); } /* * rt_reset_rx_ring - reset RX ring buffer */ static void rt_reset_rx_ring(struct rt_softc *sc, struct rt_softc_rx_ring *ring) { struct rt_rxdesc *desc; int i; for (i = 0; i < RT_SOFTC_RX_RING_DATA_COUNT; i++) { desc = &ring->desc[i]; desc->sdl0 &= ~htole16(RT_RXDESC_SDL0_DDONE); } bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ring->cur = 0; } /* * rt_free_rx_ring - free memory used by RX ring buffer */ static void rt_free_rx_ring(struct rt_softc *sc, struct rt_softc_rx_ring *ring) { struct rt_softc_rx_data *data; int i; if (ring->desc != NULL) { bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->desc_dma_tag, ring->desc_dma_map); bus_dmamem_free(ring->desc_dma_tag, ring->desc, ring->desc_dma_map); } if (ring->desc_dma_tag != NULL) bus_dma_tag_destroy(ring->desc_dma_tag); for (i = 0; i < RT_SOFTC_RX_RING_DATA_COUNT; i++) { data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dma_tag, data->dma_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(ring->data_dma_tag, data->dma_map); m_freem(data->m); } if (data->dma_map != NULL) bus_dmamap_destroy(ring->data_dma_tag, data->dma_map); } if (ring->spare_dma_map != NULL) bus_dmamap_destroy(ring->data_dma_tag, ring->spare_dma_map); if (ring->data_dma_tag != NULL) bus_dma_tag_destroy(ring->data_dma_tag); } /* * rt_alloc_tx_ring - allocate TX ring buffer */ static int rt_alloc_tx_ring(struct rt_softc *sc, struct rt_softc_tx_ring *ring, int qid) { struct rt_softc_tx_data *data; int error, i; mtx_init(&ring->lock, device_get_nameunit(sc->dev), NULL, MTX_DEF); error = bus_dma_tag_create(bus_get_dma_tag(sc->dev), PAGE_SIZE, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, RT_SOFTC_TX_RING_DESC_COUNT * sizeof(struct rt_txdesc), 1, RT_SOFTC_TX_RING_DESC_COUNT * sizeof(struct rt_txdesc), 0, NULL, NULL, &ring->desc_dma_tag); if (error != 0) { device_printf(sc->dev, "could not create Tx desc DMA tag\n"); goto fail; } error = bus_dmamem_alloc(ring->desc_dma_tag, (void **) &ring->desc, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &ring->desc_dma_map); if (error != 0) { device_printf(sc->dev, "could not allocate Tx desc DMA memory\n"); goto fail; } error = bus_dmamap_load(ring->desc_dma_tag, ring->desc_dma_map, ring->desc, (RT_SOFTC_TX_RING_DESC_COUNT * sizeof(struct rt_txdesc)), rt_dma_map_addr, &ring->desc_phys_addr, 0); if (error != 0) { device_printf(sc->dev, "could not load Tx desc DMA map\n"); goto fail; } ring->desc_queued = 0; ring->desc_cur = 0; ring->desc_next = 0; error = bus_dma_tag_create(bus_get_dma_tag(sc->dev), PAGE_SIZE, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, RT_SOFTC_TX_RING_DATA_COUNT * RT_TX_DATA_SEG0_SIZE, 1, RT_SOFTC_TX_RING_DATA_COUNT * RT_TX_DATA_SEG0_SIZE, 0, NULL, NULL, &ring->seg0_dma_tag); if (error != 0) { device_printf(sc->dev, "could not create Tx seg0 DMA tag\n"); goto fail; } error = bus_dmamem_alloc(ring->seg0_dma_tag, (void **) &ring->seg0, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &ring->seg0_dma_map); if (error != 0) { device_printf(sc->dev, "could not allocate Tx seg0 DMA memory\n"); goto fail; } error = bus_dmamap_load(ring->seg0_dma_tag, ring->seg0_dma_map, ring->seg0, RT_SOFTC_TX_RING_DATA_COUNT * RT_TX_DATA_SEG0_SIZE, rt_dma_map_addr, &ring->seg0_phys_addr, 0); if (error != 0) { device_printf(sc->dev, "could not load Tx seg0 DMA map\n"); goto fail; } error = bus_dma_tag_create(bus_get_dma_tag(sc->dev), PAGE_SIZE, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, RT_SOFTC_MAX_SCATTER, MJUMPAGESIZE, 0, NULL, NULL, &ring->data_dma_tag); if (error != 0) { device_printf(sc->dev, "could not create Tx data DMA tag\n"); goto fail; } for (i = 0; i < RT_SOFTC_TX_RING_DATA_COUNT; i++) { data = &ring->data[i]; error = bus_dmamap_create(ring->data_dma_tag, 0, &data->dma_map); if (error != 0) { device_printf(sc->dev, "could not create Tx data DMA " "map\n"); goto fail; } } ring->data_queued = 0; ring->data_cur = 0; ring->data_next = 0; ring->qid = qid; return (0); fail: rt_free_tx_ring(sc, ring); return (error); } /* * rt_reset_tx_ring - reset TX ring buffer to empty state */ static void rt_reset_tx_ring(struct rt_softc *sc, struct rt_softc_tx_ring *ring) { struct rt_softc_tx_data *data; struct rt_txdesc *desc; int i; for (i = 0; i < RT_SOFTC_TX_RING_DESC_COUNT; i++) { desc = &ring->desc[i]; desc->sdl0 = 0; desc->sdl1 = 0; } ring->desc_queued = 0; ring->desc_cur = 0; ring->desc_next = 0; bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ring->seg0_dma_tag, ring->seg0_dma_map, BUS_DMASYNC_PREWRITE); for (i = 0; i < RT_SOFTC_TX_RING_DATA_COUNT; i++) { data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dma_tag, data->dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dma_tag, data->dma_map); m_freem(data->m); data->m = NULL; } } ring->data_queued = 0; ring->data_cur = 0; ring->data_next = 0; } /* * rt_free_tx_ring - free RX ring buffer */ static void rt_free_tx_ring(struct rt_softc *sc, struct rt_softc_tx_ring *ring) { struct rt_softc_tx_data *data; int i; if (ring->desc != NULL) { bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->desc_dma_tag, ring->desc_dma_map); bus_dmamem_free(ring->desc_dma_tag, ring->desc, ring->desc_dma_map); } if (ring->desc_dma_tag != NULL) bus_dma_tag_destroy(ring->desc_dma_tag); if (ring->seg0 != NULL) { bus_dmamap_sync(ring->seg0_dma_tag, ring->seg0_dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->seg0_dma_tag, ring->seg0_dma_map); bus_dmamem_free(ring->seg0_dma_tag, ring->seg0, ring->seg0_dma_map); } if (ring->seg0_dma_tag != NULL) bus_dma_tag_destroy(ring->seg0_dma_tag); for (i = 0; i < RT_SOFTC_TX_RING_DATA_COUNT; i++) { data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dma_tag, data->dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dma_tag, data->dma_map); m_freem(data->m); } if (data->dma_map != NULL) bus_dmamap_destroy(ring->data_dma_tag, data->dma_map); } if (ring->data_dma_tag != NULL) bus_dma_tag_destroy(ring->data_dma_tag); mtx_destroy(&ring->lock); } /* * rt_dma_map_addr - get address of busdma segment */ static void rt_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { if (error != 0) return; KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg)); *(bus_addr_t *) arg = segs[0].ds_addr; } /* * rt_sysctl_attach - attach sysctl nodes for NIC counters. */ static void rt_sysctl_attach(struct rt_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid *stats; ctx = device_get_sysctl_ctx(sc->dev); tree = device_get_sysctl_tree(sc->dev); /* statistic counters */ stats = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "statistic"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "interrupts", CTLFLAG_RD, &sc->interrupts, "all interrupts"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_coherent_interrupts", CTLFLAG_RD, &sc->tx_coherent_interrupts, "Tx coherent interrupts"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_coherent_interrupts", CTLFLAG_RD, &sc->rx_coherent_interrupts, "Rx coherent interrupts"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_interrupts", CTLFLAG_RD, &sc->rx_interrupts[0], "Rx interrupts"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_delay_interrupts", CTLFLAG_RD, &sc->rx_delay_interrupts, "Rx delay interrupts"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ3_interrupts", CTLFLAG_RD, &sc->tx_interrupts[3], "Tx AC3 interrupts"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ2_interrupts", CTLFLAG_RD, &sc->tx_interrupts[2], "Tx AC2 interrupts"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ1_interrupts", CTLFLAG_RD, &sc->tx_interrupts[1], "Tx AC1 interrupts"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ0_interrupts", CTLFLAG_RD, &sc->tx_interrupts[0], "Tx AC0 interrupts"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_delay_interrupts", CTLFLAG_RD, &sc->tx_delay_interrupts, "Tx delay interrupts"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ3_desc_queued", CTLFLAG_RD, &sc->tx_ring[3].desc_queued, 0, "Tx AC3 descriptors queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ3_data_queued", CTLFLAG_RD, &sc->tx_ring[3].data_queued, 0, "Tx AC3 data queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ2_desc_queued", CTLFLAG_RD, &sc->tx_ring[2].desc_queued, 0, "Tx AC2 descriptors queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ2_data_queued", CTLFLAG_RD, &sc->tx_ring[2].data_queued, 0, "Tx AC2 data queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ1_desc_queued", CTLFLAG_RD, &sc->tx_ring[1].desc_queued, 0, "Tx AC1 descriptors queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ1_data_queued", CTLFLAG_RD, &sc->tx_ring[1].data_queued, 0, "Tx AC1 data queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ0_desc_queued", CTLFLAG_RD, &sc->tx_ring[0].desc_queued, 0, "Tx AC0 descriptors queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ0_data_queued", CTLFLAG_RD, &sc->tx_ring[0].data_queued, 0, "Tx AC0 data queued"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ3_data_queue_full", CTLFLAG_RD, &sc->tx_data_queue_full[3], "Tx AC3 data queue full"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ2_data_queue_full", CTLFLAG_RD, &sc->tx_data_queue_full[2], "Tx AC2 data queue full"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ1_data_queue_full", CTLFLAG_RD, &sc->tx_data_queue_full[1], "Tx AC1 data queue full"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ0_data_queue_full", CTLFLAG_RD, &sc->tx_data_queue_full[0], "Tx AC0 data queue full"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_watchdog_timeouts", CTLFLAG_RD, &sc->tx_watchdog_timeouts, "Tx watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_defrag_packets", CTLFLAG_RD, &sc->tx_defrag_packets, "Tx defragmented packets"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "no_tx_desc_avail", CTLFLAG_RD, &sc->no_tx_desc_avail, "no Tx descriptors available"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_mbuf_alloc_errors", CTLFLAG_RD, &sc->rx_mbuf_alloc_errors, "Rx mbuf allocation errors"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_mbuf_dmamap_errors", CTLFLAG_RD, &sc->rx_mbuf_dmamap_errors, "Rx mbuf DMA mapping errors"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_queue_0_not_empty", CTLFLAG_RD, &sc->tx_queue_not_empty[0], "Tx queue 0 not empty"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_queue_1_not_empty", CTLFLAG_RD, &sc->tx_queue_not_empty[1], "Tx queue 1 not empty"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_packets", CTLFLAG_RD, &sc->rx_packets, "Rx packets"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_crc_errors", CTLFLAG_RD, &sc->rx_crc_err, "Rx CRC errors"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_phy_errors", CTLFLAG_RD, &sc->rx_phy_err, "Rx PHY errors"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_dup_packets", CTLFLAG_RD, &sc->rx_dup_packets, "Rx duplicate packets"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_fifo_overflows", CTLFLAG_RD, &sc->rx_fifo_overflows, "Rx FIFO overflows"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_bytes", CTLFLAG_RD, &sc->rx_bytes, "Rx bytes"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_long_err", CTLFLAG_RD, &sc->rx_long_err, "Rx too long frame errors"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_short_err", CTLFLAG_RD, &sc->rx_short_err, "Rx too short frame errors"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_bytes", CTLFLAG_RD, &sc->tx_bytes, "Tx bytes"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_packets", CTLFLAG_RD, &sc->tx_packets, "Tx packets"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_skip", CTLFLAG_RD, &sc->tx_skip, "Tx skip count for GDMA ports"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_collision", CTLFLAG_RD, &sc->tx_collision, "Tx collision count for GDMA ports"); } #if defined(IF_RT_PHY_SUPPORT) || defined(RT_MDIO) /* This code is only work RT2880 and same chip. */ /* TODO: make RT3052 and later support code. But nobody need it? */ static int rt_miibus_readreg(device_t dev, int phy, int reg) { struct rt_softc *sc = device_get_softc(dev); int dat; /* * PSEUDO_PHYAD is a special value for indicate switch attached. * No one PHY use PSEUDO_PHYAD (0x1e) address. */ #ifndef RT_MDIO if (phy == 31) { /* Fake PHY ID for bfeswitch attach */ switch (reg) { case MII_BMSR: return (BMSR_EXTSTAT|BMSR_MEDIAMASK); case MII_PHYIDR1: return (0x40); /* As result of faking */ case MII_PHYIDR2: /* PHY will detect as */ return (0x6250); /* bfeswitch */ } } #endif /* Wait prev command done if any */ while (RT_READ(sc, MDIO_ACCESS) & MDIO_CMD_ONGO); dat = ((phy << MDIO_PHY_ADDR_SHIFT) & MDIO_PHY_ADDR_MASK) | ((reg << MDIO_PHYREG_ADDR_SHIFT) & MDIO_PHYREG_ADDR_MASK); RT_WRITE(sc, MDIO_ACCESS, dat); RT_WRITE(sc, MDIO_ACCESS, dat | MDIO_CMD_ONGO); while (RT_READ(sc, MDIO_ACCESS) & MDIO_CMD_ONGO); return (RT_READ(sc, MDIO_ACCESS) & MDIO_PHY_DATA_MASK); } static int rt_miibus_writereg(device_t dev, int phy, int reg, int val) { struct rt_softc *sc = device_get_softc(dev); int dat; /* Wait prev command done if any */ while (RT_READ(sc, MDIO_ACCESS) & MDIO_CMD_ONGO); dat = MDIO_CMD_WR | ((phy << MDIO_PHY_ADDR_SHIFT) & MDIO_PHY_ADDR_MASK) | ((reg << MDIO_PHYREG_ADDR_SHIFT) & MDIO_PHYREG_ADDR_MASK) | (val & MDIO_PHY_DATA_MASK); RT_WRITE(sc, MDIO_ACCESS, dat); RT_WRITE(sc, MDIO_ACCESS, dat | MDIO_CMD_ONGO); while (RT_READ(sc, MDIO_ACCESS) & MDIO_CMD_ONGO); return (0); } #endif #ifdef IF_RT_PHY_SUPPORT void rt_miibus_statchg(device_t dev) { struct rt_softc *sc = device_get_softc(dev); struct mii_data *mii; mii = device_get_softc(sc->rt_miibus); if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: /* XXX check link here */ sc->flags |= 1; break; default: break; } } } #endif /* IF_RT_PHY_SUPPORT */ static device_method_t rt_dev_methods[] = { DEVMETHOD(device_probe, rt_probe), DEVMETHOD(device_attach, rt_attach), DEVMETHOD(device_detach, rt_detach), DEVMETHOD(device_shutdown, rt_shutdown), DEVMETHOD(device_suspend, rt_suspend), DEVMETHOD(device_resume, rt_resume), #ifdef IF_RT_PHY_SUPPORT /* MII interface */ DEVMETHOD(miibus_readreg, rt_miibus_readreg), DEVMETHOD(miibus_writereg, rt_miibus_writereg), DEVMETHOD(miibus_statchg, rt_miibus_statchg), #endif DEVMETHOD_END }; static driver_t rt_driver = { "rt", rt_dev_methods, sizeof(struct rt_softc) }; static devclass_t rt_dev_class; DRIVER_MODULE(rt, nexus, rt_driver, rt_dev_class, 0, 0); #ifdef FDT DRIVER_MODULE(rt, simplebus, rt_driver, rt_dev_class, 0, 0); #endif MODULE_DEPEND(rt, ether, 1, 1, 1); MODULE_DEPEND(rt, miibus, 1, 1, 1); #ifdef RT_MDIO MODULE_DEPEND(rt, mdio, 1, 1, 1); static int rtmdio_probe(device_t); static int rtmdio_attach(device_t); static int rtmdio_detach(device_t); static struct mtx miibus_mtx; MTX_SYSINIT(miibus_mtx, &miibus_mtx, "rt mii lock", MTX_DEF); /* * Declare an additional, separate driver for accessing the MDIO bus. */ static device_method_t rtmdio_methods[] = { /* Device interface */ DEVMETHOD(device_probe, rtmdio_probe), DEVMETHOD(device_attach, rtmdio_attach), DEVMETHOD(device_detach, rtmdio_detach), /* bus interface */ DEVMETHOD(bus_add_child, device_add_child_ordered), /* MDIO access */ DEVMETHOD(mdio_readreg, rt_miibus_readreg), DEVMETHOD(mdio_writereg, rt_miibus_writereg), }; DEFINE_CLASS_0(rtmdio, rtmdio_driver, rtmdio_methods, sizeof(struct rt_softc)); static devclass_t rtmdio_devclass; DRIVER_MODULE(miiproxy, rt, miiproxy_driver, miiproxy_devclass, 0, 0); DRIVER_MODULE(rtmdio, simplebus, rtmdio_driver, rtmdio_devclass, 0, 0); DRIVER_MODULE(mdio, rtmdio, mdio_driver, mdio_devclass, 0, 0); static int rtmdio_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (!ofw_bus_is_compatible(dev, "ralink,rt2880-mdio")) return (ENXIO); - device_set_desc(dev, "FV built-in ethernet interface, MDIO controller"); + device_set_desc(dev, "RT built-in ethernet interface, MDIO controller"); return(0); } static int rtmdio_attach(device_t dev) { struct rt_softc *sc; int error; sc = device_get_softc(dev); sc->dev = dev; sc->mem_rid = 0; sc->mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->mem_rid, RF_ACTIVE | RF_SHAREABLE); if (sc->mem == NULL) { device_printf(dev, "couldn't map memory\n"); error = ENXIO; goto fail; } sc->bst = rman_get_bustag(sc->mem); sc->bsh = rman_get_bushandle(sc->mem); bus_generic_probe(dev); bus_enumerate_hinted_children(dev); error = bus_generic_attach(dev); fail: return(error); } static int rtmdio_detach(device_t dev) { return(0); } #endif diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 461998568040..af8b0d2b471e 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -1,808 +1,809 @@ # $FreeBSD$ SYSDIR?=${SRCTOP}/sys .include "${SYSDIR}/conf/kern.opts.mk" SUBDIR_PARALLEL= # Modules that include binary-only blobs of microcode should be selectable by # MK_SOURCELESS_UCODE option (see below). .include "${SYSDIR}/conf/config.mk" .if defined(MODULES_OVERRIDE) && !defined(ALL_MODULES) SUBDIR=${MODULES_OVERRIDE} .else SUBDIR= \ ${_3dfx} \ ${_3dfx_linux} \ ${_aac} \ ${_aacraid} \ accf_data \ accf_dns \ accf_http \ acl_nfs4 \ acl_posix1e \ ${_acpi} \ ae \ ${_aesni} \ age \ ${_agp} \ ahci \ aic7xxx \ alc \ ale \ alq \ ${_amd_ecc_inject} \ ${_amdgpio} \ ${_amdsbwd} \ ${_amdsmn} \ ${_amdtemp} \ amr \ ${_an} \ ${_aout} \ ${_apm} \ ${_arcmsr} \ ${_allwinner} \ ${_armv8crypto} \ ${_asmc} \ ata \ ath \ ath_dfs \ ath_hal \ ath_hal_ar5210 \ ath_hal_ar5211 \ ath_hal_ar5212 \ ath_hal_ar5416 \ ath_hal_ar9300 \ ath_main \ ath_rate \ ath_pci \ ${_autofs} \ ${_bce} \ ${_bcm283x_clkman} \ ${_bcm283x_pwm} \ bfe \ bge \ bhnd \ ${_bxe} \ ${_bios} \ ${_blake2} \ bnxt \ bridgestp \ bwi \ bwn \ ${_bytgpio} \ ${_chvgpio} \ cam \ ${_cardbus} \ ${_carp} \ cas \ ${_cbb} \ cc \ ${_ccp} \ cd9660 \ cd9660_iconv \ ${_ce} \ ${_cfi} \ ${_chromebook_platform} \ ${_ciss} \ cloudabi \ ${_cloudabi32} \ ${_cloudabi64} \ ${_cmx} \ ${_coretemp} \ ${_cp} \ ${_cpsw} \ ${_cpuctl} \ ${_cpufreq} \ ${_crypto} \ ${_cryptodev} \ ${_ctau} \ ctl \ ${_cxgb} \ ${_cxgbe} \ dc \ dcons \ dcons_crom \ ${_dpms} \ dummynet \ ${_efirt} \ ${_em} \ ${_ena} \ esp \ ${_et} \ evdev \ ${_exca} \ ext2fs \ fdc \ fdescfs \ ${_ffec} \ filemon \ firewire \ firmware \ fusefs \ ${_fxp} \ gem \ geom \ ${_glxiic} \ ${_glxsb} \ gpio \ hifn \ hme \ ${_hpt27xx} \ ${_hptiop} \ ${_hptmv} \ ${_hptnr} \ ${_hptrr} \ hwpmc \ ${_hwpmc_mips24k} \ ${_hwpmc_mips74k} \ ${_hyperv} \ i2c \ ${_iavf} \ ${_ibcore} \ ${_ichwd} \ ${_ida} \ if_bridge \ if_disc \ if_edsc \ ${_if_enc} \ if_epair \ ${_if_gif} \ ${_if_gre} \ ${_if_me} \ if_lagg \ ${_if_ndis} \ ${_if_stf} \ if_tuntap \ if_vlan \ if_vxlan \ iflib \ ${_iir} \ imgact_binmisc \ ${_intelspi} \ ${_io} \ ${_ioat} \ ${_ipoib} \ ${_ipdivert} \ ${_ipfilter} \ ${_ipfw} \ ipfw_nat \ ${_ipfw_nat64} \ ${_ipfw_nptv6} \ ${_ipfw_pmod} \ ${_ipmi} \ ip6_mroute_mod \ ip_mroute_mod \ ${_ips} \ ${_ipsec} \ ${_ipw} \ ${_ipwfw} \ ${_isci} \ ${_iser} \ isp \ ${_ispfw} \ ${_itwd} \ ${_iwi} \ ${_iwifw} \ ${_iwm} \ ${_iwmfw} \ ${_iwn} \ ${_iwnfw} \ ${_ix} \ ${_ixv} \ ${_ixl} \ jme \ kbdmux \ kgssapi \ kgssapi_krb5 \ khelp \ krpc \ ksyms \ ${_ktls_ocf} \ le \ lge \ libalias \ libiconv \ libmchain \ lindebugfs \ linuxkpi \ ${_lio} \ lpt \ mac_biba \ mac_bsdextended \ mac_ifoff \ mac_lomac \ mac_mls \ mac_none \ mac_ntpd \ mac_partition \ mac_portacl \ mac_seeotheruids \ mac_stub \ mac_test \ malo \ md \ mdio \ mem \ mfi \ mii \ mlx \ mlxfw \ ${_mlx4} \ ${_mlx4ib} \ ${_mlx4en} \ ${_mlx5} \ ${_mlx5en} \ ${_mlx5ib} \ ${_mly} \ mmc \ mmcsd \ ${_mpr} \ ${_mps} \ mpt \ mqueue \ mrsas \ msdosfs \ msdosfs_iconv \ msk \ ${_mthca} \ mvs \ mwl \ ${_mwlfw} \ mxge \ my \ ${_nctgpio} \ ${_ndis} \ ${_netgraph} \ ${_nfe} \ nfscl \ nfscommon \ nfsd \ nfslock \ nfslockd \ nfssvc \ nge \ nmdm \ nullfs \ ${_ntb} \ ${_nvd} \ ${_nvdimm} \ ${_nvme} \ ${_nvram} \ oce \ ${_ocs_fc} \ otus \ ${_otusfw} \ ow \ ${_padlock} \ ${_padlock_rng} \ ${_pccard} \ ${_pcfclock} \ ${_pf} \ ${_pflog} \ ${_pfsync} \ plip \ ${_pms} \ ppbus \ ppc \ ppi \ pps \ procfs \ proto \ pseudofs \ ${_pst} \ pty \ puc \ pwm \ ${_qlxge} \ ${_qlxgb} \ ${_qlxgbe} \ ${_qlnx} \ ral \ ${_ralfw} \ ${_random_fortuna} \ ${_random_other} \ rc4 \ ${_rdma} \ ${_rdrand_rng} \ re \ rl \ ${_rockchip} \ rtwn \ rtwn_pci \ rtwn_usb \ ${_rtwnfw} \ ${_s3} \ ${_safe} \ ${_sbni} \ scc \ sdhci \ ${_sdhci_acpi} \ sdhci_pci \ sdio \ sem \ send \ ${_sfxge} \ sge \ ${_sgx} \ ${_sgx_linux} \ siftr \ siis \ sis \ sk \ ${_smartpqi} \ smbfs \ snp \ sound \ ${_speaker} \ spi \ ${_splash} \ ${_sppp} \ ste \ stge \ ${_superio} \ ${_sym} \ ${_syscons} \ sysvipc \ tcp \ ${_ti} \ tmpfs \ ${_toecore} \ ${_tpm} \ ${_twa} \ twe \ tws \ uart \ ubsec \ udf \ udf_iconv \ ufs \ uinput \ unionfs \ usb \ ${_vesa} \ ${_virtio} \ vge \ ${_viawd} \ videomode \ vkbd \ ${_vmd} \ ${_vmm} \ ${_vmware} \ vr \ vte \ ${_wbwd} \ ${_wi} \ wlan \ wlan_acl \ wlan_amrr \ wlan_ccmp \ wlan_rssadapt \ wlan_tkip \ wlan_wep \ wlan_xauth \ ${_wpi} \ ${_wpifw} \ ${_x86bios} \ xl \ xz \ zlib .if ${MK_AUTOFS} != "no" || defined(ALL_MODULES) _autofs= autofs .endif .if ${MK_CDDL} != "no" || defined(ALL_MODULES) .if (${MACHINE_CPUARCH} != "arm" || ${MACHINE_ARCH:Marmv[67]*} != "") && \ ${MACHINE_CPUARCH} != "mips" .if ${KERN_OPTS:MKDTRACE_HOOKS} SUBDIR+= dtrace .endif .endif SUBDIR+= opensolaris .endif .if ${MK_CRYPT} != "no" || defined(ALL_MODULES) .if exists(${SRCTOP}/sys/opencrypto) _crypto= crypto _cryptodev= cryptodev _random_fortuna=random_fortuna _random_other= random_other _ktls_ocf= ktls_ocf .endif .endif .if ${MK_CUSE} != "no" || defined(ALL_MODULES) SUBDIR+= cuse .endif .if (${MK_INET_SUPPORT} != "no" || ${MK_INET6_SUPPORT} != "no") || \ defined(ALL_MODULES) _carp= carp _toecore= toecore _if_enc= if_enc _if_gif= if_gif _if_gre= if_gre _ipfw_pmod= ipfw_pmod .if ${MK_IPSEC_SUPPORT} != "no" _ipsec= ipsec .endif .endif .if (${MK_INET_SUPPORT} != "no" && ${MK_INET6_SUPPORT} != "no") || \ defined(ALL_MODULES) _if_stf= if_stf .endif .if ${MK_INET_SUPPORT} != "no" || defined(ALL_MODULES) _if_me= if_me _ipdivert= ipdivert _ipfw= ipfw .if ${MK_INET6_SUPPORT} != "no" || defined(ALL_MODULES) _ipfw_nat64= ipfw_nat64 .endif .endif .if ${MK_INET6_SUPPORT} != "no" || defined(ALL_MODULES) _ipfw_nptv6= ipfw_nptv6 .endif .if ${MK_IPFILTER} != "no" || defined(ALL_MODULES) _ipfilter= ipfilter .endif .if ${MK_ISCSI} != "no" || defined(ALL_MODULES) SUBDIR+= cfiscsi SUBDIR+= iscsi SUBDIR+= iscsi_initiator .endif .if !empty(OPT_FDT) SUBDIR+= fdt .endif # Linuxulator .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" || \ ${MACHINE_CPUARCH} == "i386" SUBDIR+= linprocfs SUBDIR+= linsysfs .endif .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" SUBDIR+= linux .endif .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" SUBDIR+= linux64 SUBDIR+= linux_common .endif .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" || \ ${MACHINE_CPUARCH} == "i386" _ena= ena .if ${MK_OFED} != "no" || defined(ALL_MODULES) _ibcore= ibcore _ipoib= ipoib _iser= iser .endif _mlx4= mlx4 _mlx5= mlx5 .if (${MK_INET_SUPPORT} != "no" && ${MK_INET6_SUPPORT} != "no") || \ defined(ALL_MODULES) _mlx4en= mlx4en _mlx5en= mlx5en .endif .if ${MK_OFED} != "no" || defined(ALL_MODULES) _mthca= mthca _mlx4ib= mlx4ib _mlx5ib= mlx5ib .endif .endif .if ${MK_NETGRAPH} != "no" || defined(ALL_MODULES) _netgraph= netgraph .endif .if (${MK_PF} != "no" && (${MK_INET_SUPPORT} != "no" || \ ${MK_INET6_SUPPORT} != "no")) || defined(ALL_MODULES) _pf= pf _pflog= pflog .if ${MK_INET_SUPPORT} != "no" _pfsync= pfsync .endif .endif .if ${MK_SOURCELESS_UCODE} != "no" _bce= bce _fxp= fxp _ispfw= ispfw _ti= ti .if ${MACHINE_CPUARCH} != "mips" _mwlfw= mwlfw _otusfw= otusfw _ralfw= ralfw _rtwnfw= rtwnfw .endif .endif .if ${MK_SOURCELESS_UCODE} != "no" && ${MACHINE_CPUARCH} != "arm" && \ ${MACHINE_CPUARCH} != "mips" && \ ${MACHINE_ARCH} != "powerpc" && ${MACHINE_ARCH} != "powerpcspe" && \ ${MACHINE_CPUARCH} != "riscv" _cxgbe= cxgbe .endif # These rely on 64bit atomics .if ${MACHINE_ARCH} != "powerpc" && ${MACHINE_ARCH} != "powerpcspe" && \ ${MACHINE_CPUARCH} != "mips" _mps= mps _mpr= mpr .endif .if ${MK_TESTS} != "no" || defined(ALL_MODULES) SUBDIR+= tests .endif .if ${MK_ZFS} != "no" || defined(ALL_MODULES) SUBDIR+= zfs .endif .if (${MACHINE_CPUARCH} == "mips" && ${MACHINE_ARCH:Mmips64} == "") _hwpmc_mips24k= hwpmc_mips24k _hwpmc_mips74k= hwpmc_mips74k .endif .if ${MACHINE_CPUARCH} != "aarch64" && ${MACHINE_CPUARCH} != "arm" && \ ${MACHINE_CPUARCH} != "mips" && ${MACHINE_CPUARCH} != "powerpc" && \ ${MACHINE_CPUARCH} != "riscv" _syscons= syscons .endif .if ${MACHINE_CPUARCH} != "mips" # no BUS_SPACE_UNSPECIFIED # No barrier instruction support (specific to this driver) _sym= sym # intr_disable() is a macro, causes problems .if ${MK_SOURCELESS_UCODE} != "no" _cxgb= cxgb .endif .endif .if ${MACHINE_CPUARCH} == "aarch64" _allwinner= allwinner _armv8crypto= armv8crypto _efirt= efirt _em= em _rockchip= rockchip .endif .if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64" _agp= agp _an= an _aout= aout _bios= bios .if ${MK_SOURCELESS_UCODE} != "no" _bxe= bxe .endif _cardbus= cardbus _cbb= cbb _cpuctl= cpuctl _cpufreq= cpufreq _dpms= dpms _em= em _et= et _exca= exca _if_ndis= if_ndis _io= io _itwd= itwd _ix= ix _ixv= ixv .if ${MK_SOURCELESS_UCODE} != "no" _lio= lio .endif _nctgpio= nctgpio _ndis= ndis _ntb= ntb _ocs_fc= ocs_fc _pccard= pccard .if ${MK_OFED} != "no" || defined(ALL_MODULES) _rdma= rdma .endif _safe= safe _speaker= speaker _splash= splash _sppp= sppp _vmware= vmware _wbwd= wbwd _wi= wi _aac= aac _aacraid= aacraid _acpi= acpi .if ${MK_CRYPT} != "no" || defined(ALL_MODULES) .if ${COMPILER_TYPE} != "gcc" || ${COMPILER_VERSION} > 40201 _aesni= aesni .endif .endif _amd_ecc_inject=amd_ecc_inject _amdsbwd= amdsbwd _amdsmn= amdsmn _amdtemp= amdtemp _arcmsr= arcmsr _asmc= asmc .if ${MK_CRYPT} != "no" || defined(ALL_MODULES) _blake2= blake2 .endif _bytgpio= bytgpio _chvgpio= chvgpio _ciss= ciss _chromebook_platform= chromebook_platform _cmx= cmx _coretemp= coretemp .if ${MK_SOURCELESS_HOST} != "no" && empty(KCSAN_ENABLED) _hpt27xx= hpt27xx .endif _hptiop= hptiop .if ${MK_SOURCELESS_HOST} != "no" && empty(KCSAN_ENABLED) _hptmv= hptmv _hptnr= hptnr _hptrr= hptrr .endif _hyperv= hyperv _ichwd= ichwd _ida= ida _iir= iir _intelspi= intelspi _ipmi= ipmi _ips= ips _isci= isci _ipw= ipw _iwi= iwi _iwm= iwm _iwn= iwn .if ${MK_SOURCELESS_UCODE} != "no" _ipwfw= ipwfw _iwifw= iwifw _iwmfw= iwmfw _iwnfw= iwnfw .endif _mly= mly _nfe= nfe _nvd= nvd _nvme= nvme _nvram= nvram .if ${MK_CRYPT} != "no" || defined(ALL_MODULES) _padlock= padlock _padlock_rng= padlock_rng _rdrand_rng= rdrand_rng .endif _s3= s3 _sdhci_acpi= sdhci_acpi _superio= superio _tpm= tpm _twa= twa _vesa= vesa _viawd= viawd _virtio= virtio _wpi= wpi .if ${MK_SOURCELESS_UCODE} != "no" _wpifw= wpifw .endif _x86bios= x86bios .endif .if ${MACHINE_CPUARCH} == "amd64" _amdgpio= amdgpio _ccp= ccp _efirt= efirt _iavf= iavf _ioat= ioat _ixl= ixl _nvdimm= nvdimm _pms= pms _qlxge= qlxge _qlxgb= qlxgb _vmd= vmd .if ${MK_SOURCELESS_UCODE} != "no" _qlxgbe= qlxgbe _qlnx= qlnx .endif _sfxge= sfxge _sgx= sgx _sgx_linux= sgx_linux _smartpqi= smartpqi .if ${MK_BHYVE} != "no" || defined(ALL_MODULES) .if ${KERN_OPTS:MSMP} _vmm= vmm .endif .endif .endif .if ${MACHINE_CPUARCH} == "i386" # XXX some of these can move to the general case when de-i386'ed # XXX some of these can move now, but are untested on other architectures. _3dfx= 3dfx _3dfx_linux= 3dfx_linux _apm= apm .if ${MK_SOURCELESS_HOST} != "no" _ce= ce .endif .if ${MK_SOURCELESS_UCODE} != "no" _cp= cp .endif _glxiic= glxiic _glxsb= glxsb _pcfclock= pcfclock _pst= pst _sbni= sbni .if ${MK_SOURCELESS_UCODE} != "no" _ctau= ctau .endif .endif .if ${MACHINE_CPUARCH} == "arm" _cfi= cfi _cpsw= cpsw .endif .if ${MACHINE_CPUARCH} == "powerpc" +_aacraid= aacraid _agp= agp _an= an _cardbus= cardbus _cbb= cbb _cfi= cfi _cpufreq= cpufreq _exca= exca _ffec= ffec _nvd= nvd _nvme= nvme _pccard= pccard _wi= wi _virtio= virtio .endif .if ${MACHINE_ARCH} == "powerpc64" _ipmi= ipmi _nvram= opal_nvram .endif .if ${MACHINE_ARCH} == "powerpc64" || ${MACHINE_ARCH} == "powerpc" # Don't build powermac_nvram for powerpcspe, it's never supported. _nvram+= powermac_nvram .endif .if (${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" || \ ${MACHINE_ARCH:Marmv[67]*} != "" || ${MACHINE_CPUARCH} == "i386") _cloudabi32= cloudabi32 .endif .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" _cloudabi64= cloudabi64 .endif .endif .if ${MACHINE_ARCH:Marmv[67]*} != "" || ${MACHINE_CPUARCH} == "aarch64" _bcm283x_clkman= bcm283x_clkman _bcm283x_pwm= bcm283x_pwm .endif SUBDIR+=${MODULES_EXTRA} .for reject in ${WITHOUT_MODULES} SUBDIR:= ${SUBDIR:N${reject}} .endfor # Calling kldxref(8) for each module is expensive. .if !defined(NO_XREF) .MAKEFLAGS+= -DNO_XREF afterinstall: .PHONY @if type kldxref >/dev/null 2>&1; then \ ${ECHO} ${KLDXREF_CMD} ${DESTDIR}${KMODDIR}; \ ${KLDXREF_CMD} ${DESTDIR}${KMODDIR}; \ fi .endif SUBDIR:= ${SUBDIR:u:O} .include diff --git a/sys/modules/aacraid/Makefile b/sys/modules/aacraid/Makefile index 8852d477b739..8bbe1d7db708 100644 --- a/sys/modules/aacraid/Makefile +++ b/sys/modules/aacraid/Makefile @@ -1,18 +1,21 @@ # $FreeBSD$ .PATH: ${SRCTOP}/sys/dev/aacraid .if ${MACHINE_CPUARCH} == "i386" SUBDIR= aacraid_linux .endif KMOD= aacraid SRCS= aacraid.c aacraid_pci.c aacraid_cam.c +.if ${MACHINE_CPUARCH} == "powerpc" +SRCS+= aacraid_endian.c +.endif SRCS+= opt_scsi.h opt_cam.h opt_aacraid.h SRCS+= device_if.h bus_if.h pci_if.h # To enable debug output from the driver, uncomment these two lines. #CFLAGS+= -DAACRAID_DEBUG=2 #SRCS+= aacraid_debug.c .include diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c index ed5ad1f5fad2..5bdeb4cb11ff 100644 --- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1,1773 +1,1773 @@ /*- * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 * * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include "ipoib.h" #include static int ipoib_resolvemulti(struct ifnet *, struct sockaddr **, struct sockaddr *); #include #include #include #include #include /* For ARPHRD_xxx */ #include #include #include #include MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); MODULE_LICENSE("Dual BSD/GPL"); int ipoib_sendq_size = IPOIB_TX_RING_SIZE; int ipoib_recvq_size = IPOIB_RX_RING_SIZE; module_param_named(send_queue_size, ipoib_sendq_size, int, 0444); MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue"); module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG int ipoib_debug_level = 1; module_param_named(debug_level, ipoib_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); #endif struct ipoib_path_iter { struct ipoib_dev_priv *priv; struct ipoib_path path; }; static const u8 ipv4_bcast_addr[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff }; struct workqueue_struct *ipoib_workqueue; struct ib_sa_client ipoib_sa_client; static void ipoib_add_one(struct ib_device *device); static void ipoib_remove_one(struct ib_device *device, void *client_data); static struct net_device *ipoib_get_net_dev_by_params( struct ib_device *dev, u8 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr, void *client_data); static void ipoib_start(struct ifnet *dev); static int ipoib_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro); static int ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data); static void ipoib_input(struct ifnet *ifp, struct mbuf *m); #define IPOIB_MTAP(_ifp, _m) \ do { \ if (bpf_peers_present((_ifp)->if_bpf)) { \ M_ASSERTVALID(_m); \ ipoib_mtap_mb((_ifp), (_m)); \ } \ } while (0) static struct unrhdr *ipoib_unrhdr; static void ipoib_unrhdr_init(void *arg) { ipoib_unrhdr = new_unrhdr(0, 65535, NULL); } SYSINIT(ipoib_unrhdr_init, SI_SUB_KLD - 1, SI_ORDER_ANY, ipoib_unrhdr_init, NULL); static void ipoib_unrhdr_uninit(void *arg) { if (ipoib_unrhdr != NULL) { struct unrhdr *hdr; hdr = ipoib_unrhdr; ipoib_unrhdr = NULL; delete_unrhdr(hdr); } } SYSUNINIT(ipoib_unrhdr_uninit, SI_SUB_KLD - 1, SI_ORDER_ANY, ipoib_unrhdr_uninit, NULL); /* * This is for clients that have an ipoib_header in the mbuf. */ static void ipoib_mtap_mb(struct ifnet *ifp, struct mbuf *mb) { struct ipoib_header *ih; struct ether_header eh; ih = mtod(mb, struct ipoib_header *); eh.ether_type = ih->proto; bcopy(ih->hwaddr, &eh.ether_dhost, ETHER_ADDR_LEN); bzero(&eh.ether_shost, ETHER_ADDR_LEN); mb->m_data += sizeof(struct ipoib_header); mb->m_len -= sizeof(struct ipoib_header); bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb); mb->m_data -= sizeof(struct ipoib_header); mb->m_len += sizeof(struct ipoib_header); } void ipoib_mtap_proto(struct ifnet *ifp, struct mbuf *mb, uint16_t proto) { struct ether_header eh; eh.ether_type = proto; bzero(&eh.ether_shost, ETHER_ADDR_LEN); bzero(&eh.ether_dhost, ETHER_ADDR_LEN); bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb); } static struct ib_client ipoib_client = { .name = "ipoib", .add = ipoib_add_one, .remove = ipoib_remove_one, .get_net_dev_by_params = ipoib_get_net_dev_by_params, }; int ipoib_open(struct ipoib_dev_priv *priv) { struct ifnet *dev = priv->dev; ipoib_dbg(priv, "bringing up interface\n"); set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); if (ipoib_pkey_dev_delay_open(priv)) return 0; if (ipoib_ib_dev_open(priv)) goto err_disable; if (ipoib_ib_dev_up(priv)) goto err_stop; if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { struct ipoib_dev_priv *cpriv; /* Bring up any child interfaces too */ mutex_lock(&priv->vlan_mutex); list_for_each_entry(cpriv, &priv->child_intfs, list) if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) == 0) ipoib_open(cpriv); mutex_unlock(&priv->vlan_mutex); } dev->if_drv_flags |= IFF_DRV_RUNNING; dev->if_drv_flags &= ~IFF_DRV_OACTIVE; return 0; err_stop: ipoib_ib_dev_stop(priv, 1); err_disable: clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); return -EINVAL; } static void ipoib_init(void *arg) { struct ifnet *dev; struct ipoib_dev_priv *priv; priv = arg; dev = priv->dev; if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0) ipoib_open(priv); queue_work(ipoib_workqueue, &priv->flush_light); } static int ipoib_stop(struct ipoib_dev_priv *priv) { struct ifnet *dev = priv->dev; ipoib_dbg(priv, "stopping interface\n"); clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); ipoib_ib_dev_down(priv, 0); ipoib_ib_dev_stop(priv, 0); if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { struct ipoib_dev_priv *cpriv; /* Bring down any child interfaces too */ mutex_lock(&priv->vlan_mutex); list_for_each_entry(cpriv, &priv->child_intfs, list) if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) != 0) ipoib_stop(cpriv); mutex_unlock(&priv->vlan_mutex); } return 0; } static int ipoib_propagate_ifnet_mtu(struct ipoib_dev_priv *priv, int new_mtu, bool propagate) { struct ifnet *ifp; struct ifreq ifr; int error; ifp = priv->dev; if (ifp->if_mtu == new_mtu) return (0); if (propagate) { strlcpy(ifr.ifr_name, if_name(ifp), IFNAMSIZ); ifr.ifr_mtu = new_mtu; CURVNET_SET(ifp->if_vnet); error = ifhwioctl(SIOCSIFMTU, ifp, (caddr_t)&ifr, curthread); CURVNET_RESTORE(); } else { ifp->if_mtu = new_mtu; error = 0; } return (error); } int ipoib_change_mtu(struct ipoib_dev_priv *priv, int new_mtu, bool propagate) { int error, prev_admin_mtu; /* dev->if_mtu > 2K ==> connected mode */ if (ipoib_cm_admin_enabled(priv)) { if (new_mtu > IPOIB_CM_MTU(ipoib_cm_max_mtu(priv))) return -EINVAL; if (new_mtu > priv->mcast_mtu) ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n", priv->mcast_mtu); return (ipoib_propagate_ifnet_mtu(priv, new_mtu, propagate)); } if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) return -EINVAL; prev_admin_mtu = priv->admin_mtu; priv->admin_mtu = new_mtu; error = ipoib_propagate_ifnet_mtu(priv, min(priv->mcast_mtu, priv->admin_mtu), propagate); if (error == 0) { /* check for MTU change to avoid infinite loop */ if (prev_admin_mtu != new_mtu) queue_work(ipoib_workqueue, &priv->flush_light); } else priv->admin_mtu = prev_admin_mtu; return (error); } static int ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ipoib_dev_priv *priv = ifp->if_softc; struct ifaddr *ifa = (struct ifaddr *) data; struct ifreq *ifr = (struct ifreq *) data; int error = 0; /* check if detaching */ if (priv == NULL || priv->gone != 0) return (ENXIO); switch (command) { case SIOCSIFFLAGS: if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) error = -ipoib_open(priv); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) ipoib_stop(priv); break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifp->if_drv_flags & IFF_DRV_RUNNING) queue_work(ipoib_workqueue, &priv->restart_task); break; case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifp->if_init(ifp->if_softc); /* before arpwhohas */ arp_ifinit(ifp, ifa); break; #endif default: ifp->if_init(ifp->if_softc); break; } break; case SIOCGIFADDR: bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0], INFINIBAND_ALEN); break; case SIOCSIFMTU: /* * Set the interface MTU. */ error = -ipoib_change_mtu(priv, ifr->ifr_mtu, false); break; default: error = EINVAL; break; } return (error); } static struct ipoib_path * __path_find(struct ipoib_dev_priv *priv, void *gid) { struct rb_node *n = priv->path_tree.rb_node; struct ipoib_path *path; int ret; while (n) { path = rb_entry(n, struct ipoib_path, rb_node); ret = memcmp(gid, path->pathrec.dgid.raw, sizeof (union ib_gid)); if (ret < 0) n = n->rb_left; else if (ret > 0) n = n->rb_right; else return path; } return NULL; } static int __path_add(struct ipoib_dev_priv *priv, struct ipoib_path *path) { struct rb_node **n = &priv->path_tree.rb_node; struct rb_node *pn = NULL; struct ipoib_path *tpath; int ret; while (*n) { pn = *n; tpath = rb_entry(pn, struct ipoib_path, rb_node); ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw, sizeof (union ib_gid)); if (ret < 0) n = &pn->rb_left; else if (ret > 0) n = &pn->rb_right; else return -EEXIST; } rb_link_node(&path->rb_node, pn, n); rb_insert_color(&path->rb_node, &priv->path_tree); list_add_tail(&path->list, &priv->path_list); return 0; } void ipoib_path_free(struct ipoib_dev_priv *priv, struct ipoib_path *path) { _IF_DRAIN(&path->queue); if (path->ah) ipoib_put_ah(path->ah); if (ipoib_cm_get(path)) ipoib_cm_destroy_tx(ipoib_cm_get(path)); kfree(path); } #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG struct ipoib_path_iter * ipoib_path_iter_init(struct ipoib_dev_priv *priv) { struct ipoib_path_iter *iter; iter = kmalloc(sizeof *iter, GFP_KERNEL); if (!iter) return NULL; iter->priv = priv; memset(iter->path.pathrec.dgid.raw, 0, 16); if (ipoib_path_iter_next(iter)) { kfree(iter); return NULL; } return iter; } int ipoib_path_iter_next(struct ipoib_path_iter *iter) { struct ipoib_dev_priv *priv = iter->priv; struct rb_node *n; struct ipoib_path *path; int ret = 1; spin_lock_irq(&priv->lock); n = rb_first(&priv->path_tree); while (n) { path = rb_entry(n, struct ipoib_path, rb_node); if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw, sizeof (union ib_gid)) < 0) { iter->path = *path; ret = 0; break; } n = rb_next(n); } spin_unlock_irq(&priv->lock); return ret; } void ipoib_path_iter_read(struct ipoib_path_iter *iter, struct ipoib_path *path) { *path = iter->path; } #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ void ipoib_mark_paths_invalid(struct ipoib_dev_priv *priv) { struct ipoib_path *path, *tp; spin_lock_irq(&priv->lock); list_for_each_entry_safe(path, tp, &priv->path_list, list) { ipoib_dbg(priv, "mark path LID 0x%04x GID %16D invalid\n", be16_to_cpu(path->pathrec.dlid), path->pathrec.dgid.raw, ":"); path->valid = 0; } spin_unlock_irq(&priv->lock); } void ipoib_flush_paths(struct ipoib_dev_priv *priv) { struct ipoib_path *path, *tp; LIST_HEAD(remove_list); unsigned long flags; spin_lock_irqsave(&priv->lock, flags); list_splice_init(&priv->path_list, &remove_list); list_for_each_entry(path, &remove_list, list) rb_erase(&path->rb_node, &priv->path_tree); list_for_each_entry_safe(path, tp, &remove_list, list) { if (path->query) ib_sa_cancel_query(path->query_id, path->query); spin_unlock_irqrestore(&priv->lock, flags); wait_for_completion(&path->done); ipoib_path_free(priv, path); spin_lock_irqsave(&priv->lock, flags); } spin_unlock_irqrestore(&priv->lock, flags); } static void path_rec_completion(int status, struct ib_sa_path_rec *pathrec, void *path_ptr) { struct ipoib_path *path = path_ptr; struct ipoib_dev_priv *priv = path->priv; struct ifnet *dev = priv->dev; struct ipoib_ah *ah = NULL; struct ipoib_ah *old_ah = NULL; struct epoch_tracker et; struct ifqueue mbqueue; struct mbuf *mb; unsigned long flags; if (!status) ipoib_dbg(priv, "PathRec LID 0x%04x for GID %16D\n", be16_to_cpu(pathrec->dlid), pathrec->dgid.raw, ":"); else ipoib_dbg(priv, "PathRec status %d for GID %16D\n", status, path->pathrec.dgid.raw, ":"); bzero(&mbqueue, sizeof(mbqueue)); if (!status) { struct ib_ah_attr av; if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av)) ah = ipoib_create_ah(priv, priv->pd, &av); } spin_lock_irqsave(&priv->lock, flags); if (ah) { path->pathrec = *pathrec; old_ah = path->ah; path->ah = ah; ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n", ah, be16_to_cpu(pathrec->dlid), pathrec->sl); for (;;) { _IF_DEQUEUE(&path->queue, mb); if (mb == NULL) break; _IF_ENQUEUE(&mbqueue, mb); } #ifdef CONFIG_INFINIBAND_IPOIB_CM if (ipoib_cm_enabled(priv, path->hwaddr) && !ipoib_cm_get(path)) ipoib_cm_set(path, ipoib_cm_create_tx(priv, path)); #endif path->valid = 1; } path->query = NULL; complete(&path->done); spin_unlock_irqrestore(&priv->lock, flags); if (old_ah) ipoib_put_ah(old_ah); NET_EPOCH_ENTER(et); for (;;) { _IF_DEQUEUE(&mbqueue, mb); if (mb == NULL) break; mb->m_pkthdr.rcvif = dev; if (dev->if_transmit(dev, mb)) ipoib_warn(priv, "dev_queue_xmit failed " "to requeue packet\n"); } NET_EPOCH_EXIT(et); } static struct ipoib_path * path_rec_create(struct ipoib_dev_priv *priv, uint8_t *hwaddr) { struct ipoib_path *path; if (!priv->broadcast) return NULL; path = kzalloc(sizeof *path, GFP_ATOMIC); if (!path) return NULL; path->priv = priv; bzero(&path->queue, sizeof(path->queue)); #ifdef CONFIG_INFINIBAND_IPOIB_CM memcpy(&path->hwaddr, hwaddr, INFINIBAND_ALEN); #endif memcpy(path->pathrec.dgid.raw, &hwaddr[4], sizeof (union ib_gid)); path->pathrec.sgid = priv->local_gid; path->pathrec.pkey = cpu_to_be16(priv->pkey); path->pathrec.numb_path = 1; path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class; return path; } static int path_rec_start(struct ipoib_dev_priv *priv, struct ipoib_path *path) { struct ifnet *dev = priv->dev; ib_sa_comp_mask comp_mask = IB_SA_PATH_REC_MTU_SELECTOR | IB_SA_PATH_REC_MTU; struct ib_sa_path_rec p_rec; p_rec = path->pathrec; p_rec.mtu_selector = IB_SA_GT; switch (roundup_pow_of_two(dev->if_mtu + IPOIB_ENCAP_LEN)) { case 512: p_rec.mtu = IB_MTU_256; break; case 1024: p_rec.mtu = IB_MTU_512; break; case 2048: p_rec.mtu = IB_MTU_1024; break; case 4096: p_rec.mtu = IB_MTU_2048; break; default: /* Wildcard everything */ comp_mask = 0; p_rec.mtu = 0; p_rec.mtu_selector = 0; } ipoib_dbg(priv, "Start path record lookup for %16D MTU > %d\n", p_rec.dgid.raw, ":", comp_mask ? ib_mtu_enum_to_int(p_rec.mtu) : 0); init_completion(&path->done); path->query_id = ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port, &p_rec, comp_mask | IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_NUMB_PATH | IB_SA_PATH_REC_TRAFFIC_CLASS | IB_SA_PATH_REC_PKEY, 1000, GFP_ATOMIC, path_rec_completion, path, &path->query); if (path->query_id < 0) { ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id); path->query = NULL; complete(&path->done); return path->query_id; } return 0; } static void ipoib_unicast_send(struct mbuf *mb, struct ipoib_dev_priv *priv, struct ipoib_header *eh) { struct ipoib_path *path; path = __path_find(priv, eh->hwaddr + 4); if (!path || !path->valid) { int new_path = 0; if (!path) { path = path_rec_create(priv, eh->hwaddr); new_path = 1; } if (path) { if (_IF_QLEN(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) _IF_ENQUEUE(&path->queue, mb); else { if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1); m_freem(mb); } if (!path->query && path_rec_start(priv, path)) { spin_unlock_irqrestore(&priv->lock, flags); if (new_path) ipoib_path_free(priv, path); return; } else __path_add(priv, path); } else { if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1); m_freem(mb); } return; } if (ipoib_cm_get(path) && ipoib_cm_up(path)) { ipoib_cm_send(priv, mb, ipoib_cm_get(path)); } else if (path->ah) { ipoib_send(priv, mb, path->ah, IPOIB_QPN(eh->hwaddr)); } else if ((path->query || !path_rec_start(priv, path)) && path->queue.ifq_len < IPOIB_MAX_PATH_REC_QUEUE) { _IF_ENQUEUE(&path->queue, mb); } else { if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1); m_freem(mb); } } static int ipoib_send_one(struct ipoib_dev_priv *priv, struct mbuf *mb) { struct ipoib_header *eh; eh = mtod(mb, struct ipoib_header *); if (IPOIB_IS_MULTICAST(eh->hwaddr)) { /* Add in the P_Key for multicast*/ eh->hwaddr[8] = (priv->pkey >> 8) & 0xff; eh->hwaddr[9] = priv->pkey & 0xff; ipoib_mcast_send(priv, eh->hwaddr + 4, mb); } else ipoib_unicast_send(mb, priv, eh); return 0; } void ipoib_start_locked(struct ifnet *dev, struct ipoib_dev_priv *priv) { struct mbuf *mb; assert_spin_locked(&priv->lock); while (!IFQ_DRV_IS_EMPTY(&dev->if_snd) && (dev->if_drv_flags & IFF_DRV_OACTIVE) == 0) { IFQ_DRV_DEQUEUE(&dev->if_snd, mb); if (mb == NULL) break; IPOIB_MTAP(dev, mb); ipoib_send_one(priv, mb); } } static void _ipoib_start(struct ifnet *dev, struct ipoib_dev_priv *priv) { if ((dev->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; spin_lock(&priv->lock); ipoib_start_locked(dev, priv); spin_unlock(&priv->lock); } static void ipoib_start(struct ifnet *dev) { _ipoib_start(dev, dev->if_softc); } static void ipoib_vlan_start(struct ifnet *dev) { struct ipoib_dev_priv *priv; struct mbuf *mb; priv = VLAN_COOKIE(dev); if (priv != NULL) return _ipoib_start(dev, priv); while (!IFQ_DRV_IS_EMPTY(&dev->if_snd)) { IFQ_DRV_DEQUEUE(&dev->if_snd, mb); if (mb == NULL) break; m_freem(mb); if_inc_counter(dev, IFCOUNTER_OERRORS, 1); } } int ipoib_dev_init(struct ipoib_dev_priv *priv, struct ib_device *ca, int port) { /* Allocate RX/TX "rings" to hold queued mbs */ priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, GFP_KERNEL); if (!priv->rx_ring) { printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", ca->name, ipoib_recvq_size); goto out; } priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring, GFP_KERNEL); if (!priv->tx_ring) { printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", ca->name, ipoib_sendq_size); goto out_rx_ring_cleanup; } memset(priv->tx_ring, 0, ipoib_sendq_size * sizeof *priv->tx_ring); /* priv->tx_head, tx_tail & tx_outstanding are already 0 */ if (ipoib_ib_dev_init(priv, ca, port)) goto out_tx_ring_cleanup; return 0; out_tx_ring_cleanup: kfree(priv->tx_ring); out_rx_ring_cleanup: kfree(priv->rx_ring); out: return -ENOMEM; } static void ipoib_detach(struct ipoib_dev_priv *priv) { struct ifnet *dev; dev = priv->dev; if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { priv->gone = 1; bpfdetach(dev); if_detach(dev); if_free(dev); free_unr(ipoib_unrhdr, priv->unit); } else VLAN_SETCOOKIE(priv->dev, NULL); free(priv, M_TEMP); } void ipoib_dev_cleanup(struct ipoib_dev_priv *priv) { struct ipoib_dev_priv *cpriv, *tcpriv; /* Delete any child interfaces first */ list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { ipoib_dev_cleanup(cpriv); ipoib_detach(cpriv); } ipoib_ib_dev_cleanup(priv); kfree(priv->rx_ring); kfree(priv->tx_ring); priv->rx_ring = NULL; priv->tx_ring = NULL; } static struct ipoib_dev_priv * ipoib_priv_alloc(void) { struct ipoib_dev_priv *priv; priv = malloc(sizeof(struct ipoib_dev_priv), M_TEMP, M_ZERO|M_WAITOK); spin_lock_init(&priv->lock); spin_lock_init(&priv->drain_lock); mutex_init(&priv->vlan_mutex); INIT_LIST_HEAD(&priv->path_list); INIT_LIST_HEAD(&priv->child_intfs); INIT_LIST_HEAD(&priv->dead_ahs); INIT_LIST_HEAD(&priv->multicast_list); INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll); INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task); INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal); INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); memcpy(priv->broadcastaddr, ipv4_bcast_addr, INFINIBAND_ALEN); return (priv); } struct ipoib_dev_priv * ipoib_intf_alloc(const char *name) { struct ipoib_dev_priv *priv; struct sockaddr_dl *sdl; struct ifnet *dev; priv = ipoib_priv_alloc(); dev = priv->dev = if_alloc(IFT_INFINIBAND); if (!dev) { free(priv, M_TEMP); return NULL; } dev->if_softc = priv; priv->unit = alloc_unr(ipoib_unrhdr); if (priv->unit == -1) { if_free(dev); free(priv, M_TEMP); return NULL; } if_initname(dev, name, priv->unit); dev->if_flags = IFF_BROADCAST | IFF_MULTICAST; dev->if_addrlen = INFINIBAND_ALEN; dev->if_hdrlen = IPOIB_HEADER_LEN; if_attach(dev); dev->if_init = ipoib_init; dev->if_ioctl = ipoib_ioctl; dev->if_start = ipoib_start; dev->if_output = ipoib_output; dev->if_input = ipoib_input; dev->if_resolvemulti = ipoib_resolvemulti; dev->if_baudrate = IF_Gbps(10); dev->if_broadcastaddr = priv->broadcastaddr; dev->if_snd.ifq_maxlen = ipoib_sendq_size * 2; sdl = (struct sockaddr_dl *)dev->if_addr->ifa_addr; sdl->sdl_type = IFT_INFINIBAND; sdl->sdl_alen = dev->if_addrlen; priv->dev = dev; if_link_state_change(dev, LINK_STATE_DOWN); bpfattach(dev, DLT_EN10MB, ETHER_HDR_LEN); return dev->if_softc; } int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca) { struct ib_device_attr *device_attr = &hca->attrs; priv->hca_caps = device_attr->device_cap_flags; priv->dev->if_hwassist = 0; priv->dev->if_capabilities = 0; #ifndef CONFIG_INFINIBAND_IPOIB_CM if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { set_bit(IPOIB_FLAG_CSUM, &priv->flags); priv->dev->if_hwassist = CSUM_IP | CSUM_TCP | CSUM_UDP; priv->dev->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; } #if 0 if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO) { priv->dev->if_capabilities |= IFCAP_TSO4; priv->dev->if_hwassist |= CSUM_TSO; } #endif #endif priv->dev->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_LINKSTATE; priv->dev->if_capenable = priv->dev->if_capabilities; return 0; } static struct ifnet * ipoib_add_port(const char *format, struct ib_device *hca, u8 port) { struct ipoib_dev_priv *priv; struct ib_port_attr attr; int result = -ENOMEM; priv = ipoib_intf_alloc(format); if (!priv) goto alloc_mem_failed; if (!ib_query_port(hca, port, &attr)) priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); else { printk(KERN_WARNING "%s: ib_query_port %d failed\n", hca->name, port); goto device_init_failed; } /* MTU will be reset when mcast join happens */ priv->dev->if_mtu = IPOIB_UD_MTU(priv->max_ib_mtu); priv->mcast_mtu = priv->admin_mtu = priv->dev->if_mtu; result = ib_query_pkey(hca, port, 0, &priv->pkey); if (result) { printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", hca->name, port, result); goto device_init_failed; } if (ipoib_set_dev_features(priv, hca)) goto device_init_failed; /* * Set the full membership bit, so that we join the right * broadcast group, etc. */ priv->pkey |= 0x8000; priv->broadcastaddr[8] = priv->pkey >> 8; priv->broadcastaddr[9] = priv->pkey & 0xff; result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL); if (result) { printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n", hca->name, port, result); goto device_init_failed; } memcpy(IF_LLADDR(priv->dev) + 4, priv->local_gid.raw, sizeof (union ib_gid)); result = ipoib_dev_init(priv, hca, port); if (result < 0) { printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n", hca->name, port, result); goto device_init_failed; } if (ipoib_cm_admin_enabled(priv)) priv->dev->if_mtu = IPOIB_CM_MTU(ipoib_cm_max_mtu(priv)); INIT_IB_EVENT_HANDLER(&priv->event_handler, priv->ca, ipoib_event); result = ib_register_event_handler(&priv->event_handler); if (result < 0) { printk(KERN_WARNING "%s: ib_register_event_handler failed for " "port %d (ret = %d)\n", hca->name, port, result); goto event_failed; } if_printf(priv->dev, "Attached to %s port %d\n", hca->name, port); return priv->dev; event_failed: ipoib_dev_cleanup(priv); device_init_failed: ipoib_detach(priv); alloc_mem_failed: return ERR_PTR(result); } static void ipoib_add_one(struct ib_device *device) { struct list_head *dev_list; struct ifnet *dev; struct ipoib_dev_priv *priv; int s, e, p; if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) return; dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); if (!dev_list) return; INIT_LIST_HEAD(dev_list); if (device->node_type == RDMA_NODE_IB_SWITCH) { s = 0; e = 0; } else { s = 1; e = device->phys_port_cnt; } for (p = s; p <= e; ++p) { if (rdma_port_get_link_layer(device, p) != IB_LINK_LAYER_INFINIBAND) continue; dev = ipoib_add_port("ib", device, p); if (!IS_ERR(dev)) { priv = dev->if_softc; list_add_tail(&priv->list, dev_list); } } ib_set_client_data(device, &ipoib_client, dev_list); } static void ipoib_remove_one(struct ib_device *device, void *client_data) { struct ipoib_dev_priv *priv, *tmp; struct list_head *dev_list = client_data; if (!dev_list) return; if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) return; list_for_each_entry_safe(priv, tmp, dev_list, list) { if (rdma_port_get_link_layer(device, priv->port) != IB_LINK_LAYER_INFINIBAND) continue; ipoib_stop(priv); ib_unregister_event_handler(&priv->event_handler); /* dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); */ flush_workqueue(ipoib_workqueue); ipoib_dev_cleanup(priv); ipoib_detach(priv); } kfree(dev_list); } static int ipoib_match_dev_addr(const struct sockaddr *addr, struct net_device *dev) { struct epoch_tracker et; struct ifaddr *ifa; int retval = 0; CURVNET_SET(dev->if_vnet); NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) { if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != addr->sa_family || ifa->ifa_addr->sa_len != addr->sa_len) { continue; } if (memcmp(ifa->ifa_addr, addr, addr->sa_len) == 0) { retval = 1; break; } } NET_EPOCH_EXIT(et); CURVNET_RESTORE(); return (retval); } /* * ipoib_match_gid_pkey_addr - returns the number of IPoIB netdevs on * top a given ipoib device matching a pkey_index and address, if one * exists. * * @found_net_dev: contains a matching net_device if the return value * >= 1, with a reference held. */ static int ipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv, const union ib_gid *gid, u16 pkey_index, const struct sockaddr *addr, struct net_device **found_net_dev) { struct ipoib_dev_priv *child_priv; int matches = 0; if (priv->pkey_index == pkey_index && (!gid || !memcmp(gid, &priv->local_gid, sizeof(*gid)))) { if (addr == NULL || ipoib_match_dev_addr(addr, priv->dev) != 0) { if (*found_net_dev == NULL) { struct net_device *net_dev; if (priv->parent != NULL) net_dev = priv->parent; else net_dev = priv->dev; *found_net_dev = net_dev; dev_hold(net_dev); } matches++; } } /* Check child interfaces */ mutex_lock(&priv->vlan_mutex); list_for_each_entry(child_priv, &priv->child_intfs, list) { matches += ipoib_match_gid_pkey_addr(child_priv, gid, pkey_index, addr, found_net_dev); if (matches > 1) break; } mutex_unlock(&priv->vlan_mutex); return matches; } /* * __ipoib_get_net_dev_by_params - returns the number of matching * net_devs found (between 0 and 2). Also return the matching * net_device in the @net_dev parameter, holding a reference to the * net_device, if the number of matches >= 1 */ static int __ipoib_get_net_dev_by_params(struct list_head *dev_list, u8 port, u16 pkey_index, const union ib_gid *gid, const struct sockaddr *addr, struct net_device **net_dev) { struct ipoib_dev_priv *priv; int matches = 0; *net_dev = NULL; list_for_each_entry(priv, dev_list, list) { if (priv->port != port) continue; matches += ipoib_match_gid_pkey_addr(priv, gid, pkey_index, addr, net_dev); if (matches > 1) break; } return matches; } static struct net_device * ipoib_get_net_dev_by_params(struct ib_device *dev, u8 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr, void *client_data) { struct net_device *net_dev; struct list_head *dev_list = client_data; u16 pkey_index; int matches; int ret; if (!rdma_protocol_ib(dev, port)) return NULL; ret = ib_find_cached_pkey(dev, port, pkey, &pkey_index); if (ret) return NULL; if (!dev_list) return NULL; /* See if we can find a unique device matching the L2 parameters */ matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index, gid, NULL, &net_dev); switch (matches) { case 0: return NULL; case 1: return net_dev; } dev_put(net_dev); /* Couldn't find a unique device with L2 parameters only. Use L3 * address to uniquely match the net device */ matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index, gid, addr, &net_dev); switch (matches) { case 0: return NULL; default: dev_warn_ratelimited(&dev->dev, "duplicate IP address detected\n"); /* Fall through */ case 1: return net_dev; } } static void ipoib_config_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) { struct ipoib_dev_priv *parent; struct ipoib_dev_priv *priv; struct epoch_tracker et; struct ifnet *dev; uint16_t pkey; int error; if (ifp->if_type != IFT_INFINIBAND) return; NET_EPOCH_ENTER(et); dev = VLAN_DEVAT(ifp, vtag); NET_EPOCH_EXIT(et); if (dev == NULL) return; priv = NULL; error = 0; parent = ifp->if_softc; /* We only support 15 bits of pkey. */ if (vtag & 0x8000) return; pkey = vtag | 0x8000; /* Set full membership bit. */ if (pkey == parent->pkey) return; /* Check for dups */ mutex_lock(&parent->vlan_mutex); list_for_each_entry(priv, &parent->child_intfs, list) { if (priv->pkey == pkey) { priv = NULL; error = EBUSY; goto out; } } priv = ipoib_priv_alloc(); priv->dev = dev; priv->max_ib_mtu = parent->max_ib_mtu; priv->mcast_mtu = priv->admin_mtu = parent->dev->if_mtu; set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); error = ipoib_set_dev_features(priv, parent->ca); if (error) goto out; priv->pkey = pkey; priv->broadcastaddr[8] = pkey >> 8; priv->broadcastaddr[9] = pkey & 0xff; dev->if_broadcastaddr = priv->broadcastaddr; error = ipoib_dev_init(priv, parent->ca, parent->port); if (error) goto out; priv->parent = parent->dev; list_add_tail(&priv->list, &parent->child_intfs); VLAN_SETCOOKIE(dev, priv); dev->if_start = ipoib_vlan_start; dev->if_drv_flags &= ~IFF_DRV_RUNNING; dev->if_hdrlen = IPOIB_HEADER_LEN; if (ifp->if_drv_flags & IFF_DRV_RUNNING) ipoib_open(priv); mutex_unlock(&parent->vlan_mutex); return; out: mutex_unlock(&parent->vlan_mutex); if (priv) free(priv, M_TEMP); if (error) ipoib_warn(parent, "failed to initialize subinterface: device %s, port %d vtag 0x%X", parent->ca->name, parent->port, vtag); return; } static void ipoib_unconfig_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) { struct ipoib_dev_priv *parent; struct ipoib_dev_priv *priv; struct epoch_tracker et; struct ifnet *dev; uint16_t pkey; if (ifp->if_type != IFT_INFINIBAND) return; NET_EPOCH_ENTER(et); dev = VLAN_DEVAT(ifp, vtag); NET_EPOCH_EXIT(et); if (dev) VLAN_SETCOOKIE(dev, NULL); pkey = vtag | 0x8000; parent = ifp->if_softc; mutex_lock(&parent->vlan_mutex); list_for_each_entry(priv, &parent->child_intfs, list) { if (priv->pkey == pkey) { ipoib_dev_cleanup(priv); list_del(&priv->list); break; } } mutex_unlock(&parent->vlan_mutex); } eventhandler_tag ipoib_vlan_attach; eventhandler_tag ipoib_vlan_detach; static int __init ipoib_init_module(void) { int ret; ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size); ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE); ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE); ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size); ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE); ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE, IPOIB_MIN_QUEUE_SIZE)); #ifdef CONFIG_INFINIBAND_IPOIB_CM ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); #endif ipoib_vlan_attach = EVENTHANDLER_REGISTER(vlan_config, ipoib_config_vlan, NULL, EVENTHANDLER_PRI_FIRST); ipoib_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, ipoib_unconfig_vlan, NULL, EVENTHANDLER_PRI_FIRST); /* * We create our own workqueue mainly because we want to be * able to flush it when devices are being removed. We can't * use schedule_work()/flush_scheduled_work() because both * unregister_netdev() and linkwatch_event take the rtnl lock, * so flush_scheduled_work() can deadlock during device * removal. */ ipoib_workqueue = create_singlethread_workqueue("ipoib"); if (!ipoib_workqueue) { ret = -ENOMEM; goto err_fs; } ib_sa_register_client(&ipoib_sa_client); ret = ib_register_client(&ipoib_client); if (ret) goto err_sa; return 0; err_sa: ib_sa_unregister_client(&ipoib_sa_client); destroy_workqueue(ipoib_workqueue); err_fs: return ret; } static void __exit ipoib_cleanup_module(void) { EVENTHANDLER_DEREGISTER(vlan_config, ipoib_vlan_attach); EVENTHANDLER_DEREGISTER(vlan_unconfig, ipoib_vlan_detach); ib_unregister_client(&ipoib_client); ib_sa_unregister_client(&ipoib_sa_client); destroy_workqueue(ipoib_workqueue); } /* * Infiniband output routine. */ static int ipoib_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { u_char edst[INFINIBAND_ALEN]; #if defined(INET) || defined(INET6) struct llentry *lle = NULL; #endif struct ipoib_header *eh; int error = 0, is_gw = 0; short type; NET_EPOCH_ASSERT(); if (ro != NULL) is_gw = (ro->ro_flags & RT_HAS_GW) != 0; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) goto bad; #endif M_PROFILE(m); if (ifp->if_flags & IFF_MONITOR) { error = ENETDOWN; goto bad; } if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) { error = ENETDOWN; goto bad; } switch (dst->sa_family) { #ifdef INET case AF_INET: if (lle != NULL && (lle->la_flags & LLE_VALID)) memcpy(edst, lle->ll_addr, sizeof(edst)); else if (m->m_flags & M_MCAST) ip_ib_mc_map(((struct sockaddr_in *)dst)->sin_addr.s_addr, ifp->if_broadcastaddr, edst); else error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); type = htons(ETHERTYPE_IP); break; case AF_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_INFINIBAND); switch(ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: type = htons(ETHERTYPE_REVARP); break; case ARPOP_REQUEST: case ARPOP_REPLY: default: type = htons(ETHERTYPE_ARP); break; } if (m->m_flags & M_BCAST) bcopy(ifp->if_broadcastaddr, edst, INFINIBAND_ALEN); else bcopy(ar_tha(ah), edst, INFINIBAND_ALEN); } break; #endif #ifdef INET6 case AF_INET6: if (lle != NULL && (lle->la_flags & LLE_VALID)) memcpy(edst, lle->ll_addr, sizeof(edst)); else if (m->m_flags & M_MCAST) ipv6_ib_mc_map(&((struct sockaddr_in6 *)dst)->sin6_addr, ifp->if_broadcastaddr, edst); else error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL); if (error) return error; type = htons(ETHERTYPE_IPV6); break; #endif default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); error = EAFNOSUPPORT; goto bad; } /* * Add local net header. If no space in first mbuf, * allocate another. */ M_PREPEND(m, IPOIB_HEADER_LEN, M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto bad; } eh = mtod(m, struct ipoib_header *); (void)memcpy(&eh->proto, &type, sizeof(eh->proto)); (void)memcpy(&eh->hwaddr, edst, sizeof (edst)); /* * Queue message on interface, update output statistics if * successful, and start output if interface not yet active. */ return ((ifp->if_transmit)(ifp, m)); bad: if (m != NULL) m_freem(m); return (error); } /* * Upper layer processing for a received Infiniband packet. */ void ipoib_demux(struct ifnet *ifp, struct mbuf *m, u_short proto) { struct epoch_tracker et; int isr; #ifdef MAC /* * Tag the mbuf with an appropriate MAC label before any other * consumers can get to it. */ mac_ifnet_create_mbuf(ifp, m); #endif /* Allow monitor mode to claim this frame, after stats are updated. */ if (ifp->if_flags & IFF_MONITOR) { if_printf(ifp, "discard frame at IFF_MONITOR\n"); m_freem(m); return; } /* * Dispatch frame to upper layer. */ switch (proto) { #ifdef INET case ETHERTYPE_IP: isr = NETISR_IP; break; case ETHERTYPE_ARP: if (ifp->if_flags & IFF_NOARP) { /* Discard packet if ARP is disabled on interface */ m_freem(m); return; } isr = NETISR_ARP; break; #endif #ifdef INET6 case ETHERTYPE_IPV6: isr = NETISR_IPV6; break; #endif default: goto discard; } NET_EPOCH_ENTER(et); netisr_dispatch(isr, m); NET_EPOCH_EXIT(et); return; discard: m_freem(m); } /* * Process a received Infiniband packet. */ static void ipoib_input(struct ifnet *ifp, struct mbuf *m) { struct ipoib_header *eh; if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); return; } CURVNET_SET_QUIET(ifp->if_vnet); /* Let BPF have it before we strip the header. */ IPOIB_MTAP(ifp, m); eh = mtod(m, struct ipoib_header *); /* * Reset layer specific mbuf flags to avoid confusing upper layers. * Strip off Infiniband header. */ m->m_flags &= ~M_VLANTAG; m_clrprotoflags(m); m_adj(m, IPOIB_HEADER_LEN); if (IPOIB_IS_MULTICAST(eh->hwaddr)) { if (memcmp(eh->hwaddr, ifp->if_broadcastaddr, ifp->if_addrlen) == 0) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } ipoib_demux(ifp, m, ntohs(eh->proto)); CURVNET_RESTORE(); } static int ipoib_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, struct sockaddr *sa) { struct sockaddr_dl *sdl; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif u_char *e_addr; switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. Just check that it's a valid MC address. */ sdl = (struct sockaddr_dl *)sa; e_addr = LLADDR(sdl); if (!IPOIB_IS_MULTICAST(e_addr)) return EADDRNOTAVAIL; *llsa = NULL; return 0; #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND); sdl->sdl_alen = INFINIBAND_ALEN; e_addr = LLADDR(sdl); ip_ib_mc_map(sin->sin_addr.s_addr, ifp->if_broadcastaddr, e_addr); *llsa = (struct sockaddr *)sdl; return 0; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; /* * An IP6 address of 0 means listen to all * of the multicast address used for IP6. * This has no meaning in ipoib. */ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) return EADDRNOTAVAIL; if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND); sdl->sdl_alen = INFINIBAND_ALEN; e_addr = LLADDR(sdl); ipv6_ib_mc_map(&sin6->sin6_addr, ifp->if_broadcastaddr, e_addr); *llsa = (struct sockaddr *)sdl; return 0; #endif default: return EAFNOSUPPORT; } } module_init(ipoib_init_module); module_exit(ipoib_cleanup_module); static int ipoib_evhand(module_t mod, int event, void *arg) { - return (0); + return (0); } static moduledata_t ipoib_mod = { - .name = "ipoib", - .evhand = ipoib_evhand, + .name = "ipoib", + .evhand = ipoib_evhand, }; DECLARE_MODULE(ipoib, ipoib_mod, SI_SUB_LAST, SI_ORDER_ANY); MODULE_DEPEND(ipoib, ibcore, 1, 1, 1); MODULE_DEPEND(ipoib, linuxkpi, 1, 1, 1); diff --git a/sys/powerpc/conf/GENERIC64 b/sys/powerpc/conf/GENERIC64 index 012731854520..0c2d96afe88a 100644 --- a/sys/powerpc/conf/GENERIC64 +++ b/sys/powerpc/conf/GENERIC64 @@ -1,267 +1,268 @@ # # GENERIC -- Generic kernel configuration file for FreeBSD/powerpc # # For more information on this file, please read the handbook section on # Kernel Configuration Files: # # https://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html # # The handbook is also available locally in /usr/share/doc/handbook # if you've installed the doc distribution, otherwise always see the # FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the # latest information. # # An exhaustive list of options and more detailed explanations of the # device lines is also present in the ../../conf/NOTES and NOTES files. # If you are in doubt as to the purpose or necessity of a line, check first # in NOTES. # # $FreeBSD$ cpu AIM ident GENERIC machine powerpc powerpc64 makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols makeoptions WITH_CTF=1 # Platform support options POWERMAC #NewWorld Apple PowerMacs options PS3 #Sony Playstation 3 options MAMBO #IBM Mambo Full System Simulator options PSERIES #PAPR-compliant systems (e.g. IBM p) options POWERNV #Non-virtualized OpenPOWER systems options FDT #Flattened Device Tree options SCHED_ULE #ULE scheduler options NUMA #Non-Uniform Memory Architecture support options PREEMPTION #Enable kernel thread preemption options VIMAGE # Subsystem virtualization, e.g. VNET options INET #InterNETworking options INET6 #IPv6 communications protocols options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 options TCP_OFFLOAD # TCP offload options TCP_BLACKBOX # Enhanced TCP event logging options TCP_HHOOK # hhook(9) framework for TCP options TCP_RFC7413 # TCP Fast Open options SCTP #Stream Control Transmission Protocol options FFS #Berkeley Fast Filesystem options SOFTUPDATES #Enable FFS soft updates support options UFS_ACL #Support for access control lists options UFS_DIRHASH #Improve performance on big directories options UFS_GJOURNAL #Enable gjournal-based UFS journaling options QUOTA #Enable disk quotas for UFS options MD_ROOT #MD is a potential root device options MD_ROOT_MEM #Enable use of initrd as MD root options NFSCL #Network Filesystem Client options NFSD #Network Filesystem Server options NFSLOCKD #Network Lock Manager options NFS_ROOT #NFS usable as root device options MSDOSFS #MSDOS Filesystem options CD9660 #ISO 9660 Filesystem options PROCFS #Process filesystem (requires PSEUDOFS) options PSEUDOFS #Pseudo-filesystem framework options GEOM_PART_APM #Apple Partition Maps. options GEOM_PART_GPT #GUID Partition Tables. options GEOM_LABEL #Provides labelization options COMPAT_FREEBSD32 #Compatible with FreeBSD/powerpc binaries options COMPAT_FREEBSD5 #Compatible with FreeBSD5 options COMPAT_FREEBSD6 #Compatible with FreeBSD6 options COMPAT_FREEBSD7 #Compatible with FreeBSD7 options COMPAT_FREEBSD9 # Compatible with FreeBSD9 options COMPAT_FREEBSD10 # Compatible with FreeBSD10 options COMPAT_FREEBSD11 # Compatible with FreeBSD11 options COMPAT_FREEBSD12 # Compatible with FreeBSD12 options SCSI_DELAY=5000 #Delay (in ms) before probing SCSI options KTRACE #ktrace(1) syscall trace support options STACK #stack(9) support options SYSVSHM #SYSV-style shared memory options SYSVMSG #SYSV-style message queues options SYSVSEM #SYSV-style semaphores options _KPOSIX_PRIORITY_SCHEDULING #Posix P1003_1B real-time extensions options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options CAPABILITY_MODE # Capsicum capability mode options CAPABILITIES # Capsicum capabilities options MAC # TrustedBSD MAC Framework options KDTRACE_HOOKS # Kernel DTrace hooks options DDB_CTF # Kernel ELF linker loads CTF data options INCLUDE_CONFIG_FILE # Include this file in kernel options RACCT # Resource accounting framework options RACCT_DEFAULT_TO_DISABLED # Set kern.racct.enable=0 by default options RCTL # Resource limits # Debugging support. Always need this: options KDB # Enable kernel debugger support. options KDB_TRACE # Print a stack trace for a panic. # For full debugger support use (turn off in stable branch): options DDB #Support DDB #options DEADLKRES #Enable the deadlock resolver options INVARIANTS #Enable calls of extra sanity checking options INVARIANT_SUPPORT #Extra sanity checks of internal structures, required by INVARIANTS options WITNESS #Enable checks to detect deadlocks and cycles options WITNESS_SKIPSPIN #Don't run witness on spinlocks for speed options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default # Kernel dump features. options EKCD # Support for encrypted kernel dumps options GZIO # gzip-compressed kernel and user dumps options ZSTDIO # zstd-compressed kernel and user dumps options DEBUGNET # debugnet networking options NETDUMP # netdump(4) client support # Make an SMP-capable kernel by default options SMP # Symmetric MultiProcessor Kernel # CPU frequency control device cpufreq # Standard busses device pci options PCI_HP # PCI-Express native HotPlug device agp # ATA controllers device ahci # AHCI-compatible SATA controllers device ata # Legacy ATA/SATA controllers device mvs # Marvell 88SX50XX/88SX60XX/88SX70XX/SoC SATA device siis # SiliconImage SiI3124/SiI3132/SiI3531 SATA # NVM Express (NVMe) support device nvme # base NVMe driver options NVME_USE_NVD=0 # prefer the cam(4) based nda(4) driver device nvd # expose NVMe namespaces as disks, depends on nvme # SCSI Controllers +device aacraid # Adaptec by PMC RAID device ahc # AHA2940 and onboard AIC7xxx devices options AHC_ALLOW_MEMIO # Attempt to use memory mapped I/O device isp # Qlogic family device ispfw # Firmware module for Qlogic host adapters device mpt # LSI-Logic MPT-Fusion device mps # LSI-Logic MPT-Fusion 2 device sym # NCR/Symbios/LSI Logic 53C8XX/53C1010/53C1510D # ATA/SCSI peripherals device scbus # SCSI bus (required for ATA/SCSI) device ch # SCSI media changers device da # Direct Access (disks) device sa # Sequential Access (tape etc) device cd # CD device pass # Passthrough device (direct ATA/SCSI access) device ses # Enclosure Service (SES and SAF-TE) # vt is the default console driver, resembling an SCO console device vt # Core console driver device kbdmux # Serial (COM) ports device scc device uart device uart_z8530 device iflib # Ethernet hardware device em # Intel PRO/1000 Gigabit Ethernet Family device ix # Intel PRO/10GbE PCIE PF Ethernet Family device ixv # Intel PRO/10GbE PCIE VF Ethernet Family device glc # Sony Playstation 3 Ethernet device llan # IBM pSeries Virtual Ethernet device cxgbe # Chelsio 10/25G NIC # PCI Ethernet NICs that use the common MII bus controller code. device miibus # MII bus support device bge # Broadcom BCM570xx Gigabit Ethernet device gem # Sun GEM/Sun ERI/Apple GMAC device dc # DEC/Intel 21143 and various workalikes device fxp # Intel EtherExpress PRO/100B (82557, 82558) device re # RealTek 8139C+/8169/8169S/8110S device rl # RealTek 8129/8139 # Pseudo devices. device crypto # core crypto support device loop # Network loopback device ether # Ethernet support device vlan # 802.1Q VLAN support device tuntap # Packet tunnel. device md # Memory "disks" device ofwd # Open Firmware disks device gif # IPv6 and IPv4 tunneling device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! # Note that 'bpf' is required for DHCP. device bpf #Berkeley packet filter # USB support options USB_DEBUG # enable debug msgs device uhci # UHCI PCI->USB interface device ohci # OHCI PCI->USB interface device ehci # EHCI PCI->USB interface device xhci # XHCI PCI->USB interface device usb # USB Bus (required) device uhid # "Human Interface Devices" device ukbd # Keyboard options KBD_INSTALL_CDEV # install a CDEV entry in /dev device umass # Disks/Mass storage - Requires scbus and da0 device ums # Mouse # USB Ethernet device aue # ADMtek USB Ethernet device axe # ASIX Electronics USB Ethernet device cdce # Generic USB over Ethernet device cue # CATC USB Ethernet device kue # Kawasaki LSI USB Ethernet # Wireless NIC cards options IEEE80211_SUPPORT_MESH # FireWire support device firewire # FireWire bus code device sbp # SCSI over FireWire (Requires scbus and da) device fwe # Ethernet over FireWire (non-standard!) # Misc device iicbus # I2C bus code device iic device kiic # Keywest I2C device ad7417 # PowerMac7,2 temperature sensor device ds1631 # PowerMac11,2 temperature sensor device ds1775 # PowerMac7,2 temperature sensor device fcu # Apple Fan Control Unit device max6690 # PowerMac7,2 temperature sensor device powermac_nvram # Open Firmware configuration NVRAM device smu # Apple System Management Unit device atibl # ATI-based backlight driver for PowerBooks/iBooks device nvbl # nVidia-based backlight driver for PowerBooks/iBooks device opalflash # PowerNV embedded flash memory # ADB support device adb device pmu # Sound support device sound # Generic sound driver (required) device snd_ai2s # Apple I2S audio device snd_hda # Intel High Definition Audio device snd_uaudio # USB Audio # Netmap provides direct access to TX/RX rings on supported NICs device netmap # netmap(4) support # evdev interface options EVDEV_SUPPORT # evdev support in legacy drivers device evdev # input event device support device uinput # install /dev/uinput cdev # VirtIO support device virtio # Generic VirtIO bus (required) device virtio_pci # VirtIO PCI device device vtnet # VirtIO Ethernet device device virtio_blk # VirtIO Block device device virtio_scsi # VirtIO SCSI device device virtio_balloon # VirtIO Memory Balloon device diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h index fa6accf6156b..839f26706f47 100644 --- a/sys/sys/kernel.h +++ b/sys/sys/kernel.h @@ -1,452 +1,456 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1995 Terrence R. Lambert * All rights reserved. * * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kernel.h 8.3 (Berkeley) 1/21/94 * $FreeBSD$ */ #ifndef _SYS_KERNEL_H_ #define _SYS_KERNEL_H_ #include #ifdef _KERNEL /* for intrhook below */ #include /* for timestamping SYSINITs; other files may assume this is included here */ #include /* Global variables for the kernel. */ /* 1.1 */ extern char kernelname[MAXPATHLEN]; extern int tick; /* usec per tick (1000000 / hz) */ extern int hz; /* system clock's frequency */ extern int psratio; /* ratio: prof / stat */ extern int stathz; /* statistics clock's frequency */ extern int profhz; /* profiling clock's frequency */ extern int profprocs; /* number of process's profiling */ extern volatile int ticks; #endif /* _KERNEL */ /* * Enumerated types for known system startup interfaces. * * Startup occurs in ascending numeric order; the list entries are * sorted prior to attempting startup to guarantee order. Items * of the same level are arbitrated for order based on the 'order' * element. * * These numbers are arbitrary and are chosen ONLY for ordering; the * enumeration values are explicit rather than implicit to provide * for binary compatibility with inserted elements. * * The SI_SUB_LAST value must have the highest lexical value. */ enum sysinit_sub_id { SI_SUB_DUMMY = 0x0000000, /* not executed; for linker*/ SI_SUB_DONE = 0x0000001, /* processed*/ SI_SUB_TUNABLES = 0x0700000, /* establish tunable values */ SI_SUB_COPYRIGHT = 0x0800001, /* first use of console*/ SI_SUB_VM = 0x1000000, /* virtual memory system init*/ SI_SUB_KMEM = 0x1800000, /* kernel memory*/ SI_SUB_HYPERVISOR = 0x1A40000, /* * Hypervisor detection and * virtualization support * setup. */ SI_SUB_WITNESS = 0x1A80000, /* witness initialization */ SI_SUB_MTX_POOL_DYNAMIC = 0x1AC0000, /* dynamic mutex pool */ SI_SUB_LOCK = 0x1B00000, /* various locks */ SI_SUB_EVENTHANDLER = 0x1C00000, /* eventhandler init */ SI_SUB_VNET_PRELINK = 0x1E00000, /* vnet init before modules */ SI_SUB_KLD = 0x2000000, /* KLD and module setup */ SI_SUB_CPU = 0x2100000, /* CPU resource(s)*/ SI_SUB_RACCT = 0x2110000, /* resource accounting */ SI_SUB_KDTRACE = 0x2140000, /* Kernel dtrace hooks */ SI_SUB_RANDOM = 0x2160000, /* random number generator */ SI_SUB_MAC = 0x2180000, /* TrustedBSD MAC subsystem */ SI_SUB_MAC_POLICY = 0x21C0000, /* TrustedBSD MAC policies */ SI_SUB_MAC_LATE = 0x21D0000, /* TrustedBSD MAC subsystem */ SI_SUB_VNET = 0x21E0000, /* vnet 0 */ SI_SUB_INTRINSIC = 0x2200000, /* proc 0*/ SI_SUB_VM_CONF = 0x2300000, /* config VM, set limits*/ SI_SUB_DDB_SERVICES = 0x2380000, /* capture, scripting, etc. */ SI_SUB_RUN_QUEUE = 0x2400000, /* set up run queue*/ SI_SUB_KTRACE = 0x2480000, /* ktrace */ SI_SUB_OPENSOLARIS = 0x2490000, /* OpenSolaris compatibility */ SI_SUB_AUDIT = 0x24C0000, /* audit */ SI_SUB_CREATE_INIT = 0x2500000, /* create init process*/ SI_SUB_SCHED_IDLE = 0x2600000, /* required idle procs */ SI_SUB_MBUF = 0x2700000, /* mbuf subsystem */ SI_SUB_INTR = 0x2800000, /* interrupt threads */ SI_SUB_TASKQ = 0x2880000, /* task queues */ SI_SUB_EPOCH = 0x2888000, /* epoch subsystem */ #ifdef EARLY_AP_STARTUP SI_SUB_SMP = 0x2900000, /* start the APs*/ #endif SI_SUB_SOFTINTR = 0x2A00000, /* start soft interrupt thread */ SI_SUB_DEVFS = 0x2F00000, /* devfs ready for devices */ SI_SUB_INIT_IF = 0x3000000, /* prep for net interfaces */ SI_SUB_NETGRAPH = 0x3010000, /* Let Netgraph initialize */ SI_SUB_DTRACE = 0x3020000, /* DTrace subsystem */ SI_SUB_DTRACE_PROVIDER = 0x3048000, /* DTrace providers */ SI_SUB_DTRACE_ANON = 0x308C000, /* DTrace anon enabling */ SI_SUB_DRIVERS = 0x3100000, /* Let Drivers initialize */ SI_SUB_CONFIGURE = 0x3800000, /* Configure devices */ SI_SUB_VFS = 0x4000000, /* virtual filesystem*/ SI_SUB_CLOCKS = 0x4800000, /* real time and stat clocks*/ SI_SUB_SYSV_SHM = 0x6400000, /* System V shared memory*/ SI_SUB_SYSV_SEM = 0x6800000, /* System V semaphores*/ SI_SUB_SYSV_MSG = 0x6C00000, /* System V message queues*/ SI_SUB_P1003_1B = 0x6E00000, /* P1003.1B realtime */ SI_SUB_PSEUDO = 0x7000000, /* pseudo devices*/ SI_SUB_EXEC = 0x7400000, /* execve() handlers */ SI_SUB_PROTO_BEGIN = 0x8000000, /* VNET initialization */ SI_SUB_PROTO_PFIL = 0x8100000, /* Initialize pfil before FWs */ SI_SUB_PROTO_IF = 0x8400000, /* interfaces*/ SI_SUB_PROTO_DOMAININIT = 0x8600000, /* domain registration system */ SI_SUB_PROTO_MC = 0x8700000, /* Multicast */ SI_SUB_PROTO_DOMAIN = 0x8800000, /* domains (address families?)*/ SI_SUB_PROTO_FIREWALL = 0x8806000, /* Firewalls */ SI_SUB_PROTO_IFATTACHDOMAIN = 0x8808000,/* domain dependent data init */ SI_SUB_PROTO_END = 0x8ffffff, /* VNET helper functions */ SI_SUB_KPROF = 0x9000000, /* kernel profiling*/ SI_SUB_KICK_SCHEDULER = 0xa000000, /* start the timeout events*/ SI_SUB_INT_CONFIG_HOOKS = 0xa800000, /* Interrupts enabled config */ SI_SUB_ROOT_CONF = 0xb000000, /* Find root devices */ SI_SUB_INTRINSIC_POST = 0xd000000, /* proc 0 cleanup*/ SI_SUB_SYSCALLS = 0xd800000, /* register system calls */ SI_SUB_VNET_DONE = 0xdc00000, /* vnet registration complete */ SI_SUB_KTHREAD_INIT = 0xe000000, /* init process*/ SI_SUB_KTHREAD_PAGE = 0xe400000, /* pageout daemon*/ SI_SUB_KTHREAD_VM = 0xe800000, /* vm daemon*/ SI_SUB_KTHREAD_BUF = 0xea00000, /* buffer daemon*/ SI_SUB_KTHREAD_UPDATE = 0xec00000, /* update daemon*/ SI_SUB_KTHREAD_IDLE = 0xee00000, /* idle procs*/ #ifndef EARLY_AP_STARTUP SI_SUB_SMP = 0xf000000, /* start the APs*/ #endif SI_SUB_RACCTD = 0xf100000, /* start racctd*/ SI_SUB_LAST = 0xfffffff /* final initialization */ }; /* * Some enumerated orders; "ANY" sorts last. */ enum sysinit_elem_order { SI_ORDER_FIRST = 0x0000000, /* first*/ SI_ORDER_SECOND = 0x0000001, /* second*/ SI_ORDER_THIRD = 0x0000002, /* third*/ SI_ORDER_FOURTH = 0x0000003, /* fourth*/ + SI_ORDER_FIFTH = 0x0000004, /* fifth*/ + SI_ORDER_SIXTH = 0x0000005, /* sixth*/ + SI_ORDER_SEVENTH = 0x0000006, /* seventh*/ + SI_ORDER_EIGHTH = 0x0000007, /* eighth*/ SI_ORDER_MIDDLE = 0x1000000, /* somewhere in the middle */ SI_ORDER_ANY = 0xfffffff /* last*/ }; /* * A system initialization call instance * * At the moment there is one instance of sysinit. We probably do not * want two which is why this code is if'd out, but we definitely want * to discern SYSINIT's which take non-constant data pointers and * SYSINIT's which take constant data pointers, * * The C_* macros take functions expecting const void * arguments * while the non-C_* macros take functions expecting just void * arguments. * * With -Wcast-qual on, the compiler issues warnings: * - if we pass non-const data or functions taking non-const data * to a C_* macro. * * - if we pass const data to the normal macros * * However, no warning is issued if we pass a function taking const data * through a normal non-const macro. This is ok because the function is * saying it won't modify the data so we don't care whether the data is * modifiable or not. */ typedef void (*sysinit_nfunc_t)(void *); typedef void (*sysinit_cfunc_t)(const void *); struct sysinit { enum sysinit_sub_id subsystem; /* subsystem identifier*/ enum sysinit_elem_order order; /* init order within subsystem*/ sysinit_cfunc_t func; /* function */ const void *udata; /* multiplexer/argument */ }; /* * Default: no special processing * * The C_ version of SYSINIT is for data pointers to const * data ( and functions taking data pointers to const data ). * At the moment it is no different from SYSINIT and thus * still results in warnings. * * The casts are necessary to have the compiler produce the * correct warnings when -Wcast-qual is used. * */ #ifdef TSLOG struct sysinit_tslog { sysinit_cfunc_t func; const void * data; const char * name; }; static inline void sysinit_tslog_shim(const void * data) { const struct sysinit_tslog * x = data; TSRAW(curthread, TS_ENTER, "SYSINIT", x->name); (x->func)(x->data); TSRAW(curthread, TS_EXIT, "SYSINIT", x->name); } #define C_SYSINIT(uniquifier, subsystem, order, func, ident) \ static struct sysinit_tslog uniquifier ## _sys_init_tslog = { \ func, \ (ident), \ #uniquifier \ }; \ static struct sysinit uniquifier ## _sys_init = { \ subsystem, \ order, \ sysinit_tslog_shim, \ &uniquifier ## _sys_init_tslog \ }; \ DATA_WSET(sysinit_set,uniquifier ## _sys_init) #else #define C_SYSINIT(uniquifier, subsystem, order, func, ident) \ static struct sysinit uniquifier ## _sys_init = { \ subsystem, \ order, \ func, \ (ident) \ }; \ DATA_WSET(sysinit_set,uniquifier ## _sys_init) #endif #define SYSINIT(uniquifier, subsystem, order, func, ident) \ C_SYSINIT(uniquifier, subsystem, order, \ (sysinit_cfunc_t)(sysinit_nfunc_t)func, (void *)(ident)) /* * Called on module unload: no special processing */ #define C_SYSUNINIT(uniquifier, subsystem, order, func, ident) \ static struct sysinit uniquifier ## _sys_uninit = { \ subsystem, \ order, \ func, \ (ident) \ }; \ DATA_WSET(sysuninit_set,uniquifier ## _sys_uninit) #define SYSUNINIT(uniquifier, subsystem, order, func, ident) \ C_SYSUNINIT(uniquifier, subsystem, order, \ (sysinit_cfunc_t)(sysinit_nfunc_t)func, (void *)(ident)) void sysinit_add(struct sysinit **set, struct sysinit **set_end); /* * Infrastructure for tunable 'constants'. Value may be specified at compile * time or kernel load time. Rules relating tunables together can be placed * in a SYSINIT function at SI_SUB_TUNABLES with SI_ORDER_ANY. * * WARNING: developers should never use the reserved suffixes specified in * loader.conf(5) for any tunables or conflicts will result. */ /* * int * please avoid using for new tunables! */ extern void tunable_int_init(void *); struct tunable_int { const char *path; int *var; }; #define TUNABLE_INT(path, var) \ static struct tunable_int __CONCAT(__tunable_int_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_int_init, \ &__CONCAT(__tunable_int_, __LINE__)) #define TUNABLE_INT_FETCH(path, var) getenv_int((path), (var)) /* * long */ extern void tunable_long_init(void *); struct tunable_long { const char *path; long *var; }; #define TUNABLE_LONG(path, var) \ static struct tunable_long __CONCAT(__tunable_long_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_long_init,\ &__CONCAT(__tunable_long_, __LINE__)) #define TUNABLE_LONG_FETCH(path, var) getenv_long((path), (var)) /* * unsigned long */ extern void tunable_ulong_init(void *); struct tunable_ulong { const char *path; unsigned long *var; }; #define TUNABLE_ULONG(path, var) \ static struct tunable_ulong __CONCAT(__tunable_ulong_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_ulong_init, \ &__CONCAT(__tunable_ulong_, __LINE__)) #define TUNABLE_ULONG_FETCH(path, var) getenv_ulong((path), (var)) /* * int64_t */ extern void tunable_int64_init(void *); struct tunable_int64 { const char *path; int64_t *var; }; #define TUNABLE_INT64(path, var) \ static struct tunable_int64 __CONCAT(__tunable_int64_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_int64_init, \ &__CONCAT(__tunable_int64_, __LINE__)) #define TUNABLE_INT64_FETCH(path, var) getenv_int64((path), (var)) /* * uint64_t */ extern void tunable_uint64_init(void *); struct tunable_uint64 { const char *path; uint64_t *var; }; #define TUNABLE_UINT64(path, var) \ static struct tunable_uint64 __CONCAT(__tunable_uint64_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_uint64_init, \ &__CONCAT(__tunable_uint64_, __LINE__)) #define TUNABLE_UINT64_FETCH(path, var) getenv_uint64((path), (var)) /* * quad */ extern void tunable_quad_init(void *); struct tunable_quad { const char *path; quad_t *var; }; #define TUNABLE_QUAD(path, var) \ static struct tunable_quad __CONCAT(__tunable_quad_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_quad_init, \ &__CONCAT(__tunable_quad_, __LINE__)) #define TUNABLE_QUAD_FETCH(path, var) getenv_quad((path), (var)) extern void tunable_str_init(void *); struct tunable_str { const char *path; char *var; int size; }; #define TUNABLE_STR(path, var, size) \ static struct tunable_str __CONCAT(__tunable_str_, __LINE__) = { \ (path), \ (var), \ (size), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_str_init, \ &__CONCAT(__tunable_str_, __LINE__)) #define TUNABLE_STR_FETCH(path, var, size) \ getenv_string((path), (var), (size)) typedef void (*ich_func_t)(void *_arg); struct intr_config_hook { TAILQ_ENTRY(intr_config_hook) ich_links; ich_func_t ich_func; void *ich_arg; }; int config_intrhook_establish(struct intr_config_hook *hook); void config_intrhook_disestablish(struct intr_config_hook *hook); void config_intrhook_oneshot(ich_func_t _func, void *_arg); #endif /* !_SYS_KERNEL_H_*/